Problem
I'm trying to write millions of strings into a file using Node.js streams, but the RAM usage goes up to 800MB during the process:
// Walks `dir` with walkdir and writes every emitted path, quoted and followed by a comma,
// into C:/test/file.txt between '[' and ']'. On 'end' it gzips that file to file.txt.gz.
const fs = require('fs')
const walkdir = require('walkdir')
// Options object handed to walkdir() below.
let options = {
"max_depth": 0,
"track_inodes": true
}
let dir = "C:/"
// `paths` is used below as an event emitter ('path' and 'end' events).
let paths = walkdir(dir, options)
var wstream = fs.createWriteStream('C:/test/file.txt')
wstream.write('[')
paths.on('path', function(path, stat) {
// NOTE(review): the value returned by write() is ignored, so nothing here slows the
// walker down when the stream cannot keep up — a likely source of the memory growth; confirm.
// NOTE(review): `path` is interpolated unescaped and every entry ends with ',', so the
// output ends in ",]" — if the file is meant to be JSON, verify it parses.
wstream.write(`"${path}",`)
})
paths.on('end', function(path, stat) {
wstream.write(']')
wstream.end()
// Compressing the file after it's written:
// NOTE(review): the read stream is created right after end(), with no wait for the write
// stream's 'finish' event — presumably not all data is on disk yet at this point; verify.
const gzip = require('zlib').createGzip()
const inp = fs.createReadStream('C:/test/file.txt')
const out = fs.createWriteStream('C:/test/file.txt.gz')
inp.pipe(gzip).pipe(out)
})
I also tried writing the file like this:
...
// Variant: on every 'path' event, synchronously write that path to the output file.
// NOTE(review): fs.writeFileSync replaces the file's contents unless an append flag is
// passed, so each call here would overwrite the previous path — confirm this is not intended.
paths.on('path', function(path, stat) {
fs.writeFileSync('C:/test/file.txt', path)
})
...
And I also tried sync:
// Synchronous variant: `callback` is passed to walkdir.sync and writes one quoted path
// per invocation to `wstream`.
walkdir.sync(dir, options, callback)
// Writes `"<path>",` to wstream. When write() returns a falsy value, registers itself as a
// one-time 'drain' listener; otherwise it calls itself again immediately.
function callback(path) {
let res = wstream.write(`"${path}",`)
if (!res) {
// NOTE(review): when re-run from 'drain', `path` is presumably undefined as well — verify.
wstream.once('drain', callback)
}
else {
// NOTE(review): this re-invokes callback with no argument, so `path` is undefined on the
// nested call and the recursion only stops once write() returns false — looks unintended; confirm.
callback()
}
}
But both of these produce the same result: RAM usage goes up to around 500-800 MB.
I also tried the following method. The RAM usage always stays at ~100 MB, but it doesn't really work: it writes 412 KB into the file and then keeps utilizing the CPU while nothing else happens (the other methods finish writing the file in under 1-2 minutes).
// readdirp variant: pipes the entry stream through an object-mode Transform that forwards
// only each entry's `path`, and pipes the result into `wstream`.
// NOTE(review): `dir` and `wstream` are not defined in this snippet — presumably the ones
// from the first example; confirm.
const readdirp = require('readdirp');
const { Transform } = require('stream');
const entryInfoStream = readdirp({
root: dir
});
entryInfoStream
.pipe(new Transform({
objectMode: true,
transform(entryInfo, encoding, callback) {
// Forwards the path as-is; no quoting or separator is added between entries.
this.push(entryInfo.path);
callback();
},
}))
.pipe(wstream);
Questions
How do I make sure the stream works as expected (low memory usage)?
How do I compress (gzip) the file during the writing process? Or can I only do it after it's written?
flush method to overcome this lacks documentation examples :-( `const readable = require('stream').Readable` and then sending the `paths` from within that emitter and then trying to write it like this `readable.on('data', (path) => { WRITE HERE }` hoping it would drain it automatically once it has readable/writable. But still no luck, I guess I'm doing it wrong