I wrote a naive Node.js script to migrate a MySQL table to an ArangoDB collection.
It works quite well, despite that there are always records missing as if the connection was closed too early. How many documents will be missing is not random however, it's always the same amount:
There are 68,750 records in the source,
my self-built buffer has a size of 1,000 and
68,682 (-68) documents are created in ArangoDB
var mysql = require('mysql');
var arango = require('arango');
var docs = [];
function processRow(row, connection) {
if (docs.length < 1000 && row !== false) {
docs.push(row);
} else {
connection.pause();
db.import.importJSONData(
"target_collection",
JSON.stringify(docs, function(key, value) {
if (value == null || (typeof value === "string" && !value.trim())) {
return undefined;
} else {
return value;
}
}),
{
createCollection: true,
waitForSync: false
},
function(err, ret) {
docs = [];
connection.resume();
if (row === false) process.exit();
}
);
}
}
var connection = mysql.createConnection({
host: 'localhost',
user: 'root',
password: ''
});
var db = arango.Connection("http://localhost:8529/my_database");
connection.connect();
var query = connection.query('SELECT * FROM my_database.source_table');
var i = 0;
query
.on('error', function(err) {
console.log(err);
})
.on('result', function(row) {
i++;
if (i % 1000 == 0) console.log(i);
processRow(row, connection);
})
.on('end', function() {
processRow(false, connection);
});
Another version of the script I wrote uses a transform stream and imports exactly 68,744 records, and a third script all of the records but creates the target collection and records as it finishes, although it should write every n source records.
Is there something obvious I am missing here?
A counter variable can confirm that all 68,750 records are read and there are no source records which are completely empty (all columns NULL), as there's at least a primary key integer (and I also tried without customized JSON stringify handler).
Solution:
Do something with every nth row when buffer is full, credits go to mscdex and mchacki for finding this obvious mistake!
Fixed stream_array_join.js