3

I've got a node.js script that loads an XML file. It loops through each element in the Mongo array and says that they're all getting inserted correctly, but when the script has completed a check of db.collection.count(); tells me that far fewer records have been inserted into the database than the number expected.

How can I make mongo and node.js play nicely with inserts?

GrabRss = function() {
var http    = require('http');
var sys     = require('sys');
var xml2js  = require('xml2js');
var fs      = require('fs');

var Db      = require('../lib/mongodb').Db,
    Conn    = require('../lib/mongodb').Connection,
    Server  = require('../lib/mongodb').Server,
    // BSON = require('../lib/mongodb').BSONPure;
    BSON    = require('../lib/mongodb').BSONNative;

var data;
var checked = 0;
var len = 0;

GotResponse = function(res) {
    var ResponseBody = "";
    res.on('data', DoChunk);
    res.on('end', EndResponse);

    function DoChunk(chunk){
        ResponseBody += chunk;
    }
    function EndResponse() {
        //console.log(ResponseBody);
        var parser = new xml2js.Parser();
        parser.addListener('end', GotRSSObject);
        parser.parseString(ResponseBody);
    }
}

GotError = function(e) {
    console.log("Got error: " + e.message);
}

GotRSSObject = function(r){
    items = r.item;
    //console.log(sys.inspect(r));

    var db = new Db('myrssdb', new Server('localhost', 27017, {}), {native_parser:false});
    db.open(function(err, db){
         db.collection('items', function(err, col) {
            len = movies.length;
            for (i in items) {
                SaveItem(items[i], col);
            }
         });
    });
}

SaveItem = function(m, c) {
            /*  REPLACE FROM HERE IN ANSWER  */
    c.find({'id': m.id}, function(err, cursor){
        cursor.nextObject(function(err, doc) { 
            if (doc == null) {
                c.insert(m, function(err, docs) {
                    docs.forEach(function(doc) {
                        console.log('Saved: '+doc.id+' '+doc.keywords);
                    });
                });
            } else {
                console.log('Skipped: '+m.id);
            }
            if (++checked >= len) {
                process.exit(0);
            } 
        });
    });
            /*  REPLACE TO HERE IN ANSWER  */
}
//http.get(options, GotResponse).on('error', GotError);
var x2js = new xml2js.Parser();

fs.readFile('/home/ubuntu/myrss.rss', function(err, data) {
    x2js.parseString(data);
});

x2js.addListener('end', GotRSSObject);

}
GrabRss();

As requested, the code is above. The file is read locally (though is used to be an HTTP request, but it's a 25 meg file now, lots of RSS records)

I just ran the file with some ~10k records in it and a count of the items in the mongoDB after the script has run is about 800 items.

As per the answer I replaced the insert code:

with:

c.update({'id': m.id}, {$set: m}, {upsert: true, safe: true}, function(err){
    if (err) console.warn(err.message);
        else console.log(m.keywords);
        if (++checked >= len) {
            console.log(len);
            //process.exit(0);

            process.exit(0);
        } 
});
2
  • You seem to have missed something in your description there ... Commented May 22, 2011 at 2:42
  • Could you add some more details to the question, and maybe post some of the code? Commented May 22, 2011 at 3:01

1 Answer 1

7

By default, MongoDB writes do not check for an error.

You need to set safe:true in the options to your insert, as explained in the documentation for node-mongodb-native:

var collection = new mongodb.Collection(client, 'test_collection');
collection.insert({hello: 'world'}, {safe:true},
                  function(err, objects) {
  if (err) console.warn(err.message);
  if (err && err.message.indexOf('E11000 ') !== -1) {
    // this _id was already inserted in the database
  }
});

Otherwise your callback will not be invoked for errors and your client won't know about them.

You probably also want to look at upserts and updates, as it is incredibly inefficient to do find & insert if null in a loop.

Instead, upsert will update if the matching document exists, otherwise it will insert a new one. An explanation on how to do this in Node is in the documentaiton for the driver.

Sign up to request clarification or add additional context in comments.

3 Comments

update works a LOT better. 10K/10K went in on the first run, about to try the 30K file.
been running for about 10 min now but so far it looks like 100% of the records are going in. thanks so much. the upsert code is way cleaner too
If you use "mongodb://localhost/?safe=true" connection string than MongoDb checks for each writes. You can read on mongodb.org/display/DOCS/Connections

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.