Question

I'm still learning mongodb and node.js.

I was trying to insert a bulk amount of data, say 100k rows with 80-100 columns each. I created an app using Express.js for that. I coded it in such a way that the page redirects to another page once the upload button is clicked, and in the background the app inserts the data into the db.

The issue is that once the inserting starts the app stops responding. Even after the completion of insert the app will be slower for about 2-3 minutes.

I tried this code on

1

  • Fedora 14
  • Intel P4 2.80GHz
  • 1.5GiB

2

  • Fedora 14
  • i3 3.20GHz
  • 3GiB

How to avoid this problem? Will it work better on a better system?

Code to store the data

// NOTE(review): this loop fires one insert per row WITHOUT waiting for the
// previous one — with 100k rows every insert is queued at once, which
// saturates the driver/event loop and makes the app unresponsive (the
// symptom described above).
MongoClient.connect(MONGODB_URI, function (err, database) {
    // NOTE(review): the connect error `err` is never checked before `database` is used.
    var collection = database.collection(collectionName);
    // NOTE(review): `index` has no `var`, so it leaks as an implicit global.
    for (index = 0; index < jsonResult.length; ++index) {
        var obj = jsonResult[index];
        // Coerce each field to the type declared for it in the `headers` map.
        for (var prop in obj) {
            if (headers[prop] == 'Date') {
                // Rewrites DD-MM-YYYY into MM/DD/YYYY so Date() can parse it.
                obj[prop] = new Date(obj[prop].replace(/(\d{2})-(\d{2})-(\d{4})/, "$2/$1/$3"));
            } else if (headers[prop] == 'Integer') {
                obj[prop] = parseInt(obj[prop]);
            } else if (headers[prop] == 'Float') {
                obj[prop] = parseFloat(obj[prop]);
            }
        }
        // Fire-and-forget insert: the loop does not wait for this callback,
        // so all 100k inserts are in flight simultaneously.
        collection.insert(obj, function (err, result) {
            if (err) {
                console.log(JSON.stringify(err));
            }
        });    
    }
});

Note:

  • jsonResult is the result of reading a file(csv) and converting it into json
  • headers is an object that have the key-data type mapping

    var headers = { 'iodate': 'Date', 'sno': 'Integer', 'salary': 'Float' }

Code to read and convert csv to json

var cv_json = require('convert-json'); // required module convert-json (convert-csv, xls, xlsx into json)
// Convert the uploaded file at `target_path` into a JSON array held in memory;
// `output: null` means "hand back the result" rather than writing a file.
cv_json({
    input: target_path,
    output: null
}, function (err, result) {
    if (err) {
        console.error(err);
    } else {
        // `result` is the parsed row array consumed by the insert code above.
        persists(req, res, result, collectionName, headers);  //Function where insert takes place.
    }
})
Was it helpful?

Solution

Right now, "persists()" returns immediately, even though the inserts haven't finished. This is due to the asynchronous nature of JavaScript/Node.js.

You need to serialize the insert operations so you don't clog the server. Use async.eachSeries()

Your "persists" function should use a callback to notify the caller that it has finished. Something like this:

/**
 * Serially insert each row of `result` into `collectionName`, coercing
 * fields to the types declared in `headers`, then notify the caller.
 *
 * async.eachSeries() runs ONE insert at a time, so the event loop stays
 * free to serve other requests while the bulk load proceeds.
 *
 * @param {Object}   req            Express request (kept for the caller's signature).
 * @param {Object}   res            Express response (kept for the caller's signature).
 * @param {Array}    result         rows parsed from the CSV.
 * @param {String}   collectionName target MongoDB collection name.
 * @param {Object}   headers        field-name -> type ('Date'|'Integer'|'Float') map.
 * @param {Function} callback       callback(err) invoked once all inserts finish.
 */
function persists(req, res, result, collectionName, headers, callback) {
    MongoClient.connect(MONGODB_URI, function (err, database) {
        if (err) {
            // Fail fast: don't touch `database` if the connection itself failed.
            return callback(err);
        }
        var collection = database.collection(collectionName);
        // BUGFIX: iterate the `result` parameter — the original iterated
        // `jsonResult`, which is undefined inside this function.
        async.eachSeries(result,
            function (obj, cb) {
                for (var prop in obj) {
                    if (headers[prop] == 'Date') {
                        // Rewrite DD-MM-YYYY into MM/DD/YYYY so Date() can parse it.
                        obj[prop] = new Date(obj[prop].replace(/(\d{2})-(\d{2})-(\d{4})/, "$2/$1/$3"));
                    } else if (headers[prop] == 'Integer') {
                        obj[prop] = parseInt(obj[prop], 10); // explicit radix
                    } else if (headers[prop] == 'Float') {
                        obj[prop] = parseFloat(obj[prop]);
                    }
                }
                collection.insert(obj, function (err, result) {
                    cb(err); // a non-null err aborts the series early
                });
            },
            function (err) {
                // Propagate any insert error instead of swallowing it.
                callback(err);
            }
        );
    });
}

And then to use it

// Respond/redirect only after the bulk insert has actually completed;
// check the error argument instead of assuming success.
persists(req, res, result, collectionName, headers, function (err) {
    if (err) {
        console.error("insert failed:", err);
    } else {
        console.log("insert finished");
    }
});
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top