سؤال

I would like to read a csv file and upload each row to a couchdb using a grunt task. At this point I am not yet doing any database validation such as checking if the record already exists but will have to do that at some point also.

Currently this is what I am doing and the problem is only the first 65 rows, of the first sub task named people is being uploaded to couchdb.

I know this has something to do with asynchronous execution but just can't work out how to do this

Gruntils.js

csv2couch: {
    people: {
        db: 'http://localhost:5984/db',
        collectionName: 'person',
        src:['./data/schema3/people.csv']
    },
    organisms: {
        db: '<%= qmconfig.COUCHDBURL %>',
        collectionName: 'organism',
        src:['./data/schema3/organisms.csv']
    }

}

csv2couch.js

'use strict';

var nanolib = require('nano'),
    csv = require('csv'),
    urls = require('url'),
    fs = require('fs');

module.exports = function(grunt) {

    grunt.registerMultiTask('csv2couch', 'Parse csv file and upload data to couchdb.', function() {

        var done, parts, dbname, _this, collectionName;
        _this = this;
        done = this.async();
        parts = urls.parse(this.data.db);
        dbname = parts.pathname.replace(/^\//, '');
        collectionName = this.data.collectionName;

        // Merge task-specific and/or target-specific options with these defaults.
        var options = this.options({});

        // couchdb connection
        try {
            var nano = nanolib(parts.protocol + '//' + parts.host);
        } catch (e) {
            grunt.warn(e);
            done(e, null);
        }

        // database connection
        var db = nano.use(dbname);

        // process each source csv file
        this.filesSrc.forEach(function(f) {

            console.log('source file:', f);

            csv()
                .from.path(f, {
                    columns:true,
                    delimeter:',',
                    quote:'"'
                })
                .on('record', function(row,index){
                  console.log('#'+index, row);
                  save(row, collectionName); 
                })
                .on('end', function(count){
                  console.log('Number of lines: '+count);
                  done();
                })
                .on('error', function(error){
                  console.log(error.message);
                  done(error);
                });
        });

        function save (data, collectionName) {

            // document ID is concatenation of collectionName and ID 
            var docID = collectionName[0]+'_'+data.ID;

            // add some additional data
            data.type = collectionName;

            // insert data into couchdb
            db.insert(data, docID, function(err, body, header) {
              if (err) {
                console.log('[db.insert] ', err.message);
                return;
              }
            });
        }

    });

};
هل كانت مفيدة؟

المحلول

You're right, the async code is incorrect. The CSV file is being read to the end before all your records are saved. You need to call done only when your last record has been saved.

Your save method needs to take a callback

var rowsRead = 0,  // the number of rows read from the csv file
  rowsWritten = 0; // the number of rows written to CouchdDb

caller:

.on('record', function(row,index){
  rowsRead++;
  save(row, collectionName, function(err){
    if(err){
      return done(err);
    }
    rowsWritten++;
    if(rowsRead===rowsWritten){ // check if we've written all records to CouchDb
      done();
    }
  }); 
})

save method:

function save (data, collectionName, callback) {
  // document ID is concatenation of collectionName and ID 
  var docID = collectionName[0]+'_'+data.ID;

  // add some additional data
  data.type = collectionName;

  // insert data into couchdb
  db.insert(data, docID, callback);
}
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top