Question

I am working on a Node.js project based on cluster, and I got stuck on logging. After doing some research, I worked out a solution, shown below, but I don't know if it is a good idea. The idea is this: only the master process writes to the log files. If the current process is a worker, it sends the log message to the master, which then writes it to the log file; the master itself writes directly. This avoids having multiple processes open and write to the same file.

var util = require('util');
var fs = require('fs');
var cluster = require('cluster');

var logger = module.exports;

// Log levels in ascending order of severity; the array index is used for
// threshold comparisons (e.g. anything >= 'error' goes to the error log).
var levels  = ['debug', 'info', 'warn', 'error', 'fatal'];
// Minimum level that gets logged; messages below this are dropped.
var logLevel = 'debug';

// Write streams for the two log files; only initialized in the master
// process (workers forward their messages over IPC instead).
var logfile = null;
var errorLogfile  = null;


if (cluster.isMaster) {

    // Only the master opens the log files, so each file has a single writer.
    logfile = fs.createWriteStream('debug.log', { flags: 'a' });
    errorLogfile = fs.createWriteStream('error.log', { flags: 'a' });

    // Collect log messages forwarded by workers and route them to the
    // appropriate file based on severity.
    cluster.on('online', function (worker) {
        worker.on('message', function (msg) {
            // Strict equality: the payload is a plain object we control,
            // and `===` avoids accidental type coercion on foreign messages.
            if (msg.type === 'logging') {
                var level = msg.data.level;
                var logStr = msg.data.msg;
                // 'error' and 'fatal' go to the error log, everything else
                // to the debug log — mirrors the master's direct-write path.
                if (levels.indexOf(level) >= levels.indexOf('error')) {
                    errorLogfile.write(logStr + '\n');
                } else {
                    logfile.write(logStr + '\n');
                }
            }
        });
    });
}


/**
 * Format one log record and emit it.
 *
 * In the master process the record is written directly to the appropriate
 * write stream; in a worker it is forwarded to the master over the IPC
 * channel (the master routes it in its 'message' handler).
 *
 * @param {string} level - one of the entries in `levels`.
 * @param {IArguments} args - the caller's arguments object; non-string
 *   values are JSON-stringified before util.format joins them.
 */
function log(level, args) {

    // Drop messages below the configured threshold.
    if (levels.indexOf(level) < levels.indexOf(logLevel)) return;

    // Convert the array-like arguments object into a real array without
    // shadowing the `args` parameter via a `var` redeclaration.
    var parts = Array.prototype.slice.call(args).map(function (a) {
        return (typeof a === 'string') ? a : JSON.stringify(a);
    });
    var msg = util.format.apply(null, parts);

    // Date is stringified via Array#join (Date#toString), same as before.
    var out = [new Date(), '[' + level.toUpperCase() + ']', msg];
    var line = out.join(' ');

    if (cluster.isMaster) {
        // Master writes directly; 'error' and 'fatal' go to the error log.
        if (levels.indexOf(level) >= levels.indexOf('error')) {
            errorLogfile.write(line + '\n');
        } else {
            logfile.write(line + '\n');
        }
    } else if (process.send) {
        // `process.send` is the documented worker-side API (equivalent to
        // `cluster.worker.process.send`); the guard protects against a
        // closed/missing IPC channel instead of throwing.
        process.send({
            type: 'logging',
            data: {
                level: level,
                msg: line
            }
        });
    }

}


// Public API: logger.debug(...), logger.info(...), logger.warn(...),
// logger.error(...), logger.fatal(...). Each accepts util.format-style
// arguments and delegates to log() with the matching level. Generating
// the methods from `levels` keeps the API and the level list in sync.
levels.forEach(function (level) {
    logger[level] = function () {
        log(level, arguments);
    };
});
Was it helpful?

Solution

  1. It has a bottleneck issue, because the master is the only process that can log the messages from N workers to the file. The communication between master and workers is not necessary, because workers can write the message directly to the file. As long as the message is shorter than the pipe buffer, the write operation is safe.

  2. You did not handle "drain" events. When you have a large number of messages that need to be logged, the stream's buffer fills up easily, because the stream does not have enough time to flush and write the buffer to disk while you keep putting messages into it. Eventually you will not be able to log all the messages to the file. The "drain" event is emitted when the buffer has been flushed. For details on "drain", please refer to "http://nodejs.org/api/stream.html#stream_event_drain"

P.S. If you have time, please try my library. It focuses on multi-process logging and log rotation, and it is very fast at file logging with small memory consumption: https://github.com/wood1986/ln

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top