Question

I am testing the cluster module for NodeJS. I am trying to achieve the following:

  • Start the HTTP server with two cluster workers.
  • Once the master and workers are started, master sends a "check" message to the workers every 5 seconds
  • If the worker responds, its counter is reset to 0.
  • If there is no message before the next "check" message then the counter is incremented.
  • If a worker has not responded for 20 consecutive seconds, I kill that worker and let the master start a new one.

The Problem that I am facing is:

  • When I kill the worker, the next worker goes into a loop of restart, kill, restart, kill, etc. This means that at any point in time only one worker is active and the other worker is unresponsive, which is why it keeps going through that loop.

My code is:

// Heartbeat experiment for the cluster module: the master forks numCPUs
// workers, sends each of them a 'check' message every 5 seconds, and kills a
// worker once its counter in workerStatus has reached 4 without being reset
// by a reply. Workers echo every message back and serve HTTP on port 8080.
var cluster = require('cluster');
var http = require('http');
var numCPUs = 2;
// Per-worker counter keyed by worker id: incremented on every 'check' sent,
// reset to 0 whenever a message from that worker is received.
var workerStatus = new Object();

if (cluster.isMaster) {
  // Fork workers.
    for (var i = 0; i < numCPUs; i++) {
        cluster.fork(); 
    }

    // Replace any worker that exits.
    // NOTE(review): the 'message' listeners further down are attached only
    // once, to the workers that exist at startup, so a replacement forked
    // here has no listener that resets its counter.
    cluster.on('exit', function(worker, code, signal) {
        cluster.fork();
        console.log('worker %d died (%s). restarting...', worker.process.pid, signal || code);      
    });


  // Go through all workers
    // Calls callback(worker, id) for every entry currently in cluster.workers.
    function eachWorker(callback) {
      for (var id in cluster.workers) {     
        callback(cluster.workers[id],id);
      }
    }
    // Heartbeat tick, every 5000 ms.
    setInterval(function (){
        eachWorker(function(worker,id) {    
            // First tick for this id: start at -1 so the increment below
            // leaves the counter at 0 after the first 'check'.
            if (workerStatus[id] == undefined)  workerStatus[id]=-1;
            if (workerStatus[id] < 4){
                workerStatus[id]++;
                console.log("Message Sent : " + id);
                worker.send('check');
            }else{
                // Counter is already 4: forget it, then disconnect and kill
                // the worker; the 'exit' handler above forks a replacement.
                delete workerStatus[id]
                console.log("Some Problem with " + id);
                worker.disconnect();
                worker.kill();
            }
        });
    },5000);

    // Runs once at startup: any message from a worker resets its counter.
    eachWorker(function(worker,id){
        var w_id =id;
        worker.on('message', function(msg){
            workerStatus[w_id]=0;
            console.log("Message Received : " + w_id);
        });
    });

} else {
    // Worker side: echo every message from the master straight back.
    process.on('message', function(msg) {
        process.send(msg);
    });

  // Workers can share any TCP connection
  // In this case its a HTTP server
  http.createServer(function(req, res) {
    // Infinite loop: the response code below is never reached. Presumably
    // intentional, to simulate a worker that stops responding after its
    // first HTTP request.
    while (1) {}
    res.writeHead(200);
    res.end("hello world--" + cluster.worker.id);
    //cluster.worker.kill();
  }).listen(8080);
}
Was it helpful?

Solution

I found the issue.

When I kill the worker and start a new worker, I do not add a message listener for the newly created worker. Because of this, messages from the newly created worker are never received.

Just by adding the following code, the issue was fixed.

// When a worker exits, fork a replacement and immediately subscribe to its
// messages, so that a reply from the replacement resets its entry in
// workerStatus just like the replies of the original workers do.
cluster.on('exit', function (deadWorker, code, signal) {
    var replacement = cluster.fork();
    var reason = signal || code;
    console.log('worker %d died (%s). restarting...', deadWorker.process.pid, reason);

    // Any message from the replacement counts as a heartbeat reply.
    function onReply(msg) {
        workerStatus[replacement.id] = 0;
        console.log("Message Received : " + replacement.id);
    }

    replacement.on('message', onReply);
});

OTHER TIPS

I have a simple support module that separates the worker script (it simply loads different scripts and handles communication), which could help: runworker

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top