質問

I have an array of links, and each link is passed as a parameter to a function that scrapes data with PhantomJS. How can I serialize the calls to this function? A plain for statement runs the function 3 times in parallel, and I receive an event error.

In this case it seems proper to use async, but how do I use it in series? The running time of each function call is always different, so how is async supposed to know that one call is done and that it can start with the next URL?

var phantom = require('phantom')
  , async = require('async');

// Pages whose titles are fetched, one after another.
var urls = [
  'http://en.wikipedia.org/wiki/Main_Page',
  'http://es.wikipedia.org/wiki/Wikipedia:Portada',
  'http://de.wikipedia.org/wiki/Wikipedia:Hauptseite'
];

// mapSeries calls getTitle for the next URL only after the previous call has
// invoked its callback, so the differing run times of each scrape do not matter.
async.mapSeries(urls, getTitle, function(err, result) {
  if (err) {
    // Report the failure instead of printing a missing/partial result.
    console.error(err);
    return;
  }
  console.log(result);
});

/**
 * Opens `link` in a fresh PhantomJS instance and reports the page title.
 *
 * @param {string} link - URL of the page to open.
 * @param {function(?Error, string=)} callback - Node-style callback, invoked
 *   exactly once: with an Error if the page could not be opened, otherwise
 *   with `null` and the value of `document.title`.
 */
function getTitle(link, callback) {
  phantom.create(function(ph) {
    ph.createPage(function(page) {
      page.open(link, function(status) {
        // PhantomJS reports 'success' or 'fail'; evaluating a page that failed
        // to load would silently yield a bogus title.
        if (status !== 'success') {
          ph.exit();
          callback(new Error('Failed to open ' + link + ' (status: ' + status + ')'));
          return;
        }

        page.evaluate(function() {
          // Runs inside the page context.
          return document.title;
        }, function(result) {
          // Request shutdown before signalling completion so the instance is
          // released even if the callback throws.
          ph.exit();
          callback(null, result);
        });
      });
    });
  });
}
役に立ちましたか?

解決

I'd try something like:

var links = []; // URLs to scrape; fill in before calling init()
var _ph;        // PhantomJS instance created by init(); results() calls exit() on it

/**
 * Starts one PhantomJS instance and calls doStuff() once for every entry in
 * `links`, all sharing that instance.
 *
 * @param {function(?Error, *=)} cb - Completion callback handed to each
 *   doStuff() call.
 */
function init(cb) {
    phantom.create(function(ph) {
        _ph = ph;

        if (links.length === 0) {
            // No doStuff() call means cb never fires, so nothing else would
            // ever shut PhantomJS down.
            ph.exit();
            return;
        }

        links.forEach(function(link) {
            doStuff(ph, link, cb);
        });
    });
}

/**
 * Opens `link` in a new page of the given PhantomJS instance and reports the
 * page title.
 *
 * @param {Object} ph - PhantomJS instance providing createPage().
 * @param {string} link - URL of the page to open.
 * @param {function(?Error, string=)} cb - Node-style callback, invoked once:
 *   with an Error if the page could not be opened, otherwise with `null` and
 *   the value of `document.title`.
 */
function doStuff(ph, link, cb) {
    // Every call gets its own page. NOTE(review): whether several pages of one
    // instance really load in parallel is not verified here.
    ph.createPage(function(page) {
        page.open(link, function(status) {
            if (status !== 'success') {
                page.close();
                cb(new Error('Failed to open ' + link + ' (status: ' + status + ')'));
                return;
            }

            page.evaluate(function() {
                // Runs inside the page; without `return` the result is undefined.
                return document.title;
            }, function(result) {
                // Close first: cb may shut the whole PhantomJS instance down.
                page.close();
                cb(null, result);
            });
        });
    });
}

var counter = links.length; // number of links still waiting for a result
var titles = [];            // collected titles; must be an array because results() pushes onto it

/**
 * Completion callback for one link: records the title and shuts PhantomJS
 * down once every outstanding link has reported back.
 *
 * @param {?Error} err - Set when the link could not be processed.
 * @param {*} res - Title reported for the link (ignored when `err` is set).
 */
function results(err, res) {
  if (err) {
    console.error(err);
  } else {
    titles.push(res);
  }

  // Failures are counted too; otherwise a single bad link would keep the
  // counter above zero and PhantomJS would never exit.
  if (--counter === 0) {
    //done
    _ph.exit();
  }
}

// Kick off the run, passing results as the completion callback.
init(results)

This is probably not working code (I wrote it here), but I hope you get the idea. If you want to use only one page, try something like:

var links = []; // URLs to scrape; fill in before calling init()
var _ph;        // PhantomJS instance created by init(); results() calls exit() on it
var _page;      // the single page created by init()

/**
 * Starts one PhantomJS instance with a single page and begins processing
 * with the first link.
 *
 * @param {function(?Error, *=)} cb - Completion callback handed to doStuff().
 */
function init(cb) {
    phantom.create(function(ph) {
        _ph = ph;

        if (links.length === 0) {
            // Nothing to process, so cb would never fire and nothing else
            // would shut PhantomJS down.
            ph.exit();
            return;
        }

        ph.createPage(function(page) {
            _page = page;
            // results() continues with links[counter] after decrementing, i.e.
            // from the last index down to 1, so index 0 is handled here.
            doStuff(page, links[0], cb);
        });
    });
}

/**
 * Opens `link` in the given page and reports the page title.
 *
 * @param {Object} page - PhantomJS page providing open() and evaluate().
 * @param {string} link - URL of the page to open.
 * @param {function(?Error, string=)} cb - Node-style callback, invoked once:
 *   with an Error if the page could not be opened, otherwise with `null` and
 *   the value of `document.title`.
 */
function doStuff(page, link, cb) {
    page.open(link, function(status) {
        if (status !== 'success') {
            cb(new Error('Failed to open ' + link + ' (status: ' + status + ')'));
            return;
        }

        page.evaluate(function() {
            // Runs inside the page; without `return` the result is undefined.
            return document.title;
        }, function(result) {
            // The page is deliberately not closed here: results() passes
            // control back to doStuff() for the next link, and a closed page
            // could not be reused.
            cb(null, result);
        });
    });
}

 var counter = links.length; // number of links still waiting for a result
var titles = [];            // collected titles; must be an array because results() pushes onto it

/**
 * Completion callback for one link: records the title, then either shuts
 * PhantomJS down (no links left) or starts the next link on the shared page.
 *
 * @param {?Error} err - Set when the link could not be processed.
 * @param {*} res - Title reported for the link (ignored when `err` is set).
 */
function results(err, res) {
  if (err) {
    console.error(err);
  } else {
    titles.push(res);
  }

  // Failures are counted too, so one bad link cannot stall the chain.
  if (--counter === 0) {
    //done
    _ph.exit();
    return;
  }

  // doStuff() takes the page as its first argument; reuse the shared one.
  doStuff(_page, links[counter], results);
}

// Kick off the run, passing results as the completion callback.
init(results)
ライセンス: CC-BY-SA帰属
所属していません StackOverflow
scroll top