Question

I'm trying to modify this script: https://github.com/ariya/phantomjs/blob/master/examples/render_multi_url.js so that instead of "rendermulti-1.png (and so on)" the output files will be named for the web pages they are caps of.

Here's what I tried.

    // Render Multiple URLs to file

var RenderUrlsToFile, arrayOfUrls, system;

system = require("system");

/*
Render given urls
@param array of URLs to render
@param callbackPerUrl Function called after finishing each URL, including the last URL
@param callbackFinal Function called after finishing everything 
*/
RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
    var getFilename, next, page, retrieve, urlIndex, webpage, pagename ; //<--
    urlIndex = 0;
    webpage = require("webpage");
    page = null;
    // replace forward slashes with underscores          //<--
    pagename = arrayOfUrls[urlIndex].replace(/\//g,'_'); //<--
    getFilename = function() {
    //  return "rendermulti-" + urlIndex + ".png";       //<--
        return pagename + ".png";                        //<--
    };
    next = function(status, url, file) {
        page.close();
        callbackPerUrl(status, url, file);
        return retrieve();
    };
    retrieve = function() {
        var url;
        if (urls.length > 0) {
            url = urls.shift();
            urlIndex++;
            page = webpage.create();
            page.viewportSize = {
                width: 800,
                height: 600
            };
            page.settings.userAgent = "Phantom.js bot";
            return page.open("http://" + url, function(status) {
                var file;
                file = getFilename();
                if (status === "success") {
                    return window.setTimeout((function() {
                        page.render(file);
                        return next(status, url, file);
                    }), 200);
                } else {
                    return next(status, url, file);
                }
            });
        } else {
            return callbackFinal();
        }
    };
    return retrieve();
};

arrayOfUrls = null;

if (system.args.length > 1) {
    arrayOfUrls = Array.prototype.slice.call(system.args, 1);
} else {
    console.log("Usage: phantomjs render_multi_url.js [domain.name1, domain.name2, ...]");
    arrayOfUrls = ["www.google.com", "www.bbc.co.uk", "www.phantomjs.org"];
}

RenderUrlsToFile(arrayOfUrls, (function(status, url, file) {
    if (status !== "success") {
        return console.log("Unable to render '" + url + "'");
    } else {
        return console.log("Rendered '" + url + "' at '" + file + "'");
    }
}), function() {
    return phantom.exit();
});

The script runs, but names all files after the first supplied URL and ignores anything after the '/'.

I suspect I'm making some basic error, possibly something to do with scope, but when I move the new variable into the getFileName function things break.

Any help at all will be appreciated.

Thanks!

Was it helpful?

Solution

There are calls to urls.shift() so it's not a good approach to compute filname with arrayOfUrls.

You're true : '/' is an invalid character so you have to replace it with a generic token (as ?, ...).

Alos, because page.open already contains url, it's better to use it to compute filename.

A solution could be :

// Render Multiple URLs to file

var RenderUrlsToFile, arrayOfUrls, system;

system = require("system");

/*
Render given urls
@param array of URLs to render
@param callbackPerUrl Function called after finishing each URL, including the last URL
@param callbackFinal Function called after finishing everything 
*/
RenderUrlsToFile = function(urls, callbackPerUrl, callbackFinal) {
    var getFilename, next, page, retrieve, urlIndex, webpage, pagename ; //<--
    urlIndex = 0;
    webpage = require("webpage");
    page = null;

    getFilename = function(url) {
        return url.replace(/\//g,'_')+  ".png";  //<--
    };
    next = function(status, url, file) {
        page.close();
        callbackPerUrl(status, url, file);
        return retrieve();
    };
    retrieve = function() {
        var url;
        if (urls.length > 0) {
            url = urls.shift();
            urlIndex++;
            page = webpage.create();
            page.viewportSize = {
                width: 800,
                height: 600
            };
            page.settings.userAgent = "Phantom.js bot";
            return page.open("http://" + url, function(status) {
                var file;
                file = getFilename(url);
                if (status === "success") {
                    return window.setTimeout((function() {
                        page.render(file);
                        return next(status, url, file);
                    }), 200);
                } else {
                    return next(status, url, file);
                }
            });
        } else {
            return callbackFinal();
        }
    };
    return retrieve();
};

arrayOfUrls = null;

if (system.args.length > 1) {
    arrayOfUrls = Array.prototype.slice.call(system.args, 1);
} else {
    console.log("Usage: phantomjs render_multi_url.js [domain.name1, domain.name2, ...]");
    arrayOfUrls = ["www.google.com", "www.bbc.co.uk", "www.phantomjs.org"];
}

RenderUrlsToFile(arrayOfUrls, (function(status, url, file) {
    if (status !== "success") {
        return console.log("Unable to render '" + url + "'");
    } else {
        return console.log("Rendered '" + url + "' at '" + file + "'");
    }
}), function() {
    return phantom.exit();
});

OTHER TIPS

yes, I think you could just modify this method

getFilename = function() {
    return "rendermulti-" + urlIndex + ".png";       //<--
    return pagename + ".png";                        //<--
};

to customer your output file names.

PS. I want to ask some ones, do you get this issue: if you give this js to render 90 html files, but most time it couldn't render them all, I need execute many times to finished all those 90 files.

and I try to split my html files into small array, then to call this js to render, but in this js file, there is a exit method, it would exit my process. so I couldn't loop all these small html file url array to finish them one time.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top