Question

When I run this code with CasperJS:

// Reproduction script: opens a plain-text URL and prints what the page's DOM contains.
var casper = require('casper').create();
var url = 'https://www.youtube.com/robots.txt';

casper.start(url, function() {
    // evaluate() runs the callback inside the loaded page and hands its return value back here.
    var js = this.evaluate(function() {
        return document; 
    }); 
    // NOTE(review): js.all[0] is presumably the root <html> element, so innerHTML is the
    // markup the browser built around the text file rather than the raw response body.
    this.echo(js.all[0].innerHTML); 
});
casper.run();

Instead of getting this:

# robots.txt file for YouTube
# Created in the distant future (the year 2000) after
# the robotic uprising of the mid 90's which wiped out all humans.

User-agent: Mediapartners-Google*
Disallow:

User-agent: *
Disallow: /bulletin
Disallow: /comment
Disallow: /forgot
Disallow: /get_video
Disallow: /get_video_info
Disallow: /login
Disallow: /results
Disallow: /signup
Disallow: /t/terms
Disallow: /t/privacy
Disallow: /verify_age
Disallow: /videos
Disallow: /watch_ajax
Disallow: /watch_popup
Disallow: /watch_queue_ajax

I get this result:

<head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"># robots.txt file for YouTube
# Created in the distant future (the year 2000) after
# the robotic uprising of the mid 90's which wiped out all humans.

User-agent: Mediapartners-Google*
Disallow:

User-agent: *
Disallow: /bulletin
Disallow: /comment
Disallow: /forgot
Disallow: /get_video
Disallow: /get_video_info
Disallow: /login
Disallow: /results
Disallow: /signup
Disallow: /t/terms
Disallow: /t/privacy
Disallow: /verify_age
Disallow: /videos
Disallow: /watch_ajax
Disallow: /watch_popup
Disallow: /watch_queue_ajax
</pre></body>

It seems that CasperJS is adding HTML tags. How can I get the plain text file exactly as it appears in the source?

Était-ce utile?

La solution

What about the download function?

The script becomes:

// Fetch robots.txt and save it to disk without going through the page's DOM.
var casper = require('casper').create();
var url = 'https://www.youtube.com/robots.txt';

/**
 * Step callback: CasperJS invokes it with the casper instance as `this`.
 * download() writes the remote resource to the given local path.
 */
function saveRobotsFile() {
    this.download(url, 'robots.txt');
}

casper.start(url, saveRobotsFile);
casper.run();

UPDATE

If you want to store the remote file's contents in a string, use base64encode:

// Read the remote file into a string instead of saving it to disk.
var casper = require('casper').create();
var url = 'https://www.youtube.com/robots.txt';
var contents;

casper.start(url, function readAsString() {
    // base64encode() retrieves the resource and returns it Base64-encoded;
    // atob() then decodes that back into the file's text.
    var encoded = this.base64encode(url);
    contents = atob(encoded);
    console.log(contents);
});

casper.run();
Licencié sous: CC-BY-SA avec attribution
Non affilié à StackOverflow
scroll top