سؤال

Google blocks my server's IP most of the time whenever I try to fetch from it. For example, we are making a bulk Google PR checker script, and Google blocks our IP after roughly 3k to 5k requests. So, to give our users the best results, we need to send cURL requests through a proxy. This is not only for Google; we sometimes need to send other requests through a proxy as well. If anybody knows how, please explain the process; otherwise, please don't just dismiss this as off-topic. How can I send a cURL request through a proxy rather than from my server's IP? Or is there a better way to scrape Google data?

What is the legal way to use cURL?

هل كانت مفيدة؟

الحل

Use the option curl_setopt($ch, CURLOPT_PROXY, $proxy);

 //scrape.php

/**
 * Minimal cURL-based page scraper with optional proxy and user-agent support.
 *
 * Typical use: optionally call setProxy() / setUserAgent(), then scrape($url).
 */
class Scraper {

    // The properties below are declared explicitly (dynamic properties are
    // deprecated since PHP 8.2). They stay public because the original code
    // created them dynamically, which made them publicly accessible.

    /** @var string|null Proxy passed to CURLOPT_PROXY; null means "connect directly". */
    public $proxy;

    /** @var string|null Value for the User-Agent header; null means "do not set one". */
    public $agent;

    /** @var string|null URL given to the most recent scrape() call. */
    public $target_url;

    /** @var string|null Raw body returned by the most recent successful scrape(). */
    public $html;

    /**
     * Fetch a URL (through the configured proxy, if any) and run the XPath query on it.
     *
     * @param string $target_url URL to download.
     * @return DOMNodeList|false Nodes matched by the XPath query in _parseData(),
     *                           or false when the transfer failed or returned an empty body.
     */
    public function scrape($target_url) {
        $this->target_url = $target_url;

        $ch = curl_init();
        if ($ch === false) {
            echo 'url/curl error';
            return false;
        }

        $proxy = $this->_getProxy();
        if ($proxy) {
            curl_setopt($ch, CURLOPT_PROXY, $proxy);
        }

        // Only override the user agent when one was actually configured.
        $userAgent = $this->_getUserAgent();
        if ($userAgent) {
            curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
        }

        curl_setopt($ch, CURLOPT_URL, $target_url);
        // Treat HTTP status codes >= 400 as a failed transfer.
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        // Return the body from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 10);

        $html = curl_exec($ch);
        curl_close($ch);

        // An empty body is treated as a failure too: there is nothing to parse.
        if ($html === false || $html === '') {
            echo 'url/curl error';
            return false;
        }

        $this->html = $html;

        return $this->_parseData();
    }

    /**
     * Set the proxy used for subsequent requests.
     *
     * @param string $proxy Proxy in a form accepted by CURLOPT_PROXY, e.g. "host:port".
     */
    public function setProxy($proxy) {
        $this->proxy = $proxy;
    }

    /**
     * @return string|false The configured proxy, or false when none is set.
     */
    private function _getProxy() {
        return isset($this->proxy) ? $this->proxy : false;
    }

    /**
     * Set the User-Agent string sent with subsequent requests.
     *
     * @param string $agent User-Agent header value.
     */
    public function setUserAgent($agent) {
        $this->agent = $agent;
    }

    /**
     * @return string|false The configured user agent, or false when none is set.
     */
    private function _getUserAgent() {
        return isset($this->agent) ? $this->agent : false;
    }

    /**
     * Parse the downloaded HTML and run the XPath query against it.
     *
     * @return DOMNodeList|false Matching nodes, or false if the XPath expression is invalid.
     */
    private function _parseData() {
        $dom = new DOMDocument();

        // Real-world HTML is rarely well-formed; collect libxml warnings
        // internally instead of silencing them with "@", then restore the
        // caller's previous libxml error-handling mode.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($this->html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $xpath = new DOMXPath($dom);

        // your xpath query here
        return $xpath->query("//div[@id='ires']");
    }

}

Example usage

require 'scrape.php';

$scraper = new Scraper;

// NOTE(review): 9150 is commonly the Tor Browser SOCKS port. If this proxy is
// SOCKS rather than HTTP, pass 'socks5://127.0.0.1:9150' instead — confirm.
$scraper->setProxy('127.0.0.1:9150');

// The query must be in the query string: a URL fragment ("#q=...") is never
// sent to the server, so the original URL only requested the homepage.
$data = $scraper->scrape('https://www.google.com/search?q=stack+overflow');
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top