I am trying to scrape this URL with cURL: xxxx.fr, but I cannot get the HTML code of the page; both the header and the body come back empty, even though the HTTP return code is 200. I tried other URLs (on different domains) and they work like a charm. I also tried different User-Agent and Referer values.
Do you know what is wrong? At least, could someone try this code on their own server and let me know whether they see the same issue?
Thank you
Below is my code:
// Fetch a page with cURL while imitating a desktop browser, dump the transfer
// diagnostics, then print the raw response (headers + body) HTML-escaped.
$url = 'http://www.xxxx.fr';

// Browser-like request headers. Built as one explicit array so the list never
// inherits stale entries from an earlier $header in the same scope.
$header = array(
    "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
    "Cache-Control: max-age=0",
    "Connection: keep-alive",
    "Keep-Alive: timeout=5, max=100",
    "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
    "Accept-Language: en-us,en;q=0.5",
);

$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0");
curl_setopt($curl, CURLOPT_ENCODING, 'gzip,deflate');
curl_setopt($curl, CURLOPT_REFERER, "http://www.google.fr");
// Include the response headers at the start of the returned string.
curl_setopt($curl, CURLOPT_HEADER, 1);
// Ask cURL to record the outgoing request so curl_getinfo() can report it.
curl_setopt($curl, CURLINFO_HEADER_OUT, 1);
curl_setopt($curl, CURLOPT_VERBOSE, 1);
// Read and write cookies from the same file so they persist between runs.
curl_setopt($curl, CURLOPT_COOKIEFILE, getcwd().'/cookies.txt');
curl_setopt($curl, CURLOPT_COOKIEJAR, getcwd().'/cookies.txt');
curl_setopt($curl, CURLOPT_TIMEOUT, 30);
// Return the response as a string instead of echoing it directly.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

$curlData = curl_exec($curl);
$infos = curl_getinfo($curl);
// Capture the error text before the handle is closed; curl_exec() returns
// false on transport failure, which would otherwise print as nothing.
$curlError = ($curlData === false) ? curl_error($curl) : '';
print_r($infos);
curl_close($curl);

echo "<hr>Page:<br />";
if ($curlData === false) {
    echo 'cURL error: ' . htmlentities($curlError, ENT_COMPAT | ENT_SUBSTITUTE, 'UTF-8');
} else {
    // htmlentities() returns an EMPTY string when the input contains byte
    // sequences that are invalid in the target encoding (UTF-8 here) unless
    // ENT_SUBSTITUTE or ENT_IGNORE is set. A page served in another charset
    // (e.g. ISO-8859-1 with accented characters) therefore looked "empty"
    // even though size_download was non-zero. ENT_SUBSTITUTE replaces the
    // offending bytes with U+FFFD instead of discarding the whole document.
    // NOTE(review): to render the accents correctly, convert $curlData from
    // the page's declared charset to UTF-8 before escaping.
    echo htmlentities($curlData, ENT_COMPAT | ENT_SUBSTITUTE, 'UTF-8');
}
and here is the result from the print_r($infos):
Array (
[url] => http://www.xxxx.fr
[content_type] => text/html
[http_code] => 200
[header_size] => 625
[request_size] => 465
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 0.032535
[namelookup_time] => 0.001488
[connect_time] => 0.002581
[pretransfer_time] => 0.002639
[size_upload] => 0
[size_download] => 10234
[speed_download] => 314553
[speed_upload] => 0
[download_content_length] => -1
[upload_content_length] => 0
[starttransfer_time] => 0.032088
[redirect_time] => 0
[certinfo] => Array ( )
[primary_ip] => xxx
[primary_port] => 80
[local_ip] => xxx
[local_port] => 37319
[redirect_url] =>
[request_header] => GET / HTTP/1.1 User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 Host: www.xxxx.fr Accept-Encoding: gzip,deflate Referer: http://www.google.fr Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5 Cache-Control: max-age=0 Connection: keep-alive Keep-Alive: timeout=5, max=100 Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7 Accept-Language: en-us,en;q=0.5
)
Is `$curlData` empty for you? It comes back with HTML for me when I execute your code.