0

I am trying to save a page as a PDF file. To do so, I read the content of that page and then use a PDF library to create the file. The issue I am facing is that when I read the content using the code below, I receive "Please enable JavaScript to view the page content." instead of the page content.

<?php
    // Fetch the raw HTML of a remote page and print it, HTML-escaped, for inspection.
    // NOTE: file_get_contents() issues a plain HTTP request and does not execute any
    // JavaScript, so only the markup the server sends in its initial response is returned.
    $url='https://www.carfax.com/VehicleHistory/p/Report.cfx?vin=1J4RR5GG5BC586221&csearch=0&partner=GAZ_0';
    //file_get_contents() reads remote webpage content; it returns false on failure
    $lines_string=file_get_contents($url);
    if ($lines_string === false) {
        // Report the failure explicitly instead of silently echoing an empty string.
        exit('Unable to fetch ' . htmlspecialchars($url));
    }
    //output, you can also save it locally on the server
    echo htmlspecialchars($lines_string);
?>

How can I bypass this JavaScript error, or should I use a different approach?

This is the response I am getting:

<html>
<head>
<meta http-equiv="Pragma" content="no-cache"/> 
<meta http-equiv="Expires" content="-1"/> 
<meta http-equiv="CacheControl" content="no-cache"/> 
<meta http-equiv="X-UA-Compatible" content="IE=edge"/> 
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/> 
<link rel="shortcut icon" href="data:;base64,iVBORw0KGgo="/> 
<script> (function(){ var securemsg; var dosl7_common; window["bobcmn"] = "11111010101010200000002200000005200000000289128f7a200000096300000000300000000300000006/TSPD/300000008TSPD_101300000005https200000000200000000"; window.jar=!!window.jar;try{(function(){try{var jj,Jj,Lj=1,Zj=1,Sj=1;for(var ij=0;ij<Jj;++ij)Lj+=2,Zj+=2,Sj+=3;jj=Lj+Zj+Sj;window._O===jj&&(window._O=++jj)}catch(Ij){window._O=jj}var oJ=!0;function OJ(J){J&&(oJ=!1,document.cookie="brav=ad");return oJ}function _J(){}OJ(window[_J.name]===_J);OJ("function"!==typeof ie9rgb4);OJ(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"})); var iJ=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),IJ=+new Date+6E5,ol,_l,Il=setTimeout,jL=iJ?3E4:6E3;function JL(){if(!document.querySelector)return!0;var J=+new Date,O=J>IJ;if(O)return OJ(!1);O=_l&&ol+jL<J;O=OJ(O);ol=J;_l||(_l=!0,Il(function(){_l=!1},1));return O}JL();var LL=[17795081,27611931586,1558153217]; function oL(J){J="string"===typeof J?J:J.toString(36);var O=window[J];if(!O.toString)return;var s=""+O;window[J]=function(J,s){_l=!1;return O(J,s)};window[J].toString=function(){return s}}for(var ZL=0;ZL<LL.length;++ZL)oL(LL[ZL]);OJ(!1!==window.jar);(function(){var J={decrypt:function(J){try{return JSON.parse(function(J){J=J.split("l");var O="";for(var s=0;s<J.length;++s)O+=String.fromCharCode(J[s]);return O}(J))}catch(s){}}};return J={configuration:J.decrypt("123l34l97l99l116l105l118l101l34l58l34l110l111l34l44l34l100l101l98l117l103l103l105l110l103l34l58l34l110l111l34l44l34l109l111l100l117l108l101l49l34l58l34l101l110l97l98l108l101l100l34l44l34l109l111l100l117l108l101l50l34l58l34l101l110l97l98l108l101l100l34l44l34l109l111l100l117l108l101l51l34l58l34l101l110l97l98l108l101l100l34l44l34l109l111l100l117l108l101l52l34l58l34l101l110l97l98l108l101l100l34l125")}})(); var 
sL=3;window.Ls={Os:"087ba4d0fa0178004caafc50a30d48046efd9a15f604d0926f4f95da1a85d369a6d1815489a54acc4a49a5998f87f099792ce5cf3c00ed82cb613e80bec837da827a4967e05d64d8670f7d97250745b00db5a2d96701cfc9d19e00ad5ebfd2aff76046976642518c76938888a8f784eed5b5ea881a1e3668f9b030002df03262"};function l(J){return 645>J}function L(J){var O=arguments.length,s=[];for(var S=1;S<O;++S)s.push(arguments[S]-J);return String.fromCharCode.apply(String,s)}function z(J,O){J+=O;return J.toString(36)}(function SL(O){O&&"number"!==typeof O||("number"!==typeof O&&(O=1E3),O=Math.max(O,1),setInterval(function(){SL(O-10)},O))})(JL());})();}catch(x){document.cookie='brav=oex'+x;}finally{ie9rgb4=void(0);};function ie9rgb4(a,b){return a>>b>>0}; })(); </script> 
<script type="text/javascript" src="/TSPD/086821c3deab2000f497f4a10d45047d2c741eba0afdeced26cf36a836d13b181cb57773ccf959f0?type=7"></script>
<noscript>Please enable JavaScript to view the page content.</noscript> 
</head>
<body> </body>
</html>

2
  • 1
    Use an automated web testing library that can handle JavaScript, for instance Selenium WebDriver, with its headless browser. Commented May 30, 2017 at 1:46
  • 1
    I don't understand your approach. Can you give me some more details? @DavidFindlay Commented May 30, 2017 at 1:51

1 Answer 1

1

The problem is that the site is doing JavaScript detection on the page you're trying to load, which means it is more than likely serving up a mostly blank document with a noscript tag and a script tag that loads the rest of the content. You cannot bypass this with a plain HTTP fetch such as file_get_contents() or cURL, because neither executes JavaScript. Instead you'll need a headless browser with a JavaScript engine. Selenium WebDriver is one such solution. I found this gist for you as well: https://gist.github.com/evandrix/3694955

Sign up to request clarification or add additional context in comments.

2 Comments

I was reading a little bit about it, and I am wondering whether this requires that I install the Selenium server as well, or whether I can use only the PHP libraries to get the web content?
I'm only asking because I am hosting on a GoDaddy Linux account.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.