I understand this isn't exactly the smallest code, i've tried to cut it down as much as i could. The script just consumes more and more memory until it finally runs out. I've used unset() where possible but it doesn't seem to have any effect. It always seems to error in the MultiGet function but i'm not sure if that is where the leak is. Any input would be greatly appreciated.
public function Test()
{
$base = dirname(__FILE__) .'/';
$prod_file = $base.'products.dbf';
$this->dbf->load($prod_file);
$num_rec=$ci->dbf->dbf_num_rec;
$buffer = Array();
for($i=0;$i<$num_rec;$i++):
$row = $ci->dbf->getRowAssoc($i);
$info = Array('part_number' => $row['PART_NUM'],
'td_group_id' => $row['GRP'],
'name' => 'DESCR');
$this->db->where('td_group_id',$info['td_group_id']);
$result = $this->db->get('tbl_categories')->row_array();
if(isset($result['id'])):
$info['category_id'] = $result['id'];
$buffer[] = $info;
endif;
if(count($buffer) == 100 || $i == $num_rec -1):
$url_buffer = Array();
foreach($buffer as $row):
$url_buffer[] = $this->_product_url($row['part_number']);
endforeach;
$html_returns = $this->MultiCrawl($url_buffer);
foreach($html_returns as $url_index=>$html):
$more_info = $this->_extract_more_info($html);
if($more_info):
$more_info['category_id'] = $buffer[$url_index]['category_id'];
$more_info['td_part_number'] = $buffer[$url_index]['part_number'];
$this->_parse_product($more_info);
endif;
endforeach;
$buffer = Array();
endif;
endfor;
}
function MultiGet($all_urls)
{
$useragent = $this->_useragent;
$cookie_file = $this->_cookie_file;
$url_index = $this->UrlIndex($all_urls);
$return_buffer = Array();
$mh = curl_multi_init();
$ch = Array();
$max_connections = 15;
$index = 0;
$open_connections = 0;
$execReturnValue = true;
$running = true;
$max_index = count($all_urls)-1;
$url_count = count($all_urls);
$buffer_count = 0;
while ($buffer_count < $url_count){
if($open_connections < $max_connections && $index <= $max_index):
for($i=$open_connections;$i<$max_connections && $index <= $max_index;$i++):
$url = $all_urls[$index];
$ch[$index] = curl_init($url);
curl_setopt($ch[$index],CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch[$index],CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch[$index],CURLOPT_COOKIESESSION, false);
curl_setopt($ch[$index],CURLOPT_SSL_VERIFYHOST , false);
curl_setopt($ch[$index],CURLOPT_SSL_VERIFYPEER , false);
curl_setopt($ch[$index],CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($ch[$index],CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($ch[$index],CURLOPT_USERAGENT,$useragent);
curl_multi_add_handle($mh, $ch[$index]);
$open_connections++;
$index++;
$execReturnValue = curl_multi_exec($mh,$running);
usleep(200);
endfor;
endif;
$execReturnValue = curl_multi_exec($mh,$running);
$ready=curl_multi_select($mh);
while($info=curl_multi_info_read($mh)){
$status=curl_getinfo($info['handle'],CURLINFO_HTTP_CODE);
if($status==200){
$successUrl=curl_getinfo($info['handle'],CURLINFO_EFFECTIVE_URL);
$curl_index = $url_index[$successUrl];
$return_buffer[$curl_index] = curl_multi_getcontent($ch[$curl_index]);
$buffer_count = count($return_buffer);
curl_multi_remove_handle($mh, $ch[$curl_index]);
curl_close($ch[$curl_index]);
unset($ch[$curl_index]);
$open_connections--;
}else{
echo "ERROR: $status\n";
}
}
}
curl_multi_close($mh);
unset($mh);
return $return_buffer;
}
private function _extract_more_info($html)
{
$buffer = array();
$query = "//img[@id='ctl00_cphMain_cntrlProductProfile_imgprodimage']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['td_img_url'] = $node?trim($node->getAttribute('src')):null;
unset($result);
$query = "//span[@class='priceLarge']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['price'] = $node?trim($node->nodeValue):null;
if($buffer['price'] == 'Req. Auth.') return null;
unset($result);
$query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLtFinalPrice']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['msrp'] = $node?trim($node->nodeValue):null;
unset($result);
$query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLTMRF']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['manf_part_number'] = $node?trim($node->nodeValue):null;
unset($result);
$query = "//span[@id='ctl00_cphMain_cntrlProductProfile_newLblUPC']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
$buffer['upc_part_number'] = $node?trim($node->nodeValue):null;
unset($result);
$query = "//td[@class='black_text_WUL']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['manufacturer'] = $node?trim($node->nodeValue):null;
unset($result);
$query = "//td[@class='textt' and @colspan='3']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['short_description'] = $node?trim($node->nodeValue):null;
unset($result);
$query = "//div[@id='ctl00_cphMain_pnlMarketingDesc']//td[@class='textt']";
$result = $this->_xquery($html,$query);
$node = $result instanceof DOMNode?$this->_to_dom_node($result):null;
if(!$node) return null;
$buffer['long_description'] = $node?trim($node->nodeValue):null;
unset($result);
$query = "//table[@id='ctl00_cphMain_cntrlMainSpecs_dgSpecs']";
$result = $this->_xquery($html,$query);
$table = $result instanceof DOMNode?$this->_to_dom_node($result):null;
unset($result);
if(!$table) return null;
$table_array = Array();
$rows = $table->getElementsByTagName('tr');
foreach($rows as $tr):
$temp = Array();
$columns = $tr->getElementsByTagName('td');
$caption = $columns->length > 0 && $columns->length <= 2 ? trim($columns->item(0)->nodeValue) : null;
$value = $columns->length == 2 ? trim($columns->item(1)->nodeValue) : null;
if ($caption) $table_array[$caption] = $value;
endforeach;
$buffer['main_specs']=$table_array;
$query = "//table[@id='ctl00_cphMain_cntrlExtSpecs_tblData']";
$result = $this->_xquery($html,$query);
$table = $result instanceof DOMNode?$this->_to_dom_node($result):null;
unset($result);
$buffer['additional_specs'] = null;
if(!$table) return $buffer;
$table_array = Array();
$rows = $table->getElementsByTagName('tr');
foreach($rows as $tr):
$temp = Array();
$columns = $tr->getElementsByTagName('td');
$caption = $columns->length > 0 && $columns->length <= 2 ? trim($columns->item(0)->nodeValue) : null;
$value = $columns->length == 2 ? trim($columns->item(1)->nodeValue) : null;
if ($caption) $table_array[$caption] = $value;
endforeach;
$buffer['additional_specs']=$table_array;;
return $buffer;
}
private function _xquery($html,$query,$allnodes = false){
$src = '';
$dom = new DOMDocument();
$node = null;
if (@$dom->loadHTML($html)) {
$xpath = new DOMXpath($dom);
$nodeList = $xpath->query($query);
if ($nodeList->length > 0) {
$node = $allnodes==false?$nodeList->item(0):$nodeList;
}
}
unset($xpath);
unset($nodeList);
unset($dom);
return $node;
}