1

I'm having a problem with curl_multi_*. I want to create a class / function that receives, let's say, 1000 URLs and processes them 5 at a time, so that when a URL finishes downloading, the newly available slot is allocated to a URL that hasn't been processed yet.

I've seen some implementations of curl_multi, but none of them allows me to do what I want. I believe the solution lies somewhere in the usage of curl_multi_select, but the documentation isn't very clear and the user notes don't help much.

Can anyone please provide me with some examples how I can implement such a feature?

1 Answer 1

7

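Here's one way to do it. This script fetches a configurable number of URLs concurrently and starts a new transfer as soon as one finishes, so it is always fetching up to $maxConcurrent pages (set from $concurrent in the example below) until the list is exhausted.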

// Example usage: download every URL in $sites, keeping at most $concurrent
// transfers in flight at any moment.
$sites = array('http://example.com', 'http://google.com', 'http://stackoverflow.com');
$concurrent = 2;   // Maximum number of simultaneous transfers (any positive integer).

// Open the <pre> block that the closing tag below terminates; emitting only
// the closing tag produces unbalanced markup.
echo '<pre>';

$mc = new MultiCurl($sites, $concurrent);
$mc->process();

echo '</pre>';

/**
 * Downloads a list of URLs with a bounded number of simultaneous transfers.
 *
 * As soon as one transfer finishes, the freed slot is handed to the next URL
 * that has not been started yet, until every URL has been processed. One line
 * per finished URL is echoed: "<url> - <n> bytes<br />".
 */
class MultiCurl
{
    /** @var array Zero-based list of URLs still to be / already processed. */
    private $allToDo;
    /** @var mixed The cURL multi handle all transfers are attached to. */
    private $multiHandle;
    /** @var int Maximum number of transfers in flight at once (always >= 1). */
    private $maxConcurrent = 2;
    /** @var int Index into $allToDo of the next URL to start. */
    private $currentIndex  = 0;
    /** @var array Per-URL bookkeeping, keyed by the URL's index in $allToDo. */
    private $info          = array();
    /** @var array In-flight transfers: handle key => index into $allToDo. */
    private $active        = array();
    /** @var array cURL options applied to every individual transfer. */
    private $options       = array(CURLOPT_RETURNTRANSFER => true,
                                   CURLOPT_FOLLOWLOCATION => true,
                                   CURLOPT_MAXREDIRS      => 3,
                                   CURLOPT_TIMEOUT        => 3);

    /**
     * @param array $todo       URLs to download; keys are ignored.
     * @param int   $concurrent Maximum simultaneous transfers. Values below 1
     *                          are raised to 1, because a limit of zero could
     *                          never start a transfer and process() would
     *                          loop forever.
     */
    public function __construct($todo, $concurrent)
    {
        // Re-index so that $currentIndex can walk the list even when the
        // caller passed an associative or sparse array.
        $this->allToDo = array_values((array) $todo);
        $this->maxConcurrent = max(1, (int) $concurrent);
        $this->multiHandle = curl_multi_init();
    }

    /**
     * Runs all transfers to completion, echoing one line per finished URL.
     *
     * @return $this
     */
    public function process()
    {
        $running = 0;
        do {
            // Top up the pool: never more than the free slots, never more
            // than the URLs that are left.
            $freeSlots = $this->maxConcurrent - $running;
            $this->_addHandles(min($freeSlots, $this->_moreToDo()));

            // Old libcurl versions ask to be called again immediately by
            // returning CURLM_CALL_MULTI_PERFORM.
            while (curl_multi_exec($this->multiHandle, $running) === CURLM_CALL_MULTI_PERFORM) {
            }

            // Drain finished transfers before waiting, so that freed slots
            // are refilled without first sitting out the select timeout.
            $completed = 0;
            while ($multiInfo = curl_multi_info_read($this->multiHandle)) {
                $handle = $multiInfo['handle'];
                $this->_showData($multiInfo);
                curl_multi_remove_handle($this->multiHandle, $handle);
                unset($this->active[$this->_handleKey($handle)]);
                // curl_close() has been a no-op since PHP 8.0; only older
                // versions need the explicit close to release the resource.
                if (PHP_VERSION_ID < 80000) {
                    curl_close($handle);
                }
                $completed++;
            }

            // Only block when there was nothing to process this round.
            if ($completed === 0 && $running > 0) {
                if (curl_multi_select($this->multiHandle) === -1) {
                    // select() failed and returned immediately; back off
                    // briefly instead of spinning at 100% CPU.
                    usleep(1000);
                }
            }
        } while ($running > 0 || $this->_moreToDo() > 0);

        return $this;
    }

    /**
     * Returns the bookkeeping collected so far.
     *
     * @return array Keyed by URL index; each entry has 'url' and, once the
     *               transfer finished, 'multi' (the 'msg' and 'result' fields
     *               from curl_multi_info_read()) and 'curl' (curl_getinfo()).
     */
    public function getInfo()
    {
        return $this->info;
    }

    /**
     * Starts up to $num new transfers, taken in order from the URL list.
     *
     * @param int $num Number of transfers to start; values <= 0 start none.
     */
    private function _addHandles($num)
    {
        while ($num-- > 0) {
            $index = $this->currentIndex++;
            $url   = $this->allToDo[$index];

            $handle = curl_init($url);
            curl_setopt_array($handle, $this->options);
            curl_multi_add_handle($this->multiHandle, $handle);

            $this->active[$this->_handleKey($handle)] = $index;
            $this->info[$index] = array('url' => $url);
        }
    }

    /**
     * @return int Number of URLs that have not been started yet.
     */
    private function _moreToDo()
    {
        return count($this->allToDo) - $this->currentIndex;
    }

    /**
     * Records the outcome of one finished transfer and echoes its summary.
     *
     * @param array $multiInfo Message returned by curl_multi_info_read().
     */
    private function _showData($multiInfo)
    {
        $handle = $multiInfo['handle'];
        $index  = $this->active[$this->_handleKey($handle)];

        // Keep the status fields but not the handle itself, so finished
        // handles are not kept alive by the bookkeeping array.
        $this->info[$index]['multi'] = array('msg'    => $multiInfo['msg'],
                                             'result' => $multiInfo['result']);
        $this->info[$index]['curl']  = curl_getinfo($handle);

        // Cast to string: a transfer that produced no content can yield
        // null, and strlen(null) is deprecated as of PHP 8.1.
        $content = (string) curl_multi_getcontent($handle);
        echo $this->info[$index]['url'] . ' - ' . strlen($content) . ' bytes<br />';
    }

    /**
     * Maps a cURL handle to a value that is usable as an array key.
     *
     * @param mixed $handle CurlHandle object (PHP 8+) or curl resource (PHP < 8).
     * @return int
     */
    private function _handleKey($handle)
    {
        // Objects cannot be array offsets; resources can be cast to their id.
        return is_object($handle) ? spl_object_id($handle) : (int) $handle;
    }
}
Sign up to request clarification or add additional context in comments.

2 Comments

since this example doesn't use curl_multi_select() it will busy-loop like crazy and thus take 100% CPU until all transfers are done...
@Daniel Stenberg - Thanks; you're right. I've replaced that code with a (stripped-down) class that is faster and uses much less cpu time.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.