0

I'm working on an application in C++ that threads and hands a bunch of threads URLs for cURL to download in parallel.

I'm employing a method that should be safe to download images and videos, etc. I uses memcpy instead of assuming the data is a string or character array.

I pass each thread a structure, thread_status, for a number of things. The structure lets the parent process know the thread is done downloading. It also stores the data cURL is downloading and keeps track of it's size as cURL returns more buffers for writing.

I pass a (void *) pointer that points to each structure that's allocated at initialization to each thread that does the downloading. The first page is downloaded properly, after that I keep getting errors from realloc().

Here is the simplest example that illustrates my problem. This sample is not multi-threaded but uses a similar structure to keep track of itself.

#include <string>
#include <assert.h>
#include <iostream>
#include <curl/curl.h>
#include <stdlib.h> 
#include <stdio.h>
#include <string.h>

#define NOT_READY   1
#define READY       0

using namespace std;

struct thread_status {
    int id;
  pthread_t *pid;
    int readyState;
    char *url;
    void *data;
    size_t bufferlen;
    size_t writepos;
    int initialized;
} ;


size_t static 
writefunction(  void *ptr, size_t size, 
                    size_t nmemb, void *userdata)
{
    size_t nbytes = size*nmemb;
        struct thread_status **this_status;
        this_status = (struct thread_status **) userdata;

        if (!(*this_status)->initialized){
                (*this_status)->data = (void *)malloc(1024);
                (*this_status)->bufferlen = 1024;
                (*this_status)->writepos = 0;
                (*this_status)->initialized = true;
        }

        if ((*this_status)->bufferlen < ((*this_status)->writepos + nbytes)){
            (*this_status)->bufferlen = (*this_status)->bufferlen + nbytes;
            (*this_status)->data = realloc((*this_status)->data, (size_t) ((*this_status)->writepos + nbytes));
        }

        assert((*this_status)->data != NULL);
        memcpy((*this_status)->data + (*this_status)->writepos, ptr, nbytes);
        (*this_status)->writepos += nbytes; 
    return nbytes;
}

void *pull_data (void *my_struct){

struct thread_status *this_struct;
this_struct = (struct thread_status *) my_struct;
this_struct->initialized = false;

cout<<(char *)this_struct->url<<"\n";

CURL *curl;
curl = curl_easy_init();
size_t rc = 0;

while(true){

    curl_easy_setopt(curl,
        CURLOPT_WRITEFUNCTION, writefunction);
    curl_easy_setopt(curl,
        CURLOPT_WRITEDATA, (void *) &this_struct);
    curl_easy_setopt(curl,
        CURLOPT_NOSIGNAL, true);
    curl_easy_setopt(curl,
        CURLOPT_URL, (char *)this_struct->url);

    if (curl_easy_perform(curl) != 0){
        cout<<"curl did not perform\n";
        exit(1);
    } else { 
    if (this_struct->data != NULL){
            // Use a binary write.
            rc = fwrite(this_struct->data, this_struct->writepos, 1, stdout);
            free(this_struct->data);
        } else {
            cout<<"Data is NULL\n";
        } 
    }

    // Tell the babysitter the thread is ready.
    this_struct->readyState = READY;
// This would pause the thread until the parent thread has processed the data in it.
//  while(this_struct->readyState == READY){;}

    // Now get ready for another round!
    this_struct->writepos = (size_t) 0;
    this_struct->initialized = false;
    this_struct->bufferlen = (size_t) 0; 

    break;
}

    curl_easy_cleanup(curl);
    return (void *)"a";
}

int main(){

    char *urls[] = { "http://www.example.com/", "http://www.google.com", "http://www.touspassagers.com/", "http://www.facebook.com/" };
    int i=0;
    struct thread_status mystatuses[4];
    for (i=0;i<4;i++){

        struct thread_status my_status; 
        char *data;

        my_status.id = i;
        my_status.readyState = NOT_READY;
        my_status.url = urls[i];
        my_status.data = data;
        my_status.bufferlen = 0;
        my_status.writepos = 0;
        my_status.initialized = false;

        mystatuses[i] = my_status;
    }

    for (i=0;i<4;i++){
        cout<<"pulling #"<<i<<"\n";
        pull_data((void *)&mystatuses[i]);
    }

}

If anyone can enlighten me as to the source of my error or a remedy for it I would appreciate it.

1
  • 1
    Performance tip: Your current allocation strategy performs O(n^2) work, which will be very slow for large files and result in a lot of memory fragmentation. If you instead double the size of the buffer each time you run out of space, you will do only O(n) work, without ever wasting more than 50% of your allocated space. Commented Jan 16, 2011 at 17:21

2 Answers 2

3

You might consider using valgrind to help locate the source of the memory problems.

Sign up to request clarification or add additional context in comments.

1 Comment

I <3 valgrind. haha. YES. +1
1

Got it!

Apparently 1KB isn't enough memory to handle the first cURL buffer. I changed 1024 to nbytes and it works!

Before the memory memcpy put in the buffer ran over the allocated memory resulting in corruptions.

I did a post about it if anyone cares to see the full implementation: http://www.touspassagers.com/2011/01/a-working-curlopt_writefunction-function-for-libcurl/

1 Comment

Will do...just have to wait 2 days.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.