I'm working on an application in C++ that threads and hands a bunch of threads URLs for cURL to download in parallel.
I'm employing a method that should be safe to download images and videos, etc. I uses memcpy instead of assuming the data is a string or character array.
I pass each thread a structure, thread_status, for a number of things. The structure lets the parent process know the thread is done downloading. It also stores the data cURL is downloading and keeps track of it's size as cURL returns more buffers for writing.
I pass a (void *) pointer that points to each structure that's allocated at initialization to each thread that does the downloading. The first page is downloaded properly, after that I keep getting errors from realloc().
Here is the simplest example that illustrates my problem. This sample is not multi-threaded but uses a similar structure to keep track of itself.
#include <string>
#include <assert.h>
#include <iostream>
#include <curl/curl.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define NOT_READY 1
#define READY 0
using namespace std;
struct thread_status {
int id;
pthread_t *pid;
int readyState;
char *url;
void *data;
size_t bufferlen;
size_t writepos;
int initialized;
} ;
size_t static
writefunction( void *ptr, size_t size,
size_t nmemb, void *userdata)
{
size_t nbytes = size*nmemb;
struct thread_status **this_status;
this_status = (struct thread_status **) userdata;
if (!(*this_status)->initialized){
(*this_status)->data = (void *)malloc(1024);
(*this_status)->bufferlen = 1024;
(*this_status)->writepos = 0;
(*this_status)->initialized = true;
}
if ((*this_status)->bufferlen < ((*this_status)->writepos + nbytes)){
(*this_status)->bufferlen = (*this_status)->bufferlen + nbytes;
(*this_status)->data = realloc((*this_status)->data, (size_t) ((*this_status)->writepos + nbytes));
}
assert((*this_status)->data != NULL);
memcpy((*this_status)->data + (*this_status)->writepos, ptr, nbytes);
(*this_status)->writepos += nbytes;
return nbytes;
}
void *pull_data (void *my_struct){
struct thread_status *this_struct;
this_struct = (struct thread_status *) my_struct;
this_struct->initialized = false;
cout<<(char *)this_struct->url<<"\n";
CURL *curl;
curl = curl_easy_init();
size_t rc = 0;
while(true){
curl_easy_setopt(curl,
CURLOPT_WRITEFUNCTION, writefunction);
curl_easy_setopt(curl,
CURLOPT_WRITEDATA, (void *) &this_struct);
curl_easy_setopt(curl,
CURLOPT_NOSIGNAL, true);
curl_easy_setopt(curl,
CURLOPT_URL, (char *)this_struct->url);
if (curl_easy_perform(curl) != 0){
cout<<"curl did not perform\n";
exit(1);
} else {
if (this_struct->data != NULL){
// Use a binary write.
rc = fwrite(this_struct->data, this_struct->writepos, 1, stdout);
free(this_struct->data);
} else {
cout<<"Data is NULL\n";
}
}
// Tell the babysitter the thread is ready.
this_struct->readyState = READY;
// This would pause the thread until the parent thread has processed the data in it.
// while(this_struct->readyState == READY){;}
// Now get ready for another round!
this_struct->writepos = (size_t) 0;
this_struct->initialized = false;
this_struct->bufferlen = (size_t) 0;
break;
}
curl_easy_cleanup(curl);
return (void *)"a";
}
int main(){
char *urls[] = { "http://www.example.com/", "http://www.google.com", "http://www.touspassagers.com/", "http://www.facebook.com/" };
int i=0;
struct thread_status mystatuses[4];
for (i=0;i<4;i++){
struct thread_status my_status;
char *data;
my_status.id = i;
my_status.readyState = NOT_READY;
my_status.url = urls[i];
my_status.data = data;
my_status.bufferlen = 0;
my_status.writepos = 0;
my_status.initialized = false;
mystatuses[i] = my_status;
}
for (i=0;i<4;i++){
cout<<"pulling #"<<i<<"\n";
pull_data((void *)&mystatuses[i]);
}
}
If anyone can enlighten me as to the source of my error or a remedy for it I would appreciate it.