I have a program I'm working on. I'm new with CUDA and C,so it really has been a bumpy ride for me. I'm trying to copy a struct into the device. And then I'm trying to get the struct back to host by copying it to the device. Below is the code :
typedef struct {
int row;
int col;
float *arr;
int numElements;
} Matrix;
Matrix *RMatrix = //definition here
Matrix *d_RMatrix;
copyMatrix(d_RMatrix, RMatrix, hostToDevice);
Matrix *check = createMatrix(0, 0, NULL, 0);
copyMatrix(check, d_RMatrix, deviceToHost);
and the definition of copyMatrix :
void copyMatrix (Matrix *copyTo, Matrix *copyFrom, Copy_type type)
{
if(type == hostToDevice) {
// create temporary host matrix and array
Matrix *copyFrom_h = createMatrix(copyFrom->row, copyFrom->col, NULL, copyFrom->numElements);
// allocate device memory, pointing to array in host. Copy array to device memory
cudaMalloc((void**) ©From_h->arr, sizeof(float) * copyFrom_h->numElements);
cudaMemcpy(copyFrom_h->arr, copyFrom->arr, sizeof(float) * copyFrom_h->numElements, cudaMemcpyHostToDevice);
// copy the temporary memory to device
cudaMalloc((void**) ©To, sizeof(Matrix));
cudaMemcpy(copyTo, copyFrom_h, sizeof(Matrix), cudaMemcpyHostToDevice);
copyFrom_h = NULL;
free(copyFrom_h);
}
else if(type == deviceToHost) {
cudaMemcpy(copyTo, copyFrom, sizeof(Matrix), cudaMemcpyDeviceToHost);
// allocate space for array in the copy to matrix
copyTo->arr = makeArray(copyTo->col, copyTo->row);
cudaMemcpy(copyTo->arr, copyFrom->arr, sizeof(float) * copyTo->numElements, cudaMemcpyDeviceToHost);
}
}
The error says invalid memory access at 0x3 (value of d_RMatrix) for the 1st call to cudaMemcpy and results in segfault on the 2nd.
Is there anything I'm missing here? Thanks for your help :)