I am attempting to create an array of pointers on the host, where each pointer in the array points to an array of size 4. When I try to copy one of these pointers to the device, the copy fails and the device cannot access the contents of the array that the pointer points to. How would I copy a pointer, taken from an array of pointers, that points to an array from the host to the device?
// Prints the first four ints reachable through D, one "Device = <value>" line
// per element.
//
// Uses a grid-stride loop, so any 1D launch configuration covers indices 0..3:
// each thread starts at its flat global index and advances by the total number
// of threads in the grid.
//
// D must be dereferenceable from device code and hold at least 4 ints.
__global__ void kernel(int* D)
{
    const int stride = blockDim.x * gridDim.x;
    for (int idx = threadIdx.x + blockIdx.x * blockDim.x; idx < 4; idx += stride)
    {
        printf("Device = %d\n", D[idx]);
    }
}
// Uploads the first row of a host-side array of pointers to the device and has
// the kernel print it.
//
// H is an array of host pointers; each entry points to a 4-int host array.
// D is the matching array of device pointers. The array D itself lives on the
// host -- only the buffer that D[0] points to is device memory, so D[0] can be
// passed to cudaMemcpy and to the kernel by value.
//
// Only row 0 is allocated, copied and printed; H[1] / D[1] are set up but not
// uploaded.
int main(void)
{
    cudaProfilerStart();

    const int kCols = 4;
    const size_t rowBytes = kCols * sizeof(int);

    int* H[2];
    int* D[2] = { 0, 0 };   // start null so an unallocated slot is never a wild pointer

    int test1[kCols] = { 1, 2, 3, 4 };
    int test2[kCols] = { 10, 20, 30, 40 };
    H[0] = test1;
    H[1] = test2;

    // Allocate device storage for row 0 and copy the host row into it.
    HANDLE_ERROR(cudaMalloc((void**)&D[0], rowBytes));
    HANDLE_ERROR(cudaMemcpy(D[0], H[0], rowBytes, cudaMemcpyHostToDevice));

    kernel <<<1, kCols >>>(D[0]);

    // A launch returns no status of its own: configuration errors are reported
    // through cudaGetLastError().
    HANDLE_ERROR(cudaGetLastError());

    // Kernel launches are asynchronous. Synchronizing before returning lets the
    // kernel finish, flushes the device-side printf buffer, and surfaces any
    // fault that happened while the kernel was running.
    HANDLE_ERROR(cudaDeviceSynchronize());

    HANDLE_ERROR(cudaFree(D[0]));
    D[0] = 0;

    cudaProfilerStop();
    return 0;
}
What does `cuda-memcheck` report if you run your code with it?