I want to print d_t global 2D array variable using "printf" inside main method. But I got a compile warning saying that:
a __device__ variable "d_t" cannot be directly read in a host function
How can I copy global 2D array variable from device to host and then print the first column of each row?
__device__ double *d_t;
__device__ size_t d_gridPitch;
__global__ void kernelFunc()
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
double* rowt = (double*)((char *)d_t + i * d_gridPitch);
rowt[0] = rowt[0] + 40000;
}
int main()
{
int size = 16;
size_t d_pitchLoc;
double *d_tLoc;
cudaMallocPitch((void**)&d_tLoc, &d_pitchLoc, size * sizeof(double), size);
cudaMemset2D(d_tLoc, d_pitchLoc, 0, size * sizeof(double), size);
cudaMemcpyToSymbol(d_gridPitch, &d_pitchLoc, sizeof(int));
cudaMemcpyToSymbol(d_t, & d_tLoc, sizeof(d_tLoc));
kernelFunc<<<1,size>>>();
for(int i=0; i< size; i++){
double* rowt = (double*)((char *)d_t + i * d_gridPitch);
printf("%.0f, ",rowt[0]);
}
cudaDeviceReset();
return 0;
}
cudaMemcpy2Dfunction is used to copy to or from a pitched allocation (i.e. created withcudaMallocPitch). Here is the API documentation forcudaMemcpy2D. If you search on this CUDA tag you will find many questions and answers that demonstrate proper usage, such as this one. Use proper CUDA error checking.