I have a struct that is defined in both my host and my device code. On the host I initialize an array of this struct with values.
MyStruct *h_s = (MyStruct *) malloc(objsize*sizeof(MyStruct));
h_s[0] = ...
MyStruct *d_s;
cudaMalloc( &d_s, objsize * sizeof(MyStruct));
cudaMemcpy( d_s, h_s, objsize * sizeof(MyStruct), cudaMemcpyHostToDevice );
init<<< gridSize, blockSize >>> ( d_s );
In my kernel code I have about 7 functions that should use this array. Some of them are __global__ kernels and some are plain __device__ functions. For simplicity and efficiency I want to use a shared memory array.
__shared__ MyStruct *d_s;
__global__ void init(MyStruct *theStructArray){
    // How do I allocate memory for d_s?
    // How do I copy theStructArray to d_s?
}
So the question is: how can I allocate memory for the shared array and fill it from the function parameter?
Edit: I am trying to port the smallpt code to CUDA.
struct Sphere {
    double rad;    // radius
    Vec p, e, c;   // position, emission, color
    Refl_t refl;   // reflection type (DIFFuse, SPECular, REFRactive)

    Sphere(){
        rad = 16.5;
        p = (Vec(27,16.5,47) + Vec(73,16.5,78)) * 0.5;
        e = Vec();
        c = Vec(0.75, 0.75, 0.75);
        refl = DIFF;
    }

    Sphere(double rad_, Vec p_, Vec e_, Vec c_, Refl_t refl_):
        rad(rad_), p(p_), e(e_), c(c_), refl(refl_) {}

    __device__ double intersect(const Ray &r) const { // returns distance, 0 if no hit
        Vec op = p - r.o; // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p) - R^2 = 0
        double t, eps = 1e-4, b = op.dot(r.d), det = b*b - op.dot(op) + rad*rad;
        if (det < 0) return 0; else det = sqrt(det);
        return (t = b - det) > eps ? t : ((t = b + det) > eps ? t : 0);
    }
};
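For context, the kernels will need to scan this array for the nearest hit, along the lines of smallpt's scene-intersection helper. A rough device-side sketch of what that looks like (intersectScene and its parameter names are placeholders of mine; it assumes smallpt's Ray type with origin o and direction d):

__device__ bool intersectScene(const Sphere *spheres, int n, const Ray &r,
                               double &t, int &id) {
    double inf = t = 1e20;          // start with "no hit"
    for (int i = 0; i < n; ++i) {   // test every sphere, keep the closest hit
        double d = spheres[i].intersect(r);
        if (d > 0 && d < t) { t = d; id = i; }
    }
    return t < inf;                 // true if anything was hit
}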
Does MyStruct have pointers to other data in it?

__shared__ memory (whether statically or dynamically allocated) has block scope. If you want to load something into shared memory, each block of each kernel you run must read the source memory itself and load its contents into its own shared memory. The host cannot copy to shared memory.
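To make that concrete, here is a minimal sketch of the per-block load the kernel would have to do. It uses dynamically allocated shared memory (sized via the third launch parameter) and treats it as raw bytes, because CUDA does not allow dynamic initialization of __shared__ variables and Sphere above has a non-trivial, host-only constructor; the extra count parameter is my own addition:

__global__ void init(Sphere *theStructArray, int count)   // count is an added parameter
{
    // Dynamically sized shared memory; its size is given at launch time as the
    // third <<< >>> argument, e.g.:
    //   init<<<gridSize, blockSize, count * sizeof(Sphere)>>>(d_s, count);
    // Raw bytes are used so no constructor has to run on the shared storage.
    extern __shared__ unsigned char s_raw[];
    Sphere *s_spheres = reinterpret_cast<Sphere *>(s_raw);

    // Every block does its own copy from global memory: thread i copies
    // element i, striding in case blockDim.x < count.
    for (int i = threadIdx.x; i < count; i += blockDim.x)
        s_spheres[i] = theStructArray[i];

    __syncthreads();   // the shared copy is now visible to the whole block

    // ...the rest of the kernel (and any __device__ functions it calls, when
    //    handed s_spheres as an argument) can work on the shared copy...
}

The __device__ functions that need the array can then simply take a Sphere * parameter and be passed s_spheres by the kernel that loaded it.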