Using an array of device function pointers

Question

I need a device version of the following host code:

double (**func)(double x);

double func1(double x)
{
 return x+1.;
}

double func2(double x)
{
 return x+2.;
}

double func3(double x)
{
 return x+3.;
}

void test(void)
{
 double x;

 for(int i=0;i<3;++i){
  x=func[i](2.0);
  printf("%g\n",x);
 }

}

int main(void)
{
 func=(double (**)(double))malloc(10*sizeof(double (*)(double)));
 
 test();

 return 0;
}

where func1, func2, func3 have to be __device__ functions and test has to be a (suitably modified) __global__ kernel.

I have a NVIDIA GeForce GTS 450 (compute capability 2.1)

A working solution

#define REAL double

typedef REAL (*func)(REAL x);

__host__ __device__ REAL func1(REAL x)
{
    return x+1.0f;
}

__host__ __device__ REAL func2(REAL x)
{
    return x+2.0f;
}

__host__ __device__ REAL func3(REAL x)
{
    return x+3.0f;
}

__device__ func func_list_d[3];
func func_list_h[3];

__global__ void assign_kernel(void)
{
    func_list_d[0]=func1;
    func_list_d[1]=func2;
    func_list_d[2]=func3;
}

void assign(void)
{
    func_list_h[0]=func1;
    func_list_h[1]=func2;
    func_list_h[2]=func3;
}


__global__ void test_kernel(void)
{
    REAL x;
    for(int i=0;i<3;++i){
        x=func_list_d[i](2.0);
        printf("%g\n",x);
  }
}

void test(void)
{
    REAL x;
    printf("=============\n");
    for(int i=0;i<3;++i){
        x=func_list_h[i](2.0);
        printf("%g\n",x);
  }
}

int main(void)
{
    assign_kernel<<<1,1>>>();
    test_kernel<<<1,1>>>();
    cudaThreadSynchronize();

    assign();
    test();

    return 0;
}

There is a function pointer sample which ships in the CUDA SDK, and you can see an example which is very similar to your question in this post on the CUDA developer forums. — talonmies
– talonmies, Commented Jan 25, 2012 at 10:17

brano · Accepted Answer · 2015-12-01 08:24:24Z

25

function pointers are allowed on Fermi. This is how you could do it:

typedef double (*func)(double x);

__device__ double func1(double x)
{
return x+1.0f;
}

__device__ double func2(double x)
{
return x+2.0f;
}

__device__ double func3(double x)
{
return x+3.0f;
}

__device__ func pfunc1 = func1;
__device__ func pfunc2 = func2;
__device__ func pfunc3 = func3;

__global__ void test_kernel(func* f, int n)
{
  double x;

  for(int i=0;i<n;++i){
   x=f[i](2.0);
   printf("%g\n",x);
  }
}

int main(void)
{
  int N = 5;
  func* h_f;
  func* d_f;
  h_f = (func*)malloc(N*sizeof(func));
  cudaMalloc((void**)&d_f,N*sizeof(func));

  cudaMemcpyFromSymbol( &h_f[0], pfunc1, sizeof(func));
  cudaMemcpyFromSymbol( &h_f[1], pfunc1, sizeof(func));
  cudaMemcpyFromSymbol( &h_f[2], pfunc2, sizeof(func));
  cudaMemcpyFromSymbol( &h_f[3], pfunc3, sizeof(func));
  cudaMemcpyFromSymbol( &h_f[4], pfunc3, sizeof(func));

  cudaMemcpy(d_f,h_f,N*sizeof(func),cudaMemcpyHostToDevice);

  test_kernel<<<1,1>>>(d_f,N);

  cudaFree(d_f);
  free(h_f);

  return 0;
}

edited Dec 1, 2015 at 8:24

answered Jan 25, 2012 at 10:53

brano

2,89421 silver badges15 bronze badges

Sign up to request clarification or add additional context in comments.

10 Comments

micheletuttafesta Over a year ago

Thank you very very much!! Your answer has been very useful for me. Is it possible to dynamically allocate the array func_list ?

brano Over a year ago

I have edited the code to illustrated how you could use dynamic allocation.

micheletuttafesta Over a year ago

brano I am infinitely grateful to you for your help!! However I found this working solution... is it correct? I have to do the assignment of "func_list_d" in a kernel

brano Over a year ago

The above example works. If you want to assign d_f inside a kernel than you can do so. Just remove all cudaMemcpyFromSymbol and launch a kernel instead that writes to d_f and uses pfunc1,pfunc2,pfunc3.

micheletuttafesta Over a year ago

OK brano! Your code will be a treasure for me in my future work

|

Collectives™ on Stack Overflow

Using an array of device function pointers

1 Answer 1

10 Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

10 Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related