0

I want to copy the int m_CellParticleNumber and m_aCellParticleID[CELLMAXPARTICLENUM] value from host to device.For this i use constant memory. But i am not being able to copy the value to the constant memory. can you please help me in this code for copying the value.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <memory>
#include<iostream>



#define m_CellNum 100
#define CELLMAXPARTICLENUM 100
int numPartilces = 10;

extern void Initialize();

struct CCell
{
    int m_CellParticleNumber ;
    int m_aCellParticleID[CELLMAXPARTICLENUM];
} ;
CCell* hvalue;
CCell* dvalue;
int* dCellParticleID;

__constant__ CCell* c_value;

__global__ void Sum_constant(CCell* value, int N)
{   
    int index;    
    index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index<N)

        for(int idx=0;idx<N ;++idx)
            value[index].m_aCellParticleID[idx]= value[index].m_aCellParticleID[idx]+          c_value[index].m_aCellParticleID[idx] ;
    //return;

}


int main()
{

    hvalue = new CCell[m_CellNum];

    cudaMalloc((void**)&dvalue,m_CellNum * sizeof(CCell));
    //calling function to initialize the value
    Initialize();

    //initializing the device momory
    cudaMemcpy(dvalue, hvalue, sizeof(CCell)*m_CellNum,cudaMemcpyHostToDevice);
    //copying value to constant memory

    cudaMemcpyToSymbol(c_value->m_aCellParticleID, &dvalue->m_aCellParticleID,   sizeof(int)*m_CellNum);
    //dividing bolcks and grid
    int block_size = 4;
    int n_blocks = numPartilces/block_size + (numPartilces%block_size == 0 ? 0:1);
    //invocking kernel function
    Sum_constant <<< n_blocks, block_size >>> (c_value,numPartilces);
    //copying value from host to device
    cudaMemcpy(hvalue, dvalue,numPartilces * sizeof(int),cudaMemcpyDeviceToHost);
    //showing result
    for(int i = 0; i < 2; ++i)
    {
        for(int j = 0; j < numPartilces; ++j)
        {
            std::cout<<hvalue[i].m_aCellParticleID[j]<<"\n";
        }
    }


    free(hvalue);
    cudaFree(dvalue);
    return 0;
}
void Initialize()
{

    cudaMalloc((void**)&dCellParticleID,m_CellNum * sizeof(int));
    for(int i = 0; i < numPartilces; ++i)
    {

        hvalue[i].m_CellParticleNumber = 0;
        for(int j = 0; j < numPartilces; ++j)
        {
            hvalue[i].m_aCellParticleID[j] = j+2;
        }
        hvalue[i].m_CellParticleNumber++;
    }
}

This is what i tried according to the suggestion given but still it dont works. can you please help me. #include "cuda_runtime.h" #include "device_launch_parameters.h"

 #include <stdio.h>
 #include <memory>
 #include<iostream>



 #define m_CellNum 100
 #define CELLMAXPARTICLENUM 100
 int numPartilces = 10;

 extern void Initialize();

  struct CCell
 {
int m_CellParticleNumber ;
int m_aCellParticleID[CELLMAXPARTICLENUM];
  } ;
  CCell* hvalue;
  CCell* dvalue;
  int* dCellParticleID;

  __constant__ CCell c_value[m_CellNum];

   __global__ void Sum_constant(CCell* value, int N)
 {   
int index;    
  index = blockIdx.x * blockDim.x + threadIdx.x;
if (index<N)

for(int idx=0;idx<N ;++idx)
value[index].m_aCellParticleID[idx]= value[index].m_aCellParticleID[idx]+        c_value[index].m_aCellParticleID[idx] ;
//return;

 }


 int main()
 {
int numPartilces = 10;
hvalue = new CCell[m_CellNum];

cudaMalloc((void**)&dvalue,m_CellNum * sizeof(CCell));
//calling function to initialize the value
Initialize();

//initializing the device momory
cudaMemcpy(dvalue, hvalue, sizeof(CCell)*m_CellNum,cudaMemcpyHostToDevice);
//copying value to constant memory

  cudaMemcpyToSymbol(c_value, &dvalue, sizeof(dvalue));
//dividing bolcks and grid
int block_size = 4;
int n_blocks = numPartilces/block_size + (numPartilces%block_size == 0 ? 0:1);
//invocking kernel function
Sum_constant <<< n_blocks, block_size >>> (dvalue,numPartilces);
//copying value from host to device
cudaMemcpy(hvalue, dvalue,2*m_CellNum * sizeof(int),cudaMemcpyDeviceToHost);
//showing result
    for(int i = 0; i < numPartilces; ++i)
{
    for(int j = 0; j < numPartilces; ++j)
    {
        std::cout<<hvalue[i].m_aCellParticleID[j]<<"\n";
    }
}


free(hvalue);
cudaFree(dvalue);
  return 0;
 }
 void Initialize()
 {

cudaMalloc((void**)&dCellParticleID,m_CellNum * sizeof(int));
for(int i = 0; i < numPartilces; ++i)
{

    hvalue[i].m_CellParticleNumber = 0;
    for(int j = 0; j < numPartilces; ++j)
    {
        hvalue[i].m_aCellParticleID[j] = j+2;
    }
    hvalue[i].m_CellParticleNumber++;
}
 }
8
  • In order to use constant memory explicitly, the size of constant symbol have to be specified during compile time. Therefore, you should change __constant__ CCell* c_value; to something like __constant__ CCell c_value[N]; in which N is your anticipated supremum of occupied space by symbol. Look for documents on the internet about using constant memory in CUDA. I can tell if you use constant memory for the problem above, it will hurt the performance. Commented Dec 6, 2013 at 6:50
  • thank you for your response , but i tried that also but it doesnot works Commented Dec 6, 2013 at 6:53
  • Additionla to reply of Farzad your line cudaMemcpyToSymbol(c_value->m_aCellParticleID, &dvalue->m_aCellParticleID, sizeof(int)*m_CellNum); can't work! First problem is that cudaMemcpyToSymbol copy by default from host to device, if no other copy direction is set - see cudaMemcpyToSymbol. Next misstake is that on the host you want to copy from dvalue->m_aCellParticleID, but host isn't able to resolve this pointer, because it's a device pointer. Commented Dec 6, 2013 at 7:41
  • i tried this also but it dont works cudaMemcpy(dvalue, hvalue, sizeof(CCell)*m_CellNum,cudaMemcpyHostToDevice); //copying value to constant memory cudaMemcpyToSymbol(c_value, &dvalue, sizeof(dvalue)); Commented Dec 6, 2013 at 8:00
  • 1
    There are several other misstakes or unclear things in your code! You call your kernel Sum_constant <<< n_blocks, block_size >>> (c_value,numPartilces); with c_value as input. That's not right. You have to pass dvalue as pointer. When copy back the results you only copy numPartilces * sizeof(int) bytes from dvalue to hvalue. But when output the results you want to print overall 2 * numPartilces * sizeof(int). What exactly you want to do? It seems that there several fundamental misstakes in the way you allocate your memory and how you want to use that arrays. Commented Dec 6, 2013 at 8:40

1 Answer 1

1

This works #include "cuda_runtime.h" #include "device_launch_parameters.h" #include #include #include

#define m_CellNum 100
#define CELLMAXPARTICLENUM 10
 int numPartilces = 10;
extern void Initialize();

__device__ struct CCell
{
int m_CellParticleNumber ;
int m_aCellParticleID[CELLMAXPARTICLENUM];
} ;
 CCell* hvalue;
 CCell* dvalue;
 int* dCellParticleID;

 __constant__ CCell * c_value;
// #define VALUE "c_value"
__global__ void Sum_constant(CCell* value, int N)
{   
int index;    
    index = blockIdx.x * blockDim.x + threadIdx.x;
if (index>=100)     
    return;
for(int idx=0;idx<10 ;++idx)
value[index].m_aCellParticleID[idx]= value[index].m_aCellParticleID[idx]+    c_value[index].m_aCellParticleID[idx] ;
//return;

}


int main()
{

int numPartilces = 10;
hvalue = new CCell[m_CellNum];
cudaMalloc((void**)&dvalue,m_CellNum * sizeof(CCell));
//calling function to initialize the value
Initialize();   
//initializing the device momory
cudaMemcpy(dvalue, hvalue, sizeof(CCell)*m_CellNum,cudaMemcpyHostToDevice);
//copying value to constant memory
cudaMemcpyToSymbol(c_value, &dvalue, sizeof(dvalue));
//dividing bolcks and grid
int block_size = 4;
int n_blocks = m_CellNum/block_size + (m_CellNum%block_size == 0 ? 0:1);
//invocking kernel function
Sum_constant <<< n_blocks, block_size >>> (dvalue,m_CellNum);
//copying value from host to device
cudaMemcpy(hvalue, dvalue,m_CellNum * sizeof(CCell),cudaMemcpyDeviceToHost);
//showing result
    for(int i = 0; i < 100; ++i)
{
    std::cout<< "i=" << i<<std::endl ;
    for(int j = 0; j <numPartilces ; ++j)
    {
        std::cout<<hvalue[i].m_aCellParticleID[j]<<"\n";

    }
    std::cout<<hvalue[i].m_CellParticleNumber<<"Particle Num"<<std::endl;
}
free(hvalue);
cudaFree(dvalue);
cudaFree(c_value);
  return 0;
}
void Initialize()
    {

for(int i = 0; i < m_CellNum; ++i)
{

    hvalue[i].m_CellParticleNumber = 0;
    for(int j = 0; j < numPartilces; ++j)
    {
        hvalue[i].m_aCellParticleID[j] = j+2;
        hvalue[i].m_CellParticleNumber++;
    }

}
}
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.