Passing a host function as a function pointer to a __global__ or __device__ function in CUDA

I am currently developing a GPU version of a CPU function (for example, Calc(int a, int b, double* c, double* d, CalcInvFunction GetInv)) to which a host function is passed as a function pointer (in the example above, GetInv is a host function of type CalcInvFunction). My question is: if I want to execute Calc() entirely on the GPU, I have to pass GetInv as a function pointer argument to the kernel or device function. Is that possible?

0


source to share


2 answers


Yes. For a GPU implementation of Calc, you must pass GetInv as a __device__ function pointer.

Here are some examples that may help:

Example. 1



Example. 2

Example. 3

Most of the examples above show how to bring the device function pointer all the way back to the host code. This may not be necessary for your particular case, but it should be fairly clear from them how to obtain a __device__ function pointer (in device code) and use it in a kernel.

+1


source


Finally, I was able to pass a host function as a function pointer to a CUDA kernel (__global__ function). Thanks to Robert Crovella and Newffe for their answers. I was able to pass a class member function (a CPU function) as a function pointer to the CUDA kernel. The main limitation is that I can only pass a static member function of the class; I cannot pass a member function that is not declared static. For example:

/**/ __host__ __device__ static int CellfunPtr( void*ptr, int a ); /**/

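The above function works because it is declared as a static member function. If I instead declare it as a non-static member function,   /**/ __host__ __device__ int CellfunPtr( void*ptr, int a ); /**/

then it doesn't work: a non-static member function takes an implicit `this` argument, so its address is a pointer-to-member that cannot be converted to the plain function pointer type pFunc_t.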

The complete code has four files.


  1. First file

/*start of fundef.h file*/

/* Pointer-to-function type used for the callback handed to the kernel:
   the pointee takes an untyped pointer and an int and returns an int. */
typedef int (*pFunc_t)(void* ptr, int N);

/*end of fundef.h file*/


  2. Second file

/*start of solver.h file*/



    /**
     * Stores four integer problem sizes and owns two heap buffers of doubles
     * (cellVel, cellPre) that the constructor allocates and the destructor
     * frees.
     *
     * Because the buffers are raw owning pointers, a memberwise copy would
     * make two objects free the same memory. Copying is therefore disabled.
     */
    class CalcVars {

       int eqnCount;
       int numCell;
       int numTri;
       int numTet;

       /* Non-copyable: declared private and intentionally left undefined so
          that an accidental copy fails at compile or link time instead of
          double-freeing cellVel / cellPre. */
       CalcVars(const CalcVars&);
       CalcVars& operator=(const CalcVars&);

    public:
       double* cellVel;   /* owned; released with free() in the destructor */
       double* cellPre;   /* owned; released with free() in the destructor */

       /** Constructor: records the sizes and allocates the two buffers. */
       CalcVars(
           const int eqnCount_,
           const int numCell_,
           const int numTri_,
           const int numTet_
       );

       /** Destructor: frees cellVel and cellPre. */
       ~CalcVars(void);

       /** Host-side driver that launches the demo kernel. */
       void
           CalcAdv();

       /**
        * Callback compatible with pFunc_t. It is static so that its address
        * is an ordinary function pointer (no implicit `this`), and it is
        * compiled for both host and device.
        */
       __host__ __device__
       static int
           CellfunPtr(
           void*ptr, int a
       );

    };

      

/*end of solver.h file*/


  3. Third file

/*start of solver.cu file*/

     #include <cstdio>
     #include <cstdlib>

     #include "fundef.h"
     #include "solver.h"
     /* Device-resident variable initialised with the address of
        CalcVars::CellfunPtr. Host code reads its value with
        cudaMemcpyFromSymbol rather than using the host-side address of the
        function. */
     __device__ pFunc_t pF1_d = CalcVars::CellfunPtr;

    /* Host-side copy of the value stored in pF1_d; filled in by
       CalcVars::CalcAdv and then passed to the kernel as an argument. */
    pFunc_t pF1_h ;


    /*
     * Replaces a[threadIdx.x] with func(thisPtr_, a[threadIdx.x]).
     *
     * Only threadIdx.x is used for indexing and there is no bounds check, so
     * the caller must launch a single block whose size does not exceed the
     * number of elements in `a`. `func` must hold a device-side function
     * address; `thisPtr_` is forwarded to it untouched.
     */
    __global__ void kernel(int*a, pFunc_t func, void* thisPtr_)
    {
        int slot = threadIdx.x;
        int previous = a[slot];
        a[slot] = func(thisPtr_, previous);
    }

    /* Constructor */

    /*
     * Records the four sizes and allocates the two zero-filled buffers of
     * eqnCount_ doubles each.
     *
     * Every member is initialised: numTet was previously never assigned even
     * though numTet_ was supplied. A non-positive eqnCount_ is treated as an
     * empty buffer rather than being cast to a huge size_t. If calloc fails,
     * the corresponding pointer is left null; the destructor's free() copes
     * with that.
     */
    CalcVars::CalcVars(
        const int eqnCount_,             
        const int numCell_,          
        const int numTri_,             
        const int numTet_   

    )
        : eqnCount(eqnCount_),
          numCell(numCell_),
          numTri(numTri_),
          numTet(numTet_),
          cellVel(0),
          cellPre(0)
    {
        const size_t count = (eqnCount_ > 0) ? (size_t) eqnCount_ : (size_t) 0;

        this->cellVel = (double*) calloc(count, sizeof(double));
        this->cellPre = (double*) calloc(count, sizeof(double));
    }

    /* Destructor */

    /* Hands both calloc'd buffers back to the C allocator. */
    CalcVars::~CalcVars(void)
    {
        double* const velocityBuf = cellVel;
        double* const pressureBuf = cellPre;

        free(velocityBuf);
        free(pressureBuf);
    }


    /* Reports a failed CUDA runtime call on stderr; returns true when
       `status` is cudaSuccess. */
    static bool cudaCallSucceeded(cudaError_t status, const char* what)
    {
        if (status == cudaSuccess) {
            return true;
        }
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        return false;
    }

    /*
     * Demo driver: fills a host array with 0..Num-1, copies it to the device,
     * fetches the device-side address stored in pF1_d, and launches `kernel`
     * so that each element is passed through that function pointer. The
     * result is copied back into the host array.
     *
     * Every CUDA call is checked; on the first failure the remaining steps
     * are skipped. Both the host and the device buffer are released on every
     * path (the original leaked both).
     */
    void 
    CalcVars::CalcAdv(
    ){
        const int Num = 50;
        const size_t bytes = Num * sizeof(int);

        int* a1 = (int *)malloc(bytes);
        if (a1 == 0) {
            fprintf(stderr, "CalcAdv: host allocation failed\n");
            return;
        }

        for(int i = 0; i < Num; i++){
            a1[i] = i;
        }

        int* a1_dev = 0;

        bool ok = cudaCallSucceeded(cudaMalloc((void**)&a1_dev, bytes),
                                    "cudaMalloc");

        ok = ok && cudaCallSucceeded(
                 cudaMemcpy(a1_dev, a1, bytes, cudaMemcpyHostToDevice),
                 "cudaMemcpy (host to device)");

        // Copy the device-side function address held in pF1_d to the host so
        // it can be passed to the kernel as an ordinary argument.
        ok = ok && cudaCallSucceeded(
                 cudaMemcpyFromSymbol(&pF1_h, pF1_d, sizeof(pFunc_t)),
                 "cudaMemcpyFromSymbol");

        if (ok) {
            // One thread per element. The kernel has no bounds check, so the
            // block size must equal the element count (the original launched
            // 42 threads and left the last 8 elements unprocessed).
            //
            // NOTE: `this` is a HOST address. It is only forwarded to the
            // callback, which must not dereference it in device code unless
            // the object is first copied to device memory.
            kernel<<<1, Num>>>(a1_dev, pF1_h, this);

            ok = cudaCallSucceeded(cudaGetLastError(), "kernel launch")
              && cudaCallSucceeded(cudaDeviceSynchronize(), "kernel execution");
        }

        if (ok) {
            cudaCallSucceeded(
                cudaMemcpy(a1, a1_dev, bytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy (device to host)");
        }

        // cudaFree and free both accept a null pointer.
        cudaFree(a1_dev);
        free(a1);
    }


    /*
     * Callback matching pFunc_t: prints a message and returns a - 1.
     *
     * The execution-space qualifiers are repeated here so that the definition
     * agrees with the in-class declaration, which is __host__ __device__.
     *
     * `ptr` is currently unused. When invoked from the kernel launched by
     * CalcAdv it holds a host address, so it must not be dereferenced in
     * device code.
     */
    __host__ __device__
    int 
    CalcVars::CellfunPtr(
        void* ptr, int a
    ){
        //CalcVars* ClsPtr = (CalcVars*)ptr;
        printf("Printing from CPU function\n");
        //int eqn_size = ClsPtr->eqnCount;
        //printf("The number is %d",eqn_size);
        return a-1;

    }

      

/*end of solver.cu file*/


  4. Fourth file

/*start of main.cpp file*/

    #include "solver.h"


    /*
     * Entry point: builds a CalcVars with fixed sizes, runs CalcAdv() once,
     * then waits via system("pause").
     *
     * The object has automatic storage in an inner scope so its destructor
     * runs before the pause (the original allocated it with `new` and never
     * deleted it).
     */
    int main(){

        const int n_Eqn = 100;
        const int n_cell = 200;
        const int n_tri = 300;
        const int n_tetra = 400;

        {
            CalcVars calcvars(n_Eqn, n_cell, n_tri, n_tetra);

            calcvars.CalcAdv();
        }

        system("pause");

        return 0;
    }

      

/*end of main.cpp file*/

+1


source







All Articles