Constexpr array in CUDA device code
Could you please tell me if there is a way to use constexpr arrays in device code? According to the "Cuda C 7.0 Programming Guide", I have no problem with constant scalars, but arrays don't seem to compile. Below is an example:
template<unsigned D, unsigned Q>
class LatticeArrangement
{
} ;
template<>
class LatticeArrangement<3,19>
{
public:
static constexpr double c[19] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18 } ;
static constexpr double d = 19.0 ;
__host__ __device__
static constexpr double getC( unsigned index )
{
// FIXME: error: identifier "LatticeArrangement<(unsigned int)3u, (unsigned int)19u> ::c" is undefined in device code
return c[ index ] ;
//return d * index ; // OK, this one works
} ;
} ;
constexpr double LatticeArrangement<3,19>::c[] ;
template< class LatticeArrangement >
class FluidModelIncompressible
{
public:
__host__ __device__
static double computeSomething(double v, unsigned idx)
{
return LatticeArrangement::getC( idx ) * v ;
}
} ;
// Does nothing useful, we want only to compile this
template< class FluidModel >
__global__ void
kernel1 ( double * data )
{
data[ threadIdx.x ] = FluidModel::computeSomething( threadIdx.y, threadIdx.z ) ;
}
int main( int argc, char ** argv )
{
dim3 numBlocks ( 2 ) ;
dim3 numThreads ( 4, 4, 4 ) ;
double * vptr = NULL ;
kernel1< FluidModelIncompressible< LatticeArrangement<3,19> > >
<<< numBlocks, numThreads >>> ( vptr ) ;
return 0 ;
}
I would like to use the same code on host and device and at the same time benefit from the constexpr expression compiler optimizations. Maybe there is another way to avoid duplicate code between host and device? I currently have a lot of code in my device code.
I am using nvcc: NVIDIA (R) Cuda Compiler Driver Copyright (c) 2005-2015 NVIDIA Corporation Built on Mon_Feb_16_22: 59: 02_CST_2015 Cuda Compilation Tools Release 7.0, V7.0.27
Thank you in advance:)
+3
source to share