Declaration of shared memory inside the device

How many declarations of shared memory are allowed inside a device kernel in CUDA?

Can we do something like this:

extern __shared__ float a[];
extern __shared__ float b[];

      

I want to have 2 arrays of different sizes. For example, for an image of 1024 * 768, I can perform a parallel minimization by minimizing first by row and then by column. Therefore, to store the intermediate values, I will need

sizeof(a)/sizeof(float) = 768
sizeof(b)/sizeof(float) = 1024


Or should I just declare one long 1D shared array and place "a" and "b" inside it?

+3


source to share


1 answer


You can have as many shared memory declarations as you like. However, the runtime only allocates one dynamic shared memory buffer, and each `extern` (dynamically sized) shared memory array will be assigned the same address (that is, the starting address of that shared memory allocation). So, for example, this:

#include <cstdio>

// Three unsized extern shared arrays. They are declared separately, but the
// kernel below prints their starting addresses to show where each one is placed
// in the dynamic shared memory allocation.
extern __shared__ int a[];
extern __shared__ int b[];
extern __shared__ int c[];

/**
 * Prints the address of the first element of each extern shared array.
 *
 * Takes no arguments and writes nothing to shared memory; it only takes the
 * addresses of a[0], b[0] and c[0]. Every launched thread prints three lines,
 * so it is intended for a tiny (e.g. single-thread) launch.
 */
__global__
void kernel(void)
{
    int * a0 = &a[0];
    int * b0 = &b[0];
    int * c0 = &c[0];

    // Use %p with a void* argument: %#x expects an unsigned int, which does
    // not match a pointer argument.
    printf("a0 = %p \n", static_cast<void *>(a0));
    printf("b0 = %p \n", static_cast<void *>(b0));
    printf("c0 = %p \n", static_cast<void *>(c0));
}

/**
 * Launches `kernel` with one block of one thread and 1024 bytes of dynamic
 * shared memory, then waits for it so the device-side printf output is
 * flushed.
 *
 * Returns 0 on success, 1 if the launch, the kernel execution or the device
 * reset reports a CUDA error.
 */
int main()
{
    // Third launch parameter: size of the dynamic shared memory buffer, in bytes.
    kernel<<<1, 1, 1024>>>();

    // A kernel launch returns no status itself: launch-configuration errors
    // are read with cudaGetLastError(), execution errors surface at the sync.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }

    status = cudaDeviceReset();
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}

      

produces the following output:

$ nvcc -arch=sm_30 -run extshm.cu 
a0 = 0x1000000 
b0 = 0x1000000 
c0 = 0x1000000 

      

If you want to have two (or more) shared arrays, then on any supported GPU (e.g. compute capability >= 2.0) you can do something like this:



#include <cstdio>

// Single unsized extern shared array; the logical sub-arrays used by the
// kernel are carved out of it by element offset.
extern __shared__ int a[];

/**
 * Splits the one dynamic shared memory buffer into three logical sub-arrays
 * and prints the address at which each one begins.
 *
 * Layout, in int elements: a0 starts at a[0], b0 at a[1024] and c0 at
 * a[1024 + 768]. The kernel only takes these addresses; it does not read or
 * write the elements. Code that does access them must be launched with enough
 * dynamic shared memory (third launch parameter, in bytes) to cover the
 * highest element touched.
 */
__global__
void kernel(void)
{
    const int b_offset = 1024;             // first element of the second sub-array
    const int c_offset = b_offset + 768;   // first element of the third sub-array

    int * a0 = &a[0];
    int * b0 = &a[b_offset];
    int * c0 = &a[c_offset];

    // Use %p with a void* argument: %#x expects an unsigned int, which does
    // not match a pointer argument.
    printf("a0 = %p \n", static_cast<void *>(a0));
    printf("b0 = %p \n", static_cast<void *>(b0));
    printf("c0 = %p \n", static_cast<void *>(c0));
}

/**
 * Launches `kernel` with one block of one thread and enough dynamic shared
 * memory for 1024 + 768 + 512 int elements, then waits for it so the
 * device-side printf output is flushed.
 *
 * Returns 0 on success, 1 if the launch, the kernel execution or the device
 * reset reports a CUDA error.
 */
int main()
{
    // The third launch parameter is a size in BYTES, while the kernel indexes
    // the buffer in int elements, so the element count must be scaled by
    // sizeof(int). Passing the bare element count would allocate only a
    // quarter of the space the kernel's layout spans.
    const size_t shared_bytes = (1024 + 768 + 512) * sizeof(int);
    kernel<<<1, 1, shared_bytes>>>();

    // A kernel launch returns no status itself: launch-configuration errors
    // are read with cudaGetLastError(), execution errors surface at the sync.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }

    status = cudaDeviceReset();
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}

      

which gives:

nvcc -arch=sm_30 -run extshm2.cu 
a0 = 0x1000000 
b0 = 0x1001000 
c0 = 0x1001c00 

      

The latter is what you are looking for, I think.

+5


source







All Articles