Code: Select all
#include <cstdio>
#include <cuda_runtime.h>
// Writes the current clock() value into a block-shared int, stores that
// variable's address through `s`, and prints the address and the value.
//
// s: location that receives the address of the shared int.
//
// There is no thread guard and no barrier: every thread of the launch runs the
// whole body, so this only makes sense for a single-thread launch.
// NOTE(review): the address stored in *s refers to __shared__ storage; using it
// after this kernel has finished is presumably outside anything CUDA
// guarantees -- confirm before relying on it.
__global__ void kernel_A(int** s) {
    __shared__ int sdata;
    // clock() returns clock_t; make the conversion to the stored int explicit.
    sdata = static_cast<int>(clock());
    *s = &sdata;
    // %p expects a void*, so convert the int* explicitly.
    printf("sdata: %p = %i\n", static_cast<void*>(&sdata), sdata);
}
// Somewhat surprisingly, adding a device function that declares its own
// __shared__ variable does not move kernel_B's shared data.
// Maybe this is just luck?
//
// Sets a function-local __shared__ int to 1 and returns a reference to it.
__device__ int& make_shared() {
    __shared__ int bdata;
    bdata = 1;
    return bdata;
}
// Prints the address and value of the shared int obtained from make_shared(),
// then prints the pointer stored in *ptr_sdata together with the int it points
// to, next to the address and value of this kernel's own shared int.
//
// ptr_sdata: location holding an int* that is dereferenced here.
//
// This kernel's `sdata` is read without ever being written: the point of the
// experiment is to observe whatever the shared storage already contains.
// NOTE(review): both that read and the dereference of a pointer produced by a
// different kernel presumably have no defined result -- treat the printed
// values as observations, not guarantees.
__global__ void kernel_B(int** ptr_sdata) {
    auto& bdata = make_shared();
    // %p expects a void*, so every pointer argument is converted explicitly.
    printf("bdata: %p = %i\n", static_cast<void*>(&bdata), bdata);
    __shared__ int sdata;
    int* const from_a = *ptr_sdata;
    printf("A.sdata: %p = %i, B.sdata: %p = %i\n",
           static_cast<void*>(from_a), *from_a,
           static_cast<void*>(&sdata), sdata);
}
// Host driver: allocates one device-side int* slot, runs kernel_A and then
// kernel_B on it, waits for both to finish, and releases the slot.
//
// Returns 0 on success and 1 if any CUDA call or kernel launch reported an
// error (the error text is written to stderr).
int main() {
    // Reports a failed CUDA call; returns true when `status` is an error.
    auto failed = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess) {
            return false;
        }
        std::fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        return true;
    };

    int** ptr_sdata = nullptr;
    if (failed(cudaMalloc(&ptr_sdata, sizeof(int*)), "cudaMalloc")) {
        return 1;
    }

    int exit_code = 0;

    // A __global__ function must be launched with an execution configuration.
    // One block of one thread is used because neither kernel guards its body
    // by thread index: every launched thread would print and write.
    kernel_A<<<1, 1>>>(ptr_sdata);
    if (failed(cudaGetLastError(), "kernel_A launch")) {
        exit_code = 1;
    }

    // kernel_B reads what kernel_A stored, so skip it if kernel_A never ran.
    if (exit_code == 0) {
        kernel_B<<<1, 1>>>(ptr_sdata);
        if (failed(cudaGetLastError(), "kernel_B launch")) {
            exit_code = 1;
        }
    }

    // Synchronize so device printf output is flushed and execution errors surface.
    if (failed(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
        exit_code = 1;
    }

    if (failed(cudaFree(ptr_sdata), "cudaFree")) {
        exit_code = 1;
    }
    return exit_code;
}
sdata: 0x7d52c5000000 = -594894858
bdata: 0x7d52c5000004 = 1
A.sdata: 0x7d52c5000000 = -594894858, B.sdata: 0x7d52c5000000 = -594894858
Ich verstehe, dass kernel_B möglicherweise nicht zugewiesen wird. Ein einzelner Block.