15 #define BLOCKSDIVUP(a, b) (((a)+(b)-1)/(b)) 20 dim3 blocks(blockx,1,1);
27 # define CUDA_SAFE_CALL_NO_SYNC( call) { \ 28 cudaError err = call; \ 29 if( cudaSuccess != err) { \ 30 fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ 31 __FILE__, __LINE__, cudaGetErrorString( err) ); \ 35 # define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); 46 int id = blockIdx.x *
blockDim.x + threadIdx.x;
49 curand_init(seed,
id + node_offset, 0, &state[
id]);
62 int id = blockIdx.x *
blockDim.x + threadIdx.x;
66 curand_init(seed,
id + node_offset, 0, &state[
id]);
72 int idd = ((((
x[3] *
arg.comm_dim[2] *
arg.X[2] +
x[2]) *
arg.comm_dim[1] *
arg.X[1]) +
x[1] ) *
arg.comm_dim[0] *
arg.X[0] +
x[0]) >> 1 ;
73 curand_init(seed, idd, 0, &state[
id]);
86 dim3 nthreads(128,1,1);
90 kernel_random<<<nblocks,nthreads>>>(state, seed, rng_size, node_offset);
93 for(
int i=0;
i < 4;
i++){
98 kernel_random<<<nblocks,nthreads>>>(state, seed, rng_size, 0,
arg);
109 for(
int i=0;
i<4;
i++)
X[
i]=0;
114 #elif defined(RG32k3a) 122 RNG::RNG(
int rng_sizes,
int seedin,
const int XX[4]){
128 for(
int i=0;
i<4;
i++)
X[
i]=XX[
i];
133 #elif defined(RG32k3a) 162 errorQuda(
"Array of random numbers not allocated, array size: %d !\nExiting...\n",
rng_size);
181 if (
err != cudaSuccess) {
183 printfQuda(
"ERROR: Failed to restore curand rng states array\n");
192 if (
err != cudaSuccess) {
194 printfQuda(
"ERROR: Failed to backup curand rng states array\n");
void AllocateRNG()
Allocate the CURAND RNG states array in device memory.
void Init()
Initialize CURAND RNG states.
__global__ void kernel_random(cuRNGState *state, int seed, int rng_size, int node_offset)
CUDA kernel to initialize CURAND RNG states.
struct curandStateMRG32k3a cuRNGState
RNG(int rng_sizes, int seedin, const int XX[4])
void backup()
Back up the initialized CURAND RNG states array.
#define BLOCKSDIVUP(a, b)
void Release()
Release Device memory for CURAND RNG states.
void restore()
Restore the initialized CURAND RNG states array.
int rng_size
Number of CURAND states.
void launch_kernel_random(cuRNGState *state, int seed, int rng_size, int node_offset, int X[4])
Call CUDA kernel to initialize CURAND RNG states.
#define CUDA_SAFE_CALL(call)
cuRNGState * backup_state
cudaError_t qudaDeviceSynchronize()
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
#define safe_malloc(size)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
#define device_malloc(size)
int node_offset
Offset in the index, used in the multi-GPU case.
dim3 GetBlockDim(size_t threads, size_t size)
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)