11 #define BLOCKSDIVUP(a, b) (((a)+(b)-1)/(b)) 12 #define CUDA_SAFE_CALL_NO_SYNC( call) { \ 13 cudaError err = call; \ 14 if( cudaSuccess != err) { \ 15 fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ 16 __FILE__, __LINE__, cudaGetErrorString( err) ); \ 20 #define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); 27 dim3 blocks(blockx,1,1);
36 for (
int i=0; i<4; i++) {
53 int id = blockIdx.x * blockDim.x + threadIdx.x;
54 int parity = blockIdx.y * blockDim.y + threadIdx.y;
59 for (
int i = 0; i < 4; i++) x[i] += arg.
commCoord[i] * arg.
X[i];
63 curand_init(seed, idd, 0, &state[parity * size_cb +
id]);
77 dim3 nthreads(128,1,1);
81 kernel_random<<<nblocks, nthreads>>>(state, seed, size_cb,
arg);
88 size_cb(meta.VolumeCB())
91 for (
int i = 0; i < 4; i++)
X[i] = meta.
X()[i];
94 #elif defined(RG32k3a) 104 for (
int i = 0; i < 4; i++) {
112 #elif defined(RG32k3a) 135 printfQuda(
"Allocated array of random numbers with size: %.2f MB\n",
138 errorQuda(
"Array of random numbers not allocated, array size: %d !\nExiting...\n",
size);
158 if (err != cudaSuccess) {
160 errorQuda(
"Failed to restore curand rng states array\n");
169 if (err != cudaSuccess) {
171 errorQuda(
"Failed to backup curand rng states array\n");
void AllocateRNG()
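local lattice dimensions (NOTE(review): this brief describes data rather than a function; it appears to belong to the `int X[4]` member, not `AllocateRNG()` — confirm against the class declaration)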
void Init()
Initialize CURAND RNG states.
struct curandStateMRG32k3a cuRNGState
QudaVerbosity getVerbosity()
__global__ void kernel_random(cuRNGState *state, unsigned long long seed, int size_cb, rngArg arg)
CUDA kernel to initialize CURAND RNG states.
void launch_kernel_random(cuRNGState *state, unsigned long long seed, int size_cb, int n_parity, int X[4])
Call CUDA kernel to initialize CURAND RNG states.
void backup()
Back up the array of CURAND RNG states.
#define BLOCKSDIVUP(a, b)
QudaSiteSubset siteSubset
int commCoord[QUDA_MAX_DIM]
#define qudaDeviceSynchronize()
int commDim[QUDA_MAX_DIM]
void Release()
Release the device memory holding the CURAND RNG states.
void restore()
Restore the array of CURAND RNG states.
#define CUDA_SAFE_CALL(call)
cuRNGState * backup_state
#define safe_malloc(size)
RNG(const LatticeField &meta, unsigned long long seedin)
allocate curand rng states array in device memory
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
#define device_malloc(size)
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
int size_cb
number of curand states
dim3 GetBlockDim(size_t threads, size_t size)
int X[4]
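number of curand states checkerboarded (equal to size if we have a single parity) ... (NOTE(review): this wording matches the `size_cb` member rather than `int X[4]`; the briefs in this listing look shifted by one member — confirm against the class declaration)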
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.