QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
random.cu
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <string.h>
3 #include <iostream>
4 #include <random_quda.h>
5 #include <cuda.h>
6 #include <quda_internal.h>
7 
8 #include <comm_quda.h>
9 #include <index_helper.cuh>
10 
11 #define BLOCKSDIVUP(a, b) (((a)+(b)-1)/(b))
12 #define CUDA_SAFE_CALL_NO_SYNC( call) { \
13  cudaError err = call; \
14  if( cudaSuccess != err) { \
15  fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
16  __FILE__, __LINE__, cudaGetErrorString( err) ); \
17  exit(EXIT_FAILURE); \
18  } \
19  }
20 #define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call);
21 
22 
23 namespace quda {
24 
25  dim3 GetBlockDim(size_t threads, size_t size) {
26  int blockx = BLOCKSDIVUP(size, threads);
27  dim3 blocks(blockx,1,1);
28  return blocks;
29  }
30 
31  struct rngArg {
35  rngArg(const int X_[]) {
36  for (int i=0; i<4; i++) {
37  commDim[i] = comm_dim(i);
38  commCoord[i] = comm_coord(i);
39  X[i] = X_[i];
40  }
41  }
42  };
43 
51  __global__ void kernel_random(cuRNGState *state, unsigned long long seed, int size_cb, rngArg arg)
52  {
53  int id = blockIdx.x * blockDim.x + threadIdx.x;
54  int parity = blockIdx.y * blockDim.y + threadIdx.y;
55  if (id < size_cb) {
56  // Each thread gets same seed, a different sequence number, no offset
57  int x[4];
58  getCoords(x, id, arg.X, parity);
59  for (int i = 0; i < 4; i++) x[i] += arg.commCoord[i] * arg.X[i];
60  int idd
61  = (((x[3] * arg.commDim[2] * arg.X[2] + x[2]) * arg.commDim[1] * arg.X[1]) + x[1]) * arg.commDim[0] * arg.X[0]
62  + x[0];
63  curand_init(seed, idd, 0, &state[parity * size_cb + id]);
64  }
65  }
66 
75  void launch_kernel_random(cuRNGState *state, unsigned long long seed, int size_cb, int n_parity, int X[4])
76  {
77  dim3 nthreads(128,1,1);
78  dim3 nblocks = GetBlockDim(nthreads.x, size_cb);
79  rngArg arg(X);
80  nblocks.y = n_parity;
81  kernel_random<<<nblocks, nthreads>>>(state, seed, size_cb, arg);
83  }
84 
85  RNG::RNG(const LatticeField &meta, unsigned long long seedin) :
86  seed(seedin),
87  size(meta.Volume()),
88  size_cb(meta.VolumeCB())
89  {
90  state = nullptr;
91  for (int i = 0; i < 4; i++) X[i] = meta.X()[i];
92 #if defined(XORWOW)
93  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Using curandStateXORWOW\n");
94 #elif defined(RG32k3a)
95  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Using curandStateMRG32k3a\n");
96 #else
97  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Using curandStateMRG32k3a\n");
98 #endif
99  }
100 
101  RNG::RNG(const LatticeFieldParam &param, unsigned long long seedin) : seed(seedin), size(1), size_cb(1)
102  {
103  state = nullptr;
104  for (int i = 0; i < 4; i++) {
105  X[i] = param.x[i];
106  size *= X[i];
107  }
108  size_cb = size / param.siteSubset;
109 
110 #if defined(XORWOW)
111  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Using curandStateXORWOW\n");
112 #elif defined(RG32k3a)
113  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Using curandStateMRG32k3a\n");
114 #else
115  if (getVerbosity() >= QUDA_VERBOSE) printfQuda("Using curandStateMRG32k3a\n");
116 #endif
117  }
118 
122  void RNG::Init() {
123  AllocateRNG();
125  }
126 
131  if (size > 0 && state == nullptr) {
132  state = (cuRNGState *)device_malloc(size * sizeof(cuRNGState));
133  CUDA_SAFE_CALL(cudaMemset(state, 0, size * sizeof(cuRNGState)));
135  printfQuda("Allocated array of random numbers with size: %.2f MB\n",
136  size * sizeof(cuRNGState) / (float)(1048576));
137  } else {
138  errorQuda("Array of random numbers not allocated, array size: %d !\nExiting...\n", size);
139  }
140  }
141 
145  void RNG::Release() {
146  if (size > 0 && state != nullptr) {
149  printfQuda("Free array of random numbers with size: %.2f MB\n", size * sizeof(cuRNGState) / (float)(1048576));
150  size = 0;
151  state = NULL;
152  }
153  }
154 
156  void RNG::restore() {
157  cudaError_t err = cudaMemcpy(state, backup_state, size * sizeof(cuRNGState), cudaMemcpyHostToDevice);
158  if (err != cudaSuccess) {
160  errorQuda("Failed to restore curand rng states array\n");
161  }
163  }
164 
166  void RNG::backup() {
168  cudaError_t err = cudaMemcpy(backup_state, state, size * sizeof(cuRNGState), cudaMemcpyDeviceToHost);
169  if (err != cudaSuccess) {
171  errorQuda("Failed to backup curand rng states array\n");
172  }
173  }
174 
175 } // namespace quda
void AllocateRNG()
local lattice dimensions
Definition: random.cu:130
void Init()
Initialize CURAND RNG states.
Definition: random.cu:122
struct curandStateMRG32k3a cuRNGState
Definition: random_quda.h:17
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
#define host_free(ptr)
Definition: malloc_quda.h:71
int comm_dim(int dim)
__global__ void kernel_random(cuRNGState *state, unsigned long long seed, int size_cb, rngArg arg)
CUDA kernel to initialize CURAND RNG states.
Definition: random.cu:51
int comm_coord(int dim)
void launch_kernel_random(cuRNGState *state, unsigned long long seed, int size_cb, int n_parity, int X[4])
Call CUDA kernel to initialize CURAND RNG states.
Definition: random.cu:75
void backup()
Backup CURAND array states initialization.
Definition: random.cu:166
rngArg(const int X_[])
Definition: random.cu:35
#define BLOCKSDIVUP(a, b)
Definition: random.cu:11
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
cuRNGState * state
Definition: random_quda.h:26
int commCoord[QUDA_MAX_DIM]
Definition: random.cu:33
QudaGaugeParam param
Definition: pack_test.cpp:17
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
#define qudaDeviceSynchronize()
int commDim[QUDA_MAX_DIM]
Definition: random.cu:32
void Release()
Release Device memory for CURAND RNG states.
Definition: random.cu:145
void restore()
Restore CURAND array states initialization.
Definition: random.cu:156
constexpr int size
#define CUDA_SAFE_CALL(call)
Definition: random.cu:20
cuRNGState * backup_state
Definition: random_quda.h:27
#define safe_malloc(size)
Definition: malloc_quda.h:66
RNG(const LatticeField &meta, unsigned long long seedin)
allocate curand rng states array in device memory
Definition: random.cu:85
#define printfQuda(...)
Definition: util_quda.h:115
int X[QUDA_MAX_DIM]
Definition: random.cu:34
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
#define device_malloc(size)
Definition: malloc_quda.h:64
unsigned long long seed
Definition: random_quda.h:28
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
QudaParity parity
Definition: covdev_test.cpp:54
int size_cb
number of curand states
Definition: random_quda.h:30
dim3 GetBlockDim(size_t threads, size_t size)
Definition: random.cu:25
int X[4]
number of curand states checkerboarded (equal to size if we have a single parity) ...
Definition: random_quda.h:31
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.
const int * X() const
#define device_free(ptr)
Definition: malloc_quda.h:69