QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
spinor_noise.cu
Go to the documentation of this file.
1 /*
2  Spinor reordering and copying routines. These are implemented to
3  un on both CPU and GPU. Here we are templating on the following:
4  - input precision
5  - output precision
6  - number of colors
7  - number of spins
8  - field ordering
9 */
10 
11 #include <color_spinor_field.h>
13 #include <tune_quda.h>
14 #include <utility> // for std::swap
15 #include <random_quda.h>
16 
17 namespace quda {
18 
19  using namespace colorspinor;
20 
21  template<typename real, int Ns, int Nc, QudaFieldOrder order>
22  struct Arg {
24  V v;
25  const int nParity;
26  const int volumeCB;
28  Arg(ColorSpinorField &v, RNG &rng) : v(v), nParity(v.SiteSubset()), volumeCB(v.VolumeCB()), rng(rng) { }
29  };
30 
31  template<typename real, typename Arg> // Gauss
32  __device__ __host__ inline void genGauss(Arg &arg, cuRNGState& localState, int parity, int x_cb, int s, int c) {
33  real phi = 2.0*M_PI*Random<real>(localState);
34  real radius = Random<real>(localState);
35  radius = sqrt(-1.0 * log(radius));
36  arg.v(parity, x_cb, s, c) = complex<real>(radius*cos(phi),radius*sin(phi));
37  }
38 
39  template<typename real, typename Arg> // Uniform
40  __device__ __host__ inline void genUniform(Arg &arg, cuRNGState& localState, int parity, int x_cb, int s, int c) {
41  real x = Random<real>(localState);
42  real y = Random<real>(localState);
43  arg.v(parity, x_cb, s, c) = complex<real>(x, y);
44  }
45 
47  template <typename real, int Ns, int Nc, QudaNoiseType type, typename Arg> void SpinorNoiseCPU(Arg &arg)
48  {
49 
50  for (int parity = 0; parity < arg.nParity; parity++) {
51  for (int x_cb = 0; x_cb < arg.volumeCB; x_cb++) {
52  for (int s = 0; s < Ns; s++) {
53  for (int c = 0; c < Nc; c++) {
54  cuRNGState localState = arg.rng.State()[parity * arg.volumeCB + x_cb];
55  if (type == QUDA_NOISE_GAUSS)
56  genGauss<real>(arg, localState, parity, x_cb, s, c);
57  else if (type == QUDA_NOISE_UNIFORM)
58  genUniform<real>(arg, localState, parity, x_cb, s, c);
59  arg.rng.State()[parity * arg.volumeCB + x_cb] = localState;
60  }
61  }
62  }
63  }
64  }
65 
67  template <typename real, int Ns, int Nc, QudaNoiseType type, typename Arg>
68  __global__ void SpinorNoiseGPU(Arg arg) {
69 
70  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
71  if (x_cb >= arg.volumeCB) return;
72 
73  int parity = blockIdx.y * blockDim.y + threadIdx.y;
74  if (parity >= arg.nParity) return;
75 
76  cuRNGState localState = arg.rng.State()[parity * arg.volumeCB + x_cb];
77  for (int s=0; s<Ns; s++) {
78  for (int c=0; c<Nc; c++) {
79  if (type == QUDA_NOISE_GAUSS) genGauss<real>(arg, localState, parity, x_cb, s, c);
80  else if (type == QUDA_NOISE_UNIFORM) genUniform<real>(arg, localState, parity, x_cb, s, c);
81  }
82  }
83  arg.rng.State()[parity * arg.volumeCB + x_cb] = localState;
84  }
85 
86  template <typename real, int Ns, int Nc, QudaNoiseType type, typename Arg>
88  Arg &arg;
89  const ColorSpinorField &meta; // this reference is for meta data only
90 
91  private:
92  unsigned int sharedBytesPerThread() const { return 0; }
93  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }
94  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
95  unsigned int minThreads() const { return meta.VolumeCB(); }
96 
97  public:
98  SpinorNoise(Arg &arg, const ColorSpinorField &meta)
99  : TunableVectorY(meta.SiteSubset()), arg(arg), meta(meta) {
100  strcpy(aux, meta.AuxString());
101  strcat(aux, meta.Location()==QUDA_CUDA_FIELD_LOCATION ? ",GPU" : ",CPU");
102  }
103 
104  void apply(const cudaStream_t &stream) {
105  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
106  SpinorNoiseGPU<real, Ns, Nc, type><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg);
107  }
108 
110  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) return Tunable::advanceTuneParam(param);
111  else return false;
112  }
113 
114  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }
115  long long flops() const { return 0; }
116  long long bytes() const { return meta.Bytes(); }
117  void preTune() { arg.rng.backup(); }
118  void postTune(){ arg.rng.restore(); }
119  };
120 
121  template <typename real, int Ns, int Nc, QudaFieldOrder order>
122  void spinorNoise(ColorSpinorField &in, RNG &rngstate, QudaNoiseType type) {
123  Arg<real, Ns, Nc, order> arg(in, rngstate);
124  switch (type) {
125  case QUDA_NOISE_GAUSS:
126  {
128  noise.apply(0);
129  break;
130  }
131  case QUDA_NOISE_UNIFORM:
132  {
134  noise.apply(0);
135  break;
136  }
137  default:
138  errorQuda("Noise type %d not implemented", type);
139  }
140  }
141 
143  template <typename real, int Ns, int Nc>
144  void spinorNoise(ColorSpinorField &in, RNG &rngstate, QudaNoiseType type)
145  {
146  if (in.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
147  spinorNoise<real,Ns,Nc,QUDA_FLOAT2_FIELD_ORDER>(in, rngstate, type);
148  } else if (in.FieldOrder() == QUDA_FLOAT4_FIELD_ORDER) {
149  spinorNoise<real,Ns,Nc,QUDA_FLOAT4_FIELD_ORDER>(in, rngstate, type);
150  } else {
151  errorQuda("Order %d not defined (Ns=%d, Nc=%d)", in.FieldOrder(), Ns, Nc);
152  }
153  }
154 
155  template <typename real, int Ns>
156  void spinorNoise(ColorSpinorField &src, RNG& randstates, QudaNoiseType type)
157  {
158  if (src.Ncolor() == 3) {
159  spinorNoise<real,Ns,3>(src, randstates, type);
160  } else if (src.Ncolor() == 6) {
161  spinorNoise<real,Ns,6>(src, randstates, type);
162  } else if (src.Ncolor() == 24) {
163  spinorNoise<real,Ns,24>(src, randstates, type);
164  } else if (src.Ncolor() == 32) {
165  spinorNoise<real,Ns,32>(src, randstates, type);
166  } else {
167  errorQuda("nColor = %d not implemented", src.Ncolor());
168  }
169  }
170 
171  template <typename real>
172  void spinorNoise(ColorSpinorField &src, RNG& randstates, QudaNoiseType type)
173  {
174  if (src.Nspin() == 4) {
175  spinorNoise<real,4>(src, randstates, type);
176  } else if (src.Nspin() == 2) {
177  spinorNoise<real,2>(src, randstates, type);
178  } else if (src.Nspin() == 1) {
179  spinorNoise<real,1>(src, randstates, type);
180  } else {
181  errorQuda("Nspin = %d not implemented", src.Nspin());
182  }
183  }
184 
185  void spinorNoise(ColorSpinorField &src_, RNG &randstates, QudaNoiseType type)
186  {
187  // if src is a CPU field then create GPU field
188  ColorSpinorField *src = &src_;
190  ColorSpinorParam param(src_);
192  param.setPrecision(prec, prec, true); // change to native field order
195  src = ColorSpinorField::Create(param);
196  }
197 
198  switch (src->Precision()) {
199  case QUDA_DOUBLE_PRECISION: spinorNoise<double>(*src, randstates, type); break;
200  case QUDA_SINGLE_PRECISION: spinorNoise<float>(*src, randstates, type); break;
201  default: errorQuda("Precision %d not implemented", src->Precision());
202  }
203 
204  if (src != &src_) {
205  src_ = *src; // upload result
206  delete src;
207  }
208  }
209 
210  void spinorNoise(ColorSpinorField &src, unsigned long long seed, QudaNoiseType type)
211  {
212  RNG *randstates = new RNG(src, seed);
213  randstates->Init();
214  spinorNoise(src, *randstates, type);
215  randstates->Release();
216  delete randstates;
217  }
218 
219 } // namespace quda
void Init()
Initialize CURAND RNG states.
Definition: random.cu:122
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
struct curandStateMRG32k3a cuRNGState
Definition: random_quda.h:17
enum QudaPrecision_s QudaPrecision
const char * AuxString() const
void apply(const cudaStream_t &stream)
enum QudaNoiseType_s QudaNoiseType
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
cudaStream_t * stream
Arg(ColorSpinorField &v, RNG &rng)
Definition: spinor_noise.cu:28
static ColorSpinorField * Create(const ColorSpinorParam &param)
void backup()
Backup CURAND array states initialization.
Definition: random.cu:166
const char * VolString() const
SpinorNoise(Arg &arg, const ColorSpinorField &meta)
Definition: spinor_noise.cu:98
__device__ __host__ void genUniform(Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c)
Definition: spinor_noise.cu:40
TuneKey tuneKey() const
QudaGaugeParam param
Definition: pack_test.cpp:17
void genGauss(GaugeField &U, RNG &rngstate, double sigma)
QudaFieldLocation location
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:51
void Release()
Release Device memory for CURAND RNG states.
Definition: random.cu:145
void restore()
Restore CURAND array states initialization.
Definition: random.cu:156
cpuColorSpinorField * in
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
bool tuneGridDim() const
Definition: spinor_noise.cu:94
void SpinorNoiseCPU(Arg &arg)
Definition: spinor_noise.cu:47
unsigned int sharedBytesPerThread() const
Definition: spinor_noise.cu:92
QudaFieldLocation Location() const
__host__ __device__ ValueType log(ValueType x)
Definition: complex_quda.h:101
const ColorSpinorField & meta
Definition: spinor_noise.cu:89
const int nParity
Definition: spinor_noise.cu:25
unsigned int sharedBytesPerBlock(const TuneParam &param) const
Definition: spinor_noise.cu:93
__global__ void SpinorNoiseGPU(Arg arg)
Definition: spinor_noise.cu:68
__shared__ float s[]
long long flops() const
bool advanceTuneParam(TuneParam &param) const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
colorspinor::FieldOrderCB< real, Ns, Nc, 1, order > V
Definition: spinor_noise.cu:23
const int volumeCB
Definition: spinor_noise.cu:26
__host__ __device__ __inline__ cuRNGState * State()
Definition: random_quda.h:57
__host__ __device__ ValueType cos(ValueType x)
Definition: complex_quda.h:46
long long bytes() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
QudaPrecision Precision() const
unsigned int minThreads() const
Definition: spinor_noise.cu:95
QudaParity parity
Definition: covdev_test.cpp:54
QudaPrecision prec
Definition: test_util.cpp:1608
QudaFieldOrder FieldOrder() const
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
virtual bool advanceTuneParam(TuneParam &param) const
Definition: tune_quda.h:335