QUDA  v1.1.0
A library for QCD on GPUs
spinor_noise.cu
Go to the documentation of this file.
1 /*
2  Spinor reordering and copying routines. These are implemented to
3  un on both CPU and GPU. Here we are templating on the following:
4  - input precision
5  - output precision
6  - number of colors
7  - number of spins
8  - field ordering
9 */
10 
11 #include <color_spinor_field.h>
12 #include <color_spinor_field_order.h>
13 #include <tune_quda.h>
14 #include <utility> // for std::swap
15 #include <random_quda.h>
16 
17 namespace quda {
18 
19  using namespace colorspinor;
20 
21  template<typename real, int Ns, int Nc, QudaFieldOrder order>
22  struct Arg {
23  typedef typename colorspinor::FieldOrderCB<real,Ns,Nc,1,order> V;
24  V v;
25  const int nParity;
26  const int volumeCB;
27  RNG rng;
28  Arg(ColorSpinorField &v, RNG &rng) : v(v), nParity(v.SiteSubset()), volumeCB(v.VolumeCB()), rng(rng) { }
29  };
30 
31  template<typename real, typename Arg> // Gauss
32  __device__ __host__ inline void genGauss(Arg &arg, cuRNGState& localState, int parity, int x_cb, int s, int c) {
33  real phi = 2.0*M_PI*Random<real>(localState);
34  real radius = Random<real>(localState);
35  radius = sqrt(-1.0 * log(radius));
36  arg.v(parity, x_cb, s, c) = complex<real>(radius*cos(phi),radius*sin(phi));
37  }
38 
39  template<typename real, typename Arg> // Uniform
40  __device__ __host__ inline void genUniform(Arg &arg, cuRNGState& localState, int parity, int x_cb, int s, int c) {
41  real x = Random<real>(localState);
42  real y = Random<real>(localState);
43  arg.v(parity, x_cb, s, c) = complex<real>(x, y);
44  }
45 
46  /** CPU function to reorder spinor fields. */
47  template <typename real, int Ns, int Nc, QudaNoiseType type, typename Arg> void SpinorNoiseCPU(Arg &arg)
48  {
49 
50  for (int parity = 0; parity < arg.nParity; parity++) {
51  for (int x_cb = 0; x_cb < arg.volumeCB; x_cb++) {
52  for (int s = 0; s < Ns; s++) {
53  for (int c = 0; c < Nc; c++) {
54  cuRNGState localState = arg.rng.State()[parity * arg.volumeCB + x_cb];
55  if (type == QUDA_NOISE_GAUSS)
56  genGauss<real>(arg, localState, parity, x_cb, s, c);
57  else if (type == QUDA_NOISE_UNIFORM)
58  genUniform<real>(arg, localState, parity, x_cb, s, c);
59  arg.rng.State()[parity * arg.volumeCB + x_cb] = localState;
60  }
61  }
62  }
63  }
64  }
65 
66  /** CUDA kernel to reorder spinor fields. Adopts a similar form as the CPU version, using the same inlined functions. */
67  template <typename real, int Ns, int Nc, QudaNoiseType type, typename Arg>
68  __global__ void SpinorNoiseGPU(Arg arg) {
69 
70  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
71  if (x_cb >= arg.volumeCB) return;
72 
73  int parity = blockIdx.y * blockDim.y + threadIdx.y;
74  if (parity >= arg.nParity) return;
75 
76  cuRNGState localState = arg.rng.State()[parity * arg.volumeCB + x_cb];
77  for (int s=0; s<Ns; s++) {
78  for (int c=0; c<Nc; c++) {
79  if (type == QUDA_NOISE_GAUSS) genGauss<real>(arg, localState, parity, x_cb, s, c);
80  else if (type == QUDA_NOISE_UNIFORM) genUniform<real>(arg, localState, parity, x_cb, s, c);
81  }
82  }
83  arg.rng.State()[parity * arg.volumeCB + x_cb] = localState;
84  }
85 
86  template <typename real, int Ns, int Nc, QudaNoiseType type, typename Arg>
87  class SpinorNoise : TunableVectorY {
88  Arg &arg;
89  const ColorSpinorField &meta; // this reference is for meta data only
90 
91  private:
92  unsigned int sharedBytesPerThread() const { return 0; }
93  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }
94  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
95  unsigned int minThreads() const { return meta.VolumeCB(); }
96 
97  public:
98  SpinorNoise(Arg &arg, const ColorSpinorField &meta)
99  : TunableVectorY(meta.SiteSubset()), arg(arg), meta(meta) {
100  strcpy(aux, meta.AuxString());
101  strcat(aux, meta.Location()==QUDA_CUDA_FIELD_LOCATION ? ",GPU" : ",CPU");
102  }
103 
104  void apply(const qudaStream_t &stream) {
105  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
106  qudaLaunchKernel(SpinorNoiseGPU<real, Ns, Nc, type, Arg>, tp, stream, arg);
107  }
108 
109  bool advanceTuneParam(TuneParam &param) const {
110  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) return Tunable::advanceTuneParam(param);
111  else return false;
112  }
113 
114  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }
115  long long flops() const { return 0; }
116  long long bytes() const { return meta.Bytes(); }
117  void preTune() { arg.rng.backup(); }
118  void postTune(){ arg.rng.restore(); }
119  };
120 
121  template <typename real, int Ns, int Nc, QudaFieldOrder order>
122  void spinorNoise(ColorSpinorField &in, RNG &rngstate, QudaNoiseType type) {
123  Arg<real, Ns, Nc, order> arg(in, rngstate);
124  switch (type) {
125  case QUDA_NOISE_GAUSS:
126  {
127  SpinorNoise<real, Ns, Nc, QUDA_NOISE_GAUSS, Arg<real, Ns, Nc, order> > noise(arg, in);
128  noise.apply(0);
129  break;
130  }
131  case QUDA_NOISE_UNIFORM:
132  {
133  SpinorNoise<real, Ns, Nc, QUDA_NOISE_UNIFORM, Arg<real, Ns, Nc, order> > noise(arg, in);
134  noise.apply(0);
135  break;
136  }
137  default:
138  errorQuda("Noise type %d not implemented", type);
139  }
140  }
141 
142  /** Decide on the input order*/
143  template <typename real, int Ns, int Nc>
144  void spinorNoise(ColorSpinorField &in, RNG &rngstate, QudaNoiseType type)
145  {
146  if (in.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
147  spinorNoise<real,Ns,Nc,QUDA_FLOAT2_FIELD_ORDER>(in, rngstate, type);
148  } else if (in.FieldOrder() == QUDA_FLOAT4_FIELD_ORDER) {
149  spinorNoise<real,Ns,Nc,QUDA_FLOAT4_FIELD_ORDER>(in, rngstate, type);
150  } else {
151  errorQuda("Order %d not defined (Ns=%d, Nc=%d)", in.FieldOrder(), Ns, Nc);
152  }
153  }
154 
155  template <typename real, int Ns>
156  void spinorNoise(ColorSpinorField &src, RNG& randstates, QudaNoiseType type)
157  {
158  if (src.Ncolor() == 3) {
159  spinorNoise<real,Ns,3>(src, randstates, type);
160  } else if (src.Ncolor() == 6) {
161  spinorNoise<real,Ns,6>(src, randstates, type);
162  } else if (src.Ncolor() == 24) {
163  spinorNoise<real,Ns,24>(src, randstates, type);
164  } else if (src.Ncolor() == 32) {
165  spinorNoise<real,Ns,32>(src, randstates, type);
166  } else if (src.Ncolor() == 64) {
167  spinorNoise<real,Ns,64>(src, randstates, type);
168  } else if (src.Ncolor() == 96) {
169  spinorNoise<real,Ns,96>(src, randstates, type);
170  } else {
171  errorQuda("nColor = %d not implemented", src.Ncolor());
172  }
173  }
174 
175  template <typename real>
176  void spinorNoise(ColorSpinorField &src, RNG& randstates, QudaNoiseType type)
177  {
178  if (src.Nspin() == 4) {
179 #ifdef NSPIN4
180  spinorNoise<real,4>(src, randstates, type);
181 #else
182  errorQuda("spinorNoise has not been built for nSpin=%d fields", src.Nspin());
183 #endif
184  } else if (src.Nspin() == 2) {
185 #ifdef NSPIN2
186  spinorNoise<real,2>(src, randstates, type);
187 #else
188  errorQuda("spinorNoise has not been built for nSpin=%d fields", src.Nspin());
189 #endif
190  } else if (src.Nspin() == 1) {
191 #ifdef NSPIN1
192  spinorNoise<real,1>(src, randstates, type);
193 #else
194  errorQuda("spinorNoise has not been built for nSpin=%d fields", src.Nspin());
195 #endif
196  } else {
197  errorQuda("Nspin = %d not implemented", src.Nspin());
198  }
199  }
200 
201  void spinorNoise(ColorSpinorField &src_, RNG &randstates, QudaNoiseType type)
202  {
203  // if src is a CPU field then create GPU field
204  ColorSpinorField *src = &src_;
205  if (src_.Location() == QUDA_CPU_FIELD_LOCATION || src_.Precision() < QUDA_SINGLE_PRECISION) {
206  ColorSpinorParam param(src_);
207  QudaPrecision prec = std::max(src_.Precision(), QUDA_SINGLE_PRECISION);
208  param.setPrecision(prec, prec, true); // change to native field order
209  param.create = QUDA_NULL_FIELD_CREATE;
210  param.location = QUDA_CUDA_FIELD_LOCATION;
211  src = ColorSpinorField::Create(param);
212  }
213 
214  switch (src->Precision()) {
215  case QUDA_DOUBLE_PRECISION: spinorNoise<double>(*src, randstates, type); break;
216  case QUDA_SINGLE_PRECISION: spinorNoise<float>(*src, randstates, type); break;
217  default: errorQuda("Precision %d not implemented", src->Precision());
218  }
219 
220  if (src != &src_) {
221  src_ = *src; // upload result
222  delete src;
223  }
224  }
225 
226  void spinorNoise(ColorSpinorField &src, unsigned long long seed, QudaNoiseType type)
227  {
228  RNG *randstates = new RNG(src, seed);
229  randstates->Init();
230  spinorNoise(src, *randstates, type);
231  randstates->Release();
232  delete randstates;
233  }
234 
235 } // namespace quda