QUDA  0.9.0
spinor_gauss.cu
Go to the documentation of this file.
1 /*
2  Spinor reordering and copying routines. These are implemented to
3  run on both CPU and GPU. Here we are templating on the following:
4  - input precision
5  - output precision
6  - number of colors
7  - number of spins
8  - field ordering
9 */
10 
11 #include <color_spinor_field.h>
13 #include <tune_quda.h>
14 #include <algorithm> // for std::swap
15 #include <random_quda.h>
16 
17 namespace quda {
18 
19  using namespace colorspinor;
20 
21 
/**
 * Fill one (site, spin, colour) entry of a spinor accessor with a random
 * complex number built from two draws of Random<FloatIn>.
 *
 * The first draw sets the phase, phi = 2*pi*u1; the second sets the modulus,
 * r = sqrt(-log(u2)).  The value r*(cos(phi), sin(phi)) is written to
 * inOrder(0, x, s, c).  Assuming Random<FloatIn> yields uniform deviates in
 * (0,1], the real and imaginary parts are independent Gaussians with
 * variance 1/2 each (i.e. <|z|^2> = 1).
 *
 * @param inOrder     accessor written through its (0, x, s, c) call operator;
 *                    the leading 0 is presumably the parity index - confirm
 * @param localState  generator state, advanced by the two draws
 * @param x           site index
 * @param s           spin index
 * @param c           colour index
 */
template<typename InOrder, typename FloatIn>
__device__ __host__ void genGauss(InOrder& inOrder, cuRNGState& localState, int x, int s, int c){
  // The draw order is part of the result: phase first, then modulus.
  // The 2.0 and -1.0 literals are doubles, so these expressions are evaluated
  // in double precision before being narrowed to FloatIn; kept as-is so the
  // generated numbers do not change.
  const FloatIn angle = 2.0*M_PI*Random<FloatIn>(localState);
  const FloatIn uniform = Random<FloatIn>(localState);
  const FloatIn modulus = sqrt(-1.0 * log(uniform));

  const FloatIn re = modulus*cos(angle);
  const FloatIn im = modulus*sin(angle);
  inOrder(0, x, s, c) = complex<FloatIn>(re, im);
}
29 
/**
 * Host-side loop that fills every (site, spin, colour) entry of the accessor
 * with a random complex number via genGauss.
 *
 * The generator state of a site is read once, used for all Ns*Nc entries of
 * that site, and written back once.  This produces exactly the same sequence
 * as reloading and storing the state around every single entry, and mirrors
 * what gaussSpinorKernel does per thread.
 *
 * NOTE(review): rngstate.State() is dereferenced on the host here; confirm
 * that the state array is host-accessible whenever this path is taken.
 *
 * @param inOrder   accessor that receives the random values
 * @param volume    number of sites to fill (sites 0 .. volume-1)
 * @param rngstate  RNG handle, taken by value; per-site states are reached
 *                  through rngstate.State()
 */
template <typename FloatIn, int Ns, int Nc, typename InOrder>
void gaussSpinor(InOrder &inOrder, int volume, RNG rngstate) {
  for (int x=0; x<volume; x++) {
    // One load per site: the state only depends on x, not on s or c.
    cuRNGState localState = rngstate.State()[x];
    for (int s=0; s<Ns; s++) {
      for (int c=0; c<Nc; c++) {
        genGauss<InOrder, FloatIn>(inOrder, localState, x, s, c);
      }
    }
    // Persist the advanced state so the next use continues the stream.
    rngstate.State()[x] = localState;
  }
}
44 
/**
 * Kernel that fills a spinor accessor with random complex numbers, one
 * thread per site.
 *
 * Launch layout: the site index is computed from blockIdx.x, blockDim.x and
 * threadIdx.x only, so a one-dimensional grid of one-dimensional blocks
 * covering at least `volume` threads is expected.  Threads whose index is
 * not below `volume` do nothing.  No shared memory is used.
 *
 * Each active thread copies its site's generator state into a local
 * variable, generates all Ns*Nc entries of the site with genGauss, and
 * writes the advanced state back.
 *
 * @param inOrder   accessor that receives the random values (passed by value)
 * @param volume    number of sites to fill
 * @param rngstate  RNG handle (passed by value); per-site states are reached
 *                  through rngstate.State()
 */
template <typename FloatIn, int Ns, int Nc, typename InOrder>
__global__ void gaussSpinorKernel(InOrder inOrder, int volume, RNG rngstate) {
  const int site = blockIdx.x * blockDim.x + threadIdx.x;

  if (site < volume) {
    cuRNGState *states = rngstate.State();
    cuRNGState rng = states[site];   // work on a thread-local copy

    for (int spin=0; spin<Ns; ++spin)
      for (int color=0; color<Nc; ++color)
        genGauss<InOrder, FloatIn>(inOrder, rng, site, spin, color);

    states[site] = rng;              // store the advanced state
  }
}
59 
/**
 * Tunable driver that fills a colour-spinor accessor with random complex
 * numbers, on the host or on the device depending on meta.Location().
 *
 * @tparam FloatIn  floating-point type of the generated numbers
 * @tparam Ns       number of spin components per site
 * @tparam Nc       number of colour components per site
 * @tparam InOrder  accessor type written by genGauss
 */
template <typename FloatIn, int Ns, int Nc, typename InOrder>
class GaussSpinor : Tunable {
  InOrder &in;
  const ColorSpinorField &meta; // this reference is for meta data only
  // NOTE(review): the declaration of this member was absent from the reviewed
  // listing although it is initialised and used below.  A reference is used
  // to mirror `in`/`meta` and the constructor parameter type - confirm.
  RNG &rngstate;

private:
  // The kernel uses no shared memory.
  unsigned int sharedBytesPerThread() const { return 0; }
  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }

  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.

  // One thread per site of the field, as reported by meta.VolumeCB().
  unsigned int minThreads() const { return meta.VolumeCB(); }

public:
  /**
   * @param in        accessor to be filled
   * @param meta      field supplying location, volume and tuning strings
   * @param rngstate  RNG handle providing one generator state per site
   */
  GaussSpinor(InOrder &in, const ColorSpinorField &meta, RNG &rngstate)
    : in(in), meta(meta), rngstate(rngstate) { }

  /**
   * Generate the random field.
   *
   * For a CPU-located field the host loop is run directly; otherwise the
   * launch parameters are obtained from tuneLaunch and the kernel is
   * launched on `stream`.
   *
   * @param stream  CUDA stream used for the device launch
   */
  void apply(const cudaStream_t &stream) {
    if (meta.Location() == QUDA_CPU_FIELD_LOCATION) {
      gaussSpinor<FloatIn, Ns, Nc>(in, meta.VolumeCB(), rngstate);
    } else {
      TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
      gaussSpinorKernel<FloatIn, Ns, Nc, InOrder>
        <<<tp.grid, tp.block, tp.shared_bytes, stream>>>
        (in, meta.VolumeCB(), rngstate);
    }
  }

  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), meta.AuxString()); }

  long long flops() const { return 0; }
  long long bytes() const { return in.Bytes(); }

  // Save the generator states before tuning and put them back afterwards;
  // presumably so that trial launches do not advance the random stream.
  void preTune(){
    rngstate.backup();
  }
  void postTune(){
    rngstate.restore();
  }
};
101 
/**
 * Build a GaussSpinor driver for the given accessor and run it.
 *
 * @param inOrder   accessor to be filled with random values
 * @param meta      field describing the data behind the accessor
 * @param rngstate  RNG handle providing the per-site generator states
 */
template <typename FloatIn, int Ns, int Nc, typename InOrder>
void gaussSpinor(InOrder &inOrder, const ColorSpinorField &meta, RNG &rngstate) {
  typedef GaussSpinor<FloatIn, Ns, Nc, InOrder> Generator;

  Generator noise(inOrder, meta, rngstate);
  noise.apply(0);  // stream argument 0
}
107 
/**
 * Select the accessor matching the field's memory order and fill the field
 * with random complex numbers.
 *
 * Only QUDA_FLOAT2_FIELD_ORDER and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER are
 * handled; any other order is reported through errorQuda.
 *
 * NOTE(review): the `ColorSpinor` alias declarations were absent from the
 * reviewed listing although both branches use the name.  They are restored
 * here as colorspinor::FieldOrderCB instantiations for the respective field
 * order; confirm the template arguments against the accessor's declaration
 * and that its header is included by this file.
 *
 * @param in        field to be filled
 * @param rngstate  RNG handle providing the per-site generator states
 */
template <typename FloatIn, int Ns, int Nc>
void gaussSpinor(ColorSpinorField &in, RNG &rngstate) {

  if (in.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
    typedef colorspinor::FieldOrderCB<FloatIn, Ns, Nc, 1, QUDA_FLOAT2_FIELD_ORDER> ColorSpinor;
    ColorSpinor inOrder(in);
    gaussSpinor<FloatIn,Ns,Nc>(inOrder, in, rngstate);
  } else if (in.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
    typedef colorspinor::FieldOrderCB<FloatIn, Ns, Nc, 1, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER> ColorSpinor;
    ColorSpinor inOrder(in);
    gaussSpinor<FloatIn,Ns,Nc>(inOrder, in, rngstate);
  } else {
    errorQuda("Order %d not defined (Ns=%d, Nc=%d)", in.FieldOrder(), Ns, Nc);
  }

}
125 
/**
 * Helper for spinorGauss: pick the floating-point type from the field's
 * precision for a fixed spin/colour count.  Precisions other than single
 * and double are reported through errorQuda instead of being ignored.
 */
template <int Ns, int Nc>
static void gaussSpinorByPrecision(ColorSpinorField &src, RNG &randstates) {
  if (src.Precision() == QUDA_SINGLE_PRECISION) {
    gaussSpinor<float, Ns, Nc>(src, randstates);
  } else if (src.Precision() == QUDA_DOUBLE_PRECISION) {
    gaussSpinor<double, Ns, Nc>(src, randstates);
  } else {
    // Previously this case fell through silently and left the field unfilled.
    errorQuda("spinorGauss not implemented for precision %d", static_cast<int>(src.Precision()));
  }
}

/**
 * Fill a colour-spinor field with random complex numbers using the supplied
 * generator states.
 *
 * Supported fields: Ncolor == 3, Nspin == 1 or 4, single or double
 * precision.  Anything else is reported through errorQuda.
 *
 * @param src         field to be filled
 * @param randstates  RNG handle providing the per-site generator states
 */
void spinorGauss(ColorSpinorField &src, RNG& randstates){

  if (src.Ncolor() != 3 ){
    errorQuda("spinorGauss is not implemented for Ncolor=%d (only Ncolor=3 is supported)",
              static_cast<int>(src.Ncolor()));
  }

  if (src.Nspin() == 4 ){
    gaussSpinorByPrecision<4, 3>(src, randstates);
  } else if (src.Nspin() == 1 ){
    gaussSpinorByPrecision<1, 3>(src, randstates);
  } else {
    errorQuda("spinorGauss not implemented for Nspin=%d (only Nspin=1 and Nspin=4 are supported)",
              static_cast<int>(src.Nspin()));
  }

}
148 
/**
 * Fill a colour-spinor field with random complex numbers from a freshly
 * seeded generator.
 *
 * A temporary RNG is created for src.VolumeCB() sites, initialised, used by
 * the RNG-based spinorGauss overload, and released before returning.
 *
 * NOTE(review): the signature line was absent from the reviewed listing and
 * is restored from the file's own cross-reference entry for this function.
 *
 * @param src   field to be filled
 * @param seed  seed handed to the RNG constructor
 */
void spinorGauss(ColorSpinorField &src, int seed)
{
  // Automatic storage instead of new/delete: the object's lifetime is exactly
  // this scope, so no manual delete is needed on any exit path.
  RNG randstates(src.VolumeCB(), seed, src.X());
  randstates.Init();
  spinorGauss(src, randstates);
  randstates.Release();
}
157 } // namespace quda
void Init()
Initialize CURAND RNG states.
Definition: random.cu:146
dim3 dim3 blockDim
GaussSpinor(InOrder &in, const ColorSpinorField &meta, RNG &rngstate)
Definition: spinor_gauss.cu:75
struct curandStateMRG32k3a cuRNGState
Definition: random_quda.h:17
bool tuneGridDim() const
Definition: spinor_gauss.cu:71
unsigned int sharedBytesPerBlock(const TuneParam &param) const
Definition: spinor_gauss.cu:69
const char * AuxString() const
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
const void * src
void spinorGauss(ColorSpinorField &src, int seed)
#define errorQuda(...)
Definition: util_quda.h:90
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
void apply(const cudaStream_t &stream)
Definition: spinor_gauss.cu:78
cudaStream_t * stream
void backup()
Backup CURAND array states initialization.
Definition: random.cu:189
const char * VolString() const
TuneKey tuneKey() const
Definition: spinor_gauss.cu:89
const ColorSpinorField & meta
Definition: spinor_gauss.cu:63
long long bytes() const
Definition: spinor_gauss.cu:92
QudaGaugeParam param
Definition: pack_test.cpp:17
long long flops() const
Definition: spinor_gauss.cu:91
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:40
void Release()
Release Device memory for CURAND RNG states.
Definition: random.cu:168
__device__ __host__ void genGauss(InOrder &inOrder, cuRNGState &localState, int x, int s, int c)
Definition: spinor_gauss.cu:23
void restore()
Restore CURAND array states initialization.
Definition: random.cu:179
cpuColorSpinorField * in
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:603
void gaussSpinor(InOrder &inOrder, int volume, RNG rngstate)
Definition: spinor_gauss.cu:32
unsigned int minThreads() const
Definition: spinor_gauss.cu:72
QudaFieldLocation Location() const
__host__ __device__ ValueType log(ValueType x)
Definition: complex_quda.h:90
__host__ __device__ __inline__ cuRNGState * State()
Definition: random_quda.h:35
const void * c
__host__ __device__ ValueType cos(ValueType x)
Definition: complex_quda.h:35
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:51
unsigned int sharedBytesPerThread() const
Definition: spinor_gauss.cu:67
__global__ void gaussSpinorKernel(InOrder inOrder, int volume, RNG rngstate)
Definition: spinor_gauss.cu:47