QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
copy_gauge.cuh
Go to the documentation of this file.
1 #include <gauge_field_order.h>
2 #include <quda_matrix.h>
3 
4 namespace quda {
5 
6  using namespace gauge;
7 
11  template <typename OutOrder, typename InOrder>
12  struct CopyGaugeArg {
13  OutOrder out;
14  const InOrder in;
15  int volume;
16  int faceVolumeCB[QUDA_MAX_DIM];
20  int in_offset;
21  CopyGaugeArg(const OutOrder &out, const InOrder &in, const GaugeField &meta)
22  : out(out), in(in), volume(meta.Volume()), nDim(meta.Ndim()),
23  geometry(meta.Geometry()), out_offset(0), in_offset(0) {
24  for (int d=0; d<nDim; d++) faceVolumeCB[d] = meta.SurfaceCB(d) * meta.Nface();
25  }
26  };
27 
31  template <typename FloatOut, typename FloatIn, int length, typename Arg>
32  void copyGauge(Arg &arg) {
33  typedef typename mapper<FloatIn>::type RegTypeIn;
34  typedef typename mapper<FloatOut>::type RegTypeOut;
35  constexpr int nColor = Ncolor(length);
36 
37  for (int parity=0; parity<2; parity++) {
38 
39  for (int d=0; d<arg.geometry; d++) {
40  for (int x=0; x<arg.volume/2; x++) {
41 #ifdef FINE_GRAINED_ACCESS
42  for (int i=0; i<nColor; i++)
43  for (int j=0; j<nColor; j++) {
44  arg.out(d, parity, x, i, j) = arg.in(d, parity, x, i, j);
45  }
46 #else
49  in = arg.in(d, x, parity);
50  out = in;
51  arg.out(d, x, parity) = out;
52 #endif
53  }
54  }
55 
56  }
57  }
58 
62  template <typename Float, int length, typename Arg>
63  void checkNan(Arg &arg) {
64  typedef typename mapper<Float>::type RegType;
65  constexpr int nColor = Ncolor(length);
66 
67  for (int parity=0; parity<2; parity++) {
68 
69  for (int d=0; d<arg.geometry; d++) {
70  for (int x=0; x<arg.volume/2; x++) {
71 #ifdef FINE_GRAINED_ACCESS
72  for (int i=0; i<nColor; i++)
73  for (int j=0; j<nColor; j++) {
74  complex<Float> u = arg.in(d, parity, x, i, j);
75  if (isnan(u.real()))
76  errorQuda("Nan detected at parity=%d, dir=%d, x=%d, i=%d", parity, d, x, 2*(i*Ncolor(length)+j));
77  if (isnan(u.imag()))
78  errorQuda("Nan detected at parity=%d, dir=%d, x=%d, i=%d", parity, d, x, 2*(i*Ncolor(length)+j+1));
79  }
80 #else
81  Matrix<complex<RegType>, nColor> u = arg.in(d, x, parity);
82  for (int i=0; i<length/2; i++)
83  if (isnan(u(i).real()) || isnan(u(i).imag())) errorQuda("Nan detected at parity=%d, dir=%d, x=%d, i=%d", parity, d, x, i);
84 #endif
85  }
86  }
87 
88  }
89  }
90 
95  template <typename FloatOut, typename FloatIn, int length, typename Arg>
96  __global__ void copyGaugeKernel(Arg arg) {
97  typedef typename mapper<FloatIn>::type RegTypeIn;
98  typedef typename mapper<FloatOut>::type RegTypeOut;
99  constexpr int nColor = Ncolor(length);
100 
101  int x = blockIdx.x * blockDim.x + threadIdx.x;
102  int parity_d = blockIdx.z * blockDim.z + threadIdx.z; //parity_d = parity*geometry + d
103  int parity = parity_d / arg.geometry;
104  int d = parity_d % arg.geometry;
105 
106  if (x >= arg.volume/2) return;
107  if (parity_d >= 2 * arg.geometry) return;
108 
109 #ifdef FINE_GRAINED_ACCESS
110  int i = blockIdx.y * blockDim.y + threadIdx.y;
111  if (i >= nColor) return;
112  for (int j=0; j<nColor; j++) arg.out(d, parity, x, i, j) = arg.in(d, parity, x, i, j);
113 #else
114  Matrix<complex<RegTypeIn>, nColor> in;
116  in = arg.in(d, x, parity);
117  out = in;
118  arg.out(d, x, parity) = out;
119 #endif
120  }
121 
125  template <typename FloatOut, typename FloatIn, int length, typename Arg>
126  void copyGhost(Arg &arg) {
127  typedef typename mapper<FloatIn>::type RegTypeIn;
128  typedef typename mapper<FloatOut>::type RegTypeOut;
129  constexpr int nColor = Ncolor(length);
130 
131  for (int parity=0; parity<2; parity++) {
132 
133  for (int d=0; d<arg.nDim; d++) {
134  for (int x=0; x<arg.faceVolumeCB[d]; x++) {
135 #ifdef FINE_GRAINED_ACCESS
136  for (int i=0; i<nColor; i++)
137  for (int j=0; j<nColor; j++)
138  arg.out.Ghost(d+arg.out_offset, parity, x, i, j) = arg.in.Ghost(d+arg.in_offset, parity, x, i, j);
139 #else
140  Matrix<complex<RegTypeIn>, nColor> in;
142  in = arg.in.Ghost(d+arg.in_offset, x, parity);
143  out = in;
144  arg.out.Ghost(d+arg.out_offset, x, parity) = out;
145 #endif
146  }
147  }
148 
149  }
150  }
151 
156  template <typename FloatOut, typename FloatIn, int length, typename Arg>
157  __global__ void copyGhostKernel(Arg arg) {
158  typedef typename mapper<FloatIn>::type RegTypeIn;
159  typedef typename mapper<FloatOut>::type RegTypeOut;
160  constexpr int nColor = Ncolor(length);
161 
162  int x = blockIdx.x * blockDim.x + threadIdx.x;
163  int parity_d = blockIdx.z * blockDim.z + threadIdx.z; //parity_d = parity*nDim + d
164  int parity = parity_d / arg.nDim;
165  int d = parity_d % arg.nDim;
166  if (parity_d >= 2 * arg.nDim) return;
167 
168  if (x < arg.faceVolumeCB[d]) {
169 #ifdef FINE_GRAINED_ACCESS
170  int i = blockIdx.y * blockDim.y + threadIdx.y;
171  if (i >= nColor) return;
172  for (int j=0; j<nColor; j++)
173  arg.out.Ghost(d+arg.out_offset, parity, x, i, j) = arg.in.Ghost(d+arg.in_offset, parity, x, i, j);
174 #else
175  Matrix<complex<RegTypeIn>, nColor> in;
177  in = arg.in.Ghost(d+arg.in_offset, x, parity);
178  out = in;
179  arg.out.Ghost(d+arg.out_offset, x, parity) = out;
180 #endif
181  }
182 
183  }
184 
185 } // namespace quda
__host__ __device__ constexpr int Ncolor(int length)
Return the number of colors of the accessor based on the length of the field.
int_fastdiv geometry
Definition: copy_gauge.cuh:18
#define errorQuda(...)
Definition: util_quda.h:121
const int * SurfaceCB() const
void copyGauge(Arg &arg)
Definition: copy_gauge.cuh:32
void checkNan(Arg &arg)
Definition: copy_gauge.cuh:63
int length[]
int Nface() const
Definition: gauge_field.h:281
__global__ void copyGhostKernel(Arg arg)
Definition: copy_gauge.cuh:157
const int nColor
Definition: covdev_test.cpp:75
CopyGaugeArg(const OutOrder &out, const InOrder &in, const GaugeField &meta)
Definition: copy_gauge.cuh:21
cpuColorSpinorField * in
Main header file for host and device accessors to GaugeFields.
const InOrder in
Definition: copy_gauge.cuh:14
cpuColorSpinorField * out
int_fastdiv nDim
Definition: copy_gauge.cuh:17
void copyGhost(Arg &arg)
Definition: copy_gauge.cuh:126
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__global__ void copyGaugeKernel(Arg arg)
Definition: copy_gauge.cuh:96
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
QudaParity parity
Definition: covdev_test.cpp:54