QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
copy_color_spinor.cuh
Go to the documentation of this file.
1 /*
2  Spinor reordering and copying routines. These are implemented to
3  un on both CPU and GPU. Here we are templating on the following:
4  - input precision
5  - output precision
6  - number of colors
7  - number of spins
8  - field ordering
9 */
10 
11 #include <color_spinor_field.h>
13 #include <tune_quda.h>
14 #include <utility> // for std::swap
15 
16 #define PRESERVE_SPINOR_NORM
17 
18 #ifdef PRESERVE_SPINOR_NORM // Preserve the norm regardless of basis
19 #define kP (1.0/sqrt(2.0))
20 #define kU (1.0/sqrt(2.0))
21 #else // More numerically accurate not to preserve the norm between basis
22 #define kP (0.5)
23 #define kU (1.0)
24 #endif
25 
26 namespace quda {
27 
28  using namespace colorspinor;
29 
30  template <typename FloatOut, typename FloatIn, int nSpin_, int nColor_, typename Out, typename In>
32  using realOut = typename mapper<FloatOut>::type;
33  using realIn = typename mapper<FloatIn>::type;
34  static constexpr int nSpin = nSpin_;
35  static constexpr int nColor = nColor_;
36  Out out;
37  const In in;
38  const int volumeCB;
39  const int nParity;
40  const int outParity;
41  const int inParity;
42  CopyColorSpinorArg(const Out &out, const In &in, const ColorSpinorField &out_, const ColorSpinorField &in_)
43  : out(out), in(in), volumeCB(in_.VolumeCB()), nParity(in_.SiteSubset()),
44  outParity(out_.SiteOrder()==QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0),
45  inParity(in_.SiteOrder()==QUDA_ODD_EVEN_SITE_ORDER ? 1 : 0) { }
46  };
47 
49  template <typename Arg>
50  struct PreserveBasis {
51  static constexpr int Ns = Arg::nSpin;
52  static constexpr int Nc = Arg::nColor;
53  template <typename FloatOut, typename FloatIn>
54  __device__ __host__ inline void operator()(complex<FloatOut> out[Ns*Nc], const complex<FloatIn> in[Ns*Nc]) const {
55  for (int s=0; s<Ns; s++) for (int c=0; c<Nc; c++) out[s*Nc+c] = in[s*Nc+c];
56  }
57  };
58 
60  template <typename Arg>
61  struct NonRelBasis {
62  static constexpr int Ns = Arg::nSpin;
63  static constexpr int Nc = Arg::nColor;
64  template <typename FloatOut, typename FloatIn>
65  __device__ __host__ inline void operator()(complex<FloatOut> out[Ns*Nc], const complex<FloatIn> in[Ns*Nc]) const {
66  int s1[4] = {1, 2, 3, 0};
67  int s2[4] = {3, 0, 1, 2};
68  FloatOut K1[4] = {static_cast<FloatOut>(kP), static_cast<FloatOut>(-kP), static_cast<FloatOut>(-kP), static_cast<FloatOut>(-kP)};
69  FloatOut K2[4] = {static_cast<FloatOut>(kP), static_cast<FloatOut>(-kP), static_cast<FloatOut>(kP), static_cast<FloatOut>(kP)};
70  for (int s=0; s<Ns; s++) {
71  for (int c=0; c<Nc; c++) {
72  out[s*Nc+c] = K1[s]*static_cast<complex<FloatOut> >(in[s1[s]*Nc+c]) + K2[s]*static_cast<complex<FloatOut> >(in[s2[s]*Nc+c]);
73  }
74  }
75  }
76  };
77 
79  template <typename Arg>
80  struct RelBasis {
81  static constexpr int Ns = Arg::nSpin;
82  static constexpr int Nc = Arg::nColor;
83  template <typename FloatOut, typename FloatIn>
84  __device__ __host__ inline void operator()(complex<FloatOut> out[Ns*Nc], const complex<FloatIn> in[Ns*Nc]) const {
85  int s1[4] = {1, 2, 3, 0};
86  int s2[4] = {3, 0, 1, 2};
87  FloatOut K1[4] = {static_cast<FloatOut>(-kU), static_cast<FloatOut>(kU), static_cast<FloatOut>(kU), static_cast<FloatOut>(kU)};
88  FloatOut K2[4] = {static_cast<FloatOut>(-kU), static_cast<FloatOut>(kU), static_cast<FloatOut>(-kU), static_cast<FloatOut>(-kU)};
89  for (int s=0; s<Ns; s++) {
90  for (int c=0; c<Nc; c++) {
91  out[s*Nc+c] = K1[s]*static_cast<complex<FloatOut> >(in[s1[s]*Nc+c]) + K2[s]*static_cast<complex<FloatOut> >(in[s2[s]*Nc+c]);
92  }
93  }
94  }
95  };
96 
98  template <typename Arg>
100  static constexpr int Ns = Arg::nSpin;
101  static constexpr int Nc = Arg::nColor;
102  template <typename FloatOut, typename FloatIn>
103  __device__ __host__ inline void operator()(complex<FloatOut> out[Ns*Nc], const complex<FloatIn> in[Ns*Nc]) const {
104  int s1[4] = {0, 1, 0, 1};
105  int s2[4] = {2, 3, 2, 3};
106  FloatOut K1[4] = {static_cast<FloatOut>(-kP), static_cast<FloatOut>(-kP), static_cast<FloatOut>(kP), static_cast<FloatOut>(kP)};
107  FloatOut K2[4] = {static_cast<FloatOut>(kP), static_cast<FloatOut>(kP), static_cast<FloatOut>(kP), static_cast<FloatOut>(kP)};
108  for (int s=0; s<Ns; s++) {
109  for (int c=0; c<Nc; c++) {
110  out[s*Nc+c] = K1[s]*static_cast<complex<FloatOut> >(in[s1[s]*Nc+c]) + K2[s]*static_cast<complex<FloatOut> >(in[s2[s]*Nc+c]);
111  }
112  }
113  }
114  };
115 
117  template <typename Arg>
119  static constexpr int Ns = Arg::nSpin;
120  static constexpr int Nc = Arg::nColor;
121  template <typename FloatOut, typename FloatIn>
122  __device__ __host__ inline void operator()(complex<FloatOut> out[Ns*Nc], const complex<FloatIn> in[Ns*Nc]) const {
123  int s1[4] = {0, 1, 0, 1};
124  int s2[4] = {2, 3, 2, 3};
125  FloatOut K1[4] = {static_cast<FloatOut>(-kU), static_cast<FloatOut>(-kU), static_cast<FloatOut>(kU), static_cast<FloatOut>(kU)};
126  FloatOut K2[4] = {static_cast<FloatOut>(kU),static_cast<FloatOut>(kU), static_cast<FloatOut>(kU), static_cast<FloatOut>(kU)};
127  for (int s=0; s<Ns; s++) {
128  for (int c=0; c<Nc; c++) {
129  out[s*Nc+c] = K1[s]*static_cast<complex<FloatOut> >(in[s1[s]*Nc+c]) + K2[s]*static_cast<complex<FloatOut> >(in[s2[s]*Nc+c]);
130  }
131  }
132  }
133  };
134 
136  template <typename Arg, typename Basis> void copyColorSpinor(Arg &arg, const Basis &basis)
137  {
138  for (int parity = 0; parity<arg.nParity; parity++) {
139  for (int x=0; x<arg.volumeCB; x++) {
142  basis(out.data, in.data);
143  arg.out(x, (parity+arg.outParity)&1) = out;
144  }
145  }
146  }
147 
149  template <typename Arg, typename Basis> __global__ void copyColorSpinorKernel(Arg arg, Basis basis)
150  {
151  int x = blockIdx.x * blockDim.x + threadIdx.x;
152  if (x >= arg.volumeCB) return;
153  int parity = blockIdx.y * blockDim.y + threadIdx.y;
154 
155  ColorSpinor<typename Arg::realIn, Arg::nColor, Arg::nSpin> in = arg.in(x, (parity+arg.inParity)&1);
157  basis(out.data, in.data);
158  arg.out(x, (parity+arg.outParity)&1) = out;
159  }
160 
161  template <int Ns, typename Arg>
166 
167  private:
168  unsigned int sharedBytesPerThread() const { return 0; }
169  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }
170  bool advanceSharedBytes(TuneParam &param) const { return false; } // Don't tune shared mem
171  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
172  unsigned int minThreads() const { return meta.VolumeCB(); }
173 
174  public:
176  QudaFieldLocation location)
177  : TunableVectorY(arg.nParity), arg(arg), meta(in), location(location) {
178  if (out.GammaBasis()!=in.GammaBasis()) errorQuda("Cannot change gamma basis for nSpin=%d\n", Ns);
179  writeAuxString("out_stride=%d,in_stride=%d", arg.out.stride, arg.in.stride);
180  }
181  virtual ~CopyColorSpinor() { ; }
182 
183  void apply(const cudaStream_t &stream) {
184  if (location == QUDA_CPU_FIELD_LOCATION) {
186  } else {
187  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
189  }
190  }
191 
192  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }
193  long long flops() const { return 0; }
194  long long bytes() const { return arg.in.Bytes() + arg.out.Bytes(); }
195  };
196 
197  template <typename Arg>
199  static constexpr int Ns = 4;
204 
205  private:
206  unsigned int sharedBytesPerThread() const { return 0; }
207  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }
208  bool advanceSharedBytes(TuneParam &param) const { return false; } // Don't tune shared mem
209  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
210  unsigned int minThreads() const { return in.VolumeCB(); }
211 
212  public:
214  QudaFieldLocation location)
215  : TunableVectorY(arg.nParity), arg(arg), out(out), in(in), location(location) {
216 
217  if (out.GammaBasis()==in.GammaBasis()) {
218  writeAuxString("out_stride=%d,in_stride=%d,PreserveBasis", arg.out.stride, arg.in.stride);
220  writeAuxString("out_stride=%d,in_stride=%d,NonRelBasis", arg.out.stride, arg.in.stride);
222  writeAuxString("out_stride=%d,in_stride=%d,RelBasis", arg.out.stride, arg.in.stride);
223  } else if (out.GammaBasis() == QUDA_UKQCD_GAMMA_BASIS && in.GammaBasis() == QUDA_CHIRAL_GAMMA_BASIS) {
224  writeAuxString("out_stride=%d,in_stride=%d,ChiralToNonRelBasis", arg.out.stride, arg.in.stride);
225  } else if (in.GammaBasis() == QUDA_UKQCD_GAMMA_BASIS && out.GammaBasis() == QUDA_CHIRAL_GAMMA_BASIS) {
226  writeAuxString("out_stride=%d,in_stride=%d,NonRelToChiralBasis", arg.out.stride, arg.in.stride);
227  } else {
228  errorQuda("Basis change from %d to %d not supported", in.GammaBasis(), out.GammaBasis());
229  }
230  }
231  virtual ~CopyColorSpinor() { ; }
232 
233  void apply(const cudaStream_t &stream) {
234  if (location == QUDA_CPU_FIELD_LOCATION) {
235  if (out.GammaBasis()==in.GammaBasis()) {
241  } else if (out.GammaBasis() == QUDA_UKQCD_GAMMA_BASIS && in.GammaBasis() == QUDA_CHIRAL_GAMMA_BASIS) {
243  } else if (in.GammaBasis() == QUDA_UKQCD_GAMMA_BASIS && out.GammaBasis() == QUDA_CHIRAL_GAMMA_BASIS) {
245  }
246  } else {
247  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
248  if (out.GammaBasis()==in.GammaBasis()) {
249  copyColorSpinorKernel<<<tp.grid, tp.block, tp.shared_bytes, stream>>> (arg, PreserveBasis<Arg>());
251  copyColorSpinorKernel<<<tp.grid, tp.block, tp.shared_bytes, stream>>> (arg, NonRelBasis<Arg>());
253  copyColorSpinorKernel<<<tp.grid, tp.block, tp.shared_bytes, stream>>> (arg, RelBasis<Arg>());
254  } else if (out.GammaBasis() == QUDA_UKQCD_GAMMA_BASIS && in.GammaBasis() == QUDA_CHIRAL_GAMMA_BASIS) {
256  } else if (in.GammaBasis() == QUDA_UKQCD_GAMMA_BASIS && out.GammaBasis() == QUDA_CHIRAL_GAMMA_BASIS) {
258  }
259  }
260  }
261 
262  TuneKey tuneKey() const { return TuneKey(in.VolString(), typeid(*this).name(), aux); }
263  long long flops() const { return 0; }
264  long long bytes() const { return arg.in.Bytes() + arg.out.Bytes(); }
265  };
266 
267 
269  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename Out, typename In>
270  void genericCopyColorSpinor(Out &outOrder, const In &inOrder, const ColorSpinorField &out,
271  const ColorSpinorField &in, QudaFieldLocation location)
272  {
274  CopyColorSpinor<Ns, decltype(arg)> copy(arg, out, in, location);
275  copy.apply(0);
276  }
277 
279  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename InOrder>
280  void genericCopyColorSpinor(InOrder &inOrder, ColorSpinorField &out,
281  const ColorSpinorField &in, QudaFieldLocation location,
282  FloatOut *Out, float *outNorm) {
283  const bool override = true;
284  if (out.isNative()) {
286  ColorSpinor outOrder(out, 1, Out, outNorm, nullptr, override);
287  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
288  (outOrder, inOrder, out, in, location);
289  } else if (out.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER && Ns == 4) {
290  // this is needed for single-precision mg for changing basis in the transfer
292  ColorSpinor outOrder(out, 1, Out, outNorm, nullptr, override);
293  genericCopyColorSpinor<FloatOut,FloatIn,4,Nc>
294  (outOrder, inOrder, out, in, location);
295  } else if (out.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
296  SpaceSpinorColorOrder<FloatOut, Ns, Nc> outOrder(out, 1, Out);
297  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
298  (outOrder, inOrder, out, in, location);
299  } else if (out.FieldOrder() == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) {
300  SpaceColorSpinorOrder<FloatOut, Ns, Nc> outOrder(out, 1, Out);
301  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
302  (outOrder, inOrder, out, in, location);
304 
305 #ifdef BUILD_TIFR_INTERFACE
306  PaddedSpaceSpinorColorOrder<FloatOut, Ns, Nc> outOrder(out, 1, Out);
307  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
308  (outOrder, inOrder, out, in, location);
309 #else
310  errorQuda("TIFR interface has not been built\n");
311 #endif
312 
313  } else if (out.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER) {
314 
315 #ifdef BUILD_QDPJIT_INTERFACE
316  QDPJITDiracOrder<FloatOut, Ns, Nc> outOrder(out, 1, Out);
317  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
318  (outOrder, inOrder, out, in, location);
319 #else
320  errorQuda("QDPJIT interface has not been built\n");
321 #endif
322  } else {
323  errorQuda("Order %d not defined (Ns=%d, Nc=%d)", out.FieldOrder(), Ns, Nc);
324  }
325 
326  }
327 
329  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
331  QudaFieldLocation location, FloatOut *Out, FloatIn *In,
332  float *outNorm, float *inNorm) {
333  const bool override = true;
334  if (in.isNative()) {
336  ColorSpinor inOrder(in, 1, In, inNorm, nullptr, override);
337  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in, location, Out, outNorm);
338  } else if (in.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER && Ns == 4) {
339  // this is needed for single-precision mg for changing basis in the transfer
341  ColorSpinor inOrder(in, 1, In, inNorm, nullptr, override);
342  genericCopyColorSpinor<FloatOut,FloatIn,4,Nc>(inOrder, out, in, location, Out, outNorm);
343  } else if (in.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
344  SpaceSpinorColorOrder<FloatIn, Ns, Nc> inOrder(in, 1, In);
345  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in, location, Out, outNorm);
346  } else if (in.FieldOrder() == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) {
347  SpaceColorSpinorOrder<FloatIn, Ns, Nc> inOrder(in, 1, In);
348  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in, location, Out, outNorm);
350 
351 #ifdef BUILD_TIFR_INTERFACE
353  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in, location, Out, outNorm);
354 #else
355  errorQuda("TIFR interface has not been built\n");
356 #endif
357 
358  } else if (in.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER) {
359 
360 #ifdef BUILD_QDPJIT_INTERFACE
361  QDPJITDiracOrder<FloatIn, Ns, Nc> inOrder(in, 1, In);
362  genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in, location, Out, outNorm);
363 #else
364  errorQuda("QDPJIT interface has not been built\n");
365 #endif
366  } else {
367  errorQuda("Order %d not defined (Ns=%d, Nc=%d)", in.FieldOrder(), Ns, Nc);
368  }
369 
370  }
371 
372 
373  template <int Ns, int Nc, typename dstFloat, typename srcFloat>
375  QudaFieldLocation location, dstFloat *Dst, srcFloat *Src,
376  float *dstNorm, float *srcNorm) {
377 
378  if (dst.Ndim() != src.Ndim())
379  errorQuda("Number of dimensions %d %d don't match", dst.Ndim(), src.Ndim());
380 
381  if (dst.Volume() != src.Volume())
382  errorQuda("Volumes %d %d don't match", dst.Volume(), src.Volume());
383 
384  if (!( dst.SiteOrder() == src.SiteOrder() ||
385  (dst.SiteOrder() == QUDA_EVEN_ODD_SITE_ORDER &&
387  (dst.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER &&
388  src.SiteOrder() == QUDA_EVEN_ODD_SITE_ORDER) ) ) {
389  errorQuda("Subset orders %d %d don't match", dst.SiteOrder(), src.SiteOrder());
390  }
391 
392  if (dst.SiteSubset() != src.SiteSubset())
393  errorQuda("Subset types do not match %d %d", dst.SiteSubset(), src.SiteSubset());
394 
395  // We currently only support parity-ordered fields; even-odd or odd-even
397  errorQuda("Copying to full fields with lexicographical ordering is not currently supported");
398  }
399 
401  errorQuda("QDPJIT field ordering not supported for full site fields");
402  }
403 
404  genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>(dst, src, location, Dst, Src, dstNorm, srcNorm);
405 
406  }
407 
408  template <int Nc, typename dstFloat, typename srcFloat>
410  QudaFieldLocation location, dstFloat *Dst, srcFloat *Src,
411  float *dstNorm=0, float *srcNorm=0) {
412 
413  if (dst.Nspin() != src.Nspin())
414  errorQuda("source and destination spins must match");
415 
416  if (dst.Nspin() == 4) {
417 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_COVDEV) || defined(GPU_CONTRACT)
418  copyGenericColorSpinor<4,Nc>(dst, src, location, Dst, Src, dstNorm, srcNorm);
419 #else
420  errorQuda("%s has not been built for Nspin=%d fields", __func__, src.Nspin());
421 #endif
422  } else if (dst.Nspin() == 2) {
423 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_STAGGERED_DIRAC)
424  copyGenericColorSpinor<2,Nc>(dst, src, location, Dst, Src, dstNorm, srcNorm);
425 #else
426  errorQuda("%s has not been built for Nspin=%d fields", __func__, src.Nspin());
427 #endif
428  } else if (dst.Nspin() == 1) {
429 #ifdef GPU_STAGGERED_DIRAC
430  copyGenericColorSpinor<1,Nc>(dst, src, location, Dst, Src, dstNorm, srcNorm);
431 #else
432  errorQuda("%s has not been built for Nspin=%d fields", __func__, src.Nspin());
433 #endif
434  } else {
435  errorQuda("Nspin=%d unsupported", dst.Nspin());
436  }
437 
438  }
439 
440 } // namespace quda
unsigned int minThreads() const
const ColorSpinorField & meta
unsigned int sharedBytesPerThread() const
void CopyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0)
typename mapper< FloatIn >::type realIn
CopyColorSpinorArg(const Out &out, const In &in, const ColorSpinorField &out_, const ColorSpinorField &in_)
#define kP
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
complex< Float > data[size]
Definition: color_spinor.h:27
typename mapper< FloatOut >::type realOut
__device__ __host__ void operator()(complex< FloatOut > out[Ns *Nc], const complex< FloatIn > in[Ns *Nc]) const
void genericCopyColorSpinor(Out &outOrder, const In &inOrder, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location)
__device__ __host__ void operator()(complex< FloatOut > out[Ns *Nc], const complex< FloatIn > in[Ns *Nc]) const
cudaStream_t * stream
QudaGammaBasis GammaBasis() const
const char * VolString() const
__host__ __device__ void copy(T1 &a, const T2 &b)
void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
const QudaFieldLocation location
QudaGaugeParam param
Definition: pack_test.cpp:17
bool advanceSharedBytes(TuneParam &param) const
const int nColor
Definition: covdev_test.cpp:75
__global__ void copyColorSpinorKernel(Arg arg, Basis basis)
unsigned int sharedBytesPerThread() const
CopyColorSpinor(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location)
cpuColorSpinorField * in
__device__ __host__ void operator()(complex< FloatOut > out[Ns *Nc], const complex< FloatIn > in[Ns *Nc]) const
const QudaFieldLocation location
QudaSiteSubset SiteSubset() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
void apply(const cudaStream_t &stream)
__device__ __host__ void operator()(complex< FloatOut > out[Ns *Nc], const complex< FloatIn > in[Ns *Nc]) const
void copyColorSpinor(Arg &arg, const Basis &basis)
enum QudaFieldLocation_s QudaFieldLocation
cpuColorSpinorField * out
const int nParity
Definition: spinor_noise.cu:25
__shared__ float s[]
QudaSiteOrder SiteOrder() const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
bool advanceSharedBytes(TuneParam &param) const
const int volumeCB
Definition: spinor_noise.cu:26
__device__ __host__ void operator()(complex< FloatOut > out[Ns *Nc], const complex< FloatIn > in[Ns *Nc]) const
Accessor routine for ColorSpinorFields in native field order.
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
#define kU
void apply(const cudaStream_t &stream)
QudaParity parity
Definition: covdev_test.cpp:54
QudaFieldOrder FieldOrder() const
unsigned int sharedBytesPerBlock(const TuneParam &param) const
CopyColorSpinor(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location)
unsigned int sharedBytesPerBlock(const TuneParam &param) const