QUDA  v0.7.0
A library for QCD on GPUs
copy_color_spinor.cu
/*
  Spinor reordering and copying routines. These are implemented to
  run on both CPU and GPU. Here we are templating on the following:
  - input precision
  - output precision
  - number of colors
  - number of spins
  - field ordering
*/

#include <color_spinor_field.h>
#include <color_spinor_field_order.h>
#include <tune_quda.h>
#include <algorithm> // for std::swap

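// Overview of the dispatch chain implemented below: the public
// copyGenericColorSpinor() switches on the destination and source precisions,
// CopyGenericColorSpinor<dstFloat,srcFloat>() switches on Nspin, and the
// genericCopyColorSpinor() overloads select the output order, the input order
// and the gamma-basis rotation, before reordering with packSpinor() on the CPU
// or packSpinorKernel() on the GPU.
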
#define PRESERVE_SPINOR_NORM

#ifdef PRESERVE_SPINOR_NORM // Preserve the norm regardless of basis
#define kP (1.0/sqrt(2.0))
#define kU (1.0/sqrt(2.0))
#else // More numerically accurate not to preserve the norm between bases
#define kP (0.5)
#define kU (1.0)
#endif
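// kP and kU are the coefficients of the gamma-basis rotations defined below.
// With kP = kU = 1/sqrt(2) the rotations are orthogonal, so a copy that changes
// basis preserves the spinor norm; with kP = 0.5 and kU = 1.0 a forward/inverse
// pair of rotations still composes to the identity, but an individual copy
// rescales the field.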

namespace quda {

  // Straight copy with no basis change
  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
  class PreserveBasis {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;
  public:
    __device__ __host__ inline void operator()(RegTypeOut out[Ns*Nc*2], const RegTypeIn in[Ns*Nc*2]) {
      for (int s=0; s<Ns; s++) {
        for (int c=0; c<Nc; c++) {
          for (int z=0; z<2; z++) {
            out[(s*Nc+c)*2+z] = in[(s*Nc+c)*2+z];
          }
        }
      }
    }
  };
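
  // Note on the rotation functors that follow: each output spin component s is a
  // two-term linear combination of input spin components,
  //   out[s] = K1[s]*in[s1[s]] + K2[s]*in[s2[s]],
  // applied identically to every color c and to both real and imaginary parts z.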

  // Transform from relativistic (DeGrand-Rossi) into non-relativistic (UKQCD) basis
  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
  struct NonRelBasis {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;
    __device__ __host__ inline void operator()(RegTypeOut out[Ns*Nc*2], const RegTypeIn in[Ns*Nc*2]) {
      int s1[4] = {1, 2, 3, 0};
      int s2[4] = {3, 0, 1, 2};
      RegTypeOut K1[4] = {kP, -kP, -kP, -kP};
      RegTypeOut K2[4] = {kP, -kP, kP, kP};
      for (int s=0; s<Ns; s++) {
        for (int c=0; c<Nc; c++) {
          for (int z=0; z<2; z++) {
            out[(s*Nc+c)*2+z] = K1[s]*in[(s1[s]*Nc+c)*2+z] + K2[s]*in[(s2[s]*Nc+c)*2+z];
          }
        }
      }
    }
  };

  // Transform from non-relativistic (UKQCD) into relativistic (DeGrand-Rossi) basis
  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
  struct RelBasis {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;
    __device__ __host__ inline void operator()(RegTypeOut out[Ns*Nc*2], const RegTypeIn in[Ns*Nc*2]) {
      int s1[4] = {1, 2, 3, 0};
      int s2[4] = {3, 0, 1, 2};
      RegTypeOut K1[4] = {-kU, kU, kU, kU};
      RegTypeOut K2[4] = {-kU, kU, -kU, -kU};
      for (int s=0; s<Ns; s++) {
        for (int c=0; c<Nc; c++) {
          for (int z=0; z<2; z++) {
            out[(s*Nc+c)*2+z] = K1[s]*in[(s1[s]*Nc+c)*2+z] + K2[s]*in[(s2[s]*Nc+c)*2+z];
          }
        }
      }
    }
  };

  // Transform from chiral into non-relativistic (UKQCD) basis
  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
  struct ChiralToNonRelBasis {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;
    __device__ __host__ inline void operator()(RegTypeOut out[Ns*Nc*2], const RegTypeIn in[Ns*Nc*2]) {
      int s1[4] = {0, 1, 0, 1};
      int s2[4] = {2, 3, 2, 3};
      RegTypeOut K1[4] = {-kP, -kP, kP, kP};
      RegTypeOut K2[4] = { kP, kP, kP, kP};
      for (int s=0; s<Ns; s++) {
        for (int c=0; c<Nc; c++) {
          for (int z=0; z<2; z++) {
            out[(s*Nc+c)*2+z] = K1[s]*in[(s1[s]*Nc+c)*2+z] + K2[s]*in[(s2[s]*Nc+c)*2+z];
          }
        }
      }
    }
  };

  // Transform from non-relativistic (UKQCD) into chiral basis
  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
  struct NonRelToChiralBasis {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;
    __device__ __host__ inline void operator()(RegTypeOut out[Ns*Nc*2], const RegTypeIn in[Ns*Nc*2]) {
      int s1[4] = {0, 1, 0, 1};
      int s2[4] = {2, 3, 2, 3};
      RegTypeOut K1[4] = {-kU, -kU, kU, kU};
      RegTypeOut K2[4] = { kU, kU, kU, kU};
      for (int s=0; s<Ns; s++) {
        for (int c=0; c<Nc; c++) {
          for (int z=0; z<2; z++) {
            out[(s*Nc+c)*2+z] = K1[s]*in[(s1[s]*Nc+c)*2+z] + K2[s]*in[(s2[s]*Nc+c)*2+z];
          }
        }
      }
    }
  };

  // CPU function to reorder spinor fields
  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder, typename Basis>
  void packSpinor(OutOrder &outOrder, const InOrder &inOrder, Basis basis, int volume) {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;
    for (int x=0; x<volume; x++) {
      RegTypeIn in[Ns*Nc*2];
      RegTypeOut out[Ns*Nc*2];
      inOrder.load(in, x);
      basis(out, in);
      outOrder.save(out, x);
    }
  }

  // CUDA kernel to reorder spinor fields; mirrors the CPU routine with one thread per site
  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder, typename Basis>
  __global__ void packSpinorKernel(OutOrder outOrder, const InOrder inOrder, Basis basis, int volume) {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    RegTypeIn in[Ns*Nc*2];
    RegTypeOut out[Ns*Nc*2];
    inOrder.load(in, x);
    // if (x >= volume) return; all load and save routines are index safe (needed for shared variants)
    basis(out, in);
    outOrder.save(out, x);
  }

  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder, typename Basis>
  class PackSpinor : Tunable {
    const InOrder &in;
    OutOrder &out;
    Basis &basis;
    const ColorSpinorField &meta; // this reference is for meta data only

  private:
    unsigned int sharedBytesPerThread() const {
      size_t regSize = sizeof(FloatOut) > sizeof(FloatIn) ? sizeof(FloatOut) : sizeof(FloatIn);
      return Ns*Nc*2*regSize;
    }

    // the minimum shared memory per block is (block+1) because we pad to avoid bank conflicts
    unsigned int sharedBytesPerBlock(const TuneParam &param) const { return (param.block.x+1)*sharedBytesPerThread(); }
    bool advanceSharedBytes(TuneParam &param) const { return false; } // Don't tune shared mem
    bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
    unsigned int minThreads() const { return meta.VolumeCB(); }
    bool advanceBlockDim(TuneParam &param) const {
      bool advance = Tunable::advanceBlockDim(param);
      param.shared_bytes = sharedBytesPerThread() * (param.block.x+1); // FIXME: use sharedBytesPerBlock
      return advance;
    }

  public:
    PackSpinor(OutOrder &out, const InOrder &in, Basis &basis, const ColorSpinorField &meta)
      : out(out), in(in), basis(basis), meta(meta) {
      writeAuxString("out_stride=%d,in_stride=%d", out.stride, in.stride);
    }
    virtual ~PackSpinor() { ; }

    void apply(const cudaStream_t &stream) {
      TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
      packSpinorKernel<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis>
        <<<tp.grid, tp.block, tp.shared_bytes, stream>>>
        (out, in, basis, meta.VolumeCB());
    }

    TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }

    std::string paramString(const TuneParam &param) const { // Don't bother printing the grid dim.
      std::stringstream ps;
      ps << "block=(" << param.block.x << "," << param.block.y << "," << param.block.z << "), ";
      ps << "shared=" << param.shared_bytes;
      return ps.str();
    }

    long long flops() const { return 0; }
    long long bytes() const { return in.Bytes() + out.Bytes(); }
  };
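
  // The launch configuration for packSpinorKernel is chosen by QUDA's autotuner:
  // PackSpinor derives from Tunable, and apply() queries tuneLaunch() for the
  // block size, sizing the dynamic shared-memory request at Ns*Nc*2 registers per
  // thread, padded by one thread per block to avoid bank conflicts.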

  // Decide whether we are changing the gamma basis or not
  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename OutOrder, typename InOrder>
  void genericCopyColorSpinor(OutOrder &outOrder, const InOrder &inOrder,
                              QudaGammaBasis dstBasis, QudaGammaBasis srcBasis,
                              const ColorSpinorField &out, QudaFieldLocation location) {
    if (dstBasis==srcBasis) {
      PreserveBasis<FloatOut, FloatIn, Ns, Nc> basis;
      if (location == QUDA_CPU_FIELD_LOCATION) {
        packSpinor<FloatOut, FloatIn, Ns, Nc>(outOrder, inOrder, basis, out.VolumeCB());
      } else {
        PackSpinor<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, PreserveBasis<FloatOut, FloatIn, Ns, Nc> >
          pack(outOrder, inOrder, basis, out);
        pack.apply(0);
      }
    } else if (dstBasis == QUDA_UKQCD_GAMMA_BASIS && srcBasis == QUDA_DEGRAND_ROSSI_GAMMA_BASIS) {
      if (Ns != 4) errorQuda("Can only change basis with Nspin = 4, not Nspin = %d", Ns);
      NonRelBasis<FloatOut, FloatIn, Ns, Nc> basis;
      if (location == QUDA_CPU_FIELD_LOCATION) {
        packSpinor<FloatOut, FloatIn, Ns, Nc>(outOrder, inOrder, basis, out.VolumeCB());
      } else {
        PackSpinor<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, NonRelBasis<FloatOut, FloatIn, Ns, Nc> >
          pack(outOrder, inOrder, basis, out);
        pack.apply(0);
      }
    } else if (srcBasis == QUDA_UKQCD_GAMMA_BASIS && dstBasis == QUDA_DEGRAND_ROSSI_GAMMA_BASIS) {
      if (Ns != 4) errorQuda("Can only change basis with Nspin = 4, not Nspin = %d", Ns);
      RelBasis<FloatOut, FloatIn, Ns, Nc> basis;
      if (location == QUDA_CPU_FIELD_LOCATION) {
        packSpinor<FloatOut, FloatIn, Ns, Nc>(outOrder, inOrder, basis, out.VolumeCB());
      } else {
        PackSpinor<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, RelBasis<FloatOut, FloatIn, Ns, Nc> >
          pack(outOrder, inOrder, basis, out);
        pack.apply(0);
      }
    } else if (dstBasis == QUDA_UKQCD_GAMMA_BASIS && srcBasis == QUDA_CHIRAL_GAMMA_BASIS) {
      if (Ns != 4) errorQuda("Can only change basis with Nspin = 4, not Nspin = %d", Ns);
      ChiralToNonRelBasis<FloatOut, FloatIn, Ns, Nc> basis;
      if (location == QUDA_CPU_FIELD_LOCATION) {
        packSpinor<FloatOut, FloatIn, Ns, Nc>(outOrder, inOrder, basis, out.VolumeCB());
      } else {
        PackSpinor<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, ChiralToNonRelBasis<FloatOut, FloatIn, Ns, Nc> >
          pack(outOrder, inOrder, basis, out);
        pack.apply(0);
      }
    } else if (srcBasis == QUDA_UKQCD_GAMMA_BASIS && dstBasis == QUDA_CHIRAL_GAMMA_BASIS) {
      if (Ns != 4) errorQuda("Can only change basis with Nspin = 4, not Nspin = %d", Ns);
      NonRelToChiralBasis<FloatOut, FloatIn, Ns, Nc> basis;
      if (location == QUDA_CPU_FIELD_LOCATION) {
        packSpinor<FloatOut, FloatIn, Ns, Nc>(outOrder, inOrder, basis, out.VolumeCB());
      } else {
        PackSpinor<FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, NonRelToChiralBasis<FloatOut, FloatIn, Ns, Nc> >
          pack(outOrder, inOrder, basis, out);
        pack.apply(0);
      }
    } else {
      errorQuda("Basis change not supported");
    }
  }

  // Decide on the output order
  template <typename FloatOut, typename FloatIn, int Ns, int Nc, typename InOrder>
  void genericCopyColorSpinor(InOrder &inOrder, ColorSpinorField &out,
                              QudaGammaBasis inBasis, QudaFieldLocation location,
                              FloatOut *Out, float *outNorm) {
    if (out.FieldOrder() == QUDA_FLOAT4_FIELD_ORDER) {
      FloatNOrder<FloatOut, Ns, Nc, 4> outOrder(out, Out, outNorm);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
        (outOrder, inOrder, out.GammaBasis(), inBasis, out, location);
    } else if (out.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
      FloatNOrder<FloatOut, Ns, Nc, 2> outOrder(out, Out, outNorm);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
        (outOrder, inOrder, out.GammaBasis(), inBasis, out, location);
    } else if (out.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
      SpaceSpinorColorOrder<FloatOut, Ns, Nc> outOrder(out, Out);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
        (outOrder, inOrder, out.GammaBasis(), inBasis, out, location);
    } else if (out.FieldOrder() == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) {
      SpaceColorSpinorOrder<FloatOut, Ns, Nc> outOrder(out, Out);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
        (outOrder, inOrder, out.GammaBasis(), inBasis, out, location);
    } else if (out.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER) {

#ifdef BUILD_QDPJIT_INTERFACE
      QDPJITDiracOrder<FloatOut, Ns, Nc> outOrder(out, Out);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>
        (outOrder, inOrder, out.GammaBasis(), inBasis, out, location);
#else
      errorQuda("QDPJIT interface has not been built\n");
#endif

    } else {
      errorQuda("Order not defined");
    }

  }

  // Decide on the input order
  template <typename FloatOut, typename FloatIn, int Ns, int Nc>
  void genericCopyColorSpinor(ColorSpinorField &out, const ColorSpinorField &in,
                              QudaFieldLocation location, FloatOut *Out, FloatIn *In,
                              float *outNorm, float *inNorm) {
    if (in.FieldOrder() == QUDA_FLOAT4_FIELD_ORDER) {
      FloatNOrder<FloatIn, Ns, Nc, 4> inOrder(in, In, inNorm);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in.GammaBasis(), location, Out, outNorm);
    } else if (in.FieldOrder() == QUDA_FLOAT2_FIELD_ORDER) {
      FloatNOrder<FloatIn, Ns, Nc, 2> inOrder(in, In, inNorm);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in.GammaBasis(), location, Out, outNorm);
    } else if (in.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
      SpaceSpinorColorOrder<FloatIn, Ns, Nc> inOrder(in, In);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in.GammaBasis(), location, Out, outNorm);
    } else if (in.FieldOrder() == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) {
      SpaceColorSpinorOrder<FloatIn, Ns, Nc> inOrder(in, In);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in.GammaBasis(), location, Out, outNorm);
    } else if (in.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER) {

#ifdef BUILD_QDPJIT_INTERFACE
      QDPJITDiracOrder<FloatIn, Ns, Nc> inOrder(in, In);
      genericCopyColorSpinor<FloatOut,FloatIn,Ns,Nc>(inOrder, out, in.GammaBasis(), location, Out, outNorm);
#else
      errorQuda("QDPJIT interface has not been built\n");
#endif

    } else {
      errorQuda("Order not defined");
    }

  }


  template <int Ns, typename dstFloat, typename srcFloat>
  void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src,
                              QudaFieldLocation location, dstFloat *Dst, srcFloat *Src,
                              float *dstNorm, float *srcNorm) {

    if (dst.Ndim() != src.Ndim())
      errorQuda("Number of dimensions %d %d don't match", dst.Ndim(), src.Ndim());

    if (dst.Volume() != src.Volume())
      errorQuda("Volumes %d %d don't match", dst.Volume(), src.Volume());

    if (!( dst.SiteOrder() == src.SiteOrder() ||
           (dst.SiteOrder() == QUDA_EVEN_ODD_SITE_ORDER &&
            src.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER) ||
           (dst.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER &&
            src.SiteOrder() == QUDA_EVEN_ODD_SITE_ORDER) ) ) {
      errorQuda("Subset orders %d %d don't match", dst.SiteOrder(), src.SiteOrder());
    }

    if (dst.SiteSubset() != src.SiteSubset())
      errorQuda("Subset types do not match %d %d", dst.SiteSubset(), src.SiteSubset());

    if (dst.Ncolor() != 3 || src.Ncolor() != 3) errorQuda("Nc != 3 not yet supported");

    const int Nc = 3;

    // We currently only support parity-ordered fields; even-odd or odd-even
    if (dst.SiteOrder() == QUDA_LEXICOGRAPHIC_SITE_ORDER || src.SiteOrder() == QUDA_LEXICOGRAPHIC_SITE_ORDER) {
      errorQuda("Copying to full fields with lexicographical ordering is not currently supported");
    }

    if (dst.SiteSubset() == QUDA_FULL_SITE_SUBSET) { // full field
      if (src.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER ||
          dst.FieldOrder() == QUDA_QDPJIT_FIELD_ORDER) {
        errorQuda("QDPJIT field ordering not supported for full site fields");
      }

      // set for the source subset ordering
      srcFloat *srcEven = Src ? Src : (srcFloat*)src.V();
      srcFloat *srcOdd = (srcFloat*)((char*)srcEven + src.Bytes()/2);
      float *srcNormEven = srcNorm ? srcNorm : (float*)src.Norm();
      float *srcNormOdd = (float*)((char*)srcNormEven + src.NormBytes()/2);
      if (src.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER) {
        std::swap<srcFloat*>(srcEven, srcOdd);
        std::swap<float*>(srcNormEven, srcNormOdd);
      }

      // set for the destination subset ordering
      dstFloat *dstEven = Dst ? Dst : (dstFloat*)dst.V();
      dstFloat *dstOdd = (dstFloat*)((char*)dstEven + dst.Bytes()/2);
      float *dstNormEven = dstNorm ? dstNorm : (float*)dst.Norm();
      float *dstNormOdd = (float*)((char*)dstNormEven + dst.NormBytes()/2);
      if (dst.SiteOrder() == QUDA_ODD_EVEN_SITE_ORDER) {
        std::swap<dstFloat*>(dstEven, dstOdd);
        std::swap<float*>(dstNormEven, dstNormOdd);
      }

      genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>
        (dst, src, location, dstEven, srcEven, dstNormEven, srcNormEven);
      genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>
        (dst, src, location, dstOdd, srcOdd, dstNormOdd, srcNormOdd);
    } else { // parity field
      genericCopyColorSpinor<dstFloat, srcFloat, Ns, Nc>
        (dst, src, location, Dst, Src, dstNorm, srcNorm);
    }

  }

  template <typename dstFloat, typename srcFloat>
  void CopyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src,
                              QudaFieldLocation location, dstFloat *Dst, srcFloat *Src,
                              float *dstNorm=0, float *srcNorm=0) {

    if (dst.Nspin() != src.Nspin())
      errorQuda("source and destination spins must match");

    if (dst.Nspin() == 4) {
      copyGenericColorSpinor<4>(dst, src, location, Dst, Src, dstNorm, srcNorm);
    } else if (dst.Nspin() == 1) {
      copyGenericColorSpinor<1>(dst, src, location, Dst, Src, dstNorm, srcNorm);
    } else {
      errorQuda("Nspin=%d unsupported", dst.Nspin());
    }

  }

  void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src,
                              QudaFieldLocation location, void *Dst, void *Src,
                              void *dstNorm, void *srcNorm) {

    if (dst.Precision() == QUDA_DOUBLE_PRECISION) {
      if (src.Precision() == QUDA_DOUBLE_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (double*)Dst, (double*)Src);
      } else if (src.Precision() == QUDA_SINGLE_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (double*)Dst, (float*)Src);
      } else if (src.Precision() == QUDA_HALF_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (double*)Dst, (short*)Src, 0, (float*)srcNorm);
      } else {
        errorQuda("Unsupported Precision %d", src.Precision());
      }
    } else if (dst.Precision() == QUDA_SINGLE_PRECISION) {
      if (src.Precision() == QUDA_DOUBLE_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (float*)Dst, (double*)Src);
      } else if (src.Precision() == QUDA_SINGLE_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (float*)Dst, (float*)Src);
      } else if (src.Precision() == QUDA_HALF_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (float*)Dst, (short*)Src, 0, (float*)srcNorm);
      } else {
        errorQuda("Unsupported Precision %d", src.Precision());
      }
    } else if (dst.Precision() == QUDA_HALF_PRECISION) {
      if (src.Precision() == QUDA_DOUBLE_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (short*)Dst, (double*)Src, (float*)dstNorm, 0);
      } else if (src.Precision() == QUDA_SINGLE_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (short*)Dst, (float*)Src, (float*)dstNorm, 0);
      } else if (src.Precision() == QUDA_HALF_PRECISION) {
        CopyGenericColorSpinor(dst, src, location, (short*)Dst, (short*)Src, (float*)dstNorm, (float*)srcNorm);
      } else {
        errorQuda("Unsupported Precision %d", src.Precision());
      }
    } else {
      errorQuda("Unsupported Precision %d", dst.Precision());
    }
  }
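
  // A minimal usage sketch (dst and src are hypothetical, device-resident fields;
  // the null pointers request that each field's own data and norm buffers be used):
  //
  //   copyGenericColorSpinor(dst, src, QUDA_CUDA_FIELD_LOCATION, 0, 0, 0, 0);
  //
  // The precision, field order and gamma basis of dst and src may all differ;
  // the dispatch above selects the appropriate instantiation.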

} // namespace quda