QUDA v0.7.0
A library for QCD on GPUs
copy_gauge_inc.cu
#include <gauge_field_order.h>

namespace quda {

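  /**
     Argument structure passed to the gauge copy routines, holding the
     output and input field accessors, the volume, the checkerboard face
     volumes, the number of dimensions, and the field geometry.
   */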
  template <typename OutOrder, typename InOrder>
  struct CopyGaugeArg {
    OutOrder out;
    const InOrder in;
    int volume;
    int faceVolumeCB[QUDA_MAX_DIM];
    int nDim;
    int geometry;
    CopyGaugeArg(const OutOrder &out, const InOrder &in, int volume,
                 const int *faceVolumeCB, int nDim, int geometry)
      : out(out), in(in), volume(volume), nDim(nDim), geometry(geometry) {
      for (int d=0; d<nDim; d++) this->faceVolumeCB[d] = faceVolumeCB[d];
    }
  };

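  /**
     Generic CPU routine for reordering and packing the body of a gauge field.
   */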
  template <typename FloatOut, typename FloatIn, int length, typename OutOrder, typename InOrder>
  void copyGauge(CopyGaugeArg<OutOrder,InOrder> arg) {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;

    for (int parity=0; parity<2; parity++) {

      for (int d=0; d<arg.geometry; d++) {
        for (int x=0; x<arg.volume/2; x++) {
          RegTypeIn in[length];
          RegTypeOut out[length];
          arg.in.load(in, x, d, parity);
          for (int i=0; i<length; i++) out[i] = in[i];
          arg.out.save(out, x, d, parity);
        }
      }

    }
  }

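  /**
     Generic CUDA kernel for reordering and packing the body of a gauge
     field; one thread per checkerboard site, looping over parity and geometry.
   */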
  template <typename FloatOut, typename FloatIn, int length, typename OutOrder, typename InOrder>
  __global__ void copyGaugeKernel(CopyGaugeArg<OutOrder,InOrder> arg) {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;

    for (int parity=0; parity<2; parity++) {

      for (int d=0; d<arg.geometry; d++) {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        if (x >= arg.volume/2) return;

        RegTypeIn in[length];
        RegTypeOut out[length];
        arg.in.load(in, x, d, parity);
        for (int i=0; i<length; i++) out[i] = in[i];
        arg.out.save(out, x, d, parity);
      }
    }
  }

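  /**
     Generic CPU routine for reordering and packing the ghost (halo) zone
     of a gauge field.
   */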
  template <typename FloatOut, typename FloatIn, int length, typename OutOrder, typename InOrder>
  void copyGhost(CopyGaugeArg<OutOrder,InOrder> arg) {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;

    for (int parity=0; parity<2; parity++) {

      for (int d=0; d<arg.nDim; d++) {
        for (int x=0; x<arg.faceVolumeCB[d]; x++) {
          RegTypeIn in[length];
          RegTypeOut out[length];
          arg.in.loadGhost(in, x, d, parity); // assumes we are loading
          for (int i=0; i<length; i++) out[i] = in[i];
          arg.out.saveGhost(out, x, d, parity);
        }
      }

    }
  }

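  /**
     Generic CUDA kernel for reordering and packing the ghost (halo) zone;
     one thread per ghost site, with each dimension bounds-checked against
     its face volume.
   */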
  template <typename FloatOut, typename FloatIn, int length, typename OutOrder, typename InOrder>
  __global__ void copyGhostKernel(CopyGaugeArg<OutOrder,InOrder> arg) {
    typedef typename mapper<FloatIn>::type RegTypeIn;
    typedef typename mapper<FloatOut>::type RegTypeOut;

    int x = blockIdx.x * blockDim.x + threadIdx.x;

    for (int parity=0; parity<2; parity++) {
      for (int d=0; d<arg.nDim; d++) {
        if (x < arg.faceVolumeCB[d]) {
          RegTypeIn in[length];
          RegTypeOut out[length];
          arg.in.loadGhost(in, x, d, parity); // assumes we are loading
          for (int i=0; i<length; i++) out[i] = in[i];
          arg.out.saveGhost(out, x, d, parity);
        }
      }

    }
  }

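  /**
     Tunable class that launches either the body-copy or the ghost-copy
     kernel, with the number of threads set by the body volume or by the
     largest face volume when isGhost is true.
   */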
  template <typename FloatOut, typename FloatIn, int length, typename OutOrder, typename InOrder, bool isGhost>
  class CopyGauge : Tunable {
    CopyGaugeArg<OutOrder,InOrder> arg;
    int size;
    const GaugeField &meta;

  private:
    unsigned int sharedBytesPerThread() const { return 0; }
    unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }

    bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
    unsigned int minThreads() const { return size; }

  public:
    CopyGauge(CopyGaugeArg<OutOrder,InOrder> &arg, const GaugeField &meta) : arg(arg), meta(meta) {
      int faceMax = 0;
      for (int d=0; d<arg.nDim; d++) {
        faceMax = (arg.faceVolumeCB[d] > faceMax) ? arg.faceVolumeCB[d] : faceMax;
      }
      size = isGhost ? faceMax : arg.volume/2;
      writeAuxString("out_stride=%d,in_stride=%d", arg.out.stride, arg.in.stride);
    }

    virtual ~CopyGauge() { ; }

    void apply(const cudaStream_t &stream) {
      TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
#if (__COMPUTE_CAPABILITY__ >= 200)
      if (!isGhost) {
        copyGaugeKernel<FloatOut, FloatIn, length, OutOrder, InOrder>
          <<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg);
      } else {
        copyGhostKernel<FloatOut, FloatIn, length, OutOrder, InOrder>
          <<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg);
      }
#else
      errorQuda("Gauge copy not supported on pre-Fermi architecture");
#endif
    }

    TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }

    std::string paramString(const TuneParam &param) const { // Don't bother printing the grid dim.
      std::stringstream ps;
      ps << "block=(" << param.block.x << "," << param.block.y << "," << param.block.z << "), ";
      ps << "shared=" << param.shared_bytes;
      return ps.str();
    }

    long long flops() const { return 0; }
    long long bytes() const {
      int sites = 4*arg.volume/2;
      if (isGhost) {
        sites = 0;
        for (int d=0; d<4; d++) sites += arg.faceVolumeCB[d];
      }
#if __COMPUTE_CAPABILITY__ >= 200
      return 2 * sites * ( arg.in.Bytes() + arg.in.hasPhase*sizeof(FloatIn)
                           + arg.out.Bytes() + arg.out.hasPhase*sizeof(FloatOut) );
#else
      return 2 * sites * ( arg.in.Bytes() + arg.out.Bytes() );
#endif
    }
  };

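  /**
     Dispatch the copy to the host routines or to the tuned CUDA kernels,
     depending on the field location; type selects both body and ghost (0),
     ghost only (1), or body only (2).
   */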
  template <typename FloatOut, typename FloatIn, int length, typename OutOrder, typename InOrder>
  void copyGauge(OutOrder outOrder, const InOrder inOrder, int volume, const int *faceVolumeCB,
                 int nDim, int geometry, const GaugeField &out, QudaFieldLocation location, int type) {

    CopyGaugeArg<OutOrder,InOrder> arg(outOrder, inOrder, volume, faceVolumeCB, nDim, geometry);

    if (location == QUDA_CPU_FIELD_LOCATION) {
      if (type == 0 || type == 2) {
        copyGauge<FloatOut, FloatIn, length>(arg);
      }
#ifdef MULTI_GPU // only copy the ghost zone if doing multi-gpu
      if (type == 0 || type == 1) {
        if (geometry == QUDA_VECTOR_GEOMETRY) copyGhost<FloatOut, FloatIn, length>(arg);
        //else warningQuda("Cannot copy for %d geometry gauge field", geometry);
      }
#endif
    } else if (location == QUDA_CUDA_FIELD_LOCATION) {
      // first copy body
      if (type == 0 || type == 2) {
        CopyGauge<FloatOut, FloatIn, length, OutOrder, InOrder, false> gaugeCopier(arg, out);
        gaugeCopier.apply(0);
      }
#ifdef MULTI_GPU
      if (type == 0 || type == 1) {
        if (geometry == QUDA_VECTOR_GEOMETRY) {
          // now copy ghost
          CopyGauge<FloatOut, FloatIn, length, OutOrder, InOrder, true> ghostCopier(arg, out);
          ghostCopier.apply(0);
        } else {
          //warningQuda("Cannot copy for %d geometry gauge field", geometry);
        }
      }
#endif
    } else {
      errorQuda("Undefined field location %d for copyGauge", location);
    }

  }

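  /**
     Instantiate the output-field accessor from the destination gauge order
     and reconstruction, then hand off to the generic copy dispatch.
   */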
  template <typename FloatOut, typename FloatIn, int length, typename InOrder>
  void copyGauge(const InOrder &inOrder, GaugeField &out, QudaFieldLocation location,
                 FloatOut *Out, FloatOut **outGhost, int type) {
    int faceVolumeCB[QUDA_MAX_DIM];
    for (int i=0; i<4; i++) faceVolumeCB[i] = out.SurfaceCB(i) * out.Nface();
    if (out.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
      if (out.Reconstruct() == QUDA_RECONSTRUCT_NO) {
        if (typeid(FloatOut)==typeid(short) && out.LinkType() == QUDA_ASQTAD_FAT_LINKS) {
          copyGauge<FloatOut,FloatIn,length>
            (FloatNOrder<FloatOut,length,2,19>(out, Out, outGhost), inOrder, out.Volume(),
             faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
        } else {
          copyGauge<FloatOut,FloatIn,length>
            (FloatNOrder<FloatOut,length,2,18>(out, Out, outGhost), inOrder, out.Volume(),
             faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
        }
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_12) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,2,12>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_8) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,2,8>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#if defined(GPU_STAGGERED_DIRAC) && __COMPUTE_CAPABILITY__ >= 200
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_13) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,2,13>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_9) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,2,9>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#endif
      } else {
        errorQuda("Reconstruction %d and order %d not supported", out.Reconstruct(), out.Order());
      }
    } else if (out.Order() == QUDA_FLOAT4_GAUGE_ORDER) {
      if (out.Reconstruct() == QUDA_RECONSTRUCT_12) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,4,12>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_8) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,4,8>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#if defined(GPU_STAGGERED_DIRAC) && __COMPUTE_CAPABILITY__ >= 200
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_13) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,4,13>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
      } else if (out.Reconstruct() == QUDA_RECONSTRUCT_9) {
        copyGauge<FloatOut,FloatIn,length>
          (FloatNOrder<FloatOut,length,4,9>(out, Out, outGhost), inOrder, out.Volume(),
           faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#endif
      } else {
        errorQuda("Reconstruction %d and order %d not supported", out.Reconstruct(), out.Order());
      }
    } else if (out.Order() == QUDA_QDP_GAUGE_ORDER) {

#ifdef BUILD_QDP_INTERFACE
      copyGauge<FloatOut,FloatIn,length>
        (QDPOrder<FloatOut,length>(out, Out, outGhost), inOrder, out.Volume(),
         faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#else
      errorQuda("QDP interface has not been built\n");
#endif

    } else if (out.Order() == QUDA_QDPJIT_GAUGE_ORDER) {

#ifdef BUILD_QDPJIT_INTERFACE
      copyGauge<FloatOut,FloatIn,length>
        (QDPJITOrder<FloatOut,length>(out, Out, outGhost), inOrder, out.Volume(),
         faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#else
      errorQuda("QDPJIT interface has not been built\n");
#endif

    } else if (out.Order() == QUDA_CPS_WILSON_GAUGE_ORDER) {

#ifdef BUILD_CPS_INTERFACE
      copyGauge<FloatOut,FloatIn,length>
        (CPSOrder<FloatOut,length>(out, Out, outGhost), inOrder, out.Volume(),
         faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#else
      errorQuda("CPS interface has not been built\n");
#endif

    } else if (out.Order() == QUDA_MILC_GAUGE_ORDER) {

#ifdef BUILD_MILC_INTERFACE
      copyGauge<FloatOut,FloatIn,length>
        (MILCOrder<FloatOut,length>(out, Out, outGhost), inOrder, out.Volume(),
         faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#else
      errorQuda("MILC interface has not been built\n");
#endif

    } else if (out.Order() == QUDA_BQCD_GAUGE_ORDER) {

#ifdef BUILD_BQCD_INTERFACE
      copyGauge<FloatOut,FloatIn,length>
        (BQCDOrder<FloatOut,length>(out, Out, outGhost), inOrder, out.Volume(),
         faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#else
      errorQuda("BQCD interface has not been built\n");
#endif

    } else if (out.Order() == QUDA_TIFR_GAUGE_ORDER) {

#ifdef BUILD_TIFR_INTERFACE
      copyGauge<FloatOut,FloatIn,length>
        (TIFROrder<FloatOut,length>(out, Out, outGhost), inOrder, out.Volume(),
         faceVolumeCB, out.Ndim(), out.Geometry(), out, location, type);
#else
      errorQuda("TIFR interface has not been built\n");
#endif

    } else {
      errorQuda("Gauge field %d order not supported", out.Order());
    }

  }

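  /**
     Instantiate the input-field accessor from the source gauge order and
     reconstruction, then dispatch on the output field.
   */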
  template <typename FloatOut, typename FloatIn, int length>
  void copyGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location,
                 FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) {

    // reconstruction only supported on FloatN fields currently
    if (in.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
      if (in.Reconstruct() == QUDA_RECONSTRUCT_NO) {
        if (typeid(FloatIn)==typeid(short) && in.LinkType() == QUDA_ASQTAD_FAT_LINKS) {
          copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,2,19>(in, In, inGhost),
                                              out, location, Out, outGhost, type);
        } else {
          copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,2,18>(in, In, inGhost),
                                              out, location, Out, outGhost, type);
        }
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_12) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,2,12>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_8) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,2,8>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
#if defined(GPU_STAGGERED_DIRAC) && __COMPUTE_CAPABILITY__ >= 200
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_13) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,2,13>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_9) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,2,9>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
#endif
      } else {
        errorQuda("Reconstruction %d and order %d not supported", in.Reconstruct(), in.Order());
      }
    } else if (in.Order() == QUDA_FLOAT4_GAUGE_ORDER) {
      if (in.Reconstruct() == QUDA_RECONSTRUCT_12) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,4,12>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_8) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,4,8>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
#if defined(GPU_STAGGERED_DIRAC) && __COMPUTE_CAPABILITY__ >= 200
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_13) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,4,13>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
      } else if (in.Reconstruct() == QUDA_RECONSTRUCT_9) {
        copyGauge<FloatOut,FloatIn,length> (FloatNOrder<FloatIn,length,4,9>(in, In, inGhost),
                                            out, location, Out, outGhost, type);
#endif
      } else {
        errorQuda("Reconstruction %d and order %d not supported", in.Reconstruct(), in.Order());
      }
    } else if (in.Order() == QUDA_QDP_GAUGE_ORDER) {

#ifdef BUILD_QDP_INTERFACE
      copyGauge<FloatOut,FloatIn,length>(QDPOrder<FloatIn,length>(in, In, inGhost),
                                         out, location, Out, outGhost, type);
#else
      errorQuda("QDP interface has not been built\n");
#endif

    } else if (in.Order() == QUDA_QDPJIT_GAUGE_ORDER) {

#ifdef BUILD_QDPJIT_INTERFACE
      copyGauge<FloatOut,FloatIn,length>(QDPJITOrder<FloatIn,length>(in, In, inGhost),
                                         out, location, Out, outGhost, type);
#else
      errorQuda("QDPJIT interface has not been built\n");
#endif

    } else if (in.Order() == QUDA_CPS_WILSON_GAUGE_ORDER) {

#ifdef BUILD_CPS_INTERFACE
      copyGauge<FloatOut,FloatIn,length>(CPSOrder<FloatIn,length>(in, In, inGhost),
                                         out, location, Out, outGhost, type);
#else
      errorQuda("CPS interface has not been built\n");
#endif

    } else if (in.Order() == QUDA_MILC_GAUGE_ORDER) {

#ifdef BUILD_MILC_INTERFACE
      copyGauge<FloatOut,FloatIn,length>(MILCOrder<FloatIn,length>(in, In, inGhost),
                                         out, location, Out, outGhost, type);
#else
      errorQuda("MILC interface has not been built\n");
#endif

    } else if (in.Order() == QUDA_BQCD_GAUGE_ORDER) {

#ifdef BUILD_BQCD_INTERFACE
      copyGauge<FloatOut,FloatIn,length>(BQCDOrder<FloatIn,length>(in, In, inGhost),
                                         out, location, Out, outGhost, type);
#else
      errorQuda("BQCD interface has not been built\n");
#endif

    } else if (in.Order() == QUDA_TIFR_GAUGE_ORDER) {

#ifdef BUILD_TIFR_INTERFACE
      copyGauge<FloatOut,FloatIn,length>(TIFROrder<FloatIn,length>(in, In, inGhost),
                                         out, location, Out, outGhost, type);
#else
      errorQuda("TIFR interface has not been built\n");
#endif

    } else {
      errorQuda("Gauge field %d order not supported", in.Order());
    }

  }

  void checkMomOrder(const GaugeField &u);

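  /**
     Top-level entry point for a given output/input precision pair: gauge
     links are copied with length 18 (a full 3x3 complex matrix per link),
     while momentum fields are handled separately below.
   */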
  template <typename FloatOut, typename FloatIn>
  void copyGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out,
                 FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) {

    if (in.Ncolor() != 3 && out.Ncolor() != 3) {
      errorQuda("Unsupported number of colors; out.Nc=%d, in.Nc=%d", out.Ncolor(), in.Ncolor());
    }

    if (out.Geometry() != in.Geometry()) {
      errorQuda("Field geometries %d %d do not match", out.Geometry(), in.Geometry());
    }

#if __COMPUTE_CAPABILITY__ < 200
    if (in.Reconstruct() == QUDA_RECONSTRUCT_9 || in.Reconstruct() == QUDA_RECONSTRUCT_13 ||
        out.Reconstruct() == QUDA_RECONSTRUCT_9 || out.Reconstruct() == QUDA_RECONSTRUCT_13)
      errorQuda("Reconstruct 9/13 not supported on pre-Fermi architecture");
#endif

    if (in.LinkType() != QUDA_ASQTAD_MOM_LINKS && out.LinkType() != QUDA_ASQTAD_MOM_LINKS) {
      // we are doing gauge field packing
      copyGauge<FloatOut,FloatIn,18>(out, in, location, Out, In, outGhost, inGhost, type);
    } else {
      if (location != QUDA_CPU_FIELD_LOCATION) errorQuda("Location %d not supported", location);
      if (out.Geometry() != QUDA_VECTOR_GEOMETRY) errorQuda("Unsupported geometry %d", out.Geometry());

      checkMomOrder(in);
      checkMomOrder(out);

      int faceVolumeCB[QUDA_MAX_DIM];
      for (int d=0; d<in.Ndim(); d++) faceVolumeCB[d] = in.SurfaceCB(d) * in.Nface();

      // momentum is currently supported only on MILC (10), TIFR (18) and Float2 (10) ordered fields
      if (out.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
        if (in.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
          CopyGaugeArg<FloatNOrder<FloatOut,10,2,10>, FloatNOrder<FloatIn,10,2,10> >
            arg(FloatNOrder<FloatOut,10,2,10>(out, Out),
                FloatNOrder<FloatIn,10,2,10>(in, In), in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,10>(arg);
        } else if (in.Order() == QUDA_MILC_GAUGE_ORDER) {
#ifdef BUILD_MILC_INTERFACE
          CopyGaugeArg<FloatNOrder<FloatOut,10,2,10>, MILCOrder<FloatIn,10> >
            arg(FloatNOrder<FloatOut,10,2,10>(out, Out), MILCOrder<FloatIn,10>(in, In),
                in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,10>(arg);
#else
          errorQuda("MILC interface has not been built\n");
#endif

        } else if (in.Order() == QUDA_TIFR_GAUGE_ORDER) {
#ifdef BUILD_TIFR_INTERFACE
          CopyGaugeArg<FloatNOrder<FloatOut,18,2,11>, TIFROrder<FloatIn,18> >
            arg(FloatNOrder<FloatOut,18,2,11>(out, Out), TIFROrder<FloatIn,18>(in, In),
                in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,18>(arg);
#else
          errorQuda("TIFR interface has not been built\n");
#endif

        } else {
          errorQuda("Gauge field orders %d not supported", in.Order());
        }
      } else if (out.Order() == QUDA_MILC_GAUGE_ORDER) {
#ifdef BUILD_MILC_INTERFACE
        if (in.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
          CopyGaugeArg<MILCOrder<FloatOut,10>, FloatNOrder<FloatIn,10,2,10> >
            arg(MILCOrder<FloatOut,10>(out, Out), FloatNOrder<FloatIn,10,2,10>(in, In),
                in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,10>(arg);
        } else if (in.Order() == QUDA_MILC_GAUGE_ORDER) {
          CopyGaugeArg<MILCOrder<FloatOut,10>, MILCOrder<FloatIn,10> >
            arg(MILCOrder<FloatOut,10>(out, Out), MILCOrder<FloatIn,10>(in, In),
                in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,10>(arg);
        } else {
          errorQuda("Gauge field orders %d not supported", in.Order());
        }
#else
        errorQuda("MILC interface has not been built\n");
#endif

      } else if (out.Order() == QUDA_TIFR_GAUGE_ORDER) {
#ifdef BUILD_TIFR_INTERFACE
        if (in.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
          // FIX ME - 11 is a misnomer to avoid confusion in template instantiation
          CopyGaugeArg<TIFROrder<FloatOut,18>, FloatNOrder<FloatIn,18,2,11> >
            arg(TIFROrder<FloatOut,18>(out, Out), FloatNOrder<FloatIn,18,2,11>(in, In),
                in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,18>(arg);
        } else if (in.Order() == QUDA_TIFR_GAUGE_ORDER) {
          CopyGaugeArg<TIFROrder<FloatOut,18>, TIFROrder<FloatIn,18> >
            arg(TIFROrder<FloatOut,18>(out, Out), TIFROrder<FloatIn,18>(in, In),
                in.Volume(), faceVolumeCB, in.Ndim(), in.Geometry());
          copyGauge<FloatOut,FloatIn,10>(arg);
        } else {
          errorQuda("Gauge field orders %d not supported", in.Order());
        }
#else
        errorQuda("TIFR interface has not been built\n");
#endif
      } else {
        errorQuda("Gauge field orders %d not supported", out.Order());
      }
    }
  }

} // namespace quda