QUDA  v1.1.0
A library for QCD on GPUs
color_spinor_field.h
Go to the documentation of this file.
1 #ifndef _COLOR_SPINOR_FIELD_H
2 #define _COLOR_SPINOR_FIELD_H
3 
4 #include <quda_internal.h>
5 #include <quda.h>
6 
7 #include <iostream>
8 
9 #include <lattice_field.h>
10 #include <random_quda.h>
11 #include <fast_intdiv.h>
12 
13 #include <comm_key.h>
14 
15 namespace quda {
16 
17  namespace colorspinor
18  {
19 
20  inline bool isNative(QudaFieldOrder order, QudaPrecision precision, int nSpin, int nColor)
21  {
22  if (precision == QUDA_DOUBLE_PRECISION) {
23  if (order == QUDA_FLOAT2_FIELD_ORDER) return true;
24  } else if (precision == QUDA_SINGLE_PRECISION) {
25  if (nSpin == 4) {
26  if (order == QUDA_FLOAT4_FIELD_ORDER) return true;
27  } else if (nSpin == 2) {
28  if (order == QUDA_FLOAT2_FIELD_ORDER) return true;
29  } else if (nSpin == 1) {
30  if (order == QUDA_FLOAT2_FIELD_ORDER) return true;
31  }
32  } else if (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) {
33  if (nSpin == 4) {
34 #ifdef FLOAT8
35  if (order == QUDA_FLOAT8_FIELD_ORDER) return true;
36 #else
37  if (order == QUDA_FLOAT4_FIELD_ORDER) return true;
38 #endif
39  } else if (nSpin == 2) {
40  if (order == QUDA_FLOAT2_FIELD_ORDER) return true;
41  } else if (nSpin == 1) {
42  if (order == QUDA_FLOAT2_FIELD_ORDER) return true;
43  }
44  }
45  return false;
46  }
47 
48  } // namespace colorspinor
49 
50  enum MemoryLocation { Device = 1, Host = 2, Remote = 4, Shmem = 8 };
51 
52  struct FullClover;
53 
60  {
62  return QUDA_EVEN_PARITY;
64  return QUDA_ODD_PARITY;
65  } else {
66  return QUDA_INVALID_PARITY;
67  }
68  }
69 
71  typedef std::vector<ColorSpinorField*> CompositeColorSpinorField;
72 
81 
82  bool is_composite; //set to 'false' for a regular spinor field
83  bool is_component; //set to 'true' if we want to work with an individual component (otherwise will work with the whole set)
84 
85  int dim;//individual component has dim = 0
86  int id;
87 
88  size_t volume; // volume of a single eigenvector
89  size_t volumeCB; // CB volume of a single eigenvector
90  size_t stride; // stride of a single eigenvector
91  size_t real_length; // physical length of a single eigenvector
92  size_t length; // length including pads (but not ghost zones)
93 
94  size_t bytes; // size in bytes of spinor field
95  size_t norm_bytes; // makes no sense but let's keep it...
96 
98  : is_composite(false), is_component(false), dim(0), id(0), volume(0), volumeCB(0),
99  stride(0), real_length(0), length(0), bytes(0), norm_bytes(0) {};
100 
103  stride(0), real_length(0), length(0), bytes(0), norm_bytes(0)
104  {
105  if(is_composite && is_component) errorQuda("\nComposite type is not implemented.\n");
106  else if(is_composite && dim == 0) is_composite = false;
107  }
108 
110  {
111  is_composite = descr.is_composite;
112  is_component = descr.is_component;
113 
114  if(is_composite && is_component) errorQuda("\nComposite type is not implemented.\n");
115 
116  dim = descr.dim;
117  id = descr.id;
118 
119  volume = descr.volume;
120  volumeCB = descr.volumeCB;
121  stride = descr.stride; // stride of a single eigenvector
122  real_length = descr.real_length; // physical length of a single eigenvector
123  length = descr.length; // length including pads (but not ghost zones)
124 
125  bytes = descr.bytes; // size in bytes of spinor field
126  norm_bytes = descr.norm_bytes; // makes no sense but let's keep it...
127  }
128 
129  };
130 
132 
133  public:
134  QudaFieldLocation location; // where are we storing the field (CUDA or CPU)?
135 
136  int nColor; // Number of colors of the field
137  int nSpin; // =1 for staggered, =2 for coarse Dslash, =4 for 4d spinor
138  int nVec; // number of packed vectors (for multigrid transfer operator)
139 
140  QudaTwistFlavorType twistFlavor; // used by twisted mass
141 
142  QudaSiteOrder siteOrder; // defined for full fields
143 
144  QudaFieldOrder fieldOrder; // Float, Float2, Float4 etc.
147 
148  QudaPCType pc_type; // used to select preconditioning method in DWF
149 
155 
156  void *v; // pointer to field
157  void *norm;
158 
161  int composite_dim; //e.g., number of eigenvectors in the set
163  int component_id; //eigenvector index
164 
173  bool force_native = false)
174  {
175  // is the current status in native field order?
176  bool native = force_native ? true : colorspinor::isNative(fieldOrder, this->precision, nSpin, nColor);
177  this->precision = precision;
179 
180  // if this is a native field order, let's preserve that status, else keep the same field order
181  if (native) {
184 #ifdef FLOAT8
186 #endif
187  }
188  }
189 
191 
195  nColor(0),
196  nSpin(0),
197  nVec(1),
205  is_composite(false),
206  composite_dim(0),
207  is_component(false),
208  component_id(0)
209  {
210  ;
211  }
212 
213  // used to create cpu params
214 
215  ColorSpinorParam(void *V, QudaInvertParam &inv_param, const int *X, const bool pc_solution,
219  nColor(3),
222  1 :
223  4),
224  nVec(1),
228  gammaBasis(inv_param.gamma_basis),
231  v(V),
232  is_composite(false),
233  composite_dim(0),
234  is_component(false),
235  component_id(0)
236  {
237 
238  if (nDim > QUDA_MAX_DIM) errorQuda("Number of dimensions too great");
239  for (int d = 0; d < nDim; d++) x[d] = X[d];
240 
241  if (!pc_solution) {
243  } else {
244  x[0] /= 2; // X defined the full lattice dimensions
246  }
247 
249 
252  nDim++;
253  x[4] = inv_param.Ls;
255  nDim++;
256  x[4] = 2; // for two flavors
258  nDim++;
259  x[4] = inv_param.Ls;
260  } else {
261  x[4] = 1;
262  }
263 
273  } else if (inv_param.dirac_order == QUDA_DIRAC_ORDER) {
282  } else {
283  errorQuda("Dirac order %d not supported", inv_param.dirac_order);
284  }
285  }
286 
287  // normally used to create cuda param from a cpu param
290  LatticeFieldParam(cpuParam.nDim, cpuParam.x, 0, inv_param.cuda_prec),
292  nColor(cpuParam.nColor),
293  nSpin(cpuParam.nSpin),
294  nVec(cpuParam.nVec),
295  twistFlavor(cpuParam.twistFlavor),
300  pc_type(cpuParam.pc_type),
302  v(0),
303  is_composite(cpuParam.is_composite),
304  composite_dim(cpuParam.composite_dim),
305  is_component(false),
306  component_id(0)
307  {
308  siteSubset = cpuParam.siteSubset;
310  for (int d = 0; d < QUDA_MAX_DIM; d++) x[d] = cpuParam.x[d];
311  }
312 
313  void print() {
314  printfQuda("nColor = %d\n", nColor);
315  printfQuda("nSpin = %d\n", nSpin);
316  printfQuda("twistFlavor = %d\n", twistFlavor);
317  printfQuda("nDim = %d\n", nDim);
318  for (int d=0; d<nDim; d++) printfQuda("x[%d] = %d\n", d, x[d]);
319  printfQuda("precision = %d\n", precision);
320  printfQuda("ghost_precision = %d\n", ghost_precision);
321  printfQuda("pad = %d\n", pad);
322  printfQuda("siteSubset = %d\n", siteSubset);
323  printfQuda("siteOrder = %d\n", siteOrder);
324  printfQuda("fieldOrder = %d\n", fieldOrder);
325  printfQuda("gammaBasis = %d\n", gammaBasis);
326  printfQuda("create = %d\n", create);
327  printfQuda("pc_type = %d\n", pc_type);
328  printfQuda("suggested_parity = %d\n", suggested_parity);
329  printfQuda("v = %lx\n", (unsigned long)v);
330  printfQuda("norm = %lx\n", (unsigned long)norm);
332  if(is_composite) printfQuda("Number of elements = %d\n", composite_dim);
333  }
334 
335  virtual ~ColorSpinorParam() {
336  }
337 
338  };
339 
340  class cpuColorSpinorField;
341  class cudaColorSpinorField;
342 
346  struct DslashConstant {
347  int Vh;
350  int Ls;
351 
354 
362 
365 
366  int X2X1;
367  int X3X2X1;
368  int X4X3X2X1;
369 
370  int X2X1mX1;
375 
377  };
378 
380 
381  private:
382  void create(int nDim, const int *x, int Nc, int Ns, int Nvec, QudaTwistFlavorType Twistflavor,
385  void destroy();
386 
387  protected:
388  bool init;
389 
392 
393  int nColor;
394  int nSpin;
395  int nVec;
396 
397  int nDim;
399 
400  size_t volume;
401  size_t volumeCB;
402  size_t pad;
403  size_t stride;
404 
406 
407  QudaPCType pc_type; // used to select preconditioning method in DWF
408 
414 
415  size_t real_length; // physical length only
416  size_t length; // length including pads, but not ghost zone - used for BLAS
417 
418  void *v; // the field elements
419  void *norm; // the normalization field
420 
421  void *v_h; // the field elements
422  void *norm_h; // the normalization field
423 
424  // multi-GPU parameters
425 
426  void* ghost[2][QUDA_MAX_DIM]; // pointers to the ghost regions - NULL by default
427  void* ghostNorm[2][QUDA_MAX_DIM]; // pointers to ghost norms - NULL by default
428 
429  mutable int ghostFace[QUDA_MAX_DIM]; // the size of each face
430  mutable int ghostFaceCB[QUDA_MAX_DIM]; // the size of each checkboarded face
431 
432  mutable void *ghost_buf[2*QUDA_MAX_DIM]; // wrapper that points to current ghost zone
433 
434  mutable DslashConstant dslash_constant; // constants used by dslash and packing kernels
435 
436  size_t bytes; // size in bytes of spinor field
437  size_t norm_bytes; // size in bytes of norm field
438 
443 
444  // in the case of full fields, these are references to the even / odd sublattices
447 
450  //
452 
458  void createGhostZone(int nFace, bool spin_project=true) const;
459 
460  // resets the above attributes based on contents of param
461  void reset(const ColorSpinorParam &);
462  void fill(ColorSpinorParam &) const;
463  static void checkField(const ColorSpinorField &, const ColorSpinorField &);
464 
468  void setTuningString();
469 
470  public:
471  //ColorSpinorField();
474 
475  virtual ~ColorSpinorField();
476 
477  virtual ColorSpinorField& operator=(const ColorSpinorField &);
478 
479  int Ncolor() const { return nColor; }
480  int Nspin() const { return nSpin; }
481  int Nvec() const { return nVec; }
483  int Ndim() const { return nDim; }
484  const int* X() const { return x; }
485  int X(int d) const { return x[d]; }
486  size_t RealLength() const { return real_length; }
487  size_t Length() const { return length; }
488  size_t Stride() const { return stride; }
489  size_t Volume() const { return volume; }
490  size_t VolumeCB() const { return siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume / 2; }
491  int Pad() const { return pad; }
492  size_t Bytes() const { return bytes; }
493  size_t NormBytes() const { return norm_bytes; }
494  size_t TotalBytes() const { return bytes + norm_bytes; }
495  size_t GhostBytes() const { return ghost_bytes; }
496  size_t GhostFaceBytes(int i) const { return ghost_face_bytes[i]; }
497  size_t GhostNormBytes() const { return ghost_bytes; }
498  void PrintDims() const { printfQuda("dimensions=%d %d %d %d\n", x[0], x[1], x[2], x[3]); }
499 
500  void* V() {return v;}
501  const void* V() const {return v;}
502  void* Norm(){return norm;}
503  const void* Norm() const {return norm;}
504  virtual const void* Ghost2() const { return nullptr; }
505 
506  virtual int full_dim(int d) const { return (d == 0 && siteSubset == 1) ? x[d] * 2 : x[d]; }
507 
512 
521  void exchange(void **ghost, void **sendbuf, int nFace=1) const;
522 
537  virtual void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr,
538  const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false,
540 
547 
548  bool IsComposite() const { return composite_descr.is_composite; }
549  bool IsComponent() const { return composite_descr.is_component; }
550 
551  int CompositeDim() const { return composite_descr.dim; }
552  int ComponentId() const { return composite_descr.id; }
553  int ComponentVolume() const { return composite_descr.volume; }
555  int ComponentStride() const { return composite_descr.stride; }
556  size_t ComponentLength() const { return composite_descr.length; }
558 
559  size_t ComponentBytes() const { return composite_descr.bytes; }
560  size_t ComponentNormBytes() const { return composite_descr.norm_bytes; }
561 
562  QudaPCType PCType() const { return pc_type; }
565 
567  QudaSiteOrder SiteOrder() const { return siteOrder; }
570 
571  const int *GhostFace() const { return ghostFace; }
572  const int *GhostFaceCB() const { return ghostFaceCB; }
573 
580  size_t GhostOffset(const int dim, const int dir) const { return ghost_offset[dim][dir]; }
581 
582  void* Ghost(const int i);
583  const void* Ghost(const int i) const;
584  void* GhostNorm(const int i);
585  const void* GhostNorm(const int i) const;
586 
590  void* const* Ghost() const;
591 
596 
597  const ColorSpinorField& Even() const;
598  const ColorSpinorField& Odd() const;
599 
602 
603  ColorSpinorField& Component(const int idx) const;
604  ColorSpinorField& Component(const int idx);
605 
607  return components;
608  };
609 
610  virtual void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0) = 0;
611 
612  virtual void PrintVector(unsigned int x) const = 0;
613 
620  void PrintVector(unsigned int x_cb, unsigned int parity) const { PrintVector(2 * x_cb + parity); }
621 
627  void LatticeIndex(int *y, int i) const;
628 
634  void OffsetIndex(int &i, int *y) const;
635 
637  static ColorSpinorField* Create(const ColorSpinorField &src, const ColorSpinorParam &param);
638 
648 
658  ColorSpinorField* CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec,
662 
672  ColorSpinorField* CreateFine(const int *geoblockSize, int spinBlockSize, int Nvec,
676 
677  friend std::ostream& operator<<(std::ostream &out, const ColorSpinorField &);
678  friend class ColorSpinorParam;
679  };
680 
681  // CUDA implementation
683 
684  friend class cpuColorSpinorField;
685 
686  private:
687  bool alloc; // whether we allocated memory
688  bool init;
689 
690  bool texInit; // whether a texture object has been created or not
691  mutable bool ghostTexInit; // whether the ghost texture object has been created
692  mutable QudaPrecision ghost_precision_tex;
694  bool reference; // whether the field is a reference or not
695 
696  mutable void *ghost_field_tex[4]; // instance pointer to GPU halo buffer (used to check if static allocation has changed)
697 
698  void create(const QudaFieldCreate);
699  void destroy();
700 
706  void zeroPad();
707 
712  void copySpinorField(const ColorSpinorField &src);
713 
714  void loadSpinorField(const ColorSpinorField &src);
715  void saveSpinorField (ColorSpinorField &src) const;
716 
717  public:
718 
719  //cudaColorSpinorField();
724  virtual ~cudaColorSpinorField();
725 
729 
730  void copy(const cudaColorSpinorField &);
731 
736  virtual void copy_to_buffer(void *buffer) const;
737 
742  virtual void copy_from_buffer(void *buffer);
743 
745 
751  void createComms(int nFace, bool spin_project=true);
752 
758  void allocateGhostBuffer(int nFace, bool spin_project=true) const;
759 
778  void packGhost(const int nFace, const QudaParity parity, const int dim, const QudaDirection dir, const int dagger,
779  qudaStream_t *stream, MemoryLocation location[2 * QUDA_MAX_DIM], MemoryLocation location_label,
780  bool spin_project, double a = 0, double b = 0, double c = 0, int shmem = 0);
781 
782  void packGhostExtended(const int nFace, const int R[], const QudaParity parity, const int dim,
783  const QudaDirection dir, const int dagger, qudaStream_t *stream, bool zero_copy = false);
784 
794  void sendGhost(void *ghost_spinor, const int nFace, const int dim, const QudaDirection dir, const int dagger,
796 
806  void unpackGhost(const void *ghost_spinor, const int nFace, const int dim, const QudaDirection dir,
807  const int dagger, qudaStream_t *stream);
808 
820  void unpackGhostExtended(const void *ghost_spinor, const int nFace, const QudaParity parity, const int dim,
821  const QudaDirection dir, const int dagger, qudaStream_t *stream, bool zero_copy);
822 
823  void streamInit(qudaStream_t *stream_p);
824 
841  void pack(int nFace, int parity, int dagger, int stream_idx, MemoryLocation location[], MemoryLocation location_label,
842  bool spin_project = true, double a = 0, double b = 0, double c = 0, int shmem = 0);
843 
844  void packExtended(const int nFace, const int R[], const int parity, const int dagger, const int dim,
845  qudaStream_t *stream_p, const bool zeroCopyPack = false);
846 
847  void gather(int nFace, int dagger, int dir, qudaStream_t *stream_p = NULL);
848 
858  void recvStart(int nFace, int dir, int dagger = 0, qudaStream_t *stream_p = nullptr, bool gdr = false);
859 
871  void sendStart(int nFace, int d, int dagger = 0, qudaStream_t *stream_p = nullptr, bool gdr = false,
872  bool remote_write = false);
873 
884  void commsStart(int nFace, int d, int dagger = 0, qudaStream_t *stream_p = nullptr, bool gdr_send = false,
885  bool gdr_recv = false);
886 
897  int commsQuery(int nFace, int d, int dagger = 0, qudaStream_t *stream_p = nullptr, bool gdr_send = false,
898  bool gdr_recv = false);
899 
910  void commsWait(int nFace, int d, int dagger = 0, qudaStream_t *stream_p = nullptr, bool gdr_send = false,
911  bool gdr_recv = false);
912 
913  void scatter(int nFace, int dagger, int dir, qudaStream_t *stream_p);
914  void scatter(int nFace, int dagger, int dir);
915 
916  void scatterExtended(int nFace, int parity, int dagger, int dir);
917 
918  inline const void* Ghost2() const {
919  if (bufferIndex < 2) {
921  } else {
923  }
924  }
925 
940  void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr,
941  const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false,
943 
944  cudaColorSpinorField& Component(const int idx) const;
946  void CopySubset(cudaColorSpinorField& dst, const int range, const int first_element=0) const;
947 
948  void zero();
949 
950  friend std::ostream& operator<<(std::ostream &out, const cudaColorSpinorField &);
951 
952  void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0);
953 
954  void PrintVector(unsigned int x) const;
955 
959  void backup() const;
960 
964  void restore() const;
965 
972  void prefetch(QudaFieldLocation mem_space, qudaStream_t stream = 0) const;
973  };
974 
975  // CPU implementation
977 
978  friend class cudaColorSpinorField;
979 
980  public:
981  static void* fwdGhostFaceBuffer[QUDA_MAX_DIM]; //cpu memory
982  static void* backGhostFaceBuffer[QUDA_MAX_DIM]; //cpu memory
983  static void* fwdGhostFaceSendBuffer[QUDA_MAX_DIM]; //cpu memory
984  static void* backGhostFaceSendBuffer[QUDA_MAX_DIM]; //cpu memory
986  static size_t ghostFaceBytes[QUDA_MAX_DIM];
987 
988  private:
989  //void *v; // the field elements
990  //void *norm; // the normalization field
991  bool init;
992  bool reference; // whether the field is a reference or not
993 
994  void create(const QudaFieldCreate);
995  void destroy();
996 
997  public:
998  //cpuColorSpinorField();
1003  virtual ~cpuColorSpinorField();
1004 
1008 
1009  void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0);
1010 
1022  static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1);
1023 
1024  void PrintVector(unsigned int x) const;
1025 
1030  void allocateGhostBuffer(int nFace) const;
1031  static void freeGhostBuffer(void);
1032 
1033  void packGhost(void **ghost, const QudaParity parity, const int nFace, const int dagger) const;
1034  void unpackGhost(void* ghost_spinor, const int dim,
1035  const QudaDirection dir, const int dagger);
1036 
1037  void copy(const cpuColorSpinorField&);
1038  void zero();
1039 
1044  virtual void copy_to_buffer(void *buffer) const;
1045 
1050  virtual void copy_from_buffer(void *buffer);
1051 
1066  void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr,
1067  const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false,
1069 
1073  void backup() const;
1074 
1078  void restore() const;
1079  };
1080 
1082  QudaFieldLocation location, void *Dst=0, void *Src=0,
1083  void *dstNorm=0, void*srcNorm=0);
1084  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c);
1086 
1095  void copyFieldOffset(ColorSpinorField &out, const ColorSpinorField &in, CommKey offset, QudaPCType pc_type);
1096 
1097  void genericPrintVector(const cpuColorSpinorField &a, unsigned int x);
1098  void genericCudaPrintVector(const cudaColorSpinorField &a, unsigned x);
1099 
1101 
1103  QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm);
1104 
1114  void genericPackGhost(void **ghost, const ColorSpinorField &a, QudaParity parity,
1115  int nFace, int dagger, MemoryLocation *destination=nullptr);
1116 
1123  void spinorNoise(ColorSpinorField &src, RNG& randstates, QudaNoiseType type);
1124 
1132  void spinorNoise(ColorSpinorField &src, unsigned long long seed, QudaNoiseType type);
1133 
1142  const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
1143  {
1144  QudaPCType type = QUDA_PC_INVALID;
1145  if (a.PCType() == b.PCType())
1146  type = a.PCType();
1147  else
1148  errorQuda("PCTypes %d %d do not match (%s:%d in %s())\n", a.PCType(), b.PCType(), file, line, func);
1149  return type;
1150  }
1151 
1159  template <typename... Args>
1160  inline QudaPCType PCType_(const char *func, const char *file, int line, const ColorSpinorField &a,
1161  const ColorSpinorField &b, const Args &... args)
1162  {
1163  return static_cast<QudaPCType>(PCType_(func, file, line, a, b) & PCType_(func, file, line, a, args...));
1164  }
1165 
1166 #define checkPCType(...) PCType_(__func__, __FILE__, __LINE__, __VA_ARGS__)
1167 
1174  inline QudaFieldOrder Order_(const char *func, const char *file, int line, const ColorSpinorField &a,
1175  const ColorSpinorField &b)
1176  {
1178  if (a.FieldOrder() == b.FieldOrder())
1179  order = a.FieldOrder();
1180  else
1181  errorQuda("Orders %d %d do not match (%s:%d in %s())\n", a.FieldOrder(), b.FieldOrder(), file, line, func);
1182  return order;
1183  }
1184 
1192  template <typename... Args>
1193  inline QudaFieldOrder Order_(const char *func, const char *file, int line, const ColorSpinorField &a,
1194  const ColorSpinorField &b, const Args &... args)
1195  {
1196  return static_cast<QudaFieldOrder>(Order_(func, file, line, a, b) & Order_(func, file, line, a, args...));
1197  }
1198 
1199 #define checkOrder(...) Order_(__func__, __FILE__, __LINE__, __VA_ARGS__)
1200 
1207  inline int Length_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
1208  {
1209  int length = 0;
1210  if (a.Length() == b.Length())
1211  length = a.Length();
1212  else
1213  errorQuda("Lengths %lu %lu do not match (%s:%d in %s())\n", a.Length(), b.Length(), file, line, func);
1214  return length;
1215  }
1216 
1224  template <typename... Args>
1225  inline int Length_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b,
1226  const Args &... args)
1227  {
1228  return static_cast<int>(Length_(func, file, line, a, b) & Length_(func, file, line, a, args...));
1229  }
1230 
1231 #define checkLength(...) Length_(__func__, __FILE__, __LINE__, __VA_ARGS__)
1232 
1233 } // namespace quda
1234 
1235 #endif // _COLOR_SPINOR_FIELD_H
const DslashConstant & getDslashConstant() const
Get the dslash_constant structure from this field.
void exchange(void **ghost, void **sendbuf, int nFace=1) const
void fill(ColorSpinorParam &) const
void LatticeIndex(int *y, int i) const
int ghostFace[QUDA_MAX_DIM]
virtual void PrintVector(unsigned int x) const =0
void *const * Ghost() const
virtual const void * Ghost2() const
ColorSpinorField * CreateAlias(const ColorSpinorParam &param)
Create a field that aliases this field's storage. The alias field can use a different precision than ...
friend std::ostream & operator<<(std::ostream &out, const ColorSpinorField &)
virtual ColorSpinorField & operator=(const ColorSpinorField &)
void * ghost_buf[2 *QUDA_MAX_DIM]
QudaParity SuggestedParity() const
ColorSpinorField * even
const ColorSpinorField & Odd() const
CompositeColorSpinorFieldDescriptor composite_descr
used for deflation eigenvector sets etc.:
size_t ComponentNormBytes() const
size_t GhostFaceBytes(int i) const
ColorSpinorField(const ColorSpinorField &)
CompositeColorSpinorField & Components()
size_t GhostOffset(const int dim, const int dir) const
QudaTwistFlavorType TwistFlavor() const
void setSuggestedParity(QudaParity suggested_parity)
QudaTwistFlavorType twistFlavor
ColorSpinorField * CreateFine(const int *geoblockSize, int spinBlockSize, int Nvec, QudaPrecision precision=QUDA_INVALID_PRECISION, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION, QudaMemoryType mem_type=QUDA_MEMORY_INVALID)
Create a fine color-spinor field, using this field to set the meta data.
size_t ComponentRealLength() const
virtual int full_dim(int d) const
QudaSiteOrder SiteOrder() const
CompositeColorSpinorField components
QudaSiteSubset SiteSubset() const
const int * GhostFace() const
const void * V() const
size_t ComponentLength() const
void * ghost[2][QUDA_MAX_DIM]
static ColorSpinorField * Create(const ColorSpinorParam &param)
void reset(const ColorSpinorParam &)
void createGhostZone(int nFace, bool spin_project=true) const
static void checkField(const ColorSpinorField &, const ColorSpinorField &)
QudaPrecision ghost_precision_allocated
ColorSpinorField * CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec, QudaPrecision precision=QUDA_INVALID_PRECISION, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION, QudaMemoryType mem_Type=QUDA_MEMORY_INVALID)
Create a coarse color-spinor field, using this field to set the meta data.
void OffsetIndex(int &i, int *y) const
void * GhostNorm(const int i)
virtual void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const =0
QudaFieldOrder FieldOrder() const
const ColorSpinorField & Even() const
QudaGammaBasis GammaBasis() const
void PrintVector(unsigned int x_cb, unsigned int parity) const
Thin wrapper around PrintVector that takes in a checkerboard index and a parity instead of a full ind...
ColorSpinorField & Component(const int idx) const
const void * Norm() const
int ghostFaceCB[QUDA_MAX_DIM]
void * ghostNorm[2][QUDA_MAX_DIM]
void setTuningString()
Set the vol_string and aux_string for use in tuning.
virtual void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)=0
const int * X() const
QudaPCType PCType() const
DslashConstant dslash_constant
const int * GhostFaceCB() const
ColorSpinorField * odd
QudaFieldLocation location
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
ColorSpinorParam(ColorSpinorParam &cpuParam, QudaInvertParam &inv_param, QudaFieldLocation location=QUDA_CUDA_FIELD_LOCATION)
bool is_composite
for deflation solvers:
ColorSpinorParam(void *V, QudaInvertParam &inv_param, const int *X, const bool pc_solution, QudaFieldLocation location=QUDA_CPU_FIELD_LOCATION)
QudaTwistFlavorType twistFlavor
static int bufferIndex
size_t ghost_offset[QUDA_MAX_DIM][2]
QudaPrecision ghost_precision
QudaPrecision precision
static void * ghost_pinned_recv_buffer_hd[2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
QudaMemoryType mem_type
const int * R() const
static void * ghost_recv_buffer_d[2]
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23
virtual void copy_from_buffer(void *buffer)
Copy all contents of the field from a host buffer to this field.
void unpackGhost(void *ghost_spinor, const int dim, const QudaDirection dir, const int dagger)
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
static size_t ghostFaceBytes[QUDA_MAX_DIM]
static void * fwdGhostFaceSendBuffer[QUDA_MAX_DIM]
virtual void copy_to_buffer(void *buffer) const
Copy all contents of the field to a host buffer.
void backup() const
Backs up the cpuColorSpinorField.
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
void packGhost(void **ghost, const QudaParity parity, const int nFace, const int dagger) const
void restore() const
Restores the cpuColorSpinorField.
static void * backGhostFaceSendBuffer[QUDA_MAX_DIM]
void copy(const cpuColorSpinorField &)
ColorSpinorField & operator=(const ColorSpinorField &)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
cpuColorSpinorField(const cpuColorSpinorField &)
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
void PrintVector(unsigned int x) const
void allocateGhostBuffer(int nFace) const
Allocate the ghost buffers.
friend std::ostream & operator<<(std::ostream &out, const cudaColorSpinorField &)
void createComms(int nFace, bool spin_project=true)
Create the communication handlers and buffers.
void prefetch(QudaFieldLocation mem_space, qudaStream_t stream=0) const
If managed memory and prefetch is enabled, prefetch the spinor, the norm field (as appropriate),...
virtual void copy_from_buffer(void *buffer)
Copy all contents of the field from a host buffer to this field.
cudaColorSpinorField(const cudaColorSpinorField &)
void recvStart(int nFace, int dir, int dagger=0, qudaStream_t *stream_p=nullptr, bool gdr=false)
Initiate halo communication receive.
void commsWait(int nFace, int d, int dagger=0, qudaStream_t *stream_p=nullptr, bool gdr_send=false, bool gdr_recv=false)
Wait on halo communication to complete.
void pack(int nFace, int parity, int dagger, int stream_idx, MemoryLocation location[], MemoryLocation location_label, bool spin_project=true, double a=0, double b=0, double c=0, int shmem=0)
void CopySubset(cudaColorSpinorField &dst, const int range, const int first_element=0) const
void streamInit(qudaStream_t *stream_p)
void sendGhost(void *ghost_spinor, const int nFace, const int dim, const QudaDirection dir, const int dagger, qudaStream_t *stream)
const void * Ghost2() const
void packGhostExtended(const int nFace, const int R[], const QudaParity parity, const int dim, const QudaDirection dir, const int dagger, qudaStream_t *stream, bool zero_copy=false)
ColorSpinorField & operator=(const ColorSpinorField &)
void sendStart(int nFace, int d, int dagger=0, qudaStream_t *stream_p=nullptr, bool gdr=false, bool remote_write=false)
Initiate halo communication sending.
cudaColorSpinorField & Component(const int idx) const
for composite fields:
void scatterExtended(int nFace, int parity, int dagger, int dir)
void unpackGhostExtended(const void *ghost_spinor, const int nFace, const QudaParity parity, const int dim, const QudaDirection dir, const int dagger, qudaStream_t *stream, bool zero_copy)
void backup() const
Backs up the cudaColorSpinorField.
void commsStart(int nFace, int d, int dagger=0, qudaStream_t *stream_p=nullptr, bool gdr_send=false, bool gdr_recv=false)
Initiate halo communication.
virtual void copy_to_buffer(void *buffer) const
Copy all contents of the field to a host buffer.
void gather(int nFace, int dagger, int dir, qudaStream_t *stream_p=NULL)
int commsQuery(int nFace, int d, int dagger=0, qudaStream_t *stream_p=nullptr, bool gdr_send=false, bool gdr_recv=false)
Non-blocking query if the halo communication has completed.
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
CompositeColorSpinorField & Components() const
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
void copy(const cudaColorSpinorField &)
void scatter(int nFace, int dagger, int dir, qudaStream_t *stream_p)
void restore() const
Restores the cudaColorSpinorField.
void packGhost(const int nFace, const QudaParity parity, const int dim, const QudaDirection dir, const int dagger, qudaStream_t *stream, MemoryLocation location[2 *QUDA_MAX_DIM], MemoryLocation location_label, bool spin_project, double a=0, double b=0, double c=0, int shmem=0)
Packs the cudaColorSpinorField's ghost zone.
void allocateGhostBuffer(int nFace, bool spin_project=true) const
Allocate the ghost buffers.
void packExtended(const int nFace, const int R[], const int parity, const int dagger, const int dim, qudaStream_t *stream_p, const bool zeroCopyPack=false)
void PrintVector(unsigned int x) const
void unpackGhost(const void *ghost_spinor, const int nFace, const int dim, const QudaDirection dir, const int dagger, qudaStream_t *stream)
double tol
std::array< int, 4 > dim
QudaTwistFlavorType twist_flavor
QudaDslashType dslash_type
QudaMatPCType matpc_type
bool dagger
int V
Definition: host_utils.cpp:37
QudaParity parity
Definition: covdev_test.cpp:40
const int nColor
Definition: covdev_test.cpp:44
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:31
QudaInvertParam inv_param
Definition: covdev_test.cpp:27
enum QudaSiteOrder_s QudaSiteOrder
enum QudaPrecision_s QudaPrecision
@ QUDA_STAGGERED_DSLASH
Definition: enum_quda.h:97
@ QUDA_MOBIUS_DWF_DSLASH
Definition: enum_quda.h:95
@ QUDA_TWISTED_MASS_DSLASH
Definition: enum_quda.h:99
@ QUDA_DOMAIN_WALL_DSLASH
Definition: enum_quda.h:93
@ QUDA_ASQTAD_DSLASH
Definition: enum_quda.h:98
@ QUDA_MOBIUS_DWF_EOFA_DSLASH
Definition: enum_quda.h:96
@ QUDA_LAPLACE_DSLASH
Definition: enum_quda.h:101
@ QUDA_DOMAIN_WALL_4D_DSLASH
Definition: enum_quda.h:94
@ QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326
@ QUDA_CPU_FIELD_LOCATION
Definition: enum_quda.h:325
@ QUDA_INVALID_FIELD_LOCATION
Definition: enum_quda.h:327
enum QudaTwistFlavorType_s QudaTwistFlavorType
enum QudaDirection_s QudaDirection
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333
@ QUDA_PARITY_SITE_SUBSET
Definition: enum_quda.h:332
@ QUDA_DEGRAND_ROSSI_GAMMA_BASIS
Definition: enum_quda.h:368
@ QUDA_UKQCD_GAMMA_BASIS
Definition: enum_quda.h:369
@ QUDA_INVALID_GAMMA_BASIS
Definition: enum_quda.h:371
enum QudaPCType_s QudaPCType
@ QUDA_EVEN_PARITY
Definition: enum_quda.h:284
@ QUDA_ODD_PARITY
Definition: enum_quda.h:284
@ QUDA_INVALID_PARITY
Definition: enum_quda.h:284
@ QUDA_MEMORY_INVALID
Definition: enum_quda.h:16
@ QUDA_QDPJIT_DIRAC_ORDER
Definition: enum_quda.h:247
@ QUDA_INTERNAL_DIRAC_ORDER
Definition: enum_quda.h:244
@ QUDA_TIFR_PADDED_DIRAC_ORDER
Definition: enum_quda.h:250
@ QUDA_DIRAC_ORDER
Definition: enum_quda.h:245
@ QUDA_QDP_DIRAC_ORDER
Definition: enum_quda.h:246
@ QUDA_CPS_WILSON_DIRAC_ORDER
Definition: enum_quda.h:248
enum QudaNoiseType_s QudaNoiseType
enum QudaFieldOrder_s QudaFieldOrder
enum QudaSiteSubset_s QudaSiteSubset
enum QudaFieldLocation_s QudaFieldLocation
enum QudaFieldCreate_s QudaFieldCreate
@ QUDA_MATPC_ODD_ODD_ASYMMETRIC
Definition: enum_quda.h:219
@ QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
Definition: enum_quda.h:218
@ QUDA_MATPC_ODD_ODD
Definition: enum_quda.h:217
@ QUDA_MATPC_EVEN_EVEN
Definition: enum_quda.h:216
enum QudaMatPCType_s QudaMatPCType
@ QUDA_EVEN_ODD_SITE_ORDER
Definition: enum_quda.h:340
@ QUDA_INVALID_SITE_ORDER
Definition: enum_quda.h:342
@ QUDA_ODD_EVEN_SITE_ORDER
Definition: enum_quda.h:341
enum QudaMemoryType_s QudaMemoryType
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_INVALID_PRECISION
Definition: enum_quda.h:66
@ QUDA_QUARTER_PRECISION
Definition: enum_quda.h:62
@ QUDA_HALF_PRECISION
Definition: enum_quda.h:63
@ QUDA_5D_PC
Definition: enum_quda.h:397
@ QUDA_4D_PC
Definition: enum_quda.h:397
@ QUDA_PC_INVALID
Definition: enum_quda.h:397
enum QudaSourceType_s QudaSourceType
@ QUDA_INVALID_FIELD_ORDER
Definition: enum_quda.h:356
@ QUDA_FLOAT2_FIELD_ORDER
Definition: enum_quda.h:348
@ QUDA_SPACE_COLOR_SPIN_FIELD_ORDER
Definition: enum_quda.h:352
@ QUDA_FLOAT4_FIELD_ORDER
Definition: enum_quda.h:349
@ QUDA_FLOAT8_FIELD_ORDER
Definition: enum_quda.h:350
@ QUDA_QDPJIT_FIELD_ORDER
Definition: enum_quda.h:353
@ QUDA_PADDED_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:355
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:351
enum QudaGammaBasis_s QudaGammaBasis
@ QUDA_COPY_FIELD_CREATE
Definition: enum_quda.h:362
@ QUDA_INVALID_FIELD_CREATE
Definition: enum_quda.h:364
@ QUDA_REFERENCE_FIELD_CREATE
Definition: enum_quda.h:363
@ QUDA_TWIST_INVALID
Definition: enum_quda.h:404
@ QUDA_TWIST_NONDEG_DOUBLET
Definition: enum_quda.h:401
enum QudaParity_s QudaParity
int length[]
QudaPrecision & cuda_prec
Definition: host_utils.cpp:58
QudaPrecision & cpu_prec
Definition: host_utils.cpp:57
void init()
Create the BLAS context.
void destroy()
Destroy the BLAS context.
bool isNative(QudaFieldOrder order, QudaPrecision precision, int nSpin, int nColor)
void genericPackGhost(void **ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr)
Generic ghost packing routine.
QudaFieldOrder Order_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
Helper function for determining if the order of the fields is the same.
void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
void exchangeExtendedGhost(cudaColorSpinorField *spinor, int R[], int parity, qudaStream_t *stream_p)
void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
qudaStream_t * stream
void genericCudaPrintVector(const cudaColorSpinorField &a, unsigned x)
QudaPCType PCType_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
Helper function for determining if the preconditioning type of the fields is the same.
std::vector< ColorSpinorField * > CompositeColorSpinorField
int Length_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
Helper function for determining if the length of the fields is the same.
void copyExtendedColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)
void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
void copyFieldOffset(CloverField &out, const CloverField &in, CommKey offset, QudaPCType pc_type)
This function is used for copying from a source clover field to a destination clover field with an of...
constexpr QudaParity impliedParityFromMatPC(const QudaMatPCType &matpc_type)
Helper function for getting the implied spinor parity from a matrix preconditioning type.
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
QudaGaugeParam param
Definition: pack_test.cpp:18
Main header file for the QUDA library.
cudaStream_t qudaStream_t
Definition: quda_api.h:9
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
QudaDslashType dslash_type
Definition: quda.h:106
QudaMatPCType matpc_type
Definition: quda.h:230
QudaDiracFieldOrder dirac_order
Definition: quda.h:244
CompositeColorSpinorFieldDescriptor(const CompositeColorSpinorFieldDescriptor &descr)
CompositeColorSpinorFieldDescriptor(bool is_composite, int dim, bool is_component=false, int id=0)
Constants used by dslash and packing kernels.
int_fastdiv Xh[QUDA_MAX_DIM]
int ghostFace[QUDA_MAX_DIM+1]
int ghostFaceCB[QUDA_MAX_DIM+1]
int_fastdiv X[QUDA_MAX_DIM]
int_fastdiv dims[4][3]
QudaPrecision precision
Definition: lattice_field.h:52
QudaPrecision ghost_precision
Definition: lattice_field.h:55
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68
QudaSiteSubset siteSubset
Definition: lattice_field.h:72
#define printfQuda(...)
Definition: util_quda.h:114
#define errorQuda(...)
Definition: util_quda.h:120