QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
color_spinor_field.h
Go to the documentation of this file.
1 #ifndef _COLOR_SPINOR_FIELD_H
2 #define _COLOR_SPINOR_FIELD_H
3 
4 #include <quda_internal.h>
5 #include <quda.h>
6 
7 #include <iostream>
8 
9 #include <lattice_field.h>
10 #include <random_quda.h>
11 #include <fast_intdiv.h>
12 
13 namespace quda {
14 
15  enum MemoryLocation { Device = 1, Host = 2, Remote = 4 };
16 
17  struct FullClover;
18 
20  typedef std::vector<ColorSpinorField*> CompositeColorSpinorField;
21 
30 
31  bool is_composite; //set to 'false' for a regular spinor field
32  bool is_component; //set to 'true' if we want to work with an individual component (otherwise will work with the whole set)
33 
34  int dim;//individual component has dim = 0
35  int id;
36 
37  int volume; // volume of a single eigenvector
38  int volumeCB; // CB volume of a single eigenvector
39  int stride; // stride of a single eigenvector
40  size_t real_length; // physical length of a single eigenvector
41  size_t length; // length including pads (but not ghost zones)
42 
43  size_t bytes; // size in bytes of spinor field
44  size_t norm_bytes; // makes no sense but let's keep it...
45 
47  : is_composite(false), is_component(false), dim(0), id(0), volume(0), volumeCB(0),
48  stride(0), real_length(0), length(0), bytes(0), norm_bytes(0) {};
49 
50  CompositeColorSpinorFieldDescriptor(bool is_composite, int dim, bool is_component = false, int id = 0)
51  : is_composite(is_composite), is_component(is_component), dim(dim), id(id), volume(0), volumeCB(0),
52  stride(0), real_length(0), length(0), bytes(0), norm_bytes(0)
53  {
54  if(is_composite && is_component) errorQuda("\nComposite type is not implemented.\n");
55  else if(is_composite && dim == 0) is_composite = false;
56  }
57 
59  {
60  is_composite = descr.is_composite;
61  is_component = descr.is_component;
62 
63  if(is_composite && is_component) errorQuda("\nComposite type is not implemented.\n");
64 
65  dim = descr.dim;
66  id = descr.id;
67 
68  volume = descr.volume;
69  volumeCB = descr.volumeCB;
70  stride = descr.stride; // stride of a single eigenvector
71  real_length = descr.real_length; // physical length of a single eigenvector
72  length = descr.length; // length including pads (but not ghost zones)
73 
74  bytes = descr.bytes; // size in bytes of spinor field
75  norm_bytes = descr.norm_bytes; // makes no sense but let's keep it...
76  }
77 
78  };
79 
81 
82  public:
83  QudaFieldLocation location; // where are we storing the field (CUDA or CPU)?
84 
85  int nColor; // Number of colors of the field
86  int nSpin; // =1 for staggered, =2 for coarse Dslash, =4 for 4d spinor
87  int nVec; // number of packed vectors (for multigrid transfer operator)
88 
89  QudaTwistFlavorType twistFlavor; // used by twisted mass
90 
91  QudaSiteOrder siteOrder; // defined for full fields
92 
93  QudaFieldOrder fieldOrder; // Float, Float2, Float4 etc.
96 
97  QudaPCType pc_type; // used to select preconditioning method in DWF
98 
99  void *v; // pointer to field
100  void *norm;
101 
104  int composite_dim; //e.g., number of eigenvectors in the set
106  int component_id; //eigenvector index
107 
109 
112  location(QUDA_INVALID_FIELD_LOCATION),
113  nColor(0),
114  nSpin(0),
115  nVec(1),
116  twistFlavor(QUDA_TWIST_INVALID),
117  siteOrder(QUDA_INVALID_SITE_ORDER),
118  fieldOrder(QUDA_INVALID_FIELD_ORDER),
119  gammaBasis(QUDA_INVALID_GAMMA_BASIS),
121  pc_type(QUDA_PC_INVALID),
122  is_composite(false),
123  composite_dim(0),
124  is_component(false),
125  component_id(0)
126  {
127  ;
128  }
129 
130  // used to create cpu params
131  ColorSpinorParam(void *V, QudaInvertParam &inv_param, const int *X, const bool pc_solution,
133  LatticeFieldParam(4, X, 0, inv_param.cpu_prec),
134  location(location),
135  nColor(3),
136  nSpin((inv_param.dslash_type == QUDA_ASQTAD_DSLASH || inv_param.dslash_type == QUDA_STAGGERED_DSLASH
137  || inv_param.dslash_type == QUDA_LAPLACE_DSLASH) ?
138  1 :
139  4),
140  nVec(1),
141  twistFlavor(inv_param.twist_flavor),
142  siteOrder(QUDA_INVALID_SITE_ORDER),
143  fieldOrder(QUDA_INVALID_FIELD_ORDER),
144  gammaBasis(inv_param.gamma_basis),
146  pc_type(inv_param.dslash_type == QUDA_DOMAIN_WALL_DSLASH ? QUDA_5D_PC : QUDA_4D_PC),
147  v(V),
148  is_composite(false),
149  composite_dim(0),
150  is_component(false),
151  component_id(0)
152  {
153 
154  if (nDim > QUDA_MAX_DIM) errorQuda("Number of dimensions too great");
155  for (int d = 0; d < nDim; d++) x[d] = X[d];
156 
157  if (!pc_solution) {
158  siteSubset = QUDA_FULL_SITE_SUBSET;
159  } else {
160  x[0] /= 2; // X defined the full lattice dimensions
161  siteSubset = QUDA_PARITY_SITE_SUBSET;
162  }
163 
165  || inv_param.dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
166  nDim++;
167  x[4] = inv_param.Ls;
168  } else if (inv_param.dslash_type == QUDA_TWISTED_MASS_DSLASH && (twistFlavor == QUDA_TWIST_NONDEG_DOUBLET)) {
169  nDim++;
170  x[4] = 2; // for two flavors
171  } else if (inv_param.dslash_type == QUDA_STAGGERED_DSLASH || inv_param.dslash_type == QUDA_ASQTAD_DSLASH) {
172  nDim++;
173  x[4] = inv_param.Ls;
174  }
175 
176  if (inv_param.dirac_order == QUDA_INTERNAL_DIRAC_ORDER) {
177  fieldOrder
178  = (precision == QUDA_DOUBLE_PRECISION || nSpin == 1 || nSpin == 2) ? QUDA_FLOAT2_FIELD_ORDER : QUDA_FLOAT4_FIELD_ORDER;
179  siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
180  } else if (inv_param.dirac_order == QUDA_CPS_WILSON_DIRAC_ORDER) {
182  siteOrder = QUDA_ODD_EVEN_SITE_ORDER;
183  } else if (inv_param.dirac_order == QUDA_QDP_DIRAC_ORDER) {
185  siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
186  } else if (inv_param.dirac_order == QUDA_DIRAC_ORDER) {
188  siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
189  } else if (inv_param.dirac_order == QUDA_QDPJIT_DIRAC_ORDER) {
190  fieldOrder = QUDA_QDPJIT_FIELD_ORDER;
191  siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
192  } else if (inv_param.dirac_order == QUDA_TIFR_PADDED_DIRAC_ORDER) {
194  siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
195  } else {
196  errorQuda("Dirac order %d not supported", inv_param.dirac_order);
197  }
198  }
199 
200  // normally used to create cuda param from a cpu param
203  LatticeFieldParam(cpuParam.nDim, cpuParam.x, inv_param.sp_pad, inv_param.cuda_prec),
204  location(location),
205  nColor(cpuParam.nColor),
206  nSpin(cpuParam.nSpin),
207  nVec(cpuParam.nVec),
208  twistFlavor(cpuParam.twistFlavor),
209  siteOrder(QUDA_EVEN_ODD_SITE_ORDER),
210  fieldOrder(QUDA_INVALID_FIELD_ORDER),
211  gammaBasis(nSpin == 4 ? QUDA_UKQCD_GAMMA_BASIS : QUDA_DEGRAND_ROSSI_GAMMA_BASIS),
212  create(QUDA_COPY_FIELD_CREATE),
213  pc_type(cpuParam.pc_type),
214  v(0),
215  is_composite(cpuParam.is_composite),
216  composite_dim(cpuParam.composite_dim),
217  is_component(false),
218  component_id(0)
219  {
220  siteSubset = cpuParam.siteSubset;
221  fieldOrder = (precision == QUDA_DOUBLE_PRECISION || nSpin == 1 || nSpin == 2) ? QUDA_FLOAT2_FIELD_ORDER : QUDA_FLOAT4_FIELD_ORDER;
222  }
223 
231  void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false) {
232  // is the current status in native field order?
233  bool native = force_native ? true : false;
234  if ( ((this->precision == QUDA_DOUBLE_PRECISION || nSpin==1 || nSpin==2) &&
235  (fieldOrder == QUDA_FLOAT2_FIELD_ORDER)) ||
236  ((this->precision == QUDA_SINGLE_PRECISION || this->precision == QUDA_HALF_PRECISION || this->precision == QUDA_QUARTER_PRECISION) &&
237  (nSpin==4) && fieldOrder == QUDA_FLOAT4_FIELD_ORDER) ) { native = true; }
238 
239  this->precision = precision;
240  this->ghost_precision = (ghost_precision == QUDA_INVALID_PRECISION) ? precision : ghost_precision;
241 
242  // if this is a native field order, let's preserve that status, else keep the same field order
243  if (native) fieldOrder = (precision == QUDA_DOUBLE_PRECISION || nSpin == 1 || nSpin == 2) ?
245  }
246 
247  void print() {
248  printfQuda("nColor = %d\n", nColor);
249  printfQuda("nSpin = %d\n", nSpin);
250  printfQuda("twistFlavor = %d\n", twistFlavor);
251  printfQuda("nDim = %d\n", nDim);
252  for (int d=0; d<nDim; d++) printfQuda("x[%d] = %d\n", d, x[d]);
253  printfQuda("precision = %d\n", precision);
254  printfQuda("ghost_precision = %d\n", ghost_precision);
255  printfQuda("pad = %d\n", pad);
256  printfQuda("siteSubset = %d\n", siteSubset);
257  printfQuda("siteOrder = %d\n", siteOrder);
258  printfQuda("fieldOrder = %d\n", fieldOrder);
259  printfQuda("gammaBasis = %d\n", gammaBasis);
260  printfQuda("create = %d\n", create);
261  printfQuda("v = %lx\n", (unsigned long)v);
262  printfQuda("norm = %lx\n", (unsigned long)norm);
264  if(is_composite) printfQuda("Number of elements = %d\n", composite_dim);
265  }
266 
267  virtual ~ColorSpinorParam() {
268  }
269 
270  };
271 
272  class cpuColorSpinorField;
273  class cudaColorSpinorField;
274 
278  struct DslashConstant {
279  int Vh;
282  int Ls;
283 
286 
287  int_fastdiv face_X[4];
288  int_fastdiv face_Y[4];
289  int_fastdiv face_Z[4];
290  int_fastdiv face_T[4];
291  int_fastdiv face_XY[4];
292  int_fastdiv face_XYZ[4];
293  int_fastdiv face_XYZT[4];
294 
295  int ghostFace[QUDA_MAX_DIM+1];
296  int ghostFaceCB[QUDA_MAX_DIM + 1];
297 
298  int X2X1;
299  int X3X2X1;
300  int X4X3X2X1;
301 
302  int X2X1mX1;
307 
309  };
310 
312 
313  private:
314  void create(int nDim, const int *x, int Nc, int Ns, int Nvec, QudaTwistFlavorType Twistflavor,
315  QudaPrecision precision, int pad, QudaSiteSubset subset, QudaSiteOrder siteOrder, QudaFieldOrder fieldOrder,
316  QudaGammaBasis gammaBasis, QudaPCType pc_type);
317  void destroy();
318 
319  protected:
320  bool init;
321 
324 
325  int nColor;
326  int nSpin;
327  int nVec;
328 
329  int nDim;
330  int x[QUDA_MAX_DIM];
331 
332  int volume;
333  int volumeCB;
334  int pad;
335  int stride;
336 
338 
339  QudaPCType pc_type; // used to select preconditioning method in DWF
340 
341  size_t real_length; // physical length only
342  size_t length; // length including pads, but not ghost zone - used for BLAS
343 
344  void *v; // the field elements
345  void *norm; // the normalization field
346 
347  void *v_h; // the field elements
348  void *norm_h; // the normalization field
349 
350  // multi-GPU parameters
351 
352  void* ghost[2][QUDA_MAX_DIM]; // pointers to the ghost regions - NULL by default
353  void* ghostNorm[2][QUDA_MAX_DIM]; // pointers to ghost norms - NULL by default
354 
355  mutable int ghostFace[QUDA_MAX_DIM]; // the size of each face
356  mutable int ghostFaceCB[QUDA_MAX_DIM]; // the size of each checkboarded face
357 
358  mutable void *ghost_buf[2*QUDA_MAX_DIM]; // wrapper that points to current ghost zone
359 
360  mutable DslashConstant dslash_constant; // constants used by dslash and packing kernels
361 
362  size_t bytes; // size in bytes of spinor field
363  size_t norm_bytes; // size in bytes of norm field
364 
369 
370  // in the case of full fields, these are references to the even / odd sublattices
373 
376  //
377  CompositeColorSpinorField components;
378 
384  void createGhostZone(int nFace, bool spin_project=true) const;
385 
386  // resets the above attributes based on contents of param
387  void reset(const ColorSpinorParam &);
388  void fill(ColorSpinorParam &) const;
389  static void checkField(const ColorSpinorField &, const ColorSpinorField &);
390 
394  void setTuningString();
395 
396  public:
397  //ColorSpinorField();
400 
401  virtual ~ColorSpinorField();
402 
403  virtual ColorSpinorField& operator=(const ColorSpinorField &);
404 
405  int Ncolor() const { return nColor; }
406  int Nspin() const { return nSpin; }
407  int Nvec() const { return nVec; }
408  QudaTwistFlavorType TwistFlavor() const { return twistFlavor; }
409  int Ndim() const { return nDim; }
410  const int* X() const { return x; }
411  int X(int d) const { return x[d]; }
412  size_t RealLength() const { return real_length; }
413  size_t Length() const { return length; }
414  int Stride() const { return stride; }
415  int Volume() const { return volume; }
416  int VolumeCB() const { return siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume / 2; }
417  int Pad() const { return pad; }
418  size_t Bytes() const { return bytes; }
419  size_t NormBytes() const { return norm_bytes; }
420  size_t GhostBytes() const { return ghost_bytes; }
421  size_t GhostNormBytes() const { return ghost_bytes; }
422  void PrintDims() const { printfQuda("dimensions=%d %d %d %d\n", x[0], x[1], x[2], x[3]); }
423 
424  void* V() {return v;}
425  const void* V() const {return v;}
426  void* Norm(){return norm;}
427  const void* Norm() const {return norm;}
428  virtual const void* Ghost2() const { return nullptr; }
429 
438  void exchange(void **ghost, void **sendbuf, int nFace=1) const;
439 
454  virtual void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr,
455  const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false,
456  QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const = 0;
457 
463  bool isNative() const;
464 
465  bool IsComposite() const { return composite_descr.is_composite; }
466  bool IsComponent() const { return composite_descr.is_component; }
467 
468  int CompositeDim() const { return composite_descr.dim; }
469  int ComponentId() const { return composite_descr.id; }
470  int ComponentVolume() const { return composite_descr.volume; }
471  int ComponentVolumeCB() const { return composite_descr.volumeCB; }
472  int ComponentStride() const { return composite_descr.stride; }
473  size_t ComponentLength() const { return composite_descr.length; }
474  size_t ComponentRealLength() const { return composite_descr.real_length; }
475 
476  size_t ComponentBytes() const { return composite_descr.bytes; }
477  size_t ComponentNormBytes() const { return composite_descr.norm_bytes; }
478 
479  QudaPCType PCType() const { return pc_type; }
480 
481  QudaSiteSubset SiteSubset() const { return siteSubset; }
482  QudaSiteOrder SiteOrder() const { return siteOrder; }
483  QudaFieldOrder FieldOrder() const { return fieldOrder; }
484  QudaGammaBasis GammaBasis() const { return gammaBasis; }
485 
486  const int *GhostFace() const { return ghostFace; }
487  const int *GhostFaceCB() const { return ghostFaceCB; }
488  int GhostOffset(const int i) const { return ghostOffset[i][0]; }
489  int GhostOffset(const int i, const int j) const { return ghostOffset[i][j]; }
490  int GhostNormOffset(const int i ) const { return ghostNormOffset[i][0]; }
491  int GhostNormOffset(const int i, const int j) const { return ghostNormOffset[i][j]; }
492 
493  void* Ghost(const int i);
494  const void* Ghost(const int i) const;
495  void* GhostNorm(const int i);
496  const void* GhostNorm(const int i) const;
497 
501  void* const* Ghost() const;
502 
506  const DslashConstant& getDslashConstant() const { return dslash_constant; }
507 
508  const ColorSpinorField& Even() const;
509  const ColorSpinorField& Odd() const;
510 
511  ColorSpinorField& Even();
512  ColorSpinorField& Odd();
513 
514  ColorSpinorField& Component(const int idx) const;
515  ColorSpinorField& Component(const int idx);
516 
517  CompositeColorSpinorField& Components(){
518  return components;
519  };
520 
521  virtual void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0) = 0;
522 
523  virtual void PrintVector(unsigned int x) const = 0;
524 
530  void LatticeIndex(int *y, int i) const;
531 
537  void OffsetIndex(int &i, int *y) const;
538 
539  static ColorSpinorField* Create(const ColorSpinorParam &param);
540  static ColorSpinorField* Create(const ColorSpinorField &src, const ColorSpinorParam &param);
541 
551  ColorSpinorField* CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec,
555 
565  ColorSpinorField* CreateFine(const int *geoblockSize, int spinBlockSize, int Nvec,
569 
570  friend std::ostream& operator<<(std::ostream &out, const ColorSpinorField &);
571  friend class ColorSpinorParam;
572  };
573 
574  // CUDA implementation
576 
577  friend class cpuColorSpinorField;
578 
579  private:
580  bool alloc; // whether we allocated memory
581  bool init;
582 
583  bool texInit; // whether a texture object has been created or not
584  mutable bool ghostTexInit; // whether the ghost texture object has been created
587 #ifdef USE_TEXTURE_OBJECTS
588  cudaTextureObject_t tex;
589  cudaTextureObject_t texNorm;
590  void createTexObject();
591  void destroyTexObject();
592  mutable cudaTextureObject_t ghostTex[4]; // these are double buffered and variants to host-mapped buffers
593  mutable cudaTextureObject_t ghostTexNorm[4];
594  void createGhostTexObject() const;
595  void destroyGhostTexObject() const;
596 #endif
597 
598  bool reference; // whether the field is a reference or not
599 
600  mutable void *ghost_field_tex[4]; // instance pointer to GPU halo buffer (used to check if static allocation has changed)
601 
602  void create(const QudaFieldCreate);
603  void destroy();
604  void copy(const cudaColorSpinorField &);
605 
611  void zeroPad();
612 
617  void copySpinorField(const ColorSpinorField &src);
618 
619  void loadSpinorField(const ColorSpinorField &src);
620  void saveSpinorField (ColorSpinorField &src) const;
621 
622  public:
623 
624  //cudaColorSpinorField();
629  virtual ~cudaColorSpinorField();
630 
631  ColorSpinorField& operator=(const ColorSpinorField &);
632  cudaColorSpinorField& operator=(const cudaColorSpinorField&);
633  cudaColorSpinorField& operator=(const cpuColorSpinorField&);
634 
635  void switchBufferPinned();
636 
642  void createComms(int nFace, bool spin_project=true);
643 
649  void allocateGhostBuffer(int nFace, bool spin_project=true) const;
650 
669  void packGhost(const int nFace, const QudaParity parity, const int dim, const QudaDirection dir, const int dagger,
670  cudaStream_t *stream, MemoryLocation location[2 * QUDA_MAX_DIM], MemoryLocation location_label,
671  bool spin_project, double a = 0, double b = 0, double c = 0);
672 
673  void packGhostExtended(const int nFace, const int R[], const QudaParity parity, const int dim, const QudaDirection dir,
674  const int dagger,cudaStream_t* stream, bool zero_copy=false);
675 
685  void sendGhost(void *ghost_spinor, const int nFace, const int dim, const QudaDirection dir,
686  const int dagger, cudaStream_t *stream);
687 
697  void unpackGhost(const void* ghost_spinor, const int nFace, const int dim,
698  const QudaDirection dir, const int dagger, cudaStream_t* stream);
699 
711  void unpackGhostExtended(const void* ghost_spinor, const int nFace, const QudaParity parity,
712  const int dim, const QudaDirection dir, const int dagger, cudaStream_t* stream, bool zero_copy);
713 
714 
715  void streamInit(cudaStream_t *stream_p);
716 
733  void pack(int nFace, int parity, int dagger, int stream_idx, MemoryLocation location[],
734  MemoryLocation location_label, bool spin_project = true, double a = 0, double b = 0, double c = 0);
735 
736  void packExtended(const int nFace, const int R[], const int parity, const int dagger,
737  const int dim, cudaStream_t *stream_p, const bool zeroCopyPack=false);
738 
739  void gather(int nFace, int dagger, int dir, cudaStream_t *stream_p=NULL);
740 
750  void recvStart(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=nullptr, bool gdr=false);
751 
763  void sendStart(int nFace, int d, int dagger=0, cudaStream_t *stream_p=nullptr, bool gdr=false, bool remote_write=false);
764 
775  void commsStart(int nFace, int d, int dagger=0, cudaStream_t *stream_p=nullptr, bool gdr_send=false, bool gdr_recv=false);
776 
787  int commsQuery(int nFace, int d, int dagger=0, cudaStream_t *stream_p=nullptr, bool gdr_send=false, bool gdr_recv=false);
788 
799  void commsWait(int nFace, int d, int dagger=0, cudaStream_t *stream_p=nullptr, bool gdr_send=false, bool gdr_recv=false);
800 
801  void scatter(int nFace, int dagger, int dir, cudaStream_t *stream_p);
802  void scatter(int nFace, int dagger, int dir);
803 
804  void scatterExtended(int nFace, int parity, int dagger, int dir);
805 
806  inline const void* Ghost2() const {
807  if (bufferIndex < 2) {
808  return ghost_recv_buffer_d[bufferIndex];
809  } else {
810  return ghost_pinned_recv_buffer_hd[bufferIndex%2];
811  }
812  }
813 
828  void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr,
829  const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false,
830  QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const;
831 
832 #ifdef USE_TEXTURE_OBJECTS
833  inline const cudaTextureObject_t& Tex() const { return tex; }
834  inline const cudaTextureObject_t& TexNorm() const { return texNorm; }
835  inline const cudaTextureObject_t& GhostTex() const { return ghostTex[bufferIndex]; }
836  inline const cudaTextureObject_t& GhostTexNorm() const { return ghostTexNorm[bufferIndex]; }
837 #endif
838 
839  cudaColorSpinorField& Component(const int idx) const;
840  CompositeColorSpinorField& Components() const;
841  void CopySubset(cudaColorSpinorField& dst, const int range, const int first_element=0) const;
842 
843  void zero();
844 
845  friend std::ostream& operator<<(std::ostream &out, const cudaColorSpinorField &);
846 
847  void getTexObjectInfo() const;
848 
849  void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0);
850 
851  void PrintVector(unsigned int x) const;
852 
856  void backup() const;
857 
861  void restore() const;
862  };
863 
864  // CPU implementation
866 
867  friend class cudaColorSpinorField;
868 
869  public:
870  static void* fwdGhostFaceBuffer[QUDA_MAX_DIM]; //cpu memory
871  static void* backGhostFaceBuffer[QUDA_MAX_DIM]; //cpu memory
872  static void* fwdGhostFaceSendBuffer[QUDA_MAX_DIM]; //cpu memory
873  static void* backGhostFaceSendBuffer[QUDA_MAX_DIM]; //cpu memory
875  static size_t ghostFaceBytes[QUDA_MAX_DIM];
876 
877  private:
878  //void *v; // the field elements
879  //void *norm; // the normalization field
880  bool init;
881  bool reference; // whether the field is a reference or not
882 
883  void create(const QudaFieldCreate);
884  void destroy();
885 
886  public:
887  //cpuColorSpinorField();
892  virtual ~cpuColorSpinorField();
893 
894  ColorSpinorField& operator=(const ColorSpinorField &);
895  cpuColorSpinorField& operator=(const cpuColorSpinorField&);
896  cpuColorSpinorField& operator=(const cudaColorSpinorField&);
897 
898  void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0);
899 
911  static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1);
912 
913  void PrintVector(unsigned int x) const;
914 
919  void allocateGhostBuffer(int nFace) const;
920  static void freeGhostBuffer(void);
921 
922  void packGhost(void **ghost, const QudaParity parity, const int nFace, const int dagger) const;
923  void unpackGhost(void* ghost_spinor, const int dim,
924  const QudaDirection dir, const int dagger);
925 
926  void copy(const cpuColorSpinorField&);
927  void zero();
928 
943  void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr,
944  const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false,
945  QudaPrecision ghost_precision=QUDA_INVALID_PRECISION) const;
946 
950  void backup() const;
951 
955  void restore() const;
956  };
957 
959  QudaFieldLocation location, void *Dst=0, void *Src=0,
960  void *dstNorm=0, void*srcNorm=0);
961  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c);
962  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol);
963 
964  void genericPrintVector(const cpuColorSpinorField &a, unsigned int x);
965  void genericCudaPrintVector(const cudaColorSpinorField &a, unsigned x);
966 
967  void wuppertalStep(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField& U, double A, double B);
968  void wuppertalStep(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField& U, double alpha);
969 
970  void exchangeExtendedGhost(cudaColorSpinorField* spinor, int R[], int parity, cudaStream_t *stream_p);
971 
973  QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm);
974 
984  void genericPackGhost(void **ghost, const ColorSpinorField &a, QudaParity parity,
985  int nFace, int dagger, MemoryLocation *destination=nullptr);
986 
993  void spinorNoise(ColorSpinorField &src, RNG& randstates, QudaNoiseType type);
994 
1002  void spinorNoise(ColorSpinorField &src, unsigned long long seed, QudaNoiseType type);
1003 
1012  const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
1013  {
1014  QudaPCType type = QUDA_PC_INVALID;
1015  if (a.PCType() == b.PCType())
1016  type = a.PCType();
1017  else
1018  errorQuda("PCTypes %d %d do not match (%s:%d in %s())\n", a.PCType(), b.PCType(), file, line, func);
1019  return type;
1020  }
1021 
1029  template <typename... Args>
1030  inline QudaPCType PCType_(const char *func, const char *file, int line, const ColorSpinorField &a,
1031  const ColorSpinorField &b, const Args &... args)
1032  {
1033  return static_cast<QudaPCType>(PCType_(func, file, line, a, b) & PCType_(func, file, line, a, args...));
1034  }
1035 
1036 #define checkPCType(...) PCType_(__func__, __FILE__, __LINE__, __VA_ARGS__)
1037 
1038 } // namespace quda
1039 
1040 #endif // _COLOR_SPINOR_FIELD_H
QudaDslashType dslash_type
Definition: test_util.cpp:1621
QudaDiracFieldOrder dirac_order
Definition: quda.h:219
int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
CompositeColorSpinorField components
void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
enum QudaPrecision_s QudaPrecision
void destroy()
Destroy the CUBLAS context.
Definition: blas_cublas.cu:38
const void * Ghost2() const
const void * Norm() const
Constants used by dslash and packing kernels.
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
enum QudaNoiseType_s QudaNoiseType
#define errorQuda(...)
Definition: util_quda.h:121
int GhostOffset(const int i, const int j) const
QudaDslashType dslash_type
Definition: quda.h:102
enum QudaPCType_s QudaPCType
enum QudaFieldOrder_s QudaFieldOrder
CompositeColorSpinorFieldDescriptor composite_descr
used for deflation eigenvector sets etc.:
QudaPrecision & cuda_prec
cudaStream_t * stream
enum QudaSiteOrder_s QudaSiteOrder
CompositeColorSpinorField & Components()
QudaGammaBasis GammaBasis() const
__host__ __device__ void copy(T1 &a, const T2 &b)
size_t ComponentLength() const
static int R[4]
DslashConstant dslash_constant
void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
ColorSpinorParam(ColorSpinorParam &cpuParam, QudaInvertParam &inv_param, QudaFieldLocation location=QUDA_CUDA_FIELD_LOCATION)
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
enum QudaSourceType_s QudaSourceType
QudaPCType PCType_(const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b)
Helper function for determining if the preconditioning type of the fields is the same.
QudaGaugeParam param
Definition: pack_test.cpp:17
bool is_composite
for deflation solvers:
enum QudaDirection_s QudaDirection
void genericPackGhost(void **ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr)
Generic ghost packing routine.
std::vector< ColorSpinorField * > CompositeColorSpinorField
ColorSpinorField * odd
double tol
Definition: test_util.cpp:1656
QudaFieldLocation location
QudaInvertParam inv_param
Definition: covdev_test.cpp:37
__device__ __host__ __forceinline__ void packGhost(Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block)
const int nColor
Definition: covdev_test.cpp:75
cpuColorSpinorField * in
int GhostNormOffset(const int i, const int j) const
QudaSiteSubset SiteSubset() const
Class declaration to initialize and hold CURAND RNG states.
Definition: random_quda.h:23
const int * GhostFaceCB() const
QudaPCType PCType() const
void exchangeExtendedGhost(cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p)
int X[4]
Definition: covdev_test.cpp:70
enum QudaParity_s QudaParity
void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
QudaTwistFlavorType twistFlavor
int GhostOffset(const int i) const
int V
Definition: test_util.cpp:27
static int dims[4]
Definition: face_gauge.cpp:41
enum QudaSiteSubset_s QudaSiteSubset
const DslashConstant & getDslashConstant() const
Get the dslash_constant structure from this field.
QudaPrecision ghost_precision_allocated
CompositeColorSpinorFieldDescriptor(bool is_composite, int dim, bool is_component=false, int id=0)
void wuppertalStep(ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double A, double B)
enum QudaFieldLocation_s QudaFieldLocation
cpuColorSpinorField * out
const void * V() const
__device__ __host__ void pack(Arg &arg, int ghost_idx, int s, int parity)
Definition: dslash_pack.cuh:83
void copyExtendedColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm)
enum QudaGammaBasis_s QudaGammaBasis
Main header file for the QUDA library.
__shared__ float s[]
size_t ComponentNormBytes() const
#define printfQuda(...)
Definition: util_quda.h:115
QudaTwistFlavorType twistFlavor
QudaSiteOrder SiteOrder() const
QudaTwistFlavorType TwistFlavor() const
QudaTwistFlavorType twist_flavor
Definition: test_util.cpp:1660
const int * GhostFace() const
size_t ComponentRealLength() const
enum QudaFieldCreate_s QudaFieldCreate
const int * X() const
QudaPrecision & cpu_prec
virtual const void * Ghost2() const
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
ColorSpinorParam(void *V, QudaInvertParam &inv_param, const int *X, const bool pc_solution, QudaFieldLocation location=QUDA_CPU_FIELD_LOCATION)
int GhostNormOffset(const int i) const
QudaDagType dagger
Definition: test_util.cpp:1620
QudaParity parity
Definition: covdev_test.cpp:54
QudaFieldOrder FieldOrder() const
__device__ __host__ void zero(vector_type< scalar, n > &v)
Definition: cub_helper.cuh:54
ColorSpinorField * even
CompositeColorSpinorFieldDescriptor(const CompositeColorSpinorFieldDescriptor &descr)
enum QudaMemoryType_s QudaMemoryType
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:41
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
void genericCudaPrintVector(const cudaColorSpinorField &a, unsigned x)
enum QudaTwistFlavorType_s QudaTwistFlavorType