QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
lattice_field.h
Go to the documentation of this file.
1 #ifndef _LATTICE_FIELD_H
2 #define _LATTICE_FIELD_H
3 
4 #include <map>
5 #include <quda.h>
6 #include <iostream>
7 #include <comm_quda.h>
8 #include <util_quda.h>
9 #include <object.h>
10 #include <cuda_runtime.h>
11 
20 namespace quda {
21 
22  // LatticeField is an abstract base class for all Field objects.
23 
24  // Forward declaration of all children
25  class LatticeField;
26 
27  class ColorSpinorField;
28  class cudaColorSpinorField;
29  class cpuColorSpinorField;
30 
31  class EigValueSet;
32  class cudaEigValueSet;
33  class cpuEigValueSet;
34 
35  class EigVecSet;
36  class cpuEigVecSet;
37  class cudaEigVecSet;
38 
39  class GaugeField;
40  class cpuGaugeField;
41  class cudaGaugeField;
42 
43  class CloverField;
44  class cudaCloverField;
45  class cpuCloverField;
46 
48 
49  protected:
52 
55 
56  public:
58  QudaPrecision Precision() const { return precision; }
59 
62 
64  int nDim;
65 
68 
69  int pad;
70 
72 
74 
77 
80 
82  double scale;
83 
88  : precision(QUDA_INVALID_PRECISION), ghost_precision(QUDA_INVALID_PRECISION), nDim(4), pad(0),
89  siteSubset(QUDA_INVALID_SITE_SUBSET), mem_type(QUDA_MEMORY_DEVICE),
90  ghostExchange(QUDA_GHOST_EXCHANGE_PAD), scale(1.0)
91  {
92  for (int i=0; i<nDim; i++) {
93  x[i] = 0;
94  r[i] = 0;
95  }
96  }
97 
106  LatticeFieldParam(int nDim, const int *x, int pad, QudaPrecision precision,
108  : precision(precision), ghost_precision(precision), nDim(nDim), pad(pad),
109  siteSubset(QUDA_FULL_SITE_SUBSET), mem_type(QUDA_MEMORY_DEVICE),
110  ghostExchange(ghostExchange), scale(1.0)
111  {
112  if (nDim > QUDA_MAX_DIM) errorQuda("Number of dimensions too great");
113  for (int i=0; i<nDim; i++) {
114  this->x[i] = x[i];
115  this->r[i] = 0;
116  }
117  }
118 
126  : precision(param.cpu_prec), ghost_precision(param.cpu_prec), nDim(4), pad(0),
127  siteSubset(QUDA_FULL_SITE_SUBSET), mem_type(QUDA_MEMORY_DEVICE),
128  ghostExchange(QUDA_GHOST_EXCHANGE_NO), scale(param.scale)
129  {
130  for (int i=0; i<nDim; i++) {
131  this->x[i] = param.X[i];
132  this->r[i] = 0;
133  }
134  }
135 
139  LatticeFieldParam(const LatticeField &field);
140  };
141 
142  std::ostream& operator<<(std::ostream& output, const LatticeFieldParam& param);
143 
144  class LatticeField : public Object {
145 
146  protected:
148  int volume;
149 
151  int volumeCB;
152 
153  int stride;
154  int pad;
155 
156  size_t total_bytes;
157 
159  int nDim;
160 
163 
164  int surface[QUDA_MAX_DIM];
165  int surfaceCB[QUDA_MAX_DIM];
166 
169 
172 
175 
177  mutable bool ghost_precision_reset;
178 
180  double scale;
181 
184 
187 
188  // The below are additions for inter-GPU communication (merging FaceBuffer functionality)
189 
192 
193  /*
194  The need for persistent message handlers (for GPUDirect support)
195  means that we allocate different message handlers for each number of
196  faces we can send.
197  */
198 
202  static void *ghost_send_buffer_d[2];
203 
207  static void *ghost_recv_buffer_d[2];
208 
212  static void *ghost_pinned_send_buffer_h[2];
213 
217  static void *ghost_pinned_recv_buffer_h[2];
218 
222  static void *ghost_pinned_send_buffer_hd[2];
223 
227  static void *ghost_pinned_recv_buffer_hd[2];
228 
232  static void *ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2];
233 
237  static size_t ghostFaceBytes;
238 
242  static bool initGhostFaceBuffer;
243 
247  mutable size_t ghost_bytes;
248 
252  mutable size_t ghost_bytes_old;
253 
257  mutable size_t ghost_face_bytes[QUDA_MAX_DIM];
258 
262  mutable int ghostOffset[QUDA_MAX_DIM][2];
263 
267  mutable int ghostNormOffset[QUDA_MAX_DIM][2];
268 
272  void *my_face_h[2];
273 
277  void *my_face_hd[2];
278 
282  void *my_face_d[2];
283 
285  void *my_face_dim_dir_h[2][QUDA_MAX_DIM][2];
286 
288  void *my_face_dim_dir_hd[2][QUDA_MAX_DIM][2];
289 
291  void *my_face_dim_dir_d[2][QUDA_MAX_DIM][2];
292 
296  void *from_face_h[2];
297 
301  void *from_face_hd[2];
302 
306  void *from_face_d[2];
307 
309  void *from_face_dim_dir_h[2][QUDA_MAX_DIM][2];
310 
312  void *from_face_dim_dir_hd[2][QUDA_MAX_DIM][2];
313 
315  void *from_face_dim_dir_d[2][QUDA_MAX_DIM][2];
316 
318  MsgHandle *mh_recv_fwd[2][QUDA_MAX_DIM];
319 
321  MsgHandle *mh_recv_back[2][QUDA_MAX_DIM];
322 
324  MsgHandle *mh_send_fwd[2][QUDA_MAX_DIM];
325 
327  MsgHandle *mh_send_back[2][QUDA_MAX_DIM];
328 
330  MsgHandle *mh_recv_rdma_fwd[2][QUDA_MAX_DIM];
331 
333  MsgHandle *mh_recv_rdma_back[2][QUDA_MAX_DIM];
334 
336  MsgHandle *mh_send_rdma_fwd[2][QUDA_MAX_DIM];
337 
339  MsgHandle *mh_send_rdma_back[2][QUDA_MAX_DIM];
340 
342  static MsgHandle* mh_send_p2p_fwd[2][QUDA_MAX_DIM];
343 
345  static MsgHandle* mh_send_p2p_back[2][QUDA_MAX_DIM];
346 
348  static MsgHandle* mh_recv_p2p_fwd[2][QUDA_MAX_DIM];
349 
351  static MsgHandle* mh_recv_p2p_back[2][QUDA_MAX_DIM];
352 
354  static int buffer_send_p2p_fwd[2][QUDA_MAX_DIM];
355 
357  static int buffer_recv_p2p_fwd[2][QUDA_MAX_DIM];
358 
360  static int buffer_send_p2p_back[2][QUDA_MAX_DIM];
361 
363  static int buffer_recv_p2p_back[2][QUDA_MAX_DIM];
364 
366  static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM];
367 
369  static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM];
370 
372  bool initComms;
373 
375  static bool initIPCComms;
376 
378  char vol_string[TuneKey::volume_n];
379 
381  char aux_string[TuneKey::aux_n];
382 
384  virtual void setTuningString();
385 
388 
389  void precisionCheck() {
390  switch(precision) {
392  case QUDA_HALF_PRECISION:
395  break;
396  default:
397  errorQuda("Unknown precision %d\n", precision);
398  }
399  }
400 
401  mutable char *backup_h;
402  mutable char *backup_norm_h;
403  mutable bool backed_up;
404 
405  public:
406 
411  LatticeField(const LatticeFieldParam &param);
412 
418  LatticeField(const LatticeField &field);
419 
423  virtual ~LatticeField();
424 
429  void allocateGhostBuffer(size_t ghost_bytes) const;
430 
434  static void freeGhostBuffer(void);
435 
444  void createComms(bool no_comms_fill=false, bool bidir=true);
445 
449  void destroyComms();
450 
454  void createIPCComms();
455 
459  static void destroyIPCComms();
460 
464  inline bool ipcCopyComplete(int dir, int dim);
465 
469  inline bool ipcRemoteCopyComplete(int dir, int dim);
470 
474  const cudaEvent_t& getIPCCopyEvent(int dir, int dim) const;
475 
479  const cudaEvent_t& getIPCRemoteCopyEvent(int dir, int dim) const;
480 
484  static int bufferIndex;
485 
489  static bool ghost_field_reset;
490 
494  int Ndim() const { return nDim; }
495 
499  const int* X() const { return x; }
500 
504  int Volume() const { return volume; }
505 
509  int VolumeCB() const { return volumeCB; }
510 
515  const int* SurfaceCB() const { return surfaceCB; }
516 
521  int SurfaceCB(const int i) const { return surfaceCB[i]; }
522 
526  int Stride() const { return stride; }
527 
531  int Pad() const { return pad; }
532 
536  const int* R() const { return r; }
537 
542 
546  QudaPrecision Precision() const { return precision; }
547 
552 
556  double Scale() const { return scale; }
557 
562  void Scale(double scale_) { scale = scale_; }
563 
567  virtual QudaSiteSubset SiteSubset() const { return siteSubset; }
568 
572  virtual QudaMemoryType MemType() const { return mem_type; }
573 
578  int Nvec() const;
579 
583  QudaFieldLocation Location() const;
584 
588  size_t GBytes() const { return total_bytes / (1<<30); }
589 
594  void checkField(const LatticeField &a) const;
595 
600  virtual void read(char *filename);
601 
606  virtual void write(char *filename);
607 
608  virtual void gather(int nFace, int dagger, int dir, cudaStream_t *stream_p=NULL)
609  { errorQuda("Not implemented"); }
610 
611  virtual void commsStart(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
612  { errorQuda("Not implemented"); }
613 
614  virtual int commsQuery(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
615  { errorQuda("Not implemented"); return 0; }
616 
617  virtual void commsWait(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
618  { errorQuda("Not implemented"); }
619 
620  virtual void scatter(int nFace, int dagger, int dir)
621  { errorQuda("Not implemented"); }
622 
624  inline const char *VolString() const { return vol_string; }
625 
627  inline const char *AuxString() const { return aux_string; }
628 
630  virtual void backup() const { errorQuda("Not implemented"); }
631 
633  virtual void restore() const { errorQuda("Not implemented"); }
634  };
635 
642  inline QudaFieldLocation Location_(const char *func, const char *file, int line,
643  const LatticeField &a, const LatticeField &b) {
645  if (a.Location() == b.Location()) location = a.Location();
646  else errorQuda("Locations %d %d do not match (%s:%d in %s())\n",
647  a.Location(), b.Location(), file, line, func);
648  return location;
649  }
650 
658  template <typename... Args>
659  inline QudaFieldLocation Location_(const char *func, const char *file, int line,
660  const LatticeField &a, const LatticeField &b, const Args &... args) {
661  return static_cast<QudaFieldLocation>(Location_(func,file,line,a,b) & Location_(func,file,line,a,args...));
662  }
663 
664 #define checkLocation(...)Location_(__func__, __FILE__, __LINE__, __VA_ARGS__)
665 
672  inline QudaPrecision Precision_(const char *func, const char *file, int line,
673  const LatticeField &a, const LatticeField &b) {
675  if (a.Precision() == b.Precision()) precision = a.Precision();
676  else errorQuda("Precisions %d %d do not match (%s:%d in %s())\n",
677  a.Precision(), b.Precision(), file, line, func);
678  return precision;
679  }
680 
688  template <typename... Args>
689  inline QudaPrecision Precision_(const char *func, const char *file, int line,
690  const LatticeField &a, const LatticeField &b,
691  const Args &... args) {
692  return static_cast<QudaPrecision>(Precision_(func,file,line,a,b) & Precision_(func,file,line,a,args...));
693  }
694 
695 #define checkPrecision(...) Precision_(__func__, __FILE__, __LINE__, __VA_ARGS__)
696 
704 
712 
718  inline const char *compile_type_str(const LatticeField &meta, QudaFieldLocation location_ = QUDA_INVALID_FIELD_LOCATION)
719  {
720  QudaFieldLocation location = (location_ == QUDA_INVALID_FIELD_LOCATION ? meta.Location() : location_);
721 #ifdef JITIFY
722  return location == QUDA_CUDA_FIELD_LOCATION ? "GPU-jitify," : "CPU,";
723 #else
724  return location == QUDA_CUDA_FIELD_LOCATION ? "GPU-offline," : "CPU,";
725 #endif
726  }
727 
728 } // namespace quda
729 
730 #endif // _LATTICE_FIELD_H
QudaPrecision Precision_(const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
Helper function for determining if the precision of the fields is the same.
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the env...
QudaGhostExchange ghostExchange
Definition: lattice_field.h:76
enum QudaPrecision_s QudaPrecision
const char * AuxString() const
virtual void commsWait(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
int Stride() const
#define errorQuda(...)
Definition: util_quda.h:121
QudaSiteSubset siteSubset
QudaPrecision GhostPrecision() const
virtual int commsQuery(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
double Scale() const
int SurfaceCB(const int i) const
QudaPrecision GhostPrecision() const
Definition: lattice_field.h:61
const char * VolString() const
QudaPrecision precision
Definition: lattice_field.h:51
const int * SurfaceCB() const
LatticeFieldParam(const QudaGaugeParam &param)
Constructor for creating a LatticeFieldParam from a QudaGaugeParam. Used for wrapping around a CPU re...
static bool initGhostFaceBuffer
const char * compile_type_str(const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION)
Helper function for setting the auxiliary string.
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
QudaGaugeParam param
Definition: pack_test.cpp:17
virtual QudaMemoryType MemType() const
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
const int * R() const
static bool ghost_field_reset
static int bufferIndex
enum QudaGhostExchange_s QudaGhostExchange
QudaGhostExchange ghostExchange
int Volume() const
virtual void backup() const
Backs up the LatticeField.
int X[4]
Definition: quda.h:36
enum QudaSiteSubset_s QudaSiteSubset
QudaFieldLocation Location() const
static QudaFieldLocation reorder_location_
static size_t ghostFaceBytes
enum QudaFieldLocation_s QudaFieldLocation
void Scale(double scale_)
Set the scale factor for a fixed-point field.
QudaPrecision ghost_precision
virtual void scatter(int nFace, int dagger, int dir)
Main header file for the QUDA library.
QudaPrecision Precision() const
Definition: lattice_field.h:58
static const int aux_n
Definition: tune_key.h:12
virtual QudaSiteSubset SiteSubset() const
QudaMemoryType mem_type
Definition: lattice_field.h:73
int VolumeCB() const
LatticeFieldParam()
Default constructor for LatticeFieldParam.
Definition: lattice_field.h:87
QudaPrecision ghost_precision
Definition: lattice_field.h:54
QudaPrecision & cpu_prec
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
static int volumeCB
Definition: face_gauge.cpp:43
QudaGhostExchange GhostExchange() const
virtual void commsStart(int nFace, int dir, int dagger=0, cudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
static const int volume_n
Definition: tune_key.h:10
LatticeFieldParam(int nDim, const int *x, int pad, QudaPrecision precision, QudaGhostExchange ghostExchange=QUDA_GHOST_EXCHANGE_PAD)
Constructor for creating a LatticeFieldParam from a set of parameters.
QudaPrecision Precision() const
QudaDagType dagger
Definition: test_util.cpp:1620
int r[QUDA_MAX_DIM]
Definition: lattice_field.h:79
void reorder_location_set(QudaFieldLocation reorder_location_)
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environ...
QudaMemoryType mem_type
QudaPrecision precision
QudaFieldLocation Location_(const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
Helper function for determining if the location of the fields is the same.
virtual void gather(int nFace, int dagger, int dir, cudaStream_t *stream_p=NULL)
virtual void restore() const
Restores the LatticeField.
enum QudaMemoryType_s QudaMemoryType
static bool initIPCComms
size_t GBytes() const
const int * X() const