QUDA  v1.1.0
A library for QCD on GPUs
lattice_field.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <map>
4 #include <quda.h>
5 #include <iostream>
6 #include <comm_quda.h>
7 #include <util_quda.h>
8 #include <object.h>
9 #include <quda_api.h>
10 
19 namespace quda {
20 
21  // LatticeField is an abstract base class for all Field objects.
22 
23  // Forward declaration of all children
24  class LatticeField;
25 
26  class ColorSpinorField;
27  class cudaColorSpinorField;
28  class cpuColorSpinorField;
29 
30  class EigValueSet;
31  class cudaEigValueSet;
32  class cpuEigValueSet;
33 
34  class EigVecSet;
35  class cpuEigVecSet;
36  class cudaEigVecSet;
37 
38  class GaugeField;
39  class cpuGaugeField;
40  class cudaGaugeField;
41 
42  class CloverField;
43  class cudaCloverField;
44  class cpuCloverField;
45 
47 
49 
50  protected:
53 
56 
57  public:
59  QudaPrecision Precision() const { return precision; }
60 
63 
65  int nDim;
66 
69 
70  int pad;
71 
73 
75 
78 
81 
83  double scale;
84 
92  {
93  for (int i=0; i<nDim; i++) {
94  x[i] = 0;
95  r[i] = 0;
96  }
97  }
98 
112  {
113  if (nDim > QUDA_MAX_DIM) errorQuda("Number of dimensions too great");
114  for (int i=0; i<nDim; i++) {
115  this->x[i] = x[i];
116  this->r[i] = 0;
117  }
118  }
119 
130  {
131  for (int i=0; i<nDim; i++) {
132  this->x[i] = param.X[i];
133  this->r[i] = 0;
134  }
135  }
136 
140  LatticeFieldParam(const LatticeField &field);
141  };
142 
143  std::ostream& operator<<(std::ostream& output, const LatticeFieldParam& param);
144 
145  class LatticeField : public Object {
146 
147  protected:
149  size_t volume;
150 
152  size_t volumeCB;
153 
155  size_t localVolume;
156 
159 
160  size_t stride;
161  int pad;
162 
163  size_t total_bytes;
164 
166  int nDim;
167 
170 
173 
176 
179 
182 
184  mutable bool ghost_precision_reset;
185 
187  double scale;
188 
191 
194 
195  // The below are additions for inter-GPU communication (merging FaceBuffer functionality)
196 
199 
200  /*
201  The need for persistent message handlers (for GPUDirect support)
202  means that we allocate different message handlers for each number of
203  faces we can send.
204  */
205 
209  static void *ghost_send_buffer_d[2];
210 
214  static void *ghost_recv_buffer_d[2];
215 
219  static void *ghost_pinned_send_buffer_h[2];
220 
224  static void *ghost_pinned_recv_buffer_h[2];
225 
230 
235 
240 
244  static size_t ghostFaceBytes;
245 
249  static bool initGhostFaceBuffer;
250 
254  mutable size_t ghost_bytes;
255 
259  mutable size_t ghost_bytes_old;
260 
265 
270 
274  mutable size_t ghost_offset[QUDA_MAX_DIM][2];
275 
279  void *my_face_h[2];
280 
284  void *my_face_hd[2];
285 
289  void *my_face_d[2];
290 
293 
296 
299 
303  void *from_face_h[2];
304 
308  void *from_face_hd[2];
309 
313  void *from_face_d[2];
314 
317 
320 
323 
326 
329 
332 
335 
338 
341 
344 
347 
350 
353 
356 
359 
362 
365 
368 
371 
373  static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM];
374 
376  static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM];
377 
379  bool initComms;
380 
382  static bool initIPCComms;
383 
386 
389 
391  virtual void setTuningString();
392 
395 
397  {
398  switch (precision) {
400  case QUDA_HALF_PRECISION:
402  case QUDA_DOUBLE_PRECISION: break;
403  default: errorQuda("Unknown precision %d\n", precision);
404  }
405  }
406 
407  mutable char *backup_h;
408  mutable char *backup_norm_h;
409  mutable bool backed_up;
410 
411  public:
412 
418 
424  LatticeField(const LatticeField &field);
425 
429  virtual ~LatticeField();
430 
435  void allocateGhostBuffer(size_t ghost_bytes) const;
436 
440  static void freeGhostBuffer(void);
441 
450  void createComms(bool no_comms_fill=false, bool bidir=true);
451 
455  void destroyComms();
456 
460  void createIPCComms();
461 
465  static void destroyIPCComms();
466 
470  inline bool ipcCopyComplete(int dir, int dim);
471 
475  inline bool ipcRemoteCopyComplete(int dir, int dim);
476 
480  const cudaEvent_t& getIPCCopyEvent(int dir, int dim) const;
481 
485  const cudaEvent_t& getIPCRemoteCopyEvent(int dir, int dim) const;
486 
490  static int bufferIndex;
491 
495  static bool ghost_field_reset;
496 
500  int Ndim() const { return nDim; }
501 
505  const int* X() const { return x; }
506 
510  virtual int full_dim(int d) const = 0;
511 
515  size_t Volume() const { return volume; }
516 
520  size_t VolumeCB() const { return volumeCB; }
521 
525  size_t LocalVolume() const { return localVolume; }
526 
530  size_t LocalVolumeCB() const { return localVolumeCB; }
531 
536  const int* SurfaceCB() const { return surfaceCB; }
537 
542  int SurfaceCB(const int i) const { return surfaceCB[i]; }
543 
547  size_t Stride() const { return stride; }
548 
552  int Pad() const { return pad; }
553 
557  const int* R() const { return r; }
558 
563 
567  QudaPrecision Precision() const { return precision; }
568 
573 
577  double Scale() const { return scale; }
578 
583  void Scale(double scale_) { scale = scale_; }
584 
588  virtual QudaSiteSubset SiteSubset() const { return siteSubset; }
589 
593  virtual QudaMemoryType MemType() const { return mem_type; }
594 
599  int Nvec() const;
600 
604  QudaFieldLocation Location() const;
605 
609  size_t GBytes() const { return total_bytes / (1<<30); }
610 
615  void checkField(const LatticeField &a) const;
616 
621  virtual void read(char *filename);
622 
627  virtual void write(char *filename);
628 
636  void *myFace_h(int dir, int dim) const { return my_face_dim_dir_h[bufferIndex][dim][dir]; }
637 
645  void *myFace_hd(int dir, int dim) const { return my_face_dim_dir_hd[bufferIndex][dim][dir]; }
646 
654  void *myFace_d(int dir, int dim) const { return my_face_dim_dir_d[bufferIndex][dim][dir]; }
655 
665  void *remoteFace_d(int dir, int dim) const { return ghost_remote_send_buffer_d[bufferIndex][dim][dir]; }
666 
673  void *remoteFace_r() const { return ghost_recv_buffer_d[bufferIndex]; }
674 
675  virtual void gather(int nFace, int dagger, int dir, qudaStream_t *stream_p = NULL) { errorQuda("Not implemented"); }
676 
677  virtual void commsStart(int nFace, int dir, int dagger = 0, qudaStream_t *stream_p = NULL, bool gdr_send = false,
678  bool gdr_recv = true)
679  { errorQuda("Not implemented"); }
680 
681  virtual int commsQuery(int nFace, int dir, int dagger = 0, qudaStream_t *stream_p = NULL, bool gdr_send = false,
682  bool gdr_recv = true)
683  { errorQuda("Not implemented"); return 0; }
684 
685  virtual void commsWait(int nFace, int dir, int dagger = 0, qudaStream_t *stream_p = NULL, bool gdr_send = false,
686  bool gdr_recv = true)
687  { errorQuda("Not implemented"); }
688 
689  virtual void scatter(int nFace, int dagger, int dir)
690  { errorQuda("Not implemented"); }
691 
693  inline const char *VolString() const { return vol_string; }
694 
696  inline const char *AuxString() const { return aux_string; }
697 
699  virtual void backup() const { errorQuda("Not implemented"); }
700 
702  virtual void restore() const { errorQuda("Not implemented"); }
703 
709  virtual void prefetch(QudaFieldLocation mem_space, qudaStream_t stream = 0) const { ; }
710 
711  virtual bool isNative() const = 0;
712 
720  virtual void copy_to_buffer(void *buffer) const = 0;
721 
729  virtual void copy_from_buffer(void *buffer) = 0;
730  };
731 
738  inline QudaFieldLocation Location_(const char *func, const char *file, int line,
739  const LatticeField &a, const LatticeField &b) {
741  if (a.Location() == b.Location()) location = a.Location();
742  else errorQuda("Locations %d %d do not match (%s:%d in %s())\n",
743  a.Location(), b.Location(), file, line, func);
744  return location;
745  }
746 
754  template <typename... Args>
755  inline QudaFieldLocation Location_(const char *func, const char *file, int line,
756  const LatticeField &a, const LatticeField &b, const Args &... args) {
757  return static_cast<QudaFieldLocation>(Location_(func,file,line,a,b) & Location_(func,file,line,a,args...));
758  }
759 
760 #define checkLocation(...) Location_(__func__, __FILE__, __LINE__, __VA_ARGS__)
761 
768  inline QudaPrecision Precision_(const char *func, const char *file, int line,
769  const LatticeField &a, const LatticeField &b) {
771  if (a.Precision() == b.Precision()) precision = a.Precision();
772  else errorQuda("Precisions %d %d do not match (%s:%d in %s())\n",
773  a.Precision(), b.Precision(), file, line, func);
774  return precision;
775  }
776 
784  template <typename... Args>
785  inline QudaPrecision Precision_(const char *func, const char *file, int line,
786  const LatticeField &a, const LatticeField &b,
787  const Args &... args) {
788  return static_cast<QudaPrecision>(Precision_(func,file,line,a,b) & Precision_(func,file,line,a,args...));
789  }
790 
791 #define checkPrecision(...) Precision_(__func__, __FILE__, __LINE__, __VA_ARGS__)
792 
798  inline bool Native_(const char *func, const char *file, int line, const LatticeField &a)
799  {
800  if (!a.isNative()) errorQuda("Non-native field detected (%s:%d in %s())\n", file, line, func);
801  return true;
802  }
803 
810  template <typename... Args>
811  inline bool Native_(const char *func, const char *file, int line, const LatticeField &a, const Args &... args)
812  {
813  return (Native_(func, file, line, a) & Native_(func, file, line, args...));
814  }
815 
816 #define checkNative(...) Native_(__func__, __FILE__, __LINE__, __VA_ARGS__)
817 
825 
832  void reorder_location_set(QudaFieldLocation reorder_location_);
833 
839  inline const char *compile_type_str(const LatticeField &meta, QudaFieldLocation location_ = QUDA_INVALID_FIELD_LOCATION)
840  {
841  QudaFieldLocation location = (location_ == QUDA_INVALID_FIELD_LOCATION ? meta.Location() : location_);
842 #ifdef JITIFY
843  return location == QUDA_CUDA_FIELD_LOCATION ? "GPU-jitify," : "CPU,";
844 #else
845  return location == QUDA_CUDA_FIELD_LOCATION ? "GPU-offline," : "CPU,";
846 #endif
847  }
848 
849 } // namespace quda
QudaGhostExchange ghostExchange
void * remoteFace_r() const
Return base pointer to the ghost recv buffer. Since this is a base pointer, one still needs to take c...
virtual void scatter(int nFace, int dagger, int dir)
int SurfaceCB(const int i) const
static bool initGhostFaceBuffer
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
QudaSiteSubset siteSubset
virtual void commsStart(int nFace, int dir, int dagger=0, qudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
void * myFace_h(int dir, int dim) const
Return pointer to the local pinned my_face buffer in a given direction and dimension.
static int buffer_recv_p2p_fwd[2][QUDA_MAX_DIM]
virtual void copy_from_buffer(void *buffer)=0
Copy all contents of the field from a host buffer to this field.
static int bufferIndex
MsgHandle * mh_recv_rdma_back[2][QUDA_MAX_DIM]
size_t LocalVolumeCB() const
MsgHandle * mh_send_rdma_fwd[2][QUDA_MAX_DIM]
void * from_face_dim_dir_d[2][QUDA_MAX_DIM][2]
const char * AuxString() const
static MsgHandle * mh_recv_p2p_back[2][QUDA_MAX_DIM]
bool ipcCopyComplete(int dir, int dim)
virtual void backup() const
Backs up the LatticeField.
virtual QudaSiteSubset SiteSubset() const
const char * VolString() const
MsgHandle * mh_send_rdma_back[2][QUDA_MAX_DIM]
static void * ghost_pinned_send_buffer_hd[2]
int x[QUDA_MAX_DIM]
void * my_face_dim_dir_h[2][QUDA_MAX_DIM][2]
void * from_face_dim_dir_h[2][QUDA_MAX_DIM][2]
void * remoteFace_d(int dir, int dim) const
Return base pointer to a remote device buffer for direct sending in a given direction and dimension....
void * myFace_d(int dir, int dim) const
Return pointer to the device send buffer in a given direction and dimension.
static MsgHandle * mh_send_p2p_fwd[2][QUDA_MAX_DIM]
size_t Volume() const
static MsgHandle * mh_recv_p2p_fwd[2][QUDA_MAX_DIM]
static void * ghost_pinned_recv_buffer_h[2]
QudaPrecision GhostPrecision() const
size_t ghost_offset[QUDA_MAX_DIM][2]
QudaPrecision ghost_precision
QudaPrecision Precision() const
size_t LocalVolume() const
QudaFieldLocation Location() const
const int * SurfaceCB() const
QudaPrecision precision
void * myFace_hd(int dir, int dim) const
Return pointer to the local mapped my_face buffer in a given direction and dimension.
void * my_face_dim_dir_d[2][QUDA_MAX_DIM][2]
static void destroyIPCComms()
static void * ghost_pinned_recv_buffer_hd[2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
char aux_string[TuneKey::aux_n]
virtual void commsWait(int nFace, int dir, int dagger=0, qudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
virtual void write(char *filename)
void Scale(double scale_)
Set the scale factor for a fixed-point field.
static size_t ghostFaceBytes
QudaMemoryType mem_type
virtual bool isNative() const =0
char vol_string[TuneKey::volume_n]
size_t GBytes() const
static void * ghost_pinned_send_buffer_h[2]
static void * ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2]
void * from_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
static MsgHandle * mh_send_p2p_back[2][QUDA_MAX_DIM]
virtual void restore() const
Restores the LatticeField.
size_t VolumeCB() const
static int buffer_send_p2p_fwd[2][QUDA_MAX_DIM]
virtual void setTuningString()
static bool ghost_field_reset
const int * R() const
const int * X() const
int surfaceCB[QUDA_MAX_DIM]
int surface[QUDA_MAX_DIM]
virtual void prefetch(QudaFieldLocation mem_space, qudaStream_t stream=0) const
If managed memory and prefetch is enabled, prefetch all relevant memory fields to the current device ...
static int buffer_recv_p2p_back[2][QUDA_MAX_DIM]
bool ipcRemoteCopyComplete(int dir, int dim)
void checkField(const LatticeField &a) const
const cudaEvent_t & getIPCCopyEvent(int dir, int dim) const
static bool initIPCComms
int r[QUDA_MAX_DIM]
virtual void gather(int nFace, int dagger, int dir, qudaStream_t *stream_p=NULL)
size_t Stride() const
virtual int full_dim(int d) const =0
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
virtual QudaMemoryType MemType() const
void * my_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
QudaGhostExchange GhostExchange() const
static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM]
static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM]
const cudaEvent_t & getIPCRemoteCopyEvent(int dir, int dim) const
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
static int buffer_send_p2p_back[2][QUDA_MAX_DIM]
void allocateGhostBuffer(size_t ghost_bytes) const
Allocate the static ghost buffers.
virtual void read(char *filename)
MsgHandle * mh_recv_rdma_fwd[2][QUDA_MAX_DIM]
size_t ghost_face_bytes_aligned[QUDA_MAX_DIM]
virtual int commsQuery(int nFace, int dir, int dagger=0, qudaStream_t *stream_p=NULL, bool gdr_send=false, bool gdr_recv=true)
static void * ghost_recv_buffer_d[2]
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
double Scale() const
void createComms(bool no_comms_fill=false, bool bidir=true)
void * from_face_hd[2]
LatticeField(const LatticeFieldParam &param)
static void freeGhostBuffer(void)
Free statically allocated ghost buffers.
static void * ghost_send_buffer_d[2]
virtual void copy_to_buffer(void *buffer) const =0
Copy all contents of the field to a host buffer.
std::array< int, 4 > dim
bool dagger
enum QudaPrecision_s QudaPrecision
@ QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326
@ QUDA_INVALID_FIELD_LOCATION
Definition: enum_quda.h:327
@ QUDA_INVALID_SITE_SUBSET
Definition: enum_quda.h:334
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333
@ QUDA_MEMORY_DEVICE
Definition: enum_quda.h:13
enum QudaSiteSubset_s QudaSiteSubset
enum QudaFieldLocation_s QudaFieldLocation
@ QUDA_GHOST_EXCHANGE_NO
Definition: enum_quda.h:508
@ QUDA_GHOST_EXCHANGE_PAD
Definition: enum_quda.h:509
enum QudaGhostExchange_s QudaGhostExchange
enum QudaMemoryType_s QudaMemoryType
@ QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:65
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_INVALID_PRECISION
Definition: enum_quda.h:66
@ QUDA_QUARTER_PRECISION
Definition: enum_quda.h:62
@ QUDA_HALF_PRECISION
Definition: enum_quda.h:63
QudaPrecision & cpu_prec
Definition: host_utils.cpp:57
const char * compile_type_str(const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION)
Helper function for setting auxiliary string.
QudaOffsetCopyMode
Definition: lattice_field.h:46
qudaStream_t * stream
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the env...
void reorder_location_set(QudaFieldLocation reorder_location_)
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environ...
QudaPrecision Precision_(const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
Helper function for determining if the precision of the fields is the same.
QudaFieldLocation Location_(const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b)
Helper function for determining if the location of the fields is the same.
bool Native_(const char *func, const char *file, int line, const LatticeField &a)
Helper function for determining if the field is in native order.
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
QudaGaugeParam param
Definition: pack_test.cpp:18
Main header file for the QUDA library.
cudaStream_t qudaStream_t
Definition: quda_api.h:9
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5.
int X[4]
Definition: quda.h:35
LatticeFieldParam(int nDim, const int *x, int pad, QudaPrecision precision, QudaGhostExchange ghostExchange=QUDA_GHOST_EXCHANGE_PAD)
Constructor for creating a LatticeFieldParam from a set of parameters.
QudaPrecision precision
Definition: lattice_field.h:52
LatticeFieldParam()
Default constructor for LatticeFieldParam.
Definition: lattice_field.h:88
QudaMemoryType mem_type
Definition: lattice_field.h:74
int r[QUDA_MAX_DIM]
Definition: lattice_field.h:80
QudaPrecision GhostPrecision() const
Definition: lattice_field.h:62
QudaGhostExchange ghostExchange
Definition: lattice_field.h:77
LatticeFieldParam(const QudaGaugeParam &param)
Constructor for creating a LatticeFieldParam from a QudaGaugeParam. Used for wrapping around a CPU re...
QudaPrecision ghost_precision
Definition: lattice_field.h:55
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68
QudaSiteSubset siteSubset
Definition: lattice_field.h:72
QudaPrecision Precision() const
Definition: lattice_field.h:59
static const int aux_n
Definition: tune_key.h:12
static const int volume_n
Definition: tune_key.h:10
#define errorQuda(...)
Definition: util_quda.h:120