QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
face_quda.h
Go to the documentation of this file.
1 #ifndef _FACE_QUDA_H
2 #define _FACE_QUDA_H
3 
4 #include <map>
5 #include <quda_internal.h>
6 #include <color_spinor_field.h>
7 #include <comm_quda.h>
8 
9 namespace quda {
10  class FaceBuffer {
11 
12  private:
13 
14  // We cache pinned memory allocations so that Dirac objects can be created and
15  // destroyed at will with minimal overhead.
16  static std::multimap<size_t, void *> pinnedCache;
17 
18  // For convenience, we keep track of the sizes of active allocations (i.e., those not in the cache).
19  static std::map<void *, size_t> pinnedSize;
20 
21  // set these both = 0 for no overlap of QMP and cudaMemcpyAsync
22  // sendBackStrmIdx = 0, and sendFwdStrmIdx = 1 for overlap
23  int sendBackStrmIdx; // = 0;
24  int sendFwdStrmIdx; // = 1;
25  int recFwdStrmIdx; // = sendBackIdx;
26  int recBackStrmIdx; // = sendFwdIdx;
27 
28  // CUDA pinned memory
29  void *my_face;
30  void *my_fwd_face[QUDA_MAX_DIM];
31  void *my_back_face[QUDA_MAX_DIM];
32  void *from_face;
33  void *from_back_face[QUDA_MAX_DIM];
34  void *from_fwd_face[QUDA_MAX_DIM];
35 
36  // IB pinned memory
37  void* ib_my_fwd_face[QUDA_MAX_DIM];
38  void* ib_my_back_face[QUDA_MAX_DIM];
39  void* ib_from_back_face[QUDA_MAX_DIM];
40  void* ib_from_fwd_face[QUDA_MAX_DIM];
41 
42  // Message handles
43  MsgHandle* mh_recv_fwd[QUDA_MAX_DIM];
44  MsgHandle* mh_recv_back[QUDA_MAX_DIM];
45  MsgHandle* mh_send_fwd[QUDA_MAX_DIM];
46  MsgHandle* mh_send_back[QUDA_MAX_DIM];
47 
48  int Ninternal; // number of internal degrees of freedom (12 for spin projected Wilson, 6 for staggered)
49  QudaPrecision precision;
50 
51  int Volume;
52  int VolumeCB;
53  int faceVolume[QUDA_MAX_DIM];
54  int faceVolumeCB[QUDA_MAX_DIM];
55  int X[QUDA_MAX_DIM];
56  int nDim; // the actual number of space-time dimensions
57  int nDimComms; // the number of dimensions in which we communicate
58  int nFace;
59  size_t nbytes[QUDA_MAX_DIM];
60 
61  void setupDims(const int *X, int Ls);
62 
63  void *allocatePinned(size_t nbytes);
64  void freePinned(void *ptr);
65 
66  public:
67  FaceBuffer(const int *X, const int nDim, const int Ninternal,
68  const int nFace, const QudaPrecision precision, const int Ls = 1);
69  FaceBuffer(const FaceBuffer &);
70  virtual ~FaceBuffer();
71 
85  int dim, int dir, int parity, int dagger, cudaStream_t *stream,
86  bool zeroCopyPack=false, double a=0);
87 
88  void pack(quda::cudaColorSpinorField &in, quda::FullClover &clov, quda::FullClover &clovInv, int dir, int parity, int dagger,
89  cudaStream_t *stream, bool zeroCopyPack=false, double a=0);
90 
91  void pack(quda::cudaColorSpinorField &in, quda::FullClover &clov, quda::FullClover &clovInv, int parity, int dagger,
92  cudaStream_t *stream, bool zeroCopyPack=false, double a=0);
93 
94  void pack(quda::cudaColorSpinorField &in, int dim, int dir, int parity, int dagger,
95  cudaStream_t *stream, bool zeroCopyPack=false, double a=0, double b=0);
96 
97  void pack(quda::cudaColorSpinorField &in, int dir, int parity, int dagger,
98  cudaStream_t *stream, bool zeroCopyPack=false, double a=0, double b=0);
99 
100  void pack(quda::cudaColorSpinorField &in, int parity, int dagger,
101  cudaStream_t *stream, bool zeroCopyPack=false, double a=0, double b=0);
102 
103 
104  void gather(quda::cudaColorSpinorField &in, int dagger, int dir, int streamIdx);
105 
106  void gather(quda::cudaColorSpinorField &in, int dagger, int dir);
107 
108 
109 
110  void sendStart(int dir);
111  void recvStart(int dir);
112  void commsStart(int dir);
113  int commsQuery(int dir);
114  void scatter(quda::cudaColorSpinorField &out, int dagger, int dir);
115 
116  void scatter(quda::cudaColorSpinorField &out, int dagger, int dir, int streamIdx);
117 
118 
119  void exchangeCpuSpinor(quda::cpuColorSpinorField &in, int parity, int dagger);
120 
121  void exchangeLink(void** ghost_link, void** link_sendbuf, QudaFieldLocation location);
122 
123  static void flushPinnedCache();
124  };
125 }
126 
127 void reduceMaxDouble(double &);
128 void reduceDouble(double &);
129 void reduceDoubleArray(double *, const int len);
130 int commDim(int);
131 int commCoords(int);
132 int commDimPartitioned(int dir);
133 void commDimPartitionedSet(int dir);
134 
135 #ifdef __cplusplus
136  extern "C" {
137 #endif
138 
139  // implemented in face_gauge.cpp
140 
141  void exchange_cpu_sitelink(int* X,void** sitelink, void** ghost_sitelink,
142  void** ghost_sitelink_diag,
143  QudaPrecision gPrecision, QudaGaugeParam* param, int optflag);
144  void exchange_cpu_sitelink_ex(int* X, int *R, void** sitelink, QudaGaugeFieldOrder cpu_order,
145  QudaPrecision gPrecision, int optflag, int geometry);
146  void exchange_gpu_staple_start(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
147  void exchange_gpu_staple_comms(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
148  void exchange_gpu_staple_wait(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
149  void exchange_gpu_staple(int* X, void* _cudaStaple, cudaStream_t * stream);
150  void exchange_gpu_staple(int* X, void* _cudaStaple, cudaStream_t * stream);
151  void exchange_cpu_staple(int* X, void* staple, void** ghost_staple,
152  QudaPrecision gPrecision);
154  void exchange_llfat_cleanup(void);
155 
156  extern bool globalReduce;
157 
158 #ifdef __cplusplus
159  }
160 #endif
161 
162 #endif // _FACE_QUDA_H
int commDim(int)
void reduceMaxDouble(double &)
void exchange_llfat_cleanup(void)
void pack(quda::cudaColorSpinorField &in, quda::FullClover &clov, quda::FullClover &clovInv, int dim, int dir, int parity, int dagger, cudaStream_t *stream, bool zeroCopyPack=false, double a=0)
enum QudaPrecision_s QudaPrecision
int commDimPartitioned(int dir)
void exchange_cpu_staple(int *X, void *staple, void **ghost_staple, QudaPrecision gPrecision)
cudaStream_t * stream
int commsQuery(int dir)
void sendStart(int dir)
void commDimPartitionedSet(int dir)
QudaDagType dagger
Definition: test_util.cpp:1558
void exchange_gpu_staple(int *X, void *_cudaStaple, cudaStream_t *stream)
int Ls
Definition: test_util.cpp:40
QudaGaugeParam param
Definition: pack_test.cpp:17
void exchange_llfat_init(QudaPrecision prec)
void exchangeLink(void **ghost_link, void **link_sendbuf, QudaFieldLocation location)
void exchange_cpu_sitelink_ex(int *X, int *R, void **sitelink, QudaGaugeFieldOrder cpu_order, QudaPrecision gPrecision, int optflag, int geometry)
void exchange_gpu_staple_start(int *X, void *_cudaStaple, int dir, int whichway, cudaStream_t *stream)
void gather(quda::cudaColorSpinorField &in, int dagger, int dir, int streamIdx)
const QudaFieldLocation location
Definition: pack_test.cpp:46
void scatter(quda::cudaColorSpinorField &out, int dagger, int dir)
static void flushPinnedCache()
cpuColorSpinorField * in
void reduceDoubleArray(double *, const int len)
void exchangeCpuSpinor(quda::cpuColorSpinorField &in, int parity, int dagger)
FaceBuffer(const int *X, const int nDim, const int Ninternal, const int nFace, const QudaPrecision precision, const int Ls=1)
Definition: face_buffer.cpp:14
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
int commCoords(int)
void exchange_gpu_staple_wait(int *X, void *_cudaStaple, int dir, int whichway, cudaStream_t *stream)
enum QudaFieldLocation_s QudaFieldLocation
cpuColorSpinorField * out
void exchange_gpu_staple_comms(int *X, void *_cudaStaple, int dir, int whichway, cudaStream_t *stream)
void reduceDouble(double &)
virtual ~FaceBuffer()
Definition: face_buffer.cpp:86
void recvStart(int dir)
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
void commsStart(int dir)
QudaPrecision prec
Definition: test_util.cpp:1551
void exchange_cpu_sitelink(int *X, void **sitelink, void **ghost_sitelink, void **ghost_sitelink_diag, QudaPrecision gPrecision, QudaGaugeParam *param, int optflag)
bool globalReduce
Definition: face_buffer.cpp:11
const QudaParity parity
Definition: dslash_test.cpp:29