QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
face_quda.h
Go to the documentation of this file.
1 #ifndef _FACE_QUDA_H
2 #define _FACE_QUDA_H
3 
4 #include <map>
5 #include <quda_internal.h>
6 #include <color_spinor_field.h>
7 #include <comm_quda.h>
8 
9 namespace quda {
10  class FaceBuffer {
11  // Holds face/ghost buffers, message handles and lattice geometry in arrays sized by QUDA_MAX_DIM; member functions are only declared here.
12  private:
13 
14  // We cache pinned memory allocations so that Dirac objects can be created and
15  // destroyed at will with minimal overhead.
16  static std::multimap<size_t, void *> pinnedCache; // shared by all instances; presumably keyed by allocation size in bytes -- TODO confirm
17 
18  // For convenience, we keep track of the sizes of active allocations (i.e., those not in the cache).
19  static std::map<void *, size_t> pinnedSize; // shared by all instances; maps an allocation's pointer to its size
20 
21  // Set both send indices to 0 for no overlap of QMP and cudaMemcpyAsync;
22  // set sendBackStrmIdx = 0 and sendFwdStrmIdx = 1 for overlap.
23  int sendBackStrmIdx; // = 0;
24  int sendFwdStrmIdx; // = 1;
25  int recFwdStrmIdx; // = sendBackStrmIdx;
26  int recBackStrmIdx; // = sendFwdStrmIdx;
27 
28  // CUDA pinned memory
29  void *my_fwd_face[QUDA_MAX_DIM];
30  void *my_back_face[QUDA_MAX_DIM];
31  void *from_back_face[QUDA_MAX_DIM];
32  void *from_fwd_face[QUDA_MAX_DIM];
33 
34  // IB pinned memory
35  void* ib_my_fwd_face[QUDA_MAX_DIM];
36  void* ib_my_back_face[QUDA_MAX_DIM];
37  void* ib_from_back_face[QUDA_MAX_DIM];
38  void* ib_from_fwd_face[QUDA_MAX_DIM];
39 
40  // Message handles
41  MsgHandle* mh_recv_fwd[QUDA_MAX_DIM];
42  MsgHandle* mh_recv_back[QUDA_MAX_DIM];
43  MsgHandle* mh_send_fwd[QUDA_MAX_DIM];
44  MsgHandle* mh_send_back[QUDA_MAX_DIM];
45 
46  int Ninternal; // number of internal degrees of freedom (12 for spin projected Wilson, 6 for staggered)
47  QudaPrecision precision;
48  // Lattice geometry; the "CB" suffix presumably denotes checkerboarded (single-parity) counts -- TODO confirm
49  int Volume;
50  int VolumeCB;
51  int faceVolume[QUDA_MAX_DIM];
52  int faceVolumeCB[QUDA_MAX_DIM];
53  int X[QUDA_MAX_DIM];
54  int nDim; // presumably the number of space-time dimensions -- NOTE(review): original comment read "communications"; confirm
55  int nDimComms; // the number of dimensions in which we communicate
56  int nFace;
57  size_t nbytes[QUDA_MAX_DIM]; // presumably the face-buffer size in bytes for each dimension -- TODO confirm
58 
59  void setupDims(const int *X, int Ls); // private helper; presumably fills the geometry members above from X and Ls -- TODO confirm
60 
61  void *allocatePinned(size_t nbytes); // private helper returning a buffer for a request of nbytes; presumably reuses pinnedCache entries -- TODO confirm
62  void freePinned(void *ptr); // private counterpart of allocatePinned; presumably returns ptr to pinnedCache -- TODO confirm
63 
64  public:
65  FaceBuffer(const int *X, const int nDim, const int Ninternal,
66  const int nFace, const QudaPrecision precision, const int Ls = 1); // Ls defaults to 1
67  FaceBuffer(const FaceBuffer &); // copy constructor (declared only; copy semantics of the raw buffers are not visible here)
68  virtual ~FaceBuffer();
69  // The operations below are defined elsewhere; the meaning of parity, dagger and dir is not visible in this header.
70  void pack(quda::cudaColorSpinorField &in, int parity, int dagger, cudaStream_t *stream);
71  void gather(quda::cudaColorSpinorField &in, int dagger, int dir);
72  void commsStart(int dir);
73  int commsQuery(int dir); // returns int; presumably non-zero once communication for dir has completed -- TODO confirm
74  void scatter(quda::cudaColorSpinorField &out, int dagger, int dir);
75 
76  void exchangeCpuSpinor(quda::cpuColorSpinorField &in, int parity, int dagger);
77 
78  void exchangeCpuLink(void** ghost_link, void** link_sendbuf);
79 
80  static void flushPinnedCache(); // static; presumably releases the allocations held in pinnedCache -- TODO confirm
81  };
82 }
83 
84 void reduceMaxDouble(double &); // takes its argument by non-const reference; presumably overwritten with the maximum across processes -- TODO confirm
85 void reduceDouble(double &); // takes its argument by non-const reference; presumably overwritten with the sum across processes -- TODO confirm
86 void reduceDoubleArray(double *, const int len); // presumably reduces len doubles in place -- TODO confirm
87 int commDim(int); // presumably the number of processes along the given dimension -- TODO confirm
88 int commCoords(int); // presumably this process's coordinate along the given dimension -- TODO confirm
89 int commDimPartitioned(int dir); // returns int; presumably non-zero when dimension dir is partitioned -- TODO confirm
90 void commDimPartitionedSet(int dir); // presumably marks dimension dir as partitioned -- TODO confirm
91 
92 #ifdef __cplusplus
93  extern "C" {
94 #endif
95  // NOTE(review): this header already uses C++-only constructs (namespace, class, references) above, so these __cplusplus guards cannot make it includable from C -- confirm intent
96  // implemented in face_gauge.cpp
97  // All declarations in this block get C linkage when compiled as C++.
98  void exchange_cpu_sitelink(int* X,void** sitelink, void** ghost_sitelink,
99  void** ghost_sitelink_diag,
100  QudaPrecision gPrecision, QudaGaugeParam* param, int optflag);
101  void exchange_cpu_sitelink_ex(int* X, int *R, void** sitelink, QudaGaugeFieldOrder cpu_order,
102  QudaPrecision gPrecision, int optflag);
103  void exchange_gpu_staple_start(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
104  void exchange_gpu_staple_comms(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
105  void exchange_gpu_staple_wait(int* X, void* _cudaStaple, int dir, int whichway, cudaStream_t * stream);
106  void exchange_gpu_staple(int* X, void* _cudaStaple, cudaStream_t * stream);
107  void exchange_gpu_staple(int* X, void* _cudaStaple, cudaStream_t * stream); // NOTE(review): exact duplicate of the declaration on the previous line; harmless redeclaration, candidate for removal
108  void exchange_cpu_staple(int* X, void* staple, void** ghost_staple,
109  QudaPrecision gPrecision);
111  void exchange_llfat_cleanup(void);
112 
113  extern bool globalReduce; // declaration only; the definition lives in another translation unit
114 
115 #ifdef __cplusplus
116  }
117 #endif
118 
119 #endif // _FACE_QUDA_H