QUDA  0.9.0
lattice_field.cpp
Go to the documentation of this file.
1 #include <typeinfo>
2 #include <quda_internal.h>
3 #include <lattice_field.h>
4 #include <color_spinor_field.h>
5 #include <gauge_field.h>
6 #include <clover_field.h>
7 
8 namespace quda {
9 
10  bool LatticeField::initIPCComms = false;
11 
16 
21 
22  cudaEvent_t LatticeField::ipcCopyEvent[2][2][QUDA_MAX_DIM];
24 
25  void *LatticeField::ghost_pinned_buffer_h[2] = {nullptr, nullptr};
26  void *LatticeField::ghost_pinned_buffer_hd[2] = {nullptr, nullptr};
27 
28  // gpu ghost receive buffer
29  void *LatticeField::ghost_recv_buffer_d[2] = {nullptr, nullptr};
30 
31  // gpu ghost send buffer
32  void *LatticeField::ghost_send_buffer_d[2] = {nullptr, nullptr};
33 
35 
37 
40 
42 
44  : nDim(field.Ndim()), pad(field.Pad()), precision(field.Precision()),
45  siteSubset(field.SiteSubset()), mem_type(field.MemType()), ghostExchange(field.GhostExchange())
46  {
47  for(int dir=0; dir<nDim; ++dir) {
48  x[dir] = field.X()[dir];
49  r[dir] = field.R()[dir];
50  }
51  }
52 
54  : volume(1), pad(param.pad), total_bytes(0), nDim(param.nDim), precision(param.precision),
55  siteSubset(param.siteSubset), ghostExchange(param.ghostExchange), ghost_bytes(0),
56  ghost_face_bytes{ }, ghostOffset( ), ghostNormOffset( ),
57  my_face_h{ }, my_face_hd{ }, initComms(false), mem_type(param.mem_type),
58  backup_h(nullptr), backup_norm_h(nullptr), backed_up(false)
59  {
60  for (int i=0; i<nDim; i++) {
61  x[i] = param.x[i];
62  r[i] = ghostExchange == QUDA_GHOST_EXCHANGE_EXTENDED ? param.r[i] : 0;
63  volume *= param.x[i];
64  surface[i] = 1;
65  for (int j=0; j<nDim; j++) {
66  if (i==j) continue;
67  surface[i] *= param.x[j];
68  }
69  }
70 
71  if (siteSubset == QUDA_INVALID_SITE_SUBSET) errorQuda("siteSubset is not set");
72  volumeCB = (siteSubset == QUDA_FULL_SITE_SUBSET) ? volume / 2 : volume;
73  stride = volumeCB + pad;
74 
 75  // for parity fields the factor of half is present for all surface dimensions except x, so add it manually
76  for (int i=0; i<nDim; i++)
77  surfaceCB[i] = (siteSubset == QUDA_FULL_SITE_SUBSET || i==0) ? surface[i] / 2 : surface[i];
78 
79  // for 5-dimensional fields, we only communicate in the space-time dimensions
80  nDimComms = nDim == 5 ? 4 : nDim;
81 
82  switch (precision) {
86  break;
87  default:
88  errorQuda("Unknown precision %d", precision);
89  }
90 
91  setTuningString();
92  }
93 
95  : volume(1), pad(field.pad), total_bytes(0), nDim(field.nDim), precision(field.precision),
96  siteSubset(field.siteSubset), ghostExchange(field.ghostExchange), ghost_bytes(0),
97  ghost_face_bytes{ }, ghostOffset( ), ghostNormOffset( ),
98  my_face_h{ }, my_face_hd{ }, initComms(false), mem_type(field.mem_type),
99  backup_h(nullptr), backup_norm_h(nullptr), backed_up(false)
100  {
101  for (int i=0; i<nDim; i++) {
102  x[i] = field.x[i];
103  r[i] = ghostExchange == QUDA_GHOST_EXCHANGE_EXTENDED ? field.r[i] : 0;
104  volume *= field.x[i];
105  surface[i] = 1;
106  for (int j=0; j<nDim; j++) {
107  if (i==j) continue;
108  surface[i] *= field.x[j];
109  }
110  }
111 
112  if (siteSubset == QUDA_INVALID_SITE_SUBSET) errorQuda("siteSubset is not set");
113  volumeCB = (siteSubset == QUDA_FULL_SITE_SUBSET) ? volume / 2 : volume;
114  stride = volumeCB + pad;
115 
 116  // for parity fields the factor of half is present for all surface dimensions except x, so add it manually
117  for (int i=0; i<nDim; i++)
118  surfaceCB[i] = (siteSubset == QUDA_FULL_SITE_SUBSET || i==0) ? surface[i] / 2 : surface[i];
119 
120  // for 5-dimensional fields, we only communicate in the space-time dimensions
121  nDimComms = nDim == 5 ? 4 : nDim;
122 
123  setTuningString();
124  }
125 
127 
128  void LatticeField::allocateGhostBuffer(size_t ghost_bytes) const
129  {
130  // only allocate if not already allocated or buffer required is bigger than previously
132 
133  if (initGhostFaceBuffer) {
134  if (ghost_bytes) {
135  for (int b=0; b<2; b++) {
139  }
140  }
141  }
142 
143  if (ghost_bytes > 0) {
144  for (int b=0; b<2; ++b) {
145  // gpu receive buffer (use pinned allocator to avoid this being redirected, e.g., by QDPJIT)
147 
148  // gpu send buffer (use pinned allocator to avoid this being redirected, e.g., by QDPJIT)
150 
151  // pinned buffer used for sending and receiving
153 
154  // set the matching device-mapper pointer
155  cudaHostGetDevicePointer(&ghost_pinned_buffer_hd[b], ghost_pinned_buffer_h[b], 0);
156  }
157 
158  initGhostFaceBuffer = true;
160  }
161 
162  LatticeField::ghost_field_reset = true; // this signals that we must reset the IPC comms
163  }
164 
165  }
166 
168  {
169  destroyIPCComms();
170 
171  if (!initGhostFaceBuffer) return;
172 
173  for (int b=0; b<2; b++) {
174  // free receive buffer
176  ghost_recv_buffer_d[b] = nullptr;
177 
178  // free send buffer
180  ghost_send_buffer_d[b] = nullptr;
181 
182  // free pinned memory buffers
184  ghost_pinned_buffer_h[b] = nullptr;
185  ghost_pinned_buffer_hd[b] = nullptr;
186  }
187  initGhostFaceBuffer = false;
188  }
189 
190  void LatticeField::createComms(bool no_comms_fill)
191  {
192  destroyComms(); // if we are requesting a new number of faces destroy and start over
193 
194  // initialize the ghost pinned buffers
195  for (int b=0; b<2; b++) {
198  from_face_h[b] = static_cast<char*>(my_face_h[b]) + ghost_bytes;
199  from_face_hd[b] = static_cast<char*>(my_face_hd[b]) + ghost_bytes;
200  }
201 
202  // initialize ghost send pointers
203  size_t offset = 0;
204  for (int i=0; i<nDimComms; i++) {
205  if (!commDimPartitioned(i) && no_comms_fill==false) continue;
206 
207  for (int b=0; b<2; ++b) {
208  my_face_dim_dir_h[b][i][0] = static_cast<char*>(my_face_h[b]) + offset;
209  from_face_dim_dir_h[b][i][0] = static_cast<char*>(from_face_h[b]) + offset;
210 
211  my_face_dim_dir_hd[b][i][0] = static_cast<char*>(my_face_hd[b]) + offset;
212  from_face_dim_dir_hd[b][i][0] = static_cast<char*>(from_face_hd[b]) + offset;
213 
214  my_face_dim_dir_d[b][i][0] = static_cast<char*>(ghost_send_buffer_d[b]) + offset;
215  from_face_dim_dir_d[b][i][0] = static_cast<char*>(ghost_recv_buffer_d[b]) + ghostOffset[i][0]*precision;
216  } // loop over b
217 
219 
220  for (int b=0; b<2; ++b) {
221  my_face_dim_dir_h[b][i][1] = static_cast<char*>(my_face_h[b]) + offset;
222  from_face_dim_dir_h[b][i][1] = static_cast<char*>(from_face_h[b]) + offset;
223 
224  my_face_dim_dir_hd[b][i][1] = static_cast<char*>(my_face_hd[b]) + offset;
225  from_face_dim_dir_hd[b][i][1] = static_cast<char*>(from_face_hd[b]) + offset;
226 
227  my_face_dim_dir_d[b][i][1] = static_cast<char*>(ghost_send_buffer_d[b]) + offset;
228  from_face_dim_dir_d[b][i][1] = static_cast<char*>(ghost_recv_buffer_d[b]) + ghostOffset[i][1]*precision;
229  } // loop over b
231 
232  } // loop over dimension
233 
234  bool gdr = comm_gdr_enabled(); // only allocate rdma buffers if GDR enabled
235 
236  // initialize the message handlers
237  for (int i=0; i<nDimComms; i++) {
238  if (!commDimPartitioned(i)) continue;
239 
240  for (int b=0; b<2; ++b) {
243 
246 
249 
252  } // loop over b
253 
254  } // loop over dimension
255 
256  initComms = true;
257  checkCudaError();
258  }
259 
261  {
262  if (initComms) {
263 
264  for (int b=0; b<2; ++b) {
265  for (int i=0; i<nDimComms; i++) {
266  if (commDimPartitioned(i)) {
267  if (mh_recv_fwd[b][i]) comm_free(mh_recv_fwd[b][i]);
269  if (mh_send_fwd[b][i]) comm_free(mh_send_fwd[b][i]);
271 
276  }
277  }
278  } // loop over b
279 
280  initComms = false;
281  checkCudaError();
282  }
283 
284  }
285 
287  if ( initIPCComms && !ghost_field_reset ) return;
288 
289  if (!initComms) errorQuda("Can only be called after create comms");
290  if ( (!ghost_recv_buffer_d[0] || !ghost_recv_buffer_d[1]) && comm_size() > 1) errorQuda("ghost_field appears not to be allocated");
291 
292  // handles for obtained ghost pointers
293  cudaIpcMemHandle_t ipcRemoteGhostDestHandle[2][2][QUDA_MAX_DIM];
294 
295  for (int b=0; b<2; b++) {
296  for (int dim=0; dim<4; ++dim) {
297  if (comm_dim(dim)==1) continue;
298  for (int dir=0; dir<2; ++dir) {
299  MsgHandle* sendHandle = nullptr;
300  MsgHandle* receiveHandle = nullptr;
301  int disp = (dir == 1) ? +1 : -1;
302 
303  // first set up receive
304  if (comm_peer2peer_enabled(1-dir,dim)) {
305  receiveHandle = comm_declare_receive_relative(&ipcRemoteGhostDestHandle[b][1-dir][dim],
306  dim, -disp,
307  sizeof(ipcRemoteGhostDestHandle[b][1-dir][dim]));
308  }
309  // now send
310  if (comm_peer2peer_enabled(dir,dim)) {
311  cudaIpcMemHandle_t ipcLocalGhostDestHandle;
312  cudaIpcGetMemHandle(&ipcLocalGhostDestHandle, ghost_recv_buffer_d[b]);
313  sendHandle = comm_declare_send_relative(&ipcLocalGhostDestHandle,
314  dim, disp,
315  sizeof(ipcLocalGhostDestHandle));
316  }
317  if (receiveHandle) comm_start(receiveHandle);
318  if (sendHandle) comm_start(sendHandle);
319 
320  if (receiveHandle) comm_wait(receiveHandle);
321  if (sendHandle) comm_wait(sendHandle);
322 
323  if (sendHandle) comm_free(sendHandle);
324  if (receiveHandle) comm_free(receiveHandle);
325  }
326  }
327 
328  checkCudaError();
329 
330  // open the remote memory handles and set the send ghost pointers
331  for (int dim=0; dim<4; ++dim) {
332  if (comm_dim(dim)==1) continue;
 333  // even if comm_dim(dim) == 2, we may not have p2p enabled in both directions, so check this
334  const int num_dir = (comm_dim(dim) == 2 && comm_peer2peer_enabled(0,dim) && comm_peer2peer_enabled(1,dim)) ? 1 : 2;
335  for (int dir=0; dir<num_dir; ++dir) {
336  if (!comm_peer2peer_enabled(dir,dim)) continue;
337  void **ghostDest = &(ghost_remote_send_buffer_d[b][dim][dir]);
338  cudaIpcOpenMemHandle(ghostDest, ipcRemoteGhostDestHandle[b][dir][dim],
339  cudaIpcMemLazyEnablePeerAccess);
340  }
341  if (num_dir == 1) ghost_remote_send_buffer_d[b][dim][1] = ghost_remote_send_buffer_d[b][dim][0];
342  }
343  } // buffer index
344 
345  checkCudaError();
346 
347  // handles for obtained events
348  cudaIpcEventHandle_t ipcRemoteEventHandle[2][2][QUDA_MAX_DIM];
349 
350  // Note that no b index is necessary here
351  // Now communicate the event handles
352  for (int dim=0; dim<4; ++dim) {
353  if (comm_dim(dim)==1) continue;
354  for (int dir=0; dir<2; ++dir) {
355  for (int b=0; b<2; b++) {
356 
357  MsgHandle* sendHandle = NULL;
358  MsgHandle* receiveHandle = NULL;
359  int disp = (dir == 1) ? +1 : -1;
360 
361  // first set up receive
362  if (comm_peer2peer_enabled(1-dir,dim)) {
363  receiveHandle = comm_declare_receive_relative(&ipcRemoteEventHandle[b][1-dir][dim], dim, -disp,
364  sizeof(ipcRemoteEventHandle[b][1-dir][dim]));
365  }
366 
367  // now send
368  if (comm_peer2peer_enabled(dir,dim)) {
369  cudaEventCreate(&ipcCopyEvent[b][dir][dim], cudaEventDisableTiming | cudaEventInterprocess);
370  cudaIpcEventHandle_t ipcLocalEventHandle;
371  cudaIpcGetEventHandle(&ipcLocalEventHandle, ipcCopyEvent[b][dir][dim]);
372 
373  sendHandle = comm_declare_send_relative(&ipcLocalEventHandle, dim, disp,
374  sizeof(ipcLocalEventHandle));
375  }
376 
377  if (receiveHandle) comm_start(receiveHandle);
378  if (sendHandle) comm_start(sendHandle);
379 
380  if (receiveHandle) comm_wait(receiveHandle);
381  if (sendHandle) comm_wait(sendHandle);
382 
383  if (sendHandle) comm_free(sendHandle);
384  if (receiveHandle) comm_free(receiveHandle);
385 
386  } // buffer index
387  }
388  }
389 
390  checkCudaError();
391 
392  for (int dim=0; dim<4; ++dim) {
393  if (comm_dim(dim)==1) continue;
394  for (int dir=0; dir<2; ++dir) {
395  if (!comm_peer2peer_enabled(dir,dim)) continue;
396  for (int b=0; b<2; b++) {
397  cudaIpcOpenEventHandle(&(ipcRemoteCopyEvent[b][dir][dim]), ipcRemoteEventHandle[b][dir][dim]);
398  }
399  }
400  }
401 
402  // Create message handles for IPC synchronization
403  for (int dim=0; dim<4; ++dim) {
404  if (comm_dim(dim)==1) continue;
405  if (comm_peer2peer_enabled(1,dim)) {
406  for (int b=0; b<2; b++) {
407  // send to processor in forward direction
409  // receive from processor in forward direction
411  }
412  }
413 
414  if (comm_peer2peer_enabled(0,dim)) {
415  for (int b=0; b<2; b++) {
416  // send to processor in backward direction
418  // receive from processor in backward direction
420  }
421  }
422  }
423  checkCudaError();
424 
425  initIPCComms = true;
426  ghost_field_reset = false;
427  }
428 
430 
431  if (!initIPCComms) return;
432  checkCudaError();
433 
434  for (int dim=0; dim<4; ++dim) {
435 
436  if (comm_dim(dim)==1) continue;
437  const int num_dir = (comm_dim(dim) == 2 && comm_peer2peer_enabled(0,dim) && comm_peer2peer_enabled(1,dim)) ? 1 : 2;
438 
439  for (int b=0; b<2; b++) {
440  if (comm_peer2peer_enabled(1,dim)) {
443  cudaEventDestroy(ipcCopyEvent[b][1][dim]);
444 
445  // only close this handle if it doesn't alias the back ghost
446  if (num_dir == 2) cudaIpcCloseMemHandle(ghost_remote_send_buffer_d[b][dim][1]);
447  }
448 
449  if (comm_peer2peer_enabled(0,dim)) {
452  cudaEventDestroy(ipcCopyEvent[b][0][dim]);
453 
454  cudaIpcCloseMemHandle(ghost_remote_send_buffer_d[b][dim][0]);
455  }
456  } // buffer
457  } // iterate over dim
458 
459  checkCudaError();
460  initIPCComms = false;
461  }
462 
464  {
465  return (cudaSuccess == cudaEventQuery(ipcCopyEvent[bufferIndex][dir][dim]) ? true : false);
466  }
467 
469  {
470  return (cudaSuccess == cudaEventQuery(ipcRemoteCopyEvent[bufferIndex][dir][dim]) ? true : false);
471  }
472 
473  const cudaEvent_t& LatticeField::getIPCCopyEvent(int dir, int dim) const {
474  return ipcCopyEvent[bufferIndex][dir][dim];
475  }
476 
477  const cudaEvent_t& LatticeField::getIPCRemoteCopyEvent(int dir, int dim) const {
478  return ipcRemoteCopyEvent[bufferIndex][dir][dim];
479  }
480 
482  char vol_tmp[TuneKey::volume_n];
483  int check;
484  check = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);
485  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
486  for (int d=1; d<nDim; d++) {
487  strcpy(vol_tmp, vol_string);
488  check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);
489  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
490  }
491  }
492 
494  if (a.nDim != nDim) errorQuda("nDim does not match %d %d", nDim, a.nDim);
496  // if source is extended by I am not then we need to compare their interior volume to my volume
497  int a_volume_interior = 1;
498  for (int i=0; i<nDim; i++) {
499  if (a.x[i]-2*a.r[i] != x[i]) errorQuda("x[%d] does not match %d %d", i, x[i], a.x[i]-2*a.r[i]);
500  a_volume_interior *= a.x[i] - 2*a.r[i];
501  }
502  if (a_volume_interior != volume) errorQuda("Interior volume does not match %d %d", volume, a_volume_interior);
503  } else if (a.ghostExchange != QUDA_GHOST_EXCHANGE_EXTENDED && ghostExchange == QUDA_GHOST_EXCHANGE_EXTENDED) {
504  // if source is extended by I am not then we need to compare their interior volume to my volume
505  int this_volume_interior = 1;
506  for (int i=0; i<nDim; i++) {
507  if (x[i]-2*r[i] != a.x[i]) errorQuda("x[%d] does not match %d %d", i, x[i]-2*r[i], a.x[i]);
508  this_volume_interior *= x[i] - 2*r[i];
509  }
510  if (this_volume_interior != a.volume) errorQuda("Interior volume does not match %d %d", this_volume_interior, a.volume);
511  } else {
512  if (a.volume != volume) errorQuda("Volume does not match %d %d", volume, a.volume);
513  if (a.volumeCB != volumeCB) errorQuda("VolumeCB does not match %d %d", volumeCB, a.volumeCB);
514  for (int i=0; i<nDim; i++) {
515  if (a.x[i] != x[i]) errorQuda("x[%d] does not match %d %d", i, x[i], a.x[i]);
516  if (a.surface[i] != surface[i]) errorQuda("surface[%d] does not match %d %d", i, surface[i], a.surface[i]);
517  if (a.surfaceCB[i] != surfaceCB[i]) errorQuda("surfaceCB[%d] does not match %d %d", i, surfaceCB[i], a.surfaceCB[i]);
518  }
519  }
520  }
521 
524  if (typeid(*this)==typeid(cudaCloverField) ||
525  typeid(*this)==typeid(cudaColorSpinorField) ||
526  typeid(*this)==typeid(cudaGaugeField)) {
527  location = QUDA_CUDA_FIELD_LOCATION;
528  } else if (typeid(*this)==typeid(cpuCloverField) ||
529  typeid(*this)==typeid(cpuColorSpinorField) ||
530  typeid(*this)==typeid(cpuGaugeField)) {
531  location = QUDA_CPU_FIELD_LOCATION;
532  } else {
533  errorQuda("Unknown field %s, so cannot determine location", typeid(*this).name());
534  }
535  return location;
536  }
537 
538  void LatticeField::read(char *filename) {
539  errorQuda("Not implemented");
540  }
541 
542  void LatticeField::write(char *filename) {
543  errorQuda("Not implemented");
544  }
545 
546  int LatticeField::Nvec() const {
547  if (typeid(*this) == typeid(const cudaColorSpinorField)) {
548  const ColorSpinorField &csField = static_cast<const ColorSpinorField&>(*this);
549  if (csField.FieldOrder() == 2 || csField.FieldOrder() == 4)
550  return static_cast<int>(csField.FieldOrder());
551  } else if (typeid(*this) == typeid(const cudaGaugeField)) {
552  const GaugeField &gField = static_cast<const GaugeField&>(*this);
553  if (gField.Order() == 2 || gField.Order() == 4)
554  return static_cast<int>(gField.Order());
555  } else if (typeid(*this) == typeid(const cudaCloverField)) {
556  const CloverField &cField = static_cast<const CloverField&>(*this);
557  if (cField.Order() == 2 || cField.Order() == 4)
558  return static_cast<int>(cField.Order());
559  }
560 
561  errorQuda("Unsupported field type");
562  return -1;
563  }
564 
565  // This doesn't really live here, but is fine for the moment
566  std::ostream& operator<<(std::ostream& output, const LatticeFieldParam& param)
567  {
568  output << "nDim = " << param.nDim << std::endl;
569  for (int i=0; i<param.nDim; i++) {
570  output << "x[" << i << "] = " << param.x[i] << std::endl;
571  }
572  output << "pad = " << param.pad << std::endl;
573  output << "precision = " << param.precision << std::endl;
574 
575  output << "ghostExchange = " << param.ghostExchange << std::endl;
576  for (int i=0; i<param.nDim; i++) {
577  output << "r[" << i << "] = " << param.r[i] << std::endl;
578  }
579 
580  return output; // for multiple << operators.
581  }
582 
584 
586  void reorder_location_set(QudaFieldLocation _reorder_location) { reorder_location_ = _reorder_location; }
587 
588 } // namespace quda
static int buffer_recv_p2p_back[2][QUDA_MAX_DIM]
bool ipcRemoteCopyComplete(int dir, int dim)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can set at QUDA initialization using the env...
virtual void read(char *filename)
void allocateGhostBuffer(size_t ghost_bytes) const
Allocate the static ghost buffers.
int commDimPartitioned(int dir)
int snprintf(char *__str, size_t __size, const char *__format,...) __attribute__((__format__(__printf__
void * my_face_dim_dir_d[2][QUDA_MAX_DIM][2]
void createComms(bool no_comms_fill=false)
static void * ghost_pinned_buffer_hd[2]
#define errorQuda(...)
Definition: util_quda.h:90
#define host_free(ptr)
Definition: malloc_quda.h:59
int comm_dim(int dim)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
char * strcpy(char *__dst, const char *__src)
int x[QUDA_MAX_DIM]
static MsgHandle * mh_send_p2p_back[2][QUDA_MAX_DIM]
void * from_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
QudaCloverFieldOrder Order() const
Definition: clover_field.h:92
virtual void setTuningString()
MsgHandle * mh_send_rdma_fwd[2][QUDA_MAX_DIM]
bool ipcCopyComplete(int dir, int dim)
static bool initGhostFaceBuffer
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
size_t size_t offset
static int buffer_recv_p2p_fwd[2][QUDA_MAX_DIM]
QudaGaugeParam param
Definition: pack_test.cpp:17
#define b
void comm_free(MsgHandle *mh)
Definition: comm_mpi.cpp:252
void * my_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
static void * ghost_pinned_buffer_h[2]
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
const int * R() const
MsgHandle * mh_send_rdma_back[2][QUDA_MAX_DIM]
static MsgHandle * mh_recv_p2p_fwd[2][QUDA_MAX_DIM]
const cudaEvent_t & getIPCCopyEvent(int dir, int dim) const
int comm_size(void)
Definition: comm_mpi.cpp:126
static bool ghost_field_reset
static int bufferIndex
void * from_face_hd[2]
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:59
void checkField(const LatticeField &a) const
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:74
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
#define device_pinned_malloc(size)
Definition: malloc_quda.h:53
void comm_start(MsgHandle *mh)
Definition: comm_mpi.cpp:260
MsgHandle * mh_recv_rdma_fwd[2][QUDA_MAX_DIM]
QudaGhostExchange ghostExchange
static void * ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2]
const cudaEvent_t & getIPCRemoteCopyEvent(int dir, int dim) const
char vol_string[TuneKey::volume_n]
virtual void write(char *filename)
void * from_face_dim_dir_d[2][QUDA_MAX_DIM][2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
static void destroyIPCComms()
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
static int buffer_send_p2p_fwd[2][QUDA_MAX_DIM]
bool comm_peer2peer_enabled(int dir, int dim)
QudaFieldLocation Location() const
static QudaFieldLocation reorder_location_
static int buffer_send_p2p_back[2][QUDA_MAX_DIM]
static size_t ghostFaceBytes
static void * ghost_send_buffer_d[2]
int surface[QUDA_MAX_DIM]
enum QudaFieldLocation_s QudaFieldLocation
int ghostOffset[QUDA_MAX_DIM][2]
void * from_face_dim_dir_h[2][QUDA_MAX_DIM][2]
int r[QUDA_MAX_DIM]
static MsgHandle * mh_send_p2p_fwd[2][QUDA_MAX_DIM]
static void * ghost_recv_buffer_d[2]
MsgHandle * mh_recv_rdma_back[2][QUDA_MAX_DIM]
static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM]
LatticeFieldParam()
Default constructor for LatticeFieldParam.
Definition: lattice_field.h:68
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
static long total_bytes[N_ALLOC_TYPE]
Definition: malloc.cpp:52
int surfaceCB[QUDA_MAX_DIM]
static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM]
QudaGaugeFieldOrder Order() const
Definition: gauge_field.h:204
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
#define checkCudaError()
Definition: util_quda.h:129
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
#define mapped_malloc(size)
Definition: malloc_quda.h:56
void comm_wait(MsgHandle *mh)
Definition: comm_mpi.cpp:266
static MsgHandle * mh_recv_p2p_back[2][QUDA_MAX_DIM]
#define device_pinned_free(ptr)
Definition: malloc_quda.h:58
static const int volume_n
Definition: tune_key.h:10
static __inline__ size_t size_t d
void * my_face_dim_dir_h[2][QUDA_MAX_DIM][2]
int r[QUDA_MAX_DIM]
Definition: lattice_field.h:63
void reorder_location_set(QudaFieldLocation reorder_location_)
Set whether data is reorderd on the CPU or GPU. This can set at QUDA initialization using the environ...
static void freeGhostBuffer(void)
Free statically allocated ghost buffers.
QudaPrecision precision
LatticeField(const LatticeFieldParam &param)
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
#define a
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
QudaFieldOrder FieldOrder() const
static bool initIPCComms
const int * X() const