44 : nDim(field.Ndim()), pad(field.Pad()), precision(field.Precision()),
45 siteSubset(field.SiteSubset()), mem_type(field.MemType()), ghostExchange(field.GhostExchange())
47 for(
int dir=0; dir<
nDim; ++dir) {
48 x[dir] = field.
X()[dir];
49 r[dir] = field.
R()[dir];
55 siteSubset(
param.siteSubset), ghostExchange(
param.ghostExchange), ghost_bytes(0),
56 ghost_face_bytes{ }, ghostOffset( ), ghostNormOffset( ),
57 my_face_h{ }, my_face_hd{ },
initComms(
false), mem_type(
param.mem_type),
58 backup_h(
nullptr), backup_norm_h(
nullptr), backed_up(
false)
60 for (
int i=0;
i<nDim;
i++) {
65 for (
int j=0; j<nDim; j++) {
73 stride = volumeCB + pad;
76 for (
int i=0;
i<nDim;
i++)
80 nDimComms = nDim == 5 ? 4 : nDim;
88 errorQuda(
"Unknown precision %d", precision);
95 : volume(1), pad(field.pad),
total_bytes(0), nDim(field.nDim), precision(field.precision),
96 siteSubset(field.siteSubset), ghostExchange(field.ghostExchange), ghost_bytes(0),
97 ghost_face_bytes{ }, ghostOffset( ), ghostNormOffset( ),
98 my_face_h{ }, my_face_hd{ },
initComms(
false), mem_type(field.mem_type),
99 backup_h(
nullptr), backup_norm_h(
nullptr), backed_up(
false)
101 for (
int i=0;
i<nDim;
i++) {
104 volume *= field.x[
i];
106 for (
int j=0; j<nDim; j++) {
108 surface[
i] *= field.x[j];
114 stride = volumeCB + pad;
117 for (
int i=0;
i<nDim;
i++)
121 nDimComms = nDim == 5 ? 4 : nDim;
135 for (
int b=0;
b<2;
b++) {
144 for (
int b=0;
b<2; ++
b) {
173 for (
int b=0;
b<2;
b++) {
195 for (
int b=0;
b<2;
b++) {
207 for (
int b=0;
b<2; ++
b) {
220 for (
int b=0;
b<2; ++
b) {
240 for (
int b=0;
b<2; ++
b) {
264 for (
int b=0;
b<2; ++
b) {
295 for (
int b=0;
b<2;
b++) {
298 for (
int dir=0; dir<2; ++dir) {
301 int disp = (dir == 1) ? +1 : -1;
307 sizeof(ipcRemoteGhostDestHandle[
b][1-dir][
dim]));
315 sizeof(ipcLocalGhostDestHandle));
320 if (receiveHandle)
comm_wait(receiveHandle);
324 if (receiveHandle)
comm_free(receiveHandle);
335 for (
int dir=0; dir<num_dir; ++dir) {
338 cudaIpcOpenMemHandle(ghostDest, ipcRemoteGhostDestHandle[
b][dir][
dim],
339 cudaIpcMemLazyEnablePeerAccess);
354 for (
int dir=0; dir<2; ++dir) {
355 for (
int b=0;
b<2;
b++) {
359 int disp = (dir == 1) ? +1 : -1;
364 sizeof(ipcRemoteEventHandle[
b][1-dir][
dim]));
369 cudaEventCreate(&
ipcCopyEvent[
b][dir][
dim], cudaEventDisableTiming | cudaEventInterprocess);
374 sizeof(ipcLocalEventHandle));
380 if (receiveHandle)
comm_wait(receiveHandle);
384 if (receiveHandle)
comm_free(receiveHandle);
394 for (
int dir=0; dir<2; ++dir) {
396 for (
int b=0;
b<2;
b++) {
406 for (
int b=0;
b<2;
b++) {
415 for (
int b=0;
b<2;
b++) {
439 for (
int b=0;
b<2;
b++) {
497 int a_volume_interior = 1;
500 a_volume_interior *=
a.x[
i] - 2*
a.r[
i];
502 if (a_volume_interior !=
volume)
errorQuda(
"Interior volume does not match %d %d",
volume, a_volume_interior);
505 int this_volume_interior = 1;
508 this_volume_interior *=
x[
i] - 2*
r[
i];
510 if (this_volume_interior !=
a.volume)
errorQuda(
"Interior volume does not match %d %d", this_volume_interior,
a.volume);
533 errorQuda(
"Unknown field %s, so cannot determine location",
typeid(*this).name());
550 return static_cast<int>(csField.
FieldOrder());
553 if (gField.
Order() == 2 || gField.
Order() == 4)
554 return static_cast<int>(gField.
Order());
557 if (cField.
Order() == 2 || cField.
Order() == 4)
558 return static_cast<int>(cField.
Order());
568 output <<
"nDim = " <<
param.nDim << std::endl;
570 output <<
"x[" <<
i <<
"] = " <<
param.x[
i] << std::endl;
572 output <<
"pad = " <<
param.pad << std::endl;
573 output <<
"precision = " <<
param.precision << std::endl;
575 output <<
"ghostExchange = " <<
param.ghostExchange << std::endl;
577 output <<
"r[" <<
i <<
"] = " <<
param.r[
i] << std::endl;
static int buffer_recv_p2p_back[2][QUDA_MAX_DIM]
bool ipcRemoteCopyComplete(int dir, int dim)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the env...
virtual void read(char *filename)
void allocateGhostBuffer(size_t ghost_bytes) const
Allocate the static ghost buffers.
int commDimPartitioned(int dir)
int snprintf(char *__str, size_t __size, const char *__format,...) __attribute__((__format__(__printf__
void * my_face_dim_dir_d[2][QUDA_MAX_DIM][2]
void createComms(bool no_comms_fill=false)
static void * ghost_pinned_buffer_hd[2]
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
char * strcpy(char *__dst, const char *__src)
static MsgHandle * mh_send_p2p_back[2][QUDA_MAX_DIM]
void * from_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
QudaCloverFieldOrder Order() const
virtual void setTuningString()
MsgHandle * mh_send_rdma_fwd[2][QUDA_MAX_DIM]
bool ipcCopyComplete(int dir, int dim)
static bool initGhostFaceBuffer
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
static int buffer_recv_p2p_fwd[2][QUDA_MAX_DIM]
void comm_free(MsgHandle *mh)
void * my_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
static void * ghost_pinned_buffer_h[2]
MsgHandle * mh_send_rdma_back[2][QUDA_MAX_DIM]
static MsgHandle * mh_recv_p2p_fwd[2][QUDA_MAX_DIM]
const cudaEvent_t & getIPCCopyEvent(int dir, int dim) const
static bool ghost_field_reset
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
void checkField(const LatticeField &a) const
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
#define device_pinned_malloc(size)
void comm_start(MsgHandle *mh)
MsgHandle * mh_recv_rdma_fwd[2][QUDA_MAX_DIM]
QudaGhostExchange ghostExchange
static void * ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2]
const cudaEvent_t & getIPCRemoteCopyEvent(int dir, int dim) const
char vol_string[TuneKey::volume_n]
virtual void write(char *filename)
void * from_face_dim_dir_d[2][QUDA_MAX_DIM][2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
static void destroyIPCComms()
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
static int buffer_send_p2p_fwd[2][QUDA_MAX_DIM]
bool comm_peer2peer_enabled(int dir, int dim)
QudaFieldLocation Location() const
static QudaFieldLocation reorder_location_
static int buffer_send_p2p_back[2][QUDA_MAX_DIM]
static size_t ghostFaceBytes
static void * ghost_send_buffer_d[2]
int surface[QUDA_MAX_DIM]
enum QudaFieldLocation_s QudaFieldLocation
int ghostOffset[QUDA_MAX_DIM][2]
void * from_face_dim_dir_h[2][QUDA_MAX_DIM][2]
static MsgHandle * mh_send_p2p_fwd[2][QUDA_MAX_DIM]
static void * ghost_recv_buffer_d[2]
MsgHandle * mh_recv_rdma_back[2][QUDA_MAX_DIM]
static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM]
LatticeFieldParam()
Default constructor for LatticeFieldParam.
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
static long total_bytes[N_ALLOC_TYPE]
int surfaceCB[QUDA_MAX_DIM]
static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM]
QudaGaugeFieldOrder Order() const
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
#define mapped_malloc(size)
void comm_wait(MsgHandle *mh)
static MsgHandle * mh_recv_p2p_back[2][QUDA_MAX_DIM]
#define device_pinned_free(ptr)
static const int volume_n
static __inline__ size_t size_t d
void * my_face_dim_dir_h[2][QUDA_MAX_DIM][2]
void reorder_location_set(QudaFieldLocation reorder_location_)
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environ...
static void freeGhostBuffer(void)
Free statically allocated ghost buffers.
LatticeField(const LatticeFieldParam &param)
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
void initComms(int argc, char **argv, const int *commDims)
QudaFieldOrder FieldOrder() const