48 : precision(field.Precision()), ghost_precision(field.Precision()),
49 nDim(field.Ndim()), pad(field.Pad()),
50 siteSubset(field.SiteSubset()), mem_type(field.MemType()),
51 ghostExchange(field.GhostExchange()), scale(field.Scale())
53 for(
int dir=0; dir<
nDim; ++dir) {
54 x[dir] = field.
X()[dir];
55 r[dir] = field.
R()[dir];
66 ghost_precision_reset(false),
89 for (
int dir = 0; dir < 2; dir++) {
92 for (
int b = 0; b < 2; b++) {
114 for (
int i=0; i<
nDim; i++) {
119 for (
int j=0; j<
nDim; j++) {
130 for (
int i=0; i<
nDim; i++)
179 for (
int dir = 0; dir < 2; dir++) {
193 for (
int i=0; i<
nDim; i++) {
198 for (
int j=0; j<
nDim; j++) {
209 for (
int i=0; i<
nDim; i++)
232 for (
int b=0; b<2; b++) {
241 if (ghost_bytes > 0) {
242 for (
int b = 0; b < 2; ++b) {
266 LatticeField::ghost_field_reset =
true;
277 for (
int b=0; b<2; b++) {
311 for (
int b=0; b<2; b++) {
325 for (
int b=0; b<2; ++b) {
339 for (
int b=0; b<2; ++b) {
359 for (
int b=0; b<2; ++b) {
388 for (
int b=0; b<2; ++b) {
417 errorQuda(
"ghost_field appears not to be allocated");
420 cudaIpcMemHandle_t ipcRemoteGhostDestHandle[2][2][
QUDA_MAX_DIM];
422 for (
int b=0; b<2; b++) {
423 for (
int dim=0; dim<4; ++dim) {
425 for (
int dir=0; dir<2; ++dir) {
428 int disp = (dir == 1) ? +1 : -1;
434 sizeof(ipcRemoteGhostDestHandle[b][1-dir][dim]));
437 cudaIpcMemHandle_t ipcLocalGhostDestHandle;
442 sizeof(ipcLocalGhostDestHandle));
447 if (receiveHandle)
comm_wait(receiveHandle);
451 if (receiveHandle)
comm_free(receiveHandle);
458 for (
int dim=0; dim<4; ++dim) {
462 for (
int dir=0; dir<num_dir; ++dir) {
465 cudaIpcOpenMemHandle(ghostDest, ipcRemoteGhostDestHandle[b][dir][dim],
466 cudaIpcMemLazyEnablePeerAccess);
475 cudaIpcEventHandle_t ipcRemoteEventHandle[2][2][
QUDA_MAX_DIM];
479 for (
int dim=0; dim<4; ++dim) {
481 for (
int dir=0; dir<2; ++dir) {
482 for (
int b=0; b<2; b++) {
486 int disp = (dir == 1) ? +1 : -1;
491 sizeof(ipcRemoteEventHandle[b][1-dir][dim]));
495 cudaIpcEventHandle_t ipcLocalEventHandle;
497 cudaEventCreate(&
ipcCopyEvent[b][dir][dim], cudaEventDisableTiming | cudaEventInterprocess);
498 cudaIpcGetEventHandle(&ipcLocalEventHandle,
ipcCopyEvent[b][dir][dim]);
501 sizeof(ipcLocalEventHandle));
507 if (receiveHandle)
comm_wait(receiveHandle);
511 if (receiveHandle)
comm_free(receiveHandle);
519 for (
int dim=0; dim<4; ++dim) {
521 for (
int dir=0; dir<2; ++dir) {
523 for (
int b=0; b<2; b++) {
524 cudaIpcOpenEventHandle(&(
ipcRemoteCopyEvent[b][dir][dim]), ipcRemoteEventHandle[b][dir][dim]);
530 for (
int dim=0; dim<4; ++dim) {
533 for (
int b=0; b<2; b++) {
542 for (
int b=0; b<2; b++) {
566 for (
int dim=0; dim<4; ++dim) {
571 for (
int b=0; b<2; b++) {
624 for (
int d=1; d<
nDim; d++) {
635 int a_volume_interior = 1;
636 for (
int i=0; i<
nDim; i++) {
637 if (a.
x[i]-2*a.
r[i] !=
x[i])
errorQuda(
"x[%d] does not match %d %d", i,
x[i], a.
x[i]-2*a.
r[i]);
638 a_volume_interior *= a.
x[i] - 2*a.
r[i];
640 if (a_volume_interior !=
volume)
errorQuda(
"Interior volume does not match %d %d",
volume, a_volume_interior);
643 int this_volume_interior = 1;
644 for (
int i=0; i<
nDim; i++) {
645 if (
x[i]-2*
r[i] != a.
x[i])
errorQuda(
"x[%d] does not match %d %d", i,
x[i]-2*
r[i], a.
x[i]);
646 this_volume_interior *=
x[i] - 2*
r[i];
648 if (this_volume_interior != a.
volume)
errorQuda(
"Interior volume does not match %d %d", this_volume_interior, a.
volume);
652 for (
int i=0; i<
nDim; i++) {
653 if (a.
x[i] !=
x[i])
errorQuda(
"x[%d] does not match %d %d", i,
x[i], a.
x[i]);
671 errorQuda(
"Unknown field %s, so cannot determine location",
typeid(*this).name());
688 return static_cast<int>(csField.
FieldOrder());
691 if (gField.
Order() == 2 || gField.
Order() == 4)
692 return static_cast<int>(gField.
Order());
695 if (cField.
Order() == 2 || cField.
Order() == 4)
696 return static_cast<int>(cField.
Order());
706 output <<
"nDim = " << param.
nDim << std::endl;
707 for (
int i=0; i<param.
nDim; i++) {
708 output <<
"x[" << i <<
"] = " << param.
x[i] << std::endl;
710 output <<
"pad = " << param.
pad << std::endl;
711 output <<
"precision = " << param.
Precision() << std::endl;
712 output <<
"ghost_precision = " << param.
GhostPrecision() << std::endl;
713 output <<
"scale = " << param.
scale << std::endl;
715 output <<
"ghostExchange = " << param.
ghostExchange << std::endl;
716 for (
int i=0; i<param.
nDim; i++) {
717 output <<
"r[" << i <<
"] = " << param.
r[i] << std::endl;
static int buffer_recv_p2p_back[2][QUDA_MAX_DIM]
bool ipcRemoteCopyComplete(int dir, int dim)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the env...
QudaGhostExchange ghostExchange
virtual void read(char *filename)
void allocateGhostBuffer(size_t ghost_bytes) const
Allocate the static ghost buffers.
int commDimPartitioned(int dir)
int ghostNormOffset[QUDA_MAX_DIM][2]
void * my_face_dim_dir_d[2][QUDA_MAX_DIM][2]
QudaSiteSubset siteSubset
static void * ghost_pinned_recv_buffer_hd[2]
QudaPrecision GhostPrecision() const
static void * ghost_pinned_send_buffer_h[2]
static MsgHandle * mh_send_p2p_back[2][QUDA_MAX_DIM]
void * from_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
QudaCloverFieldOrder Order() const
virtual void setTuningString()
MsgHandle * mh_send_rdma_fwd[2][QUDA_MAX_DIM]
bool ipcCopyComplete(int dir, int dim)
bool ghost_precision_reset
static bool initGhostFaceBuffer
QudaSiteSubset siteSubset
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
static int buffer_recv_p2p_fwd[2][QUDA_MAX_DIM]
void * my_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
MsgHandle * mh_send_rdma_back[2][QUDA_MAX_DIM]
static MsgHandle * mh_recv_p2p_fwd[2][QUDA_MAX_DIM]
const cudaEvent_t & getIPCCopyEvent(int dir, int dim) const
#define qudaDeviceSynchronize()
static bool ghost_field_reset
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
void checkField(const LatticeField &a) const
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
#define device_pinned_malloc(size)
void comm_start(MsgHandle *mh)
MsgHandle * mh_recv_rdma_fwd[2][QUDA_MAX_DIM]
QudaGhostExchange ghostExchange
static void * ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2]
void comm_free(MsgHandle *&mh)
const cudaEvent_t & getIPCRemoteCopyEvent(int dir, int dim) const
char vol_string[TuneKey::volume_n]
virtual void write(char *filename)
void * from_face_dim_dir_d[2][QUDA_MAX_DIM][2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
static void destroyIPCComms()
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
static int buffer_send_p2p_fwd[2][QUDA_MAX_DIM]
bool comm_peer2peer_enabled(int dir, int dim)
QudaFieldLocation Location() const
static QudaFieldLocation reorder_location_
static int buffer_send_p2p_back[2][QUDA_MAX_DIM]
static size_t ghostFaceBytes
static void * ghost_send_buffer_d[2]
int surface[QUDA_MAX_DIM]
enum QudaFieldLocation_s QudaFieldLocation
int ghostOffset[QUDA_MAX_DIM][2]
QudaPrecision ghost_precision
void * from_face_dim_dir_h[2][QUDA_MAX_DIM][2]
static MsgHandle * mh_send_p2p_fwd[2][QUDA_MAX_DIM]
QudaPrecision Precision() const
static void * ghost_recv_buffer_d[2]
MsgHandle * mh_recv_rdma_back[2][QUDA_MAX_DIM]
static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM]
LatticeFieldParam()
Default constructor for LatticeFieldParam.
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
QudaPrecision ghost_precision
static void * ghost_pinned_send_buffer_hd[2]
static long total_bytes[N_ALLOC_TYPE]
int surfaceCB[QUDA_MAX_DIM]
static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM]
QudaGaugeFieldOrder Order() const
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
void createComms(bool no_comms_fill=false, bool bidir=true)
#define mapped_malloc(size)
void comm_wait(MsgHandle *mh)
static MsgHandle * mh_recv_p2p_back[2][QUDA_MAX_DIM]
#define device_pinned_free(ptr)
static const int volume_n
void * my_face_dim_dir_h[2][QUDA_MAX_DIM][2]
void reorder_location_set(QudaFieldLocation reorder_location_)
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environ...
static void freeGhostBuffer(void)
Free statically allocated ghost buffers.
LatticeField(const LatticeFieldParam &param)
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
QudaFieldOrder FieldOrder() const
static void * ghost_pinned_recv_buffer_h[2]