QUDA  0.9.0
lattice_field.cpp
Go to the documentation of this file.
1 #include <typeinfo>
2 #include <quda_internal.h>
3 #include <lattice_field.h>
4 #include <color_spinor_field.h>
5 #include <gauge_field.h>
6 #include <clover_field.h>
7 
8 namespace quda {
9 
10  bool LatticeField::initIPCComms = false;
11 
16 
21 
22  cudaEvent_t LatticeField::ipcCopyEvent[2][2][QUDA_MAX_DIM];
24 
25  void *LatticeField::ghost_pinned_buffer_h[2] = {nullptr, nullptr};
26  void *LatticeField::ghost_pinned_buffer_hd[2] = {nullptr, nullptr};
27 
28  // gpu ghost receive buffer
29  void *LatticeField::ghost_recv_buffer_d[2] = {nullptr, nullptr};
30 
31  // gpu ghost send buffer
32  void *LatticeField::ghost_send_buffer_d[2] = {nullptr, nullptr};
33 
35 
37 
40 
42 
44  : nDim(field.Ndim()), pad(field.Pad()), precision(field.Precision()),
45  siteSubset(field.SiteSubset()), mem_type(field.MemType()), ghostExchange(field.GhostExchange())
46  {
47  for(int dir=0; dir<nDim; ++dir) {
48  x[dir] = field.X()[dir];
49  r[dir] = field.R()[dir];
50  }
51  }
52 
54  : volume(1), pad(param.pad), total_bytes(0), nDim(param.nDim), precision(param.precision),
55  siteSubset(param.siteSubset), ghostExchange(param.ghostExchange), ghost_bytes(0),
56  ghost_face_bytes{ }, ghostOffset( ), ghostNormOffset( ),
57  my_face_h{ }, my_face_hd{ }, initComms(false), mem_type(param.mem_type),
58  backup_h(nullptr), backup_norm_h(nullptr), backed_up(false)
59  {
60  for (int i=0; i<nDim; i++) {
61  x[i] = param.x[i];
62  r[i] = ghostExchange == QUDA_GHOST_EXCHANGE_EXTENDED ? param.r[i] : 0;
63  volume *= param.x[i];
64  surface[i] = 1;
65  for (int j=0; j<nDim; j++) {
66  if (i==j) continue;
67  surface[i] *= param.x[j];
68  }
69  }
70 
71  if (siteSubset == QUDA_INVALID_SITE_SUBSET) errorQuda("siteSubset is not set");
72  volumeCB = (siteSubset == QUDA_FULL_SITE_SUBSET) ? volume / 2 : volume;
73  stride = volumeCB + pad;
74 
 75  // for parity fields the factor of half is present for all surface dimensions except x, so add it manually
76  for (int i=0; i<nDim; i++)
77  surfaceCB[i] = (siteSubset == QUDA_FULL_SITE_SUBSET || i==0) ? surface[i] / 2 : surface[i];
78 
79  // for 5-dimensional fields, we only communicate in the space-time dimensions
80  nDimComms = nDim == 5 ? 4 : nDim;
81 
82  switch (precision) {
86  break;
87  default:
88  errorQuda("Unknown precision %d", precision);
89  }
90 
91  setTuningString();
92  }
93 
95  : volume(1), pad(field.pad), total_bytes(0), nDim(field.nDim), precision(field.precision),
96  siteSubset(field.siteSubset), ghostExchange(field.ghostExchange), ghost_bytes(0),
97  ghost_face_bytes{ }, ghostOffset( ), ghostNormOffset( ),
98  my_face_h{ }, my_face_hd{ }, initComms(false), mem_type(field.mem_type),
99  backup_h(nullptr), backup_norm_h(nullptr), backed_up(false)
100  {
101  for (int i=0; i<nDim; i++) {
102  x[i] = field.x[i];
103  r[i] = ghostExchange == QUDA_GHOST_EXCHANGE_EXTENDED ? field.r[i] : 0;
104  volume *= field.x[i];
105  surface[i] = 1;
106  for (int j=0; j<nDim; j++) {
107  if (i==j) continue;
108  surface[i] *= field.x[j];
109  }
110  }
111 
112  if (siteSubset == QUDA_INVALID_SITE_SUBSET) errorQuda("siteSubset is not set");
113  volumeCB = (siteSubset == QUDA_FULL_SITE_SUBSET) ? volume / 2 : volume;
114  stride = volumeCB + pad;
115 
 116  // for parity fields the factor of half is present for all surface dimensions except x, so add it manually
117  for (int i=0; i<nDim; i++)
118  surfaceCB[i] = (siteSubset == QUDA_FULL_SITE_SUBSET || i==0) ? surface[i] / 2 : surface[i];
119 
120  // for 5-dimensional fields, we only communicate in the space-time dimensions
121  nDimComms = nDim == 5 ? 4 : nDim;
122 
123  setTuningString();
124  }
125 
127 
128  void LatticeField::allocateGhostBuffer(size_t ghost_bytes) const
129  {
130  // only allocate if not already allocated or buffer required is bigger than previously
132 
133  if (initGhostFaceBuffer) {
134  if (ghost_bytes) {
135  for (int b=0; b<2; b++) {
139  }
140  }
141  }
142 
143  if (ghost_bytes > 0) {
144  for (int b=0; b<2; ++b) {
145  // gpu receive buffer (use pinned allocator to avoid this being redirected, e.g., by QDPJIT)
147 
148  // gpu send buffer (use pinned allocator to avoid this being redirected, e.g., by QDPJIT)
150 
151  // pinned buffer used for sending and receiving
153 
154  // set the matching device-mapper pointer
155  cudaHostGetDevicePointer(&ghost_pinned_buffer_hd[b], ghost_pinned_buffer_h[b], 0);
156  }
157 
158  initGhostFaceBuffer = true;
160  }
161 
162  LatticeField::ghost_field_reset = true; // this signals that we must reset the IPC comms
163  }
164 
165  }
166 
168  {
169  destroyIPCComms();
170 
171  if (!initGhostFaceBuffer) return;
172 
173  for (int b=0; b<2; b++) {
174  // free receive buffer
176  ghost_recv_buffer_d[b] = nullptr;
177 
178  // free send buffer
180  ghost_send_buffer_d[b] = nullptr;
181 
182  // free pinned memory buffers
184  ghost_pinned_buffer_h[b] = nullptr;
185  ghost_pinned_buffer_hd[b] = nullptr;
186  }
187  initGhostFaceBuffer = false;
188  }
189 
190  void LatticeField::createComms(bool no_comms_fill)
191  {
192  destroyComms(); // if we are requesting a new number of faces destroy and start over
193 
194  // initialize the ghost pinned buffers
195  for (int b=0; b<2; b++) {
198  from_face_h[b] = static_cast<char*>(my_face_h[b]) + ghost_bytes;
199  from_face_hd[b] = static_cast<char*>(my_face_hd[b]) + ghost_bytes;
200  }
201 
202  // initialize ghost send pointers
203  size_t offset = 0;
204  for (int i=0; i<nDimComms; i++) {
205  if (!commDimPartitioned(i) && no_comms_fill==false) continue;
206 
207  for (int b=0; b<2; ++b) {
208  my_face_dim_dir_h[b][i][0] = static_cast<char*>(my_face_h[b]) + offset;
209  from_face_dim_dir_h[b][i][0] = static_cast<char*>(from_face_h[b]) + offset;
210 
211  my_face_dim_dir_hd[b][i][0] = static_cast<char*>(my_face_hd[b]) + offset;
212  from_face_dim_dir_hd[b][i][0] = static_cast<char*>(from_face_hd[b]) + offset;
213 
214  my_face_dim_dir_d[b][i][0] = static_cast<char*>(ghost_send_buffer_d[b]) + offset;
215  from_face_dim_dir_d[b][i][0] = static_cast<char*>(ghost_recv_buffer_d[b]) + ghostOffset[i][0]*precision;
216  } // loop over b
217 
219 
220  for (int b=0; b<2; ++b) {
221  my_face_dim_dir_h[b][i][1] = static_cast<char*>(my_face_h[b]) + offset;
222  from_face_dim_dir_h[b][i][1] = static_cast<char*>(from_face_h[b]) + offset;
223 
224  my_face_dim_dir_hd[b][i][1] = static_cast<char*>(my_face_hd[b]) + offset;
225  from_face_dim_dir_hd[b][i][1] = static_cast<char*>(from_face_hd[b]) + offset;
226 
227  my_face_dim_dir_d[b][i][1] = static_cast<char*>(ghost_send_buffer_d[b]) + offset;
228  from_face_dim_dir_d[b][i][1] = static_cast<char*>(ghost_recv_buffer_d[b]) + ghostOffset[i][1]*precision;
229  } // loop over b
231 
232  } // loop over dimension
233 
234  bool gdr = comm_gdr_enabled(); // only allocate rdma buffers if GDR enabled
235 
236  // initialize the message handlers
237  for (int i=0; i<nDimComms; i++) {
238  if (!commDimPartitioned(i)) continue;
239 
240  for (int b=0; b<2; ++b) {
243 
246 
249 
252  } // loop over b
253 
254  } // loop over dimension
255 
256  initComms = true;
257  checkCudaError();
258  }
259 
261  {
262  if (initComms) {
263 
264  for (int b=0; b<2; ++b) {
265  for (int i=0; i<nDimComms; i++) {
266  if (commDimPartitioned(i)) {
267  if (mh_recv_fwd[b][i]) comm_free(mh_recv_fwd[b][i]);
269  if (mh_send_fwd[b][i]) comm_free(mh_send_fwd[b][i]);
271 
276  }
277  }
278  } // loop over b
279 
280  initComms = false;
281  checkCudaError();
282  }
283 
284  }
285 
287  if ( initIPCComms && !ghost_field_reset ) return;
288 
289  if (!initComms) errorQuda("Can only be called after create comms");
290  if ( (!ghost_recv_buffer_d[0] || !ghost_recv_buffer_d[1]) && comm_size() > 1) errorQuda("ghost_field appears not to be allocated");
291 
292  // handles for obtained ghost pointers
293  cudaIpcMemHandle_t ipcRemoteGhostDestHandle[2][2][QUDA_MAX_DIM];
294 
295  for (int b=0; b<2; b++) {
296  for (int dim=0; dim<4; ++dim) {
297  if (comm_dim(dim)==1) continue;
298  for (int dir=0; dir<2; ++dir) {
299  MsgHandle* sendHandle = nullptr;
300  MsgHandle* receiveHandle = nullptr;
301  int disp = (dir == 1) ? +1 : -1;
302 
303  // first set up receive
304  if (comm_peer2peer_enabled(1-dir,dim)) {
305  receiveHandle = comm_declare_receive_relative(&ipcRemoteGhostDestHandle[b][1-dir][dim],
306  dim, -disp,
307  sizeof(ipcRemoteGhostDestHandle[b][1-dir][dim]));
308  }
309  // now send
310  if (comm_peer2peer_enabled(dir,dim)) {
311  cudaIpcMemHandle_t ipcLocalGhostDestHandle;
312  cudaIpcGetMemHandle(&ipcLocalGhostDestHandle, ghost_recv_buffer_d[b]);
313  sendHandle = comm_declare_send_relative(&ipcLocalGhostDestHandle,
314  dim, disp,
315  sizeof(ipcLocalGhostDestHandle));
316  }
317  if (receiveHandle) comm_start(receiveHandle);
318  if (sendHandle) comm_start(sendHandle);
319 
320  if (receiveHandle) comm_wait(receiveHandle);
321  if (sendHandle) comm_wait(sendHandle);
322 
323  if (sendHandle) comm_free(sendHandle);
324  if (receiveHandle) comm_free(receiveHandle);
325  }
326  }
327 
328  checkCudaError();
329 
330  // open the remote memory handles and set the send ghost pointers
331  for (int dim=0; dim<4; ++dim) {
332  if (comm_dim(dim)==1) continue;
 333  // even if comm_dim(dim) == 2, we may not have p2p enabled in both directions, so check this
334  const int num_dir = (comm_dim(dim) == 2 && comm_peer2peer_enabled(0,dim) && comm_peer2peer_enabled(1,dim)) ? 1 : 2;
335  for (int dir=0; dir<num_dir; ++dir) {
336  if (!comm_peer2peer_enabled(dir,dim)) continue;
337  void **ghostDest = &(ghost_remote_send_buffer_d[b][dim][dir]);
338  cudaIpcOpenMemHandle(ghostDest, ipcRemoteGhostDestHandle[b][dir][dim],
339  cudaIpcMemLazyEnablePeerAccess);
340  }
341  if (num_dir == 1) ghost_remote_send_buffer_d[b][dim][1] = ghost_remote_send_buffer_d[b][dim][0];
342  }
343  } // buffer index
344 
345  checkCudaError();
346 
347  // handles for obtained events
348  cudaIpcEventHandle_t ipcRemoteEventHandle[2][2][QUDA_MAX_DIM];
349 
350  // Note that no b index is necessary here
351  // Now communicate the event handles
352  for (int dim=0; dim<4; ++dim) {
353  if (comm_dim(dim)==1) continue;
354  for (int dir=0; dir<2; ++dir) {
355  for (int b=0; b<2; b++) {
356 
357  MsgHandle* sendHandle = NULL;
358  MsgHandle* receiveHandle = NULL;
359  int disp = (dir == 1) ? +1 : -1;
360 
361  // first set up receive
362  if (comm_peer2peer_enabled(1-dir,dim)) {
363  receiveHandle = comm_declare_receive_relative(&ipcRemoteEventHandle[b][1-dir][dim], dim, -disp,
364  sizeof(ipcRemoteEventHandle[b][1-dir][dim]));
365  }
366 
367  // now send
368  if (comm_peer2peer_enabled(dir,dim)) {
369  cudaEventCreate(&ipcCopyEvent[b][dir][dim], cudaEventDisableTiming | cudaEventInterprocess);
370  cudaIpcEventHandle_t ipcLocalEventHandle;
371  cudaIpcGetEventHandle(&ipcLocalEventHandle, ipcCopyEvent[b][dir][dim]);
372 
373  sendHandle = comm_declare_send_relative(&ipcLocalEventHandle, dim, disp,
374  sizeof(ipcLocalEventHandle));
375  }
376 
377  if (receiveHandle) comm_start(receiveHandle);
378  if (sendHandle) comm_start(sendHandle);
379 
380  if (receiveHandle) comm_wait(receiveHandle);
381  if (sendHandle) comm_wait(sendHandle);
382 
383  if (sendHandle) comm_free(sendHandle);
384  if (receiveHandle) comm_free(receiveHandle);
385 
386  } // buffer index
387  }
388  }
389 
390  checkCudaError();
391 
392  for (int dim=0; dim<4; ++dim) {
393  if (comm_dim(dim)==1) continue;
394  for (int dir=0; dir<2; ++dir) {
395  if (!comm_peer2peer_enabled(dir,dim)) continue;
396  for (int b=0; b<2; b++) {
397  cudaIpcOpenEventHandle(&(ipcRemoteCopyEvent[b][dir][dim]), ipcRemoteEventHandle[b][dir][dim]);
398  }
399  }
400  }
401 
402  // Create message handles for IPC synchronization
403  for (int dim=0; dim<4; ++dim) {
404  if (comm_dim(dim)==1) continue;
405  if (comm_peer2peer_enabled(1,dim)) {
406  for (int b=0; b<2; b++) {
407  // send to processor in forward direction
409  // receive from processor in forward direction
411  }
412  }
413 
414  if (comm_peer2peer_enabled(0,dim)) {
415  for (int b=0; b<2; b++) {
416  // send to processor in backward direction
418  // receive from processor in backward direction
420  }
421  }
422  }
423  checkCudaError();
424 
425  initIPCComms = true;
426  ghost_field_reset = false;
427  }
428 
430 
431  if (!initIPCComms) return;
432  checkCudaError();
433 
434  for (int dim=0; dim<4; ++dim) {
435 
436  if (comm_dim(dim)==1) continue;
437  const int num_dir = (comm_dim(dim) == 2 && comm_peer2peer_enabled(0,dim) && comm_peer2peer_enabled(1,dim)) ? 1 : 2;
438 
439  for (int b=0; b<2; b++) {
440  if (comm_peer2peer_enabled(1,dim)) {
443  cudaEventDestroy(ipcCopyEvent[b][1][dim]);
444 
445  // only close this handle if it doesn't alias the back ghost
446  if (num_dir == 2) cudaIpcCloseMemHandle(ghost_remote_send_buffer_d[b][dim][1]);
447  }
448 
449  if (comm_peer2peer_enabled(0,dim)) {
452  cudaEventDestroy(ipcCopyEvent[b][0][dim]);
453 
454  cudaIpcCloseMemHandle(ghost_remote_send_buffer_d[b][dim][0]);
455  }
456  } // buffer
457  } // iterate over dim
458 
459  checkCudaError();
460  initIPCComms = false;
461  }
462 
464  {
465  return (cudaSuccess == cudaEventQuery(ipcCopyEvent[bufferIndex][dir][dim]) ? true : false);
466  }
467 
469  {
470  return (cudaSuccess == cudaEventQuery(ipcRemoteCopyEvent[bufferIndex][dir][dim]) ? true : false);
471  }
472 
473  const cudaEvent_t& LatticeField::getIPCCopyEvent(int dir, int dim) const {
474  return ipcCopyEvent[bufferIndex][dir][dim];
475  }
476 
477  const cudaEvent_t& LatticeField::getIPCRemoteCopyEvent(int dir, int dim) const {
478  return ipcRemoteCopyEvent[bufferIndex][dir][dim];
479  }
480 
482  char vol_tmp[TuneKey::volume_n];
483  int check;
484  check = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);
485  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
486  for (int d=1; d<nDim; d++) {
487  strcpy(vol_tmp, vol_string);
488  check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);
489  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
490  }
491  }
492 
494  if (a.nDim != nDim) errorQuda("nDim does not match %d %d", nDim, a.nDim);
496  // if source is extended by I am not then we need to compare their interior volume to my volume
497  int a_volume_interior = 1;
498  for (int i=0; i<nDim; i++) {
499  if (a.x[i]-2*a.r[i] != x[i]) errorQuda("x[%d] does not match %d %d", i, x[i], a.x[i]-2*a.r[i]);
500  a_volume_interior *= a.x[i] - 2*a.r[i];
501  }
502  if (a_volume_interior != volume) errorQuda("Interior volume does not match %d %d", volume, a_volume_interior);
503  } else if (a.ghostExchange != QUDA_GHOST_EXCHANGE_EXTENDED && ghostExchange == QUDA_GHOST_EXCHANGE_EXTENDED) {
504  // if source is extended by I am not then we need to compare their interior volume to my volume
505  int this_volume_interior = 1;
506  for (int i=0; i<nDim; i++) {
507  if (x[i]-2*r[i] != a.x[i]) errorQuda("x[%d] does not match %d %d", i, x[i]-2*r[i], a.x[i]);
508  this_volume_interior *= x[i] - 2*r[i];
509  }
510  if (this_volume_interior != a.volume) errorQuda("Interior volume does not match %d %d", this_volume_interior, a.volume);
511  } else {
512  if (a.volume != volume) errorQuda("Volume does not match %d %d", volume, a.volume);
513  if (a.volumeCB != volumeCB) errorQuda("VolumeCB does not match %d %d", volumeCB, a.volumeCB);
514  for (int i=0; i<nDim; i++) {
515  if (a.x[i] != x[i]) errorQuda("x[%d] does not match %d %d", i, x[i], a.x[i]);
516  if (a.surface[i] != surface[i]) errorQuda("surface[%d] does not match %d %d", i, surface[i], a.surface[i]);
517  if (a.surfaceCB[i] != surfaceCB[i]) errorQuda("surfaceCB[%d] does not match %d %d", i, surfaceCB[i], a.surfaceCB[i]);
518  }
519  }
520  }
521 
524  if (typeid(*this)==typeid(cudaCloverField) ||
525  typeid(*this)==typeid(cudaColorSpinorField) ||
526  typeid(*this)==typeid(cudaGaugeField)) {
527  location = QUDA_CUDA_FIELD_LOCATION;
528  } else if (typeid(*this)==typeid(cpuCloverField) ||
529  typeid(*this)==typeid(cpuColorSpinorField) ||
530  typeid(*this)==typeid(cpuGaugeField)) {
531  location = QUDA_CPU_FIELD_LOCATION;
532  } else {
533  errorQuda("Unknown field %s, so cannot determine location", typeid(*this).name());
534  }
535  return location;
536  }
537 
538  void LatticeField::read(char *filename) {
539  errorQuda("Not implemented");
540  }
541 
542  void LatticeField::write(char *filename) {
543  errorQuda("Not implemented");
544  }
545 
546  int LatticeField::Nvec() const {
547  if (typeid(*this) == typeid(const cudaColorSpinorField)) {
548  const ColorSpinorField &csField = static_cast<const ColorSpinorField&>(*this);
549  if (csField.FieldOrder() == 2 || csField.FieldOrder() == 4)
550  return static_cast<int>(csField.FieldOrder());
551  } else if (typeid(*this) == typeid(const cudaGaugeField)) {
552  const GaugeField &gField = static_cast<const GaugeField&>(*this);
553  if (gField.Order() == 2 || gField.Order() == 4)
554  return static_cast<int>(gField.Order());
555  } else if (typeid(*this) == typeid(const cudaCloverField)) {
556  const CloverField &cField = static_cast<const CloverField&>(*this);
557  if (cField.Order() == 2 || cField.Order() == 4)
558  return static_cast<int>(cField.Order());
559  }
560 
561  errorQuda("Unsupported field type");
562  return -1;
563  }
564 
565  // This doesn't really live here, but is fine for the moment
566  std::ostream& operator<<(std::ostream& output, const LatticeFieldParam& param)
567  {
568  output << "nDim = " << param.nDim << std::endl;
569  for (int i=0; i<param.nDim; i++) {
570  output << "x[" << i << "] = " << param.x[i] << std::endl;
571  }
572  output << "pad = " << param.pad << std::endl;
573  output << "precision = " << param.precision << std::endl;
574 
575  output << "ghostExchange = " << param.ghostExchange << std::endl;
576  for (int i=0; i<param.nDim; i++) {
577  output << "r[" << i << "] = " << param.r[i] << std::endl;
578  }
579 
580  return output; // for multiple << operators.
581  }
582 
584 
586  void reorder_location_set(QudaFieldLocation _reorder_location) { reorder_location_ = _reorder_location; }
587 
588 } // namespace quda
static int buffer_recv_p2p_back[2][QUDA_MAX_DIM]
bool ipcRemoteCopyComplete(int dir, int dim)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can set at QUDA initialization using the env...
virtual void read(char *filename)
void allocateGhostBuffer(size_t ghost_bytes) const
Allocate the static ghost buffers.
int commDimPartitioned(int dir)
int snprintf(char *__str, size_t __size, const char *__format,...) __attribute__((__format__(__printf__
void * my_face_dim_dir_d[2][QUDA_MAX_DIM][2]
void createComms(bool no_comms_fill=false)
static void * ghost_pinned_buffer_hd[2]
#define errorQuda(...)
Definition: util_quda.h:90
#define host_free(ptr)
Definition: malloc_quda.h:59
int comm_dim(int dim)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
char * strcpy(char *__dst, const char *__src)
int x[QUDA_MAX_DIM]
static MsgHandle * mh_send_p2p_back[2][QUDA_MAX_DIM]
void * from_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
QudaCloverFieldOrder Order() const
Definition: clover_field.h:92
virtual void setTuningString()
MsgHandle * mh_send_rdma_fwd[2][QUDA_MAX_DIM]
bool ipcCopyComplete(int dir, int dim)
static bool initGhostFaceBuffer
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
size_t size_t offset
static int buffer_recv_p2p_fwd[2][QUDA_MAX_DIM]
QudaGaugeParam param
Definition: pack_test.cpp:17
#define b
void comm_free(MsgHandle *mh)
Definition: comm_mpi.cpp:252
void * my_face_dim_dir_hd[2][QUDA_MAX_DIM][2]
static void * ghost_pinned_buffer_h[2]
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
const int * R() const
MsgHandle * mh_send_rdma_back[2][QUDA_MAX_DIM]
static MsgHandle * mh_recv_p2p_fwd[2][QUDA_MAX_DIM]
const cudaEvent_t & getIPCCopyEvent(int dir, int dim) const
int comm_size(void)
Definition: comm_mpi.cpp:126
static bool ghost_field_reset
static int bufferIndex
void * from_face_hd[2]
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:59
void checkField(const LatticeField &a) const
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:74
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
#define device_pinned_malloc(size)
Definition: malloc_quda.h:53
void comm_start(MsgHandle *mh)
Definition: comm_mpi.cpp:260
MsgHandle * mh_recv_rdma_fwd[2][QUDA_MAX_DIM]
QudaGhostExchange ghostExchange
static void * ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2]
const cudaEvent_t & getIPCRemoteCopyEvent(int dir, int dim) const
char vol_string[TuneKey::volume_n]
virtual void write(char *filename)
void * from_face_dim_dir_d[2][QUDA_MAX_DIM][2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
static void destroyIPCComms()
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
static int buffer_send_p2p_fwd[2][QUDA_MAX_DIM]
bool comm_peer2peer_enabled(int dir, int dim)
QudaFieldLocation Location() const
static QudaFieldLocation reorder_location_
static int buffer_send_p2p_back[2][QUDA_MAX_DIM]
static size_t ghostFaceBytes
static void * ghost_send_buffer_d[2]
int surface[QUDA_MAX_DIM]
enum QudaFieldLocation_s QudaFieldLocation
int ghostOffset[QUDA_MAX_DIM][2]
void * from_face_dim_dir_h[2][QUDA_MAX_DIM][2]
int r[QUDA_MAX_DIM]
static MsgHandle * mh_send_p2p_fwd[2][QUDA_MAX_DIM]
static void * ghost_recv_buffer_d[2]
MsgHandle * mh_recv_rdma_back[2][QUDA_MAX_DIM]
static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM]
LatticeFieldParam()
Default constructor for LatticeFieldParam.
Definition: lattice_field.h:68
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
static long total_bytes[N_ALLOC_TYPE]
Definition: malloc.cpp:52
int surfaceCB[QUDA_MAX_DIM]
static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM]
QudaGaugeFieldOrder Order() const
Definition: gauge_field.h:204
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
#define checkCudaError()
Definition: util_quda.h:129
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
#define mapped_malloc(size)
Definition: malloc_quda.h:56
void comm_wait(MsgHandle *mh)
Definition: comm_mpi.cpp:266
static MsgHandle * mh_recv_p2p_back[2][QUDA_MAX_DIM]
#define device_pinned_free(ptr)
Definition: malloc_quda.h:58
static const int volume_n
Definition: tune_key.h:10
static __inline__ size_t size_t d
void * my_face_dim_dir_h[2][QUDA_MAX_DIM][2]
int r[QUDA_MAX_DIM]
Definition: lattice_field.h:63
void reorder_location_set(QudaFieldLocation reorder_location_)
Set whether data is reorderd on the CPU or GPU. This can set at QUDA initialization using the environ...
static void freeGhostBuffer(void)
Free statically allocated ghost buffers.
QudaPrecision precision
LatticeField(const LatticeFieldParam &param)
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
#define a
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:72
QudaFieldOrder FieldOrder() const
static bool initIPCComms
const int * X() const