#include <shmem_helper.cuh>
LatticeFieldParam::LatticeFieldParam(const LatticeField &field) :
  precision(field.Precision()), ghost_precision(field.Precision()),
  nDim(field.Ndim()), pad(field.Pad()),
  siteSubset(field.SiteSubset()), mem_type(field.MemType()),
  ghostExchange(field.GhostExchange()), scale(field.Scale())
{
  for (int dir = 0; dir < nDim; ++dir) {
    x[dir] = field.X()[dir];
    r[dir] = field.R()[dir];
  }
}
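// A minimal usage sketch (assuming an existing, fully constructed
// LatticeField `field`): the constructor above captures the field's
// geometry and layout so a compatible field can be created later.
//
//   LatticeFieldParam param(field);               // copies precision, nDim, pad, x[], r[]
//   param.ghost_precision = QUDA_HALF_PRECISION;  // members may be overridden before reuse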
LatticeField::LatticeField(const LatticeFieldParam &param) :
  precision(param.Precision()),
  ghost_precision(param.GhostPrecision()),
  ghost_precision_reset(false),
  // ...
  siteSubset(param.siteSubset),
  ghostExchange(param.ghostExchange),
  // ...
  ghost_face_bytes_aligned {},
  // ...
  mem_type(param.mem_type),
  // ...
  backup_norm_h(nullptr),
  // ...
{
  for (int dir = 0; dir < 2; dir++) {
    // ...
    for (int b = 0; b < 2; b++) {
      // ...
    }
  }
  // ...
  for (int i = 0; i < nDim; i++) {
    // ...
    for (int j = 0; j < nDim; j++) {
      // ...
    }
  }
  // ...
  for (int i = 0; i < nDim; i++)
    /* ... */;
}
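// The nested i/j loops above follow the standard pattern for per-dimension
// surface volumes: surface[i] is the product of all extents except x[i].
// A self-contained sketch of that pattern (the surface/surfaceCB names match
// the class members; the 4-dim extents are illustrative):
//
//   int x[4] = {8, 8, 8, 16};
//   int surface[4], surfaceCB[4];
//   for (int i = 0; i < 4; i++) {
//     surface[i] = 1;
//     for (int j = 0; j < 4; j++) {
//       if (i == j) continue;
//       surface[i] *= x[j];
//     }
//     surfaceCB[i] = surface[i] / 2; // checkerboard (single-parity) surface
//   }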
LatticeField::LatticeField(const LatticeField &field) :
  precision(field.precision),
  ghost_precision(field.ghost_precision),
  ghost_precision_reset(false),
  // ...
  siteSubset(field.siteSubset),
  ghostExchange(field.ghostExchange),
  // ...
  ghost_face_bytes_aligned {},
  // ...
  mem_type(field.mem_type),
  // ...
  backup_norm_h(nullptr),
  // ...
{
  for (int dir = 0; dir < 2; dir++) {
    // ...
  }
  // ...
  for (int i = 0; i < nDim; i++) {
    // ...
    for (int j = 0; j < nDim; j++) {
      // ...
    }
  }
  // ...
  for (int i = 0; i < nDim; i++)
    /* ... */;
}
// ...
for (int b = 0; b < 2; b++) { /* ... */ }
// ...
for (int b = 0; b < 2; ++b) { /* ... */ }
// ...
for (int b = 0; b < 2; b++) { /* ... */ }
// ...
for (int b = 0; b < 2; b++) { /* ... */ }
// ...
for (int b = 0; b < 2; ++b) { /* ... */ }
// ...
for (int b = 0; b < 2; ++b) { /* ... */ }
// ...
for (int b = 0; b < 2; ++b) { /* ... */ }
// ...
for (int b = 0; b < 2; ++b) { /* ... */ }
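// The b = 0,1 loops throughout this file walk QUDA's two sets of
// communication buffers (note the [2]-sized ghost buffer members), which are
// alternated between successive halo exchanges so one exchange can be
// pipelined against the next. A sketch of the idea, assuming QUDA's global
// `bufferIndex` that tracks the active set:
//
//   extern int bufferIndex;                          // QUDA global
//   void *send = ghost_send_buffer_d[bufferIndex];   // use the active set
//   // ... post sends/receives against `send` ...
//   bufferIndex = 1 - bufferIndex;                   // flip for the next exchange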
// ...
errorQuda("ghost_field appears not to be allocated");
// ...
#ifndef NVSHMEM_COMMS
// handles for obtaining the remote ghost pointers
cudaIpcMemHandle_t ipcRemoteGhostDestHandle[2][2][QUDA_MAX_DIM];
#endif
// ...
for (int b = 0; b < 2; b++) {
#ifndef NVSHMEM_COMMS
  // ...
  for (int dir = 0; dir < 2; ++dir) {
    // ...
    int disp = (dir == 1) ? +1 : -1;
    // ...
    MsgHandle *receiveHandle = comm_declare_receive_relative(&ipcRemoteGhostDestHandle[b][1 - dir][dim], dim, -disp,
                                                             sizeof(ipcRemoteGhostDestHandle[b][1 - dir][dim]));
    // ...
    cudaIpcMemHandle_t ipcLocalGhostDestHandle;
    // ...
    MsgHandle *sendHandle
      = comm_declare_send_relative(&ipcLocalGhostDestHandle, dim, disp, sizeof(ipcLocalGhostDestHandle));
    // ...
    if (receiveHandle) comm_wait(receiveHandle);
    // ...
    if (receiveHandle) comm_free(receiveHandle);
    // ...
  }
#endif
  // ...
  for (int dir = 0; dir < num_dir; ++dir) {
#ifndef NVSHMEM_COMMS
    // map the neighbor's ghost buffer into this process's address space
    cudaIpcOpenMemHandle(ghostDest, ipcRemoteGhostDestHandle[b][dir][dim], cudaIpcMemLazyEnablePeerAccess);
    // ...
#endif
  }
  // ...
}
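// The exchange above is the standard CUDA IPC pattern: ship a
// cudaIpcMemHandle_t to a neighboring process, then open it to obtain a
// directly addressable device pointer. A standalone sketch of the same
// technique using raw MPI in place of QUDA's comm_* wrappers (the function
// name and two-rank setup are illustrative assumptions):

#include <cuda_runtime.h>
#include <mpi.h>

void exchange_ipc_buffers(void **peer_ptr)
{
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // each rank allocates a device buffer and derives an IPC handle for it
  void *local = nullptr;
  cudaMalloc(&local, 1 << 20);
  cudaIpcMemHandle_t local_handle, remote_handle;
  cudaIpcGetMemHandle(&local_handle, local);

  // swap handles with the peer (two ranks assumed)
  int peer = 1 - rank;
  MPI_Sendrecv(&local_handle, sizeof(local_handle), MPI_BYTE, peer, 0,
               &remote_handle, sizeof(remote_handle), MPI_BYTE, peer, 0,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);

  // map the peer's allocation into this process's address space
  cudaIpcOpenMemHandle(peer_ptr, remote_handle, cudaIpcMemLazyEnablePeerAccess);
}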
// handles for the remote inter-process copy events
cudaIpcEventHandle_t ipcRemoteEventHandle[2][2][QUDA_MAX_DIM];
// ...
for (int dir = 0; dir < 2; ++dir) {
  for (int b = 0; b < 2; b++) {
    // ...
    int disp = (dir == 1) ? +1 : -1;
    // ...
    MsgHandle *receiveHandle = comm_declare_receive_relative(&ipcRemoteEventHandle[b][1 - dir][dim], dim, -disp,
                                                             sizeof(ipcRemoteEventHandle[b][1 - dir][dim]));
    // ...
    cudaIpcEventHandle_t ipcLocalEventHandle;
    // ...
    cudaEventCreate(&ipcCopyEvent[b][dir][dim], cudaEventDisableTiming | cudaEventInterprocess);
    cudaIpcGetEventHandle(&ipcLocalEventHandle, ipcCopyEvent[b][dir][dim]);
    // ...
    MsgHandle *sendHandle = comm_declare_send_relative(&ipcLocalEventHandle, dim, disp, sizeof(ipcLocalEventHandle));
    // ...
    if (receiveHandle) comm_wait(receiveHandle);
    // ...
    if (receiveHandle) comm_free(receiveHandle);
    // ...
  }
}
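// Interprocess CUDA events in isolation: a minimal sketch of creating an
// event a peer process can synchronize on, mirroring the memory-handle
// exchange above (function name and two-rank MPI setup are illustrative
// assumptions):

void exchange_ipc_events(cudaEvent_t *local_event, cudaEvent_t *remote_event)
{
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // interprocess events must be created with timing disabled
  cudaEventCreateWithFlags(local_event, cudaEventDisableTiming | cudaEventInterprocess);

  cudaIpcEventHandle_t local_handle, remote_handle;
  cudaIpcGetEventHandle(&local_handle, *local_event);

  int peer = 1 - rank; // two ranks assumed
  MPI_Sendrecv(&local_handle, sizeof(local_handle), MPI_BYTE, peer, 1,
               &remote_handle, sizeof(remote_handle), MPI_BYTE, peer, 1,
               MPI_COMM_WORLD, MPI_STATUS_IGNORE);

  // open the peer's event: record on one side, cudaStreamWaitEvent on the other
  cudaIpcOpenEventHandle(remote_event, remote_handle);
}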
// ...
for (int dir = 0; dir < 2; ++dir) {
  // ...
  for (int b = 0; b < 2; b++) {
    // ...
  }
}
// ...
for (int b = 0; b < 2; b++) { /* ... */ }
// ...
for (int b = 0; b < 2; b++) { /* ... */ }
// ...
#ifndef NVSHMEM_COMMS
// ...
for (int b = 0; b < 2; b++) { /* ... */ }
// ...
#endif
// ...
#ifndef NVSHMEM_COMMS
// ...
#endif
// ...
#ifndef NVSHMEM_COMMS
// ...
#endif
// ...
for (int d = 1; d < nDim; d++) {
  // ...
}
size_t a_volume_interior = 1;
for (int i = 0; i < nDim; i++) {
  if (a.x[i] - 2 * a.r[i] != x[i]) errorQuda("x[%d] does not match %d %d", i, x[i], a.x[i] - 2 * a.r[i]);
  a_volume_interior *= a.x[i] - 2 * a.r[i];
}
if (a_volume_interior != volume) errorQuda("Interior volume does not match %lu %lu", volume, a_volume_interior);
// ...
size_t this_volume_interior = 1;
for (int i = 0; i < nDim; i++) {
  if (x[i] - 2 * r[i] != a.x[i]) errorQuda("x[%d] does not match %d %d", i, x[i] - 2 * r[i], a.x[i]);
  this_volume_interior *= x[i] - 2 * r[i];
}
if (this_volume_interior != a.volume)
  errorQuda("Interior volume does not match %lu %lu", this_volume_interior, a.volume);
// ...
for (int i = 0; i < nDim; i++) {
  if (a.x[i] != x[i]) errorQuda("x[%d] does not match %d %d", i, x[i], a.x[i]);
}
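// The interior-volume check in numbers: an extended field with extents
// x[] = {12, 12, 12, 20} and halo radius r[] = {2, 2, 2, 2} has interior
// extents x[i] - 2*r[i] = {8, 8, 8, 16}, which must reproduce the unextended
// field exactly (extents are illustrative):
//
//   size_t interior = 1;
//   int x[4] = {12, 12, 12, 20}, r[4] = {2, 2, 2, 2};
//   for (int i = 0; i < 4; i++) interior *= x[i] - 2 * r[i];
//   // interior == 8 * 8 * 8 * 16 == 8192, the volume of the matching field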
// ...
errorQuda("Unknown field %s, so cannot determine location", typeid(*this).name());
// ...
return static_cast<int>(csField.FieldOrder());
// ...
if (gField.Order() == 2 || gField.Order() == 4)
  return static_cast<int>(gField.Order());
// ...
if (cField.Order() == 2 || cField.Order() == 4)
  return static_cast<int>(cField.Order());
// ...
output << "nDim = " << param.nDim << std::endl;
for (int i = 0; i < param.nDim; i++) {
  output << "x[" << i << "] = " << param.x[i] << std::endl;
}
output << "pad = " << param.pad << std::endl;
output << "precision = " << param.Precision() << std::endl;
output << "ghost_precision = " << param.GhostPrecision() << std::endl;
output << "scale = " << param.scale << std::endl;
// ...
output << "ghostExchange = " << param.ghostExchange << std::endl;
for (int i = 0; i < param.nDim; i++) {
  output << "r[" << i << "] = " << param.r[i] << std::endl;
}
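// A usage sketch for the stream operator above (the param settings are
// illustrative):
//
//   LatticeFieldParam param;
//   param.nDim = 4;
//   std::cout << param;  // prints nDim, x[], pad, precisions, scale, ghostExchange, r[]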