13 errorQuda(
"QDP ordering only supported for reference fields");
19 errorQuda(
"Field ordering %d presently disabled for this type",
order);
25 bool pad_check =
true;
29 if (
pad < minimum_pad) pad_check =
false;
31 errorQuda(
"cudaGaugeField being constructed with insufficient padding (%d < %d)\n",
pad, minimum_pad);
64 #ifdef USE_TEXTURE_OBJECTS 66 createTexObject(evenTex,
even,
false);
67 createTexObject(oddTex,
odd,
false);
73 createTexObject(oddPhaseTex, (
char*)
odd +
phase_offset,
false, isPhase);
79 #ifdef USE_TEXTURE_OBJECTS 80 void cudaGaugeField::createTexObject(cudaTextureObject_t &
tex,
void *field,
bool full,
bool isPhase) {
84 cudaChannelFormatDesc
desc;
85 memset(&
desc, 0,
sizeof(cudaChannelFormatDesc));
87 else desc.f = cudaChannelFormatKindSigned;
96 texel_size = 2*
sizeof(
int);
109 texel_size = 4*
sizeof(
int);
119 cudaResourceDesc resDesc;
120 memset(&resDesc, 0,
sizeof(resDesc));
121 resDesc.resType = cudaResourceTypeLinear;
122 resDesc.res.linear.devPtr = field;
123 resDesc.res.linear.desc =
desc;
126 unsigned long texels = resDesc.res.linear.sizeInBytes / texel_size;
127 if (texels > (
unsigned)
deviceProp.maxTexture1DLinear) {
128 errorQuda(
"Attempting to bind too large a texture %lu > %d", texels,
deviceProp.maxTexture1DLinear);
131 cudaTextureDesc texDesc;
132 memset(&texDesc, 0,
sizeof(texDesc));
134 else texDesc.readMode = cudaReadModeElementType;
136 cudaCreateTextureObject(&
tex, &resDesc, &texDesc, NULL);
141 void cudaGaugeField::destroyTexObject() {
143 cudaDestroyTextureObject(
tex);
144 cudaDestroyTextureObject(evenTex);
145 cudaDestroyTextureObject(oddTex);
147 cudaDestroyTextureObject(phaseTex);
148 cudaDestroyTextureObject(evenPhaseTex);
149 cudaDestroyTextureObject(oddPhaseTex);
158 #ifdef USE_TEXTURE_OBJECTS 184 errorQuda(
"Cannot request exchange of forward links on non-coarse geometry");
189 const bool no_comms_fill =
true;
194 for (
int link_dir = 0; link_dir<2; link_dir++) {
273 const bool no_comms_fill =
false;
278 for (
int link_dir = 0; link_dir<2; link_dir++) {
413 stream_p ? *stream_p : 0);
495 for (
int dir=0; dir<2; dir++) {
510 for (
int dir=0; dir<2; dir++) {
542 errorQuda(
"Setting gauge pointer is only allowed when create=" 543 "QUDA_REFERENCE_FIELD_CREATE type\n");
550 void **buffer =
new void*[geometry];
552 return ((
void*)buffer);
562 void **buffer =
new void*[geometry];
574 delete []((
void**)buffer);
588 if (
this == &
src)
return;
595 errorQuda(
"fat_link_max has not been computed");
636 cudaError_t error = cudaHostGetDevicePointer(&src_d, const_cast<void*>(
src.Gauge_p()), 0);
637 if (error != cudaSuccess)
errorQuda(
"Failed to get device pointer for MILC site / BQCD array");
642 errorQuda(
"Ghost copy not supported here");
711 cudaError_t error = cudaHostGetDevicePointer(&cpu_d, const_cast<void*>(cpu.
Gauge_p()), 0);
712 if (error != cudaSuccess)
errorQuda(
"Failed to get device pointer for MILC site / BQCD array");
716 errorQuda(
"Ghost copy not supported here");
#define qudaMemcpy(dst, src, count, kind)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the env...
void extractGaugeGhost(const GaugeField &u, void **ghost, bool extract=true, int offset=0)
void allocateGhostBuffer(size_t ghost_bytes) const
Allocate the static ghost buffers.
#define pool_pinned_free(ptr)
cudaError_t qudaEventSynchronize(cudaEvent_t &event)
Wrapper around cudaEventSynchronize or cuEventSynchronize.
void * my_face_dim_dir_d[2][QUDA_MAX_DIM][2]
void saveCPUField(cpuGaugeField &cpu) const
Upload from this field into a CPU field.
void copyGenericGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)
cudaDeviceProp deviceProp
void createComms(bool no_comms_fill=false)
void free_gauge_buffer(void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
enum QudaLinkDirection_s QudaLinkDirection
void injectGhost(QudaLinkDirection link_direction=QUDA_LINK_BACKWARDS)
The opposite of exchangeGhost: take the ghost zone on x, send to node x-1, and inject back into the f...
void free_ghost_buffer(void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
QudaReconstructType reconstruct
bool staggeredPhaseApplied
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
static MsgHandle * mh_send_p2p_back[2][QUDA_MAX_DIM]
QudaFieldGeometry Geometry() const
void restore()
Restores the cudaGaugeField to CUDA memory.
MsgHandle * mh_send_rdma_fwd[2][QUDA_MAX_DIM]
QudaStaggeredPhase staggeredPhaseType
cudaGaugeField(const GaugeFieldParam &)
QudaFieldGeometry geometry
static void * ghost_pinned_buffer_h[2]
void loadCPUField(const cpuGaugeField &cpu)
Download into this field from a CPU field.
MsgHandle * mh_send_rdma_back[2][QUDA_MAX_DIM]
static MsgHandle * mh_recv_p2p_fwd[2][QUDA_MAX_DIM]
void checkField(const LatticeField &) const
static bool ghost_field_reset
void commsComplete(int dim, int dir)
Wait for communication to complete.
void allocateGhostBuffer(const int *R, bool no_comms_fill) const
Allocate the ghost buffers.
cudaError_t qudaStreamSynchronize(cudaStream_t &stream)
Wrapper around cudaStreamSynchronize or cuStreamSynchronize.
void exchangeExtendedGhost(const int *R, bool no_comms_fill=false)
This routine will populate the border / halo region of a gauge field that has been created using...
MsgHandle * mh_recv_back[2][QUDA_MAX_DIM]
void sendStart(int dim, int dir, cudaStream_t *stream_p=nullptr)
Start the sending communicators.
void extractExtendedGaugeGhost(const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
void comm_start(MsgHandle *mh)
#define pool_device_malloc(size)
MsgHandle * mh_recv_rdma_fwd[2][QUDA_MAX_DIM]
QudaGhostExchange ghostExchange
static void * ghost_remote_send_buffer_d[2][QUDA_MAX_DIM][2]
void * create_gauge_buffer(size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
void exchangeGhost(QudaLinkDirection link_direction=QUDA_LINK_BACKWARDS)
Exchange the ghost and store in the padded region.
const void ** Ghost() const
enum QudaGaugeFieldOrder_s QudaGaugeFieldOrder
void backup() const
Backs up the cudaGaugeField to CPU memory.
void * from_face_dim_dir_d[2][QUDA_MAX_DIM][2]
size_t ghost_face_bytes[QUDA_MAX_DIM]
cudaError_t qudaDeviceSynchronize()
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
static void destroyIPCComms()
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
bool comm_peer2peer_enabled(int dir, int dim)
static void * ghost_send_buffer_d[2]
void * memset(void *__b, int __c, size_t __len)
int surface[QUDA_MAX_DIM]
#define pool_pinned_malloc(size)
void recvStart(int dim, int dir)
Start the receive communicators.
void createGhostZone(const int *R, bool no_comms_fill) const
int ghostOffset[QUDA_MAX_DIM][2]
void ** create_ghost_buffer(size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry)
void * from_face_dim_dir_h[2][QUDA_MAX_DIM][2]
static MsgHandle * mh_send_p2p_fwd[2][QUDA_MAX_DIM]
static void * ghost_recv_buffer_d[2]
MsgHandle * mh_recv_rdma_back[2][QUDA_MAX_DIM]
static cudaEvent_t ipcCopyEvent[2][2][QUDA_MAX_DIM]
void setGauge(void *_gauge)
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
void createComms(const int *R, bool no_comms_fill)
Create the communication handlers and buffers.
bool comm_gdr_enabled()
Query if GPU Direct RDMA communication is enabled (global setting)
cudaError_t qudaEventRecord(cudaEvent_t &event, cudaStream_t stream=0)
Wrapper around cudaEventRecord or cuEventRecord.
int surfaceCB[QUDA_MAX_DIM]
QudaReconstructType Reconstruct() const
static cudaEvent_t ipcRemoteCopyEvent[2][2][QUDA_MAX_DIM]
enum QudaFieldGeometry_s QudaFieldGeometry
virtual ~cudaGaugeField()
#define pool_device_free(ptr)
QudaGaugeFieldOrder Order() const
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
const struct cudaChannelFormatDesc * desc
MsgHandle * mh_recv_fwd[2][QUDA_MAX_DIM]
void comm_wait(MsgHandle *mh)
static MsgHandle * mh_recv_p2p_back[2][QUDA_MAX_DIM]
QudaGaugeFieldOrder order
QudaGhostExchange GhostExchange() const
void copy(const GaugeField &src)
static __inline__ size_t size_t d
QudaPrecision Precision() const
void * my_face_dim_dir_h[2][QUDA_MAX_DIM][2]
void * ghost[2 *QUDA_MAX_DIM]
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
void copyExtendedGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0)
int comm_dim_partitioned(int dim)