errorQuda("QDP ordering only supported for reference fields");
// ...
errorQuda("Field ordering %d presently disabled for this type", order);
// ...
bool pad_check = true;
for (int i=0; i<nDim; i++) {
  // ...
  if (pad < minimum_pad) pad_check = false;
  // ...
    errorQuda("cudaGaugeField being constructed with insufficient padding in dim %d (%d < %d)\n", i, pad, minimum_pad);
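// (The computation of minimum_pad is elided above; it must be at least large enough
// to hold the ghost faces of dimension i, and a coarse-geometry field, which stores
// forward as well as backward links, presumably needs twice that.)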
for (int i=0; i<nDim; i++) {
// ...

#ifdef USE_TEXTURE_OBJECTS
  createTexObject(tex, gauge, true);
  createTexObject(evenTex, even, false);
  createTexObject(oddTex, odd, false);
  // ...
  createTexObject(oddPhaseTex, (char*)odd + phase_offset, false, isPhase);
// ...

cudaMemset2D(static_cast<char*>(even) + volumeCB*order*precision, pitch, 0, pad_bytes, Npad);
cudaMemset2D(static_cast<char*>(odd) + volumeCB*order*precision, pitch, 0, pad_bytes, Npad);
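// The two cudaMemset2D calls above clear the padded region of each parity buffer.
// cudaMemset2D(devPtr, pitch, value, width, height) writes `width` bytes of `value`
// into each of `height` rows spaced `pitch` bytes apart, so here pad_bytes of padding
// are zeroed in each of the Npad planes, starting volumeCB*order*precision bytes into
// the even/odd halves (pad_bytes, Npad and pitch are computed above but elided from
// this excerpt).  Zeroing the pad presumably keeps uninitialized memory out of any
// later copies or reductions over the full allocation.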
#ifdef USE_TEXTURE_OBJECTS
void cudaGaugeField::createTexObject(cudaTextureObject_t &tex, void *field, bool full, bool isPhase) {

  cudaChannelFormatDesc desc;
  memset(&desc, 0, sizeof(cudaChannelFormatDesc));
  // ...
  else desc.f = cudaChannelFormatKindSigned;
  // ...
    desc.x = 8*sizeof(int);
    desc.y = 8*sizeof(int);
    // ...
    texel_size = 2*sizeof(int);
    // ...
    desc.y = desc.z = desc.w = 0;
    // ...
    desc.x = 8*sizeof(int);
    desc.y = 8*sizeof(int);
    desc.z = 8*sizeof(int);
    desc.w = 8*sizeof(int);
    texel_size = 4*sizeof(int);
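// The elided branches above fill the channel descriptor with 32-bit signed integer
// components: two components (texel_size = 2*sizeof(int)), apparently for the phase
// texture, or four (texel_size = 4*sizeof(int)) for the link texture.  CUDA has no
// native double-precision texture fetch, so a double field is presumably bound as
// int2/int4 and reassembled in the kernels (e.g. via __hiloint2double); the precision
// checks themselves are not shown in this excerpt.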
  cudaResourceDesc resDesc;
  memset(&resDesc, 0, sizeof(resDesc));
  resDesc.resType = cudaResourceTypeLinear;
  resDesc.res.linear.devPtr = field;
  resDesc.res.linear.desc = desc;
  resDesc.res.linear.sizeInBytes = (isPhase ? phase_bytes : bytes) / (!full ? 2 : 1);
  // ...
  if (resDesc.res.linear.sizeInBytes % deviceProp.textureAlignment != 0
      // ...
    errorQuda("Allocation size %lu does not have correct alignment for textures (%lu)",
              resDesc.res.linear.sizeInBytes, deviceProp.textureAlignment);
  // ...
  unsigned long texels = resDesc.res.linear.sizeInBytes / texel_size;
  if (texels > (unsigned)deviceProp.maxTexture1DLinear) {
    errorQuda("Attempting to bind too large a texture %lu > %d", texels, deviceProp.maxTexture1DLinear);
  // ...
  cudaTextureDesc texDesc;
  memset(&texDesc, 0, sizeof(texDesc));
  // ...
  else texDesc.readMode = cudaReadModeElementType;
  // ...
  cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
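// Illustrative sketch (not part of this file): the minimal CUDA pattern the routine
// above follows when binding an existing linear device allocation to a 1D texture
// object.  The helper name, buffer and size are hypothetical; it assumes <cuda_runtime.h>
// and a four-component 32-bit signed channel as in the double-precision case above.
static void createLinearTexObjectSketch(void *d_buf, size_t nbytes, cudaTextureObject_t &obj)
{
  cudaResourceDesc res;
  memset(&res, 0, sizeof(res));
  res.resType = cudaResourceTypeLinear;
  res.res.linear.devPtr = d_buf;                        // existing device allocation
  res.res.linear.desc = cudaCreateChannelDesc<int4>();  // 4 x 32-bit signed components
  res.res.linear.sizeInBytes = nbytes;                  // should respect deviceProp.textureAlignment

  cudaTextureDesc td;
  memset(&td, 0, sizeof(td));
  td.readMode = cudaReadModeElementType;                // kernels read the raw texels

  cudaCreateTextureObject(&obj, &res, &td, nullptr);    // later fetched with tex1Dfetch<int4>(obj, i)
}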
void cudaGaugeField::destroyTexObject() {
  // ...
  cudaDestroyTextureObject(tex);
  cudaDestroyTextureObject(evenTex);
  cudaDestroyTextureObject(oddTex);
  // ...
  cudaDestroyTextureObject(phaseTex);
  cudaDestroyTextureObject(evenPhaseTex);
  cudaDestroyTextureObject(oddPhaseTex);
// ...

#ifdef USE_TEXTURE_OBJECTS
// ...
for (int i=0; i<nDim; i++) {
errorQuda("Cannot request exchange of forward links on non-coarse geometry");
// ...
const bool no_comms_fill = true;
const bool bidir = false;
// ...
for (int link_dir = 0; link_dir<2; link_dir++) {
  // ...
  for (int d=0; d<nDim; d++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++)
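// exchangeGhost(): the outer link_dir loop handles backward and (for coarse geometry
// only, hence the error above) forward links, while the dim loops extract each face,
// communicate it, and store it in the padded region of the field.  no_comms_fill = true
// appears to mean that faces of non-partitioned dimensions are still filled locally,
// and bidir = false that the two directions of a dimension are exchanged one at a time.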
const bool no_comms_fill = false;
const bool bidir = false;
// ...
for (int link_dir = 0; link_dir<2; link_dir++) {
  // ...
  for (int d=0; d<nDim; d++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dim=0; dim<nDim; dim++) {
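// injectGhost() mirrors exchangeGhost() in the opposite direction: the ghost zone held
// on this node is sent back to the neighbouring node and injected into its bulk field.
// Here no_comms_fill = false, presumably because there is nothing to fill locally when
// a dimension is not partitioned.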
// ...
  stream_p ? *stream_p : 0);
for (int dim=0; dim<nDim; dim++) {
// ...
for (int dim=0; dim<nDim; dim++) {
  // ...
  for (int dir=0; dir<2; dir++) recvStart(dim, dir);
  // ...
  for (int dir=0; dir<2; dir++) {
  // ...
  for (int dir=0; dir<2; dir++) {
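// In exchangeExtendedGhost() the receives for both directions of a dimension are posted
// first (recvStart); the subsequent dir loops presumably issue the matching sends and
// then wait for completion (sendStart / commsComplete), so a receive is always posted
// before the send that targets it.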
errorQuda("Setting gauge pointer is only allowed when create="
          "QUDA_REFERENCE_FIELD_CREATE type\n");
// ...
void **buffer = new void*[geometry];
// ...
return ((void*)buffer);
// ...
void **buffer = new void*[geometry];
// ...
delete []((void**)buffer);
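// The create_gauge_buffer() / create_ghost_buffer() helpers return either one contiguous
// allocation or, for orders that keep one pointer per link direction (e.g. QDP-style
// layouts), an array of `geometry` pointers; that is why the branches above allocate
// new void*[geometry] and the matching free casts back to void** before delete[].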
if (this == &src) return;
// ...
errorQuda("fat_link_max has not been computed");
// ...
cudaError_t error = cudaHostGetDevicePointer(&src_d, const_cast<void*>(src.Gauge_p()), 0);
if (error != cudaSuccess) errorQuda("Failed to get device pointer for MILC site / BQCD array");
// ...
errorQuda("Ghost copy not supported here");
// ...
qudaMemcpy(ghost_buffer[d], src.Ghost()[d], ghost_bytes[d], cudaMemcpyHostToDevice);
// ...
cudaError_t error = cudaHostGetDevicePointer(&cpu_d, const_cast<void*>(cpu.Gauge_p()), 0);
if (error != cudaSuccess) errorQuda("Failed to get device pointer for MILC site / BQCD array");
// ...
errorQuda("Ghost copy not supported here");
// ...
qudaMemcpy(cpu.Ghost()[d], ghost_buffer[d], ghost_bytes[d], cudaMemcpyDeviceToHost);
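// loadCPUField() and saveCPUField() use zero-copy access for the MILC-site / BQCD
// layouts: the CPU array is expected to be pinned and mapped, and
// cudaHostGetDevicePointer() returns a device-side alias that the reordering kernels
// read or write directly instead of staging through a separate device copy.
//
// Illustrative sketch (not part of this file) of that pattern; the helper name and
// argument are hypothetical and it assumes <cuda_runtime.h>:
static void *deviceAliasSketch(void *pinned_mapped_host_ptr)
{
  void *dev_alias = nullptr;
  cudaError_t err = cudaHostGetDevicePointer(&dev_alias, pinned_mapped_host_ptr, 0);
  if (err != cudaSuccess) errorQuda("Failed to get device pointer for host array");
  return dev_alias; // accesses through this pointer go straight to the host allocation
}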