15 errorQuda(
"QDP ordering only supported for reference fields");
21 errorQuda(
"Field ordering %d presently disabled for this type",
order);
27 bool pad_check =
true;
28 for (
int i=0; i<
nDim; i++)
31 errorQuda(
"cudaGaugeField being constructed with insufficient padding\n");
49 for (
int i=0; i<
nDim; i++) {
60 odd = (
char*)gauge +
bytes/2;
62 #ifdef USE_TEXTURE_OBJECTS
63 createTexObject(evenTex, even);
64 createTexObject(oddTex, odd);
67 const int isPhase = 1;
68 createTexObject(evenPhaseTex, (
char*)even +
phase_offset, isPhase);
69 createTexObject(oddPhaseTex, (
char*)odd +
phase_offset, isPhase);
75 #ifdef USE_TEXTURE_OBJECTS
76 void cudaGaugeField::createTexObject(cudaTextureObject_t &tex,
void *field,
int isPhase) {
80 cudaChannelFormatDesc desc;
81 memset(&desc, 0,
sizeof(cudaChannelFormatDesc));
83 else desc.f = cudaChannelFormatKindSigned;
87 desc.x = 8*
sizeof(int);
88 desc.y = 8*
sizeof(int);
93 desc.y = desc.z = desc.w = 0;
98 desc.x = 8*
sizeof(int);
99 desc.y = 8*
sizeof(int);
100 desc.z = 8*
sizeof(int);
101 desc.w = 8*
sizeof(int);
110 cudaResourceDesc resDesc;
111 memset(&resDesc, 0,
sizeof(resDesc));
112 resDesc.resType = cudaResourceTypeLinear;
113 resDesc.res.linear.devPtr = field;
114 resDesc.res.linear.desc = desc;
117 cudaTextureDesc texDesc;
118 memset(&texDesc, 0,
sizeof(texDesc));
120 else texDesc.readMode = cudaReadModeElementType;
122 cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
127 void cudaGaugeField::destroyTexObject() {
129 cudaDestroyTextureObject(evenTex);
130 cudaDestroyTextureObject(oddTex);
132 cudaDestroyTextureObject(evenPhaseTex);
133 cudaDestroyTextureObject(oddPhaseTex);
142 #ifdef USE_TEXTURE_OBJECTS
151 for (
int i=0; i<
nDim; i++) {
162 errorQuda(
"Cannot call exchangeGhost with ghostExchange=%d",
170 for (
int d=0; d<
nDim; d++) {
199 for (
int d=0; d<
nDim; d++) {
211 for (
int d=0; d<
nDim; d++) {
213 total_bytes += 4*bytes[d];
219 for (
int d=0; d<
nDim; d++) {
222 recv_h[d] =
static_cast<char*
>(
bufferPinned[0]) + offset;
223 send_h[d] =
static_cast<char*
>(recv_h[d]) + 2*bytes[d];
224 offset += 4*bytes[d];
234 for (
int d=0; d<
nDim; d++) {
253 for (
int d=0; d<
nDim; d++) {
270 cudaMemcpyAsync(send_h[d], send_d[d], bytes[d], cudaMemcpyDeviceToHost,
streams[0]);
271 cudaMemcpyAsync(static_cast<char*>(send_h[d])+bytes[d],
272 static_cast<char*>(send_d[d])+bytes[d], bytes[d], cudaMemcpyDeviceToHost,
streams[1]);
276 cudaStreamSynchronize(
streams[0]);
281 cudaStreamSynchronize(
streams[1]);
289 cudaMemcpyAsync(static_cast<char*>(recv_d[d])+bytes[d],
290 static_cast<char*>(recv_h[d])+bytes[d], bytes[d], cudaMemcpyHostToDevice,
streams[0]);
297 cudaMemcpyAsync(recv_d[d], recv_h[d], bytes[d], cudaMemcpyHostToDevice,
streams[1]);
300 cudaMemcpy(static_cast<char*>(recv_d[d])+bytes[d], send_d[d], bytes[d], cudaMemcpyDeviceToDevice);
301 cudaMemcpy(recv_d[d], static_cast<char*>(send_d[d])+bytes[d], bytes[d], cudaMemcpyDeviceToDevice);
308 for (
int d=0; d<
nDim; d++) {
327 errorQuda(
"Setting gauge pointer is only allowed when create="
328 "QUDA_REFERENCE_FIELD_CREATE type\n");
334 if (
this == &src)
return;
341 errorQuda(
"fat_link_max has not been computed");
349 static_cast<const cudaGaugeField&>(src).gauge);
355 static_cast<const cpuGaugeField&>(src).gauge);
381 void *bufferPinnedMapped;
382 cudaHostGetDevicePointer(&bufferPinnedMapped,
bufferPinned[0], 0);
390 errorQuda(
"Invalid pack location %d", pack_location);
403 template<
typename FloatN,
typename Float>
408 for (
int i=0; i<2; i++) cudaStreamCreate(&streams[i]);
410 FloatN *even =
gauge;
411 FloatN *odd = (FloatN*)((
char*)gauge + bytes/2);
415 void *unpackedEven = unpacked;
416 void *unpackedOdd = (
char*)unpacked + datalen/2;
421 cudaMemcpyAsync(cpuGauge, unpackedEven, datalen/2, cudaMemcpyDeviceToHost, streams[0]);
423 cudaMemcpy(cpuGauge, unpackedEven, datalen/2, cudaMemcpyDeviceToHost);
429 cudaMemcpyAsync(cpuGauge + 4*volumeCB*
gaugeSiteSize, unpackedOdd, datalen/2, cudaMemcpyDeviceToHost, streams[1]);
430 for(
int i=0; i<2; i++) cudaStreamSynchronize(streams[i]);
432 cudaMemcpy(cpuGauge + 4*volumeCB*
gaugeSiteSize, unpackedOdd, datalen/2, cudaMemcpyDeviceToHost);
436 for(
int i=0; i<2; i++) cudaStreamDestroy(streams[i]);
446 errorQuda(
"cpu precision %d and cuda precision %d must be the same",
454 storeGaugeField((
double*)cpu.gauge, (double2*)gauge, bytes, volumeCB, stride,
precision);
456 storeGaugeField((
float*)cpu.gauge, (float2*)gauge, bytes, volumeCB, stride,
precision);
458 errorQuda(
"Half precision not supported");
465 cudaMemcpy(
bufferPinned[0], gauge, bytes, cudaMemcpyDeviceToHost);
470 errorQuda(
"Invalid pack location %d", pack_location);
478 cudaMemcpy(
backup_h, gauge, bytes, cudaMemcpyDeviceToHost);
485 cudaMemcpy(gauge,
backup_h, bytes, cudaMemcpyHostToDevice);
516 errorQuda(
"Casting a cudaGaugeField into cudaColorSpinorField not possible in half precision");
524 spinor_param.
nDim = spin;
525 for (
int d=0; d<a.
Ndim(); d++) spinor_param.
x[d] = a.
X()[d];
527 spinor_param.
pad = a.
Pad();
534 spinor_param.
v = (
void*)a.
Gauge_p();
void setGhostSpinor(bool value)
QudaGaugeFieldOrder FieldOrder() const
QudaGhostExchange GhostExchange() const
enum QudaPrecision_s QudaPrecision
int commDimPartitioned(int dir)
void copyGenericGauge(GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0)
void saveCPUField(cpuGaugeField &, const QudaFieldLocation &) const
MsgHandle *** mh_send_back[2]
void * ghost[QUDA_MAX_DIM]
void extractGaugeGhost(const GaugeField &u, void **ghost)
QudaGaugeFieldOrder Order() const
QudaReconstructType reconstruct
MsgHandle * comm_declare_send_relative(void *buffer, int dim, int dir, size_t nbytes)
QudaSiteSubset siteSubset
cudaGaugeField(const GaugeFieldParam &)
QudaFieldGeometry geometry
void comm_free(MsgHandle *mh)
QudaPrecision Precision() const
void exchangeLink(void **ghost_link, void **link_sendbuf, QudaFieldLocation location)
QudaFieldOrder fieldOrder
void exchangeExtendedGhost(const int *R, bool no_comms_fill=false)
FloatingPoint< float > Float
void extractExtendedGaugeGhost(const GaugeField &u, int dim, const int *R, void **ghost, bool extract)
QudaGammaBasis gammaBasis
void comm_start(MsgHandle *mh)
QudaReconstructType Reconstruct() const
void loadCPUField(const cpuGaugeField &, const QudaFieldLocation &)
const double & LinkMax() const
MsgHandle *** mh_recv_back[2]
MsgHandle * comm_declare_receive_relative(void *buffer, int dim, int dir, size_t nbytes)
void checkField(const GaugeField &)
int surface[QUDA_MAX_DIM]
enum QudaFieldLocation_s QudaFieldLocation
QudaGhostExchange ghostExchange
void * memset(void *s, int c, size_t n)
void setGauge(void *_gauge)
#define device_malloc(size)
void copy(const GaugeField &)
int surfaceCB[QUDA_MAX_DIM]
virtual ~cudaGaugeField()
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
QudaFieldGeometry Geometry() const
void comm_wait(MsgHandle *mh)
QudaGaugeFieldOrder order
MsgHandle *** mh_send_fwd[2]
double norm2(const ColorSpinorField &)
void link_format_gpu_to_cpu(void *dst, void *src, int Vh, int stride, QudaPrecision prec, cudaStream_t stream)
void resizeBufferPinned(size_t bytes, const int index=0) const
static void * bufferPinned[2]
MsgHandle *** mh_recv_fwd[2]