34 for (
int dir = 0; dir <
nDim; ++dir)
x[dir] = a.
X()[dir];
45 errorQuda(
"QDPJIT ordered clover fields only supported for reference fields");
151 #ifdef USE_TEXTURE_OBJECTS 152 createTexObject(tex, normTex,
clover,
norm,
true);
155 createTexObject(evenTex, evenNormTex,
even,
evenNorm,
false);
156 createTexObject(oddTex, oddNormTex,
odd,
oddNorm,
false);
166 #ifdef USE_TEXTURE_OBJECTS 167 void cudaCloverField::createTexObject(cudaTextureObject_t &tex, cudaTextureObject_t &texNorm,
168 void *field,
void *
norm,
bool full) {
172 cudaChannelFormatDesc desc;
173 memset(&desc, 0,
sizeof(cudaChannelFormatDesc));
175 else desc.f = cudaChannelFormatKindSigned;
184 cudaResourceDesc resDesc;
185 memset(&resDesc, 0,
sizeof(resDesc));
186 resDesc.resType = cudaResourceTypeLinear;
187 resDesc.res.linear.devPtr = field;
188 resDesc.res.linear.desc = desc;
189 resDesc.res.linear.sizeInBytes =
bytes/(!full ? 2 : 1);
191 if (resDesc.res.linear.sizeInBytes %
deviceProp.textureAlignment != 0
193 errorQuda(
"Allocation size %lu does not have correct alignment for textures (%lu)",
194 resDesc.res.linear.sizeInBytes,
deviceProp.textureAlignment);
197 unsigned long texels = resDesc.res.linear.sizeInBytes / texel_size;
198 if (texels > (
unsigned)
deviceProp.maxTexture1DLinear) {
199 errorQuda(
"Attempting to bind too large a texture %lu > %d", texels,
deviceProp.maxTexture1DLinear);
202 cudaTextureDesc texDesc;
203 memset(&texDesc, 0,
sizeof(texDesc));
205 texDesc.readMode = cudaReadModeNormalizedFloat;
207 texDesc.readMode = cudaReadModeElementType;
209 cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
214 cudaChannelFormatDesc desc;
215 memset(&desc, 0,
sizeof(cudaChannelFormatDesc));
216 desc.f = cudaChannelFormatKindFloat;
219 cudaResourceDesc resDesc;
220 memset(&resDesc, 0,
sizeof(resDesc));
221 resDesc.resType = cudaResourceTypeLinear;
222 resDesc.res.linear.devPtr =
norm;
223 resDesc.res.linear.desc = desc;
224 resDesc.res.linear.sizeInBytes =
norm_bytes/(!full ? 2 : 1);
227 errorQuda(
"Allocation size %lu does not have correct alignment for textures (%lu)",
228 resDesc.res.linear.sizeInBytes,
deviceProp.textureAlignment);
231 cudaTextureDesc texDesc;
232 memset(&texDesc, 0,
sizeof(texDesc));
233 texDesc.readMode = cudaReadModeElementType;
235 cudaCreateTextureObject(&texNorm, &resDesc, &texDesc, NULL);
242 void cudaCloverField::destroyTexObject() {
244 cudaDestroyTextureObject(tex);
245 cudaDestroyTextureObject(invTex);
246 cudaDestroyTextureObject(evenTex);
247 cudaDestroyTextureObject(oddTex);
248 cudaDestroyTextureObject(evenInvTex);
249 cudaDestroyTextureObject(oddInvTex);
251 cudaDestroyTextureObject(normTex);
252 cudaDestroyTextureObject(invNormTex);
253 cudaDestroyTextureObject(evenNormTex);
254 cudaDestroyTextureObject(oddNormTex);
255 cudaDestroyTextureObject(evenInvNormTex);
256 cudaDestroyTextureObject(oddInvNormTex);
265 #ifdef USE_TEXTURE_OBJECTS 291 static_cast<char *>(packClover) +
bytes :
312 static_cast<char *>(packClover) + src.
Bytes() :
351 if (
V(
false) && cpu.
V(
false)) {
356 }
else if((
V(
false) && !cpu.
V(
false)) || (!
V(
false) && cpu.
V(
false))) {
357 errorQuda(
"Mismatch between Clover field GPU V(false) and CPU.V(false)");
361 if (
V(
true) && cpu.
V(
true)) {
366 }
else if ((
V(
true) && !cpu.
V(
true)) || (!
V(
true) && cpu.
V(
true))) {
367 errorQuda(
"Mismatch between Clover field GPU V(true) and CPU.V(true)");
382 errorQuda(
"Gauge and clover precisions must match");
414 if (param.
pad != 0)
errorQuda(
"%s pad must be zero", __func__);
429 output << static_cast<const LatticeFieldParam&>(
param);
430 output <<
"direct = " << param.
direct << std::endl;
431 output <<
"inverse = " << param.
inverse << std::endl;
432 output <<
"clover = " << param.
clover << std::endl;
433 output <<
"norm = " << param.
norm << std::endl;
434 output <<
"cloverInv = " << param.
cloverInv << std::endl;
435 output <<
"invNorm = " << param.
invNorm << std::endl;
436 output <<
"csw = " << param.
csw << std::endl;
437 output <<
"twisted = " << param.
twisted << std::endl;
438 output <<
"mu2 = " << param.
mu2 << std::endl;
439 output <<
"rho = " << param.
rho << std::endl;
440 output <<
"order = " << param.
order << std::endl;
441 output <<
"create = " << param.
create << std::endl;
448 errorQuda(
"Casting a CloverField into ColorSpinorField not possible in half precision");
453 spinor_param.
nSpin = 4;
455 for (
int d=0; d<a.
Ndim(); d++) spinor_param.
x[d] = a.
X()[d];
457 spinor_param.
pad = a.
Pad();
464 spinor_param.
v = (
void*)a.
V(inverse);
QudaCloverFieldOrder order
void setRho(double rho)
Bakes in the rho factor into the clover field, (for real diagonal additive Hasenbusch), e.g., A + rho.
#define qudaMemcpy(dst, src, count, kind)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can set at QUDA initialization using the env...
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
cudaCloverField(const CloverFieldParam ¶m)
#define pool_pinned_free(ptr)
cudaDeviceProp deviceProp
void * V(bool inverse=false)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
double norm2(const ColorSpinorField &a)
void saveCPUField(cpuCloverField &cpu) const
void loadCPUField(const cpuCloverField &cpu)
virtual ~cpuCloverField()
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
QudaCloverFieldOrder order
double norm2() const
Compute the L2 norm squared of the field.
CloverField(const CloverFieldParam ¶m)
QudaSiteSubset siteSubset
std::ostream & operator<<(std::ostream &output, const CloverFieldParam ¶m)
bool is_aligned(const void *ptr, size_t alignment)
#define qudaDeviceSynchronize()
QudaFieldLocation location
void checkField(const LatticeField &a) const
QudaFieldOrder fieldOrder
#define ALIGNMENT_ADJUST(n)
void compute(const cudaGaugeField &gauge)
QudaGammaBasis gammaBasis
#define pool_device_malloc(size)
cpuCloverField(const CloverFieldParam ¶m)
#define safe_malloc(size)
void copy(const CloverField &src, bool inverse=true)
Copy into this CloverField from the generic CloverField src.
void * memset(void *s, int c, size_t n)
QudaFieldLocation Location() const
#define pool_pinned_malloc(size)
__device__ __host__ Matrix< T, 3 > inverse(const Matrix< T, 3 > &u)
double norm1(const ColorSpinorField &b)
bool twisted
Clover coefficient.
void * Norm(bool inverse=false)
ColorSpinorParam colorSpinorParam(const CloverField &a, bool inverse)
virtual ~cudaCloverField()
double norm1() const
Compute the L1 norm of the field.
#define pool_device_free(ptr)
QudaPrecision Precision() const
void computeClover(CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)
void copyGenericClover(CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
This generic function is used for copying the clover field where in the input and output can be in an...