34 for(
int dir=0; dir<
nDim; ++dir)
x[dir] =
a.X()[dir];
40 order(
param.order), create(
param.create), trlog{0, 0}
42 if (nDim != 4)
errorQuda(
"Number of dimensions must be 4, not %d", nDim);
45 errorQuda(
"QDPJIT ordered clover fields only supported for reference fields");
53 norm_bytes =
sizeof(
float)*2*stride*2;
101 if (!
param.inverse) {
140 if (!
param.inverse) {
149 #ifdef USE_TEXTURE_OBJECTS 153 createTexObject(evenTex, evenNormTex,
even,
evenNorm,
false);
154 createTexObject(oddTex, oddNormTex,
odd,
oddNorm,
false);
164 #ifdef USE_TEXTURE_OBJECTS 165 void cudaCloverField::createTexObject(cudaTextureObject_t &
tex, cudaTextureObject_t &texNorm,
166 void *field,
void *
norm,
bool full) {
170 cudaChannelFormatDesc
desc;
171 memset(&
desc, 0,
sizeof(cudaChannelFormatDesc));
173 else desc.f = cudaChannelFormatKindSigned;
182 cudaResourceDesc resDesc;
183 memset(&resDesc, 0,
sizeof(resDesc));
184 resDesc.resType = cudaResourceTypeLinear;
185 resDesc.res.linear.devPtr = field;
186 resDesc.res.linear.desc =
desc;
187 resDesc.res.linear.sizeInBytes =
bytes/(!full ? 2 : 1);
189 unsigned long texels = resDesc.res.linear.sizeInBytes / texel_size;
190 if (texels > (
unsigned)
deviceProp.maxTexture1DLinear) {
191 errorQuda(
"Attempting to bind too large a texture %lu > %d", texels,
deviceProp.maxTexture1DLinear);
194 cudaTextureDesc texDesc;
195 memset(&texDesc, 0,
sizeof(texDesc));
197 else texDesc.readMode = cudaReadModeElementType;
199 cudaCreateTextureObject(&
tex, &resDesc, &texDesc, NULL);
204 cudaChannelFormatDesc
desc;
205 memset(&
desc, 0,
sizeof(cudaChannelFormatDesc));
206 desc.f = cudaChannelFormatKindFloat;
209 cudaResourceDesc resDesc;
210 memset(&resDesc, 0,
sizeof(resDesc));
211 resDesc.resType = cudaResourceTypeLinear;
212 resDesc.res.linear.devPtr =
norm;
213 resDesc.res.linear.desc =
desc;
214 resDesc.res.linear.sizeInBytes =
norm_bytes/(!full ? 2 : 1);
216 cudaTextureDesc texDesc;
217 memset(&texDesc, 0,
sizeof(texDesc));
218 texDesc.readMode = cudaReadModeElementType;
220 cudaCreateTextureObject(&texNorm, &resDesc, &texDesc, NULL);
227 void cudaCloverField::destroyTexObject() {
229 cudaDestroyTextureObject(
tex);
230 cudaDestroyTextureObject(invTex);
231 cudaDestroyTextureObject(evenTex);
232 cudaDestroyTextureObject(oddTex);
233 cudaDestroyTextureObject(evenInvTex);
234 cudaDestroyTextureObject(oddInvTex);
236 cudaDestroyTextureObject(normTex);
237 cudaDestroyTextureObject(invNormTex);
238 cudaDestroyTextureObject(evenNormTex);
239 cudaDestroyTextureObject(oddNormTex);
240 cudaDestroyTextureObject(evenInvNormTex);
241 cudaDestroyTextureObject(oddInvNormTex);
250 #ifdef USE_TEXTURE_OBJECTS 284 if (
src.V(
true) && inverse) {
299 qudaMemcpy(packCloverNorm,
src.Norm(
false),
src.NormBytes(), cudaMemcpyHostToDevice);
304 if (
src.V(
true) && inverse) {
307 qudaMemcpy(packCloverNorm,
src.Norm(
true),
src.NormBytes(), cudaMemcpyHostToDevice);
331 if (
V(
false) && cpu.
V(
false)) {
336 }
else if((
V(
false) && !cpu.
V(
false)) || (!
V(
false) && cpu.
V(
false))) {
337 errorQuda(
"Mismatch between Clover field GPU V(false) and CPU.V(false)");
341 if (
V(
true) && cpu.
V(
true)) {
346 }
else if ((
V(
true) && !cpu.
V(
true)) || (!
V(
true) && cpu.
V(
true))) {
347 errorQuda(
"Mismatch between Clover field GPU V(true) and CPU.V(true)");
359 errorQuda(
"Gauge and clover precisions must match");
406 output << static_cast<const LatticeFieldParam&>(
param);
407 output <<
"direct = " <<
param.direct << std::endl;
408 output <<
"inverse = " <<
param.inverse << std::endl;
409 output <<
"clover = " <<
param.clover << std::endl;
410 output <<
"norm = " <<
param.norm << std::endl;
411 output <<
"cloverInv = " <<
param.cloverInv << std::endl;
412 output <<
"invNorm = " <<
param.invNorm << std::endl;
413 output <<
"csw = " <<
param.csw << std::endl;
414 output <<
"twisted = " <<
param.twisted << std::endl;
415 output <<
"mu2 = " <<
param.mu2 << std::endl;
416 output <<
"rho = " <<
param.rho << std::endl;
417 output <<
"order = " <<
param.order << std::endl;
418 output <<
"create = " <<
param.create << std::endl;
425 errorQuda(
"Casting a CloverField into ColorSpinorField not possible in half precision");
430 spinor_param.
nSpin = 4;
431 spinor_param.
nDim =
a.Ndim();
432 for (
int d=0;
d<
a.Ndim();
d++) spinor_param.
x[
d] =
a.X()[
d];
434 spinor_param.
pad =
a.Pad();
441 spinor_param.
v = (
void*)
a.V(inverse);
void setRho(double rho)
Bakes the rho factor into the clover field (for real diagonal additive Hasenbusch), e.g., A + rho.
#define qudaMemcpy(dst, src, count, kind)
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the env...
cudaCloverField(const CloverFieldParam &param)
#define pool_pinned_free(ptr)
cudaDeviceProp deviceProp
void * V(bool inverse=false)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
double norm2(const ColorSpinorField &a)
void saveCPUField(cpuCloverField &cpu) const
void loadCPUField(const cpuCloverField &cpu)
virtual ~cpuCloverField()
static ColorSpinorField * Create(const ColorSpinorParam &param)
QudaCloverFieldOrder order
CloverField(const CloverFieldParam &param)
QudaSiteSubset siteSubset
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
double norm2(const CloverField &a, bool inverse=false)
double norm1(const CloverField &u, bool inverse=false)
QudaFieldLocation location
void checkField(const LatticeField &a) const
QudaFieldOrder fieldOrder
#define ALIGNMENT_ADJUST(n)
void compute(const cudaGaugeField &gauge)
QudaGammaBasis gammaBasis
#define pool_device_malloc(size)
cpuCloverField(const CloverFieldParam &param)
#define safe_malloc(size)
void copy(const CloverField &src, bool inverse=true)
Copy into this CloverField from the generic CloverField src.
void * memset(void *__b, int __c, size_t __len)
#define pool_pinned_malloc(size)
double norm1(const ColorSpinorField &b)
static __inline__ dim3 dim3 void size_t cudaStream_t int enum cudaTextureReadMode readMode static __inline__ const struct texture< T, dim, readMode > & tex
ColorSpinorParam colorSpinorParam(const CloverField &a, bool inverse)
virtual ~cudaCloverField()
#define pool_device_free(ptr)
const struct cudaChannelFormatDesc * desc
static __inline__ size_t size_t d
QudaPrecision Precision() const
void computeClover(CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)
void copyGenericClover(CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
This generic function is used for copying the clover field, where the input and output can be in an...