QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
clover_field.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <math.h>
5 #include <typeinfo>
6 
7 #include <quda_internal.h>
8 #include <clover_field.h>
9 #include <gauge_field.h>
10 #include <color_spinor_field.h>
11 #include <blas_quda.h>
12 
13 namespace quda {
14 
17  direct(false),
18  inverse(false),
19  clover(NULL),
20  norm(NULL),
21  cloverInv(NULL),
22  invNorm(NULL),
23  csw(a.Csw()),
24  twisted(a.Twisted()),
25  mu2(a.Mu2()),
26  rho(a.Rho()),
27  order(a.Order()),
29  {
30  precision = a.Precision();
31  nDim = a.Ndim();
32  pad = a.Pad();
34  for (int dir = 0; dir < nDim; ++dir) x[dir] = a.X()[dir];
35  }
36 
38  LatticeField(param), bytes(0), norm_bytes(0), nColor(3), nSpin(4),
39  clover(0), norm(0), cloverInv(0), invNorm(0), csw(param.csw), rho(param.rho),
40  order(param.order), create(param.create), trlog{0, 0}
41  {
42  if (nDim != 4) errorQuda("Number of dimensions must be 4, not %d", nDim);
43 
45  errorQuda("QDPJIT ordered clover fields only supported for reference fields");
46 
47  real_length = 2 * ((size_t)volumeCB) * nColor * nColor * nSpin * nSpin / 2; // block-diagonal Hermitian (72 reals)
48  length = 2 * ((size_t)stride) * nColor * nColor * nSpin * nSpin / 2;
49 
51  if (isNative()) bytes = 2*ALIGNMENT_ADJUST(bytes/2);
52  if (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) {
53  norm_bytes = sizeof(float)*2*stride*2; // 2 chirality
55  }
56 //for twisted mass only:
57  twisted = false;//param.twisted;
58  mu2 = 0.0; //param.mu2;
59  }
60 
62 
63  bool CloverField::isNative() const {
65  if (order == QUDA_FLOAT2_CLOVER_ORDER) return true;
68  if (order == QUDA_FLOAT4_CLOVER_ORDER) return true;
69  }
70  return false;
71  }
72 
73  void CloverField::setRho(double rho_)
74  {
75  rho = rho_;
76  }
77 
79 
81  errorQuda("Create type %d not supported", create);
82 
83  if (param.direct) {
85  clover = bytes ? pool_device_malloc(bytes) : nullptr;
88  } else {
89  clover = param.clover;
90  norm = param.norm;
91  }
92 
93  even = clover;
94  odd = static_cast<char*>(clover) + bytes/2;
95 
96  evenNorm = norm;
97  oddNorm = static_cast<char*>(norm) + norm_bytes/2;
98 
100 
101  // this is a hack to prevent us allocating a texture object for an unallocated inverse field
102  if (!param.inverse) {
103  cloverInv = clover;
104  evenInv = even;
105  oddInv = odd;
106  invNorm = norm;
109  }
110  }
111 
112  if (param.inverse) {
114  cloverInv = bytes ? pool_device_malloc(bytes) : nullptr;
117  } else {
118  cloverInv = param.cloverInv;
119  invNorm = param.invNorm;
120  }
121 
122  evenInv = cloverInv;
123  oddInv = static_cast<char*>(cloverInv) + bytes/2;
124 
126  oddInvNorm = static_cast<char*>(invNorm) + norm_bytes/2;
127 
129 
130  // this is a hack to ensure that we can autotune the clover
131  // operator when just using symmetric preconditioning
132  if (!param.direct) {
133  clover = cloverInv;
134  even = evenInv;
135  odd = oddInv;
136  norm = invNorm;
139  }
140  }
141 
142  if (!param.inverse) {
143  cloverInv = clover;
144  evenInv = even;
145  oddInv = odd;
146  invNorm = norm;
149  }
150 
151 #ifdef USE_TEXTURE_OBJECTS
152  createTexObject(tex, normTex, clover, norm, true);
153  createTexObject(invTex, invNormTex, cloverInv, invNorm, true);
154 
155  createTexObject(evenTex, evenNormTex, even, evenNorm, false);
156  createTexObject(oddTex, oddNormTex, odd, oddNorm, false);
157 
158  createTexObject(evenInvTex, evenInvNormTex, evenInv, evenInvNorm, false);
159  createTexObject(oddInvTex, oddInvNormTex, oddInv, oddInvNorm, false);
160 #endif
161  twisted = param.twisted;
162  mu2 = param.mu2;
163 
164  }
165 
166 #ifdef USE_TEXTURE_OBJECTS
167  void cudaCloverField::createTexObject(cudaTextureObject_t &tex, cudaTextureObject_t &texNorm,
168  void *field, void *norm, bool full) {
169  if (isNative()) {
170  // create the texture for the field components
171 
172  cudaChannelFormatDesc desc;
173  memset(&desc, 0, sizeof(cudaChannelFormatDesc));
174  if (precision == QUDA_SINGLE_PRECISION) desc.f = cudaChannelFormatKindFloat;
175  else desc.f = cudaChannelFormatKindSigned; // half is short, double is int2
176 
177  // always four components regardless of precision
178  desc.x = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
179  desc.y = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
180  desc.z = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
181  desc.w = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
182  int texel_size = 4 * (precision == QUDA_DOUBLE_PRECISION ? sizeof(int) : precision);
183 
184  cudaResourceDesc resDesc;
185  memset(&resDesc, 0, sizeof(resDesc));
186  resDesc.resType = cudaResourceTypeLinear;
187  resDesc.res.linear.devPtr = field;
188  resDesc.res.linear.desc = desc;
189  resDesc.res.linear.sizeInBytes = bytes/(!full ? 2 : 1);
190 
191  if (resDesc.res.linear.sizeInBytes % deviceProp.textureAlignment != 0
192  || !is_aligned(resDesc.res.linear.devPtr, deviceProp.textureAlignment)) {
193  errorQuda("Allocation size %lu does not have correct alignment for textures (%lu)",
194  resDesc.res.linear.sizeInBytes, deviceProp.textureAlignment);
195  }
196 
197  unsigned long texels = resDesc.res.linear.sizeInBytes / texel_size;
198  if (texels > (unsigned)deviceProp.maxTexture1DLinear) {
199  errorQuda("Attempting to bind too large a texture %lu > %d", texels, deviceProp.maxTexture1DLinear);
200  }
201 
202  cudaTextureDesc texDesc;
203  memset(&texDesc, 0, sizeof(texDesc));
205  texDesc.readMode = cudaReadModeNormalizedFloat;
206  else
207  texDesc.readMode = cudaReadModeElementType;
208 
209  cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
210  checkCudaError();
211 
212  // create the texture for the norm components
214  cudaChannelFormatDesc desc;
215  memset(&desc, 0, sizeof(cudaChannelFormatDesc));
216  desc.f = cudaChannelFormatKindFloat;
217  desc.x = 8*QUDA_SINGLE_PRECISION; desc.y = 0; desc.z = 0; desc.w = 0;
218 
219  cudaResourceDesc resDesc;
220  memset(&resDesc, 0, sizeof(resDesc));
221  resDesc.resType = cudaResourceTypeLinear;
222  resDesc.res.linear.devPtr = norm;
223  resDesc.res.linear.desc = desc;
224  resDesc.res.linear.sizeInBytes = norm_bytes/(!full ? 2 : 1);
225 
226  if (!is_aligned(resDesc.res.linear.devPtr, deviceProp.textureAlignment)) {
227  errorQuda("Allocation size %lu does not have correct alignment for textures (%lu)",
228  resDesc.res.linear.sizeInBytes, deviceProp.textureAlignment);
229  }
230 
231  cudaTextureDesc texDesc;
232  memset(&texDesc, 0, sizeof(texDesc));
233  texDesc.readMode = cudaReadModeElementType;
234 
235  cudaCreateTextureObject(&texNorm, &resDesc, &texDesc, NULL);
236  checkCudaError();
237  }
238  }
239 
240  }
241 
242  void cudaCloverField::destroyTexObject() {
243  if (isNative()) {
244  cudaDestroyTextureObject(tex);
245  cudaDestroyTextureObject(invTex);
246  cudaDestroyTextureObject(evenTex);
247  cudaDestroyTextureObject(oddTex);
248  cudaDestroyTextureObject(evenInvTex);
249  cudaDestroyTextureObject(oddInvTex);
251  cudaDestroyTextureObject(normTex);
252  cudaDestroyTextureObject(invNormTex);
253  cudaDestroyTextureObject(evenNormTex);
254  cudaDestroyTextureObject(oddNormTex);
255  cudaDestroyTextureObject(evenInvNormTex);
256  cudaDestroyTextureObject(oddInvNormTex);
257  }
258  checkCudaError();
259  }
260  }
261 #endif
262 
264  {
265 #ifdef USE_TEXTURE_OBJECTS
266  destroyTexObject();
267 #endif
268 
270  if (clover != cloverInv) {
272  if (norm) pool_device_free(norm);
273  }
276  }
277 
278  checkCudaError();
279  }
280 
281  void cudaCloverField::copy(const CloverField &src, bool inverse) {
282 
283  checkField(src);
284 
285  if (typeid(src) == typeid(cudaCloverField)) {
286  if (src.V(false)) copyGenericClover(*this, src, false, QUDA_CUDA_FIELD_LOCATION);
287  if (src.V(true)) copyGenericClover(*this, src, true, QUDA_CUDA_FIELD_LOCATION);
288  } else if (reorder_location() == QUDA_CPU_FIELD_LOCATION && typeid(src) == typeid(cpuCloverField)) {
289  void *packClover = pool_pinned_malloc(bytes + norm_bytes);
290  void *packCloverNorm = (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) ?
291  static_cast<char *>(packClover) + bytes :
292  0;
293 
294  if (src.V(false)) {
295  copyGenericClover(*this, src, false, QUDA_CPU_FIELD_LOCATION, packClover, 0, packCloverNorm, 0);
296  qudaMemcpy(clover, packClover, bytes, cudaMemcpyHostToDevice);
298  qudaMemcpy(norm, packCloverNorm, norm_bytes, cudaMemcpyHostToDevice);
299  }
300 
301  if (src.V(true) && inverse) {
302  copyGenericClover(*this, src, true, QUDA_CPU_FIELD_LOCATION, packClover, 0, packCloverNorm, 0);
303  qudaMemcpy(cloverInv, packClover, bytes, cudaMemcpyHostToDevice);
305  qudaMemcpy(invNorm, packCloverNorm, norm_bytes, cudaMemcpyHostToDevice);
306  }
307 
308  pool_pinned_free(packClover);
309  } else if (reorder_location() == QUDA_CUDA_FIELD_LOCATION && typeid(src) == typeid(cpuCloverField)) {
310  void *packClover = pool_device_malloc(src.Bytes() + src.NormBytes());
311  void *packCloverNorm = (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) ?
312  static_cast<char *>(packClover) + src.Bytes() :
313  0;
314 
315  if (src.V(false)) {
316  qudaMemcpy(packClover, src.V(false), src.Bytes(), cudaMemcpyHostToDevice);
318  qudaMemcpy(packCloverNorm, src.Norm(false), src.NormBytes(), cudaMemcpyHostToDevice);
319 
320  copyGenericClover(*this, src, false, QUDA_CUDA_FIELD_LOCATION, 0, packClover, 0, packCloverNorm);
321  }
322 
323  if (src.V(true) && inverse) {
324  qudaMemcpy(packClover, src.V(true), src.Bytes(), cudaMemcpyHostToDevice);
326  qudaMemcpy(packCloverNorm, src.Norm(true), src.NormBytes(), cudaMemcpyHostToDevice);
327 
328  copyGenericClover(*this, src, true, QUDA_CUDA_FIELD_LOCATION, 0, packClover, 0, packCloverNorm);
329  }
330 
331  pool_device_free(packClover);
332  } else {
333  errorQuda("Invalid clover field type");
334  }
335 
337  checkCudaError();
338  }
339 
341 
343  checkField(cpu);
344 
345  // we know we are copying from GPU to CPU here, so for now just
346  // assume that reordering is on CPU
347  void *packClover = pool_pinned_malloc(bytes + norm_bytes);
348  void *packCloverNorm = (precision == QUDA_HALF_PRECISION) ? static_cast<char*>(packClover) + bytes : 0;
349 
350  // first copy over the direct part (if it exists)
351  if (V(false) && cpu.V(false)) {
352  qudaMemcpy(packClover, clover, bytes, cudaMemcpyDeviceToHost);
354  qudaMemcpy(packCloverNorm, norm, norm_bytes, cudaMemcpyDeviceToHost);
355  copyGenericClover(cpu, *this, false, QUDA_CPU_FIELD_LOCATION, 0, packClover, 0, packCloverNorm);
356  } else if((V(false) && !cpu.V(false)) || (!V(false) && cpu.V(false))) {
357  errorQuda("Mismatch between Clover field GPU V(false) and CPU.V(false)");
358  }
359 
360  // now copy the inverse part (if it exists)
361  if (V(true) && cpu.V(true)) {
362  qudaMemcpy(packClover, cloverInv, bytes, cudaMemcpyDeviceToHost);
364  qudaMemcpy(packCloverNorm, invNorm, norm_bytes, cudaMemcpyDeviceToHost);
365  copyGenericClover(cpu, *this, true, QUDA_CPU_FIELD_LOCATION, 0, packClover, 0, packCloverNorm);
366  } else if ((V(true) && !cpu.V(true)) || (!V(true) && cpu.V(true))) {
367  errorQuda("Mismatch between Clover field GPU V(true) and CPU.V(true)");
368  }
369 
370  pool_pinned_free(packClover);
371 
373  checkCudaError();
374  }
375 
380 
381  if (gauge.Precision() != precision)
382  errorQuda("Gauge and clover precisions must match");
383 
384  computeClover(*this, gauge, 1.0, QUDA_CUDA_FIELD_LOCATION);
385 
386  }
387 
389 
391  if(order != QUDA_PACKED_CLOVER_ORDER) {errorQuda("cpuCloverField only supports QUDA_PACKED_CLOVER_ORDER");}
392  clover = (void *) safe_malloc(bytes);
393  if (precision == QUDA_HALF_PRECISION) norm = (void *) safe_malloc(norm_bytes);
394  if(param.inverse) {
395  cloverInv = (void *) safe_malloc(bytes);
397  }
398 
400  memset(clover, '\0', bytes);
401  if(param.inverse) memset(cloverInv, '\0', bytes);
402  if(precision == QUDA_HALF_PRECISION) memset(norm, '\0', norm_bytes);
404  }
405  } else if (create == QUDA_REFERENCE_FIELD_CREATE) {
406  clover = param.clover;
407  norm = param.norm;
408  cloverInv = param.cloverInv;
409  invNorm = param.invNorm;
410  } else {
411  errorQuda("Create type %d not supported", create);
412  }
413 
414  if (param.pad != 0) errorQuda("%s pad must be zero", __func__);
415  }
416 
419  if (clover) host_free(clover);
420  if (norm) host_free(norm);
422  if (invNorm) host_free(invNorm);
423  }
424  }
425 
426  // This doesn't really live here, but is fine for the moment
427  std::ostream& operator<<(std::ostream& output, const CloverFieldParam& param)
428  {
429  output << static_cast<const LatticeFieldParam&>(param);
430  output << "direct = " << param.direct << std::endl;
431  output << "inverse = " << param.inverse << std::endl;
432  output << "clover = " << param.clover << std::endl;
433  output << "norm = " << param.norm << std::endl;
434  output << "cloverInv = " << param.cloverInv << std::endl;
435  output << "invNorm = " << param.invNorm << std::endl;
436  output << "csw = " << param.csw << std::endl;
437  output << "twisted = " << param.twisted << std::endl;
438  output << "mu2 = " << param.mu2 << std::endl;
439  output << "rho = " << param.rho << std::endl;
440  output << "order = " << param.order << std::endl;
441  output << "create = " << param.create << std::endl;
442  return output; // for multiple << operators.
443  }
444 
446 
447  if (a.Precision() == QUDA_HALF_PRECISION)
448  errorQuda("Casting a CloverField into ColorSpinorField not possible in half precision");
449 
450  ColorSpinorParam spinor_param;
451  // 72 = 9 * 4 * 2
452  spinor_param.nColor = 9;
453  spinor_param.nSpin = 4;
454  spinor_param.nDim = a.Ndim();
455  for (int d=0; d<a.Ndim(); d++) spinor_param.x[d] = a.X()[d];
456  spinor_param.setPrecision(a.Precision());
457  spinor_param.pad = a.Pad();
458  spinor_param.siteSubset = QUDA_FULL_SITE_SUBSET;
459  spinor_param.siteOrder = QUDA_EVEN_ODD_SITE_ORDER;
460  spinor_param.fieldOrder = a.Precision() == QUDA_DOUBLE_PRECISION ?
462  spinor_param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
463  spinor_param.create = QUDA_REFERENCE_FIELD_CREATE;
464  spinor_param.v = (void*)a.V(inverse);
465  spinor_param.location = a.Location();
466  return spinor_param;
467  }
468 
469  // Return the L2 norm squared of the clover field
470  double norm2(const CloverField &a, bool inverse) {
472  double nrm2 = blas::norm2(*b);
473  delete b;
474  return nrm2;
475  }
476 
477  // Return the L1 norm of the clover field
478  double norm1(const CloverField &a, bool inverse) {
480  double nrm1 = blas::norm1(*b);
481  delete b;
482  return nrm1;
483  }
484 
485 } // namespace quda
virtual ~CloverField()
QudaCloverFieldOrder order
Definition: clover_field.h:21
void setRho(double rho)
Bakes in the rho factor into the clover field, (for real diagonal additive Hasenbusch), e.g., A + rho.
#define qudaMemcpy(dst, src, count, kind)
Definition: quda_cuda_api.h:33
QudaFieldLocation reorder_location()
Return whether data is reordered on the CPU or GPU. This can set at QUDA initialization using the env...
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
cudaCloverField(const CloverFieldParam &param)
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:128
cudaDeviceProp deviceProp
void * V(bool inverse=false)
Definition: clover_field.h:74
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
#define errorQuda(...)
Definition: util_quda.h:121
double norm2(const ColorSpinorField &a)
Definition: reduce_quda.cu:721
QudaFieldCreate create
Definition: clover_field.h:22
size_t Bytes() const
Definition: clover_field.h:98
void saveCPUField(cpuCloverField &cpu) const
#define host_free(ptr)
Definition: malloc_quda.h:71
void loadCPUField(const cpuCloverField &cpu)
static ColorSpinorField * Create(const ColorSpinorParam &param)
QudaPrecision precision
Definition: lattice_field.h:51
QudaCloverFieldOrder order
Definition: clover_field.h:65
double norm2() const
Compute the L2 norm squared of the field.
Definition: max_clover.cu:59
CloverField(const CloverFieldParam &param)
QudaSiteSubset siteSubset
Definition: lattice_field.h:71
QudaFieldCreate create
Definition: clover_field.h:66
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
bool is_aligned(const void *ptr, size_t alignment)
Definition: malloc_quda.h:57
QudaGaugeParam param
Definition: pack_test.cpp:17
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:67
#define qudaDeviceSynchronize()
QudaFieldLocation location
void checkField(const LatticeField &a) const
const int nColor
Definition: covdev_test.cpp:75
#define ALIGNMENT_ADJUST(n)
Definition: quda_internal.h:22
void compute(const cudaGaugeField &gauge)
#define pool_device_malloc(size)
Definition: malloc_quda.h:125
cpuCloverField(const CloverFieldParam &param)
bool isNative() const
#define safe_malloc(size)
Definition: malloc_quda.h:66
void copy(const CloverField &src, bool inverse=true)
Copy into this CloverField from the generic CloverField src.
void * memset(void *s, int c, size_t n)
QudaFieldLocation Location() const
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:127
__device__ __host__ Matrix< T, 3 > inverse(const Matrix< T, 3 > &u)
Definition: quda_matrix.h:611
double norm1(const ColorSpinorField &b)
Definition: reduce_quda.cu:714
bool twisted
Clover coefficient.
Definition: clover_field.h:17
size_t NormBytes() const
Definition: clover_field.h:103
void * Norm(bool inverse=false)
Definition: clover_field.h:75
ColorSpinorParam colorSpinorParam(const CloverField &a, bool inverse)
double norm1() const
Compute the L1 norm of the field.
Definition: max_clover.cu:49
#define pool_device_free(ptr)
Definition: malloc_quda.h:126
#define checkCudaError()
Definition: util_quda.h:161
QudaPrecision Precision() const
void computeClover(CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)
Definition: clover_quda.cu:204
QudaPrecision precision
unsigned long long bytes
Definition: blas_quda.cu:23
void copyGenericClover(CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
This generic function is used for copying the clover field where in the input and output can be in an...
Definition: copy_clover.cu:175
const int * X() const