QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
clover_field.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <math.h>
5 #include <typeinfo>
6 
7 #include <quda_internal.h>
8 #include <clover_field.h>
9 #include <gauge_field.h>
10 
11 namespace quda {
12 
14  LatticeField(param), bytes(0), norm_bytes(0), nColor(3), nSpin(4),
15  clover(0), norm(0), cloverInv(0), invNorm(0), order(param.order), create(param.create),
16  trlog(static_cast<double*>(pinned_malloc(2*sizeof(double))))
17  {
18  if (nDim != 4) errorQuda("Number of dimensions must be 4, not %d", nDim);
19 
21  errorQuda("QDPJIT ordered clover fields only supported for reference fields");
22 
23  real_length = 2*volumeCB*nColor*nColor*nSpin*nSpin/2; // block-diagonal Hermitian (72 reals)
25 
28  if (precision == QUDA_HALF_PRECISION) {
29  norm_bytes = sizeof(float)*2*stride*2; // 2 chirality
31  }
32 //for twisted mass only:
33  twisted = false;//param.twisted;
34  mu2 = 0.0; //param.mu2;
35  }
36 
39  }
40 
42 
44  errorQuda("Create type %d not supported", create);
45 
46  if (param.direct) {
50  } else {
51  clover = param.clover;
52  norm = param.norm;
53  }
54 
55  even = clover;
56  odd = (char*)clover + bytes/2;
57 
58  evenNorm = norm;
59  oddNorm = (char*)norm + norm_bytes/2;
60 
62 
63  // this is a hack to prevent us allocating a texture object for an unallocated inverse field
64  if (!param.inverse) {
65  cloverInv = clover;
66  evenInv = even;
67  oddInv = odd;
68  invNorm = norm;
69  evenInvNorm = evenNorm;
70  oddInvNorm = oddNorm;
71  }
72  }
73 
74  if (param.inverse) {
78  } else {
79  cloverInv = param.cloverInv;
80  invNorm = param.invNorm;
81  }
82 
83  evenInv = cloverInv;
84  oddInv = (char*)cloverInv + bytes/2;
85 
86  evenInvNorm = invNorm;
87  oddInvNorm = (char*)invNorm + norm_bytes/2;
88 
90 
91  // this is a hack to ensure that we can autotune the clover
92  // operator when just using symmetric preconditioning
93  if (!param.direct) {
94  clover = cloverInv;
95  even = evenInv;
96  odd = oddInv;
97  norm = invNorm;
98  evenNorm = evenInvNorm;
99  oddNorm = oddInvNorm;
100  }
101  }
102 
103 #ifdef USE_TEXTURE_OBJECTS
104  createTexObject(evenTex, evenNormTex, even, evenNorm);
105  createTexObject(oddTex, oddNormTex, odd, oddNorm);
106  createTexObject(evenInvTex, evenInvNormTex, evenInv, evenInvNorm);
107  createTexObject(oddInvTex, oddInvNormTex, oddInv, oddInvNorm);
108 #endif
109  twisted = param.twisted;
110  mu2 = param.mu2;
111 
112  }
113 
114 #ifdef USE_TEXTURE_OBJECTS
115  void cudaCloverField::createTexObject(cudaTextureObject_t &tex, cudaTextureObject_t &texNorm,
116  void *field, void *norm) {
118  // create the texture for the field components
119 
120  cudaChannelFormatDesc desc;
121  memset(&desc, 0, sizeof(cudaChannelFormatDesc));
122  if (precision == QUDA_SINGLE_PRECISION) desc.f = cudaChannelFormatKindFloat;
123  else desc.f = cudaChannelFormatKindSigned; // half is short, double is int2
124 
125  // always four components regardless of precision
126  desc.x = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
127  desc.y = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
128  desc.z = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
129  desc.w = (precision == QUDA_DOUBLE_PRECISION) ? 8*sizeof(int) : 8*precision;
130 
131  cudaResourceDesc resDesc;
132  memset(&resDesc, 0, sizeof(resDesc));
133  resDesc.resType = cudaResourceTypeLinear;
134  resDesc.res.linear.devPtr = field;
135  resDesc.res.linear.desc = desc;
136  resDesc.res.linear.sizeInBytes = bytes/2;
137 
138  cudaTextureDesc texDesc;
139  memset(&texDesc, 0, sizeof(texDesc));
140  if (precision == QUDA_HALF_PRECISION) texDesc.readMode = cudaReadModeNormalizedFloat;
141  else texDesc.readMode = cudaReadModeElementType;
142 
143  cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL);
144  checkCudaError();
145 
146  // create the texture for the norm components
148  cudaChannelFormatDesc desc;
149  memset(&desc, 0, sizeof(cudaChannelFormatDesc));
150  desc.f = cudaChannelFormatKindFloat;
151  desc.x = 8*QUDA_SINGLE_PRECISION; desc.y = 0; desc.z = 0; desc.w = 0;
152 
153  cudaResourceDesc resDesc;
154  memset(&resDesc, 0, sizeof(resDesc));
155  resDesc.resType = cudaResourceTypeLinear;
156  resDesc.res.linear.devPtr = norm;
157  resDesc.res.linear.desc = desc;
158  resDesc.res.linear.sizeInBytes = norm_bytes/2;
159 
160  cudaTextureDesc texDesc;
161  memset(&texDesc, 0, sizeof(texDesc));
162  texDesc.readMode = cudaReadModeElementType;
163 
164  cudaCreateTextureObject(&texNorm, &resDesc, &texDesc, NULL);
165  checkCudaError();
166  }
167  }
168 
169  }
170 
171  void cudaCloverField::destroyTexObject() {
173  cudaDestroyTextureObject(evenTex);
174  cudaDestroyTextureObject(oddTex);
175  cudaDestroyTextureObject(evenInvTex);
176  cudaDestroyTextureObject(oddInvTex);
178  cudaDestroyTextureObject(evenNormTex);
179  cudaDestroyTextureObject(oddNormTex);
180  cudaDestroyTextureObject(evenInvNormTex);
181  cudaDestroyTextureObject(oddInvNormTex);
182  }
183  checkCudaError();
184  }
185  }
186 #endif
187 
189  {
190 #ifdef USE_TEXTURE_OBJECTS
191  destroyTexObject();
192 #endif
193 
195  if (clover != cloverInv) {
196  if (clover) device_free(clover);
197  if (norm) device_free(norm);
198  }
201  }
202 
203  checkCudaError();
204  }
205 
206  void cudaCloverField::copy(const CloverField &src, bool inverse) {
207 
208  checkField(src);
209 
210  if (typeid(src) == typeid(cudaCloverField)) {
211  if (src.V(false)) copyGenericClover(*this, src, false, QUDA_CUDA_FIELD_LOCATION);
212  if (src.V(true)) copyGenericClover(*this, src, true, QUDA_CUDA_FIELD_LOCATION);
213  } else if (typeid(src) == typeid(cpuCloverField)) {
215  void *packClover = bufferPinned[0];
216  void *packCloverNorm = (precision == QUDA_HALF_PRECISION) ? (char*)bufferPinned[0] + bytes : 0;
217 
218  if (src.V(false)) {
219  copyGenericClover(*this, src, false, QUDA_CPU_FIELD_LOCATION, packClover, 0, packCloverNorm, 0);
220  cudaMemcpy(clover, packClover, bytes, cudaMemcpyHostToDevice);
222  cudaMemcpy(norm, packCloverNorm, norm_bytes, cudaMemcpyHostToDevice);
223  }
224 
225  if (src.V(true) && inverse) {
226  copyGenericClover(*this, src, true, QUDA_CPU_FIELD_LOCATION, packClover, 0, packCloverNorm, 0);
227  cudaMemcpy(cloverInv, packClover, bytes, cudaMemcpyHostToDevice);
229  cudaMemcpy(invNorm, packCloverNorm, norm_bytes, cudaMemcpyHostToDevice);
230  }
231  } else {
232  errorQuda("Invalid clover field type");
233  }
234 
235  checkCudaError();
236  }
237 
239 
243  void cudaCloverField::compute(const cudaGaugeField &gauge) {
244 
245  if (gauge.Precision() != precision)
246  errorQuda("Gauge and clover precisions must match");
247 
248  computeClover(*this, gauge, 1.0, QUDA_CUDA_FIELD_LOCATION);
249 
250  }
251 
253  if (create != QUDA_REFERENCE_FIELD_CREATE) errorQuda("Create type %d not supported", create);
254 
256  clover = param.clover;
257  norm = param.norm;
258  cloverInv = param.cloverInv;
259  invNorm = param.invNorm;
260  }
261  }
262 
265  if (clover) host_free(clover);
266  if (norm) host_free(norm);
268  if (invNorm) host_free(invNorm);
269  }
270  }
271 
272  // This doesn't really live here, but is fine for the moment
273  std::ostream& operator<<(std::ostream& output, const CloverFieldParam& param)
274  {
275  output << static_cast<const LatticeFieldParam&>(param);
276  output << "direct = " << param.direct << std::endl;
277  output << "inverse = " << param.inverse << std::endl;
278  output << "clover = " << param.clover << std::endl;
279  output << "norm = " << param.norm << std::endl;
280  output << "cloverInv = " << param.cloverInv << std::endl;
281  output << "invNorm = " << param.invNorm << std::endl;
282  output << "twisted = " << param.twisted << std::endl;
283  output << "mu2 = " << param.mu2 << std::endl;
284  output << "order = " << param.order << std::endl;
285  output << "create = " << param.create << std::endl;
286  return output; // for multiple << operators.
287  }
288 
289 } // namespace quda
virtual ~CloverField()
QudaCloverFieldOrder order
Definition: clover_field.h:21
cudaCloverField(const CloverFieldParam &param)
#define pinned_malloc(size)
Definition: malloc_quda.h:26
void * V(bool inverse=false)
Definition: clover_field.h:59
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:859
#define errorQuda(...)
Definition: util_quda.h:73
QudaFieldCreate create
Definition: clover_field.h:22
#define host_free(ptr)
Definition: malloc_quda.h:29
void loadCPUField(const cpuCloverField &cpu)
QudaCloverFieldOrder order
Definition: clover_field.h:50
CloverField(const CloverFieldParam &param)
QudaFieldCreate create
Definition: clover_field.h:51
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
QudaGaugeParam param
Definition: pack_test.cpp:17
QudaPrecision Precision() const
#define ALIGNMENT_ADJUST(n)
Definition: quda_internal.h:33
void checkField(const LatticeField &)
cpuCloverField(const CloverFieldParam &param)
void copy(const CloverField &src, bool inverse=true)
void * memset(void *s, int c, size_t n)
#define device_malloc(size)
Definition: malloc_quda.h:24
#define checkCudaError()
Definition: util_quda.h:110
void computeClover(CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location)
Definition: clover_quda.cu:602
QudaPrecision precision
Definition: lattice_field.h:84
void * gauge[4]
Definition: su3_test.cpp:15
void resizeBufferPinned(size_t bytes, const int index=0) const
static void * bufferPinned[2]
Definition: lattice_field.h:90
void copyGenericClover(CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0)
Definition: copy_clover.cu:182
#define device_free(ptr)
Definition: malloc_quda.h:28