QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
color_spinor_field.cpp
Go to the documentation of this file.
1 #include <color_spinor_field.h>
2 #include <string.h>
3 #include <iostream>
4 #include <typeinfo>
5 #include <face_quda.h>
6 
7 namespace quda {
8 
9  // forward declarations
10  double normCpu(const cpuColorSpinorField &b);
11  double normCuda(const cudaColorSpinorField &b);
12 
13  /*ColorSpinorField::ColorSpinorField() : init(false) {
14 
15  }*/
16 
18  field.fill(*this);
19  }
20 
22  : LatticeField(param), init(false), v(0), norm(0), even(0), odd(0), eigenvectors(0)
23  {
24  create(param.nDim, param.x, param.nColor, param.nSpin, param.twistFlavor,
25  param.precision, param.pad, param.siteSubset, param.siteOrder,
26  param.fieldOrder, param.gammaBasis, param.PCtype, param.eigv_dim);
27  }
28 
30  : LatticeField(field), init(false), v(0), norm(0), even(0), odd(0), eigenvectors(0)
31  {
32  create(field.nDim, field.x, field.nColor, field.nSpin, field.twistFlavor,
33  field.precision, field.pad, field.siteSubset, field.siteOrder,
34  field.fieldOrder, field.gammaBasis, field.PCtype, field.eigv_dim, field.eigv_id);
35  }
36 
38  destroy();
39  }
40 
41  static bool createSpinorGhost = true;
42  void setGhostSpinor(bool value) { createSpinorGhost = value; }
43 
45 
46  if (!createSpinorGhost) {
49  return;
50  }
51 
53  printfQuda("Precision = %d, Subset = %d\n", precision, siteSubset);
54 
55  int num_faces = 1;
56  int num_norm_faces=2;
57 
58  // FIXME - this is a hack from hell that needs to be fixed. When
59  // the TIFR interface is enabled we are forcing naive staggered
60  // support which breaks asqtad/hisq fermions. The problem occurs
61  // because the ghost zone is allocated before we know which
62  // operator (and hence number of faces are needed). One solution
63  // may be to separate the ghost zone memory allocation from the
64  // field itself, which has other benefits (1. on multi-gpu
65  // machines with UVA, we can read the ghost zone directly from the
66  // neighbouring field and 2.) we can use a single contiguous
67  // buffer for the ghost zone and its norm which will reduce
68  // latency for half precision and allow us to enable GPU_COMMS
69  // support for half precision).
70 #ifdef BUILD_TIFR_INTERFACE
71  if (nSpin == 1) { //staggered
72  num_faces=2;
73  num_norm_faces=2;
74  }
75 #else
76  if (nSpin == 1) { // improved staggered
77  num_faces=6;
78  num_norm_faces=6;
79  }
80 #endif
81 
82  // calculate size of ghost zone required
83  int ghostVolume = 0;
84  //temporal hack
85  int dims = nDim == 5 ? (nDim - 1) : nDim;
86  int x5 = nDim == 5 ? x[4] : 1;
87  for (int i=0; i<dims; i++) {
88  ghostFace[i] = 0;
89  if (commDimPartitioned(i)) {
90  ghostFace[i] = 1;
91  for (int j=0; j<dims; j++) {
92  if (i==j) continue;
93  ghostFace[i] *= x[j];
94  }
95  ghostFace[i] *= x5;
96  if (i==0 && siteSubset != QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
98  ghostVolume += ghostFace[i];
99  }
100  if(i==0){
101  ghostOffset[i] = 0;
102  ghostNormOffset[i] = 0;
103  }else{
104  ghostOffset[i] = ghostOffset[i-1] + num_faces*ghostFace[i-1];
105  ghostNormOffset[i] = ghostNormOffset[i-1] + num_norm_faces*ghostFace[i-1];
106  }
107 
108 #ifdef MULTI_GPU
110  printfQuda("face %d = %6d commDimPartitioned = %6d ghostOffset = %6d ghostNormOffset = %6d\n",
112 #endif
113  }//end of outmost for loop
114  int ghostNormVolume = num_norm_faces * ghostVolume;
115  ghostVolume *= num_faces;
116 
118  printfQuda("Allocated ghost volume = %d, ghost norm volume %d\n", ghostVolume, ghostNormVolume);
119 
120  // ghost zones are calculated on c/b volumes
121 #ifdef MULTI_GPU
122  ghost_length = ghostVolume*nColor*nSpin*2;
123  ghost_norm_length = (precision == QUDA_HALF_PRECISION) ? ghostNormVolume : 0;
124 #else
125  ghost_length = 0;
126  ghost_norm_length = 0;
127 #endif
128 
130  total_length = length + 2*ghost_length; // 2 ghost zones in a full field
131  total_norm_length = 2*(stride + ghost_norm_length); // norm length = 2*stride
132  } else {
134  total_norm_length = (precision == QUDA_HALF_PRECISION) ? stride + ghost_norm_length : 0; // norm length = stride
135  }
136 
138 
139  if (getVerbosity() == QUDA_DEBUG_VERBOSE) {
140  printfQuda("ghost length = %d, ghost norm length = %d\n", ghost_length, ghost_norm_length);
141  printfQuda("total length = %d, total norm length = %d\n", total_length, total_norm_length);
142  }
143 
144  } // createGhostZone
145 
146  void ColorSpinorField::create(int Ndim, const int *X, int Nc, int Ns, QudaTwistFlavorType Twistflavor,
147  QudaPrecision Prec, int Pad, QudaSiteSubset siteSubset,
148  QudaSiteOrder siteOrder, QudaFieldOrder fieldOrder,
149  QudaGammaBasis gammaBasis, QudaDWFPCType DWFPC, int evdim /*= 0*/, int evid /* = -1*/) {
150  this->siteSubset = siteSubset;
151  this->siteOrder = siteOrder;
152  this->fieldOrder = fieldOrder;
153  this->gammaBasis = gammaBasis;
154 
155  if (Ndim > QUDA_MAX_DIM){
156  errorQuda("Number of dimensions nDim = %d too great", Ndim);
157  }
158  nDim = Ndim;
159  nColor = Nc;
160  nSpin = Ns;
161  twistFlavor = Twistflavor;
162 
163  PCtype = DWFPC;
164 
166  eigv_dim = evdim;
167  eigv_id = (evdim == 0) ? -1: evid;
168 
169  precision = Prec;
170  volume = 1;
171  for (int d=0; d<nDim; d++) {
172  x[d] = X[d];
173  volume *= x[d];
174  }
175  volumeCB = siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;
176 
177  if((twistFlavor == QUDA_TWIST_NONDEG_DOUBLET || twistFlavor == QUDA_TWIST_DEG_DOUBLET) && x[4] != 2) errorQuda("Must be two flavors for non-degenerate twisted mass spinor (while provided with %d number of components)\n", x[4]);//two flavors
178 
179  pad = Pad;
180  if (siteSubset == QUDA_FULL_SITE_SUBSET) {
181  stride = volume/2 + pad; // padding is based on half volume
182  length = 2*stride*nColor*nSpin*2;
183  } else {
184  stride = volume + pad;
186  }
187 
188  real_length = volume*nColor*nSpin*2; // physical length
189 
190  createGhostZone();
191 
192  bytes = total_length * precision; // includes pads and ghost zones
194 
195  norm_bytes = total_norm_length * sizeof(float);
196  norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(norm_bytes/2) : ALIGNMENT_ADJUST(norm_bytes);
197 
198  init = true;
199 
201  if(evdim != 0){
202 
207 //multi-gpu:
210 
213 
214  eigv_bytes = bytes;
216 
217  volume *= evdim;
218  stride *= evdim;
219  length *= evdim;
220  real_length *= evdim;
221 
222  total_length *= evdim;
223  total_norm_length *= evdim;
224 //won't be used.
225  ghost_length *= evdim;
226  ghost_norm_length *= evdim;
227 
228  bytes *= evdim;
229  norm_bytes *= evdim;
230 
231  }else{
232 
233  eigv_volume = 0;
234  eigv_stride = 0;
235  eigv_length = 0;
236  eigv_real_length = 0;
237 
238  eigv_total_length = 0;
240 
241  eigv_ghost_length = 0;
243 
244  eigv_bytes = 0;
245  eigv_norm_bytes = 0;
246 
247  }
248 
250  setTuningString();
251  }
252 
254  char vol_tmp[TuneKey::volume_n];
255  int check;
256  check = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);
257  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
258  for (int d=1; d<nDim; d++) {
259  strcpy(vol_tmp, vol_string);
260  check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);
261  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
262  }
263 
264  int aux_string_n = TuneKey::aux_n / 2;
265  char aux_tmp[aux_string_n];
266  check = snprintf(aux_string, aux_string_n, "vol=%d,stride=%d,precision=%d", volume, stride, precision);
267  if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
268 
270  strcpy(aux_tmp, aux_string);
271  check = snprintf(aux_string, aux_string_n, "%s,TwistFlavour=%d", aux_tmp, twistFlavor);
272  if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
273  }
274  }
275 
276  void ColorSpinorField::destroy() {
277  init = false;
278  }
279 
281  if (&src != this) {
282  create(src.nDim, src.x, src.nColor, src.nSpin, src.twistFlavor,
283  src.precision, src.pad, src.siteSubset,
284  src.siteOrder, src.fieldOrder, src.gammaBasis, src.PCtype, src.eigv_dim, src.eigv_id);
285  }
286  return *this;
287  }
288 
289  // Resets the attributes of this field if param disagrees (and is defined)
291 
292  if (param.nColor != 0) nColor = param.nColor;
293  if (param.nSpin != 0) nSpin = param.nSpin;
295 
296  if (param.PCtype != QUDA_PC_INVALID) PCtype = param.PCtype;
297 
298  if (param.precision != QUDA_INVALID_PRECISION) precision = param.precision;
299  if (param.nDim != 0) nDim = param.nDim;
300 
301  if (param.eigv_dim != 0 ){
302  eigv_dim = param.eigv_dim;
303  eigv_id = param.eigv_id;
304  }
305  else
306  {
307  eigv_dim = 0;
308  eigv_id = -1;
309  }
310 
311  volume = 1;
312  for (int d=0; d<nDim; d++) {
313  if (param.x[0] != 0) x[d] = param.x[d];
314  volume *= x[d];
315  }
316  volumeCB = siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;
317 
318  if((twistFlavor == QUDA_TWIST_NONDEG_DOUBLET || twistFlavor == QUDA_TWIST_DEG_DOUBLET) && x[4] != 2) errorQuda("Must be two flavors for non-degenerate twisted mass spinor (provided with %d)\n", x[4]);
319 
320 
321  if (param.pad != 0) pad = param.pad;
322 
323  if (param.siteSubset == QUDA_FULL_SITE_SUBSET){
324  stride = volume/2 + pad;
325  length = 2*stride*nColor*nSpin*2;
326  } else if (param.siteSubset == QUDA_PARITY_SITE_SUBSET){
327  stride = volume + pad;
328  length = stride*nColor*nSpin*2;
329  } else {
330  //errorQuda("SiteSubset not defined %d", param.siteSubset);
331  //do nothing, not an error (can't remember why - need to document this sometime! )
332  }
333 
334  if (param.siteSubset != QUDA_INVALID_SITE_SUBSET) siteSubset = param.siteSubset;
335  if (param.siteOrder != QUDA_INVALID_SITE_ORDER) siteOrder = param.siteOrder;
336  if (param.fieldOrder != QUDA_INVALID_FIELD_ORDER) fieldOrder = param.fieldOrder;
337  if (param.gammaBasis != QUDA_INVALID_GAMMA_BASIS) gammaBasis = param.gammaBasis;
338 
339  createGhostZone();
340 
341  real_length = volume*nColor*nSpin*2;
342 
343  bytes = total_length * precision; // includes pads and ghost zones
345 
346  if (precision == QUDA_HALF_PRECISION) {
347  norm_bytes = total_norm_length * sizeof(float);
348  norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(norm_bytes/2) : ALIGNMENT_ADJUST(norm_bytes);
349  } else {
350  norm_bytes = 0;
351  }
352 
354  if(eigv_dim > 0)
355  {
356  if(eigv_id == -1)
357  {
362 
365 
368 
369  eigv_bytes = bytes;
371 
372  volume *= eigv_dim;
373  stride *= eigv_dim;
374  length *= eigv_dim;
376 
381  }
382  else if(eigv_id > -1)
383  {
384  // just to be safe...
385  eigv_volume = 0;
386  eigv_stride = 0;
387  eigv_length = 0;
388  eigv_real_length = 0;
389 
390  eigv_total_length = 0;
392 
393  eigv_ghost_length = 0;
395 
396  eigv_bytes = 0;
397  eigv_norm_bytes = 0;
398  }
399  else
400  {
401  errorQuda("\nIncorrect eigenvector index.\n");
402  }
403  }
404 
405  if (!init) errorQuda("Shouldn't be resetting a non-inited field\n");
406 
407  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
408  printfQuda("\nPrinting out reset field\n");
409  std::cout << *this << std::endl;
410  printfQuda("\n");
411  }
412 
413  setTuningString();
414  }
415 
416  // Fills the param with the contents of this field
418  param.nColor = nColor;
419  param.nSpin = nSpin;
420  param.twistFlavor = twistFlavor;
421  param.precision = precision;
422  param.nDim = nDim;
423  param.eigv_dim = eigv_dim;
424  param.eigv_id = eigv_id;
425  memcpy(param.x, x, QUDA_MAX_DIM*sizeof(int));
426  param.pad = pad;
427  param.siteSubset = siteSubset;
428  param.siteOrder = siteOrder;
429  param.fieldOrder = fieldOrder;
430  param.gammaBasis = gammaBasis;
431  param.PCtype = PCtype;
433  }
434 
435  // For kernels with precision conversion built in
437  if (a.Length() != b.Length()) {
438  errorQuda("checkSpinor: lengths do not match: %d %d", a.Length(), b.Length());
439  }
440 
441  if (a.Ncolor() != b.Ncolor()) {
442  errorQuda("checkSpinor: colors do not match: %d %d", a.Ncolor(), b.Ncolor());
443  }
444 
445  if (a.Nspin() != b.Nspin()) {
446  errorQuda("checkSpinor: spins do not match: %d %d", a.Nspin(), b.Nspin());
447  }
448 
449  if (a.TwistFlavor() != b.TwistFlavor()) {
450  errorQuda("checkSpinor: twist flavors do not match: %d %d", a.TwistFlavor(), b.TwistFlavor());
451  }
452  }
453 
454  // Set the ghost pointers to NULL.
455  // This is a private initialisation routine.
457  {
458  for(int dim=0; dim<QUDA_MAX_DIM; ++dim){
459  ghost[dim] = NULL;
460  ghostNorm[dim] = NULL;
461  }
462  }
463 
464 
465  void* ColorSpinorField::Ghost(const int i) {
466  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
467  return ghost[i];
468  }
469 
470  const void* ColorSpinorField::Ghost(const int i) const {
471  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
472  return ghost[i];
473  }
474 
475 
476  void* ColorSpinorField::GhostNorm(const int i){
477  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
478  return ghostNorm[i];
479  }
480 
481  const void* ColorSpinorField::GhostNorm(const int i) const{
482  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
483  return ghostNorm[i];
484  }
485 
486  double norm2(const ColorSpinorField &a) {
487 
488  double rtn = 0.0;
489  if (typeid(a) == typeid(cudaColorSpinorField)) {
490  rtn = normCuda(dynamic_cast<const cudaColorSpinorField&>(a));
491  } else if (typeid(a) == typeid(cpuColorSpinorField)) {
492  rtn = normCpu(dynamic_cast<const cpuColorSpinorField&>(a));
493  } else {
494  errorQuda("Unknown input ColorSpinorField %s", typeid(a).name());
495  }
496 
497  return rtn;
498  }
499 
500  std::ostream& operator<<(std::ostream &out, const ColorSpinorField &a) {
501  out << "typedid = " << typeid(a).name() << std::endl;
502  out << "nColor = " << a.nColor << std::endl;
503  out << "nSpin = " << a.nSpin << std::endl;
504  out << "twistFlavor = " << a.twistFlavor << std::endl;
505  out << "nDim = " << a.nDim << std::endl;
506  for (int d=0; d<a.nDim; d++) out << "x[" << d << "] = " << a.x[d] << std::endl;
507  out << "volume = " << a.volume << std::endl;
508  out << "precision = " << a.precision << std::endl;
509  out << "pad = " << a.pad << std::endl;
510  out << "stride = " << a.stride << std::endl;
511  out << "real_length = " << a.real_length << std::endl;
512  out << "length = " << a.length << std::endl;
513  out << "ghost_length = " << a.ghost_length << std::endl;
514  out << "total_length = " << a.total_length << std::endl;
515  out << "ghost_norm_length = " << a.ghost_norm_length << std::endl;
516  out << "total_norm_length = " << a.total_norm_length << std::endl;
517  out << "bytes = " << a.bytes << std::endl;
518  out << "norm_bytes = " << a.norm_bytes << std::endl;
519  out << "siteSubset = " << a.siteSubset << std::endl;
520  out << "siteOrder = " << a.siteOrder << std::endl;
521  out << "fieldOrder = " << a.fieldOrder << std::endl;
522  out << "gammaBasis = " << a.gammaBasis << std::endl;
523  out << "eigDim = " << a.eigv_dim << std::endl;
524  out << "eigID = " << a.eigv_id << std::endl;
525  out << "eigVolume = " << a.eigv_volume << std::endl;
526  out << "eigStride = " << a.eigv_stride << std::endl;
527  out << "eigLength = " << a.eigv_length << std::endl;
528  out << "PC type = " << a.PCtype << std::endl;
529  return out;
530  }
531 
532 } // namespace quda
void setGhostSpinor(bool value)
enum QudaPrecision_s QudaPrecision
int commDimPartitioned(int dir)
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:859
int ghostFace[QUDA_MAX_DIM]
void * ghost[QUDA_MAX_DIM]
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
void * ghostNorm[QUDA_MAX_DIM]
#define errorQuda(...)
Definition: util_quda.h:73
char aux_tmp[TuneKey::aux_n]
Definition: blas_quda.cu:47
enum QudaFieldOrder_s QudaFieldOrder
void fill(ColorSpinorParam &) const
int ghostOffset[QUDA_MAX_DIM]
enum QudaSiteOrder_s QudaSiteOrder
QudaPrecision precision
Definition: lattice_field.h:41
int eigv_dim
used for eigcg:
QudaSiteSubset siteSubset
Definition: lattice_field.h:42
ColorSpinorField(const ColorSpinorField &)
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
QudaGaugeParam param
Definition: pack_test.cpp:17
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:38
enum QudaDWFPCType_s QudaDWFPCType
static void checkField(const ColorSpinorField &, const ColorSpinorField &)
void reset(const ColorSpinorParam &)
#define ALIGNMENT_ADJUST(n)
Definition: quda_internal.h:33
void * GhostNorm(const int i)
int ghostNormOffset[QUDA_MAX_DIM]
double normCuda(const cudaColorSpinorField &b)
Definition: reduce_quda.cu:145
char vol_string[TuneKey::volume_n]
QudaTwistFlavorType twistFlavor
virtual ColorSpinorField & operator=(const ColorSpinorField &)
void * Ghost(const int i)
enum QudaSiteSubset_s QudaSiteSubset
char aux_string[TuneKey::aux_n]
cpuColorSpinorField * out
enum QudaGammaBasis_s QudaGammaBasis
static const int aux_n
Definition: tune_key.h:12
#define printfQuda(...)
Definition: util_quda.h:67
QudaTwistFlavorType twistFlavor
double normCpu(const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:166
QudaTwistFlavorType TwistFlavor() const
void init(int argc, char **argv)
Definition: dslash_test.cpp:79
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
static const int volume_n
Definition: tune_key.h:10
double norm2(const ColorSpinorField &)
enum QudaTwistFlavorType_s QudaTwistFlavorType