QUDA v1.1.0
A library for QCD on GPUs
color_spinor_field.cpp
#include <color_spinor_field.h>
#include <string.h>
#include <iostream>
#include <typeinfo>

namespace quda {

  /*ColorSpinorField::ColorSpinorField() : init(false) {

  }*/

  ColorSpinorParam::ColorSpinorParam(const ColorSpinorField &field) : ColorSpinorParam()
  {
    field.fill(*this);
  }

  ColorSpinorField::ColorSpinorField(const ColorSpinorParam &param)
    : LatticeField(param), init(false), ghost_precision_allocated(QUDA_INVALID_PRECISION), v(0), norm(0),
      ghost(), ghostNorm(), ghostFace(),
      bytes(0), norm_bytes(0), even(0), odd(0),
      composite_descr(param.is_composite, param.composite_dim, param.is_component, param.component_id),
      components(0)
  {
    if (param.create == QUDA_INVALID_FIELD_CREATE) errorQuda("Invalid create type");
    for (int i = 0; i < 2 * QUDA_MAX_DIM; i++) ghost_buf[i] = nullptr;
    create(param.nDim, param.x, param.nColor, param.nSpin, param.nVec, param.twistFlavor, param.Precision(), param.pad,
           param.siteSubset, param.siteOrder, param.fieldOrder, param.gammaBasis, param.pc_type, param.suggested_parity);
  }

  ColorSpinorField::ColorSpinorField(const ColorSpinorField &field)
    : LatticeField(field), init(false), ghost_precision_allocated(QUDA_INVALID_PRECISION), v(0), norm(0),
      ghost(), ghostNorm(), ghostFace(),
      bytes(0), norm_bytes(0), even(0), odd(0),
      composite_descr(field.composite_descr), components(0)
  {
    for (int i = 0; i < 2 * QUDA_MAX_DIM; i++) ghost_buf[i] = nullptr;
    create(field.nDim, field.x, field.nColor, field.nSpin, field.nVec, field.twistFlavor, field.Precision(), field.pad,
           field.siteSubset, field.siteOrder, field.fieldOrder, field.gammaBasis, field.pc_type, field.suggested_parity);
  }

  ColorSpinorField::~ColorSpinorField()
  {
    destroy();
  }
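
  // Illustrative construction sketch (not part of this file; the values are
  // assumptions): a field is normally built by filling in a ColorSpinorParam
  // and passing it to the constructor above, e.g.
  //
  //   ColorSpinorParam param;
  //   param.nDim = 4;
  //   param.nColor = 3;
  //   param.nSpin = 4;                       // Wilson-like field
  //   param.create = QUDA_ZERO_FIELD_CREATE;
  //   cudaColorSpinorField field(param);     // device field
  //   cudaColorSpinorField clone(field);     // copies meta data via create()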

  void ColorSpinorField::createGhostZone(int nFace, bool spin_project) const
  {
    if (typeid(*this) == typeid(cpuColorSpinorField) || ghost_precision_allocated == ghost_precision) return;

    // fixed-point (half/quarter) ghosts carry an extra float norm per site
    bool is_fixed = (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION);
    int nSpinGhost = (nSpin == 4 && spin_project) ? 2 : nSpin;
    size_t site_size = nSpinGhost * nColor * 2 * ghost_precision + (is_fixed ? sizeof(float) : 0);

    // calculate size of ghost zone required
    int ghost_volume = 0;
    int dims = nDim == 5 ? (nDim - 1) : nDim;
    int x5 = nDim == 5 ? x[4] : 1;
    const int ghost_align
      = 1; // TODO perhaps in the future we should align each ghost dim/dir, e.g., along 32-byte boundaries
    ghost_bytes = 0;
    for (int i=0; i<dims; i++) {
      ghostFace[i] = 0;
      if (comm_dim_partitioned(i)) {
        ghostFace[i] = 1;
        for (int j=0; j<dims; j++) {
          if (i==j) continue;
          ghostFace[i] *= x[j];
        }
        ghostFace[i] *= x5; // temporary hack : extra dimension for DW ghosts
        if (i == 0 && siteSubset != QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
        ghost_volume += 2 * nFace * ghostFace[i];
      }

      ghost_face_bytes[i] = nFace * ghostFace[i] * site_size;
      ghost_face_bytes_aligned[i] = ((ghost_face_bytes[i] + ghost_align - 1) / ghost_align) * ghost_align;
      ghost_offset[i][0] = i == 0 ? 0 : ghost_offset[i - 1][0] + 2 * ghost_face_bytes_aligned[i - 1];
      ghost_offset[i][1] = ghost_offset[i][0] + ghost_face_bytes_aligned[i];

      ghost_bytes += 2 * ghost_face_bytes_aligned[i];
    } // dim

    if (isNative()) ghost_bytes = ALIGNMENT_ADJUST(ghost_bytes);

    { // compute temporaries needed by dslash and packing kernels
      auto &X = dslash_constant.X;
      for (int dim=0; dim<nDim; dim++) X[dim] = x[dim];
      for (int dim=nDim; dim<QUDA_MAX_DIM; dim++) X[dim] = 1;
      if (siteSubset == QUDA_PARITY_SITE_SUBSET) X[0] = 2*X[0];

      for (int i=0; i<nDim; i++) dslash_constant.Xh[i] = X[i]/2;

      dslash_constant.Ls = X[4];
      dslash_constant.volume_4d_cb = volumeCB / (nDim == 5 ? x[4] : 1);
      dslash_constant.volume_4d = 2 * dslash_constant.volume_4d_cb;

      int face[4];
      for (int dim=0; dim<4; dim++) {
        for (int j=0; j<4; j++) face[j] = X[j];
        face[dim] = nFace;
        dslash_constant.face_X[dim] = face[0];
        dslash_constant.face_Y[dim] = face[1];
        dslash_constant.face_Z[dim] = face[2];
        dslash_constant.face_T[dim] = face[3];
      }

      dslash_constant.Vh = (X[3]*X[2]*X[1]*X[0])/2;
      dslash_constant.ghostFace[0] = X[1] * X[2] * X[3];
      dslash_constant.ghostFace[1] = X[0] * X[2] * X[3];
      dslash_constant.ghostFace[2] = X[0] * X[1] * X[3];
      dslash_constant.ghostFace[3] = X[0] * X[1] * X[2];
      for (int d = 0; d < 4; d++) dslash_constant.ghostFaceCB[d] = dslash_constant.ghostFace[d] / 2;

      dslash_constant.X2X1 = X[1]*X[0];
      dslash_constant.X3X2X1 = X[2]*X[1]*X[0];
      dslash_constant.X4X3X2X1 = X[3] * X[2] * X[1] * X[0];
      dslash_constant.X2X1mX1 = (X[1]-1)*X[0];
      dslash_constant.X3X2X1mX2X1 = (X[2]-1)*X[1]*X[0];
      dslash_constant.X4X3X2X1mX3X2X1 = (X[3]-1)*X[2]*X[1]*X[0];
      dslash_constant.X5X4X3X2X1mX4X3X2X1 = (X[4] - 1) * X[3] * X[2] * X[1] * X[0];

      // used by indexFromFaceIndexStaggered
      dslash_constant.dims[0][0]=X[1];
      dslash_constant.dims[0][1]=X[2];
      dslash_constant.dims[0][2]=X[3];

      dslash_constant.dims[1][0]=X[0];
      dslash_constant.dims[1][1]=X[2];
      dslash_constant.dims[1][2]=X[3];

      dslash_constant.dims[2][0]=X[0];
      dslash_constant.dims[2][1]=X[1];
      dslash_constant.dims[2][2]=X[3];

      dslash_constant.dims[3][0]=X[0];
      dslash_constant.dims[3][1]=X[1];
      dslash_constant.dims[3][2]=X[2];
    }
    ghost_precision_allocated = ghost_precision;

  } // createGhostZone
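
  // Worked example of the ghost sizing above (values assumed for
  // illustration): a spin-projected Wilson field (nSpin = 4, spin_project =
  // true, so nSpinGhost = 2) with nColor = 3 in half precision (2-byte reals)
  // has site_size = 2 * 3 * 2 * 2 + sizeof(float) = 28 bytes, the extra float
  // being the per-site norm that fixed-point (half/quarter) storage requires.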

  void ColorSpinorField::create(int Ndim, const int *X, int Nc, int Ns, int Nvec, QudaTwistFlavorType Twistflavor,
                                QudaPrecision Prec, int Pad, QudaSiteSubset siteSubset, QudaSiteOrder siteOrder,
                                QudaFieldOrder fieldOrder, QudaGammaBasis gammaBasis, QudaPCType pc_type,
                                QudaParity suggested_parity)
  {
    this->siteSubset = siteSubset;
    this->siteOrder = siteOrder;
    this->fieldOrder = fieldOrder;
    this->gammaBasis = gammaBasis;

    if (Ndim > QUDA_MAX_DIM) {
      errorQuda("Number of dimensions nDim = %d too great", Ndim);
    }
    nDim = Ndim;
    nColor = Nc;
    nSpin = Ns;
    nVec = Nvec;
    twistFlavor = Twistflavor;

    this->pc_type = pc_type;
    this->suggested_parity = suggested_parity;

    precision = Prec;
    // copy all data in X
    for (int d = 0; d < QUDA_MAX_DIM; d++) x[d] = X[d];
    volume = 1;
    for (int d=0; d<nDim; d++) {
      volume *= x[d];
    }
    volumeCB = siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;

    if ((twistFlavor == QUDA_TWIST_DEG_DOUBLET || twistFlavor == QUDA_TWIST_NONDEG_DOUBLET) && x[4] != 2)
      errorQuda("Must be two flavors for non-degenerate twisted mass spinor (while provided with %d number of components)\n", x[4]); // two flavors

    pad = Pad;
    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      stride = volume/2 + pad; // padding is based on half volume
      length = 2*stride*nColor*nSpin*2;
    } else {
      stride = volume + pad;
      length = stride*nColor*nSpin*2;
    }

    real_length = volume*nColor*nSpin*2; // physical length

    bytes = (size_t)length * precision; // includes pads and ghost zones
    if (isNative()) bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(bytes/2) : ALIGNMENT_ADJUST(bytes);

    if (precision < QUDA_SINGLE_PRECISION) {
      norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);
      if (isNative()) norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(norm_bytes/2) : ALIGNMENT_ADJUST(norm_bytes);
    } else {
      norm_bytes = 0;
    }

    init = true;

    if (composite_descr.is_composite) {

      if (composite_descr.is_component) errorQuda("\nComposite type is not implemented.\n");

      composite_descr.volume = volume;
      composite_descr.volumeCB = volumeCB;
      composite_descr.stride = stride;
      composite_descr.length = length;
      composite_descr.bytes = bytes;
      composite_descr.norm_bytes = norm_bytes;

      volume *= composite_descr.dim;
      volumeCB *= composite_descr.dim;
      stride *= composite_descr.dim;
      length *= composite_descr.dim;
      real_length *= composite_descr.dim;

      bytes *= composite_descr.dim;
      norm_bytes *= composite_descr.dim;
    } else if (composite_descr.is_component) {
      composite_descr.dim = 0;

      composite_descr.volume = 0;
      composite_descr.volumeCB = 0;
      composite_descr.stride = 0;
      composite_descr.length = 0;
      composite_descr.bytes = 0;
      composite_descr.norm_bytes = 0;
    }

    setTuningString();
  }
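
  // Worked example of the sizes set in create() (lattice assumed for
  // illustration): a full-subset 16^3x32 Wilson field (nColor = 3, nSpin = 4)
  // with pad = 0 has volume = 131072, stride = volume/2 = 65536, and
  // length = 2 * 65536 * 3 * 4 * 2 = 3145728 reals; real_length =
  // 131072 * 3 * 4 * 2 gives the same count here only because pad = 0.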

  void ColorSpinorField::setTuningString()
  {
    //LatticeField::setTuningString(); // FIXME - LatticeField needs correct dims for single-parity
    char vol_tmp[TuneKey::volume_n];
    int check = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);
    if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
    for (int d=1; d<nDim; d++) {
      strcpy(vol_tmp, vol_string);
      check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);
      if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
    }

    {
      int aux_string_n = TuneKey::aux_n / 2;
      char aux_tmp[aux_string_n];
      int check = snprintf(aux_string, aux_string_n, "vol=%lu,stride=%lu,precision=%d,order=%d,Ns=%d,Nc=%d", volume,
                           stride, precision, fieldOrder, nSpin, nColor);
      if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
      if (twistFlavor != QUDA_TWIST_NO && twistFlavor != QUDA_TWIST_INVALID) {
        strcpy(aux_tmp, aux_string);
        check = snprintf(aux_string, aux_string_n, "%s,TwistFlavour=%d", aux_tmp, twistFlavor);
        if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
      }
    }
  }
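
  // Example output (lattice assumed for illustration): a 16x16x16x32 field
  // produces vol_string = "16x16x16x32", while aux_string encodes the volume,
  // stride, precision, field order, Ns and Nc (plus TwistFlavour for
  // twisted-mass doublets) for use as a kernel-tuning key.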

  void ColorSpinorField::destroy()
  {
    init = false;
  }

  ColorSpinorField& ColorSpinorField::operator=(const ColorSpinorField &src)
  {
    if (&src != this) {
      if (src.composite_descr.is_composite) {
        this->composite_descr.is_composite = true;
        this->composite_descr.dim = src.composite_descr.dim;
        this->composite_descr.is_component = false;
        this->composite_descr.id = 0;
      }
      else if (src.composite_descr.is_component) {
        this->composite_descr.is_composite = false;
        this->composite_descr.dim = 0;
        //this->composite_descr.is_component = false;
        //this->composite_descr.id = 0;
      }

      create(src.nDim, src.x, src.nColor, src.nSpin, src.nVec, src.twistFlavor, src.precision, src.pad, src.siteSubset,
             src.siteOrder, src.fieldOrder, src.gammaBasis, src.pc_type, src.suggested_parity);
    }
    return *this;
  }

  // Resets the attributes of this field if param disagrees (and is defined)
  void ColorSpinorField::reset(const ColorSpinorParam &param)
  {
    if (param.nColor != 0) nColor = param.nColor;
    if (param.nSpin != 0) nSpin = param.nSpin;
    if (param.nVec != 0) nVec = param.nVec;
    if (param.twistFlavor != QUDA_TWIST_INVALID) twistFlavor = param.twistFlavor;

    if (param.pc_type != QUDA_PC_INVALID) pc_type = param.pc_type;
    if (param.suggested_parity != QUDA_INVALID_PARITY) suggested_parity = param.suggested_parity;

    if (param.Precision() != QUDA_INVALID_PRECISION) precision = param.Precision();
    if (param.GhostPrecision() != QUDA_INVALID_PRECISION) ghost_precision = param.GhostPrecision();
    if (param.nDim != 0) nDim = param.nDim;

    composite_descr.is_composite = param.is_composite;
    composite_descr.is_component = param.is_component;
    composite_descr.dim = param.is_composite ? param.composite_dim : 0;
    composite_descr.id = param.component_id;

    volume = 1;
    for (int d=0; d<nDim; d++) {
      if (param.x[d] != 0) x[d] = param.x[d];
      volume *= x[d];
    }
    volumeCB = param.siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;

    if ((twistFlavor == QUDA_TWIST_DEG_DOUBLET || twistFlavor == QUDA_TWIST_NONDEG_DOUBLET) && x[4] != 2)
      errorQuda("Must be two flavors for non-degenerate twisted mass spinor (provided with %d)\n", x[4]);

    if (param.pad != 0) pad = param.pad;

    if (param.siteSubset == QUDA_FULL_SITE_SUBSET) {
      stride = volume/2 + pad;
      length = 2*stride*nColor*nSpin*2;
    } else if (param.siteSubset == QUDA_PARITY_SITE_SUBSET) {
      stride = volume + pad;
      length = stride*nColor*nSpin*2;
    } else {
      //errorQuda("SiteSubset not defined %d", param.siteSubset);
      //do nothing, not an error (can't remember why - need to document this sometime! )
    }

    if (param.siteSubset != QUDA_INVALID_SITE_SUBSET) siteSubset = param.siteSubset;
    if (param.siteOrder != QUDA_INVALID_SITE_ORDER) siteOrder = param.siteOrder;
    if (param.fieldOrder != QUDA_INVALID_FIELD_ORDER) fieldOrder = param.fieldOrder;
    if (param.gammaBasis != QUDA_INVALID_GAMMA_BASIS) gammaBasis = param.gammaBasis;

    real_length = volume*nColor*nSpin*2;

    bytes = (size_t)length * precision; // includes pads
    if (isNative()) bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(bytes/2) : ALIGNMENT_ADJUST(bytes);

    if (precision < QUDA_SINGLE_PRECISION) {
      norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);
      if (isNative()) norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(norm_bytes/2) : ALIGNMENT_ADJUST(norm_bytes);
    } else {
      norm_bytes = 0;
    }

    if (composite_descr.is_composite) {
      composite_descr.volume = volume;
      composite_descr.volumeCB = volumeCB;
      composite_descr.stride = stride;
      composite_descr.length = length;
      composite_descr.bytes = bytes;
      composite_descr.norm_bytes = norm_bytes;

      volume *= composite_descr.dim;
      volumeCB *= composite_descr.dim;
      stride *= composite_descr.dim;
      length *= composite_descr.dim;

      bytes *= composite_descr.dim;
      norm_bytes *= composite_descr.dim;
    } else {
      composite_descr.volume = 0;
      composite_descr.volumeCB = 0;
      composite_descr.stride = 0;
      composite_descr.length = 0;
      composite_descr.bytes = 0;
      composite_descr.norm_bytes = 0;
    }

    if (!init) errorQuda("Shouldn't be resetting a non-inited field\n");

    setTuningString();
  }
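
  // Illustrative use of reset() (a sketch, not taken from this file): only
  // the members explicitly set in the param override the field's meta data,
  //
  //   ColorSpinorParam param(field);           // capture current attributes
  //   param.setPrecision(QUDA_HALF_PRECISION); // change just the precision
  //   field.reset(param);                      // everything else is retained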

  // Fills the param with the contents of this field
  void ColorSpinorField::fill(ColorSpinorParam &param) const
  {
    param.location = Location();
    param.nColor = nColor;
    param.nSpin = nSpin;
    param.nVec = nVec;
    param.twistFlavor = twistFlavor;
    param.fieldOrder = fieldOrder;
    param.setPrecision(precision, ghost_precision);
    param.nDim = nDim;

    param.is_composite = composite_descr.is_composite;
    param.composite_dim = composite_descr.dim;
    param.is_component = false; // always either a regular spinor or a composite object
    param.component_id = 0;

    memcpy(param.x, x, QUDA_MAX_DIM*sizeof(int));
    param.pad = pad;
    param.siteSubset = siteSubset;
    param.siteOrder = siteOrder;
    param.gammaBasis = gammaBasis;
    param.pc_type = pc_type;
    param.suggested_parity = suggested_parity;
    param.create = QUDA_NULL_FIELD_CREATE;
  }

  void ColorSpinorField::exchange(void **ghost, void **sendbuf, int nFace) const {

    // FIXME: use LatticeField MsgHandles
    MsgHandle *mh_send_fwd[4];
    MsgHandle *mh_from_back[4];
    MsgHandle *mh_from_fwd[4];
    MsgHandle *mh_send_back[4];
    size_t bytes[4];

    const int Ninternal = 2*nColor*nSpin;
    size_t total_bytes = 0;
    for (int i=0; i<nDimComms; i++) {
      bytes[i] = siteSubset*nFace*surfaceCB[i]*Ninternal*ghost_precision;
      if (comm_dim_partitioned(i)) total_bytes += 2*bytes[i]; // 2 for fwd/bwd
    }

    void *total_send = nullptr;
    void *total_recv = nullptr;
    void *send_fwd[4];
    void *send_back[4];
    void *recv_fwd[4];
    void *recv_back[4];

    // leave this option in there just in case
    bool no_comms_fill = false;

    // If this is set to false, then we are assuming that the send and
    // ghost buffers are in a single contiguous memory space.  Setting
    // to false means we aggregate all cudaMemcpys which reduces
    // latency.
    bool fine_grained_memcpy = false;

    if (Location() == QUDA_CPU_FIELD_LOCATION) {
      for (int i=0; i<nDimComms; i++) {
        if (comm_dim_partitioned(i)) {
          send_back[i] = sendbuf[2*i + 0];
          send_fwd[i] = sendbuf[2*i + 1];
          recv_fwd[i] = ghost[2*i + 1];
          recv_back[i] = ghost[2*i + 0];
        } else if (no_comms_fill) {
          memcpy(ghost[2*i+1], sendbuf[2*i+0], bytes[i]);
          memcpy(ghost[2*i+0], sendbuf[2*i+1], bytes[i]);
        }
      }
    } else { // FIXME add GPU_COMMS support
      if (total_bytes) {
        total_send = pool_pinned_malloc(total_bytes);
        total_recv = pool_pinned_malloc(total_bytes);
      }
      size_t offset = 0;
      for (int i=0; i<nDimComms; i++) {
        if (comm_dim_partitioned(i)) {
          send_back[i] = static_cast<char*>(total_send) + offset;
          recv_back[i] = static_cast<char*>(total_recv) + offset;
          offset += bytes[i];
          send_fwd[i] = static_cast<char*>(total_send) + offset;
          recv_fwd[i] = static_cast<char*>(total_recv) + offset;
          offset += bytes[i];
          if (fine_grained_memcpy) {
            qudaMemcpy(send_back[i], sendbuf[2*i + 0], bytes[i], cudaMemcpyDeviceToHost);
            qudaMemcpy(send_fwd[i], sendbuf[2*i + 1], bytes[i], cudaMemcpyDeviceToHost);
          }
        } else if (no_comms_fill) {
          qudaMemcpy(ghost[2*i+1], sendbuf[2*i+0], bytes[i], cudaMemcpyDeviceToDevice);
          qudaMemcpy(ghost[2*i+0], sendbuf[2*i+1], bytes[i], cudaMemcpyDeviceToDevice);
        }
      }
      if (!fine_grained_memcpy && total_bytes) {
        // find first non-zero pointer
        void *send_ptr = nullptr;
        for (int i=0; i<nDimComms; i++) {
          if (comm_dim_partitioned(i)) {
            send_ptr = sendbuf[2*i];
            break;
          }
        }
        qudaMemcpy(total_send, send_ptr, total_bytes, cudaMemcpyDeviceToHost);
      }
    }

    for (int i=0; i<nDimComms; i++) {
      if (!comm_dim_partitioned(i)) continue;
      mh_send_fwd[i] = comm_declare_send_relative(send_fwd[i], i, +1, bytes[i]);
      mh_send_back[i] = comm_declare_send_relative(send_back[i], i, -1, bytes[i]);
      mh_from_fwd[i] = comm_declare_receive_relative(recv_fwd[i], i, +1, bytes[i]);
      mh_from_back[i] = comm_declare_receive_relative(recv_back[i], i, -1, bytes[i]);
    }

    for (int i=0; i<nDimComms; i++) {
      if (comm_dim_partitioned(i)) {
        comm_start(mh_from_back[i]);
        comm_start(mh_from_fwd[i]);
        comm_start(mh_send_fwd[i]);
        comm_start(mh_send_back[i]);
      }
    }

    for (int i=0; i<nDimComms; i++) {
      if (!comm_dim_partitioned(i)) continue;
      comm_wait(mh_send_fwd[i]);
      comm_wait(mh_send_back[i]);
      comm_wait(mh_from_back[i]);
      comm_wait(mh_from_fwd[i]);
    }

    if (Location() == QUDA_CUDA_FIELD_LOCATION) {
      for (int i=0; i<nDimComms; i++) {
        if (!comm_dim_partitioned(i)) continue;
        if (fine_grained_memcpy) {
          qudaMemcpy(ghost[2*i+0], recv_back[i], bytes[i], cudaMemcpyHostToDevice);
          qudaMemcpy(ghost[2*i+1], recv_fwd[i], bytes[i], cudaMemcpyHostToDevice);
        }
      }

      if (!fine_grained_memcpy && total_bytes) {
        // find first non-zero pointer
        void *ghost_ptr = nullptr;
        for (int i=0; i<nDimComms; i++) {
          if (comm_dim_partitioned(i)) {
            ghost_ptr = ghost[2*i];
            break;
          }
        }
        qudaMemcpy(ghost_ptr, total_recv, total_bytes, cudaMemcpyHostToDevice);
      }

      if (total_bytes) {
        pool_pinned_free(total_send);
        pool_pinned_free(total_recv);
      }
    }

    for (int i=0; i<nDimComms; i++) {
      if (!comm_dim_partitioned(i)) continue;
      comm_free(mh_send_fwd[i]);
      comm_free(mh_send_back[i]);
      comm_free(mh_from_back[i]);
      comm_free(mh_from_fwd[i]);
    }
  }
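
  // Note on the two copy strategies above: with fine_grained_memcpy = false
  // the per-dimension halos are packed contiguously into total_send /
  // total_recv, so one large qudaMemcpy per direction replaces many small
  // ones, trading pinned staging memory for lower copy-launch latency.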

  // For kernels with precision conversion built in
  void ColorSpinorField::checkField(const ColorSpinorField &a, const ColorSpinorField &b)
  {
    if (a.Length() != b.Length()) {
      errorQuda("checkSpinor: lengths do not match: %lu %lu", a.Length(), b.Length());
    }

    if (a.Ncolor() != b.Ncolor()) {
      errorQuda("checkSpinor: colors do not match: %d %d", a.Ncolor(), b.Ncolor());
    }

    if (a.Nspin() != b.Nspin()) {
      errorQuda("checkSpinor: spins do not match: %d %d", a.Nspin(), b.Nspin());
    }

    if (a.Nvec() != b.Nvec()) {
      errorQuda("checkSpinor: nVec does not match: %d %d", a.Nvec(), b.Nvec());
    }

    if (a.TwistFlavor() != b.TwistFlavor()) {
      errorQuda("checkSpinor: twist flavors do not match: %d %d", a.TwistFlavor(), b.TwistFlavor());
    }
  }
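
  // Illustrative use (a sketch with assumed field names): callers invoke this
  // before mixing two fields in a kernel,
  //
  //   ColorSpinorField::checkField(out, in);  // aborts on mismatched meta data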

  const ColorSpinorField& ColorSpinorField::Even() const {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return even subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return even subset of QDPJIT field");
    return *even;
  }

  const ColorSpinorField& ColorSpinorField::Odd() const {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return odd subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return odd subset of QDPJIT field");
    return *odd;
  }

  ColorSpinorField& ColorSpinorField::Even() {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return even subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return even subset of QDPJIT field");
    return *even;
  }

  ColorSpinorField& ColorSpinorField::Odd() {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return odd subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return odd subset of QDPJIT field");
    return *odd;
  }

  ColorSpinorField& ColorSpinorField::Component(const int idx) const {
    if (this->IsComposite()) {
      if (idx < this->CompositeDim()) { // setup eigenvector from the set
        return *(dynamic_cast<ColorSpinorField*>(components[idx]));
      } else {
        errorQuda("Incorrect component index...");
      }
    }
    errorQuda("Cannot get requested component");
    exit(-1);
  }

  ColorSpinorField& ColorSpinorField::Component(const int idx) {
    if (this->IsComposite()) {
      if (idx < this->CompositeDim()) { // setup eigenvector from the set
        return *(dynamic_cast<ColorSpinorField*>(components[idx]));
      } else {
        errorQuda("Incorrect component index...");
      }
    }
    errorQuda("Cannot get requested component");
    exit(-1);
  }

  void* ColorSpinorField::Ghost(const int i) {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghost[i];
  }

  const void* ColorSpinorField::Ghost(const int i) const {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghost[i];
  }

  void* ColorSpinorField::GhostNorm(const int i) {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghostNorm[i];
  }

  const void* ColorSpinorField::GhostNorm(const int i) const {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghostNorm[i];
  }

  void* const* ColorSpinorField::Ghost() const {
    return ghost_buf;
  }

  /*
    Convert from 1-dimensional index to the n-dimensional spatial index.
    With full fields, we assume that the field is even-odd ordered.  The
    lattice coordinates that are computed here are full-field
    coordinates.
  */
  void ColorSpinorField::LatticeIndex(int *y, int i) const {
    int z[QUDA_MAX_DIM];
    memcpy(z, x, QUDA_MAX_DIM*sizeof(int));

    // parity is the slowest running dimension
    int parity = 0;
    if (siteSubset == QUDA_FULL_SITE_SUBSET) z[0] /= 2;

    for (int d=0; d<nDim; d++) {
      y[d] = i % z[d];
      i /= z[d];
    }

    parity = i;

    // convert into the full-field lattice coordinate
    int oddBit = parity;
    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      for (int d=1; d<nDim; d++) oddBit += y[d];
      oddBit = oddBit & 1;
    }
    y[0] = 2*y[0] + oddBit; // compute the full x coordinate
  }

  /*
    Convert from n-dimensional spatial index to the 1-dimensional index.
    With full fields, we assume that the field is even-odd ordered.  The
    input lattice coordinates are always full-field coordinates.
  */
  void ColorSpinorField::OffsetIndex(int &i, int *y) const {

    int parity = 0;
    int z[QUDA_MAX_DIM];
    memcpy(z, x, QUDA_MAX_DIM*sizeof(int));
    int savey0 = y[0];

    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      for (int d=0; d<nDim; d++) parity += y[d];
      parity = parity & 1;
      y[0] /= 2;
      z[0] /= 2;
    }

    i = parity;
    for (int d=nDim-1; d>=0; d--) {
      i = z[d]*i + y[d];
      //printf("z[%d]=%d y[%d]=%d ", d, z[d], d, y[d]);
    }

    //printf("\nparity = %d\n", parity);

    if (siteSubset == QUDA_FULL_SITE_SUBSET) y[0] = savey0;
  }
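
  // Round-trip sketch (index value assumed): for a full-subset field,
  // LatticeIndex() and OffsetIndex() are mutually inverse, so for any valid
  // linear offset i,
  //
  //   int y[QUDA_MAX_DIM], j;
  //   field.LatticeIndex(y, i);  // linear offset -> full-field coordinates
  //   field.OffsetIndex(j, y);   // coordinates -> linear offset, j == i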

  ColorSpinorField* ColorSpinorField::Create(const ColorSpinorParam &param)
  {
    ColorSpinorField *field = nullptr;
    if (param.location == QUDA_CPU_FIELD_LOCATION) {
      field = new cpuColorSpinorField(param);
    } else if (param.location == QUDA_CUDA_FIELD_LOCATION) {
      field = new cudaColorSpinorField(param);
    } else {
      errorQuda("Invalid field location %d", param.location);
    }

    return field;
  }

  ColorSpinorField* ColorSpinorField::Create(const ColorSpinorField &src, const ColorSpinorParam &param)
  {
    ColorSpinorField *field = nullptr;
    if (param.location == QUDA_CPU_FIELD_LOCATION) {
      field = new cpuColorSpinorField(src, param);
    } else if (param.location == QUDA_CUDA_FIELD_LOCATION) {
      field = new cudaColorSpinorField(src, param);
    } else {
      errorQuda("Invalid field location %d", param.location);
    }

    return field;
  }
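
  // Illustrative factory use (a sketch, values assumed): the location member
  // selects the concrete subclass, so callers need not name it directly,
  //
  //   param.location = QUDA_CUDA_FIELD_LOCATION;
  //   ColorSpinorField *v = ColorSpinorField::Create(param);  // cudaColorSpinorField
  //   delete v;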

  ColorSpinorField* ColorSpinorField::CreateAlias(const ColorSpinorParam &param_)
  {
    if (param_.Precision() > precision)
      errorQuda("Cannot create an alias to source with lower precision than the alias");
    ColorSpinorParam param(param_);
    param.create = QUDA_REFERENCE_FIELD_CREATE;
    param.v = V();

    // if norm field in the source exists, use it, else use the second
    // half of main field for norm storage, ensuring that the start of
    // the norm field is on an alignment boundary if we're using an
    // internal field
    if (param.Precision() < QUDA_SINGLE_PRECISION) {
      auto norm_offset = (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) ?
        (siteSubset == QUDA_FULL_SITE_SUBSET ? 2 * ALIGNMENT_ADJUST(bytes / 4) : ALIGNMENT_ADJUST(bytes / 2)) :
        0;
      param.norm = Norm() ? Norm() : static_cast<char *>(V()) + norm_offset;
    }

    auto alias = ColorSpinorField::Create(param);

    if (alias->Bytes() > Bytes()) errorQuda("Alias footprint %lu greater than source %lu", alias->Bytes(), Bytes());
    if (alias->Precision() < QUDA_SINGLE_PRECISION) {
      // check that the norm does not overlap with the body
      if (static_cast<char *>(alias->V()) + alias->Bytes() > alias->Norm())
        errorQuda("Overlap between alias body and norm");
      // check that the norm does not fall off the end
      if (static_cast<char *>(alias->Norm()) + alias->NormBytes() > static_cast<char *>(V()) + Bytes())
        errorQuda("Norm is not contained in the source field");
    }

    return alias;
  }
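
  // Layout note for the alias above (internal-field case): for fixed-point
  // aliases the norm array is carved out of the second half of the source
  // allocation, aligned via ALIGNMENT_ADJUST, and the two bounds checks
  // guarantee the alias body and norms both stay inside the source footprint.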

  ColorSpinorField* ColorSpinorField::CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec,
                                                   QudaPrecision new_precision, QudaFieldLocation new_location,
                                                   QudaMemoryType new_mem_type) {
    ColorSpinorParam coarseParam(*this);
    for (int d=0; d<nDim; d++) coarseParam.x[d] = x[d]/geoBlockSize[d];

    int geoBlockVolume = 1;
    for (int d = 0; d < nDim; d++) { geoBlockVolume *= geoBlockSize[d]; }

    // Detect if the "coarse" op is the Kahler-Dirac op or something else
    // that still acts on a fine staggered ColorSpinorField
    if (geoBlockVolume == 1 && Nvec == nColor && nSpin == 1) {
      coarseParam.nSpin = nSpin;
      coarseParam.nColor = nColor;
    } else {
      coarseParam.nSpin = (nSpin == 1) ? 2 : (nSpin / spinBlockSize); // coarsening staggered check
      coarseParam.nColor = Nvec;
    }

    coarseParam.siteSubset = QUDA_FULL_SITE_SUBSET; // coarse grid is always full
    coarseParam.create = QUDA_ZERO_FIELD_CREATE;

    // if new precision is not set, use this->precision
    new_precision = (new_precision == QUDA_INVALID_PRECISION) ? Precision() : new_precision;

    // if new location is not set, use this->location
    new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location() : new_location;

    // for GPU fields, always use native ordering to ensure coalescing
    if (new_location == QUDA_CUDA_FIELD_LOCATION) coarseParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
    else coarseParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;

    coarseParam.setPrecision(new_precision);

    // set where we allocate the field
    coarseParam.mem_type = (new_mem_type != QUDA_MEMORY_INVALID) ? new_mem_type :
      (new_location == QUDA_CUDA_FIELD_LOCATION ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_PINNED);

    ColorSpinorField *coarse = NULL;
    if (new_location == QUDA_CPU_FIELD_LOCATION) {
      coarse = new cpuColorSpinorField(coarseParam);
    } else if (new_location == QUDA_CUDA_FIELD_LOCATION) {
      coarse = new cudaColorSpinorField(coarseParam);
    } else {
      errorQuda("Invalid field location %d", new_location);
    }

    return coarse;
  }
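
  // Worked example (block sizes assumed): with geoBlockSize = {4,4,4,4},
  // spinBlockSize = 2 and Nvec = 24, a 16^3x32 fine field coarsens to a
  // 4x4x4x8 lattice with nSpin = 4/2 = 2 and nColor = Nvec = 24; the
  // geoBlockVolume == 1 branch instead keeps nSpin/nColor unchanged for the
  // staggered Kahler-Dirac case.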

  ColorSpinorField* ColorSpinorField::CreateFine(const int *geoBlockSize, int spinBlockSize, int Nvec,
                                                 QudaPrecision new_precision, QudaFieldLocation new_location,
                                                 QudaMemoryType new_mem_type) {
    ColorSpinorParam fineParam(*this);
    for (int d=0; d<nDim; d++) fineParam.x[d] = x[d] * geoBlockSize[d];
    fineParam.nSpin = nSpin * spinBlockSize;
    fineParam.nColor = Nvec;
    fineParam.siteSubset = QUDA_FULL_SITE_SUBSET; // FIXME fine grid is always full
    fineParam.create = QUDA_ZERO_FIELD_CREATE;

    // if new precision is not set, use this->precision
    new_precision = (new_precision == QUDA_INVALID_PRECISION) ? Precision() : new_precision;

    // if new location is not set, use this->location
    new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location() : new_location;

    // for GPU fields, always use native ordering to ensure coalescing
    if (new_location == QUDA_CUDA_FIELD_LOCATION) {
      fineParam.setPrecision(new_precision, new_precision, true);
    } else {
      fineParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
      fineParam.setPrecision(new_precision);
    }

    // set where we allocate the field
    fineParam.mem_type = (new_mem_type != QUDA_MEMORY_INVALID) ? new_mem_type :
      (new_location == QUDA_CUDA_FIELD_LOCATION ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_PINNED);

    ColorSpinorField *fine = NULL;
    if (new_location == QUDA_CPU_FIELD_LOCATION) {
      fine = new cpuColorSpinorField(fineParam);
    } else if (new_location == QUDA_CUDA_FIELD_LOCATION) {
      fine = new cudaColorSpinorField(fineParam);
    } else {
      errorQuda("Invalid field location %d", new_location);
    }
    return fine;
  }

  std::ostream& operator<<(std::ostream &out, const ColorSpinorField &a) {
    out << "typeid = " << typeid(a).name() << std::endl;
    out << "nColor = " << a.nColor << std::endl;
    out << "nSpin = " << a.nSpin << std::endl;
    out << "twistFlavor = " << a.twistFlavor << std::endl;
    out << "nDim = " << a.nDim << std::endl;
    for (int d=0; d<a.nDim; d++) out << "x[" << d << "] = " << a.x[d] << std::endl;
    out << "volume = " << a.volume << std::endl;
    out << "pc_type = " << a.pc_type << std::endl;
    out << "suggested_parity = " << a.suggested_parity << std::endl;
    out << "precision = " << a.precision << std::endl;
    out << "ghost_precision = " << a.ghost_precision << std::endl;
    out << "pad = " << a.pad << std::endl;
    out << "stride = " << a.stride << std::endl;
    out << "real_length = " << a.real_length << std::endl;
    out << "length = " << a.length << std::endl;
    out << "bytes = " << a.bytes << std::endl;
    out << "norm_bytes = " << a.norm_bytes << std::endl;
    out << "siteSubset = " << a.siteSubset << std::endl;
    out << "siteOrder = " << a.siteOrder << std::endl;
    out << "fieldOrder = " << a.fieldOrder << std::endl;
    out << "gammaBasis = " << a.gammaBasis << std::endl;
    out << "Is composite = " << a.composite_descr.is_composite << std::endl;
    if (a.composite_descr.is_composite)
    {
      out << "Composite Dim = " << a.composite_descr.dim << std::endl;
      out << "Composite Volume = " << a.composite_descr.volume << std::endl;
      out << "Composite Stride = " << a.composite_descr.stride << std::endl;
      out << "Composite Length = " << a.composite_descr.length << std::endl;
    }
    out << "Is component = " << a.composite_descr.is_component << std::endl;
    if (a.composite_descr.is_component) out << "Component ID = " << a.composite_descr.id << std::endl;
    return out;
  }

} // namespace quda