QUDA  0.9.0
color_spinor_field.cpp
Go to the documentation of this file.
1 #include <color_spinor_field.h>
2 #include <string.h>
3 #include <iostream>
4 #include <typeinfo>
5 
6 namespace quda {
7 
8  /*ColorSpinorField::ColorSpinorField() : init(false) {
9 
10  }*/
11 
13  field.fill(*this);
14  }
15 
17  : LatticeField(param), init(false), init_ghost_zone(false), v(0), norm(0),
18  ghost( ), ghostNorm( ), ghostFace( ),
19  bytes(0), norm_bytes(0), even(0), odd(0),
20  composite_descr(param.is_composite, param.composite_dim, param.is_component, param.component_id),
21  components(0)
22  {
23  create(param.nDim, param.x, param.nColor, param.nSpin, param.twistFlavor,
24  param.precision, param.pad, param.siteSubset, param.siteOrder,
25  param.fieldOrder, param.gammaBasis, param.PCtype);
26  }
27 
29  : LatticeField(field), init(false), init_ghost_zone(false), v(0), norm(0),
30  ghost( ), ghostNorm( ), ghostFace( ),
31  bytes(0), norm_bytes(0), even(0), odd(0),
32  composite_descr(field.composite_descr), components(0)
33  {
34  create(field.nDim, field.x, field.nColor, field.nSpin, field.twistFlavor,
35  field.precision, field.pad, field.siteSubset, field.siteOrder,
36  field.fieldOrder, field.gammaBasis, field.PCtype);
37  }
38 
40  destroy();
41  }
42 
43  void ColorSpinorField::createGhostZone(int nFace, bool spin_project) const {
44 
45  // note need to check for ghost_precision switch when merged into feature/multigrid
46  if (typeid(*this) == typeid(cpuColorSpinorField) || init_ghost_zone) return;
47 
48  // For Wilson we half the number of effective faces if the fields are spin projected.
49  int num_faces = ((nSpin == 4 && spin_project) ? 1 : 2) * nFace;
50  int num_norm_faces = 2*nFace;
51 
52  // calculate size of ghost zone required
53  int ghostVolume = 0;
54  int dims = nDim == 5 ? (nDim - 1) : nDim;
55  int x5 = nDim == 5 ? x[4] : 1;
56  for (int i=0; i<dims; i++) {
57  ghostFace[i] = 0;
58  if (comm_dim_partitioned(i)) {
59  ghostFace[i] = 1;
60  for (int j=0; j<dims; j++) {
61  if (i==j) continue;
62  ghostFace[i] *= x[j];
63  }
64  ghostFace[i] *= x5;
65  if (i==0 && siteSubset != QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
66  ghostVolume += ghostFace[i];
67  }
68  if (i==0) {
69  ghostOffset[i][0] = 0;
70  } else {
72  ghostOffset[i][0] = (ghostNormOffset[i-1][1] + num_norm_faces*ghostFace[i-1]/2)*sizeof(float)/sizeof(short);
73  // Ensure that start of ghostOffset is aligned on four word boundaries (check if this is needed)
74  ghostOffset[i][0] = 4*((ghostOffset[i][0] + 3)/4);
75  } else {
76  ghostOffset[i][0] = ghostOffset[i-1][0] + num_faces*ghostFace[i-1]*nSpin*nColor*2;
77  }
78  }
79 
81  ghostNormOffset[i][0] = (ghostOffset[i][0] + (num_faces*ghostFace[i]*nSpin*nColor*2/2))*sizeof(short)/sizeof(float);
82  ghostOffset[i][1] = (ghostNormOffset[i][0] + num_norm_faces*ghostFace[i]/2)*sizeof(float)/sizeof(short);
83  // Ensure that start of ghostOffset is aligned on four word boundaries (check if this is needed)
84  ghostOffset[i][1] = 4*((ghostOffset[i][1] + 3)/4);
85  ghostNormOffset[i][1] = (ghostOffset[i][1] + (num_faces*ghostFace[i]*nSpin*nColor*2/2))*sizeof(short)/sizeof(float);
86  } else {
87  ghostOffset[i][1] = ghostOffset[i][0] + num_faces*ghostFace[i]*nSpin*nColor*2/2;
88  }
89 
90  int Nint = nColor * nSpin * 2 / (nSpin == 4 && spin_project ? 2 : 1); // number of internal degrees of freedom
91  ghost_face_bytes[i] = nFace*ghostFace[i]*Nint*precision;
92  if (precision == QUDA_HALF_PRECISION) ghost_face_bytes[i] += nFace*ghostFace[i]*sizeof(float);
93 
94  if(GhostOffset(i,0)%FieldOrder()) errorQuda("ghostOffset(%d,0) %d is not a multiple of FloatN\n", i, GhostOffset(i,0));
95  if(GhostOffset(i,1)%FieldOrder()) errorQuda("ghostOffset(%d,1) %d is not a multiple of FloatN\n", i, GhostOffset(i,1));
96 
97  } // dim
98 
99  int ghostNormVolume = num_norm_faces * ghostVolume;
100  ghostVolume *= num_faces;
101 
102  size_t ghost_length = ghostVolume*nColor*nSpin*2;
103  size_t ghost_norm_length = (precision == QUDA_HALF_PRECISION) ? ghostNormVolume : 0;
104 
105  if (getVerbosity() == QUDA_DEBUG_VERBOSE) {
106  printfQuda("Allocated ghost volume = %d, ghost norm volume %d\n", ghostVolume, ghostNormVolume);
107  printfQuda("ghost length = %lu, ghost norm length = %lu\n", ghost_length, ghost_norm_length);
108  }
109 
110  ghost_bytes = (size_t)ghost_length*precision;
111  if (precision == QUDA_HALF_PRECISION) ghost_bytes += ghost_norm_length*sizeof(float);
113 
114  { // compute temporaries needed by dslash and packing kernels
115  auto &X = dslash_constant.X;
116  for (int dim=0; dim<nDim; dim++) X[dim] = x[dim];
117  for (int dim=nDim; dim<QUDA_MAX_DIM; dim++) X[dim] = 1;
118  if (siteSubset == QUDA_PARITY_SITE_SUBSET) X[0] = 2*X[0];
119 
120  for (int i=0; i<nDim; i++) dslash_constant.Xh[i] = X[i]/2;
121 
122  dslash_constant.Ls = X[4];
123  dslash_constant.volume_4d_cb = volumeCB / (nDim == 5 ? x[4] : 1);
125 
126  int face[4];
127  for (int dim=0; dim<4; dim++) {
128  for (int j=0; j<4; j++) face[j] = X[j];
129  face[dim] = nFace;
130  dslash_constant.face_X[dim] = face[0];
131  dslash_constant.face_Y[dim] = face[1];
132  dslash_constant.face_Z[dim] = face[2];
133  dslash_constant.face_T[dim] = face[3];
137  }
138 
139  dslash_constant.Vh = (X[3]*X[2]*X[1]*X[0])/2;
140  dslash_constant.ghostFace[0] = (X[1]*X[2]*X[3])/2;
141  dslash_constant.ghostFace[1] = (X[0]*X[2]*X[3])/2;
142  dslash_constant.ghostFace[2] = (X[0]*X[1]*X[3])/2;
143  dslash_constant.ghostFace[3] = (X[0]*X[1]*X[2])/2;
144 
145  dslash_constant.X2X1 = X[1]*X[0];
146  dslash_constant.X3X2X1 = X[2]*X[1]*X[0];
147  dslash_constant.X2X1mX1 = (X[1]-1)*X[0];
148  dslash_constant.X3X2X1mX2X1 = (X[2]-1)*X[1]*X[0];
149  dslash_constant.X4X3X2X1mX3X2X1 = (X[3]-1)*X[2]*X[1]*X[0];
151 
152  // used by indexFromFaceIndexStaggered
153  dslash_constant.dims[0][0]=X[1];
154  dslash_constant.dims[0][1]=X[2];
155  dslash_constant.dims[0][2]=X[3];
156 
157  dslash_constant.dims[1][0]=X[0];
158  dslash_constant.dims[1][1]=X[2];
159  dslash_constant.dims[1][2]=X[3];
160 
161  dslash_constant.dims[2][0]=X[0];
162  dslash_constant.dims[2][1]=X[1];
163  dslash_constant.dims[2][2]=X[3];
164 
165  dslash_constant.dims[3][0]=X[0];
166  dslash_constant.dims[3][1]=X[1];
167  dslash_constant.dims[3][2]=X[2];
168  }
169  init_ghost_zone = true;
170 
171  } // createGhostZone
172 
173  void ColorSpinorField::create(int Ndim, const int *X, int Nc, int Ns, QudaTwistFlavorType Twistflavor,
174  QudaPrecision Prec, int Pad, QudaSiteSubset siteSubset,
175  QudaSiteOrder siteOrder, QudaFieldOrder fieldOrder,
176  QudaGammaBasis gammaBasis, QudaDWFPCType DWFPC) {
177  this->siteSubset = siteSubset;
178  this->siteOrder = siteOrder;
179  this->fieldOrder = fieldOrder;
180  this->gammaBasis = gammaBasis;
181 
182  if (Ndim > QUDA_MAX_DIM){
183  errorQuda("Number of dimensions nDim = %d too great", Ndim);
184  }
185  nDim = Ndim;
186  nColor = Nc;
187  nSpin = Ns;
188  twistFlavor = Twistflavor;
189 
190  PCtype = DWFPC;
191 
192  precision = Prec;
193  volume = 1;
194  for (int d=0; d<nDim; d++) {
195  x[d] = X[d];
196  volume *= x[d];
197  }
199 
201  errorQuda("Must be two flavors for non-degenerate twisted mass spinor (while provided with %d number of components)\n", x[4]);//two flavors
202 
203  pad = Pad;
205  stride = volume/2 + pad; // padding is based on half volume
206  length = 2*stride*nColor*nSpin*2;
207  } else {
208  stride = volume + pad;
210  }
211 
212  real_length = volume*nColor*nSpin*2; // physical length
213 
214  bytes = (size_t)length * precision; // includes pads and ghost zones
216 
218  norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);
220  } else {
221  norm_bytes = 0;
222  }
223 
224  init = true;
225 
228 
229  if (composite_descr.is_component) errorQuda("\nComposite type is not implemented.\n");
230 
238 
243 
246  } else if (composite_descr.is_component) {
247  composite_descr.dim = 0;
248 
256  }
257 
258  setTuningString();
259  }
260 
262  char vol_tmp[TuneKey::volume_n];
263  int check;
264  check = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);
265  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
266  for (int d=1; d<nDim; d++) {
267  strcpy(vol_tmp, vol_string);
268  check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);
269  if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
270  }
271 
272  int aux_string_n = TuneKey::aux_n / 2;
273  char aux_tmp[aux_string_n];
274  check = snprintf(aux_string, aux_string_n, "vol=%d,stride=%d,precision=%d,Ns=%d,Nc=%d",
276  if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
277 
280  check = snprintf(aux_string, aux_string_n, "%s,TwistFlavour=%d", aux_tmp, twistFlavor);
281  if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
282  }
283  }
284 
286  init = false;
287  }
288 
290  if (&src != this) {
291  if(src.composite_descr.is_composite){
292  this->composite_descr.is_composite = true;
294  this->composite_descr.is_component = false;
295  this->composite_descr.id = 0;
296  }
297  else if(src.composite_descr.is_component){
298  this->composite_descr.is_composite = false;
299  this->composite_descr.dim = 0;
300  //this->composite_descr.is_component = false;
301  //this->composite_descr.id = 0;
302  }
303 
304  create(src.nDim, src.x, src.nColor, src.nSpin, src.twistFlavor,
305  src.precision, src.pad, src.siteSubset,
306  src.siteOrder, src.fieldOrder, src.gammaBasis, src.PCtype);
307  }
308  return *this;
309  }
310 
311  // Resets the attributes of this field if param disagrees (and is defined)
313  {
314  if (param.nColor != 0) nColor = param.nColor;
315  if (param.nSpin != 0) nSpin = param.nSpin;
316  if (param.twistFlavor != QUDA_TWIST_INVALID) twistFlavor = param.twistFlavor;
317 
318  if (param.PCtype != QUDA_PC_INVALID) PCtype = param.PCtype;
319 
320  if (param.precision != QUDA_INVALID_PRECISION) precision = param.precision;
321  if (param.nDim != 0) nDim = param.nDim;
322 
323  composite_descr.is_composite = param.is_composite;
324  composite_descr.is_component = param.is_component;
325  composite_descr.dim = param.is_composite ? param.composite_dim : 0;
326  composite_descr.id = param.component_id;
327 
328  volume = 1;
329  for (int d=0; d<nDim; d++) {
330  if (param.x[d] != 0) x[d] = param.x[d];
331  volume *= x[d];
332  }
333  volumeCB = param.siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;
334 
336  errorQuda("Must be two flavors for non-degenerate twisted mass spinor (provided with %d)\n", x[4]);
337 
338  if (param.pad != 0) pad = param.pad;
339 
340  if (param.siteSubset == QUDA_FULL_SITE_SUBSET) {
341  stride = volume/2 + pad;
342  length = 2*stride*nColor*nSpin*2;
343  } else if (param.siteSubset == QUDA_PARITY_SITE_SUBSET) {
344  stride = volume + pad;
346  } else {
347  //errorQuda("SiteSubset not defined %d", param.siteSubset);
348  //do nothing, not an error (can't remember why - need to document this sometime! )
349  }
350 
351  if (param.siteSubset != QUDA_INVALID_SITE_SUBSET) siteSubset = param.siteSubset;
352  if (param.siteOrder != QUDA_INVALID_SITE_ORDER) siteOrder = param.siteOrder;
353  if (param.fieldOrder != QUDA_INVALID_FIELD_ORDER) fieldOrder = param.fieldOrder;
354  if (param.gammaBasis != QUDA_INVALID_GAMMA_BASIS) gammaBasis = param.gammaBasis;
355 
357 
358  bytes = (size_t)length * precision; // includes pads
360 
362  norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);
364  } else {
365  norm_bytes = 0;
366  }
367 
376 
381 
384  } else {
391  }
392 
393  if (!init) errorQuda("Shouldn't be resetting a non-inited field\n");
394 
395  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
396  printfQuda("\nPrinting out reset field\n");
397  std::cout << *this << std::endl;
398  printfQuda("\n");
399  }
400 
401  setTuningString();
402  }
403 
404  // Fills the param with the contents of this field
406  param.location = Location();
407  param.nColor = nColor;
408  param.nSpin = nSpin;
409  param.twistFlavor = twistFlavor;
410  param.precision = precision;
411  param.nDim = nDim;
412 
413  param.is_composite = composite_descr.is_composite;
414  param.composite_dim = composite_descr.dim;
415  param.is_component = false;//always either a regular spinor or a composite object
416  param.component_id = 0;
417 
418  memcpy(param.x, x, QUDA_MAX_DIM*sizeof(int));
419  param.pad = pad;
420  param.siteSubset = siteSubset;
421  param.siteOrder = siteOrder;
422  param.fieldOrder = fieldOrder;
423  param.gammaBasis = gammaBasis;
424  param.PCtype = PCtype;
426  }
427 
428  void ColorSpinorField::exchange(void **ghost, void **sendbuf, int nFace) const {
429 
430  // FIXME: use LatticeField MsgHandles
432  MsgHandle *mh_from_back[4];
433  MsgHandle *mh_from_fwd[4];
435  size_t bytes[4];
436 
437  const int Ninternal = 2*nColor*nSpin;
438  size_t total_bytes = 0;
439  for (int i=0; i<nDimComms; i++) {
440  bytes[i] = siteSubset*nFace*surfaceCB[i]*Ninternal*precision;
441  if (comm_dim_partitioned(i)) total_bytes += 2*bytes[i]; // 2 for fwd/bwd
442  }
443 
444  void *total_send = nullptr;
445  void *total_recv = nullptr;
446  void *send_fwd[4];
447  void *send_back[4];
448  void *recv_fwd[4];
449  void *recv_back[4];
450 
451  // leave this option in there just in case
452  bool no_comms_fill = false;
453 
454  // If this is set to false, then we are assuming that the send and
455  // ghost buffers are in a single contiguous memory space. Setting
456  // to false means we aggregate all cudaMemcpys which reduces
457  // latency.
458  bool fine_grained_memcpy = false;
459 
461  for (int i=0; i<nDimComms; i++) {
462  if (comm_dim_partitioned(i)) {
463  send_back[i] = sendbuf[2*i + 0];
464  send_fwd[i] = sendbuf[2*i + 1];
465  recv_fwd[i] = ghost[2*i + 1];
466  recv_back[i] = ghost[2*i + 0];
467  } else if (no_comms_fill) {
468  memcpy(ghost[2*i+1], sendbuf[2*i+0], bytes[i]);
469  memcpy(ghost[2*i+0], sendbuf[2*i+1], bytes[i]);
470  }
471  }
472  } else { // FIXME add GPU_COMMS support
473  if (total_bytes) {
474  total_send = pool_pinned_malloc(total_bytes);
475  total_recv = pool_pinned_malloc(total_bytes);
476  }
477  size_t offset = 0;
478  for (int i=0; i<nDimComms; i++) {
479  if (comm_dim_partitioned(i)) {
480  send_back[i] = static_cast<char*>(total_send) + offset;
481  recv_back[i] = static_cast<char*>(total_recv) + offset;
482  offset += bytes[i];
483  send_fwd[i] = static_cast<char*>(total_send) + offset;
484  recv_fwd[i] = static_cast<char*>(total_recv) + offset;
485  offset += bytes[i];
486  if (fine_grained_memcpy) {
487  qudaMemcpy(send_back[i], sendbuf[2*i + 0], bytes[i], cudaMemcpyDeviceToHost);
488  qudaMemcpy(send_fwd[i], sendbuf[2*i + 1], bytes[i], cudaMemcpyDeviceToHost);
489  }
490  } else if (no_comms_fill) {
491  qudaMemcpy(ghost[2*i+1], sendbuf[2*i+0], bytes[i], cudaMemcpyDeviceToDevice);
492  qudaMemcpy(ghost[2*i+0], sendbuf[2*i+1], bytes[i], cudaMemcpyDeviceToDevice);
493  }
494  }
495  if (!fine_grained_memcpy && total_bytes) {
496  // find first non-zero pointer
497  void *send_ptr = nullptr;
498  for (int i=0; i<nDimComms; i++) {
499  if (comm_dim_partitioned(i)) {
500  send_ptr = sendbuf[2*i];
501  break;
502  }
503  }
504  qudaMemcpy(total_send, send_ptr, total_bytes, cudaMemcpyDeviceToHost);
505  }
506  }
507 
508  for (int i=0; i<nDimComms; i++) {
509  if (!comm_dim_partitioned(i)) continue;
510  mh_send_fwd[i] = comm_declare_send_relative(send_fwd[i], i, +1, bytes[i]);
511  mh_send_back[i] = comm_declare_send_relative(send_back[i], i, -1, bytes[i]);
512  mh_from_fwd[i] = comm_declare_receive_relative(recv_fwd[i], i, +1, bytes[i]);
513  mh_from_back[i] = comm_declare_receive_relative(recv_back[i], i, -1, bytes[i]);
514  }
515 
516  for (int i=0; i<nDimComms; i++) {
517  if (comm_dim_partitioned(i)) {
518  comm_start(mh_from_back[i]);
519  comm_start(mh_from_fwd[i]);
522  }
523  }
524 
525  for (int i=0; i<nDimComms; i++) {
526  if (!comm_dim_partitioned(i)) continue;
529  comm_wait(mh_from_back[i]);
530  comm_wait(mh_from_fwd[i]);
531  }
532 
534  for (int i=0; i<nDimComms; i++) {
535  if (!comm_dim_partitioned(i)) continue;
536  if (fine_grained_memcpy) {
537  qudaMemcpy(ghost[2*i+0], recv_back[i], bytes[i], cudaMemcpyHostToDevice);
538  qudaMemcpy(ghost[2*i+1], recv_fwd[i], bytes[i], cudaMemcpyHostToDevice);
539  }
540  }
541 
542  if (!fine_grained_memcpy && total_bytes) {
543  // find first non-zero pointer
544  void *ghost_ptr = nullptr;
545  for (int i=0; i<nDimComms; i++) {
546  if (comm_dim_partitioned(i)) {
547  ghost_ptr = ghost[2*i];
548  break;
549  }
550  }
551  qudaMemcpy(ghost_ptr, total_recv, total_bytes, cudaMemcpyHostToDevice);
552  }
553 
554  if (total_bytes) {
555  pool_pinned_free(total_send);
556  pool_pinned_free(total_recv);
557  }
558  }
559 
560  for (int i=0; i<nDimComms; i++) {
561  if (!comm_dim_partitioned(i)) continue;
564  comm_free(mh_from_back[i]);
565  comm_free(mh_from_fwd[i]);
566  }
567  }
568 
571  if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) return true;
572  } else if (precision == QUDA_SINGLE_PRECISION ||
574  if (nSpin == 4) {
575  if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) return true;
576  } else if (nSpin == 2) {
577  if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) return true;
578  } else if (nSpin == 1) {
579  if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) return true;
580  }
581  }
582  return false;
583  }
584 
585  // For kernels with precision conversion built in
587  if (a.Length() != b.Length()) {
588  errorQuda("checkSpinor: lengths do not match: %lu %lu", a.Length(), b.Length());
589  }
590 
591  if (a.Ncolor() != b.Ncolor()) {
592  errorQuda("checkSpinor: colors do not match: %d %d", a.Ncolor(), b.Ncolor());
593  }
594 
595  if (a.Nspin() != b.Nspin()) {
596  errorQuda("checkSpinor: spins do not match: %d %d", a.Nspin(), b.Nspin());
597  }
598 
599  if (a.TwistFlavor() != b.TwistFlavor()) {
600  errorQuda("checkSpinor: twist flavors do not match: %d %d", a.TwistFlavor(), b.TwistFlavor());
601  }
602  }
603 
606  errorQuda("Cannot return even subset of %d subset", siteSubset);
608  errorQuda("Cannot return even subset of QDPJIT field");
609  return *even;
610  }
611 
614  errorQuda("Cannot return odd subset of %d subset", siteSubset);
616  errorQuda("Cannot return even subset of QDPJIT field");
617  return *odd;
618  }
619 
622  errorQuda("Cannot return even subset of %d subset", siteSubset);
624  errorQuda("Cannot return even subset of QDPJIT field");
625  return *even;
626  }
627 
630  errorQuda("Cannot return odd subset of %d subset", siteSubset);
632  errorQuda("Cannot return even subset of QDPJIT field");
633  return *odd;
634  }
635 
637  if (this->IsComposite()) {
638  if (idx < this->CompositeDim()) { // setup eigenvector form the set
639  return *(dynamic_cast<ColorSpinorField*>(components[idx]));
640  }
641  else{
642  errorQuda("Incorrect component index...");
643  }
644  }
645  errorQuda("Cannot get requested component");
646  exit(-1);
647  }
648 
650  if (this->IsComposite()) {
651  if (idx < this->CompositeDim()) { // setup eigenvector form the set
652  return *(dynamic_cast<ColorSpinorField*>(components[idx]));
653  }
654  else{
655  errorQuda("Incorrect component index...");
656  }
657  }
658  errorQuda("Cannot get requested component");
659  exit(-1);
660  }
661 
662 
663  void* ColorSpinorField::Ghost(const int i) {
664  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
665  return ghost[i];
666  }
667 
668  const void* ColorSpinorField::Ghost(const int i) const {
669  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
670  return ghost[i];
671  }
672 
673 
674  void* ColorSpinorField::GhostNorm(const int i){
675  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
676  return ghostNorm[i];
677  }
678 
679  const void* ColorSpinorField::GhostNorm(const int i) const{
680  if(siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported",siteSubset);
681  return ghostNorm[i];
682  }
683 
684  void* const* ColorSpinorField::Ghost() const {
685  return ghost_buf;
686  }
687 
688  /*
689  Convert from 1-dimensional index to the n-dimensional spatial index.
690  With full fields, we assume that the field is even-odd ordered. The
691  lattice coordinates that are computed here are full-field
692  coordinates.
693  */
694  void ColorSpinorField::LatticeIndex(int *y, int i) const {
695  int z[QUDA_MAX_DIM];
696  memcpy(z, x, QUDA_MAX_DIM*sizeof(int));
697 
698  // parity is the slowest running dimension
699  int parity = 0;
700  if (siteSubset == QUDA_FULL_SITE_SUBSET) z[0] /= 2;
701 
702  for (int d=0; d<nDim; d++) {
703  y[d] = i % z[d];
704  i /= z[d];
705  }
706 
707  parity = i;
708 
709  // convert into the full-field lattice coordinate
710  int oddBit = parity;
712  for (int d=1; d<nDim; d++) oddBit += y[d];
713  oddBit = oddBit & 1;
714  }
715  y[0] = 2*y[0] + oddBit; // compute the full x coordinate
716  }
717 
718  /*
719  Convert from n-dimensional spatial index to the 1-dimensional index.
720  With full fields, we assume that the field is even-odd ordered. The
721  input lattice coordinates are always full-field coordinates.
722  */
723  void ColorSpinorField::OffsetIndex(int &i, int *y) const {
724 
725  int parity = 0;
726  int z[QUDA_MAX_DIM];
727  memcpy(z, x, QUDA_MAX_DIM*sizeof(int));
728  int savey0 = y[0];
729 
731  for (int d=0; d<nDim; d++) parity += y[d];
732  parity = parity & 1;
733  y[0] /= 2;
734  z[0] /= 2;
735  }
736 
737  i = parity;
738  for (int d=nDim-1; d>=0; d--) {
739  i = z[d]*i + y[d];
740  //printf("z[%d]=%d y[%d]=%d ", d, z[d], d, y[d]);
741  }
742 
743  //printf("\nparity = %d\n", parity);
744 
745  if (siteSubset == QUDA_FULL_SITE_SUBSET) y[0] = savey0;
746  }
747 
749 
750  ColorSpinorField *field = NULL;
752  field = new cpuColorSpinorField(param);
753  } else if (param.location== QUDA_CUDA_FIELD_LOCATION) {
754  field = new cudaColorSpinorField(param);
755  } else {
756  errorQuda("Invalid field location %d", param.location);
757  }
758 
759  return field;
760  }
761 
763 
764  ColorSpinorField *field = NULL;
766  field = new cpuColorSpinorField(src, param);
767  } else if (param.location== QUDA_CUDA_FIELD_LOCATION) {
768  field = new cudaColorSpinorField(src, param);
769  } else {
770  errorQuda("Invalid field location %d", param.location);
771  }
772 
773  return field;
774  }
775 
776  ColorSpinorField* ColorSpinorField::CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec,
777  QudaFieldLocation new_location) {
778  ColorSpinorParam coarseParam(*this);
779  for (int d=0; d<nDim; d++) coarseParam.x[d] = x[d]/geoBlockSize[d];
780  coarseParam.nSpin = nSpin / spinBlockSize; //for staggered coarseParam.nSpin = nSpin
781 
782  coarseParam.nColor = Nvec;
783  coarseParam.siteSubset = QUDA_FULL_SITE_SUBSET; // coarse grid is always full
784  coarseParam.create = QUDA_ZERO_FIELD_CREATE;
785 
786  // if new location is not set, use this->location
787  new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location(): new_location;
788 
789  // for GPU fields, always use native ordering to ensure coalescing
790  if (new_location == QUDA_CUDA_FIELD_LOCATION) coarseParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
791 
792  ColorSpinorField *coarse = NULL;
793  if (new_location == QUDA_CPU_FIELD_LOCATION) {
794  coarse = new cpuColorSpinorField(coarseParam);
795  } else if (new_location== QUDA_CUDA_FIELD_LOCATION) {
796  coarse = new cudaColorSpinorField(coarseParam);
797  } else {
798  errorQuda("Invalid field location %d", new_location);
799  }
800 
801  return coarse;
802  }
803 
804  ColorSpinorField* ColorSpinorField::CreateFine(const int *geoBlockSize, int spinBlockSize, int Nvec,
805  QudaFieldLocation new_location) {
806  ColorSpinorParam fineParam(*this);
807  for (int d=0; d<nDim; d++) fineParam.x[d] = x[d] * geoBlockSize[d];
808  fineParam.nSpin = nSpin * spinBlockSize;
809  fineParam.nColor = Nvec;
810  fineParam.siteSubset = QUDA_FULL_SITE_SUBSET; // FIXME fine grid is always full
811  fineParam.create = QUDA_ZERO_FIELD_CREATE;
812 
813  // if new location is not set, use this->location
814  new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location(): new_location;
815 
816  // for GPU fields, always use native ordering to ensure coalescing
817  if (new_location == QUDA_CUDA_FIELD_LOCATION) {
818  fineParam.fieldOrder = (fineParam.nSpin==4 && fineParam.precision!= QUDA_DOUBLE_PRECISION) ?
820  }
821 
822  ColorSpinorField *fine = NULL;
823  if (new_location == QUDA_CPU_FIELD_LOCATION) {
824  fine = new cpuColorSpinorField(fineParam);
825  } else if (new_location == QUDA_CUDA_FIELD_LOCATION) {
826  fine = new cudaColorSpinorField(fineParam);
827  } else {
828  errorQuda("Invalid field location %d", new_location);
829  }
830  return fine;
831  }
832 
833  std::ostream& operator<<(std::ostream &out, const ColorSpinorField &a) {
834  out << "typedid = " << typeid(a).name() << std::endl;
835  out << "nColor = " << a.nColor << std::endl;
836  out << "nSpin = " << a.nSpin << std::endl;
837  out << "twistFlavor = " << a.twistFlavor << std::endl;
838  out << "nDim = " << a.nDim << std::endl;
839  for (int d=0; d<a.nDim; d++) out << "x[" << d << "] = " << a.x[d] << std::endl;
840  out << "volume = " << a.volume << std::endl;
841  out << "precision = " << a.precision << std::endl;
842  out << "pad = " << a.pad << std::endl;
843  out << "stride = " << a.stride << std::endl;
844  out << "real_length = " << a.real_length << std::endl;
845  out << "length = " << a.length << std::endl;
846  out << "bytes = " << a.bytes << std::endl;
847  out << "norm_bytes = " << a.norm_bytes << std::endl;
848  out << "siteSubset = " << a.siteSubset << std::endl;
849  out << "siteOrder = " << a.siteOrder << std::endl;
850  out << "fieldOrder = " << a.fieldOrder << std::endl;
851  out << "gammaBasis = " << a.gammaBasis << std::endl;
852  out << "Is composite = " << a.composite_descr.is_composite << std::endl;
853  if(a.composite_descr.is_composite)
854  {
855  out << "Composite Dim = " << a.composite_descr.dim << std::endl;
856  out << "Composite Volume = " << a.composite_descr.volume << std::endl;
857  out << "Composite Stride = " << a.composite_descr.stride << std::endl;
858  out << "Composite Length = " << a.composite_descr.length << std::endl;
859  }
860  out << "Is component = " << a.composite_descr.is_component << std::endl;
861  if(a.composite_descr.is_composite) out << "Component ID = " << a.composite_descr.id << std::endl;
862  out << "PC type = " << a.PCtype << std::endl;
863  return out;
864  }
865 
866 } // namespace quda
#define qudaMemcpy(dst, src, count, kind)
Definition: quda_cuda_api.h:32
CompositeColorSpinorField components
void OffsetIndex(int &i, int *y) const
#define pool_pinned_free(ptr)
Definition: malloc_quda.h:116
int ghostNormOffset[QUDA_MAX_DIM][2]
enum QudaPrecision_s QudaPrecision
int snprintf(char *__str, size_t __size, const char *__format,...) __attribute__((__format__(__printf__
char aux_tmp[TuneKey::aux_n]
Definition: blas_quda.cu:58
void * ghostNorm[2][QUDA_MAX_DIM]
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
Definition: complex_quda.h:896
int ghostFace[QUDA_MAX_DIM]
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
const void * src
#define errorQuda(...)
Definition: util_quda.h:90
void init()
Definition: blas_quda.cu:64
enum QudaFieldOrder_s QudaFieldOrder
CompositeColorSpinorFieldDescriptor composite_descr
used for deflation eigenvector sets etc.:
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
const ColorSpinorField & Even() const
enum QudaSiteOrder_s QudaSiteOrder
const ColorSpinorField & Odd() const
static ColorSpinorField * Create(const ColorSpinorParam &param)
char * strcpy(char *__dst, const char *__src)
ColorSpinorField * CreateFine(const int *geoblockSize, int spinBlockSize, int Nvec, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION)
QudaPrecision precision
Definition: lattice_field.h:54
DslashConstant dslash_constant
ColorSpinorField & Component(const int idx) const
int_fastdiv dims[4][3]
QudaSiteSubset siteSubset
Definition: lattice_field.h:55
void exit(int) __attribute__((noreturn))
ColorSpinorField(const ColorSpinorField &)
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
size_t size_t offset
QudaGaugeParam param
Definition: pack_test.cpp:17
#define b
void comm_free(MsgHandle *mh)
Definition: comm_mpi.cpp:252
int_fastdiv Xh[QUDA_MAX_DIM]
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:50
enum QudaDWFPCType_s QudaDWFPCType
ColorSpinorField * odd
int_fastdiv X[QUDA_MAX_DIM]
#define comm_declare_send_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:59
static void checkField(const ColorSpinorField &, const ColorSpinorField &)
#define comm_declare_receive_relative(buffer, dim, dir, nbytes)
Definition: comm_quda.h:74
void create(int nDim, const int *x, int Nc, int Ns, QudaTwistFlavorType Twistflavor, QudaPrecision precision, int pad, QudaSiteSubset subset, QudaSiteOrder siteOrder, QudaFieldOrder fieldOrder, QudaGammaBasis gammaBasis, QudaDWFPCType PCtype)
void exchange(void **ghost, void **sendbuf, int nFace=1) const
void reset(const ColorSpinorParam &)
#define ALIGNMENT_ADJUST(n)
Definition: quda_internal.h:32
void * GhostNorm(const int i)
void comm_start(MsgHandle *mh)
Definition: comm_mpi.cpp:260
void * ghost[2][QUDA_MAX_DIM]
long unsigned int size_t
char vol_string[TuneKey::volume_n]
size_t ghost_face_bytes[QUDA_MAX_DIM]
QudaTwistFlavorType twistFlavor
void * memcpy(void *__dst, const void *__src, size_t __n)
MsgHandle * mh_send_fwd[2][QUDA_MAX_DIM]
int GhostOffset(const int i) const
virtual ColorSpinorField & operator=(const ColorSpinorField &)
enum QudaSiteSubset_s QudaSiteSubset
QudaFieldLocation Location() const
#define pool_pinned_malloc(size)
Definition: malloc_quda.h:115
ColorSpinorField * CreateCoarse(const int *geoblockSize, int spinBlockSize, int Nvec, QudaFieldLocation location=QUDA_INVALID_FIELD_LOCATION)
enum QudaFieldLocation_s QudaFieldLocation
char aux_string[TuneKey::aux_n]
QudaFieldLocation location
Definition: quda.h:27
cpuColorSpinorField * out
int ghostOffset[QUDA_MAX_DIM][2]
void createGhostZone(int nFace, bool spin_project=true) const
enum QudaGammaBasis_s QudaGammaBasis
void fill(ColorSpinorParam &) const
static const int aux_n
Definition: tune_key.h:12
int ghostFace[QUDA_MAX_DIM+1]
#define printfQuda(...)
Definition: util_quda.h:84
void LatticeIndex(int *y, int i) const
int surfaceCB[QUDA_MAX_DIM]
const int * X() const
void *const * Ghost() const
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
void comm_wait(MsgHandle *mh)
Definition: comm_mpi.cpp:266
static const int volume_n
Definition: tune_key.h:10
void * ghost_buf[2 *QUDA_MAX_DIM]
static __inline__ size_t size_t d
QudaParity parity
Definition: covdev_test.cpp:53
QudaPrecision precision
MsgHandle * mh_send_back[2][QUDA_MAX_DIM]
#define a
QudaFieldOrder FieldOrder() const
ColorSpinorField * even
unsigned long long bytes
Definition: blas_quda.cu:43
int comm_dim_partitioned(int dim)
enum QudaTwistFlavorType_s QudaTwistFlavorType