color_spinor_field.cpp
#include <color_spinor_field.h>
#include <string.h>
#include <iostream>
#include <typeinfo>

namespace quda {

  /*ColorSpinorField::ColorSpinorField() : init(false) {

  }*/

  ColorSpinorParam::ColorSpinorParam(const ColorSpinorField &field) {
    field.fill(*this);
  }

  ColorSpinorField::ColorSpinorField(const ColorSpinorParam &param)
    : LatticeField(param), init(false), ghost_precision_allocated(QUDA_INVALID_PRECISION), v(0), norm(0),
      ghost( ), ghostNorm( ), ghostFace( ),
      bytes(0), norm_bytes(0), even(0), odd(0),
      composite_descr(param.is_composite, param.composite_dim, param.is_component, param.component_id),
      components(0)
  {
    for (int i = 0; i < 2 * QUDA_MAX_DIM; i++) ghost_buf[i] = nullptr;
    create(param.nDim, param.x, param.nColor, param.nSpin, param.nVec, param.twistFlavor, param.Precision(), param.pad,
           param.siteSubset, param.siteOrder, param.fieldOrder, param.gammaBasis, param.pc_type);
  }
27 
30  ghost( ), ghostNorm( ), ghostFace( ),
31  bytes(0), norm_bytes(0), even(0), odd(0),
33  {
34  for (int i = 0; i < 2 * QUDA_MAX_DIM; i++) ghost_buf[i] = nullptr;
35  create(field.nDim, field.x, field.nColor, field.nSpin, field.nVec, field.twistFlavor, field.Precision(), field.pad,
36  field.siteSubset, field.siteOrder, field.fieldOrder, field.gammaBasis, field.pc_type);
37  }

  ColorSpinorField::~ColorSpinorField() {
    destroy();
  }
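
  // createGhostZone computes the ghost-zone geometry for this field: the
  // per-dimension face volumes (ghostFace), the offsets of the data and norm
  // ghosts within the packed ghost buffer (ghostOffset, ghostNormOffset), the
  // per-face byte counts (ghost_face_bytes), and the index helpers in
  // dslash_constant used by the dslash and packing kernels. It is a no-op for
  // CPU fields and when a ghost zone of this precision is already allocated.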
  void ColorSpinorField::createGhostZone(int nFace, bool spin_project) const {

    if (typeid(*this) == typeid(cpuColorSpinorField) || ghost_precision_allocated == ghost_precision) return;

    // For Wilson we halve the number of effective faces if the fields are spin projected.
    int num_faces = ((nSpin == 4 && spin_project) ? 1 : 2) * nFace;
    int num_norm_faces = 2*nFace;
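    // For example: a spin-projected Wilson field (nSpin = 4, spin_project =
    // true) with nFace = 1 exchanges one half-spinor face per direction, so
    // num_faces = 1, while a three-hop staggered field (nSpin = 1, nFace = 3)
    // has num_faces = 6.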

    // calculate size of ghost zone required
    int ghostVolume = 0;
    int dims = nDim == 5 ? (nDim - 1) : nDim;
    int x5 = nDim == 5 ? x[4] : 1;
    for (int i=0; i<dims; i++) {
      ghostFace[i] = 0;
      if (comm_dim_partitioned(i)) {
        ghostFace[i] = 1;
        for (int j=0; j<dims; j++) {
          if (i==j) continue;
          ghostFace[i] *= x[j];
        }
        ghostFace[i] *= x5;
        if (i==0 && siteSubset != QUDA_FULL_SITE_SUBSET) ghostFace[i] /= 2;
        ghostVolume += ghostFace[i];
      }
      if (i==0) {
        ghostOffset[i][0] = 0;
      } else {
        if (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION) {
          ghostOffset[i][0] = (ghostNormOffset[i-1][1] + num_norm_faces*ghostFace[i-1]/2)*sizeof(float)/ghost_precision;
          // Ensure that start of ghostOffset is aligned on four-word boundaries (check if this is needed)
          ghostOffset[i][0] = 4*((ghostOffset[i][0] + 3)/4);
        } else {
          ghostOffset[i][0] = ghostOffset[i-1][0] + num_faces*ghostFace[i-1]*nSpin*nColor*2;
        }
      }
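      // The resulting layout per partitioned dimension is: backwards data
      // ghost, backwards norm ghost (half/quarter precision only), forwards
      // data ghost, forwards norm ghost. ghostOffset is measured in
      // ghost_precision-sized words and ghostNormOffset in floats, hence the
      // sizeof(float)/ghost_precision conversions.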

      if (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION) {
        ghostNormOffset[i][0] = (ghostOffset[i][0] + (num_faces*ghostFace[i]*nSpin*nColor*2/2))*ghost_precision/sizeof(float);
        ghostOffset[i][1] = (ghostNormOffset[i][0] + num_norm_faces*ghostFace[i]/2)*sizeof(float)/ghost_precision;
        // Ensure that start of ghostOffset is aligned on four-word boundaries (check if this is needed)
        ghostOffset[i][1] = 4*((ghostOffset[i][1] + 3)/4);
        ghostNormOffset[i][1] = (ghostOffset[i][1] + (num_faces*ghostFace[i]*nSpin*nColor*2/2))*ghost_precision/sizeof(float);
      } else {
        ghostOffset[i][1] = ghostOffset[i][0] + num_faces*ghostFace[i]*nSpin*nColor*2/2;
      }

      int Nint = nColor * nSpin * 2 / (nSpin == 4 && spin_project ? 2 : 1); // number of internal degrees of freedom
      ghost_face_bytes[i] = nFace*ghostFace[i]*Nint*ghost_precision;
      if (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION) ghost_face_bytes[i] += nFace*ghostFace[i]*sizeof(float);

      if (GhostOffset(i,0)%FieldOrder()) errorQuda("ghostOffset(%d,0) %d is not a multiple of FloatN\n", i, GhostOffset(i,0));
      if (GhostOffset(i,1)%FieldOrder()) errorQuda("ghostOffset(%d,1) %d is not a multiple of FloatN\n", i, GhostOffset(i,1));

    } // dim

    int ghostNormVolume = num_norm_faces * ghostVolume;
    ghostVolume *= num_faces;

    size_t ghost_length = ghostVolume*nColor*nSpin*2;
    size_t ghost_norm_length = (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION) ? ghostNormVolume : 0;

    ghost_bytes = (size_t)ghost_length*ghost_precision;
    if (ghost_precision == QUDA_HALF_PRECISION || ghost_precision == QUDA_QUARTER_PRECISION) ghost_bytes += ghost_norm_length*sizeof(float);
    if (isNative()) ghost_bytes = ALIGNMENT_ADJUST(ghost_bytes);

    { // compute temporaries needed by dslash and packing kernels
      auto &X = dslash_constant.X;
      for (int dim=0; dim<nDim; dim++) X[dim] = x[dim];
      for (int dim=nDim; dim<QUDA_MAX_DIM; dim++) X[dim] = 1;
      if (siteSubset == QUDA_PARITY_SITE_SUBSET) X[0] = 2*X[0];

      for (int i=0; i<nDim; i++) dslash_constant.Xh[i] = X[i]/2;

      dslash_constant.Ls = X[4];
      dslash_constant.volume_4d_cb = volumeCB / (nDim == 5 ? x[4] : 1);
      dslash_constant.volume_4d = 2 * dslash_constant.volume_4d_cb;

      int face[4];
      for (int dim=0; dim<4; dim++) {
        for (int j=0; j<4; j++) face[j] = X[j];
        face[dim] = nFace;
        dslash_constant.face_X[dim] = face[0];
        dslash_constant.face_Y[dim] = face[1];
        dslash_constant.face_Z[dim] = face[2];
        dslash_constant.face_T[dim] = face[3];
        dslash_constant.face_XY[dim] = dslash_constant.face_X[dim] * face[1];
        dslash_constant.face_XYZ[dim] = dslash_constant.face_XY[dim] * face[2];
        dslash_constant.face_XYZT[dim] = dslash_constant.face_XYZ[dim] * face[3];
      }
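      // face_X..face_T are the lattice dimensions with the partitioned
      // direction replaced by the face depth nFace; the partial products
      // face_XY, face_XYZ and face_XYZT let a kernel unpack a linear face
      // index into coordinates with divisions and modulos.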

      dslash_constant.Vh = (X[3]*X[2]*X[1]*X[0])/2;
      dslash_constant.ghostFace[0] = X[1] * X[2] * X[3];
      dslash_constant.ghostFace[1] = X[0] * X[2] * X[3];
      dslash_constant.ghostFace[2] = X[0] * X[1] * X[3];
      dslash_constant.ghostFace[3] = X[0] * X[1] * X[2];
      for (int d = 0; d < 4; d++) dslash_constant.ghostFaceCB[d] = dslash_constant.ghostFace[d] / 2;

      dslash_constant.X2X1 = X[1]*X[0];
      dslash_constant.X3X2X1 = X[2]*X[1]*X[0];
      dslash_constant.X4X3X2X1 = X[3] * X[2] * X[1] * X[0];
      dslash_constant.X2X1mX1 = (X[1]-1)*X[0];
      dslash_constant.X3X2X1mX2X1 = (X[2]-1)*X[1]*X[0];
      dslash_constant.X4X3X2X1mX3X2X1 = (X[3]-1)*X[2]*X[1]*X[0];
      dslash_constant.X5X4X3X2X1mX4X3X2X1 = (X[4] - 1) * X[3] * X[2] * X[1] * X[0];

      // used by indexFromFaceIndexStaggered
      dslash_constant.dims[0][0] = X[1];
      dslash_constant.dims[0][1] = X[2];
      dslash_constant.dims[0][2] = X[3];

      dslash_constant.dims[1][0] = X[0];
      dslash_constant.dims[1][1] = X[2];
      dslash_constant.dims[1][2] = X[3];

      dslash_constant.dims[2][0] = X[0];
      dslash_constant.dims[2][1] = X[1];
      dslash_constant.dims[2][2] = X[3];

      dslash_constant.dims[3][0] = X[0];
      dslash_constant.dims[3][1] = X[1];
      dslash_constant.dims[3][2] = X[2];
    }

    ghost_precision_allocated = ghost_precision;

  } // createGhostZone

  void ColorSpinorField::create(int Ndim, const int *X, int Nc, int Ns, int Nvec, QudaTwistFlavorType Twistflavor,
                                QudaPrecision Prec, int Pad, QudaSiteSubset siteSubset, QudaSiteOrder siteOrder,
                                QudaFieldOrder fieldOrder, QudaGammaBasis gammaBasis, QudaPCType pc_type)
  {
    this->siteSubset = siteSubset;
    this->siteOrder = siteOrder;
    this->fieldOrder = fieldOrder;
    this->gammaBasis = gammaBasis;

    if (Ndim > QUDA_MAX_DIM) {
      errorQuda("Number of dimensions nDim = %d too great", Ndim);
    }
    nDim = Ndim;
    nColor = Nc;
    nSpin = Ns;
    nVec = Nvec;
    twistFlavor = Twistflavor;

    this->pc_type = pc_type;

    precision = Prec;
    volume = 1;
    for (int d=0; d<nDim; d++) {
      x[d] = X[d];
      volume *= x[d];
    }
    volumeCB = siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;

    if ((twistFlavor == QUDA_TWIST_DEG_DOUBLET || twistFlavor == QUDA_TWIST_NONDEG_DOUBLET) && x[4] != 2)
      errorQuda("Must be two flavors for non-degenerate twisted mass spinor (while provided with %d number of components)\n", x[4]); // two flavors

    pad = Pad;
    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      stride = volume/2 + pad; // padding is based on half volume
      length = 2*stride*nColor*nSpin*2;
    } else {
      stride = volume + pad;
      length = stride*nColor*nSpin*2;
    }
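
    // For example: a full-subset 16^4 field with pad = 0, nColor = 3 and
    // nSpin = 4 has volume = 65536, stride = 32768, and
    // length = 2*32768*3*4*2 = 1572864 real numbers.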

    real_length = volume*nColor*nSpin*2; // physical length

    bytes = (size_t)length * precision; // includes pads and ghost zones
    if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(bytes/2) : ALIGNMENT_ADJUST(bytes);

    if (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) {
      norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);
      if (isNative()) norm_bytes = ALIGNMENT_ADJUST(norm_bytes);
    } else {
      norm_bytes = 0;
    }

    init = true;

    // stuff for deflation eigenvector sets etc.:
    if (composite_descr.is_composite) {

      if (composite_descr.is_component) errorQuda("\nComposite type is not implemented.\n");

      composite_descr.volume      = volume;
      composite_descr.volumeCB    = volumeCB;
      composite_descr.stride      = stride;
      composite_descr.length      = length;
      composite_descr.real_length = real_length;
      composite_descr.bytes       = bytes;
      composite_descr.norm_bytes  = norm_bytes;

      volume      *= composite_descr.dim;
      volumeCB    *= composite_descr.dim;
      stride      *= composite_descr.dim;
      length      *= composite_descr.dim;
      real_length *= composite_descr.dim;

      bytes      *= composite_descr.dim;
      norm_bytes *= composite_descr.dim;
    } else if (composite_descr.is_component) {
      composite_descr.dim = 0;

      composite_descr.volume      = 0;
      composite_descr.volumeCB    = 0;
      composite_descr.stride      = 0;
      composite_descr.length      = 0;
      composite_descr.real_length = 0;
      composite_descr.bytes       = 0;
      composite_descr.norm_bytes  = 0;
    }

    setTuningString();
  }

  void ColorSpinorField::setTuningString() {
    { // set the volume string
      //LatticeField::setTuningString(); // FIXME - LatticeField needs correct dims for single-parity
      char vol_tmp[TuneKey::volume_n];
      int check = snprintf(vol_string, TuneKey::volume_n, "%d", x[0]);
      if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
      for (int d=1; d<nDim; d++) {
        strcpy(vol_tmp, vol_string);
        check = snprintf(vol_string, TuneKey::volume_n, "%sx%d", vol_tmp, x[d]);
        if (check < 0 || check >= TuneKey::volume_n) errorQuda("Error writing volume string");
      }
    }

    { // set the aux string
      int aux_string_n = TuneKey::aux_n / 2;
      char aux_tmp[aux_string_n];
      int check = snprintf(aux_string, aux_string_n, "vol=%d,stride=%d,precision=%d,Ns=%d,Nc=%d",
                           volume, stride, precision, nSpin, nColor);
      if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
      if (twistFlavor != QUDA_TWIST_NO && twistFlavor != QUDA_TWIST_INVALID) {
        strcpy(aux_tmp, aux_string);
        check = snprintf(aux_string, aux_string_n, "%s,TwistFlavour=%d", aux_tmp, twistFlavor);
        if (check < 0 || check >= aux_string_n) errorQuda("Error writing aux string");
      }
    }
  }
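
  // For example: a 16x16x16x32 local volume gives vol_string = "16x16x16x32",
  // while the aux string takes the form "vol=131072,stride=...,precision=4,Ns=4,Nc=3"
  // (precision being the QudaPrecision enum value), with ",TwistFlavour=..."
  // appended only for twisted-mass fields.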

  void ColorSpinorField::destroy() {
    init = false;
  }

  ColorSpinorField& ColorSpinorField::operator=(const ColorSpinorField &src) {
    if (&src != this) {
      if (src.composite_descr.is_composite) {
        this->composite_descr.is_composite = true;
        this->composite_descr.dim          = src.composite_descr.dim;
        this->composite_descr.is_component = false;
        this->composite_descr.id           = 0;
      }
      else if (src.composite_descr.is_component) {
        this->composite_descr.is_composite = false;
        this->composite_descr.dim          = 0;
        //this->composite_descr.is_component = false;
        //this->composite_descr.id = 0;
      }

      create(src.nDim, src.x, src.nColor, src.nSpin, src.nVec, src.twistFlavor, src.precision, src.pad, src.siteSubset,
             src.siteOrder, src.fieldOrder, src.gammaBasis, src.pc_type);
    }
    return *this;
  }

  // Resets the attributes of this field if param disagrees (and is defined)
  void ColorSpinorField::reset(const ColorSpinorParam &param)
  {
    if (param.nColor != 0) nColor = param.nColor;
    if (param.nSpin != 0) nSpin = param.nSpin;
    if (param.nVec != 0) nVec = param.nVec;
    if (param.twistFlavor != QUDA_TWIST_INVALID) twistFlavor = param.twistFlavor;

    if (param.pc_type != QUDA_PC_INVALID) pc_type = param.pc_type;

    if (param.Precision() != QUDA_INVALID_PRECISION) precision = param.Precision();
    if (param.GhostPrecision() != QUDA_INVALID_PRECISION) ghost_precision = param.GhostPrecision();
    if (param.nDim != 0) nDim = param.nDim;

    composite_descr.is_composite = param.is_composite;
    composite_descr.dim          = param.is_composite ? param.composite_dim : 0;
    composite_descr.is_component = param.is_component;

    volume = 1;
    for (int d=0; d<nDim; d++) {
      if (param.x[d] != 0) x[d] = param.x[d];
      volume *= x[d];
    }
    volumeCB = siteSubset == QUDA_PARITY_SITE_SUBSET ? volume : volume/2;

    if ((twistFlavor == QUDA_TWIST_DEG_DOUBLET || twistFlavor == QUDA_TWIST_NONDEG_DOUBLET) && x[4] != 2)
      errorQuda("Must be two flavors for non-degenerate twisted mass spinor (provided with %d)\n", x[4]);

    if (param.pad != 0) pad = param.pad;

    if (param.siteSubset == QUDA_FULL_SITE_SUBSET) {
      stride = volume/2 + pad;
      length = 2*stride*nColor*nSpin*2;
    } else if (param.siteSubset == QUDA_PARITY_SITE_SUBSET) {
      stride = volume + pad;
      length = stride*nColor*nSpin*2;
    } else {
      //errorQuda("SiteSubset not defined %d", param.siteSubset);
      //do nothing, not an error (can't remember why - need to document this sometime!)
    }

    if (param.siteSubset != QUDA_INVALID_SITE_SUBSET) siteSubset = param.siteSubset;
    if (param.siteOrder != QUDA_INVALID_SITE_ORDER) siteOrder = param.siteOrder;
    if (param.fieldOrder != QUDA_INVALID_FIELD_ORDER) fieldOrder = param.fieldOrder;
    if (param.gammaBasis != QUDA_INVALID_GAMMA_BASIS) gammaBasis = param.gammaBasis;

    real_length = volume*nColor*nSpin*2;

    bytes = (size_t)length * precision; // includes pads
    if (isNative() || fieldOrder == QUDA_FLOAT2_FIELD_ORDER) bytes = (siteSubset == QUDA_FULL_SITE_SUBSET) ? 2*ALIGNMENT_ADJUST(bytes/2) : ALIGNMENT_ADJUST(bytes);

    if (precision == QUDA_HALF_PRECISION || precision == QUDA_QUARTER_PRECISION) {
      norm_bytes = (siteSubset == QUDA_FULL_SITE_SUBSET ? 2*stride : stride) * sizeof(float);
      if (isNative()) norm_bytes = ALIGNMENT_ADJUST(norm_bytes);
    } else {
      norm_bytes = 0;
    }

    // for deflated solvers:
    if (composite_descr.is_composite) {
      composite_descr.volume      = volume;
      composite_descr.stride      = stride;
      composite_descr.length      = length;
      composite_descr.real_length = real_length;
      composite_descr.bytes       = bytes;
      composite_descr.norm_bytes  = norm_bytes;

      volume      *= composite_descr.dim;
      stride      *= composite_descr.dim;
      length      *= composite_descr.dim;
      real_length *= composite_descr.dim;

      bytes      *= composite_descr.dim;
      norm_bytes *= composite_descr.dim;
    } else {
      composite_descr.volume      = 0;
      composite_descr.stride      = 0;
      composite_descr.length      = 0;
      composite_descr.real_length = 0;
      composite_descr.bytes       = 0;
      composite_descr.norm_bytes  = 0;
    }

    if (!init) errorQuda("Shouldn't be resetting a non-inited field\n");

    setTuningString();
  }

  // Fills the param with the contents of this field
  void ColorSpinorField::fill(ColorSpinorParam &param) const {
    param.location = Location();
    param.nColor = nColor;
    param.nSpin = nSpin;
    param.nVec = nVec;
    param.twistFlavor = twistFlavor;
    param.fieldOrder = fieldOrder;
    param.setPrecision(precision, ghost_precision);
    param.nDim = nDim;

    param.is_composite  = composite_descr.is_composite;
    param.composite_dim = composite_descr.dim;
    param.is_component = false; // always either a regular spinor or a composite object
    param.component_id = 0;

    memcpy(param.x, x, QUDA_MAX_DIM*sizeof(int));
    param.pad = pad;
    param.siteSubset = siteSubset;
    param.siteOrder = siteOrder;
    param.gammaBasis = gammaBasis;
    param.pc_type = pc_type;
    param.create = QUDA_INVALID_FIELD_CREATE;
  }

  void ColorSpinorField::exchange(void **ghost, void **sendbuf, int nFace) const {

    // FIXME: use LatticeField MsgHandles
    MsgHandle *mh_send_fwd[4];
    MsgHandle *mh_from_back[4];
    MsgHandle *mh_from_fwd[4];
    MsgHandle *mh_send_back[4];
    size_t bytes[4];

    const int Ninternal = 2*nColor*nSpin;
    size_t total_bytes = 0;
    for (int i=0; i<nDimComms; i++) {
      bytes[i] = siteSubset*nFace*surfaceCB[i]*Ninternal*ghost_precision;
      if (comm_dim_partitioned(i)) total_bytes += 2*bytes[i]; // 2 for fwd/bwd
    }

    void *total_send = nullptr;
    void *total_recv = nullptr;
    void *send_fwd[4];
    void *send_back[4];
    void *recv_fwd[4];
    void *recv_back[4];

    // leave this option in there just in case
    bool no_comms_fill = false;

    // If this is set to false, then we are assuming that the send and
    // ghost buffers are in a single contiguous memory space.  Setting
    // it to false means we aggregate all cudaMemcpys, which reduces
    // latency.
    bool fine_grained_memcpy = false;

    if (Location() == QUDA_CPU_FIELD_LOCATION) {
      for (int i=0; i<nDimComms; i++) {
        if (comm_dim_partitioned(i)) {
          send_back[i] = sendbuf[2*i + 0];
          send_fwd[i]  = sendbuf[2*i + 1];
          recv_fwd[i]  = ghost[2*i + 1];
          recv_back[i] = ghost[2*i + 0];
        } else if (no_comms_fill) {
          memcpy(ghost[2*i+1], sendbuf[2*i+0], bytes[i]);
          memcpy(ghost[2*i+0], sendbuf[2*i+1], bytes[i]);
        }
      }
    } else { // FIXME add GPU_COMMS support
      if (total_bytes) {
        total_send = pool_pinned_malloc(total_bytes);
        total_recv = pool_pinned_malloc(total_bytes);
      }
      size_t offset = 0;
      for (int i=0; i<nDimComms; i++) {
        if (comm_dim_partitioned(i)) {
          send_back[i] = static_cast<char*>(total_send) + offset;
          recv_back[i] = static_cast<char*>(total_recv) + offset;
          offset += bytes[i];
          send_fwd[i] = static_cast<char*>(total_send) + offset;
          recv_fwd[i] = static_cast<char*>(total_recv) + offset;
          offset += bytes[i];
          if (fine_grained_memcpy) {
            qudaMemcpy(send_back[i], sendbuf[2*i + 0], bytes[i], cudaMemcpyDeviceToHost);
            qudaMemcpy(send_fwd[i], sendbuf[2*i + 1], bytes[i], cudaMemcpyDeviceToHost);
          }
        } else if (no_comms_fill) {
          qudaMemcpy(ghost[2*i+1], sendbuf[2*i+0], bytes[i], cudaMemcpyDeviceToDevice);
          qudaMemcpy(ghost[2*i+0], sendbuf[2*i+1], bytes[i], cudaMemcpyDeviceToDevice);
        }
      }
      if (!fine_grained_memcpy && total_bytes) {
        // find first non-zero pointer
        void *send_ptr = nullptr;
        for (int i=0; i<nDimComms; i++) {
          if (comm_dim_partitioned(i)) {
            send_ptr = sendbuf[2*i];
            break;
          }
        }
        qudaMemcpy(total_send, send_ptr, total_bytes, cudaMemcpyDeviceToHost);
      }
    }

    for (int i=0; i<nDimComms; i++) {
      if (!comm_dim_partitioned(i)) continue;
      mh_send_fwd[i]  = comm_declare_send_relative(send_fwd[i], i, +1, bytes[i]);
      mh_send_back[i] = comm_declare_send_relative(send_back[i], i, -1, bytes[i]);
      mh_from_fwd[i]  = comm_declare_receive_relative(recv_fwd[i], i, +1, bytes[i]);
      mh_from_back[i] = comm_declare_receive_relative(recv_back[i], i, -1, bytes[i]);
    }

    for (int i=0; i<nDimComms; i++) {
      if (comm_dim_partitioned(i)) {
        comm_start(mh_from_back[i]);
        comm_start(mh_from_fwd[i]);
        comm_start(mh_send_fwd[i]);
        comm_start(mh_send_back[i]);
      }
    }

    for (int i=0; i<nDimComms; i++) {
      if (!comm_dim_partitioned(i)) continue;
      comm_wait(mh_send_fwd[i]);
      comm_wait(mh_send_back[i]);
      comm_wait(mh_from_back[i]);
      comm_wait(mh_from_fwd[i]);
    }

    if (Location() == QUDA_CUDA_FIELD_LOCATION) {
      for (int i=0; i<nDimComms; i++) {
        if (!comm_dim_partitioned(i)) continue;
        if (fine_grained_memcpy) {
          qudaMemcpy(ghost[2*i+0], recv_back[i], bytes[i], cudaMemcpyHostToDevice);
          qudaMemcpy(ghost[2*i+1], recv_fwd[i], bytes[i], cudaMemcpyHostToDevice);
        }
      }

      if (!fine_grained_memcpy && total_bytes) {
        // find first non-zero pointer
        void *ghost_ptr = nullptr;
        for (int i=0; i<nDimComms; i++) {
          if (comm_dim_partitioned(i)) {
            ghost_ptr = ghost[2*i];
            break;
          }
        }
        qudaMemcpy(ghost_ptr, total_recv, total_bytes, cudaMemcpyHostToDevice);
      }

      if (total_bytes) {
        pool_pinned_free(total_send);
        pool_pinned_free(total_recv);
      }
    }

    for (int i=0; i<nDimComms; i++) {
      if (!comm_dim_partitioned(i)) continue;
      comm_free(mh_send_fwd[i]);
      comm_free(mh_send_back[i]);
      comm_free(mh_from_back[i]);
      comm_free(mh_from_fwd[i]);
    }
  }
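
  // A sketch of the expected calling convention (hypothetical buffer setup):
  // ghost and sendbuf are arrays of 2*nDimComms pointers, indexed as 2*dim+dir
  // with dir = 0 backwards and dir = 1 forwards, each buffer holding
  // siteSubset*nFace*surfaceCB[dim]*2*nColor*nSpin*ghost_precision bytes:
  //
  //   void *ghost[8], *sendbuf[8];
  //   // ... allocate the eight buffer pairs ...
  //   field.exchange(ghost, sendbuf, 1); // exchange depth-1 faces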

  bool ColorSpinorField::isNative() const {
    if (precision == QUDA_DOUBLE_PRECISION) {
      if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) return true;
    } else if (precision == QUDA_SINGLE_PRECISION ||
               (precision == QUDA_HALF_PRECISION && nColor == 3) ||
               (precision == QUDA_QUARTER_PRECISION && nColor == 3)) {
      if (nSpin == 4) {
        if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) return true;
      } else if (nSpin == 2) {
        if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) return true;
      } else if (nSpin == 1) {
        if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) return true;
      }
    }
    return false;
  }
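
  // In other words, the native (coalescing-friendly) orders are float2 for
  // double precision, float4 for four-spinor fields in single/half/quarter
  // precision, and float2 for two- and one-spinor fields; anything else
  // (e.g. a CPU-ordered QUDA_SPACE_SPIN_COLOR_FIELD_ORDER field) is non-native.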

  // For kernels with precision conversion built in
  void ColorSpinorField::checkField(const ColorSpinorField &a, const ColorSpinorField &b) {
    if (a.Length() != b.Length()) {
      errorQuda("checkSpinor: lengths do not match: %lu %lu", a.Length(), b.Length());
    }

    if (a.Ncolor() != b.Ncolor()) {
      errorQuda("checkSpinor: colors do not match: %d %d", a.Ncolor(), b.Ncolor());
    }

    if (a.Nspin() != b.Nspin()) {
      errorQuda("checkSpinor: spins do not match: %d %d", a.Nspin(), b.Nspin());
    }

    if (a.Nvec() != b.Nvec()) {
      errorQuda("checkSpinor: nVec does not match: %d %d", a.Nvec(), b.Nvec());
    }

    if (a.TwistFlavor() != b.TwistFlavor()) {
      errorQuda("checkSpinor: twist flavors do not match: %d %d", a.TwistFlavor(), b.TwistFlavor());
    }
  }

  const ColorSpinorField& ColorSpinorField::Even() const {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return even subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return even subset of QDPJIT field");
    return *even;
  }

  const ColorSpinorField& ColorSpinorField::Odd() const {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return odd subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return odd subset of QDPJIT field");
    return *odd;
  }

  ColorSpinorField& ColorSpinorField::Even() {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return even subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return even subset of QDPJIT field");
    return *even;
  }

  ColorSpinorField& ColorSpinorField::Odd() {
    if (siteSubset != QUDA_FULL_SITE_SUBSET)
      errorQuda("Cannot return odd subset of %d subset", siteSubset);
    if (fieldOrder == QUDA_QDPJIT_FIELD_ORDER)
      errorQuda("Cannot return odd subset of QDPJIT field");
    return *odd;
  }

  ColorSpinorField& ColorSpinorField::Component(const int idx) {
    if (this->IsComposite()) {
      if (idx < this->CompositeDim()) { // set up the eigenvector from the set
        return *(dynamic_cast<ColorSpinorField*>(components[idx]));
      }
      else {
        errorQuda("Incorrect component index...");
      }
    }
    errorQuda("Cannot get requested component");
    exit(-1);
  }

  ColorSpinorField& ColorSpinorField::Component(const int idx) const {
    if (this->IsComposite()) {
      if (idx < this->CompositeDim()) { // set up the eigenvector from the set
        return *(dynamic_cast<ColorSpinorField*>(components[idx]));
      }
      else {
        errorQuda("Incorrect component index...");
      }
    }
    errorQuda("Cannot get requested component");
    exit(-1);
  }

  void* ColorSpinorField::Ghost(const int i) {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghost[i];
  }

  const void* ColorSpinorField::Ghost(const int i) const {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghost[i];
  }

  void* ColorSpinorField::GhostNorm(const int i) {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghostNorm[i];
  }

  const void* ColorSpinorField::GhostNorm(const int i) const {
    if (siteSubset != QUDA_PARITY_SITE_SUBSET) errorQuda("Site Subset %d is not supported", siteSubset);
    return ghostNorm[i];
  }

  void* const* ColorSpinorField::Ghost() const {
    return ghost_buf;
  }

  /*
    Convert from the 1-dimensional index to the n-dimensional spatial index.
    With full fields, we assume that the field is even-odd ordered.  The
    lattice coordinates that are computed here are full-field coordinates.
  */
  void ColorSpinorField::LatticeIndex(int *y, int i) const {
    int z[QUDA_MAX_DIM];
    memcpy(z, x, QUDA_MAX_DIM*sizeof(int));

    // parity is the slowest running dimension
    int parity = 0;
    if (siteSubset == QUDA_FULL_SITE_SUBSET) z[0] /= 2;

    for (int d=0; d<nDim; d++) {
      y[d] = i % z[d];
      i /= z[d];
    }

    parity = i;

    // convert into the full-field lattice coordinate
    int oddBit = parity;
    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      for (int d=1; d<nDim; d++) oddBit += y[d];
      oddBit = oddBit & 1;
    }
    y[0] = 2*y[0] + oddBit; // compute the full x coordinate
  }
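
  // Worked example (a sketch): on a full 4^4 field, z[0] is halved to 2, so
  // checkerboard index i = 1 with parity 0 unpacks to y = {1,0,0,0}, and then
  // y[0] = 2*1 + 0 = 2, i.e. the full-field site (2,0,0,0).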

  /*
    Convert from the n-dimensional spatial index to the 1-dimensional index.
    With full fields, we assume that the field is even-odd ordered.  The
    input lattice coordinates are always full-field coordinates.
  */
  void ColorSpinorField::OffsetIndex(int &i, int *y) const {

    int parity = 0;
    int z[QUDA_MAX_DIM];
    memcpy(z, x, QUDA_MAX_DIM*sizeof(int));
    int savey0 = y[0];

    if (siteSubset == QUDA_FULL_SITE_SUBSET) {
      for (int d=0; d<nDim; d++) parity += y[d];
      parity = parity & 1;
      y[0] /= 2;
      z[0] /= 2;
    }

    i = parity;
    for (int d=nDim-1; d>=0; d--) {
      i = z[d]*i + y[d];
      //printf("z[%d]=%d y[%d]=%d ", d, z[d], d, y[d]);
    }

    //printf("\nparity = %d\n", parity);

    if (siteSubset == QUDA_FULL_SITE_SUBSET) y[0] = savey0;
  }
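
  // OffsetIndex is the inverse of LatticeIndex: continuing the example above,
  // y = {2,0,0,0} on the full 4^4 field gives parity 0, y[0] -> 1, and
  // i = ((0*4+0)*4+0)*2 + 1 = 1, recovering the original checkerboard index.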

  ColorSpinorField* ColorSpinorField::Create(const ColorSpinorParam &param) {

    ColorSpinorField *field = nullptr;
    if (param.location == QUDA_CPU_FIELD_LOCATION) {
      field = new cpuColorSpinorField(param);
    } else if (param.location == QUDA_CUDA_FIELD_LOCATION) {
      field = new cudaColorSpinorField(param);
    } else {
      errorQuda("Invalid field location %d", param.location);
    }

    return field;
  }

  ColorSpinorField* ColorSpinorField::Create(const ColorSpinorField &src, const ColorSpinorParam &param) {

    ColorSpinorField *field = nullptr;
    if (param.location == QUDA_CPU_FIELD_LOCATION) {
      field = new cpuColorSpinorField(src, param);
    } else if (param.location == QUDA_CUDA_FIELD_LOCATION) {
      field = new cudaColorSpinorField(src, param);
    } else {
      errorQuda("Invalid field location %d", param.location);
    }

    return field;
  }
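
  // Example usage (a sketch with illustrative parameter values; further
  // members such as the field order and gamma basis may also need setting):
  // create a zero-initialized single-precision Wilson spinor on the device
  // via the factory above:
  //
  //   ColorSpinorParam param;
  //   param.nColor = 3; param.nSpin = 4; param.nDim = 4;
  //   for (int d = 0; d < 4; d++) param.x[d] = 16;
  //   param.setPrecision(QUDA_SINGLE_PRECISION);
  //   param.siteSubset = QUDA_FULL_SITE_SUBSET;
  //   param.location = QUDA_CUDA_FIELD_LOCATION;
  //   param.create = QUDA_ZERO_FIELD_CREATE;
  //   ColorSpinorField *v = ColorSpinorField::Create(param);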

  ColorSpinorField* ColorSpinorField::CreateCoarse(const int *geoBlockSize, int spinBlockSize, int Nvec,
                                                   QudaPrecision new_precision, QudaFieldLocation new_location,
                                                   QudaMemoryType new_mem_type) {
    ColorSpinorParam coarseParam(*this);
    for (int d=0; d<nDim; d++) coarseParam.x[d] = x[d]/geoBlockSize[d];
    coarseParam.nSpin = nSpin / spinBlockSize; // for staggered, coarseParam.nSpin = nSpin

    coarseParam.nColor = Nvec;
    coarseParam.siteSubset = QUDA_FULL_SITE_SUBSET; // coarse grid is always full
    coarseParam.create = QUDA_ZERO_FIELD_CREATE;

    // if new precision is not set, use this->precision
    new_precision = (new_precision == QUDA_INVALID_PRECISION) ? Precision() : new_precision;

    // if new location is not set, use this->location
    new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location() : new_location;

    // for GPU fields, always use native ordering to ensure coalescing
    if (new_location == QUDA_CUDA_FIELD_LOCATION) coarseParam.fieldOrder = QUDA_FLOAT2_FIELD_ORDER;
    else coarseParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;

    coarseParam.setPrecision(new_precision);

    // set where we allocate the field
    coarseParam.mem_type = (new_mem_type != QUDA_MEMORY_INVALID) ? new_mem_type :
      (new_location == QUDA_CUDA_FIELD_LOCATION ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_PINNED);

    ColorSpinorField *coarse = NULL;
    if (new_location == QUDA_CPU_FIELD_LOCATION) {
      coarse = new cpuColorSpinorField(coarseParam);
    } else if (new_location == QUDA_CUDA_FIELD_LOCATION) {
      coarse = new cudaColorSpinorField(coarseParam);
    } else {
      errorQuda("Invalid field location %d", new_location);
    }

    return coarse;
  }
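
  // For example: blocking a 16^4, nSpin = 4 fine field with
  // geoBlockSize = {4,4,4,4}, spinBlockSize = 2 and Nvec = 24 yields a 4^4
  // coarse field with nSpin = 2 and nColor = 24, one color per near-null vector.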

  ColorSpinorField* ColorSpinorField::CreateFine(const int *geoBlockSize, int spinBlockSize, int Nvec,
                                                 QudaPrecision new_precision, QudaFieldLocation new_location,
                                                 QudaMemoryType new_mem_type) {
    ColorSpinorParam fineParam(*this);
    for (int d=0; d<nDim; d++) fineParam.x[d] = x[d] * geoBlockSize[d];
    fineParam.nSpin = nSpin * spinBlockSize;
    fineParam.nColor = Nvec;
    fineParam.siteSubset = QUDA_FULL_SITE_SUBSET; // FIXME fine grid is always full
    fineParam.create = QUDA_ZERO_FIELD_CREATE;

    // if new precision is not set, use this->precision
    new_precision = (new_precision == QUDA_INVALID_PRECISION) ? Precision() : new_precision;

    // if new location is not set, use this->location
    new_location = (new_location == QUDA_INVALID_FIELD_LOCATION) ? Location() : new_location;

    // for GPU fields, always use native ordering to ensure coalescing
    if (new_location == QUDA_CUDA_FIELD_LOCATION) {
      fineParam.fieldOrder = (fineParam.nSpin == 4 && fineParam.Precision() != QUDA_DOUBLE_PRECISION) ?
        QUDA_FLOAT4_FIELD_ORDER : QUDA_FLOAT2_FIELD_ORDER;
    } else {
      fineParam.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
    }

    fineParam.setPrecision(new_precision);

    // set where we allocate the field
    fineParam.mem_type = (new_mem_type != QUDA_MEMORY_INVALID) ? new_mem_type :
      (new_location == QUDA_CUDA_FIELD_LOCATION ? QUDA_MEMORY_DEVICE : QUDA_MEMORY_PINNED);

    ColorSpinorField *fine = NULL;
    if (new_location == QUDA_CPU_FIELD_LOCATION) {
      fine = new cpuColorSpinorField(fineParam);
    } else if (new_location == QUDA_CUDA_FIELD_LOCATION) {
      fine = new cudaColorSpinorField(fineParam);
    } else {
      errorQuda("Invalid field location %d", new_location);
    }
    return fine;
  }

  std::ostream& operator<<(std::ostream &out, const ColorSpinorField &a) {
    out << "typeid = " << typeid(a).name() << std::endl;
    out << "nColor = " << a.nColor << std::endl;
    out << "nSpin = " << a.nSpin << std::endl;
    out << "twistFlavor = " << a.twistFlavor << std::endl;
    out << "nDim = " << a.nDim << std::endl;
    for (int d=0; d<a.nDim; d++) out << "x[" << d << "] = " << a.x[d] << std::endl;
    out << "volume = " << a.volume << std::endl;
    out << "precision = " << a.precision << std::endl;
    out << "ghost_precision = " << a.ghost_precision << std::endl;
    out << "pad = " << a.pad << std::endl;
    out << "stride = " << a.stride << std::endl;
    out << "real_length = " << a.real_length << std::endl;
    out << "length = " << a.length << std::endl;
    out << "bytes = " << a.bytes << std::endl;
    out << "norm_bytes = " << a.norm_bytes << std::endl;
    out << "siteSubset = " << a.siteSubset << std::endl;
    out << "siteOrder = " << a.siteOrder << std::endl;
    out << "fieldOrder = " << a.fieldOrder << std::endl;
    out << "gammaBasis = " << a.gammaBasis << std::endl;
    out << "Is composite = " << a.composite_descr.is_composite << std::endl;
    if (a.composite_descr.is_composite)
    {
      out << "Composite Dim = " << a.composite_descr.dim << std::endl;
      out << "Composite Volume = " << a.composite_descr.volume << std::endl;
      out << "Composite Stride = " << a.composite_descr.stride << std::endl;
      out << "Composite Length = " << a.composite_descr.length << std::endl;
    }
    out << "Is component = " << a.composite_descr.is_component << std::endl;
    if (a.composite_descr.is_component) out << "Component ID = " << a.composite_descr.id << std::endl;
    out << "pc_type = " << a.pc_type << std::endl;
    return out;
  }

} // namespace quda