QUDA  v1.1.0
A library for QCD on GPUs
color_spinor_util.cu
Go to the documentation of this file.
1 #include <color_spinor_field.h>
2 #include <color_spinor_field_order.h>
3 #include <index_helper.cuh>
4 #include <blas_quda.h>
5 #include <instantiate.h>
6 
7 namespace quda {
8 
9  using namespace colorspinor;
10 
11  /**
12  Random number insertion over all field elements
13  */
14  template <class T>
15  void random(T &t) {
16  for (int parity=0; parity<t.Nparity(); parity++) {
17  for (int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
18  for (int s=0; s<t.Nspin(); s++) {
19  for (int c=0; c<t.Ncolor(); c++) {
20  t(parity,x_cb,s,c).real(comm_drand());
21  t(parity,x_cb,s,c).imag(comm_drand());
22  }
23  }
24  }
25  }
26  }
27 
28  /**
29  Create a point source at spacetime point x, spin s and colour c
30  */
31  template <class T>
32  void point(T &t, int x, int s, int c) { t(x%2, x/2, s, c) = 1.0; }
33 
34  /**
35  Set all space-time real elements at spin s and color c of the
36  field equal to k
37  */
38  template <class T>
39  void constant(T &t, int k, int s, int c) {
40  for (int parity=0; parity<t.Nparity(); parity++) {
41  for (int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
42  // set all color-spin components to zero
43  for (int s2=0; s2<t.Nspin(); s2++) {
44  for (int c2=0; c2<t.Ncolor(); c2++) {
45  t(parity,x_cb,s2,c2) = 0.0;
46  }
47  }
48  t(parity,x_cb,s,c) = k; // now set the one we want
49  }
50  }
51  }
52 
53  /**
54  Insert a sinusoidal wave sin ( n * (x[d] / X[d]) * pi ) in dimension d
55  */
56  template <class P>
57  void sin(P &p, int d, int n, int offset) {
58  int coord[4];
59  int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
60  X[0] *= (p.Nparity() == 1) ? 2 : 1; // need full lattice dims
61 
62  for (int parity=0; parity<p.Nparity(); parity++) {
63  for (int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
64  getCoords(coord, x_cb, X, parity);
65 
66  double mode = n * (double)coord[d] / X[d];
67  double k = (double)offset + sin (M_PI * mode);
68 
69  for (int s=0; s<p.Nspin(); s++)
70  for (int c=0; c<p.Ncolor(); c++)
71  p(parity, x_cb, s, c) = k;
72  }
73  }
74  }
75 
76  /**
77  Create a corner source with value "v" on color "c"
78  on a single corner overloaded into "s". "s" is
79  encoded via a bitmap: 1010 -> x = 0, y = 1, z = 0, t = 1
80  corner, for ex.
81  */
82  template <class T>
83  void corner(T &p, int v, int s, int c) {
84  if (p.Nspin() != 1)
85  errorQuda("ERROR: lib/color_spinor_util.cu, corner() is only defined for Nspin = 1 fields.\n");
86 
87  int coord[4];
88  int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
89  X[0] *= (p.Nparity() == 1) ? 2 : 1; // need full lattice dims
90 
91  for (int parity=0; parity<p.Nparity(); parity++) {
92  for (int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
93 
94  // get coords
95  getCoords(coord, x_cb, X, parity);
96 
97  // Figure out corner of current site.
98  int corner = 8*(coord[3]%2)+4*(coord[2]%2)+2*(coord[1]%2)+(coord[0]%2);
99 
100  // set all color components to zero
101  for (int c2=0; c2<p.Ncolor(); c2++) {
102  p(parity,x_cb,0,c2) = 0.0;
103  }
104  // except the corner and color we want
105  if (s == corner)
106  p(parity,x_cb,0,c) = (double)v;
107  }
108  }
109  }
110 
111  // print out the vector at volume point x
112  template <typename Float, int nSpin, int nColor, QudaFieldOrder order>
113  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
114  FieldOrderCB<Float,nSpin,nColor,1,order> A(a);
115  if (sourceType == QUDA_RANDOM_SOURCE) random(A);
116  else if (sourceType == QUDA_POINT_SOURCE) point(A, x, s, c);
117  else if (sourceType == QUDA_CONSTANT_SOURCE) constant(A, x, s, c);
118  else if (sourceType == QUDA_SINUSOIDAL_SOURCE) sin(A, x, s, c);
119  else if (sourceType == QUDA_CORNER_SOURCE) corner(A, x, s, c);
120  else errorQuda("Unsupported source type %d", sourceType);
121  }
122 
123  template <typename Float, int nSpin, QudaFieldOrder order>
124  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
125  if (a.Ncolor() == 3) {
126  genericSource<Float,nSpin,3,order>(a,sourceType, x, s, c);
127 #ifdef GPU_MULTIGRID
128  } else if (a.Ncolor() == 4) {
129  genericSource<Float,nSpin,4,order>(a,sourceType, x, s, c);
130  } else if (a.Ncolor() == 6) { // for Wilson free field
131  genericSource<Float,nSpin,6,order>(a,sourceType, x, s, c);
132  } else if (a.Ncolor() == 8) {
133  genericSource<Float,nSpin,8,order>(a,sourceType, x, s, c);
134  } else if (a.Ncolor() == 12) {
135  genericSource<Float,nSpin,12,order>(a,sourceType, x, s, c);
136  } else if (a.Ncolor() == 16) {
137  genericSource<Float,nSpin,16,order>(a,sourceType, x, s, c);
138  } else if (a.Ncolor() == 20) {
139  genericSource<Float,nSpin,20,order>(a,sourceType, x, s, c);
140  } else if (a.Ncolor() == 24) {
141  genericSource<Float,nSpin,24,order>(a,sourceType, x, s, c);
142 #ifdef NSPIN4
143  } else if (a.Ncolor() == 32) {
144  genericSource<Float,nSpin,32,order>(a,sourceType, x, s, c);
145 #endif // NSPIN4
146 #ifdef NSPIN1
147  } else if (a.Ncolor() == 64) {
148  genericSource<Float,nSpin,64,order>(a,sourceType, x, s, c);
149  } else if (a.Ncolor() == 96) {
150  genericSource<Float,nSpin,96,order>(a,sourceType, x, s, c);
151 #endif // NSPIN1
152 #endif // GPU_MULTIGRID
153  } else {
154  errorQuda("Unsupported nColor=%d\n", a.Ncolor());
155  }
156  }
157 
158  template <typename Float, QudaFieldOrder order>
159  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
160  if (a.Nspin() == 1) {
161 #ifdef NSPIN1
162  genericSource<Float,1,order>(a,sourceType, x, s, c);
163 #else
164  errorQuda("nSpin=1 not enabled for this build");
165 #endif
166  } else if (a.Nspin() == 2) {
167 #ifdef NSPIN2
168  genericSource<Float,2,order>(a,sourceType, x, s, c);
169 #else
170  errorQuda("nSpin=2 not enabled for this build");
171 #endif
172  } else if (a.Nspin() == 4) {
173 #ifdef NSPIN4
174  genericSource<Float,4,order>(a,sourceType, x, s, c);
175 #else
176  errorQuda("nSpin=4 not enabled for this build");
177 #endif
178  } else {
179  errorQuda("Unsupported nSpin=%d\n", a.Nspin());
180  }
181  }
182 
183  template <typename Float>
184  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
185  if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
186  genericSource<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,sourceType, x, s, c);
187  } else {
188  errorQuda("Unsupported field order %d\n", a.FieldOrder());
189  }
190 
191  }
192 
193  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
194 
195  if (a.Precision() == QUDA_DOUBLE_PRECISION) {
196  genericSource<double>(a,sourceType, x, s, c);
197  } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
198  genericSource<float>(a, sourceType, x, s, c);
199  } else {
200  errorQuda("Precision not supported");
201  }
202 
203  }
204 
205 
206  template <class U, class V>
207  int compareSpinor(const U &u, const V &v, const int tol) {
208  int fail_check = 16*tol;
209  int *fail = new int[fail_check];
210  for (int f=0; f<fail_check; f++) fail[f] = 0;
211 
212  int N = 2*u.Nspin()*u.Ncolor();
213  int *iter = new int[N];
214  for (int i=0; i<N; i++) iter[i] = 0;
215 
216  for (int parity=0; parity<v.Nparity(); parity++) {
217  for (int x_cb=0; x_cb<u.VolumeCB(); x_cb++) {
218 
219  for (int s=0; s<u.Nspin(); s++) {
220  for (int c=0; c<u.Ncolor(); c++) {
221  for (int z=0; z<2; z++) {
222  int j = (s*u.Ncolor() + c)*2+z;
223 
224  double diff = z==0 ? fabs(u(parity,x_cb,s,c,z).real() - v(parity,x_cb,s,c,z).real()) :
225  fabs(u(parity,x_cb,s,c).imag() - v(parity,x_cb,s,c).imag());
226 
227  for (int f=0; f<fail_check; f++) {
228  if (diff > pow(10.0,-(f+1)/(double)tol) || std::isnan(diff)) {
229  fail[f]++;
230  }
231  }
232 
233  if (diff > 1e-3 || std::isnan(diff)) iter[j]++;
234  }
235  }
236  }
237  }
238  }
239 
240  // reduce over all processes
241  for (int i=0; i<N; i++) comm_allreduce_int(&iter[i]);
242  for (int f=0; f<fail_check; f++) comm_allreduce_int(&fail[f]);
243 
244  for (int i=0; i<N; i++) printfQuda("%d fails = %d\n", i, iter[i]);
245 
246  int accuracy_level =0;
247  for (int f=0; f<fail_check; f++) {
248  if (fail[f] == 0) accuracy_level = f+1;
249  }
250 
251  size_t total = u.Nparity()*u.VolumeCB()*N*comm_size();
252  for (int f=0; f<fail_check; f++) {
253  printfQuda("%e Failures: %d / %lu = %e\n", pow(10.0,-(f+1)/(double)tol),
254  fail[f], total, fail[f] / (double)total);
255  }
256 
257  delete []iter;
258  delete []fail;
259 
260  return accuracy_level;
261  }
262 
263  template <typename oFloat, typename iFloat, QudaFieldOrder order>
264  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
265  int ret = 0;
266  if (a.Ncolor() == 3) {
267  constexpr int Nc = 3;
268  if (a.Nspin() == 4) {
269  constexpr int Ns = 4;
270  FieldOrderCB<oFloat,Ns,Nc,1,order> A(a);
271  FieldOrderCB<iFloat,Ns,Nc,1,order> B(b);
272 
273  double rescale = 1.0 / A.abs_max();
274 
275  auto a_(a), b_(b);
276  blas::ax(rescale, a_);
277  blas::ax(rescale, b_);
278  FieldOrderCB<oFloat, Ns, Nc, 1, order> A_(a_);
279  FieldOrderCB<iFloat, Ns, Nc, 1, order> B_(b_);
280 
281  ret = compareSpinor(A_, B_, tol);
282  } else if (a.Nspin() == 1) {
283  constexpr int Ns = 1;
284  FieldOrderCB<oFloat,Ns,Nc,1,order> A(a);
285  FieldOrderCB<iFloat,Ns,Nc,1,order> B(b);
286 
287  double rescale = 1.0 / A.abs_max();
288 
289  auto a_(a), b_(b);
290  blas::ax(rescale, a_);
291  blas::ax(rescale, b_);
292  FieldOrderCB<oFloat, Ns, Nc, 1, order> A_(a_);
293  FieldOrderCB<iFloat, Ns, Nc, 1, order> B_(b_);
294 
295  ret = compareSpinor(A_, B_, tol);
296  }
297  } else {
298  errorQuda("Number of colors %d not supported", a.Ncolor());
299  }
300  return ret;
301  }
302 
303 
304  template <typename oFloat, typename iFloat>
305  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
306  int ret = 0;
307  if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER && b.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
308  ret = genericCompare<oFloat,iFloat,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a, b, tol);
309  } else {
310  errorQuda("Unsupported field order %d\n", a.FieldOrder());
311  }
312  return ret;
313  }
314 
315 
316  template <typename oFloat>
317  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
318  int ret = 0;
319  if (b.Precision() == QUDA_DOUBLE_PRECISION) {
320  ret = genericCompare<oFloat,double>(a, b, tol);
321  } else if (b.Precision() == QUDA_SINGLE_PRECISION) {
322  ret = genericCompare<oFloat,float>(a, b, tol);
323  } else {
324  errorQuda("Precision not supported");
325  }
326  return ret;
327  }
328 
329 
330  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
331  int ret = 0;
332  if (a.Precision() == QUDA_DOUBLE_PRECISION) {
333  ret = genericCompare<double>(a, b, tol);
334  } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
335  ret = genericCompare<float>(a, b, tol);
336  } else {
337  errorQuda("Precision not supported");
338  }
339  return ret;
340  }
341 
342 
343  template <class Order>
344  void print_vector(const Order &o, unsigned int x) {
345 
346  int x_cb = x / o.Nparity();
347  int parity = x%o.Nparity();
348 
349  for (int s=0; s<o.Nspin(); s++) {
350  printfQuda("x = %u, s = %d, { ", x_cb, s);
351  for (int c=0; c<o.Ncolor(); c++) {
352  printfQuda("(%f,%f) ", o(parity, x_cb, s, c).real(), o(parity, x_cb, s, c).imag());
353  }
354  printfQuda("}\n");
355  }
356 
357  }
358 
359  // print out the vector at volume point x
360  template <typename Float, QudaFieldOrder order> void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
361  {
362  if (a.Ncolor() == 3 && a.Nspin() == 1) {
363  FieldOrderCB<Float,1,3,1,order> A(a);
364  print_vector(A, x);
365  }
366  else if (a.Ncolor() == 3 && a.Nspin() == 4) {
367  FieldOrderCB<Float,4,3,1,order> A(a);
368  print_vector(A, x);
369  }
370  else if (a.Ncolor() == 3 && a.Nspin() == 1) {
371  FieldOrderCB<Float,1,3,1,order> A(a);
372  print_vector(A, x);
373  }
374  else if (a.Ncolor() == 2 && a.Nspin() == 2) {
375  FieldOrderCB<Float,2,2,1,order> A(a);
376  print_vector(A, x);
377  }
378  else if (a.Ncolor() == 24 && a.Nspin() == 2) {
379  FieldOrderCB<Float,2,24,1,order> A(a);
380  print_vector(A, x);
381  }
382  else if (a.Ncolor() == 6 && a.Nspin() == 4) {
383  FieldOrderCB<Float,4,6,1,order> A(a);
384  print_vector(A, x);
385  }
386  else if (a.Ncolor() == 72 && a.Nspin() == 4) {
387  FieldOrderCB<Float,4,72,1,order> A(a);
388  print_vector(A, x);
389  }
390  else if (a.Ncolor() == 576 && a.Nspin() == 2) {
391  FieldOrderCB<Float,2,576,1,order> A(a);
392  print_vector(A, x);
393  }
394 #ifdef GPU_STAGGERED_DIRAC
395  else if (a.Ncolor() == 64 && a.Nspin() == 2) {
396  FieldOrderCB<Float,2,64,1,order> A(a);
397  print_vector(A, x);
398  }
399 else if (a.Ncolor() == 96 && a.Nspin() == 2) {
400  FieldOrderCB<Float,2,96,1,order> A(a);
401  print_vector(A, x);
402  }
403 #endif
404  else {
405  errorQuda("Not supported Ncolor = %d, Nspin = %d", a.Ncolor(), a.Nspin());
406  }
407  }
408 
409  // print out the vector at volume point x
410  template <typename Float> void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
411  {
412  if (a.FieldOrder() == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) {
413  genericPrintVector<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,x);
414  } else {
415  errorQuda("Unsupported field order %d\n", a.FieldOrder());
416  }
417  }
418 
419  // print out the vector at volume point x
420  void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
421  {
422  if (a.Precision() == QUDA_DOUBLE_PRECISION) {
423  genericPrintVector<double>(a,x);
424  } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
425  genericPrintVector<float>(a,x);
426  } else {
427  errorQuda("Precision %d not implemented", a.Precision());
428  }
429  }
430 
431  // Eventually we should merge the below device print function and
432  // the above host print function.
433 
434  template <typename StoreType, int Ns, int Nc, QudaFieldOrder FieldOrder>
435  void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
436  {
437 
438  typedef colorspinor::AccessorCB<StoreType, Ns, Nc, 1, FieldOrder> AccessorType;
439 
440  AccessorType A(field);
441 
442  // Register type
443  typedef typename scalar<typename mapper<StoreType>::type>::type Float;
444 
445  // Allocate a real+imag component for the storage type.
446  StoreType indiv_num[2];
447 
448  // Allocate space for the full site.
449  Float *data_cpu = new Float[2 * Ns * Nc];
450 
451  // Grab the pointer to the field.
452  complex<StoreType> *field_ptr = (complex<StoreType> *)field.V();
453 
454  // Grab the pointer to the norm field. Might be ignored as appropriate.
455  float *norm_ptr = (float *)field.Norm();
456  float scale = 1.0;
457 
458  if (isFixed<StoreType>::value) {
459  qudaMemcpy(&scale, &norm_ptr[i], sizeof(float), cudaMemcpyDeviceToHost);
460  scale *= fixedInvMaxValue<StoreType>::value;
461  }
462 
463  for (int s = 0; s < Ns; s++) {
464  for (int c = 0; c < Nc; c++) {
465  qudaMemcpy(indiv_num, &field_ptr[A.index(i % 2, i / 2, s, c, 0)], 2 * sizeof(StoreType), cudaMemcpyDeviceToHost);
466  data_cpu[2 * (c + Nc * s)] = scale * static_cast<Float>(indiv_num[0]);
467  data_cpu[2 * (c + Nc * s) + 1] = scale * static_cast<Float>(indiv_num[1]);
468  }
469  }
470  // print
471  for (int s = 0; s < Ns; s++) {
472  printfQuda("x = %u, s = %d, { ", i, s);
473  for (int c = 0; c < Nc; c++) {
474  printfQuda("(%f,%f) ", data_cpu[(s * Nc + c) * 2], data_cpu[(s * Nc + c) * 2 + 1]);
475  }
476  printfQuda("}\n");
477  }
478 
479  delete[] data_cpu;
480  }
481 
482  template <typename Float, int Ns, int Nc>
483  void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
484  {
485  switch (field.FieldOrder()) {
486  case QUDA_FLOAT_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT_FIELD_ORDER>(field, i); break;
487  case QUDA_FLOAT2_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT2_FIELD_ORDER>(field, i); break;
488  case QUDA_FLOAT4_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT4_FIELD_ORDER>(field, i); break;
489  case QUDA_FLOAT8_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT8_FIELD_ORDER>(field, i); break;
490  case QUDA_SPACE_SPIN_COLOR_FIELD_ORDER:
491  genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(field, i);
492  break;
493  case QUDA_SPACE_COLOR_SPIN_FIELD_ORDER:
494  genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER>(field, i);
495  break;
496  default: errorQuda("Unsupported field order %d", field.FieldOrder());
497  }
498  }
499 
500  template <typename Float> struct GenericCudaPrintVector {
501  GenericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
502  {
503  if (field.Ncolor() == 3 && field.Nspin() == 4) {
504  genericCudaPrintVector<Float, 4, 3>(field, i);
505  } else if (field.Ncolor() == 3 && field.Nspin() == 1) {
506  genericCudaPrintVector<Float, 1, 3>(field, i);
507  } else if (field.Ncolor() == 6 && field.Nspin() == 2) { // wilson free field MG
508  genericCudaPrintVector<Float, 2, 6>(field, i);
509  } else if (field.Ncolor() == 24 && field.Nspin() == 2) { // common value for Wilson, also staggered free field
510  genericCudaPrintVector<Float, 2, 24>(field, i);
511  } else if (field.Ncolor() == 32 && field.Nspin() == 2) {
512  genericCudaPrintVector<Float, 2, 32>(field, i);
513 #ifdef GPU_STAGGERED_DIRAC
514  } else if (field.Ncolor() == 64 && field.Nspin() == 2) {
515  genericCudaPrintVector<Float, 2, 64>(field, i);
516  } else if (field.Ncolor() == 96 && field.Nspin() == 2) {
517  genericCudaPrintVector<Float, 2, 96>(field, i);
518 #endif
519  } else {
520  errorQuda("Not supported Ncolor = %d, Nspin = %d", field.Ncolor(), field.Nspin());
521  }
522  }
523  };
524 
525  void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
526  {
527  instantiatePrecision<GenericCudaPrintVector>(field, i);
528  }
529 
530 } // namespace quda