QUDA  1.0.0
color_spinor_util.cu
Go to the documentation of this file.
1 #include <color_spinor_field.h>
3 #include <index_helper.cuh>
4 #include <blas_quda.h>
5 
6 namespace quda {
7 
8  using namespace colorspinor;
9 
13  template <class T>
14  void random(T &t) {
15  for (int parity=0; parity<t.Nparity(); parity++) {
16  for (int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
17  for (int s=0; s<t.Nspin(); s++) {
18  for (int c=0; c<t.Ncolor(); c++) {
19  t(parity,x_cb,s,c).real(comm_drand());
20  t(parity,x_cb,s,c).imag(comm_drand());
21  }
22  }
23  }
24  }
25  }
26 
30  template <class T>
31  void point(T &t, int x, int s, int c) { t(x%2, x/2, s, c) = 1.0; }
32 
37  template <class T>
38  void constant(T &t, int k, int s, int c) {
39  for (int parity=0; parity<t.Nparity(); parity++) {
40  for (int x_cb=0; x_cb<t.VolumeCB(); x_cb++) {
41  // set all color-spin components to zero
42  for (int s2=0; s2<t.Nspin(); s2++) {
43  for (int c2=0; c2<t.Ncolor(); c2++) {
44  t(parity,x_cb,s2,c2) = 0.0;
45  }
46  }
47  t(parity,x_cb,s,c) = k; // now set the one we want
48  }
49  }
50  }
51 
55  template <class P>
56  void sin(P &p, int d, int n, int offset) {
57  int coord[4];
58  int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
59  X[0] *= (p.Nparity() == 1) ? 2 : 1; // need full lattice dims
60 
61  for (int parity=0; parity<p.Nparity(); parity++) {
62  for (int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
63  getCoords(coord, x_cb, X, parity);
64 
65  double mode = n * (double)coord[d] / X[d];
66  double k = (double)offset + sin (M_PI * mode);
67 
68  for (int s=0; s<p.Nspin(); s++)
69  for (int c=0; c<p.Ncolor(); c++)
70  p(parity, x_cb, s, c) = k;
71  }
72  }
73  }
74 
81  template <class T>
82  void corner(T &p, int v, int s, int c) {
83  if (p.Nspin() != 1)
84  errorQuda("ERROR: lib/color_spinor_util.cu, corner() is only defined for Nspin = 1 fields.\n");
85 
86  int coord[4];
87  int X[4] = { p.X(0), p.X(1), p.X(2), p.X(3)};
88  X[0] *= (p.Nparity() == 1) ? 2 : 1; // need full lattice dims
89 
90  for (int parity=0; parity<p.Nparity(); parity++) {
91  for (int x_cb=0; x_cb<p.VolumeCB(); x_cb++) {
92 
93  // get coords
94  getCoords(coord, x_cb, X, parity);
95 
96  // Figure out corner of current site.
97  int corner = 8*(coord[3]%2)+4*(coord[2]%2)+2*(coord[1]%2)+(coord[0]%2);
98 
99  // set all color components to zero
100  for (int c2=0; c2<p.Ncolor(); c2++) {
101  p(parity,x_cb,0,c2) = 0.0;
102  // except the corner and color we want
103  if (s == corner)
104  p(parity,x_cb,0,c) = (double)v;
105  }
106  }
107  }
108  }
109 
110  // print out the vector at volume point x
111  template <typename Float, int nSpin, int nColor, QudaFieldOrder order>
112  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
114  if (sourceType == QUDA_RANDOM_SOURCE) random(A);
115  else if (sourceType == QUDA_POINT_SOURCE) point(A, x, s, c);
116  else if (sourceType == QUDA_CONSTANT_SOURCE) constant(A, x, s, c);
117  else if (sourceType == QUDA_SINUSOIDAL_SOURCE) sin(A, x, s, c);
118  else if (sourceType == QUDA_CORNER_SOURCE) corner(A, x, s, c);
119  else errorQuda("Unsupported source type %d", sourceType);
120  }
121 
122  template <typename Float, int nSpin, QudaFieldOrder order>
123  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
124  if (a.Ncolor() == 3) {
125  genericSource<Float,nSpin,3,order>(a,sourceType, x, s, c);
126  } else if (a.Ncolor() == 4) {
127  genericSource<Float,nSpin,4,order>(a,sourceType, x, s, c);
128  } else if (a.Ncolor() == 6) { // for Wilson free field
129  genericSource<Float,nSpin,6,order>(a,sourceType, x, s, c);
130  } else if (a.Ncolor() == 8) {
131  genericSource<Float,nSpin,8,order>(a,sourceType, x, s, c);
132  } else if (a.Ncolor() == 12) {
133  genericSource<Float,nSpin,12,order>(a,sourceType, x, s, c);
134  } else if (a.Ncolor() == 16) {
135  genericSource<Float,nSpin,16,order>(a,sourceType, x, s, c);
136  } else if (a.Ncolor() == 20) {
137  genericSource<Float,nSpin,20,order>(a,sourceType, x, s, c);
138  } else if (a.Ncolor() == 24) {
139  genericSource<Float,nSpin,24,order>(a,sourceType, x, s, c);
140  } else if (a.Ncolor() == 32) {
141  genericSource<Float,nSpin,32,order>(a,sourceType, x, s, c);
142  } else {
143  errorQuda("Unsupported nColor=%d\n", a.Ncolor());
144  }
145  }
146 
147  template <typename Float, QudaFieldOrder order>
148  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
149  if (a.Nspin() == 1) {
150  genericSource<Float,1,order>(a,sourceType, x, s, c);
151  } else if (a.Nspin() == 2) {
152  genericSource<Float,2,order>(a,sourceType, x, s, c);
153  } else if (a.Nspin() == 4) {
154  genericSource<Float,4,order>(a,sourceType, x, s, c);
155  } else {
156  errorQuda("Unsupported nSpin=%d\n", a.Nspin());
157  }
158  }
159 
160  template <typename Float>
161  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
163  genericSource<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,sourceType, x, s, c);
164  } else {
165  errorQuda("Unsupported field order %d\n", a.FieldOrder());
166  }
167 
168  }
169 
170  void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) {
171 
172  if (a.Precision() == QUDA_DOUBLE_PRECISION) {
173  genericSource<double>(a,sourceType, x, s, c);
174  } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
175  genericSource<float>(a, sourceType, x, s, c);
176  } else {
177  errorQuda("Precision not supported");
178  }
179 
180  }
181 
182 
183  template <class U, class V>
184  int compareSpinor(const U &u, const V &v, const int tol) {
185  int fail_check = 16*tol;
186  int *fail = new int[fail_check];
187  for (int f=0; f<fail_check; f++) fail[f] = 0;
188 
189  int N = 2*u.Nspin()*u.Ncolor();
190  int *iter = new int[N];
191  for (int i=0; i<N; i++) iter[i] = 0;
192 
193  for (int parity=0; parity<v.Nparity(); parity++) {
194  for (int x_cb=0; x_cb<u.VolumeCB(); x_cb++) {
195 
196  for (int s=0; s<u.Nspin(); s++) {
197  for (int c=0; c<u.Ncolor(); c++) {
198  for (int z=0; z<2; z++) {
199  int j = (s*u.Ncolor() + c)*2+z;
200 
201  double diff = z==0 ? fabs(u(parity,x_cb,s,c,z).real() - v(parity,x_cb,s,c,z).real()) :
202  fabs(u(parity,x_cb,s,c).imag() - v(parity,x_cb,s,c).imag());
203 
204  for (int f=0; f<fail_check; f++) {
205  if (diff > pow(10.0,-(f+1)/(double)tol)) {
206  fail[f]++;
207  }
208  }
209 
210  if (diff > 1e-3) iter[j]++;
211  }
212  }
213  }
214  }
215  }
216 
217  // reduce over all processes
218  for (int i=0; i<N; i++) comm_allreduce_int(&iter[i]);
219  for (int f=0; f<fail_check; f++) comm_allreduce_int(&fail[f]);
220 
221  for (int i=0; i<N; i++) printfQuda("%d fails = %d\n", i, iter[i]);
222 
223  int accuracy_level =0;
224  for (int f=0; f<fail_check; f++) {
225  if (fail[f] == 0) accuracy_level = f+1;
226  }
227 
228  size_t total = u.Nparity()*u.VolumeCB()*N*comm_size();
229  for (int f=0; f<fail_check; f++) {
230  printfQuda("%e Failures: %d / %lu = %e\n", pow(10.0,-(f+1)/(double)tol),
231  fail[f], total, fail[f] / (double)total);
232  }
233 
234  delete []iter;
235  delete []fail;
236 
237  return accuracy_level;
238  }
239 
240  template <typename oFloat, typename iFloat, QudaFieldOrder order>
242  int ret = 0;
243  if (a.Ncolor() == 3) {
244  constexpr int Nc = 3;
245  if (a.Nspin() == 4) {
246  constexpr int Ns = 4;
249 
250  double rescale = 1.0 / A.abs_max();
251 
252  auto a_(a), b_(b);
253  blas::ax(rescale, a_);
254  blas::ax(rescale, b_);
257 
258  ret = compareSpinor(A_, B_, tol);
259  } else if (a.Nspin() == 1) {
260  constexpr int Ns = 1;
263 
264  double rescale = 1.0 / A.abs_max();
265 
266  auto a_(a), b_(b);
267  blas::ax(rescale, a_);
268  blas::ax(rescale, b_);
271 
272  ret = compareSpinor(A_, B_, tol);
273  }
274  } else {
275  errorQuda("Number of colors %d not supported", a.Ncolor());
276  }
277  return ret;
278  }
279 
280 
281  template <typename oFloat, typename iFloat>
282  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
283  int ret = 0;
285  ret = genericCompare<oFloat,iFloat,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a, b, tol);
286  } else {
287  errorQuda("Unsupported field order %d\n", a.FieldOrder());
288  }
289  return ret;
290  }
291 
292 
293  template <typename oFloat>
294  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
295  int ret = 0;
296  if (b.Precision() == QUDA_DOUBLE_PRECISION) {
297  ret = genericCompare<oFloat,double>(a, b, tol);
298  } else if (b.Precision() == QUDA_SINGLE_PRECISION) {
299  ret = genericCompare<oFloat,float>(a, b, tol);
300  } else {
301  errorQuda("Precision not supported");
302  }
303  return ret;
304  }
305 
306 
307  int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) {
308  int ret = 0;
309  if (a.Precision() == QUDA_DOUBLE_PRECISION) {
310  ret = genericCompare<double>(a, b, tol);
311  } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
312  ret = genericCompare<float>(a, b, tol);
313  } else {
314  errorQuda("Precision not supported");
315  }
316  return ret;
317  }
318 
319 
320  template <class Order>
321  void print_vector(const Order &o, unsigned int x) {
322 
323  int x_cb = x / o.Nparity();
324  int parity = x%o.Nparity();
325 
326  for (int s=0; s<o.Nspin(); s++) {
327  printfQuda("x = %u, s = %d, { ", x_cb, s);
328  for (int c=0; c<o.Ncolor(); c++) {
329  printfQuda("(%f,%f) ", o(parity, x_cb, s, c).real(), o(parity, x_cb, s, c).imag());
330  }
331  printfQuda("}\n");
332  }
333 
334  }
335 
336  // print out the vector at volume point x
337  template <typename Float, QudaFieldOrder order> void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
338  {
339  if (a.Ncolor() == 3 && a.Nspin() == 1) {
341  print_vector(A, x);
342  }
343  else if (a.Ncolor() == 3 && a.Nspin() == 4) {
345  print_vector(A, x);
346  }
347  else if (a.Ncolor() == 2 && a.Nspin() == 2) {
349  print_vector(A, x);
350  }
351  else if (a.Ncolor() == 24 && a.Nspin() == 2) {
353  print_vector(A, x);
354  }
355  else if (a.Ncolor() == 6 && a.Nspin() == 4) {
357  print_vector(A, x);
358  }
359  else if (a.Ncolor() == 72 && a.Nspin() == 4) {
361  print_vector(A, x);
362  }
363  else if (a.Ncolor() == 576 && a.Nspin() == 2) {
365  print_vector(A, x);
366  } else {
367  errorQuda("Not supported Ncolor = %d, Nspin = %d", a.Ncolor(), a.Nspin());
368  }
369  }
370 
371  // print out the vector at volume point x
372  template <typename Float> void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
373  {
375  genericPrintVector<Float,QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(a,x);
376  } else {
377  errorQuda("Unsupported field order %d\n", a.FieldOrder());
378  }
379  }
380 
381  // print out the vector at volume point x
382  void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
383  {
384  if (a.Precision() == QUDA_DOUBLE_PRECISION) {
385  genericPrintVector<double>(a,x);
386  } else if (a.Precision() == QUDA_SINGLE_PRECISION) {
387  genericPrintVector<float>(a,x);
388  } else {
389  errorQuda("Precision %d not implemented", a.Precision());
390  }
391  }
392 
393  // Eventually we should merge the below device print function and
394  // the above host print function.
395 
396  template <typename StoreType, int Ns, int Nc, QudaFieldOrder FieldOrder>
397  void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
398  {
399 
401 
402  AccessorType A(field);
403 
404  // Register type
405  typedef typename scalar<typename mapper<StoreType>::type>::type Float;
406 
407  // Allocate a real+imag component for the storage type.
408  StoreType indiv_num[2];
409 
410  // Allocate space for the full site.
411  Float *data_cpu = new Float[2 * Ns * Nc];
412 
413  // Grab the pointer to the field.
414  complex<StoreType> *field_ptr = (complex<StoreType> *)field.V();
415 
416  // Grab the pointer to the norm field. Might be ignored as appropriate.
417  float *norm_ptr = (float *)field.Norm();
418  float scale = 1.0;
419 
421  cudaMemcpy(&scale, &norm_ptr[i], sizeof(float), cudaMemcpyDeviceToHost);
423  }
424 
425  for (int s = 0; s < Ns; s++) {
426  for (int c = 0; c < Nc; c++) {
427  cudaMemcpy(indiv_num, &field_ptr[A.index(i % 2, i / 2, s, c, 0)], 2 * sizeof(StoreType), cudaMemcpyDeviceToHost);
428  data_cpu[2 * (c + Nc * s)] = scale * static_cast<Float>(indiv_num[0]);
429  data_cpu[2 * (c + Nc * s) + 1] = scale * static_cast<Float>(indiv_num[1]);
430  }
431  }
432  // print
433  for (int s = 0; s < Ns; s++) {
434  printfQuda("x = %u, s = %d, { ", i, s);
435  for (int c = 0; c < Nc; c++) {
436  printfQuda("(%f,%f) ", data_cpu[(s * Nc + c) * 2], data_cpu[(s * Nc + c) * 2 + 1]);
437  }
438  printfQuda("}\n");
439  }
440 
441  delete[] data_cpu;
442  }
443 
444  template <typename Float, int Ns, int Nc>
445  void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
446  {
447  switch (field.FieldOrder()) {
448  case QUDA_FLOAT_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT_FIELD_ORDER>(field, i); break;
449  case QUDA_FLOAT2_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT2_FIELD_ORDER>(field, i); break;
450  case QUDA_FLOAT4_FIELD_ORDER: genericCudaPrintVector<Float, Ns, Nc, QUDA_FLOAT4_FIELD_ORDER>(field, i); break;
452  genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER>(field, i);
453  break;
455  genericCudaPrintVector<Float, Ns, Nc, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER>(field, i);
456  break;
457  default: errorQuda("Unsupported field order %d", field.FieldOrder());
458  }
459  }
460 
461  template <typename Float> void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
462  {
463  if (field.Ncolor() == 3 && field.Nspin() == 4) {
464  genericCudaPrintVector<Float, 4, 3>(field, i);
465  } else if (field.Ncolor() == 3 && field.Nspin() == 1) {
466  genericCudaPrintVector<Float, 1, 3>(field, i);
467  } else if (field.Ncolor() == 6 && field.Nspin() == 2) { // wilson free field MG
468  genericCudaPrintVector<Float, 2, 6>(field, i);
469  } else if (field.Ncolor() == 24 && field.Nspin() == 2) { // common value for Wilson, also staggered free field
470  genericCudaPrintVector<Float, 2, 24>(field, i);
471  } else if (field.Ncolor() == 32 && field.Nspin() == 2) {
472  genericCudaPrintVector<Float, 2, 32>(field, i);
473  } else {
474  errorQuda("Not supported Ncolor = %d, Nspin = %d", field.Ncolor(), field.Nspin());
475  }
476  }
477 
478  void genericCudaPrintVector(const cudaColorSpinorField &field, unsigned int i)
479  {
480 
481  switch (field.Precision()) {
482  case QUDA_QUARTER_PRECISION: genericCudaPrintVector<char>(field, i); break;
483  case QUDA_HALF_PRECISION: genericCudaPrintVector<short>(field, i); break;
484  case QUDA_SINGLE_PRECISION: genericCudaPrintVector<float>(field, i); break;
485  case QUDA_DOUBLE_PRECISION: genericCudaPrintVector<double>(field, i); break;
486  default: errorQuda("Unsupported precision = %d\n", field.Precision());
487  }
488 
489  /*
490  ColorSpinorParam param(*this);
491  param.fieldOrder = QUDA_SPACE_SPIN_COLOR_FIELD_ORDER;
492  param.location = QUDA_CPU_FIELD_LOCATION;
493  param.create = QUDA_NULL_FIELD_CREATE;
494 
495  cpuColorSpinorField tmp(param);
496  tmp = *this;
497  tmp.PrintVector(i);*/
498  }
499 
500 } // namespace quda
void ax(double a, ColorSpinorField &x)
Definition: blas_quda.cu:508
int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
void genericPrintVector(const cpuColorSpinorField &a, unsigned int x)
void print_vector(const Order &o, unsigned int x)
#define errorQuda(...)
Definition: util_quda.h:121
int compareSpinor(const U &u, const V &v, const int tol)
__host__ double abs_max(bool global=true) const
enum QudaSourceType_s QudaSourceType
void corner(T &p, int v, int s, int c)
int comm_size(void)
Definition: comm_mpi.cpp:88
double tol
Definition: test_util.cpp:1656
__host__ __device__ ValueType sin(ValueType x)
Definition: complex_quda.h:51
void random(T &t)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
double comm_drand(void)
Definition: comm_common.cpp:82
int X[4]
Definition: covdev_test.cpp:70
void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
int V
Definition: test_util.cpp:27
void point(T &t, int x, int s, int c)
__shared__ float s[]
#define printfQuda(...)
Definition: util_quda.h:115
void comm_allreduce_int(int *data)
Definition: comm_mpi.cpp:304
QudaPrecision Precision() const
QudaParity parity
Definition: covdev_test.cpp:54
QudaFieldOrder FieldOrder() const
void constant(T &t, int k, int s, int c)
__host__ __device__ int getCoords(int coord[], const Arg &arg, int &idx, int parity, int &dim)
Compute the space-time coordinates we are at.
void genericCudaPrintVector(const cudaColorSpinorField &a, unsigned x)