QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cpu_color_spinor_field.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <iostream>
5 #include <typeinfo>
6 #include <color_spinor_field.h>
8 #include <comm_quda.h> // for comm_drand()
9 
10 /*
11 Maybe this will be useful at some point
12 
13 #define myalloc(type, n, m0) (type *) aligned_malloc(n*sizeof(type), m0)
14 
15 #define ALIGN 16
16 void *
17 aligned_malloc(size_t n, void **m0)
18 {
19  size_t m = (size_t) safe_malloc(n+ALIGN);
20  *m0 = (void*)m;
21  size_t r = m % ALIGN;
22  if(r) m += (ALIGN - r);
23  return (void *)m;
24 }
25 */
26 
27 namespace quda {
28 
29  /*cpuColorSpinorField::cpuColorSpinorField() :
30  ColorSpinorField(), init(false) {
31 
32  }*/
33 
34 
40 
42  ColorSpinorField(param), init(false), reference(false), order_double(NULL), order_single(NULL) {
43  create(param.create);
44  if (param.create == QUDA_NULL_FIELD_CREATE) {
45  // do nothing
46  } else if (param.create == QUDA_ZERO_FIELD_CREATE) {
47  zero();
48  } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) {
49  v = param.v;
50  reference = true;
51  } else {
52  errorQuda("Creation type %d not supported", param.create);
53  }
54  }
55 
57  ColorSpinorField(src), init(false), reference(false), order_double(NULL), order_single(NULL) {
58  create(QUDA_COPY_FIELD_CREATE);
59  memcpy(v,src.v,bytes);
60  }
61 
63  ColorSpinorField(src), init(false), reference(false), order_double(NULL), order_single(NULL) {
64  create(QUDA_COPY_FIELD_CREATE);
65  if (typeid(src) == typeid(cpuColorSpinorField)) {
66  memcpy(v, dynamic_cast<const cpuColorSpinorField&>(src).v, bytes);
67  } else if (typeid(src) == typeid(cudaColorSpinorField)) {
68  dynamic_cast<const cudaColorSpinorField&>(src).saveSpinorField(*this);
69  } else {
70  errorQuda("Unknown input ColorSpinorField %s", typeid(src).name());
71  }
72  }
73 
75  destroy();
76  }
77 
79  if (typeid(src) == typeid(cudaColorSpinorField)) {
80  *this = (dynamic_cast<const cudaColorSpinorField&>(src));
81  } else if (typeid(src) == typeid(cpuColorSpinorField)) {
82  *this = (dynamic_cast<const cpuColorSpinorField&>(src));
83  } else {
84  errorQuda("Unknown input ColorSpinorField %s", typeid(src).name());
85  }
86  return *this;
87  }
88 
90  if (&src != this) {
91  if (!reference) {
92  destroy();
93  // keep current attributes unless unset
95  create(QUDA_COPY_FIELD_CREATE);
96  }
97  copy(src);
98  }
99  return *this;
100  }
101 
103  if (!reference) { // if the field is a reference, then we must maintain the current state
104  destroy();
105  // keep current attributes unless unset
107  create(QUDA_COPY_FIELD_CREATE);
108  }
109  src.saveSpinorField(*this);
110  return *this;
111  }
112 
113  void cpuColorSpinorField::create(const QudaFieldCreate create) {
114  // these need to be reset to ensure no ghost zones for the cpu
115  // fields since we can't determine during the parent's constructor
116  // whether the field is a cpu or cuda field
117  ghost_length = 0;
118  ghost_norm_length = 0;
121  bytes = total_length * precision; // includes pads and ghost zones
123  norm_bytes = total_norm_length * sizeof(float);
124  norm_bytes = ALIGNMENT_ADJUST(norm_bytes);
125 
126  if (pad != 0) {
127  errorQuda("Non-zero pad not supported");
128  }
129 
130  if (precision == QUDA_HALF_PRECISION) {
131  errorQuda("Half precision not supported");
132  }
133 
137  errorQuda("Field order %d not supported", fieldOrder);
138  }
139 
140  if (create != QUDA_REFERENCE_FIELD_CREATE) {
141  // array of 4-d fields
143  int Ls = x[nDim-1];
144  v = (void**)safe_malloc(Ls * sizeof(void*));
145  for (int i=0; i<Ls; i++) ((void**)v)[i] = safe_malloc(bytes / Ls);
146  } else {
147  v = safe_malloc(bytes);
148  }
149  init = true;
150  }
151 
152  createOrder(); // need to do this for references?
153  }
154 
155  void cpuColorSpinorField::createOrder() {
156 
157  if (precision == QUDA_DOUBLE_PRECISION) {
159  order_double = new SpaceSpinColorOrder<double>(*this);
161  order_double = new SpaceColorSpinOrder<double>(*this);
163  order_double = new QOPDomainWallOrder<double>(*this);
164  else
165  errorQuda("Order %d not supported in cpuColorSpinorField", fieldOrder);
166  } else if (precision == QUDA_SINGLE_PRECISION) {
168  order_single = new SpaceSpinColorOrder<float>(*this);
170  order_single = new SpaceColorSpinOrder<float>(*this);
172  order_single = new QOPDomainWallOrder<float>(*this);
173  else
174  errorQuda("Order %d not supported in cpuColorSpinorField", fieldOrder);
175  } else {
176  errorQuda("Precision %d not supported", precision);
177  }
178 
179  }
180 
181  void cpuColorSpinorField::destroy() {
182 
183  if (precision == QUDA_DOUBLE_PRECISION) {
184  delete order_double;
185  } else if (precision == QUDA_SINGLE_PRECISION) {
186  delete order_single;
187  } else {
188  errorQuda("Precision %d not supported", precision);
189  }
190 
191  if (init) {
193  for (int i=0; i<x[nDim-1]; i++) host_free(((void**)v)[i]);
194  host_free(v);
195  init = false;
196  }
197 
198  }
199 
200  template <class D, class S>
201  void genericCopy(D &dst, const S &src) {
202 
203  for (int x=0; x<dst.Volume(); x++) {
204  for (int s=0; s<dst.Nspin(); s++) {
205  for (int c=0; c<dst.Ncolor(); c++) {
206  for (int z=0; z<2; z++) {
207  dst(x, s, c, z) = src(x, s, c, z);
208  }
209  }
210  }
211  }
212 
213  }
214 
216  checkField(*this, src);
217  if (fieldOrder == src.fieldOrder) {
219  for (int i=0; i<x[nDim-1]; i++) memcpy(((void**)v)[i], ((void**)src.v)[i], bytes);
220  else
221  memcpy(v, src.v, bytes);
222  } else {
223  if (precision == QUDA_DOUBLE_PRECISION) {
224  if (src.precision == QUDA_DOUBLE_PRECISION) {
225  genericCopy(*order_double, *(src.order_double));
226  } else {
227  genericCopy(*order_double, *(src.order_single));
228  }
229  } else {
230  if (src.precision == QUDA_DOUBLE_PRECISION) {
231  genericCopy(*order_single, *(src.order_double));
232  } else {
233  genericCopy(*order_single, *(src.order_single));
234  }
235  }
236  }
237  }
238 
241  else for (int i=0; i<x[nDim-1]; i++) memset(((void**)v)[i], '\0', bytes/x[nDim-1]);
242  }
243 
244  // Random number insertion over all field elements
245  template <class T>
246  void random(T &t) {
247  for (int x=0; x<t.Volume(); x++) {
248  for (int s=0; s<t.Nspin(); s++) {
249  for (int c=0; c<t.Ncolor(); c++) {
250  for (int z=0; z<2; z++) {
251  t(x,s,c,z) = comm_drand();
252  }
253  }
254  }
255  }
256  }
257 
258  // Create a point source at spacetime point x, spin s and colour c
259  template <class T>
260  void point(T &t, const int x, const int s, const int c) { t(x, s, c, 0) = 1.0; }
261 
262  void cpuColorSpinorField::Source(const QudaSourceType sourceType, const int x,
263  const int s, const int c) {
264 
265  switch(sourceType) {
266 
267  case QUDA_RANDOM_SOURCE:
268  if (precision == QUDA_DOUBLE_PRECISION) random(*order_double);
269  else if (precision == QUDA_SINGLE_PRECISION) random(*order_single);
270  else errorQuda("Precision not supported");
271  break;
272 
273  case QUDA_POINT_SOURCE:
274  zero();
275  if (precision == QUDA_DOUBLE_PRECISION) point(*order_double, x, s, c);
276  else if (precision == QUDA_SINGLE_PRECISION) point(*order_single, x, s, c);
277  else errorQuda("Precision not supported");
278  break;
279 
280  default:
281  errorQuda("Source type %d not implemented", sourceType);
282 
283  }
284 
285  }
286 
287  template <class U, class V>
288  int compareSpinor(const U &u, const V &v, const int tol) {
289  int fail_check = 16*tol;
290  int *fail = new int[fail_check];
291  for (int f=0; f<fail_check; f++) fail[f] = 0;
292 
293  int N = 2*u.Nspin()*u.Ncolor();
294  int *iter = new int[N];
295  for (int i=0; i<N; i++) iter[i] = 0;
296 
297  for (int x=0; x<u.Volume(); x++) {
298  for (int s=0; s<u.Nspin(); s++) {
299  for (int c=0; c<u.Ncolor(); c++) {
300  for (int z=0; z<2; z++) {
301  double diff = fabs(u(x,s,c,z) - v(x,s,c,z));
302 
303  for (int f=0; f<fail_check; f++)
304  if (diff > pow(10.0,-(f+1)/(double)tol)) fail[f]++;
305 
306  int j = (s*u.Ncolor() + c)*2+z;
307  if (diff > 1e-3) iter[j]++;
308  }
309  }
310  }
311  }
312 
313  for (int i=0; i<N; i++) printfQuda("%d fails = %d\n", i, iter[i]);
314 
315  int accuracy_level =0;
316  for (int f=0; f<fail_check; f++) {
317  if (fail[f] == 0) accuracy_level = f+1;
318  }
319 
320  for (int f=0; f<fail_check; f++) {
321  printfQuda("%e Failures: %d / %d = %e\n", pow(10.0,-(f+1)/(double)tol),
322  fail[f], u.Volume()*N, fail[f] / (double)(u.Volume()*N));
323  }
324 
325  delete []iter;
326  delete []fail;
327 
328  return accuracy_level;
329  }
330 
332  const int tol) {
333  checkField(a, b);
334 
335  int ret = 0;
338  ret = compareSpinor(*(a.order_double), *(b.order_double), tol);
339  else
340  ret = compareSpinor(*(a.order_double), *(b.order_single), tol);
341  else
343  ret = compareSpinor(*(a.order_single), *(b.order_double), tol);
344  else
345  ret = compareSpinor(*(a.order_single), *(b.order_single), tol);
346 
347  return ret;
348  }
349 
350 
351  template <class Order>
352  void print_vector(const Order &o, unsigned int x) {
353 
354  for (int s=0; s<o.Nspin(); s++) {
355  std::cout << "x = " << x << ", s = " << s << ", { ";
356  for (int c=0; c<o.Ncolor(); c++) {
357  std::cout << " ( " << o(x, s, c, 0) << " , " ;
358  if (c<o.Ncolor()-1) std::cout << o(x, s, c, 1) << " ) ," ;
359  else std::cout << o(x, s, c, 1) << " ) " ;
360  }
361  std::cout << " } " << std::endl;
362  }
363 
364  }
365 
366  // print out the vector at volume point x
367  void cpuColorSpinorField::PrintVector(unsigned int x) {
368 
369  switch(precision) {
371  print_vector(*order_double, x);
372  break;
374  print_vector(*order_single, x);
375  break;
376  default:
377  errorQuda("Precision %d not implemented", precision);
378  }
379 
380  }
381 
383  {
384  if (initGhostFaceBuffer) return;
385 
386  if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
387  errorQuda("Full spinor is not supported in alllocateGhostBuffer\n");
388  }
389 
390  int X1 = this->x[0]*2;
391  int X2 = this->x[1];
392  int X3 = this->x[2];
393  int X4 = this->x[3];
394  int X5 = this->nDim == 5 ? this->x[4] : 1;
395 
396  int Vsh[4]={ X2*X3*X4*X5/2,
397  X1*X3*X4*X5/2,
398  X1*X2*X4*X5/2,
399  X1*X2*X3*X5/2};
400 
401  int num_faces = 1;
402  if(this->nSpin == 1) num_faces = 3; // staggered
403 
404  int spinor_size = 2*this->nSpin*this->nColor*this->precision;
405  for (int i=0; i<4; i++) {
406  size_t nbytes = num_faces*Vsh[i]*spinor_size;
407 
408  fwdGhostFaceBuffer[i] = safe_malloc(nbytes);
409  backGhostFaceBuffer[i] = safe_malloc(nbytes);
410  fwdGhostFaceSendBuffer[i] = safe_malloc(nbytes);
412  }
414  }
415 
416 
418  {
419  if(!initGhostFaceBuffer) return;
420 
421  for(int i=0;i < 4; i++){
426  }
428  }
429 
430 
431  void cpuColorSpinorField::packGhost(void* ghost_spinor, const int dim,
432  const QudaDirection dir, const QudaParity oddBit, const int dagger)
433  {
434  if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
435  errorQuda("Full spinor is not supported in packGhost for cpu");
436  }
437 
439  errorQuda("Field order %d not supported", fieldOrder);
440  }
441 
442  int num_faces=1;
443  if(this->nSpin == 1){ //staggered
444  num_faces=3;
445  }
446  int spinor_size = 2*this->nSpin*this->nColor*this->precision;
447 
448  int X1 = this->x[0]*2;
449  int X2 = this->x[1];
450  int X3 = this->x[2];
451  int X4 = this->x[3];
452  int X5 = this->nDim == 5 ? this->x[4]: 1;
453 
454 
455  for(int i=0;i < this->volume;i++){
456 
457  int X1h = X1/2;
458 
459  int sid =i;
460  int za = sid/X1h;
461  int x1h = sid - za*X1h;
462  int zb = za/X2;
463  int x2 = za - zb*X2;
464  int zc = zb / X3;
465  int x3 = zb - zc*X3;
466  int x5 = zc / X4; //this->nDim == 5 ? zz / X4 : 0;
467  int x4 = zc - x5*X4;
468  int x1odd = (x2 + x3 + x4 + x5 + oddBit) & 1;
469  int x1 = 2*x1h + x1odd;
470 
471  int ghost_face_idx ;
472 
473  //NOTE: added extra dimension for DW and TM dslash
474 
475  switch(dim){
476  case 0: //X dimension
477  if (dir == QUDA_BACKWARDS){
478  if (x1 < num_faces){
479  ghost_face_idx = (x1*X5*X4*X3*X2 + x5*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1;
480  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
481  }
482  }else{ // QUDA_FORWARDS
483  if (x1 >=X1 - num_faces){
484  ghost_face_idx = ((x1-X1+num_faces)*X5*X4*X3*X2 + x5*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1;
485  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
486  }
487  }
488  break;
489 
490  case 1: //Y dimension
491  if (dir == QUDA_BACKWARDS){
492  if (x2 < num_faces){
493  ghost_face_idx = (x2*X5*X4*X3*X1 +x5*X4*X3*X1 + x4*X3*X1+x3*X1+x1)>>1;
494  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
495  }
496  }else{ // QUDA_FORWARDS
497  if (x2 >= X2 - num_faces){
498  ghost_face_idx = ((x2-X2+num_faces)*X5*X4*X3*X1 +x5*X4*X3*X1+ x4*X3*X1+x3*X1+x1)>>1;
499  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
500  }
501  }
502  break;
503 
504  case 2: //Z dimension
505  if (dir == QUDA_BACKWARDS){
506  if (x3 < num_faces){
507  ghost_face_idx = (x3*X5*X4*X2*X1 + x5*X4*X2*X1 + x4*X2*X1+x2*X1+x1)>>1;
508  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
509  }
510  }else{ // QUDA_FORWARDS
511  if (x3 >= X3 - num_faces){
512  ghost_face_idx = ((x3-X3+num_faces)*X5*X4*X2*X1 + x5*X4*X2*X1 + x4*X2*X1 + x2*X1 + x1)>>1;
513  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
514  }
515  }
516  break;
517 
518  case 3: //T dimension
519  if (dir == QUDA_BACKWARDS){
520  if (x4 < num_faces){
521  ghost_face_idx = (x4*X5*X3*X2*X1 + x5*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
522  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
523  }
524  }else{ // QUDA_FORWARDS
525  if (x4 >= X4 - num_faces){
526  ghost_face_idx = ((x4-X4+num_faces)*X5*X3*X2*X1 + x5*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
527  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
528  }
529  }
530  break;
531  default:
532  errorQuda("Invalid dim value\n");
533  }//switch
534  }//for i
535  return;
536  }
537 
538  void cpuColorSpinorField::unpackGhost(void* ghost_spinor, const int dim,
539  const QudaDirection dir, const int dagger)
540  {
541  if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
542  errorQuda("Full spinor is not supported in unpackGhost for cpu");
543  }
544  }
545 
546  // Return the location of the field
549  }
550 
551 } // namespace quda