QUDA  v0.7.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
cpu_color_spinor_field.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <iostream>
5 #include <typeinfo>
6 #include <color_spinor_field.h>
7 #include <comm_quda.h> // for comm_drand()
8 
9 /*
10 Maybe this will be useful at some point
11 
12 #define myalloc(type, n, m0) (type *) aligned_malloc(n*sizeof(type), m0)
13 
14 #define ALIGN 16
15 void *
16 aligned_malloc(size_t n, void **m0)
17 {
18  size_t m = (size_t) safe_malloc(n+ALIGN);
19  *m0 = (void*)m;
20  size_t r = m % ALIGN;
21  if(r) m += (ALIGN - r);
22  return (void *)m;
23 }
24 */
25 
26 namespace quda {
27 
28  /*cpuColorSpinorField::cpuColorSpinorField() :
29  ColorSpinorField(), init(false) {
30 
31  }*/
32 
33 
39 
41  ColorSpinorField(param), init(false), reference(false) {
42  create(param.create);
43  if (param.create == QUDA_NULL_FIELD_CREATE) {
44  // do nothing
45  } else if (param.create == QUDA_ZERO_FIELD_CREATE) {
46  zero();
47  } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) {
48  v = param.v;
49  reference = true;
50  } else {
51  errorQuda("Creation type %d not supported", param.create);
52  }
53  }
54 
56  ColorSpinorField(src), init(false), reference(false) {
57  create(QUDA_COPY_FIELD_CREATE);
58  memcpy(v,src.v,bytes);
59  }
60 
62  ColorSpinorField(src), init(false), reference(false) {
63  create(QUDA_COPY_FIELD_CREATE);
64  if (typeid(src) == typeid(cpuColorSpinorField)) {
65  memcpy(v, dynamic_cast<const cpuColorSpinorField&>(src).v, bytes);
66  } else if (typeid(src) == typeid(cudaColorSpinorField)) {
67  dynamic_cast<const cudaColorSpinorField&>(src).saveSpinorField(*this);
68  } else {
69  errorQuda("Unknown input ColorSpinorField %s", typeid(src).name());
70  }
71  }
72 
74  destroy();
75  }
76 
78  if (typeid(src) == typeid(cudaColorSpinorField)) {
79  *this = (dynamic_cast<const cudaColorSpinorField&>(src));
80  } else if (typeid(src) == typeid(cpuColorSpinorField)) {
81  *this = (dynamic_cast<const cpuColorSpinorField&>(src));
82  } else {
83  errorQuda("Unknown input ColorSpinorField %s", typeid(src).name());
84  }
85  return *this;
86  }
87 
89  if (&src != this) {
90  if (!reference) {
91  destroy();
92  // keep current attributes unless unset
94  create(QUDA_COPY_FIELD_CREATE);
95  }
96  copy(src);
97  }
98  return *this;
99  }
100 
102  if (!reference) { // if the field is a reference, then we must maintain the current state
103  destroy();
104  // keep current attributes unless unset
106  create(QUDA_COPY_FIELD_CREATE);
107  }
108  src.saveSpinorField(*this);
109  return *this;
110  }
111 
112  void cpuColorSpinorField::create(const QudaFieldCreate create) {
113  // these need to be reset to ensure no ghost zones for the cpu
114  // fields since we can't determine during the parent's constructor
115  // whether the field is a cpu or cuda field
116  ghost_length = 0;
117  ghost_norm_length = 0;
120  bytes = total_length * precision; // includes pads and ghost zones
122 
123  if (pad != 0) errorQuda("Non-zero pad not supported");
124  if (precision == QUDA_HALF_PRECISION) errorQuda("Half precision not supported");
125 
130  errorQuda("Field order %d not supported", fieldOrder);
131  }
132 
133  if (create != QUDA_REFERENCE_FIELD_CREATE) {
134  // array of 4-d fields
136  int Ls = x[nDim-1];
137  v = (void**)safe_malloc(Ls * sizeof(void*));
138  for (int i=0; i<Ls; i++) ((void**)v)[i] = safe_malloc(bytes / Ls);
139  } else {
140  v = safe_malloc(bytes);
141  }
142  init = true;
143  }
144 
145  }
146 
// Releases the host storage referenced by v.
// Nothing happens unless `init` is set; `init` is cleared afterwards, so a
// second call in a row is a no-op.
// NOTE(review): the embedded listing numbers jump from 149 to 151, so one
// line (possibly a condition guarding the per-pointer loop) appears to be
// missing from this view -- confirm against the real source.
147  void cpuColorSpinorField::destroy() {
148 
149  if (init) {
151  for (int i=0; i<x[nDim-1]; i++) host_free(((void**)v)[i]); // v viewed as an array of x[nDim-1] pointers, each freed individually
152  host_free(v); // then the buffer v itself
153  init = false;
154  }
155 
156  }
157 
159  checkField(*this, src);
160  if (fieldOrder == src.fieldOrder) {
162  // FIXME (HJ Kim): I think this is a bug, we should copy the data with amount of "bytes/Ls"
163  for (int i=0; i<x[nDim-1]; i++) memcpy(((void**)v)[i], ((void**)src.v)[i], bytes);
164  else
165  memcpy(v, src.v, bytes);
166  } else {
168  }
169  }
170 
173  else for (int i=0; i<x[nDim-1]; i++) memset(((void**)v)[i], '\0', bytes/x[nDim-1]);
174  }
175 
// Thin wrapper: forwards this field and all four arguments, unchanged, to
// genericSource().
//  source_type : kind of source requested
//  x, s, c     : integers passed straight through; presumably the site, spin
//                and colour of the source -- TODO confirm against genericSource
// Note that the parameter `x` hides the member `x` (indexed as x[nDim-1]
// elsewhere in this file) for the duration of this function.
176  void cpuColorSpinorField::Source(QudaSourceType source_type, int x, int s, int c) {
177  genericSource(*this, source_type, x, s, c);
178  }
179 
181  const int tol) {
182  checkField(a,b);
183  return genericCompare(a, b, tol);
184  }
185 
186  // Print out the vector at volume point x by delegating to genericPrintVector(*this, x).
// The parameter `x` hides the member `x` inside this function.
187  void cpuColorSpinorField::PrintVector(unsigned int x) { genericPrintVector(*this, x); }
188 
190  {
191  if (initGhostFaceBuffer) return;
192 
193  if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
194  errorQuda("Full spinor is not supported in alllocateGhostBuffer\n");
195  }
196 
197  int X1 = this->x[0]*2;
198  int X2 = this->x[1];
199  int X3 = this->x[2];
200  int X4 = this->x[3];
201  int X5 = this->nDim == 5 ? this->x[4] : 1;
202 
203  int Vsh[4]={ X2*X3*X4*X5/2,
204  X1*X3*X4*X5/2,
205  X1*X2*X4*X5/2,
206  X1*X2*X3*X5/2};
207 
208  int num_faces = 1;
209  if(this->nSpin == 1) num_faces = 3; // staggered
210 
211  int spinor_size = 2*this->nSpin*this->nColor*this->precision;
212  for (int i=0; i<4; i++) {
213  size_t nbytes = num_faces*Vsh[i]*spinor_size;
214 
215  fwdGhostFaceBuffer[i] = safe_malloc(nbytes);
216  backGhostFaceBuffer[i] = safe_malloc(nbytes);
217  fwdGhostFaceSendBuffer[i] = safe_malloc(nbytes);
219  }
221  }
222 
223 
225  {
226  if(!initGhostFaceBuffer) return;
227 
228  for(int i=0;i < 4; i++){
233  }
235  }
236 
237 
// Copies the boundary sites of this field into a caller-supplied ghost buffer.
//  ghost_spinor : destination; each selected site is written at byte offset
//                 ghost_face_idx*spinor_size
//  dim          : 0..3 selects the X/Y/Z/T face; any other value reaches the
//                 errorQuda in the switch default
//  dir          : QUDA_BACKWARDS packs the sites whose coordinate in `dim` is
//                 below num_faces; any other value is handled as forwards and
//                 packs the sites within num_faces of the upper boundary
//  oddBit       : parity term added to the coordinate sum when rebuilding x1
//  dagger       : not referenced anywhere in the visible body
// Fields with siteSubset == QUDA_FULL_SITE_SUBSET are rejected via errorQuda.
// NOTE(review): the embedded listing numbers jump from 244 to 246; the
// condition guarding the "Field order %d not supported" error appears to be
// missing from this view -- confirm against the real source.
238  void cpuColorSpinorField::packGhost(void* ghost_spinor, const int dim,
239  const QudaDirection dir, const QudaParity oddBit, const int dagger)
240  {
241  if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
242  errorQuda("Full spinor is not supported in packGhost for cpu");
243  }
244 
246  errorQuda("Field order %d not supported", fieldOrder);
247  }
248 
// Depth of the packed boundary layer: 3 slices when nSpin == 1, otherwise 1.
249  int num_faces=1;
250  if(this->nSpin == 1){ //staggered
251  num_faces=3;
252  }
// Used below as the per-site stride, in bytes, for both source and destination.
253  int spinor_size = 2*this->nSpin*this->nColor*this->precision;
254 
// x[0] is doubled here and halved again (X1h) inside the loop; presumably x[0]
// holds the checkerboarded half-extent of the first dimension -- TODO confirm.
255  int X1 = this->x[0]*2;
256  int X2 = this->x[1];
257  int X3 = this->x[2];
258  int X4 = this->x[3];
259  int X5 = this->nDim == 5 ? this->x[4]: 1;
260 
261 
// Visit every site index i and peel off its coordinates, fastest first:
//   i = x1h + X1h*(x2 + X2*(x3 + X3*(x4 + X4*x5)))
262  for(int i=0;i < this->volume;i++){
263 
264  int X1h = X1/2;
265 
266  int sid =i;
267  int za = sid/X1h;
268  int x1h = sid - za*X1h;
269  int zb = za/X2;
270  int x2 = za - zb*X2;
271  int zc = zb / X3;
272  int x3 = zb - zc*X3;
273  int x5 = zc / X4; //this->nDim == 5 ? zz / X4 : 0;
274  int x4 = zc - x5*X4;
// x1odd is the low bit of the coordinate sum plus oddBit; x5 takes part in
// that sum only when DWFPCtype() reports QUDA_5D_PC.
275  int x1odd;
276  if(this->DWFPCtype() == QUDA_5D_PC)
277  {
278  x1odd = (x2 + x3 + x4 + x5 + oddBit) & 1;
279  }
280  //else if(this->DWFPCtype() == QUDA_4D_PC)
281  else
282  {
283  x1odd = (x2 + x3 + x4 + oddBit) & 1;
284  }
285  //else
286  // errorQuda("Preconditioning type is not set(PC type = %d), please check your preconditioning method\n",this->DWFPCtype());
// Full first-dimension coordinate rebuilt from the half index and the parity bit.
287  int x1 = 2*x1h + x1odd;
288 
289  int ghost_face_idx ;
290 
291  //NOTE: added extra dimension for DW and TM dslash
292 
// Every case follows the same pattern: test whether the site lies in the
// requested boundary layer, build a lexicographic index over the face with the
// slice offset in `dim` as the slowest term, halve it (>>1), and copy one
// site's worth of bytes. The source is addressed as a flat buffer: byte offset
// i*spinor_size from v.
293  switch(dim){
294  case 0: //X dimension
295  if (dir == QUDA_BACKWARDS){
296  if (x1 < num_faces){
297  ghost_face_idx = (x1*X5*X4*X3*X2 + x5*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1;
298  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
299  }
300  }else{ // QUDA_FORWARDS
301  if (x1 >=X1 - num_faces){
302  ghost_face_idx = ((x1-X1+num_faces)*X5*X4*X3*X2 + x5*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1;
303  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
304  }
305  }
306  break;
307 
308  case 1: //Y dimension
309  if (dir == QUDA_BACKWARDS){
310  if (x2 < num_faces){
311  ghost_face_idx = (x2*X5*X4*X3*X1 +x5*X4*X3*X1 + x4*X3*X1+x3*X1+x1)>>1;
312  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
313  }
314  }else{ // QUDA_FORWARDS
315  if (x2 >= X2 - num_faces){
316  ghost_face_idx = ((x2-X2+num_faces)*X5*X4*X3*X1 +x5*X4*X3*X1+ x4*X3*X1+x3*X1+x1)>>1;
317  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
318  }
319  }
320  break;
321 
322  case 2: //Z dimension
323  if (dir == QUDA_BACKWARDS){
324  if (x3 < num_faces){
325  ghost_face_idx = (x3*X5*X4*X2*X1 + x5*X4*X2*X1 + x4*X2*X1+x2*X1+x1)>>1;
326  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
327  }
328  }else{ // QUDA_FORWARDS
329  if (x3 >= X3 - num_faces){
330  ghost_face_idx = ((x3-X3+num_faces)*X5*X4*X2*X1 + x5*X4*X2*X1 + x4*X2*X1 + x2*X1 + x1)>>1;
331  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
332  }
333  }
334  break;
335 
336  case 3: //T dimension
337  if (dir == QUDA_BACKWARDS){
338  if (x4 < num_faces){
339  ghost_face_idx = (x4*X5*X3*X2*X1 + x5*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
340  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
341  }
342  }else{ // QUDA_FORWARDS
343  if (x4 >= X4 - num_faces){
344  ghost_face_idx = ((x4-X4+num_faces)*X5*X3*X2*X1 + x5*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
345  memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
346  }
347  }
348  break;
349  default:
350  errorQuda("Invalid dim value\n");
351  }//switch
352  }//for i
353  return;
354  }
355 
// Stub with the same parameter shape as packGhost() minus the parity argument.
// The visible body only rejects fields whose siteSubset is
// QUDA_FULL_SITE_SUBSET (via errorQuda) and otherwise does nothing:
// ghost_spinor, dim, dir and dagger are never read.
356  void cpuColorSpinorField::unpackGhost(void* ghost_spinor, const int dim,
357  const QudaDirection dir, const int dagger)
358  {
359  if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
360  errorQuda("Full spinor is not supported in unpackGhost for cpu");
361  }
362  }
363 
364  // Return the location of the field
367  }
368 
369 } // namespace quda
int genericCompare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol)
void copy(const cpuColorSpinorField &)
__constant__ int X1h
__constant__ int X2
void unpackGhost(void *ghost_spinor, const int dim, const QudaDirection dir, const int dagger)
__constant__ int Vsh
#define errorQuda(...)
Definition: util_quda.h:73
#define host_free(ptr)
Definition: malloc_quda.h:29
__constant__ int X1
cpuColorSpinorField(const cpuColorSpinorField &)
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
QudaDWFPCType DWFPCtype() const
void copyGenericColorSpinor(ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0)
void packGhost(void *ghost_spinor, const int dim, const QudaDirection dir, const QudaParity parity, const int dagger)
QudaDagType dagger
Definition: test_util.cpp:1558
enum QudaSourceType_s QudaSourceType
int Ls
Definition: test_util.cpp:40
QudaGaugeParam param
Definition: pack_test.cpp:17
enum QudaDirection_s QudaDirection
static void * backGhostFaceSendBuffer[QUDA_MAX_DIM]
static void checkField(const ColorSpinorField &, const ColorSpinorField &)
#define ALIGNMENT_ADJUST(n)
Definition: quda_internal.h:33
static void * backGhostFaceBuffer[QUDA_MAX_DIM]
enum QudaParity_s QudaParity
void genericSource(cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c)
static void * fwdGhostFaceSendBuffer[QUDA_MAX_DIM]
#define safe_malloc(size)
Definition: malloc_quda.h:25
static void * fwdGhostFaceBuffer[QUDA_MAX_DIM]
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
void genericPrintVector(cpuColorSpinorField &a, unsigned int x)
short x1h
Definition: llfat_core.h:815
virtual ColorSpinorField & operator=(const ColorSpinorField &)
QudaFieldLocation Location() const
enum QudaFieldLocation_s QudaFieldLocation
void * memset(void *s, int c, size_t n)
__constant__ int X3
short x1odd
Definition: llfat_core.h:821
enum QudaFieldCreate_s QudaFieldCreate
void init(int argc, char **argv)
Definition: dslash_test.cpp:79
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
VOLATILE spinorFloat * s
int oddBit
__constant__ int X4
ColorSpinorField & operator=(const ColorSpinorField &)