QUDA: quda/lib/cpu_color_spinor_field.cpp Source File

QUDA v0.4.0
A library for QCD on GPUs
00001 #include <stdlib.h>
00002 #include <stdio.h>
00003 #include <string.h>
00004 #include <iostream>
00005 #include <color_spinor_field.h>
00006 #include <color_spinor_field_order.h>
00007 
00008 /*
00009 Maybe this will be useful at some point
00010 
00011 #define myalloc(type, n, m0) (type *) aligned_malloc(n*sizeof(type), m0)
00012 
00013 #define ALIGN 16
00014 void *
00015 aligned_malloc(size_t n, void **m0)
00016 {
00017   size_t m = (size_t) malloc(n+ALIGN);
00018   *m0 = (void*)m;
00019   size_t r = m % ALIGN;
00020   if(r) m += (ALIGN - r);
00021   return (void *)m;
00022 }
00023 */
00024 
00025  /*cpuColorSpinorField::cpuColorSpinorField() : 
00026   ColorSpinorField(), init(false) {
00027 
00028   }*/
00029 
00030 
00031 int cpuColorSpinorField::initGhostFaceBuffer =0;
00032 void* cpuColorSpinorField::fwdGhostFaceBuffer[QUDA_MAX_DIM]; 
00033 void* cpuColorSpinorField::backGhostFaceBuffer[QUDA_MAX_DIM];
00034 void* cpuColorSpinorField::fwdGhostFaceSendBuffer[QUDA_MAX_DIM]; 
00035 void* cpuColorSpinorField::backGhostFaceSendBuffer[QUDA_MAX_DIM];
00036 
00037 cpuColorSpinorField::cpuColorSpinorField(const ColorSpinorParam &param) :
00038   ColorSpinorField(param), init(false), order_double(NULL), order_single(NULL) {
00039   create(param.create);
00040   if (param.create == QUDA_NULL_FIELD_CREATE) {
00041     // do nothing
00042   } else if (param.create == QUDA_ZERO_FIELD_CREATE) {
00043     zero();
00044   } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) {
00045     v = param.v;
00046   } else {
00047     errorQuda("Creation type %d not supported", param.create);
00048   }
00049 
00050   if (fieldLocation != QUDA_CPU_FIELD_LOCATION) 
00051     errorQuda("Location incorrectly set");
00052 }
00053 
00054 cpuColorSpinorField::cpuColorSpinorField(const cpuColorSpinorField &src) : 
00055   ColorSpinorField(src), init(false), order_double(NULL), order_single(NULL) {
00056   create(QUDA_COPY_FIELD_CREATE);
00057   memcpy(v,src.v,bytes);
00058 
00059   if (fieldLocation != QUDA_CPU_FIELD_LOCATION) 
00060     errorQuda("Location incorrectly set");
00061 }
00062 
00063 cpuColorSpinorField::cpuColorSpinorField(const ColorSpinorField &src) : 
00064   ColorSpinorField(src), init(false), order_double(NULL), order_single(NULL) {
00065   create(QUDA_COPY_FIELD_CREATE);
00066   if (src.FieldLocation() == QUDA_CPU_FIELD_LOCATION) {
00067     memcpy(v, dynamic_cast<const cpuColorSpinorField&>(src).v, bytes);
00068   } else if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) {
00069     dynamic_cast<const cudaColorSpinorField&>(src).saveCPUSpinorField(*this);
00070   } else {
00071     errorQuda("FieldType not supported");
00072   }
00073 
00074   if (fieldLocation != QUDA_CPU_FIELD_LOCATION) 
00075     errorQuda("Location incorrectly set");
00076 }
00077 
00078 cpuColorSpinorField::~cpuColorSpinorField() {
00079   destroy();
00080 }
00081 
00082 cpuColorSpinorField& cpuColorSpinorField::operator=(const cpuColorSpinorField &src) {
00083   if (&src != this) {
00084     destroy();
00085     // keep current attributes unless unset
00086     if (!ColorSpinorField::init) ColorSpinorField::operator=(src);
00087     fieldLocation = QUDA_CPU_FIELD_LOCATION;
00088     create(QUDA_COPY_FIELD_CREATE);
00089     copy(src);
00090   }
00091   return *this;
00092 }
00093 
00094 cpuColorSpinorField& cpuColorSpinorField::operator=(const cudaColorSpinorField &src) {
00095   destroy();
00096   // keep current attributes unless unset
00097   if (!ColorSpinorField::init) ColorSpinorField::operator=(src);
00098   fieldLocation = QUDA_CPU_FIELD_LOCATION;
00099   create(QUDA_COPY_FIELD_CREATE);
00100   src.saveCPUSpinorField(*this);
00101   return *this;
00102 }
00103 
00104 void cpuColorSpinorField::create(const QudaFieldCreate create) {
00105   if (pad != 0) {
00106     errorQuda("Non-zero pad not supported");
00107   }
00108   
00109   if (precision == QUDA_HALF_PRECISION) {
00110     errorQuda("Half precision not supported");
00111   }
00112 
00113   if (fieldOrder != QUDA_SPACE_COLOR_SPIN_FIELD_ORDER && 
00114       fieldOrder != QUDA_SPACE_SPIN_COLOR_FIELD_ORDER &&
00115       fieldOrder != QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) {
00116     errorQuda("Field order %d not supported", fieldOrder);
00117   }
00118 
00119   if (create != QUDA_REFERENCE_FIELD_CREATE) {
00120     // array of 4-d fields
00121     if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) {
00122       int Ls = x[nDim-1];
00123       v = (void**)malloc(Ls * sizeof(void*));
00124       for (int i=0; i<Ls; i++) ((void**)v)[i] = (void*)malloc(bytes / Ls);
00125     } else {
00126       v = (void*)malloc(bytes);
00127       init = true;
00128     }
00129   }
00130  
00131   createOrder(); // need to do this for references?
00132 }
00133 
00134 void cpuColorSpinorField::createOrder() {
00135 
00136   if (precision == QUDA_DOUBLE_PRECISION) {
00137     if (fieldOrder == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) 
00138       order_double = new SpaceSpinColorOrder<double>(*this);
00139     else if (fieldOrder == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) 
00140       order_double = new SpaceColorSpinOrder<double>(*this);
00141     else if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 
00142       order_double = new QOPDomainWallOrder<double>(*this);
00143     else
00144       errorQuda("Order %d not supported in cpuColorSpinorField", fieldOrder);
00145   } else if (precision == QUDA_SINGLE_PRECISION) {
00146     if (fieldOrder == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) 
00147       order_single = new SpaceSpinColorOrder<float>(*this);
00148     else if (fieldOrder == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) 
00149       order_single = new SpaceColorSpinOrder<float>(*this);
00150     else if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 
00151       order_single = new QOPDomainWallOrder<float>(*this);
00152     else
00153       errorQuda("Order %d not supported in cpuColorSpinorField", fieldOrder);
00154   } else {
00155     errorQuda("Precision %d not supported", precision);
00156   }
00157   
00158 }
00159 
00160 void cpuColorSpinorField::destroy() {
00161   
00162   if (precision == QUDA_DOUBLE_PRECISION) {
00163     delete order_double;
00164   } else if (precision == QUDA_SINGLE_PRECISION) {
00165     delete order_single;
00166   } else {
00167     errorQuda("Precision %d not supported", precision);
00168   }
00169   
00170   if (init) {
00171     if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 
00172       for (int i=0; i<x[nDim-1]; i++) free(((void**)v)[i]);
00173     free(v);
00174     init = false;
00175   }
00176 
00177 }
00178 
00179 template <class D, class S>
00180 void genericCopy(D &dst, const S &src) {
00181 
00182   for (int x=0; x<dst.Volume(); x++) {
00183     for (int s=0; s<dst.Nspin(); s++) {
00184       for (int c=0; c<dst.Ncolor(); c++) {
00185         for (int z=0; z<2; z++) {
00186           dst(x, s, c, z) = src(x, s, c, z);
00187         }
00188       }
00189     }
00190   }
00191 
00192 }
00193 
00194 void cpuColorSpinorField::copy(const cpuColorSpinorField &src) {
00195   checkField(*this, src);
00196   if (fieldOrder == src.fieldOrder) {
00197     if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 
00198       for (int i=0; i<x[nDim-1]; i++) memcpy(((void**)v)[i], ((void**)src.v)[i], bytes);
00199     else 
00200       memcpy(v, src.v, bytes);
00201   } else {
00202     if (precision == QUDA_DOUBLE_PRECISION) {
00203       if (src.precision == QUDA_DOUBLE_PRECISION) {
00204         genericCopy(*order_double, *(src.order_double));
00205       } else {
00206         genericCopy(*order_double, *(src.order_single));
00207       }
00208     } else {
00209       if (src.precision == QUDA_DOUBLE_PRECISION) {
00210         genericCopy(*order_single, *(src.order_double));
00211       } else {
00212         genericCopy(*order_single, *(src.order_single));
00213       }
00214     }
00215   }
00216 }
00217 
00218 void cpuColorSpinorField::zero() {
00219   if (fieldOrder != QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) memset(v, '\0', bytes);
00220   else for (int i=0; i<x[nDim-1]; i++) memset(((void**)v)[i], '\0', bytes/x[nDim-1]);
00221 }
00222 
00223 // Random number insertion over all field elements
00224 template <class T>
00225 void random(T &t) {
00226   for (int x=0; x<t.Volume(); x++) {
00227     for (int s=0; s<t.Nspin(); s++) {
00228       for (int c=0; c<t.Ncolor(); c++) {
00229         for (int z=0; z<2; z++) {
00230           t(x,s,c,z) = rand() / (double)RAND_MAX;
00231         }
00232       }
00233     }
00234   }
00235 }
00236 
00237 // Create a point source at spacetime point x, spin s and colour c
00238 template <class T>
00239 void point(T &t, const int x, const int s, const int c) { t(x, s, c, 0) = 1.0; }
00240 
00241 void cpuColorSpinorField::Source(const QudaSourceType sourceType, const int x,
00242                                  const int s, const int c) {
00243 
00244   switch(sourceType) {
00245 
00246   case QUDA_RANDOM_SOURCE:
00247     if (precision == QUDA_DOUBLE_PRECISION) random(*order_double);
00248     else if (precision == QUDA_SINGLE_PRECISION) random(*order_single);
00249     else errorQuda("Precision not supported");
00250     break;
00251 
00252   case QUDA_POINT_SOURCE:
00253     zero();
00254     if (precision == QUDA_DOUBLE_PRECISION) point(*order_double, x, s, c);
00255     else if (precision == QUDA_SINGLE_PRECISION) point(*order_single, x, s, c);
00256     else errorQuda("Precision not supported");
00257     break;
00258 
00259   default:
00260     errorQuda("Source type %d not implemented", sourceType);
00261 
00262   }
00263 
00264 }
00265 
00266 template <class U, class V>
00267 int compareSpinor(const U &u, const V &v, const int tol) {
00268   int fail_check = 16*tol;
00269   int *fail = new int[fail_check];
00270   for (int f=0; f<fail_check; f++) fail[f] = 0;
00271 
00272   int N = 2*u.Nspin()*u.Ncolor();
00273   int *iter = new int[N];
00274   for (int i=0; i<N; i++) iter[i] = 0;
00275 
00276   for (int x=0; x<u.Volume(); x++) {
00277     for (int s=0; s<u.Nspin(); s++) {
00278       for (int c=0; c<u.Ncolor(); c++) {
00279         for (int z=0; z<2; z++) {
00280           double diff = fabs(u(x,s,c,z) - v(x,s,c,z));
00281 
00282           for (int f=0; f<fail_check; f++)
00283             if (diff > pow(10.0,-(f+1)/(double)tol)) fail[f]++;
00284 
00285           int j = (s*u.Ncolor() + c)*2+z;
00286           if (diff > 1e-3) iter[j]++;
00287         }
00288       }
00289     }
00290   }
00291 
00292   for (int i=0; i<N; i++) printfQuda("%d fails = %d\n", i, iter[i]);
00293     
00294   int accuracy_level =0;
00295   for (int f=0; f<fail_check; f++) {
00296     if (fail[f] == 0) accuracy_level = f+1;
00297   }
00298 
00299   for (int f=0; f<fail_check; f++) {
00300     printfQuda("%e Failures: %d / %d  = %e\n", pow(10.0,-(f+1)/(double)tol), 
00301                fail[f], u.Volume()*N, fail[f] / (double)(u.Volume()*N));
00302   }
00303   
00304   delete []iter;
00305   delete []fail;
00306   
00307   return accuracy_level;
00308 }
00309 
00310 int cpuColorSpinorField::Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, 
00311                                  const int tol) {
00312   checkField(a, b);
00313 
00314   int ret = 0;
00315   if (a.precision == QUDA_DOUBLE_PRECISION) 
00316     if (b.precision == QUDA_DOUBLE_PRECISION)
00317       ret = compareSpinor(*(a.order_double), *(b.order_double), tol);
00318     else
00319       ret = compareSpinor(*(a.order_double), *(b.order_single), tol);
00320   else 
00321     if (b.precision == QUDA_DOUBLE_PRECISION)
00322       ret = compareSpinor(*(a.order_single), *(b.order_double), tol);
00323     else
00324       ret =compareSpinor(*(a.order_single), *(b.order_single), tol);
00325 
00326   return ret;
00327 }
00328 
00329 template <class Order>
00330 void print_vector(const Order &o, unsigned int x) {
00331 
00332   for (int s=0; s<o.Nspin(); s++) {
00333     for (int c=0; c<o.Ncolor(); c++) {
00334       for (int z=0; z<2; z++) {
00335         std::cout << o(x, s, c, z) << std::endl;
00336       }
00337     }
00338     std::cout << std::endl;
00339   }
00340 
00341 }
00342 
00343 // print out the vector at volume point x
00344 void cpuColorSpinorField::PrintVector(unsigned int x) {
00345   
00346   switch(precision) {
00347   case QUDA_DOUBLE_PRECISION:
00348     print_vector(*order_double, x);
00349     break;
00350   case QUDA_SINGLE_PRECISION:
00351     print_vector(*order_single, x);
00352     break;
00353   default:
00354     errorQuda("Precision %d not implemented", precision); 
00355   }
00356 
00357 }
00358 
00359 void cpuColorSpinorField::allocateGhostBuffer(void)
00360 {
00361   if (initGhostFaceBuffer) return;
00362 
00363   if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
00364     errorQuda("Full spinor is not supported in alllocateGhostBuffer\n");
00365   }
00366   
00367   int X1 = this->x[0]*2;
00368   int X2 = this->x[1];
00369   int X3 = this->x[2];
00370   int X4 = this->x[3];
00371   int Vsh[4]={ X2*X3*X4/2,
00372                X1*X3*X4/2,
00373                X1*X2*X4/2,
00374                X1*X2*X3/2};
00375   
00376   int num_faces = 1;
00377   if(this->nSpin == 1) num_faces = 3; // staggered
00378 
00379   int spinor_size = 2*this->nSpin*this->nColor*this->precision;
00380   for(int i=0;i < 4; i++){
00381     fwdGhostFaceBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size);
00382     backGhostFaceBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size);
00383 
00384     fwdGhostFaceSendBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size);
00385     backGhostFaceSendBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size);
00386     
00387     if(fwdGhostFaceBuffer[i]== NULL || backGhostFaceBuffer[i] == NULL||
00388        fwdGhostFaceSendBuffer[i]== NULL || backGhostFaceSendBuffer[i]==NULL){
00389       errorQuda("malloc for ghost buf in cpu spinor failed\n");
00390     }
00391   }
00392   
00393   initGhostFaceBuffer = 1;
00394   return;
00395 }
00396 
00397 void cpuColorSpinorField::freeGhostBuffer(void)
00398 {
00399   if(!initGhostFaceBuffer) return;
00400 
00401   for(int i=0;i < 4; i++){
00402     free(fwdGhostFaceBuffer[i]); fwdGhostFaceBuffer[i] = NULL;
00403     free(backGhostFaceBuffer[i]); backGhostFaceBuffer[i] = NULL;
00404     free(fwdGhostFaceSendBuffer[i]); fwdGhostFaceSendBuffer[i] = NULL;
00405     free(backGhostFaceSendBuffer[i]);  backGhostFaceSendBuffer[i] = NULL;
00406   } 
00407 
00408   initGhostFaceBuffer = 0;
00409   
00410   return;
00411 }
00412 
00413 
00414 
00415 void cpuColorSpinorField::packGhost(void* ghost_spinor, const int dim, 
00416                                     const QudaDirection dir, const QudaParity oddBit, const int dagger)
00417 {
00418   if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
00419     errorQuda("Full spinor is not supported in packGhost for cpu");
00420   }
00421   
00422   if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) {
00423     errorQuda("Field order %d not supported", fieldOrder);
00424   }
00425 
00426   int num_faces=1;
00427   if(this->nSpin == 1){ //staggered
00428     num_faces=3;
00429   }
00430   int spinor_size = 2*this->nSpin*this->nColor*this->precision;
00431 
00432   int X1 = this->x[0]*2;
00433   int X2 = this->x[1];
00434   int X3 = this->x[2];
00435   int X4 = this->x[3];
00436 
00437   for(int i=0;i < this->volume;i++){    
00438     /*
00439     int boundaryCrossings = i/(X1/2) + i/(X1*X2/2) + i/(X1*X2*X3/2);
00440     int Y = 2*i + (boundaryCrossings + oddBit) % 2;
00441     */
00442     int X1h = X1/2;
00443     
00444     int sid =i;
00445     int za = sid/X1h;
00446     int x1h = sid - za*X1h;
00447     int zb = za/X2;
00448     int x2 = za - zb*X2;
00449     int x4 = zb/X3;
00450     int x3 = zb - x4*X3;
00451     int x1odd = (x2 + x3 + x4 + oddBit) & 1;
00452     int x1 = 2*x1h + x1odd;
00453 
00454     int ghost_face_idx ;
00455     
00456     switch(dim){            
00457     case 0: //X dimension
00458       if (dir == QUDA_BACKWARDS){
00459         if (x1 < num_faces){
00460           ghost_face_idx =  (x1*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1;
00461           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);
00462         }
00463       }else{  // QUDA_FORWARDS
00464         if (x1 >=X1 - num_faces){
00465           ghost_face_idx = ((x1-X1+num_faces)*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1;
00466           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00467         }
00468       }
00469       break;      
00470       
00471     case 1: //Y dimension
00472       if (dir == QUDA_BACKWARDS){
00473         if (x2 < num_faces){
00474           ghost_face_idx = (x2*X4*X3*X1 + x4*X3*X1+x3*X1+x1)>>1;
00475           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00476         }
00477       }else{ // QUDA_FORWARDS      
00478         if (x2 >= X2 - num_faces){
00479           ghost_face_idx = ((x2-X2+num_faces)*X4*X3*X1+ x4*X3*X1+x3*X1+x1)>>1;
00480           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00481         }
00482       }
00483       break;
00484 
00485     case 2: //Z dimension      
00486       if (dir == QUDA_BACKWARDS){
00487         if (x3 < num_faces){
00488           ghost_face_idx = (x3*X4*X2*X1 + x4*X2*X1+x2*X1+x1)>>1;
00489           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00490         }
00491       }else{ // QUDA_FORWARDS     
00492         if (x3 >= X3 - num_faces){
00493           ghost_face_idx = ((x3-X3+num_faces)*X4*X2*X1 + x4*X2*X1 + x2*X1 + x1)>>1;
00494           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00495         }
00496       }
00497       break;
00498       
00499     case 3:  //T dimension      
00500       if (dir == QUDA_BACKWARDS){
00501         if (x4 < num_faces){
00502           ghost_face_idx = (x4*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
00503           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00504         }
00505       }else{ // QUDA_FORWARDS     
00506         if (x4 >= X4 - num_faces){
00507           ghost_face_idx = ((x4-X4+num_faces)*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1;
00508           memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size);     
00509         }
00510       }
00511       break;
00512     default:
00513       errorQuda("Invalid dim value\n");
00514     }//switch
00515   }//for i
00516 
00517   return;
00518 }
00519 
00520 
00521 void cpuColorSpinorField::unpackGhost(void* ghost_spinor, const int dim, 
00522                                       const QudaDirection dir, const int dagger)
00523 {
00524   if (this->siteSubset == QUDA_FULL_SITE_SUBSET){
00525     errorQuda("Full spinor is not supported in unpackGhost for cpu");
00526   }
00527 }