QUDA v0.4.0
A library for QCD on GPUs
|
00001 #include <stdlib.h> 00002 #include <stdio.h> 00003 #include <string.h> 00004 #include <iostream> 00005 #include <color_spinor_field.h> 00006 #include <color_spinor_field_order.h> 00007 00008 /* 00009 Maybe this will be useful at some point 00010 00011 #define myalloc(type, n, m0) (type *) aligned_malloc(n*sizeof(type), m0) 00012 00013 #define ALIGN 16 00014 void * 00015 aligned_malloc(size_t n, void **m0) 00016 { 00017 size_t m = (size_t) malloc(n+ALIGN); 00018 *m0 = (void*)m; 00019 size_t r = m % ALIGN; 00020 if(r) m += (ALIGN - r); 00021 return (void *)m; 00022 } 00023 */ 00024 00025 /*cpuColorSpinorField::cpuColorSpinorField() : 00026 ColorSpinorField(), init(false) { 00027 00028 }*/ 00029 00030 00031 int cpuColorSpinorField::initGhostFaceBuffer =0; 00032 void* cpuColorSpinorField::fwdGhostFaceBuffer[QUDA_MAX_DIM]; 00033 void* cpuColorSpinorField::backGhostFaceBuffer[QUDA_MAX_DIM]; 00034 void* cpuColorSpinorField::fwdGhostFaceSendBuffer[QUDA_MAX_DIM]; 00035 void* cpuColorSpinorField::backGhostFaceSendBuffer[QUDA_MAX_DIM]; 00036 00037 cpuColorSpinorField::cpuColorSpinorField(const ColorSpinorParam ¶m) : 00038 ColorSpinorField(param), init(false), order_double(NULL), order_single(NULL) { 00039 create(param.create); 00040 if (param.create == QUDA_NULL_FIELD_CREATE) { 00041 // do nothing 00042 } else if (param.create == QUDA_ZERO_FIELD_CREATE) { 00043 zero(); 00044 } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) { 00045 v = param.v; 00046 } else { 00047 errorQuda("Creation type %d not supported", param.create); 00048 } 00049 00050 if (fieldLocation != QUDA_CPU_FIELD_LOCATION) 00051 errorQuda("Location incorrectly set"); 00052 } 00053 00054 cpuColorSpinorField::cpuColorSpinorField(const cpuColorSpinorField &src) : 00055 ColorSpinorField(src), init(false), order_double(NULL), order_single(NULL) { 00056 create(QUDA_COPY_FIELD_CREATE); 00057 memcpy(v,src.v,bytes); 00058 00059 if (fieldLocation != QUDA_CPU_FIELD_LOCATION) 00060 errorQuda("Location incorrectly set"); 00061 } 00062 00063 cpuColorSpinorField::cpuColorSpinorField(const ColorSpinorField &src) : 00064 ColorSpinorField(src), init(false), order_double(NULL), order_single(NULL) { 00065 create(QUDA_COPY_FIELD_CREATE); 00066 if (src.FieldLocation() == QUDA_CPU_FIELD_LOCATION) { 00067 memcpy(v, dynamic_cast<const cpuColorSpinorField&>(src).v, bytes); 00068 } else if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) { 00069 dynamic_cast<const cudaColorSpinorField&>(src).saveCPUSpinorField(*this); 00070 } else { 00071 errorQuda("FieldType not supported"); 00072 } 00073 00074 if (fieldLocation != QUDA_CPU_FIELD_LOCATION) 00075 errorQuda("Location incorrectly set"); 00076 } 00077 00078 cpuColorSpinorField::~cpuColorSpinorField() { 00079 destroy(); 00080 } 00081 00082 cpuColorSpinorField& cpuColorSpinorField::operator=(const cpuColorSpinorField &src) { 00083 if (&src != this) { 00084 destroy(); 00085 // keep current attributes unless unset 00086 if (!ColorSpinorField::init) ColorSpinorField::operator=(src); 00087 fieldLocation = QUDA_CPU_FIELD_LOCATION; 00088 create(QUDA_COPY_FIELD_CREATE); 00089 copy(src); 00090 } 00091 return *this; 00092 } 00093 00094 cpuColorSpinorField& cpuColorSpinorField::operator=(const cudaColorSpinorField &src) { 00095 destroy(); 00096 // keep current attributes unless unset 00097 if (!ColorSpinorField::init) ColorSpinorField::operator=(src); 00098 fieldLocation = QUDA_CPU_FIELD_LOCATION; 00099 create(QUDA_COPY_FIELD_CREATE); 00100 src.saveCPUSpinorField(*this); 00101 return *this; 00102 } 00103 00104 void cpuColorSpinorField::create(const QudaFieldCreate create) { 00105 if (pad != 0) { 00106 errorQuda("Non-zero pad not supported"); 00107 } 00108 00109 if (precision == QUDA_HALF_PRECISION) { 00110 errorQuda("Half precision not supported"); 00111 } 00112 00113 if (fieldOrder != QUDA_SPACE_COLOR_SPIN_FIELD_ORDER && 00114 fieldOrder != QUDA_SPACE_SPIN_COLOR_FIELD_ORDER && 00115 fieldOrder != QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) { 00116 errorQuda("Field order %d not supported", fieldOrder); 00117 } 00118 00119 if (create != QUDA_REFERENCE_FIELD_CREATE) { 00120 // array of 4-d fields 00121 if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) { 00122 int Ls = x[nDim-1]; 00123 v = (void**)malloc(Ls * sizeof(void*)); 00124 for (int i=0; i<Ls; i++) ((void**)v)[i] = (void*)malloc(bytes / Ls); 00125 } else { 00126 v = (void*)malloc(bytes); 00127 init = true; 00128 } 00129 } 00130 00131 createOrder(); // need to do this for references? 00132 } 00133 00134 void cpuColorSpinorField::createOrder() { 00135 00136 if (precision == QUDA_DOUBLE_PRECISION) { 00137 if (fieldOrder == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) 00138 order_double = new SpaceSpinColorOrder<double>(*this); 00139 else if (fieldOrder == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) 00140 order_double = new SpaceColorSpinOrder<double>(*this); 00141 else if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 00142 order_double = new QOPDomainWallOrder<double>(*this); 00143 else 00144 errorQuda("Order %d not supported in cpuColorSpinorField", fieldOrder); 00145 } else if (precision == QUDA_SINGLE_PRECISION) { 00146 if (fieldOrder == QUDA_SPACE_SPIN_COLOR_FIELD_ORDER) 00147 order_single = new SpaceSpinColorOrder<float>(*this); 00148 else if (fieldOrder == QUDA_SPACE_COLOR_SPIN_FIELD_ORDER) 00149 order_single = new SpaceColorSpinOrder<float>(*this); 00150 else if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 00151 order_single = new QOPDomainWallOrder<float>(*this); 00152 else 00153 errorQuda("Order %d not supported in cpuColorSpinorField", fieldOrder); 00154 } else { 00155 errorQuda("Precision %d not supported", precision); 00156 } 00157 00158 } 00159 00160 void cpuColorSpinorField::destroy() { 00161 00162 if (precision == QUDA_DOUBLE_PRECISION) { 00163 delete order_double; 00164 } else if (precision == QUDA_SINGLE_PRECISION) { 00165 delete order_single; 00166 } else { 00167 errorQuda("Precision %d not supported", precision); 00168 } 00169 00170 if (init) { 00171 if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 00172 for (int i=0; i<x[nDim-1]; i++) free(((void**)v)[i]); 00173 free(v); 00174 init = false; 00175 } 00176 00177 } 00178 00179 template <class D, class S> 00180 void genericCopy(D &dst, const S &src) { 00181 00182 for (int x=0; x<dst.Volume(); x++) { 00183 for (int s=0; s<dst.Nspin(); s++) { 00184 for (int c=0; c<dst.Ncolor(); c++) { 00185 for (int z=0; z<2; z++) { 00186 dst(x, s, c, z) = src(x, s, c, z); 00187 } 00188 } 00189 } 00190 } 00191 00192 } 00193 00194 void cpuColorSpinorField::copy(const cpuColorSpinorField &src) { 00195 checkField(*this, src); 00196 if (fieldOrder == src.fieldOrder) { 00197 if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) 00198 for (int i=0; i<x[nDim-1]; i++) memcpy(((void**)v)[i], ((void**)src.v)[i], bytes); 00199 else 00200 memcpy(v, src.v, bytes); 00201 } else { 00202 if (precision == QUDA_DOUBLE_PRECISION) { 00203 if (src.precision == QUDA_DOUBLE_PRECISION) { 00204 genericCopy(*order_double, *(src.order_double)); 00205 } else { 00206 genericCopy(*order_double, *(src.order_single)); 00207 } 00208 } else { 00209 if (src.precision == QUDA_DOUBLE_PRECISION) { 00210 genericCopy(*order_single, *(src.order_double)); 00211 } else { 00212 genericCopy(*order_single, *(src.order_single)); 00213 } 00214 } 00215 } 00216 } 00217 00218 void cpuColorSpinorField::zero() { 00219 if (fieldOrder != QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) memset(v, '\0', bytes); 00220 else for (int i=0; i<x[nDim-1]; i++) memset(((void**)v)[i], '\0', bytes/x[nDim-1]); 00221 } 00222 00223 // Random number insertion over all field elements 00224 template <class T> 00225 void random(T &t) { 00226 for (int x=0; x<t.Volume(); x++) { 00227 for (int s=0; s<t.Nspin(); s++) { 00228 for (int c=0; c<t.Ncolor(); c++) { 00229 for (int z=0; z<2; z++) { 00230 t(x,s,c,z) = rand() / (double)RAND_MAX; 00231 } 00232 } 00233 } 00234 } 00235 } 00236 00237 // Create a point source at spacetime point x, spin s and colour c 00238 template <class T> 00239 void point(T &t, const int x, const int s, const int c) { t(x, s, c, 0) = 1.0; } 00240 00241 void cpuColorSpinorField::Source(const QudaSourceType sourceType, const int x, 00242 const int s, const int c) { 00243 00244 switch(sourceType) { 00245 00246 case QUDA_RANDOM_SOURCE: 00247 if (precision == QUDA_DOUBLE_PRECISION) random(*order_double); 00248 else if (precision == QUDA_SINGLE_PRECISION) random(*order_single); 00249 else errorQuda("Precision not supported"); 00250 break; 00251 00252 case QUDA_POINT_SOURCE: 00253 zero(); 00254 if (precision == QUDA_DOUBLE_PRECISION) point(*order_double, x, s, c); 00255 else if (precision == QUDA_SINGLE_PRECISION) point(*order_single, x, s, c); 00256 else errorQuda("Precision not supported"); 00257 break; 00258 00259 default: 00260 errorQuda("Source type %d not implemented", sourceType); 00261 00262 } 00263 00264 } 00265 00266 template <class U, class V> 00267 int compareSpinor(const U &u, const V &v, const int tol) { 00268 int fail_check = 16*tol; 00269 int *fail = new int[fail_check]; 00270 for (int f=0; f<fail_check; f++) fail[f] = 0; 00271 00272 int N = 2*u.Nspin()*u.Ncolor(); 00273 int *iter = new int[N]; 00274 for (int i=0; i<N; i++) iter[i] = 0; 00275 00276 for (int x=0; x<u.Volume(); x++) { 00277 for (int s=0; s<u.Nspin(); s++) { 00278 for (int c=0; c<u.Ncolor(); c++) { 00279 for (int z=0; z<2; z++) { 00280 double diff = fabs(u(x,s,c,z) - v(x,s,c,z)); 00281 00282 for (int f=0; f<fail_check; f++) 00283 if (diff > pow(10.0,-(f+1)/(double)tol)) fail[f]++; 00284 00285 int j = (s*u.Ncolor() + c)*2+z; 00286 if (diff > 1e-3) iter[j]++; 00287 } 00288 } 00289 } 00290 } 00291 00292 for (int i=0; i<N; i++) printfQuda("%d fails = %d\n", i, iter[i]); 00293 00294 int accuracy_level =0; 00295 for (int f=0; f<fail_check; f++) { 00296 if (fail[f] == 0) accuracy_level = f+1; 00297 } 00298 00299 for (int f=0; f<fail_check; f++) { 00300 printfQuda("%e Failures: %d / %d = %e\n", pow(10.0,-(f+1)/(double)tol), 00301 fail[f], u.Volume()*N, fail[f] / (double)(u.Volume()*N)); 00302 } 00303 00304 delete []iter; 00305 delete []fail; 00306 00307 return accuracy_level; 00308 } 00309 00310 int cpuColorSpinorField::Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, 00311 const int tol) { 00312 checkField(a, b); 00313 00314 int ret = 0; 00315 if (a.precision == QUDA_DOUBLE_PRECISION) 00316 if (b.precision == QUDA_DOUBLE_PRECISION) 00317 ret = compareSpinor(*(a.order_double), *(b.order_double), tol); 00318 else 00319 ret = compareSpinor(*(a.order_double), *(b.order_single), tol); 00320 else 00321 if (b.precision == QUDA_DOUBLE_PRECISION) 00322 ret = compareSpinor(*(a.order_single), *(b.order_double), tol); 00323 else 00324 ret =compareSpinor(*(a.order_single), *(b.order_single), tol); 00325 00326 return ret; 00327 } 00328 00329 template <class Order> 00330 void print_vector(const Order &o, unsigned int x) { 00331 00332 for (int s=0; s<o.Nspin(); s++) { 00333 for (int c=0; c<o.Ncolor(); c++) { 00334 for (int z=0; z<2; z++) { 00335 std::cout << o(x, s, c, z) << std::endl; 00336 } 00337 } 00338 std::cout << std::endl; 00339 } 00340 00341 } 00342 00343 // print out the vector at volume point x 00344 void cpuColorSpinorField::PrintVector(unsigned int x) { 00345 00346 switch(precision) { 00347 case QUDA_DOUBLE_PRECISION: 00348 print_vector(*order_double, x); 00349 break; 00350 case QUDA_SINGLE_PRECISION: 00351 print_vector(*order_single, x); 00352 break; 00353 default: 00354 errorQuda("Precision %d not implemented", precision); 00355 } 00356 00357 } 00358 00359 void cpuColorSpinorField::allocateGhostBuffer(void) 00360 { 00361 if (initGhostFaceBuffer) return; 00362 00363 if (this->siteSubset == QUDA_FULL_SITE_SUBSET){ 00364 errorQuda("Full spinor is not supported in alllocateGhostBuffer\n"); 00365 } 00366 00367 int X1 = this->x[0]*2; 00368 int X2 = this->x[1]; 00369 int X3 = this->x[2]; 00370 int X4 = this->x[3]; 00371 int Vsh[4]={ X2*X3*X4/2, 00372 X1*X3*X4/2, 00373 X1*X2*X4/2, 00374 X1*X2*X3/2}; 00375 00376 int num_faces = 1; 00377 if(this->nSpin == 1) num_faces = 3; // staggered 00378 00379 int spinor_size = 2*this->nSpin*this->nColor*this->precision; 00380 for(int i=0;i < 4; i++){ 00381 fwdGhostFaceBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size); 00382 backGhostFaceBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size); 00383 00384 fwdGhostFaceSendBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size); 00385 backGhostFaceSendBuffer[i] = malloc(num_faces*Vsh[i]*spinor_size); 00386 00387 if(fwdGhostFaceBuffer[i]== NULL || backGhostFaceBuffer[i] == NULL|| 00388 fwdGhostFaceSendBuffer[i]== NULL || backGhostFaceSendBuffer[i]==NULL){ 00389 errorQuda("malloc for ghost buf in cpu spinor failed\n"); 00390 } 00391 } 00392 00393 initGhostFaceBuffer = 1; 00394 return; 00395 } 00396 00397 void cpuColorSpinorField::freeGhostBuffer(void) 00398 { 00399 if(!initGhostFaceBuffer) return; 00400 00401 for(int i=0;i < 4; i++){ 00402 free(fwdGhostFaceBuffer[i]); fwdGhostFaceBuffer[i] = NULL; 00403 free(backGhostFaceBuffer[i]); backGhostFaceBuffer[i] = NULL; 00404 free(fwdGhostFaceSendBuffer[i]); fwdGhostFaceSendBuffer[i] = NULL; 00405 free(backGhostFaceSendBuffer[i]); backGhostFaceSendBuffer[i] = NULL; 00406 } 00407 00408 initGhostFaceBuffer = 0; 00409 00410 return; 00411 } 00412 00413 00414 00415 void cpuColorSpinorField::packGhost(void* ghost_spinor, const int dim, 00416 const QudaDirection dir, const QudaParity oddBit, const int dagger) 00417 { 00418 if (this->siteSubset == QUDA_FULL_SITE_SUBSET){ 00419 errorQuda("Full spinor is not supported in packGhost for cpu"); 00420 } 00421 00422 if (fieldOrder == QUDA_QOP_DOMAIN_WALL_FIELD_ORDER) { 00423 errorQuda("Field order %d not supported", fieldOrder); 00424 } 00425 00426 int num_faces=1; 00427 if(this->nSpin == 1){ //staggered 00428 num_faces=3; 00429 } 00430 int spinor_size = 2*this->nSpin*this->nColor*this->precision; 00431 00432 int X1 = this->x[0]*2; 00433 int X2 = this->x[1]; 00434 int X3 = this->x[2]; 00435 int X4 = this->x[3]; 00436 00437 for(int i=0;i < this->volume;i++){ 00438 /* 00439 int boundaryCrossings = i/(X1/2) + i/(X1*X2/2) + i/(X1*X2*X3/2); 00440 int Y = 2*i + (boundaryCrossings + oddBit) % 2; 00441 */ 00442 int X1h = X1/2; 00443 00444 int sid =i; 00445 int za = sid/X1h; 00446 int x1h = sid - za*X1h; 00447 int zb = za/X2; 00448 int x2 = za - zb*X2; 00449 int x4 = zb/X3; 00450 int x3 = zb - x4*X3; 00451 int x1odd = (x2 + x3 + x4 + oddBit) & 1; 00452 int x1 = 2*x1h + x1odd; 00453 00454 int ghost_face_idx ; 00455 00456 switch(dim){ 00457 case 0: //X dimension 00458 if (dir == QUDA_BACKWARDS){ 00459 if (x1 < num_faces){ 00460 ghost_face_idx = (x1*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1; 00461 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00462 } 00463 }else{ // QUDA_FORWARDS 00464 if (x1 >=X1 - num_faces){ 00465 ghost_face_idx = ((x1-X1+num_faces)*X4*X3*X2 + x4*(X3*X2)+x3*X2 +x2)>>1; 00466 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00467 } 00468 } 00469 break; 00470 00471 case 1: //Y dimension 00472 if (dir == QUDA_BACKWARDS){ 00473 if (x2 < num_faces){ 00474 ghost_face_idx = (x2*X4*X3*X1 + x4*X3*X1+x3*X1+x1)>>1; 00475 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00476 } 00477 }else{ // QUDA_FORWARDS 00478 if (x2 >= X2 - num_faces){ 00479 ghost_face_idx = ((x2-X2+num_faces)*X4*X3*X1+ x4*X3*X1+x3*X1+x1)>>1; 00480 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00481 } 00482 } 00483 break; 00484 00485 case 2: //Z dimension 00486 if (dir == QUDA_BACKWARDS){ 00487 if (x3 < num_faces){ 00488 ghost_face_idx = (x3*X4*X2*X1 + x4*X2*X1+x2*X1+x1)>>1; 00489 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00490 } 00491 }else{ // QUDA_FORWARDS 00492 if (x3 >= X3 - num_faces){ 00493 ghost_face_idx = ((x3-X3+num_faces)*X4*X2*X1 + x4*X2*X1 + x2*X1 + x1)>>1; 00494 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00495 } 00496 } 00497 break; 00498 00499 case 3: //T dimension 00500 if (dir == QUDA_BACKWARDS){ 00501 if (x4 < num_faces){ 00502 ghost_face_idx = (x4*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1; 00503 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00504 } 00505 }else{ // QUDA_FORWARDS 00506 if (x4 >= X4 - num_faces){ 00507 ghost_face_idx = ((x4-X4+num_faces)*X3*X2*X1 + x3*X2*X1+x2*X1+x1)>>1; 00508 memcpy( ((char*)ghost_spinor) + ghost_face_idx*spinor_size, ((char*)v)+i*spinor_size, spinor_size); 00509 } 00510 } 00511 break; 00512 default: 00513 errorQuda("Invalid dim value\n"); 00514 }//switch 00515 }//for i 00516 00517 return; 00518 } 00519 00520 00521 void cpuColorSpinorField::unpackGhost(void* ghost_spinor, const int dim, 00522 const QudaDirection dir, const int dagger) 00523 { 00524 if (this->siteSubset == QUDA_FULL_SITE_SUBSET){ 00525 errorQuda("Full spinor is not supported in unpackGhost for cpu"); 00526 } 00527 }