QUDA v0.3.2
A library for QCD on GPUs

quda/lib/cuda_color_spinor_field.cpp

Go to the documentation of this file.
00001 #include <stdlib.h>
00002 #include <stdio.h>
00003 #include <color_spinor_field.h>
00004 #include <blas_quda.h>
00005 
00006 #include <string.h>
00007 #include <iostream>
00008 
00009 void* cudaColorSpinorField::buffer = 0;
00010 bool cudaColorSpinorField::bufferInit = false;
00011 size_t cudaColorSpinorField::bufferBytes = 0;
00012 
00013 /*cudaColorSpinorField::cudaColorSpinorField() : 
00014   ColorSpinorField(), v(0), norm(0), alloc(false), init(false) {
00015 
00016   }*/
00017 
00018 cudaColorSpinorField::cudaColorSpinorField(const ColorSpinorParam &param) : 
00019   ColorSpinorField(param), v(0), norm(0), alloc(false), init(true) {
00020   create(param.create);
00021   if  (param.create == QUDA_NULL_FIELD_CREATE) {
00022     // do nothing
00023   } else if (param.create == QUDA_ZERO_FIELD_CREATE) {
00024     zero();
00025   } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) {
00026     v = param.v;
00027     norm = param.norm;
00028   } else if (param.create == QUDA_COPY_FIELD_CREATE){
00029     errorQuda("not implemented");
00030   }
00031 }
00032 
00033 cudaColorSpinorField::cudaColorSpinorField(const cudaColorSpinorField &src) : 
00034   ColorSpinorField(src), v(0), norm(0), alloc(false), init(true) {
00035   create(QUDA_COPY_FIELD_CREATE);
00036   copy(src);
00037 }
00038 
00039 // creates a copy of src, any differences defined in param
00040 cudaColorSpinorField::cudaColorSpinorField(const ColorSpinorField &src, const ColorSpinorParam &param) :
00041   ColorSpinorField(src), v(0), norm(0), alloc(false), init(true) {  
00042 // can only overide if we are not using a reference or parity special case
00043   if (param.create != QUDA_REFERENCE_FIELD_CREATE || 
00044       (param.create == QUDA_REFERENCE_FIELD_CREATE && 
00045        src.SiteSubset() == QUDA_FULL_SITE_SUBSET && 
00046        param.siteSubset == QUDA_PARITY_SITE_SUBSET && 
00047        src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) ) {
00048     reset(param);
00049   } else {
00050     errorQuda("Undefined behaviour"); // else silent bug possible?
00051   }
00052 
00053   fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00054   create(param.create);
00055 
00056   if (param.create == QUDA_NULL_FIELD_CREATE) {
00057     // do nothing
00058   } else if (param.create == QUDA_ZERO_FIELD_CREATE) {
00059     zero();
00060   } else if (param.create == QUDA_COPY_FIELD_CREATE) {
00061     if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) {
00062       copy(dynamic_cast<const cudaColorSpinorField&>(src));    
00063     } else if (src.FieldLocation() == QUDA_CPU_FIELD_LOCATION) {
00064       loadCPUSpinorField(dynamic_cast<const cpuColorSpinorField&>(src));
00065     } else {
00066       errorQuda("FieldLocation %d not supported", src.FieldLocation());
00067     }
00068   } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) {
00069     if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) {
00070       v = (dynamic_cast<const cudaColorSpinorField&>(src)).v;
00071       norm = (dynamic_cast<const cudaColorSpinorField&>(src)).norm;
00072     } else {
00073       errorQuda("Cannot reference a non cuda field");
00074     }
00075   } else {
00076     errorQuda("CreateType %d not implemented", param.create);
00077   }
00078 
00079 }
00080 
00081 cudaColorSpinorField::cudaColorSpinorField(const ColorSpinorField &src) 
00082   : ColorSpinorField(src), alloc(false), init(true) {
00083   fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00084   create(QUDA_COPY_FIELD_CREATE);
00085   if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) {
00086     copy(dynamic_cast<const cudaColorSpinorField&>(src));
00087   } else if (src.FieldLocation() == QUDA_CPU_FIELD_LOCATION) {
00088     loadCPUSpinorField(src);
00089   } else {
00090     errorQuda("FieldLocation not supported");
00091   }
00092 }
00093 
00094 cudaColorSpinorField& cudaColorSpinorField::operator=(const cudaColorSpinorField &src) {
00095   if (&src != this) {
00096     destroy();
00097     // keep current attributes unless unset
00098     if (!ColorSpinorField::init) ColorSpinorField::operator=(src);
00099     fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00100     create(QUDA_COPY_FIELD_CREATE);
00101     copy(src);
00102   }
00103   return *this;
00104 }
00105 
00106 cudaColorSpinorField& cudaColorSpinorField::operator=(const cpuColorSpinorField &src) {
00107   destroy();
00108   // keep current attributes unless unset
00109   if (!ColorSpinorField::init) ColorSpinorField::operator=(src);
00110   fieldLocation = QUDA_CUDA_FIELD_LOCATION;
00111   create(QUDA_COPY_FIELD_CREATE);
00112   loadCPUSpinorField(src);
00113   return *this;
00114 }
00115 
00116 cudaColorSpinorField::~cudaColorSpinorField() {
00117   destroy();
00118 }
00119 
00120 
00121 void cudaColorSpinorField::create(const QudaFieldCreate create) {
00122 
00123   if (siteSubset == QUDA_FULL_SITE_SUBSET && siteOrder != QUDA_EVEN_ODD_SITE_ORDER) {
00124     errorQuda("Subset not implemented");
00125   }
00126 
00127   if (create != QUDA_REFERENCE_FIELD_CREATE) {
00128     if (cudaMalloc((void**)&v, bytes) == cudaErrorMemoryAllocation) {
00129       errorQuda("Error allocating spinor: bytes=%d", (int)bytes);
00130     }
00131     
00132     if (precision == QUDA_HALF_PRECISION) {
00133       if (cudaMalloc((void**)&norm, bytes/(nColor*nSpin)) == cudaErrorMemoryAllocation) {
00134         errorQuda("Error allocating norm");
00135       }
00136     }
00137     alloc = true;
00138   }
00139 
00140   // Check if buffer isn't big enough
00141   if (bytes > bufferBytes && bufferInit) {
00142     cudaFreeHost(buffer);
00143     bufferInit = false;
00144   }
00145 
00146   if (!bufferInit) {
00147     bufferBytes = bytes;
00148     cudaMallocHost(&buffer, bufferBytes);    
00149     bufferInit = true;
00150   }
00151 
00152   if (siteSubset == QUDA_FULL_SITE_SUBSET) {
00153     // create the associated even and odd subsets
00154     ColorSpinorParam param;
00155     param.siteSubset = QUDA_PARITY_SITE_SUBSET;
00156     param.nDim = nDim;
00157     memcpy(param.x, x, nDim*sizeof(int));
00158     param.x[0] /= 2; // set single parity dimensions
00159     param.create = QUDA_REFERENCE_FIELD_CREATE;
00160     param.v = v;
00161     param.norm = norm;
00162     even = new cudaColorSpinorField(*this, param);
00163     odd = new cudaColorSpinorField(*this, param);
00164     // need this hackery for the moment (need to locate the odd pointer half way into the full field)
00165     (dynamic_cast<cudaColorSpinorField*>(odd))->v = (void*)((unsigned long)v + bytes/2);
00166     if (precision == QUDA_HALF_PRECISION) 
00167       (dynamic_cast<cudaColorSpinorField*>(odd))->norm = (void*)((unsigned long)norm + bytes/(2*nColor*nSpin));
00168   }
00169   
00170 }
00171 void cudaColorSpinorField::freeBuffer() {
00172   if (bufferInit) cudaFreeHost(buffer);
00173 }
00174 
00175 void cudaColorSpinorField::destroy() {
00176   if (alloc) {
00177     cudaFree(v);
00178     if (precision == QUDA_HALF_PRECISION) cudaFree(norm);
00179     if (siteSubset == QUDA_FULL_SITE_SUBSET) {
00180       delete even;
00181       delete odd;
00182     }
00183     alloc = false;
00184   }
00185 }
00186 
00187 
00188 cudaColorSpinorField& cudaColorSpinorField::Even() const { 
00189   if (siteSubset == QUDA_FULL_SITE_SUBSET) {
00190     return *(dynamic_cast<cudaColorSpinorField*>(even)); 
00191   } else {
00192     errorQuda("Cannot return even subset of %d subset", siteSubset);
00193   }
00194 }
00195 
00196 cudaColorSpinorField& cudaColorSpinorField::Odd() const {
00197   if (siteSubset == QUDA_FULL_SITE_SUBSET) {
00198     return *(dynamic_cast<cudaColorSpinorField*>(odd)); 
00199   } else {
00200     errorQuda("Cannot return odd subset of %d subset", siteSubset);
00201   }
00202 }
00203 
00204 // cuda's floating point format, IEEE-754, represents the floating point
00205 // zero as 4 zero bytes
00206 void cudaColorSpinorField::zero() {
00207   cudaMemset(v, 0, bytes);
00208   if (precision == QUDA_HALF_PRECISION) cudaMemset(norm, 0, bytes/(nColor*nSpin));
00209 }
00210 
00211 void cudaColorSpinorField::copy(const cudaColorSpinorField &src) {
00212   checkField(*this, src);
00213   copyCuda(*this, src);
00214 }
00215 
00216 #include <pack_spinor.h>
00217 
00218 void cudaColorSpinorField::loadCPUSpinorField(const cpuColorSpinorField &src) {
00219 
00220   
00221   if (nDim != src.Ndim()) {
00222     errorQuda("Number of dimensions %d %d don't match", nDim, src.Ndim());
00223   }
00224 
00225   if (volume != src.volume) {
00226     errorQuda("Volumes %d %d don't match", volume, src.volume);
00227   }
00228 
00229   if (SiteOrder() != src.SiteOrder()) {
00230     errorQuda("Subset orders don't match");
00231   }
00232 
00233   if (nColor != 3) {
00234     errorQuda("Nc != 3 not yet supported");
00235   }
00236 
00237   if (siteSubset != src.siteSubset) {
00238     errorQuda("Subset types do not match %d %d", siteSubset, src.siteSubset);
00239   }
00240   if (precision == QUDA_HALF_PRECISION) {
00241     ColorSpinorParam param;
00242     fill(param); // acquire all attributes of this
00243     param.precision = QUDA_SINGLE_PRECISION; // change precision
00244     param.create = QUDA_COPY_FIELD_CREATE;
00245     cudaColorSpinorField tmp(src, param);
00246     copy(tmp);
00247     return;
00248   }
00249 
00250   // (temporary?) bug fix for padding
00251   memset(buffer, 0, bufferBytes);
00252   
00253 #define LOAD_SPINOR_CPU_TO_GPU(myNs)                                    \
00254   if (precision == QUDA_DOUBLE_PRECISION) {                             \
00255       if (src.precision == QUDA_DOUBLE_PRECISION) {                     \
00256           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00257               packSpinor<3,myNs,1>((double*)buffer, (double*)src.v, volume, pad, x, length, src.length, \
00258                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00259           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00260               packSpinor<3,myNs,2>((double*)buffer, (double*)src.v, volume, pad, x, length, src.length, \
00261                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00262           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00263               errorQuda("double4 not supported");                       \
00264           }                                                             \
00265       } else {                                                          \
00266           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00267               packSpinor<3,myNs,1>((double*)buffer, (float*)src.v, volume, pad, x, length, src.length, \
00268                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00269           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00270               packSpinor<3,myNs,2>((double*)buffer, (float*)src.v, volume, pad, x, length, src.length, \
00271                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00272           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00273               errorQuda("double4 not supported");                       \
00274           }                                                             \
00275       }                                                                 \
00276   } else {                                                              \
00277       if (src.precision == QUDA_DOUBLE_PRECISION) {                     \
00278           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00279               packSpinor<3,myNs,1>((float*)buffer, (double*)src.v, volume, pad, x, length, src.length, \
00280                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00281           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00282               packSpinor<3,myNs,2>((float*)buffer, (double*)src.v, volume, pad, x, length, src.length, \
00283                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00284           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00285               packSpinor<3,myNs,4>((float*)buffer, (double*)src.v, volume, pad, x, length, src.length, \
00286                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00287           }                                                             \
00288       } else {                                                          \
00289           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00290               packSpinor<3,myNs,1>((float*)buffer, (float*)src.v, volume, pad, x, length, src.length, \
00291                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00292           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00293               packSpinor<3,myNs,2>((float*)buffer, (float*)src.v, volume, pad, x, length, src.length, \
00294                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00295           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00296               packSpinor<3,myNs,4>((float*)buffer, (float*)src.v, volume, pad, x, length, src.length, \
00297                                 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \
00298           }                                                             \
00299       }                                                                 \
00300 }
00301 
00302   switch(nSpin){
00303   case 1:
00304       LOAD_SPINOR_CPU_TO_GPU(1);
00305       break;
00306   case 4:
00307       LOAD_SPINOR_CPU_TO_GPU(4);
00308       break;
00309   default:
00310       errorQuda("invalid number of spinors in function %s\n", __FUNCTION__);
00311 
00312   }
00313   
00314 #undef LOAD_SPINOR_CPU_TO_GPU
00315 
00316   /*  for (int i=0; i<length; i++) {
00317     std::cout << i << " " << ((float*)src.v)[i] << " " << ((float*)buffer)[i] << std::endl;
00318     }*/
00319 
00320   cudaMemcpy(v, buffer, bytes, cudaMemcpyHostToDevice);
00321   return;
00322 }
00323 
00324 
00325 void cudaColorSpinorField::saveCPUSpinorField(cpuColorSpinorField &dest) const {
00326 
00327   if (nDim != dest.Ndim()) {
00328     errorQuda("Number of dimensions %d %d don't match", nDim, dest.Ndim());
00329   }
00330 
00331   if (volume != dest.volume) {
00332     errorQuda("Volumes %d %d don't match", volume, dest.volume);
00333   }
00334 
00335   if (SiteOrder() != dest.SiteOrder()) {
00336     errorQuda("Subset orders don't match");
00337   }
00338 
00339   if (nColor != 3) {
00340     errorQuda("Nc != 3 not yet supported");
00341   }
00342 
00343 
00344   if (siteSubset != dest.siteSubset) {
00345     errorQuda("Subset types do not match %d %d", siteSubset, dest.siteSubset);
00346   }
00347 
00348   if (precision == QUDA_HALF_PRECISION) {
00349     ColorSpinorParam param;
00350     param.create = QUDA_COPY_FIELD_CREATE;
00351     param.precision = QUDA_SINGLE_PRECISION;
00352     cudaColorSpinorField tmp(*this, param);
00353     tmp.saveCPUSpinorField(dest);
00354     return;
00355   }
00356 
00357   // (temporary?) bug fix for padding
00358   memset(buffer, 0, bufferBytes);
00359 
00360   cudaMemcpy(buffer, v, bytes, cudaMemcpyDeviceToHost);
00361 
00362 
00363 #define SAVE_SPINOR_GPU_TO_CPU(myNs)                            \
00364   if (precision == QUDA_DOUBLE_PRECISION) {                             \
00365       if (dest.precision == QUDA_DOUBLE_PRECISION) {                    \
00366           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00367               unpackSpinor<3,myNs,1>((double*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \
00368                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder());       \
00369           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00370               unpackSpinor<3,myNs,2>((double*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \
00371                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder());       \
00372           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00373               errorQuda("double4 not supported");                       \
00374           }                                                             \
00375       } else {                                                          \
00376           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00377               unpackSpinor<3,myNs,1>((float*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \
00378                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
00379           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00380               unpackSpinor<3,myNs,2>((float*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \
00381                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
00382           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00383               errorQuda("double4 not supported");                       \
00384           }                                                             \
00385       }                                                                 \
00386   } else {                                                              \
00387       if (dest.precision == QUDA_DOUBLE_PRECISION) {                    \
00388           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00389               unpackSpinor<3,myNs,1>((double*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \
00390                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder());       \
00391           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00392               unpackSpinor<3,myNs,2>((double*)dest.v, (float*)buffer, volume, pad, x, dest.length, length,      \
00393                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
00394           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00395               unpackSpinor<3,myNs,4>((double*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \
00396                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
00397           }                                                             \
00398       } else {                                                          \
00399           if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) {                           \
00400               unpackSpinor<3,myNs,1>((float*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \
00401                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \
00402           } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) {                   \
00403               unpackSpinor<3,myNs,2>((float*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \
00404                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder());       \
00405           } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) {                   \
00406               unpackSpinor<3,myNs,4>((float*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \
00407                                   dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder());       \
00408           }                                                             \
00409       }                                                                 \
00410   }
00411 
00412   switch(nSpin){
00413   case 1:
00414       SAVE_SPINOR_GPU_TO_CPU(1);
00415       break;
00416   case 4:
00417       SAVE_SPINOR_GPU_TO_CPU(4);
00418       break;
00419   default:
00420       errorQuda("invalid number of spinors in function %s\n", __FUNCTION__);      
00421   }
00422 #undef SAVE_SPINOR_GPU_TO_CPU
00423   return;
00424 }
00425 
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines