|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #include <stdlib.h> 00002 #include <stdio.h> 00003 #include <color_spinor_field.h> 00004 #include <blas_quda.h> 00005 00006 #include <string.h> 00007 #include <iostream> 00008 00009 void* cudaColorSpinorField::buffer = 0; 00010 bool cudaColorSpinorField::bufferInit = false; 00011 size_t cudaColorSpinorField::bufferBytes = 0; 00012 00013 /*cudaColorSpinorField::cudaColorSpinorField() : 00014 ColorSpinorField(), v(0), norm(0), alloc(false), init(false) { 00015 00016 }*/ 00017 00018 cudaColorSpinorField::cudaColorSpinorField(const ColorSpinorParam ¶m) : 00019 ColorSpinorField(param), v(0), norm(0), alloc(false), init(true) { 00020 create(param.create); 00021 if (param.create == QUDA_NULL_FIELD_CREATE) { 00022 // do nothing 00023 } else if (param.create == QUDA_ZERO_FIELD_CREATE) { 00024 zero(); 00025 } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) { 00026 v = param.v; 00027 norm = param.norm; 00028 } else if (param.create == QUDA_COPY_FIELD_CREATE){ 00029 errorQuda("not implemented"); 00030 } 00031 } 00032 00033 cudaColorSpinorField::cudaColorSpinorField(const cudaColorSpinorField &src) : 00034 ColorSpinorField(src), v(0), norm(0), alloc(false), init(true) { 00035 create(QUDA_COPY_FIELD_CREATE); 00036 copy(src); 00037 } 00038 00039 // creates a copy of src, any differences defined in param 00040 cudaColorSpinorField::cudaColorSpinorField(const ColorSpinorField &src, const ColorSpinorParam ¶m) : 00041 ColorSpinorField(src), v(0), norm(0), alloc(false), init(true) { 00042 // can only overide if we are not using a reference or parity special case 00043 if (param.create != QUDA_REFERENCE_FIELD_CREATE || 00044 (param.create == QUDA_REFERENCE_FIELD_CREATE && 00045 src.SiteSubset() == QUDA_FULL_SITE_SUBSET && 00046 param.siteSubset == QUDA_PARITY_SITE_SUBSET && 00047 src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) ) { 00048 reset(param); 00049 } else { 00050 errorQuda("Undefined behaviour"); // else silent bug possible? 00051 } 00052 00053 fieldLocation = QUDA_CUDA_FIELD_LOCATION; 00054 create(param.create); 00055 00056 if (param.create == QUDA_NULL_FIELD_CREATE) { 00057 // do nothing 00058 } else if (param.create == QUDA_ZERO_FIELD_CREATE) { 00059 zero(); 00060 } else if (param.create == QUDA_COPY_FIELD_CREATE) { 00061 if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) { 00062 copy(dynamic_cast<const cudaColorSpinorField&>(src)); 00063 } else if (src.FieldLocation() == QUDA_CPU_FIELD_LOCATION) { 00064 loadCPUSpinorField(dynamic_cast<const cpuColorSpinorField&>(src)); 00065 } else { 00066 errorQuda("FieldLocation %d not supported", src.FieldLocation()); 00067 } 00068 } else if (param.create == QUDA_REFERENCE_FIELD_CREATE) { 00069 if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) { 00070 v = (dynamic_cast<const cudaColorSpinorField&>(src)).v; 00071 norm = (dynamic_cast<const cudaColorSpinorField&>(src)).norm; 00072 } else { 00073 errorQuda("Cannot reference a non cuda field"); 00074 } 00075 } else { 00076 errorQuda("CreateType %d not implemented", param.create); 00077 } 00078 00079 } 00080 00081 cudaColorSpinorField::cudaColorSpinorField(const ColorSpinorField &src) 00082 : ColorSpinorField(src), alloc(false), init(true) { 00083 fieldLocation = QUDA_CUDA_FIELD_LOCATION; 00084 create(QUDA_COPY_FIELD_CREATE); 00085 if (src.FieldLocation() == QUDA_CUDA_FIELD_LOCATION) { 00086 copy(dynamic_cast<const cudaColorSpinorField&>(src)); 00087 } else if (src.FieldLocation() == QUDA_CPU_FIELD_LOCATION) { 00088 loadCPUSpinorField(src); 00089 } else { 00090 errorQuda("FieldLocation not supported"); 00091 } 00092 } 00093 00094 cudaColorSpinorField& cudaColorSpinorField::operator=(const cudaColorSpinorField &src) { 00095 if (&src != this) { 00096 destroy(); 00097 // keep current attributes unless unset 00098 if (!ColorSpinorField::init) ColorSpinorField::operator=(src); 00099 fieldLocation = QUDA_CUDA_FIELD_LOCATION; 00100 create(QUDA_COPY_FIELD_CREATE); 00101 copy(src); 00102 } 00103 return *this; 00104 } 00105 00106 cudaColorSpinorField& cudaColorSpinorField::operator=(const cpuColorSpinorField &src) { 00107 destroy(); 00108 // keep current attributes unless unset 00109 if (!ColorSpinorField::init) ColorSpinorField::operator=(src); 00110 fieldLocation = QUDA_CUDA_FIELD_LOCATION; 00111 create(QUDA_COPY_FIELD_CREATE); 00112 loadCPUSpinorField(src); 00113 return *this; 00114 } 00115 00116 cudaColorSpinorField::~cudaColorSpinorField() { 00117 destroy(); 00118 } 00119 00120 00121 void cudaColorSpinorField::create(const QudaFieldCreate create) { 00122 00123 if (siteSubset == QUDA_FULL_SITE_SUBSET && siteOrder != QUDA_EVEN_ODD_SITE_ORDER) { 00124 errorQuda("Subset not implemented"); 00125 } 00126 00127 if (create != QUDA_REFERENCE_FIELD_CREATE) { 00128 if (cudaMalloc((void**)&v, bytes) == cudaErrorMemoryAllocation) { 00129 errorQuda("Error allocating spinor: bytes=%d", (int)bytes); 00130 } 00131 00132 if (precision == QUDA_HALF_PRECISION) { 00133 if (cudaMalloc((void**)&norm, bytes/(nColor*nSpin)) == cudaErrorMemoryAllocation) { 00134 errorQuda("Error allocating norm"); 00135 } 00136 } 00137 alloc = true; 00138 } 00139 00140 // Check if buffer isn't big enough 00141 if (bytes > bufferBytes && bufferInit) { 00142 cudaFreeHost(buffer); 00143 bufferInit = false; 00144 } 00145 00146 if (!bufferInit) { 00147 bufferBytes = bytes; 00148 cudaMallocHost(&buffer, bufferBytes); 00149 bufferInit = true; 00150 } 00151 00152 if (siteSubset == QUDA_FULL_SITE_SUBSET) { 00153 // create the associated even and odd subsets 00154 ColorSpinorParam param; 00155 param.siteSubset = QUDA_PARITY_SITE_SUBSET; 00156 param.nDim = nDim; 00157 memcpy(param.x, x, nDim*sizeof(int)); 00158 param.x[0] /= 2; // set single parity dimensions 00159 param.create = QUDA_REFERENCE_FIELD_CREATE; 00160 param.v = v; 00161 param.norm = norm; 00162 even = new cudaColorSpinorField(*this, param); 00163 odd = new cudaColorSpinorField(*this, param); 00164 // need this hackery for the moment (need to locate the odd pointer half way into the full field) 00165 (dynamic_cast<cudaColorSpinorField*>(odd))->v = (void*)((unsigned long)v + bytes/2); 00166 if (precision == QUDA_HALF_PRECISION) 00167 (dynamic_cast<cudaColorSpinorField*>(odd))->norm = (void*)((unsigned long)norm + bytes/(2*nColor*nSpin)); 00168 } 00169 00170 } 00171 void cudaColorSpinorField::freeBuffer() { 00172 if (bufferInit) cudaFreeHost(buffer); 00173 } 00174 00175 void cudaColorSpinorField::destroy() { 00176 if (alloc) { 00177 cudaFree(v); 00178 if (precision == QUDA_HALF_PRECISION) cudaFree(norm); 00179 if (siteSubset == QUDA_FULL_SITE_SUBSET) { 00180 delete even; 00181 delete odd; 00182 } 00183 alloc = false; 00184 } 00185 } 00186 00187 00188 cudaColorSpinorField& cudaColorSpinorField::Even() const { 00189 if (siteSubset == QUDA_FULL_SITE_SUBSET) { 00190 return *(dynamic_cast<cudaColorSpinorField*>(even)); 00191 } else { 00192 errorQuda("Cannot return even subset of %d subset", siteSubset); 00193 } 00194 } 00195 00196 cudaColorSpinorField& cudaColorSpinorField::Odd() const { 00197 if (siteSubset == QUDA_FULL_SITE_SUBSET) { 00198 return *(dynamic_cast<cudaColorSpinorField*>(odd)); 00199 } else { 00200 errorQuda("Cannot return odd subset of %d subset", siteSubset); 00201 } 00202 } 00203 00204 // cuda's floating point format, IEEE-754, represents the floating point 00205 // zero as 4 zero bytes 00206 void cudaColorSpinorField::zero() { 00207 cudaMemset(v, 0, bytes); 00208 if (precision == QUDA_HALF_PRECISION) cudaMemset(norm, 0, bytes/(nColor*nSpin)); 00209 } 00210 00211 void cudaColorSpinorField::copy(const cudaColorSpinorField &src) { 00212 checkField(*this, src); 00213 copyCuda(*this, src); 00214 } 00215 00216 #include <pack_spinor.h> 00217 00218 void cudaColorSpinorField::loadCPUSpinorField(const cpuColorSpinorField &src) { 00219 00220 00221 if (nDim != src.Ndim()) { 00222 errorQuda("Number of dimensions %d %d don't match", nDim, src.Ndim()); 00223 } 00224 00225 if (volume != src.volume) { 00226 errorQuda("Volumes %d %d don't match", volume, src.volume); 00227 } 00228 00229 if (SiteOrder() != src.SiteOrder()) { 00230 errorQuda("Subset orders don't match"); 00231 } 00232 00233 if (nColor != 3) { 00234 errorQuda("Nc != 3 not yet supported"); 00235 } 00236 00237 if (siteSubset != src.siteSubset) { 00238 errorQuda("Subset types do not match %d %d", siteSubset, src.siteSubset); 00239 } 00240 if (precision == QUDA_HALF_PRECISION) { 00241 ColorSpinorParam param; 00242 fill(param); // acquire all attributes of this 00243 param.precision = QUDA_SINGLE_PRECISION; // change precision 00244 param.create = QUDA_COPY_FIELD_CREATE; 00245 cudaColorSpinorField tmp(src, param); 00246 copy(tmp); 00247 return; 00248 } 00249 00250 // (temporary?) bug fix for padding 00251 memset(buffer, 0, bufferBytes); 00252 00253 #define LOAD_SPINOR_CPU_TO_GPU(myNs) \ 00254 if (precision == QUDA_DOUBLE_PRECISION) { \ 00255 if (src.precision == QUDA_DOUBLE_PRECISION) { \ 00256 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00257 packSpinor<3,myNs,1>((double*)buffer, (double*)src.v, volume, pad, x, length, src.length, \ 00258 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00259 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00260 packSpinor<3,myNs,2>((double*)buffer, (double*)src.v, volume, pad, x, length, src.length, \ 00261 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00262 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00263 errorQuda("double4 not supported"); \ 00264 } \ 00265 } else { \ 00266 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00267 packSpinor<3,myNs,1>((double*)buffer, (float*)src.v, volume, pad, x, length, src.length, \ 00268 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00269 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00270 packSpinor<3,myNs,2>((double*)buffer, (float*)src.v, volume, pad, x, length, src.length, \ 00271 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00272 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00273 errorQuda("double4 not supported"); \ 00274 } \ 00275 } \ 00276 } else { \ 00277 if (src.precision == QUDA_DOUBLE_PRECISION) { \ 00278 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00279 packSpinor<3,myNs,1>((float*)buffer, (double*)src.v, volume, pad, x, length, src.length, \ 00280 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00281 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00282 packSpinor<3,myNs,2>((float*)buffer, (double*)src.v, volume, pad, x, length, src.length, \ 00283 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00284 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00285 packSpinor<3,myNs,4>((float*)buffer, (double*)src.v, volume, pad, x, length, src.length, \ 00286 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00287 } \ 00288 } else { \ 00289 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00290 packSpinor<3,myNs,1>((float*)buffer, (float*)src.v, volume, pad, x, length, src.length, \ 00291 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00292 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00293 packSpinor<3,myNs,2>((float*)buffer, (float*)src.v, volume, pad, x, length, src.length, \ 00294 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00295 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00296 packSpinor<3,myNs,4>((float*)buffer, (float*)src.v, volume, pad, x, length, src.length, \ 00297 src.SiteSubset(), src.SiteOrder(), gammaBasis, src.GammaBasis(), src.FieldOrder()); \ 00298 } \ 00299 } \ 00300 } 00301 00302 switch(nSpin){ 00303 case 1: 00304 LOAD_SPINOR_CPU_TO_GPU(1); 00305 break; 00306 case 4: 00307 LOAD_SPINOR_CPU_TO_GPU(4); 00308 break; 00309 default: 00310 errorQuda("invalid number of spinors in function %s\n", __FUNCTION__); 00311 00312 } 00313 00314 #undef LOAD_SPINOR_CPU_TO_GPU 00315 00316 /* for (int i=0; i<length; i++) { 00317 std::cout << i << " " << ((float*)src.v)[i] << " " << ((float*)buffer)[i] << std::endl; 00318 }*/ 00319 00320 cudaMemcpy(v, buffer, bytes, cudaMemcpyHostToDevice); 00321 return; 00322 } 00323 00324 00325 void cudaColorSpinorField::saveCPUSpinorField(cpuColorSpinorField &dest) const { 00326 00327 if (nDim != dest.Ndim()) { 00328 errorQuda("Number of dimensions %d %d don't match", nDim, dest.Ndim()); 00329 } 00330 00331 if (volume != dest.volume) { 00332 errorQuda("Volumes %d %d don't match", volume, dest.volume); 00333 } 00334 00335 if (SiteOrder() != dest.SiteOrder()) { 00336 errorQuda("Subset orders don't match"); 00337 } 00338 00339 if (nColor != 3) { 00340 errorQuda("Nc != 3 not yet supported"); 00341 } 00342 00343 00344 if (siteSubset != dest.siteSubset) { 00345 errorQuda("Subset types do not match %d %d", siteSubset, dest.siteSubset); 00346 } 00347 00348 if (precision == QUDA_HALF_PRECISION) { 00349 ColorSpinorParam param; 00350 param.create = QUDA_COPY_FIELD_CREATE; 00351 param.precision = QUDA_SINGLE_PRECISION; 00352 cudaColorSpinorField tmp(*this, param); 00353 tmp.saveCPUSpinorField(dest); 00354 return; 00355 } 00356 00357 // (temporary?) bug fix for padding 00358 memset(buffer, 0, bufferBytes); 00359 00360 cudaMemcpy(buffer, v, bytes, cudaMemcpyDeviceToHost); 00361 00362 00363 #define SAVE_SPINOR_GPU_TO_CPU(myNs) \ 00364 if (precision == QUDA_DOUBLE_PRECISION) { \ 00365 if (dest.precision == QUDA_DOUBLE_PRECISION) { \ 00366 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00367 unpackSpinor<3,myNs,1>((double*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \ 00368 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00369 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00370 unpackSpinor<3,myNs,2>((double*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \ 00371 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00372 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00373 errorQuda("double4 not supported"); \ 00374 } \ 00375 } else { \ 00376 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00377 unpackSpinor<3,myNs,1>((float*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \ 00378 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00379 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00380 unpackSpinor<3,myNs,2>((float*)dest.v, (double*)buffer, volume, pad, x, dest.length, length, \ 00381 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00382 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00383 errorQuda("double4 not supported"); \ 00384 } \ 00385 } \ 00386 } else { \ 00387 if (dest.precision == QUDA_DOUBLE_PRECISION) { \ 00388 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00389 unpackSpinor<3,myNs,1>((double*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \ 00390 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00391 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00392 unpackSpinor<3,myNs,2>((double*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \ 00393 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00394 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00395 unpackSpinor<3,myNs,4>((double*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \ 00396 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00397 } \ 00398 } else { \ 00399 if (fieldOrder == QUDA_FLOAT_FIELD_ORDER) { \ 00400 unpackSpinor<3,myNs,1>((float*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \ 00401 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00402 } else if (fieldOrder == QUDA_FLOAT2_FIELD_ORDER) { \ 00403 unpackSpinor<3,myNs,2>((float*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \ 00404 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00405 } else if (fieldOrder == QUDA_FLOAT4_FIELD_ORDER) { \ 00406 unpackSpinor<3,myNs,4>((float*)dest.v, (float*)buffer, volume, pad, x, dest.length, length, \ 00407 dest.SiteSubset(), dest.SiteOrder(), dest.GammaBasis(), gammaBasis, dest.FieldOrder()); \ 00408 } \ 00409 } \ 00410 } 00411 00412 switch(nSpin){ 00413 case 1: 00414 SAVE_SPINOR_GPU_TO_CPU(1); 00415 break; 00416 case 4: 00417 SAVE_SPINOR_GPU_TO_CPU(4); 00418 break; 00419 default: 00420 errorQuda("invalid number of spinors in function %s\n", __FUNCTION__); 00421 } 00422 #undef SAVE_SPINOR_GPU_TO_CPU 00423 return; 00424 } 00425
1.7.3