QUDA v0.4.0
A library for QCD on GPUs
|
00001 #include <typeinfo> 00002 00003 #if (__COMPUTE_CAPABILITY__ >= 130) 00004 static __inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i) 00005 { 00006 int4 v = tex1Dfetch(t,i); 00007 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z)); 00008 } 00009 #endif 00010 00011 // Double precision gauge field 00012 texture<int4, 1> gauge0TexDouble2; 00013 texture<int4, 1> gauge1TexDouble2; 00014 00015 // Single precision gauge field 00016 texture<float4, 1, cudaReadModeElementType> gauge0TexSingle4; 00017 texture<float4, 1, cudaReadModeElementType> gauge1TexSingle4; 00018 texture<float2, 1, cudaReadModeElementType> gauge0TexSingle2; 00019 texture<float2, 1, cudaReadModeElementType> gauge1TexSingle2; 00020 00021 // Half precision gauge field 00022 texture<short4, 1, cudaReadModeNormalizedFloat> gauge0TexHalf4; 00023 texture<short4, 1, cudaReadModeNormalizedFloat> gauge1TexHalf4; 00024 texture<short2, 1, cudaReadModeNormalizedFloat> gauge0TexHalf2; 00025 texture<short2, 1, cudaReadModeNormalizedFloat> gauge1TexHalf2; 00026 00027 texture<int4, 1> fatGauge0TexDouble; 00028 texture<int4, 1> fatGauge1TexDouble; 00029 texture<float2, 1, cudaReadModeElementType> fatGauge0TexSingle; 00030 texture<float2, 1, cudaReadModeElementType> fatGauge1TexSingle; 00031 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge0TexHalf; 00032 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge1TexHalf; 00033 00034 texture<int4, 1> longGauge0TexDouble; 00035 texture<int4, 1> longGauge1TexDouble; 00036 texture<float4, 1, cudaReadModeElementType> longGauge0TexSingle; 00037 texture<float4, 1, cudaReadModeElementType> longGauge1TexSingle; 00038 texture<float2, 1, cudaReadModeElementType> longGauge0TexSingle_norecon; 00039 texture<float2, 1, cudaReadModeElementType> longGauge1TexSingle_norecon; 00040 00041 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf; 00042 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf; 00043 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf_norecon; 00044 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf_norecon; 00045 00046 00047 //Double precision for site link 00048 texture<int4, 1> siteLink0TexDouble; 00049 texture<int4, 1> siteLink1TexDouble; 00050 00051 //Single precision for site link 00052 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle; 00053 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle; 00054 00055 texture<float4, 1, cudaReadModeElementType> siteLink0TexSingle_recon; 00056 texture<float4, 1, cudaReadModeElementType> siteLink1TexSingle_recon; 00057 00058 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle_norecon; 00059 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle_norecon; 00060 00061 00062 texture<int4, 1> muLink0TexDouble; 00063 texture<int4, 1> muLink1TexDouble; 00064 // Single precision mulink field 00065 texture<float2, 1, cudaReadModeElementType> muLink0TexSingle; 00066 texture<float2, 1, cudaReadModeElementType> muLink1TexSingle; 00067 00068 // Double precision input spinor field 00069 texture<int4, 1> spinorTexDouble; 00070 00071 // Single precision input spinor field 00072 texture<float4, 1, cudaReadModeElementType> spinorTexSingle; 00073 texture<float2, 1, cudaReadModeElementType> spinorTexSingle2; 00074 00075 // Half precision input spinor field 00076 texture<short4, 1, cudaReadModeNormalizedFloat> spinorTexHalf; 00077 texture<short2, 1, cudaReadModeNormalizedFloat> spinorTexHalf2; 00078 texture<float, 1, cudaReadModeElementType> spinorTexHalfNorm; 00079 00080 // Double precision accumulate spinor field 00081 texture<int4, 1> accumTexDouble; 00082 00083 // Single precision accumulate spinor field 00084 texture<float4, 1, cudaReadModeElementType> accumTexSingle; 00085 texture<float2, 1, cudaReadModeElementType> accumTexSingle2; 00086 00087 // Half precision accumulate spinor field 00088 texture<short4, 1, cudaReadModeNormalizedFloat> accumTexHalf; 00089 texture<short2, 1, cudaReadModeNormalizedFloat> accumTexHalf2; 00090 texture<float, 1, cudaReadModeElementType> accumTexHalfNorm; 00091 00092 // Double precision intermediate spinor field (used by exterior Dslash kernels) 00093 texture<int4, 1> interTexDouble; 00094 00095 // Single precision intermediate spinor field 00096 texture<float4, 1, cudaReadModeElementType> interTexSingle; 00097 texture<float2, 1, cudaReadModeElementType> interTexSingle2; 00098 00099 // Half precision intermediate spinor field 00100 texture<short4, 1, cudaReadModeNormalizedFloat> interTexHalf; 00101 texture<short2, 1, cudaReadModeNormalizedFloat> interTexHalf2; 00102 texture<float, 1, cudaReadModeElementType> interTexHalfNorm; 00103 texture<float, 1, cudaReadModeElementType> interTexHalf2Norm; 00104 00105 void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, 00106 void **gauge0, void **gauge1) 00107 { 00108 if(oddBit) { 00109 *gauge0 = gauge.odd; 00110 *gauge1 = gauge.even; 00111 } else { 00112 *gauge0 = gauge.even; 00113 *gauge1 = gauge.odd; 00114 } 00115 00116 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { 00117 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00118 cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes/2); 00119 cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes/2); 00120 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00121 cudaBindTexture(0, gauge0TexSingle2, *gauge0, gauge.bytes/2); 00122 cudaBindTexture(0, gauge1TexSingle2, *gauge1, gauge.bytes/2); 00123 } else { 00124 cudaBindTexture(0, gauge0TexHalf2, *gauge0, gauge.bytes/2); 00125 cudaBindTexture(0, gauge1TexHalf2, *gauge1, gauge.bytes/2); 00126 } 00127 } else { 00128 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00129 cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes/2); 00130 cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes/2); 00131 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00132 cudaBindTexture(0, gauge0TexSingle4, *gauge0, gauge.bytes/2); 00133 cudaBindTexture(0, gauge1TexSingle4, *gauge1, gauge.bytes/2); 00134 } else { 00135 cudaBindTexture(0, gauge0TexHalf4, *gauge0, gauge.bytes/2); 00136 cudaBindTexture(0, gauge1TexHalf4, *gauge1, gauge.bytes/2); 00137 } 00138 } 00139 00140 } 00141 00142 void unbindGaugeTex(const cudaGaugeField &gauge) 00143 { 00144 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { 00145 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00146 cudaUnbindTexture(gauge0TexDouble2); 00147 cudaUnbindTexture(gauge1TexDouble2); 00148 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00149 cudaUnbindTexture(gauge0TexSingle2); 00150 cudaUnbindTexture(gauge1TexSingle2); 00151 } else { 00152 cudaUnbindTexture(gauge0TexHalf2); 00153 cudaUnbindTexture(gauge1TexHalf2); 00154 } 00155 } else { 00156 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00157 cudaUnbindTexture(gauge0TexDouble2); 00158 cudaUnbindTexture(gauge1TexDouble2); 00159 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00160 cudaUnbindTexture(gauge0TexSingle4); 00161 cudaUnbindTexture(gauge1TexSingle4); 00162 } else { 00163 cudaUnbindTexture(gauge0TexHalf4); 00164 cudaUnbindTexture(gauge1TexHalf4); 00165 } 00166 } 00167 00168 } 00169 00170 void bindFatGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1) 00171 { 00172 if(oddBit) { 00173 *gauge0 = gauge.odd; 00174 *gauge1 = gauge.even; 00175 } else { 00176 *gauge0 = gauge.even; 00177 *gauge1 = gauge.odd; 00178 } 00179 00180 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00181 cudaBindTexture(0, fatGauge0TexDouble, *gauge0, gauge.bytes/2); 00182 cudaBindTexture(0, fatGauge1TexDouble, *gauge1, gauge.bytes/2); 00183 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00184 cudaBindTexture(0, fatGauge0TexSingle, *gauge0, gauge.bytes/2); 00185 cudaBindTexture(0, fatGauge1TexSingle, *gauge1, gauge.bytes/2); 00186 } else { 00187 cudaBindTexture(0, fatGauge0TexHalf, *gauge0, gauge.bytes/2); 00188 cudaBindTexture(0, fatGauge1TexHalf, *gauge1, gauge.bytes/2); 00189 } 00190 } 00191 00192 void unbindFatGaugeTex(const cudaGaugeField &gauge) 00193 { 00194 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00195 cudaUnbindTexture(fatGauge0TexDouble); 00196 cudaUnbindTexture(fatGauge1TexDouble); 00197 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00198 cudaUnbindTexture(fatGauge0TexSingle); 00199 cudaUnbindTexture(fatGauge1TexSingle); 00200 } else { 00201 cudaUnbindTexture(fatGauge0TexHalf); 00202 cudaUnbindTexture(fatGauge1TexHalf); 00203 } 00204 } 00205 00206 void bindLongGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1) 00207 { 00208 if(oddBit) { 00209 *gauge0 = gauge.odd; 00210 *gauge1 = gauge.even; 00211 } else { 00212 *gauge0 = gauge.even; 00213 *gauge1 = gauge.odd; 00214 } 00215 00216 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00217 cudaBindTexture(0, longGauge0TexDouble, *gauge0, gauge.bytes/2); 00218 cudaBindTexture(0, longGauge1TexDouble, *gauge1, gauge.bytes/2); 00219 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00220 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00221 cudaBindTexture(0, longGauge0TexSingle_norecon, *gauge0, gauge.bytes/2); 00222 cudaBindTexture(0, longGauge1TexSingle_norecon, *gauge1, gauge.bytes/2); 00223 } else { 00224 cudaBindTexture(0, longGauge0TexSingle, *gauge0, gauge.bytes/2); 00225 cudaBindTexture(0, longGauge1TexSingle, *gauge1, gauge.bytes/2); 00226 } 00227 } else { 00228 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00229 cudaBindTexture(0, longGauge0TexHalf_norecon, *gauge0, gauge.bytes/2); 00230 cudaBindTexture(0, longGauge1TexHalf_norecon, *gauge1, gauge.bytes/2); 00231 } else { 00232 cudaBindTexture(0, longGauge0TexHalf, *gauge0, gauge.bytes/2); 00233 cudaBindTexture(0, longGauge1TexHalf, *gauge1, gauge.bytes/2); 00234 } 00235 } 00236 } 00237 00238 void unbindLongGaugeTex(const cudaGaugeField &gauge) 00239 { 00240 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00241 cudaUnbindTexture(longGauge0TexDouble); 00242 cudaUnbindTexture(longGauge1TexDouble); 00243 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00244 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00245 cudaUnbindTexture(longGauge0TexSingle_norecon); 00246 cudaUnbindTexture(longGauge1TexSingle_norecon); 00247 } else { 00248 cudaUnbindTexture(longGauge0TexSingle); 00249 cudaUnbindTexture(longGauge1TexSingle); 00250 } 00251 } else { 00252 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00253 cudaUnbindTexture(longGauge0TexHalf_norecon); 00254 cudaUnbindTexture(longGauge1TexHalf_norecon); 00255 } else { 00256 cudaUnbindTexture(longGauge0TexHalf); 00257 cudaUnbindTexture(longGauge1TexHalf); 00258 } 00259 } 00260 } 00261 00262 00263 template <typename spinorFloat> 00264 int bindSpinorTex(const size_t spinor_bytes, const size_t norm_bytes, const spinorFloat *in, 00265 const float *inNorm, const spinorFloat *out=0, const float *outNorm=0, 00266 const spinorFloat *x=0, const float *xNorm=0) 00267 { 00268 int size; 00269 00270 if (typeid(spinorFloat) == typeid(double2)) { 00271 cudaBindTexture(0, spinorTexDouble, in, spinor_bytes); 00272 if (out) cudaBindTexture(0, interTexDouble, out, spinor_bytes); 00273 if (x) cudaBindTexture(0, accumTexDouble, x, spinor_bytes); 00274 size = sizeof(double); 00275 } else if (typeid(spinorFloat) == typeid(float4)) { 00276 cudaBindTexture(0, spinorTexSingle, in, spinor_bytes); 00277 if (out) cudaBindTexture(0, interTexSingle, out, spinor_bytes); 00278 if (x) cudaBindTexture(0, accumTexSingle, x, spinor_bytes); 00279 size = sizeof(float); 00280 } else if (typeid(spinorFloat) == typeid(float2)) { 00281 cudaBindTexture(0, spinorTexSingle2, in, spinor_bytes); 00282 if (out) cudaBindTexture(0, interTexSingle2, out, spinor_bytes); 00283 if (x) cudaBindTexture(0, accumTexSingle2, x, spinor_bytes); 00284 size = sizeof(float); 00285 } else if (typeid(spinorFloat) == typeid(short4)) { 00286 cudaBindTexture(0, spinorTexHalf, in, spinor_bytes); 00287 if (inNorm) cudaBindTexture(0, spinorTexHalfNorm, inNorm, norm_bytes); 00288 if (out) cudaBindTexture(0, interTexHalf, out, spinor_bytes); 00289 if (outNorm) cudaBindTexture(0, interTexHalfNorm, outNorm, norm_bytes); 00290 if (x) cudaBindTexture(0, accumTexHalf, x, spinor_bytes); 00291 if (xNorm) cudaBindTexture(0, accumTexHalfNorm, xNorm, norm_bytes); 00292 size = sizeof(float); 00293 } else if (typeid(spinorFloat) == typeid(short2)) { 00294 cudaBindTexture(0, spinorTexHalf2, in, spinor_bytes); 00295 if (inNorm) cudaBindTexture(0, spinorTexHalfNorm, inNorm, norm_bytes); 00296 if (out) cudaBindTexture(0, interTexHalf2, out, spinor_bytes); 00297 if (outNorm) cudaBindTexture(0, interTexHalf2Norm, outNorm, norm_bytes); 00298 if (x) cudaBindTexture(0, accumTexHalf2, x, spinor_bytes); 00299 if (xNorm) cudaBindTexture(0, accumTexHalfNorm, xNorm, norm_bytes); 00300 size = sizeof(float); 00301 } else { 00302 errorQuda("Unsupported precision and short vector type"); 00303 } 00304 00305 return size; 00306 } 00307 00308 template <typename spinorFloat> 00309 void unbindSpinorTex(const spinorFloat *in, const float *inNorm, const spinorFloat *out=0, const float *outNorm=0, 00310 const spinorFloat *x=0, const float *xNorm=0) 00311 { 00312 if (typeid(spinorFloat) == typeid(double2)) { 00313 cudaUnbindTexture(spinorTexDouble); 00314 if (out) cudaUnbindTexture(interTexDouble); 00315 if (x) cudaUnbindTexture(accumTexDouble); 00316 } else if (typeid(spinorFloat) == typeid(float4)) { 00317 cudaUnbindTexture(spinorTexSingle); 00318 if (out) cudaUnbindTexture(interTexSingle); 00319 if (x) cudaUnbindTexture(accumTexSingle); 00320 } else if (typeid(spinorFloat) == typeid(float2)) { 00321 cudaUnbindTexture(spinorTexSingle2); 00322 if (out) cudaUnbindTexture(interTexSingle2); 00323 if (x) cudaUnbindTexture(accumTexSingle2); 00324 } else if (typeid(spinorFloat) == typeid(short4)) { 00325 cudaUnbindTexture(spinorTexHalf); 00326 if (inNorm) cudaUnbindTexture(spinorTexHalfNorm); 00327 if (out) cudaUnbindTexture(interTexHalf); 00328 if (outNorm) cudaUnbindTexture(interTexHalfNorm); 00329 if (x) cudaUnbindTexture(accumTexHalf); 00330 if (xNorm) cudaUnbindTexture(accumTexHalfNorm); 00331 } else if (typeid(spinorFloat) == typeid(short2)) { 00332 cudaUnbindTexture(spinorTexHalf2); 00333 if (inNorm) cudaUnbindTexture(spinorTexHalfNorm); 00334 if (out) cudaUnbindTexture(interTexHalf2); 00335 if (outNorm) cudaUnbindTexture(interTexHalf2Norm); 00336 if (x) cudaUnbindTexture(accumTexHalf2); 00337 if (xNorm) cudaUnbindTexture(accumTexHalfNorm); 00338 } else { 00339 errorQuda("Unsupported precision and short vector type"); 00340 } 00341 00342 } 00343 00344 // Double precision clover term 00345 texture<int4, 1> cloverTexDouble; 00346 00347 // Single precision clover term 00348 texture<float4, 1, cudaReadModeElementType> cloverTexSingle; 00349 00350 // Half precision clover term 00351 texture<short4, 1, cudaReadModeNormalizedFloat> cloverTexHalf; 00352 texture<float, 1, cudaReadModeElementType> cloverTexNorm; 00353 00354 QudaPrecision bindCloverTex(const FullClover clover, const int oddBit, 00355 void **cloverP, void **cloverNormP) 00356 { 00357 if (oddBit) { 00358 *cloverP = clover.odd; 00359 *cloverNormP = clover.oddNorm; 00360 } else { 00361 *cloverP = clover.even; 00362 *cloverNormP = clover.evenNorm; 00363 } 00364 00365 if (clover.precision == QUDA_DOUBLE_PRECISION) { 00366 cudaBindTexture(0, cloverTexDouble, *cloverP, clover.bytes); 00367 } else if (clover.precision == QUDA_SINGLE_PRECISION) { 00368 cudaBindTexture(0, cloverTexSingle, *cloverP, clover.bytes); 00369 } else { 00370 cudaBindTexture(0, cloverTexHalf, *cloverP, clover.bytes); 00371 cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.norm_bytes); 00372 } 00373 00374 return clover.precision; 00375 } 00376 00377 void unbindCloverTex(const FullClover clover) 00378 { 00379 if (clover.precision == QUDA_DOUBLE_PRECISION) { 00380 cudaUnbindTexture(cloverTexDouble); 00381 } else if (clover.precision == QUDA_SINGLE_PRECISION) { 00382 cudaUnbindTexture(cloverTexSingle); 00383 } else { 00384 cudaUnbindTexture(cloverTexHalf); 00385 cudaUnbindTexture(cloverTexNorm); 00386 } 00387 } 00388