|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #if (__CUDA_ARCH__ >= 130) 00002 static __inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i) 00003 { 00004 int4 v = tex1Dfetch(t,i); 00005 return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z)); 00006 } 00007 #endif 00008 00009 // Double precision gauge field 00010 texture<int4, 1> gauge0TexDouble2; 00011 texture<int4, 1> gauge1TexDouble2; 00012 00013 // Single precision gauge field 00014 texture<float4, 1, cudaReadModeElementType> gauge0TexSingle4; 00015 texture<float4, 1, cudaReadModeElementType> gauge1TexSingle4; 00016 texture<float2, 1, cudaReadModeElementType> gauge0TexSingle2; 00017 texture<float2, 1, cudaReadModeElementType> gauge1TexSingle2; 00018 00019 // Half precision gauge field 00020 texture<short4, 1, cudaReadModeNormalizedFloat> gauge0TexHalf4; 00021 texture<short4, 1, cudaReadModeNormalizedFloat> gauge1TexHalf4; 00022 texture<short2, 1, cudaReadModeNormalizedFloat> gauge0TexHalf2; 00023 texture<short2, 1, cudaReadModeNormalizedFloat> gauge1TexHalf2; 00024 00025 00026 texture<int4, 1> fatGauge0TexDouble; 00027 texture<int4, 1> fatGauge1TexDouble; 00028 texture<float2, 1, cudaReadModeElementType> fatGauge0TexSingle; 00029 texture<float2, 1, cudaReadModeElementType> fatGauge1TexSingle; 00030 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge0TexHalf; 00031 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge1TexHalf; 00032 00033 texture<int4, 1> longGauge0TexDouble; 00034 texture<int4, 1> longGauge1TexDouble; 00035 texture<float4, 1, cudaReadModeElementType> longGauge0TexSingle; 00036 texture<float4, 1, cudaReadModeElementType> longGauge1TexSingle; 00037 texture<float2, 1, cudaReadModeElementType> longGauge0TexSingle_norecon; 00038 texture<float2, 1, cudaReadModeElementType> longGauge1TexSingle_norecon; 00039 00040 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf; 00041 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf; 00042 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf_norecon; 00043 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf_norecon; 00044 00045 00046 //Double precision for site link 00047 texture<int4, 1> siteLink0TexDouble; 00048 texture<int4, 1> siteLink1TexDouble; 00049 00050 //Single precision for site link 00051 texture<float4, 1, cudaReadModeElementType> siteLink0TexSingle; 00052 texture<float4, 1, cudaReadModeElementType> siteLink1TexSingle; 00053 00054 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle_norecon; 00055 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle_norecon; 00056 00057 00058 texture<int4, 1> muLink0TexDouble; 00059 texture<int4, 1> muLink1TexDouble; 00060 // Single precision mulink field 00061 texture<float2, 1, cudaReadModeElementType> muLink0TexSingle; 00062 texture<float2, 1, cudaReadModeElementType> muLink1TexSingle; 00063 00064 // Double precision input spinor field 00065 texture<int4, 1> spinorTexDouble; 00066 00067 // Single precision input spinor field 00068 texture<float4, 1, cudaReadModeElementType> spinorTexSingle; 00069 texture<float2, 1, cudaReadModeElementType> spinorTexSingle2; 00070 00071 // Half precision input spinor field 00072 texture<short4, 1, cudaReadModeNormalizedFloat> spinorTexHalf; 00073 texture<short2, 1, cudaReadModeNormalizedFloat> spinorTexHalf2; 00074 texture<float, 1, cudaReadModeElementType> spinorTexNorm; 00075 00076 // Double precision accumulate spinor field 00077 texture<int4, 1> accumTexDouble; 00078 00079 // Single precision accumulate spinor field 00080 texture<float4, 1, cudaReadModeElementType> accumTexSingle; 00081 texture<float2, 1, cudaReadModeElementType> accumTexSingle2; 00082 00083 // Half precision accumulate spinor field 00084 texture<short4, 1, cudaReadModeNormalizedFloat> accumTexHalf; 00085 texture<short2, 1, cudaReadModeNormalizedFloat> accumTexHalf2; 00086 texture<float, 1, cudaReadModeElementType> accumTexNorm; 00087 00088 static void bindGaugeTex(const FullGauge gauge, const int oddBit, 00089 void **gauge0, void **gauge1) { 00090 if(oddBit) { 00091 *gauge0 = gauge.odd; 00092 *gauge1 = gauge.even; 00093 } else { 00094 *gauge0 = gauge.even; 00095 *gauge1 = gauge.odd; 00096 } 00097 00098 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { 00099 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00100 cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes); 00101 cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes); 00102 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00103 cudaBindTexture(0, gauge0TexSingle2, *gauge0, gauge.bytes); 00104 cudaBindTexture(0, gauge1TexSingle2, *gauge1, gauge.bytes); 00105 } else { 00106 cudaBindTexture(0, gauge0TexHalf2, *gauge0, gauge.bytes); 00107 cudaBindTexture(0, gauge1TexHalf2, *gauge1, gauge.bytes); 00108 } 00109 } else { 00110 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00111 cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes); 00112 cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes); 00113 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00114 cudaBindTexture(0, gauge0TexSingle4, *gauge0, gauge.bytes); 00115 cudaBindTexture(0, gauge1TexSingle4, *gauge1, gauge.bytes); 00116 } else { 00117 cudaBindTexture(0, gauge0TexHalf4, *gauge0, gauge.bytes); 00118 cudaBindTexture(0, gauge1TexHalf4, *gauge1, gauge.bytes); 00119 } 00120 } 00121 00122 } 00123 00124 static void unbindGaugeTex(const FullGauge gauge) { 00125 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { 00126 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00127 cudaUnbindTexture(gauge0TexDouble2); 00128 cudaUnbindTexture(gauge1TexDouble2); 00129 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00130 cudaUnbindTexture(gauge0TexSingle2); 00131 cudaUnbindTexture(gauge1TexSingle2); 00132 } else { 00133 cudaUnbindTexture(gauge0TexHalf2); 00134 cudaUnbindTexture(gauge1TexHalf2); 00135 } 00136 } else { 00137 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00138 cudaUnbindTexture(gauge0TexDouble2); 00139 cudaUnbindTexture(gauge1TexDouble2); 00140 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00141 cudaUnbindTexture(gauge0TexSingle4); 00142 cudaUnbindTexture(gauge1TexSingle4); 00143 } else { 00144 cudaUnbindTexture(gauge0TexHalf4); 00145 cudaUnbindTexture(gauge1TexHalf4); 00146 } 00147 } 00148 00149 } 00150 00151 static void bindFatGaugeTex(const FullGauge gauge, const int oddBit, 00152 void **gauge0, void **gauge1) { 00153 if(oddBit) { 00154 *gauge0 = gauge.odd; 00155 *gauge1 = gauge.even; 00156 } else { 00157 *gauge0 = gauge.even; 00158 *gauge1 = gauge.odd; 00159 } 00160 00161 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00162 cudaBindTexture(0, fatGauge0TexDouble, *gauge0, gauge.bytes); 00163 cudaBindTexture(0, fatGauge1TexDouble, *gauge1, gauge.bytes); 00164 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00165 cudaBindTexture(0, fatGauge0TexSingle, *gauge0, gauge.bytes); 00166 cudaBindTexture(0, fatGauge1TexSingle, *gauge1, gauge.bytes); 00167 } else { 00168 cudaBindTexture(0, fatGauge0TexHalf, *gauge0, gauge.bytes); 00169 cudaBindTexture(0, fatGauge1TexHalf, *gauge1, gauge.bytes); 00170 } 00171 } 00172 00173 static void unbindFatGaugeTex(const FullGauge gauge) { 00174 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00175 cudaUnbindTexture(fatGauge0TexDouble); 00176 cudaUnbindTexture(fatGauge1TexDouble); 00177 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00178 cudaUnbindTexture(fatGauge0TexSingle); 00179 cudaUnbindTexture(fatGauge1TexSingle); 00180 } else { 00181 cudaUnbindTexture(fatGauge0TexHalf); 00182 cudaUnbindTexture(fatGauge1TexHalf); 00183 } 00184 } 00185 00186 static void bindLongGaugeTex(const FullGauge gauge, const int oddBit, 00187 void **gauge0, void **gauge1) { 00188 if(oddBit) { 00189 *gauge0 = gauge.odd; 00190 *gauge1 = gauge.even; 00191 } else { 00192 *gauge0 = gauge.even; 00193 *gauge1 = gauge.odd; 00194 } 00195 00196 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00197 cudaBindTexture(0, longGauge0TexDouble, *gauge0, gauge.bytes); 00198 cudaBindTexture(0, longGauge1TexDouble, *gauge1, gauge.bytes); 00199 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00200 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00201 cudaBindTexture(0, longGauge0TexSingle_norecon, *gauge0, gauge.bytes); 00202 cudaBindTexture(0, longGauge1TexSingle_norecon, *gauge1, gauge.bytes); 00203 } else { 00204 cudaBindTexture(0, longGauge0TexSingle, *gauge0, gauge.bytes); 00205 cudaBindTexture(0, longGauge1TexSingle, *gauge1, gauge.bytes); 00206 } 00207 } else { 00208 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00209 cudaBindTexture(0, longGauge0TexHalf_norecon, *gauge0, gauge.bytes); 00210 cudaBindTexture(0, longGauge1TexHalf_norecon, *gauge1, gauge.bytes); 00211 } else { 00212 cudaBindTexture(0, longGauge0TexHalf, *gauge0, gauge.bytes); 00213 cudaBindTexture(0, longGauge1TexHalf, *gauge1, gauge.bytes); 00214 } 00215 } 00216 } 00217 00218 static void unbindLongGaugeTex(const FullGauge gauge){ 00219 if (gauge.precision == QUDA_DOUBLE_PRECISION) { 00220 cudaUnbindTexture(longGauge0TexDouble); 00221 cudaUnbindTexture(longGauge1TexDouble); 00222 } else if (gauge.precision == QUDA_SINGLE_PRECISION) { 00223 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00224 cudaUnbindTexture(longGauge0TexSingle_norecon); 00225 cudaUnbindTexture(longGauge1TexSingle_norecon); 00226 } else { 00227 cudaUnbindTexture(longGauge0TexSingle); 00228 cudaUnbindTexture(longGauge1TexSingle); 00229 } 00230 } else { 00231 if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct 00232 cudaUnbindTexture(longGauge0TexHalf_norecon); 00233 cudaUnbindTexture(longGauge1TexHalf_norecon); 00234 } else { 00235 cudaUnbindTexture(longGauge0TexHalf); 00236 cudaUnbindTexture(longGauge1TexHalf); 00237 } 00238 } 00239 } 00240 00241 00242 template <int N, typename spinorFloat> 00243 int bindSpinorTex(const int length, const spinorFloat *in, const float *inNorm, 00244 const spinorFloat *x=0, const float *xNorm=0) { 00245 00246 if (N==2 && sizeof(spinorFloat) == sizeof(double2)) { 00247 int spinor_bytes = length*sizeof(double); 00248 cudaBindTexture(0, spinorTexDouble, in, spinor_bytes); 00249 if (x) cudaBindTexture(0, accumTexDouble, x, spinor_bytes); 00250 return sizeof(double); 00251 } else if (N==4 && sizeof(spinorFloat) == sizeof(float4)) { 00252 int spinor_bytes = length*sizeof(float); 00253 cudaBindTexture(0, spinorTexSingle, in, spinor_bytes); 00254 checkCudaError(); 00255 if (x) cudaBindTexture(0, accumTexSingle, x, spinor_bytes); 00256 checkCudaError(); 00257 return sizeof(float); 00258 } else if (N==2 && sizeof(spinorFloat) == sizeof(float2)) { 00259 int spinor_bytes = length*sizeof(float); 00260 cudaBindTexture(0, spinorTexSingle2, in, spinor_bytes); 00261 if (x) cudaBindTexture(0, accumTexSingle2, x, spinor_bytes); 00262 return sizeof(float); 00263 } else if (N==4 && sizeof(spinorFloat) == sizeof(short4)) { 00264 int spinor_bytes = length*sizeof(short); 00265 cudaBindTexture(0, spinorTexHalf, in, spinor_bytes); 00266 if (inNorm) cudaBindTexture(0, spinorTexNorm, inNorm, spinor_bytes/12); 00267 if (x) cudaBindTexture(0, accumTexHalf, x, spinor_bytes); 00268 if (xNorm) cudaBindTexture(0, accumTexNorm, xNorm, spinor_bytes/12); 00269 return sizeof(float); 00270 } else if (N==2 && sizeof(spinorFloat) == sizeof(short2)) { 00271 int spinor_bytes = length*sizeof(short); 00272 cudaBindTexture(0, spinorTexHalf2, in, spinor_bytes); 00273 if (inNorm) cudaBindTexture(0, spinorTexNorm, inNorm, spinor_bytes/3); 00274 if (x) cudaBindTexture(0, accumTexHalf2, x, spinor_bytes); 00275 if (xNorm) cudaBindTexture(0, accumTexNorm, xNorm, spinor_bytes/3); 00276 return sizeof(float); 00277 } else { 00278 errorQuda("Unsupported precision and short vector type"); 00279 } 00280 00281 } 00282 00283 template <int N, typename spinorFloat> 00284 void unbindSpinorTex(const spinorFloat *in, const float *inNorm, 00285 const spinorFloat *x=0, const float *xNorm=0) { 00286 00287 if (N==2 && sizeof(spinorFloat) == sizeof(double2)) { 00288 cudaUnbindTexture(spinorTexDouble); 00289 if (x) cudaUnbindTexture(accumTexDouble); 00290 } else if (N==4 && sizeof(spinorFloat) == sizeof(float4)) { 00291 cudaUnbindTexture(spinorTexSingle); 00292 if (x) cudaUnbindTexture(accumTexSingle); 00293 } else if (N==2 && sizeof(spinorFloat) == sizeof(float2)) { 00294 cudaUnbindTexture(spinorTexSingle2); 00295 if (x) cudaUnbindTexture(accumTexSingle2); 00296 } else if (N==4 && sizeof(spinorFloat) == sizeof(short4)) { 00297 cudaUnbindTexture(spinorTexHalf); 00298 if (inNorm) cudaUnbindTexture(spinorTexNorm); 00299 if (x) cudaUnbindTexture(accumTexHalf); 00300 if (xNorm) cudaUnbindTexture(accumTexNorm); 00301 } else if (N==2 && sizeof(spinorFloat) == sizeof(short2)) { 00302 cudaUnbindTexture(spinorTexHalf2); 00303 if (inNorm) cudaUnbindTexture(spinorTexNorm); 00304 if (x) cudaUnbindTexture(accumTexHalf2); 00305 if (xNorm) cudaUnbindTexture(accumTexNorm); 00306 } else { 00307 errorQuda("Unsupported precision and short vector type"); 00308 } 00309 00310 checkCudaError(); 00311 } 00312 00313 // Double precision clover term 00314 texture<int4, 1> cloverTexDouble; 00315 00316 // Single precision clover term 00317 texture<float4, 1, cudaReadModeElementType> cloverTexSingle; 00318 00319 // Half precision clover term 00320 texture<short4, 1, cudaReadModeNormalizedFloat> cloverTexHalf; 00321 texture<float, 1, cudaReadModeElementType> cloverTexNorm; 00322 00323 static QudaPrecision bindCloverTex(const FullClover clover, const int oddBit, 00324 void **cloverP, void **cloverNormP) { 00325 00326 if (oddBit) { 00327 *cloverP = clover.odd.clover; 00328 *cloverNormP = clover.odd.cloverNorm; 00329 } else { 00330 *cloverP = clover.even.clover; 00331 *cloverNormP = clover.even.cloverNorm; 00332 } 00333 00334 if (clover.odd.precision == QUDA_DOUBLE_PRECISION) { 00335 cudaBindTexture(0, cloverTexDouble, *cloverP, clover.odd.bytes); 00336 } else if (clover.odd.precision == QUDA_SINGLE_PRECISION) { 00337 cudaBindTexture(0, cloverTexSingle, *cloverP, clover.odd.bytes); 00338 } else { 00339 cudaBindTexture(0, cloverTexHalf, *cloverP, clover.odd.bytes); 00340 cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.odd.bytes/18); 00341 } 00342 00343 return clover.odd.precision; 00344 } 00345 00346 static void unbindCloverTex(const FullClover clover) { 00347 00348 if (clover.odd.precision == QUDA_DOUBLE_PRECISION) { 00349 cudaUnbindTexture(cloverTexDouble); 00350 } else if (clover.odd.precision == QUDA_SINGLE_PRECISION) { 00351 cudaUnbindTexture(cloverTexSingle); 00352 } else { 00353 cudaUnbindTexture(cloverTexHalf); 00354 cudaUnbindTexture(cloverTexNorm); 00355 } 00356 00357 } 00358
1.7.3