QUDA v0.3.2
A library for QCD on GPUs

quda/lib/dslash_textures.h

Go to the documentation of this file.
00001 #if (__CUDA_ARCH__ >= 130)
00002 static __inline__ __device__ double2 fetch_double2(texture<int4, 1> t, int i)
00003 {
00004   int4 v = tex1Dfetch(t,i);
00005   return make_double2(__hiloint2double(v.y, v.x), __hiloint2double(v.w, v.z));
00006 }
00007 #endif
00008 
00009 // Double precision gauge field
00010 texture<int4, 1> gauge0TexDouble2;
00011 texture<int4, 1> gauge1TexDouble2;
00012 
00013 // Single precision gauge field
00014 texture<float4, 1, cudaReadModeElementType> gauge0TexSingle4;
00015 texture<float4, 1, cudaReadModeElementType> gauge1TexSingle4;
00016 texture<float2, 1, cudaReadModeElementType> gauge0TexSingle2;
00017 texture<float2, 1, cudaReadModeElementType> gauge1TexSingle2;
00018 
00019 // Half precision gauge field
00020 texture<short4, 1, cudaReadModeNormalizedFloat> gauge0TexHalf4;
00021 texture<short4, 1, cudaReadModeNormalizedFloat> gauge1TexHalf4;
00022 texture<short2, 1, cudaReadModeNormalizedFloat> gauge0TexHalf2;
00023 texture<short2, 1, cudaReadModeNormalizedFloat> gauge1TexHalf2;
00024 
00025 
00026 texture<int4, 1> fatGauge0TexDouble;
00027 texture<int4, 1> fatGauge1TexDouble;
00028 texture<float2, 1, cudaReadModeElementType> fatGauge0TexSingle;
00029 texture<float2, 1, cudaReadModeElementType> fatGauge1TexSingle;
00030 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge0TexHalf;
00031 texture<short2, 1, cudaReadModeNormalizedFloat> fatGauge1TexHalf;
00032 
00033 texture<int4, 1> longGauge0TexDouble;
00034 texture<int4, 1> longGauge1TexDouble;
00035 texture<float4, 1, cudaReadModeElementType> longGauge0TexSingle;
00036 texture<float4, 1, cudaReadModeElementType> longGauge1TexSingle;
00037 texture<float2, 1, cudaReadModeElementType> longGauge0TexSingle_norecon;
00038 texture<float2, 1, cudaReadModeElementType> longGauge1TexSingle_norecon;
00039 
00040 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf;
00041 texture<short4, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf;
00042 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge0TexHalf_norecon;
00043 texture<short2, 1, cudaReadModeNormalizedFloat> longGauge1TexHalf_norecon;
00044 
00045 
00046 //Double precision for site link
00047 texture<int4, 1> siteLink0TexDouble;
00048 texture<int4, 1> siteLink1TexDouble;
00049 
00050 //Single precision for site link
00051 texture<float4, 1, cudaReadModeElementType> siteLink0TexSingle;
00052 texture<float4, 1, cudaReadModeElementType> siteLink1TexSingle;
00053 
00054 texture<float2, 1, cudaReadModeElementType> siteLink0TexSingle_norecon;
00055 texture<float2, 1, cudaReadModeElementType> siteLink1TexSingle_norecon;
00056 
00057 
00058 texture<int4, 1> muLink0TexDouble;
00059 texture<int4, 1> muLink1TexDouble;
00060 // Single precision mulink field
00061 texture<float2, 1, cudaReadModeElementType> muLink0TexSingle;
00062 texture<float2, 1, cudaReadModeElementType> muLink1TexSingle;
00063 
00064 // Double precision input spinor field
00065 texture<int4, 1> spinorTexDouble;
00066 
00067 // Single precision input spinor field
00068 texture<float4, 1, cudaReadModeElementType> spinorTexSingle;
00069 texture<float2, 1, cudaReadModeElementType> spinorTexSingle2;
00070 
00071 // Half precision input spinor field
00072 texture<short4, 1, cudaReadModeNormalizedFloat> spinorTexHalf;
00073 texture<short2, 1, cudaReadModeNormalizedFloat> spinorTexHalf2;
00074 texture<float, 1, cudaReadModeElementType> spinorTexNorm;
00075 
00076 // Double precision accumulate spinor field
00077 texture<int4, 1> accumTexDouble;
00078 
00079 // Single precision accumulate spinor field
00080 texture<float4, 1, cudaReadModeElementType> accumTexSingle;
00081 texture<float2, 1, cudaReadModeElementType> accumTexSingle2;
00082 
00083 // Half precision accumulate spinor field
00084 texture<short4, 1, cudaReadModeNormalizedFloat> accumTexHalf;
00085 texture<short2, 1, cudaReadModeNormalizedFloat> accumTexHalf2;
00086 texture<float, 1, cudaReadModeElementType> accumTexNorm;
00087 
00088 static void bindGaugeTex(const FullGauge gauge, const int oddBit, 
00089                          void **gauge0, void **gauge1) {
00090   if(oddBit) {
00091     *gauge0 = gauge.odd;
00092     *gauge1 = gauge.even;
00093   } else {
00094     *gauge0 = gauge.even;
00095     *gauge1 = gauge.odd;
00096   }
00097   
00098   if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) {
00099     if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00100       cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes); 
00101       cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes);
00102     } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00103       cudaBindTexture(0, gauge0TexSingle2, *gauge0, gauge.bytes); 
00104       cudaBindTexture(0, gauge1TexSingle2, *gauge1, gauge.bytes);
00105     } else {
00106       cudaBindTexture(0, gauge0TexHalf2, *gauge0, gauge.bytes); 
00107       cudaBindTexture(0, gauge1TexHalf2, *gauge1, gauge.bytes);
00108     }
00109   } else {
00110     if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00111       cudaBindTexture(0, gauge0TexDouble2, *gauge0, gauge.bytes); 
00112       cudaBindTexture(0, gauge1TexDouble2, *gauge1, gauge.bytes);
00113     } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00114       cudaBindTexture(0, gauge0TexSingle4, *gauge0, gauge.bytes); 
00115       cudaBindTexture(0, gauge1TexSingle4, *gauge1, gauge.bytes);
00116     } else {
00117       cudaBindTexture(0, gauge0TexHalf4, *gauge0, gauge.bytes); 
00118       cudaBindTexture(0, gauge1TexHalf4, *gauge1, gauge.bytes);
00119     }
00120   }
00121 
00122 }
00123 
00124 static void unbindGaugeTex(const FullGauge gauge) {
00125   if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) {
00126     if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00127       cudaUnbindTexture(gauge0TexDouble2); 
00128       cudaUnbindTexture(gauge1TexDouble2);
00129     } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00130       cudaUnbindTexture(gauge0TexSingle2);
00131       cudaUnbindTexture(gauge1TexSingle2);
00132     } else {
00133       cudaUnbindTexture(gauge0TexHalf2); 
00134       cudaUnbindTexture(gauge1TexHalf2);
00135     }
00136   } else {
00137     if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00138       cudaUnbindTexture(gauge0TexDouble2); 
00139       cudaUnbindTexture(gauge1TexDouble2);
00140     } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00141       cudaUnbindTexture(gauge0TexSingle4); 
00142       cudaUnbindTexture(gauge1TexSingle4);
00143     } else {
00144       cudaUnbindTexture(gauge0TexHalf4); 
00145       cudaUnbindTexture(gauge1TexHalf4);
00146     }
00147   }
00148 
00149 }
00150 
00151 static void bindFatGaugeTex(const FullGauge gauge, const int oddBit, 
00152                             void **gauge0, void **gauge1) {
00153   if(oddBit) {
00154     *gauge0 = gauge.odd;
00155     *gauge1 = gauge.even;
00156   } else {
00157     *gauge0 = gauge.even;
00158     *gauge1 = gauge.odd;
00159   }
00160   
00161   if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00162     cudaBindTexture(0, fatGauge0TexDouble, *gauge0, gauge.bytes); 
00163     cudaBindTexture(0, fatGauge1TexDouble, *gauge1, gauge.bytes);
00164   } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00165     cudaBindTexture(0, fatGauge0TexSingle, *gauge0, gauge.bytes); 
00166     cudaBindTexture(0, fatGauge1TexSingle, *gauge1, gauge.bytes);
00167   } else {
00168     cudaBindTexture(0, fatGauge0TexHalf, *gauge0, gauge.bytes); 
00169     cudaBindTexture(0, fatGauge1TexHalf, *gauge1, gauge.bytes);
00170   }
00171 }
00172 
00173 static void unbindFatGaugeTex(const FullGauge gauge) {
00174   if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00175     cudaUnbindTexture(fatGauge0TexDouble);
00176     cudaUnbindTexture(fatGauge1TexDouble);
00177   } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00178     cudaUnbindTexture(fatGauge0TexSingle);
00179     cudaUnbindTexture(fatGauge1TexSingle);
00180   } else {
00181     cudaUnbindTexture(fatGauge0TexHalf);
00182     cudaUnbindTexture(fatGauge1TexHalf);
00183     }
00184 }
00185 
00186 static void bindLongGaugeTex(const FullGauge gauge, const int oddBit, 
00187                             void **gauge0, void **gauge1) {
00188   if(oddBit) {
00189     *gauge0 = gauge.odd;
00190     *gauge1 = gauge.even;
00191   } else {
00192     *gauge0 = gauge.even;
00193     *gauge1 = gauge.odd;
00194   }
00195   
00196   if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00197     cudaBindTexture(0, longGauge0TexDouble, *gauge0, gauge.bytes); 
00198     cudaBindTexture(0, longGauge1TexDouble, *gauge1, gauge.bytes);
00199   } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00200     if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
00201       cudaBindTexture(0, longGauge0TexSingle_norecon, *gauge0, gauge.bytes); 
00202       cudaBindTexture(0, longGauge1TexSingle_norecon, *gauge1, gauge.bytes);    
00203     } else {
00204       cudaBindTexture(0, longGauge0TexSingle, *gauge0, gauge.bytes); 
00205       cudaBindTexture(0, longGauge1TexSingle, *gauge1, gauge.bytes);
00206     }
00207   } else {
00208     if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
00209       cudaBindTexture(0, longGauge0TexHalf_norecon, *gauge0, gauge.bytes); 
00210       cudaBindTexture(0, longGauge1TexHalf_norecon, *gauge1, gauge.bytes);      
00211     } else {
00212       cudaBindTexture(0, longGauge0TexHalf, *gauge0, gauge.bytes); 
00213       cudaBindTexture(0, longGauge1TexHalf, *gauge1, gauge.bytes);
00214     }
00215   }
00216 }
00217 
00218 static void unbindLongGaugeTex(const FullGauge gauge){
00219   if (gauge.precision == QUDA_DOUBLE_PRECISION) {
00220     cudaUnbindTexture(longGauge0TexDouble);
00221     cudaUnbindTexture(longGauge1TexDouble);
00222   } else if (gauge.precision == QUDA_SINGLE_PRECISION) {
00223     if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
00224       cudaUnbindTexture(longGauge0TexSingle_norecon);
00225       cudaUnbindTexture(longGauge1TexSingle_norecon);
00226     } else {
00227       cudaUnbindTexture(longGauge0TexSingle);
00228       cudaUnbindTexture(longGauge1TexSingle);
00229     }
00230   } else {
00231     if (gauge.reconstruct == QUDA_RECONSTRUCT_NO) { //18 reconstruct
00232       cudaUnbindTexture(longGauge0TexHalf_norecon);
00233       cudaUnbindTexture(longGauge1TexHalf_norecon);
00234     } else {
00235       cudaUnbindTexture(longGauge0TexHalf);
00236       cudaUnbindTexture(longGauge1TexHalf);
00237     }
00238   }
00239 }
00240     
00241 
00242 template <int N, typename spinorFloat>
00243   int bindSpinorTex(const int length, const spinorFloat *in, const float *inNorm,
00244                     const spinorFloat *x=0, const float *xNorm=0) {
00245 
00246   if (N==2 && sizeof(spinorFloat) == sizeof(double2)) {
00247     int spinor_bytes = length*sizeof(double);
00248     cudaBindTexture(0, spinorTexDouble, in, spinor_bytes); 
00249     if (x) cudaBindTexture(0, accumTexDouble, x, spinor_bytes); 
00250     return sizeof(double);
00251   } else if (N==4 && sizeof(spinorFloat) == sizeof(float4)) {
00252     int spinor_bytes = length*sizeof(float);
00253     cudaBindTexture(0, spinorTexSingle, in, spinor_bytes); 
00254     checkCudaError();
00255     if (x) cudaBindTexture(0, accumTexSingle, x, spinor_bytes); 
00256     checkCudaError();
00257     return sizeof(float);
00258   } else if  (N==2 && sizeof(spinorFloat) == sizeof(float2)) {
00259       int spinor_bytes = length*sizeof(float);
00260       cudaBindTexture(0, spinorTexSingle2, in, spinor_bytes); 
00261       if (x) cudaBindTexture(0, accumTexSingle2, x, spinor_bytes); 
00262       return sizeof(float);    
00263   } else if (N==4 && sizeof(spinorFloat) == sizeof(short4)) {
00264     int spinor_bytes = length*sizeof(short);
00265     cudaBindTexture(0, spinorTexHalf, in, spinor_bytes); 
00266     if (inNorm) cudaBindTexture(0, spinorTexNorm, inNorm, spinor_bytes/12); 
00267     if (x) cudaBindTexture(0, accumTexHalf, x, spinor_bytes); 
00268     if (xNorm) cudaBindTexture(0, accumTexNorm, xNorm, spinor_bytes/12); 
00269     return sizeof(float);
00270   } else if (N==2 && sizeof(spinorFloat) == sizeof(short2)) {
00271       int spinor_bytes = length*sizeof(short);
00272       cudaBindTexture(0, spinorTexHalf2, in, spinor_bytes); 
00273       if (inNorm) cudaBindTexture(0, spinorTexNorm, inNorm, spinor_bytes/3); 
00274       if (x) cudaBindTexture(0, accumTexHalf2, x, spinor_bytes); 
00275       if (xNorm) cudaBindTexture(0, accumTexNorm, xNorm, spinor_bytes/3); 
00276       return sizeof(float);
00277   } else {
00278     errorQuda("Unsupported precision and short vector type");
00279   }
00280 
00281 }
00282 
00283 template <int N, typename spinorFloat>
00284 void unbindSpinorTex(const spinorFloat *in, const float *inNorm,
00285                     const spinorFloat *x=0, const float *xNorm=0) {
00286 
00287   if (N==2 && sizeof(spinorFloat) == sizeof(double2)) {
00288     cudaUnbindTexture(spinorTexDouble); 
00289     if (x) cudaUnbindTexture(accumTexDouble);
00290   } else if (N==4 && sizeof(spinorFloat) == sizeof(float4)) {
00291     cudaUnbindTexture(spinorTexSingle); 
00292     if (x) cudaUnbindTexture(accumTexSingle); 
00293   } else if  (N==2 && sizeof(spinorFloat) == sizeof(float2)) {
00294     cudaUnbindTexture(spinorTexSingle2); 
00295     if (x) cudaUnbindTexture(accumTexSingle2); 
00296   } else if (N==4 && sizeof(spinorFloat) == sizeof(short4)) {
00297     cudaUnbindTexture(spinorTexHalf); 
00298     if (inNorm) cudaUnbindTexture(spinorTexNorm);
00299     if (x) cudaUnbindTexture(accumTexHalf); 
00300     if (xNorm) cudaUnbindTexture(accumTexNorm);
00301   } else if (N==2 && sizeof(spinorFloat) == sizeof(short2)) {
00302       cudaUnbindTexture(spinorTexHalf2); 
00303       if (inNorm) cudaUnbindTexture(spinorTexNorm);
00304       if (x) cudaUnbindTexture(accumTexHalf2); 
00305       if (xNorm) cudaUnbindTexture(accumTexNorm);
00306   } else {
00307     errorQuda("Unsupported precision and short vector type");
00308   }
00309    
00310   checkCudaError();
00311 }
00312 
00313 // Double precision clover term
00314 texture<int4, 1> cloverTexDouble;
00315 
00316 // Single precision clover term
00317 texture<float4, 1, cudaReadModeElementType> cloverTexSingle;
00318 
00319 // Half precision clover term
00320 texture<short4, 1, cudaReadModeNormalizedFloat> cloverTexHalf;
00321 texture<float, 1, cudaReadModeElementType> cloverTexNorm;
00322 
00323 static QudaPrecision bindCloverTex(const FullClover clover, const int oddBit, 
00324                                    void **cloverP, void **cloverNormP) {
00325 
00326   if (oddBit) {
00327     *cloverP = clover.odd.clover;
00328     *cloverNormP = clover.odd.cloverNorm;
00329   } else {
00330     *cloverP = clover.even.clover;
00331     *cloverNormP = clover.even.cloverNorm;
00332   }
00333 
00334   if (clover.odd.precision == QUDA_DOUBLE_PRECISION) {
00335     cudaBindTexture(0, cloverTexDouble, *cloverP, clover.odd.bytes); 
00336   } else if (clover.odd.precision == QUDA_SINGLE_PRECISION) {
00337     cudaBindTexture(0, cloverTexSingle, *cloverP, clover.odd.bytes); 
00338   } else {
00339     cudaBindTexture(0, cloverTexHalf, *cloverP, clover.odd.bytes); 
00340     cudaBindTexture(0, cloverTexNorm, *cloverNormP, clover.odd.bytes/18);
00341   }
00342 
00343   return clover.odd.precision;
00344 }
00345 
00346 static void unbindCloverTex(const FullClover clover) {
00347 
00348   if (clover.odd.precision == QUDA_DOUBLE_PRECISION) {
00349     cudaUnbindTexture(cloverTexDouble);
00350   } else if (clover.odd.precision == QUDA_SINGLE_PRECISION) {
00351     cudaUnbindTexture(cloverTexSingle);
00352   } else {
00353     cudaUnbindTexture(cloverTexHalf);
00354     cudaUnbindTexture(cloverTexNorm);
00355   }
00356 
00357 }
00358 
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines