QUDA v0.4.0
A library for QCD on GPUs
|
// clover_def.h - clover kernel definitions
//
// See comments in wilson_dslash_def.h
//
// This header includes itself (see the end of the file) once per value of
// DD_PREC.  Each pass picks the macros below from the current DD_PREC and
// DD_XPAY values, emits at most one __global__ kernel, and then removes the
// per-pass macros again.
//
//   DD_PREC == 0   double precision  (double2 fields)
//   DD_PREC == 1   single precision  (float4 fields)
//   otherwise      half precision    (short4 fields plus float* Norm arguments)
//
// NOTE(review): READ_SPINOR, WRITE_SPINOR, READ_CLOVER, the *TEX names and the
// *_DOUBLE flags are not expanded anywhere in this file; presumably they are
// consumed by clover_core.h - confirm there.

// ---- first pass only: set up the loop state --------------------------------

#if !defined(DD_LOOP)
#  define DD_LOOP
#  define DD_XPAY 0
#  define DD_PREC 0
#endif

// ---- xpay selection: kernel-name infix and trailing kernel parameters -------

#if (DD_XPAY != 0) // xpay
#  define DD_XPAY_F Xpay
#  define DSLASH_XPAY
#  if (DD_PREC != 0)
#    define DD_PARAM4 DslashParam param, float a
#  else
#    define DD_PARAM4 DslashParam param, double a
#  endif
#else // no xpay
#  define DD_XPAY_F
#  define DD_PARAM4 DslashParam param
#endif

// ---- spinor field: output/input parameters and accessors --------------------

#if (DD_PREC == 0) // double-precision spinor field
#  define DD_PREC_F D
#  define SPINOR_DOUBLE
#  define DD_PARAM1 double2* out, float *null1
#  define DD_PARAM3 const double2* in, const float *null3
#  define WRITE_SPINOR WRITE_SPINOR_DOUBLE2
#  if defined(DIRECT_ACCESS_WILSON_SPINOR) || defined(FERMI_NO_DBLE_TEX)
#    define READ_SPINOR READ_SPINOR_DOUBLE
#    define SPINORTEX in
#  else
#    define READ_SPINOR READ_SPINOR_DOUBLE_TEX
#    define SPINORTEX spinorTexDouble
#  endif
#  if (DD_XPAY == 1) // never used
#    define ACCUMTEX accumTexDouble
#    define READ_ACCUM READ_ACCUM_DOUBLE
#  endif
#elif (DD_PREC == 1) // single-precision spinor field
#  define DD_PREC_F S
#  define DD_PARAM1 float4* out, float *null1
#  define DD_PARAM3 const float4* in, const float *null3
#  define WRITE_SPINOR WRITE_SPINOR_FLOAT4
#  if defined(DIRECT_ACCESS_WILSON_SPINOR)
#    define READ_SPINOR READ_SPINOR_SINGLE
#    define SPINORTEX in
#  else
#    define READ_SPINOR READ_SPINOR_SINGLE_TEX
#    define SPINORTEX spinorTexSingle
#  endif
#  if (DD_XPAY == 1)
#    define ACCUMTEX accumTexSingle
#    define READ_ACCUM READ_ACCUM_SINGLE
#  endif
#else // half-precision spinor field
#  define DD_PREC_F H
#  define DD_PARAM1 short4* out, float *outNorm
#  define DD_PARAM3 const short4* in, const float *inNorm
#  define WRITE_SPINOR WRITE_SPINOR_SHORT4
#  if defined(DIRECT_ACCESS_WILSON_SPINOR)
#    define READ_SPINOR READ_SPINOR_HALF
#    define SPINORTEX in
#  else
#    define READ_SPINOR READ_SPINOR_HALF_TEX
#    define SPINORTEX spinorTexHalf
#  endif
#  if (DD_XPAY == 1)
#    define ACCUMTEX accumTexHalf
#    define READ_ACCUM READ_ACCUM_HALF
#  endif
#endif

// ---- clover term: parameter and accessors -----------------------------------
// DD_PREC_F is defined again in each branch with the same replacement text as
// in the spinor section above, so the repetition is a benign redefinition.

#if (DD_PREC == 0) // double-precision clover term
#  define DD_PREC_F D
#  define CLOVER_DOUBLE
#  define DD_PARAM2 const double2* clover, const float *null
#  if defined(DIRECT_ACCESS_CLOVER) || defined(FERMI_NO_DBLE_TEX)
#    define CLOVERTEX clover
#    define READ_CLOVER READ_CLOVER_DOUBLE
#  else
#    define CLOVERTEX cloverTexDouble
#    define READ_CLOVER READ_CLOVER_DOUBLE_TEX
#  endif
#elif (DD_PREC == 1) // single-precision clover term
#  define DD_PREC_F S
#  define DD_PARAM2 const float4* clover, const float *null
#  if defined(DIRECT_ACCESS_CLOVER)
#    define CLOVERTEX clover
#    define READ_CLOVER READ_CLOVER_SINGLE
#  else
#    define CLOVERTEX cloverTexSingle
#    define READ_CLOVER READ_CLOVER_SINGLE_TEX
#  endif
#else // half-precision clover term
#  define DD_PREC_F H
#  define DD_PARAM2 const short4* clover, const float *cloverNorm
#  if defined(DIRECT_ACCESS_CLOVER)
#    define CLOVERTEX clover
#    define READ_CLOVER READ_CLOVER_HALF
#  else
#    define CLOVERTEX cloverTexHalf
#    define READ_CLOVER READ_CLOVER_HALF_TEX
#  endif
#endif

// ---- kernel name ------------------------------------------------------------
// Only the xpay infix is pasted into the name (clover<infix>Kernel); DD_PREC_F
// is not, so the kernels emitted for the different precisions share one name
// and differ in their parameter types.  DD_FUNC exists so that its argument is
// macro-expanded before DD_CONCAT applies the ## operator.

#define DD_CONCAT(x) clover ## x ## Kernel
#define DD_FUNC(x) DD_CONCAT(x)

// ---- define the kernel ------------------------------------------------------
// The double-precision pass emits nothing when __COMPUTE_CAPABILITY__ < 130.
// The kernel body is clover_core.h; without GPU_CLOVER_DIRAC the body is empty.

#if (__COMPUTE_CAPABILITY__ >= 130) || (DD_PREC != 0)

__global__ void DD_FUNC(DD_XPAY_F)(DD_PARAM1, DD_PARAM2, DD_PARAM3, DD_PARAM4)
{
#if defined(GPU_CLOVER_DIRAC)
#include "clover_core.h"
#endif
}

#endif

// ---- clean up the per-pass macros -------------------------------------------

#undef DD_FUNC
#undef DD_CONCAT
#undef DD_PARAM4
#undef DD_PARAM3
#undef DD_PARAM2
#undef DD_PARAM1
#undef DD_XPAY_F
#undef DD_PREC_F

#undef DSLASH_XPAY
#undef READ_SPINOR
#undef SPINORTEX
#undef WRITE_SPINOR
#undef ACCUMTEX
#undef READ_ACCUM
#undef CLOVERTEX
#undef READ_CLOVER
#undef GAUGE_DOUBLE
#undef SPINOR_DOUBLE
#undef CLOVER_DOUBLE

// ---- prepare next set of options, or clean up after the final pass ----------
// The xpay variant is not needed, so DD_XPAY is never advanced here; only
// DD_PREC steps 0 -> 1 -> 2.  After the DD_PREC == 2 pass the loop state is
// removed, which stops the self-inclusion below.

#if (DD_PREC == 0)
#  undef DD_PREC
#  define DD_PREC 1
#elif (DD_PREC == 1)
#  undef DD_PREC
#  define DD_PREC 2
#else
#  undef DD_LOOP
#  undef DD_XPAY
#  undef DD_PREC
#endif // DD_PREC

#if defined(DD_LOOP)
#include "clover_def.h"
#endif