|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 // clover_def.h - clover kernel definitions 00002 // 00003 // See comments in wilson_dslash_def.h 00004 00005 // initialize on first iteration 00006 00007 #ifndef DD_LOOP 00008 #define DD_LOOP 00009 #define DD_XPAY 0 00010 #define DD_PREC 0 00011 #endif 00012 00013 // set options for current iteration 00014 00015 #if (DD_XPAY==0) // no xpay 00016 #define DD_XPAY_F 00017 #define DD_PARAM4 int oddBit 00018 #else // xpay 00019 #define DD_XPAY_F Xpay 00020 #if (DD_PREC == 0) 00021 #define DD_PARAM4 int oddBit, double a 00022 #else 00023 #define DD_PARAM4 int oddBit, float a 00024 #endif 00025 #define DSLASH_XPAY 00026 #endif 00027 00028 #if (DD_PREC==0) // double-precision spinor field 00029 #define DD_PREC_F D 00030 #define DD_PARAM1 double2* out, float *null1 00031 #define DD_PARAM3 const double2* in, const float *null3 00032 #define READ_SPINOR READ_SPINOR_DOUBLE 00033 #define SPINORTEX spinorTexDouble 00034 #define WRITE_SPINOR WRITE_SPINOR_DOUBLE2 00035 #define SPINOR_DOUBLE 00036 #if (DD_XPAY==1) 00037 #define ACCUMTEX accumTexDouble 00038 #define READ_ACCUM READ_ACCUM_DOUBLE 00039 #endif 00040 #elif (DD_PREC==1) // single-precision spinor field 00041 #define DD_PREC_F S 00042 #define DD_PARAM1 float4* out, float *null1 00043 #define DD_PARAM3 const float4* in, const float *null3 00044 #define READ_SPINOR READ_SPINOR_SINGLE 00045 #define SPINORTEX spinorTexSingle 00046 #define WRITE_SPINOR WRITE_SPINOR_FLOAT4 00047 #if (DD_XPAY==1) 00048 #define ACCUMTEX accumTexSingle 00049 #define READ_ACCUM READ_ACCUM_SINGLE 00050 #endif 00051 #else // half-precision spinor field 00052 #define DD_PREC_F H 00053 #define READ_SPINOR READ_SPINOR_HALF 00054 #define SPINORTEX spinorTexHalf 00055 #define DD_PARAM1 short4* out, float *outNorm 00056 #define DD_PARAM3 const short4* in, const float *inNorm 00057 #define WRITE_SPINOR WRITE_SPINOR_SHORT4 00058 #if (DD_XPAY==1) 00059 #define ACCUMTEX accumTexHalf 00060 #define READ_ACCUM READ_ACCUM_HALF 00061 #endif 00062 #endif 00063 00064 #if (DD_PREC==0) // double-precision clover term 00065 #define DD_PREC_F D 00066 #define DD_PARAM2 const double2* clover, const float *null 00067 #define CLOVERTEX cloverTexDouble 00068 #define READ_CLOVER READ_CLOVER_DOUBLE 00069 #define CLOVER_DOUBLE 00070 #elif (DD_PREC==1) // single-precision clover term 00071 #define DD_PREC_F S 00072 #define DD_PARAM2 const float4* clover, const float *null 00073 #define CLOVERTEX cloverTexSingle 00074 #define READ_CLOVER READ_CLOVER_SINGLE 00075 #else // half-precision clover term 00076 #define DD_PREC_F H 00077 #define DD_PARAM2 const short4* clover, const float *cloverNorm 00078 #define CLOVERTEX cloverTexHalf 00079 #define READ_CLOVER READ_CLOVER_HALF 00080 #endif 00081 00082 //#define DD_CONCAT(s,c,x) clover ## s ## c ## x ## Kernel 00083 #define DD_CONCAT(x) clover ## x ## Kernel 00084 #define DD_FUNC(x) DD_CONCAT(x) 00085 00086 // define the kernel 00087 #if !(__CUDA_ARCH__ < 130 && DD_PREC == 0) 00088 00089 __global__ void DD_FUNC(DD_XPAY_F)(DD_PARAM1, DD_PARAM2, DD_PARAM3, DD_PARAM4) { 00090 00091 #ifdef GPU_WILSON_DIRAC 00092 #include "clover_core.h" 00093 #endif 00094 00095 } 00096 00097 #endif 00098 00099 // clean up 00100 00101 #undef DD_PREC_F 00102 #undef DD_XPAY_F 00103 #undef DD_PARAM1 00104 #undef DD_PARAM2 00105 #undef DD_PARAM3 00106 #undef DD_PARAM4 00107 #undef DD_CONCAT 00108 #undef DD_FUNC 00109 00110 #undef DSLASH_XPAY 00111 #undef READ_SPINOR 00112 #undef SPINORTEX 00113 #undef WRITE_SPINOR 00114 #undef ACCUMTEX 00115 #undef READ_ACCUM 00116 #undef CLOVERTEX 00117 #undef READ_CLOVER 00118 #undef GAUGE_DOUBLE 00119 #undef SPINOR_DOUBLE 00120 #undef CLOVER_DOUBLE 00121 00122 // prepare next set of options, or clean up after final iteration 00123 00124 //#if (DD_XPAY==0) // xpay variant is not needed 00125 //#undef DD_XPAY 00126 //#define DD_XPAY 1 00127 //#else 00128 //#undef DD_XPAY 00129 //#define DD_XPAY 0 00130 00131 #if (DD_PREC==0) 00132 #undef DD_PREC 00133 #define DD_PREC 1 00134 #elif (DD_PREC==1) 00135 #undef DD_PREC 00136 #define DD_PREC 2 00137 #else 00138 00139 #undef DD_LOOP 00140 #undef DD_XPAY 00141 #undef DD_PREC 00142 00143 #endif // DD_PREC 00144 //#endif // DD_XPAY 00145 00146 #ifdef DD_LOOP 00147 #include "clover_def.h" 00148 #endif
1.7.3