QUDA v0.3.2
A library for QCD on GPUs

quda/lib/force_common.h

Go to the documentation of this file.
00001 
/*
 * Include guard for this header.
 * NOTE(review): the guard name spells "COMMOM" (presumably "COMMON" was meant) and
 * uses a double-underscore identifier, which C reserves for the implementation.
 * It is left unchanged here because the matching #endif is not visible in this view.
 */
00002 #ifndef __KERNEL_COMMOM_MACRO_H__
00003 #define __KERNEL_COMMOM_MACRO_H__
00004 
/*
 * Direction codes: four "UP" codes (0..3) followed by four "DOWN" codes (4..7).
 * The numbering is chosen so that a direction and its opposite always add up
 * to 7 (XUP/XDOWN = 0/7, YUP/YDOWN = 1/6, ZUP/ZDOWN = 2/5, TUP/TDOWN = 3/4).
 */
#define XUP 0
#define YUP 1
#define ZUP 2
#define TUP 3
#define TDOWN 4
#define ZDOWN 5
#define YDOWN 6
#define XDOWN 7

/* Code of the direction opposite to dir (relies on the "sum to 7" numbering). */
#define OPP_DIR(dir)    (7-(dir))

/*
 * Non-zero when dir is one of the UP codes (0..3) / one of the DOWN codes (>3).
 * The argument is parenthesized so that expression arguments such as
 * `a & 3` or `c ? d1 : d2` are compared as a whole instead of being
 * re-associated with the relational operator.
 */
#define GOES_FORWARDS(dir) ((dir)<=3)
#define GOES_BACKWARDS(dir) ((dir)>3)
00016 
/*
 * Component names for the 3x3 complex matrix "linka".
 * linkaIJ_re / linkaIJ_im are the real / imaginary parts of entry (row I, column J).
 * The 18 values are packed, row by row, into the fields .x .y .z .w of the five
 * variables LINKA0..LINKA4, which must be in scope wherever these names are used
 * (presumably float4/double4 registers - confirm against the including kernel).
 * LINKA4.z and LINKA4.w are not referenced by these aliases.
 */
00017 #define linka00_re LINKA0.x
00018 #define linka00_im LINKA0.y
00019 #define linka01_re LINKA0.z
00020 #define linka01_im LINKA0.w
00021 #define linka02_re LINKA1.x
00022 #define linka02_im LINKA1.y
00023 #define linka10_re LINKA1.z
00024 #define linka10_im LINKA1.w
00025 #define linka11_re LINKA2.x
00026 #define linka11_im LINKA2.y
00027 #define linka12_re LINKA2.z
00028 #define linka12_im LINKA2.w
00029 #define linka20_re LINKA3.x
00030 #define linka20_im LINKA3.y
00031 #define linka21_re LINKA3.z
00032 #define linka21_im LINKA3.w
00033 #define linka22_re LINKA4.x
00034 #define linka22_im LINKA4.y
00035 
/*
 * Read-only view of the conjugate transpose of "linka":
 * linkaTIJ is the complex conjugate of linkaJI (indices swapped, imaginary part negated).
 * These expand to parenthesized expressions, not lvalues, so they cannot be assigned to.
 * They are what MULT_SU3_NA / MULT_SU3_AN pick up through their "T" token pasting.
 */
00036 #define linkaT00_re (+linka00_re)
00037 #define linkaT00_im (-linka00_im)
00038 #define linkaT01_re (+linka10_re)
00039 #define linkaT01_im (-linka10_im)
00040 #define linkaT02_re (+linka20_re)
00041 #define linkaT02_im (-linka20_im)
00042 #define linkaT10_re (+linka01_re)
00043 #define linkaT10_im (-linka01_im)
00044 #define linkaT11_re (+linka11_re)
00045 #define linkaT11_im (-linka11_im)
00046 #define linkaT12_re (+linka21_re)
00047 #define linkaT12_im (-linka21_im)
00048 #define linkaT20_re (+linka02_re)
00049 #define linkaT20_im (-linka02_im)
00050 #define linkaT21_re (+linka12_re)
00051 #define linkaT21_im (-linka12_im)
00052 #define linkaT22_re (+linka22_re)
00053 #define linkaT22_im (-linka22_im)
00054 
/*
 * Component names for the 3x3 complex matrix "linkb".
 * linkbIJ_re / linkbIJ_im are the real / imaginary parts of entry (row I, column J),
 * packed row by row into the fields .x .y .z .w of LINKB0..LINKB4, which must be
 * in scope wherever these names are used (presumably 4-component vector
 * variables - confirm). LINKB4.z and LINKB4.w are not referenced here.
 */
00055 #define linkb00_re LINKB0.x
00056 #define linkb00_im LINKB0.y
00057 #define linkb01_re LINKB0.z
00058 #define linkb01_im LINKB0.w
00059 #define linkb02_re LINKB1.x
00060 #define linkb02_im LINKB1.y
00061 #define linkb10_re LINKB1.z
00062 #define linkb10_im LINKB1.w
00063 #define linkb11_re LINKB2.x
00064 #define linkb11_im LINKB2.y
00065 #define linkb12_re LINKB2.z
00066 #define linkb12_im LINKB2.w
00067 #define linkb20_re LINKB3.x
00068 #define linkb20_im LINKB3.y
00069 #define linkb21_re LINKB3.z
00070 #define linkb21_im LINKB3.w
00071 #define linkb22_re LINKB4.x
00072 #define linkb22_im LINKB4.y
00073 
/*
 * Read-only view of the conjugate transpose of "linkb":
 * linkbTIJ is the complex conjugate of linkbJI (indices swapped, imaginary part negated).
 * These expand to parenthesized expressions, not lvalues.
 */
00074 #define linkbT00_re (+linkb00_re)
00075 #define linkbT00_im (-linkb00_im)
00076 #define linkbT01_re (+linkb10_re)
00077 #define linkbT01_im (-linkb10_im)
00078 #define linkbT02_re (+linkb20_re)
00079 #define linkbT02_im (-linkb20_im)
00080 #define linkbT10_re (+linkb01_re)
00081 #define linkbT10_im (-linkb01_im)
00082 #define linkbT11_re (+linkb11_re)
00083 #define linkbT11_im (-linkb11_im)
00084 #define linkbT12_re (+linkb21_re)
00085 #define linkbT12_im (-linkb21_im)
00086 #define linkbT20_re (+linkb02_re)
00087 #define linkbT20_im (-linkb02_im)
00088 #define linkbT21_re (+linkb12_re)
00089 #define linkbT21_im (-linkb12_im)
00090 #define linkbT22_re (+linkb22_re)
00091 #define linkbT22_im (-linkb22_im)
00092 
/*
 * Component names for the 3x3 complex matrix "linkc".
 * linkcIJ_re / linkcIJ_im are the real / imaginary parts of entry (row I, column J),
 * packed row by row into the fields .x .y .z .w of LINKC0..LINKC4, which must be
 * in scope wherever these names are used (presumably 4-component vector
 * variables - confirm). LINKC4.z and LINKC4.w are not referenced here.
 */
00093 #define linkc00_re LINKC0.x
00094 #define linkc00_im LINKC0.y
00095 #define linkc01_re LINKC0.z
00096 #define linkc01_im LINKC0.w
00097 #define linkc02_re LINKC1.x
00098 #define linkc02_im LINKC1.y
00099 #define linkc10_re LINKC1.z
00100 #define linkc10_im LINKC1.w
00101 #define linkc11_re LINKC2.x
00102 #define linkc11_im LINKC2.y
00103 #define linkc12_re LINKC2.z
00104 #define linkc12_im LINKC2.w
00105 #define linkc20_re LINKC3.x
00106 #define linkc20_im LINKC3.y
00107 #define linkc21_re LINKC3.z
00108 #define linkc21_im LINKC3.w
00109 #define linkc22_re LINKC4.x
00110 #define linkc22_im LINKC4.y
00111 
/*
 * Read-only view of the conjugate transpose of "linkc":
 * linkcTIJ is the complex conjugate of linkcJI (indices swapped, imaginary part negated).
 * These expand to parenthesized expressions, not lvalues.
 */
00112 #define linkcT00_re (+linkc00_re)
00113 #define linkcT00_im (-linkc00_im)
00114 #define linkcT01_re (+linkc10_re)
00115 #define linkcT01_im (-linkc10_im)
00116 #define linkcT02_re (+linkc20_re)
00117 #define linkcT02_im (-linkc20_im)
00118 #define linkcT10_re (+linkc01_re)
00119 #define linkcT10_im (-linkc01_im)
00120 #define linkcT11_re (+linkc11_re)
00121 #define linkcT11_im (-linkc11_im)
00122 #define linkcT12_re (+linkc21_re)
00123 #define linkcT12_im (-linkc21_im)
00124 #define linkcT20_re (+linkc02_re)
00125 #define linkcT20_im (-linkc02_im)
00126 #define linkcT21_re (+linkc12_re)
00127 #define linkcT21_im (-linkc12_im)
00128 #define linkcT22_re (+linkc22_re)
00129 #define linkcT22_im (-linkc22_im)
00130 
00131 
/*
 * Component names for the 3x3 complex matrix "staple".
 * Unlike the link aliases, each matrix entry has its own variable: entry (I, J)
 * lives in STAPLE<3*I+J>, with .x holding the real part and .y the imaginary part.
 * STAPLE0..STAPLE8 must be in scope wherever these names are used
 * (presumably 2-component vector variables - confirm).
 */
00132 #define staple00_re STAPLE0.x
00133 #define staple00_im STAPLE0.y
00134 #define staple01_re STAPLE1.x
00135 #define staple01_im STAPLE1.y
00136 #define staple02_re STAPLE2.x
00137 #define staple02_im STAPLE2.y
00138 #define staple10_re STAPLE3.x
00139 #define staple10_im STAPLE3.y
00140 #define staple11_re STAPLE4.x
00141 #define staple11_im STAPLE4.y
00142 #define staple12_re STAPLE5.x
00143 #define staple12_im STAPLE5.y
00144 #define staple20_re STAPLE6.x
00145 #define staple20_im STAPLE6.y
00146 #define staple21_re STAPLE7.x
00147 #define staple21_im STAPLE7.y
00148 #define staple22_re STAPLE8.x
00149 #define staple22_im STAPLE8.y
00150 
/*
 * Read-only view of the conjugate transpose of "staple":
 * stapleTIJ is the complex conjugate of stapleJI (indices swapped, imaginary part negated).
 * These expand to parenthesized expressions, not lvalues.
 */
00151 #define stapleT00_re (+staple00_re)
00152 #define stapleT00_im (-staple00_im)
00153 #define stapleT01_re (+staple10_re)
00154 #define stapleT01_im (-staple10_im)
00155 #define stapleT02_re (+staple20_re)
00156 #define stapleT02_im (-staple20_im)
00157 #define stapleT10_re (+staple01_re)
00158 #define stapleT10_im (-staple01_im)
00159 #define stapleT11_re (+staple11_re)
00160 #define stapleT11_im (-staple11_im)
00161 #define stapleT12_re (+staple21_re)
00162 #define stapleT12_im (-staple21_im)
00163 #define stapleT20_re (+staple02_re)
00164 #define stapleT20_im (-staple02_im)
00165 #define stapleT21_re (+staple12_re)
00166 #define stapleT21_im (-staple12_im)
00167 #define stapleT22_re (+staple22_re)
00168 #define stapleT22_im (-staple22_im)
00169 
/*
 * Load three elements of a link into var0, var1 and var2.
 *
 *   gauge : array (first macro) or texture reference (second macro) to read from
 *   dir   : direction index; each direction occupies Vhx3 elements
 *   idx   : site index within the direction
 *   var   : name prefix of the three destination variables (var0, var1, var2)
 *
 * The three reads are at offset idx + dir*Vhx3, that offset + Vh, and that
 * offset + Vhx2. Vh, Vhx2 and Vhx3 must be visible where the macro is expanded.
 * (The "12" presumably refers to a 12-real compressed link made of three
 * 4-component elements - confirm against the callers.)
 *
 * idx, dir and gauge are parenthesized so that expression arguments such as
 * `mu + 1` are scaled as a whole instead of being split by operator precedence.
 */
#define LOAD_MATRIX_12_SINGLE(gauge, dir, idx, var) do {                \
    var##0 = (gauge)[(idx) + (dir)*Vhx3];                               \
    var##1 = (gauge)[(idx) + (dir)*Vhx3 + Vh];                          \
    var##2 = (gauge)[(idx) + (dir)*Vhx3 + Vhx2];                        \
  } while (0)

/* Same as LOAD_MATRIX_12_SINGLE, but each element is fetched with tex1Dfetch(). */
#define LOAD_MATRIX_12_SINGLE_TEX(gauge, dir, idx, var) do {            \
    var##0 = tex1Dfetch(gauge, (idx) + (dir)*Vhx3);                     \
    var##1 = tex1Dfetch(gauge, (idx) + (dir)*Vhx3 + Vh);                \
    var##2 = tex1Dfetch(gauge, (idx) + (dir)*Vhx3 + Vhx2);              \
  } while (0)
00181 
00182 
/*
 * mc = ma * mb for 3x3 complex matrices ("NN": neither operand is conjugated).
 *
 * ma, mb, mc are name prefixes; the macro reads <ma>IJ_re/_im and <mb>IJ_re/_im
 * and assigns <mc>IJ_re/_im for I, J in 0..2, using
 *   mc[I][J] = sum over K of ma[I][K] * mb[K][J]      (complex arithmetic).
 *
 * mc must not name the same matrix as ma or mb: entries of mc are stored while
 * entries of ma and mb are still going to be read.
 *
 * The 18 assignments are enclosed in a brace block so that they stay together
 * when the macro is the body of an unbraced if/for/while. Call sites may be
 * written with or without a trailing ';'.
 */
#define MULT_SU3_NN(ma, mb, mc) { \
    mc##00_re = \
        ma##00_re * mb##00_re - ma##00_im * mb##00_im + \
        ma##01_re * mb##10_re - ma##01_im * mb##10_im + \
        ma##02_re * mb##20_re - ma##02_im * mb##20_im; \
    mc##00_im = \
        ma##00_re * mb##00_im + ma##00_im * mb##00_re + \
        ma##01_re * mb##10_im + ma##01_im * mb##10_re + \
        ma##02_re * mb##20_im + ma##02_im * mb##20_re; \
    mc##10_re = \
        ma##10_re * mb##00_re - ma##10_im * mb##00_im + \
        ma##11_re * mb##10_re - ma##11_im * mb##10_im + \
        ma##12_re * mb##20_re - ma##12_im * mb##20_im; \
    mc##10_im = \
        ma##10_re * mb##00_im + ma##10_im * mb##00_re + \
        ma##11_re * mb##10_im + ma##11_im * mb##10_re + \
        ma##12_re * mb##20_im + ma##12_im * mb##20_re; \
    mc##20_re = \
        ma##20_re * mb##00_re - ma##20_im * mb##00_im + \
        ma##21_re * mb##10_re - ma##21_im * mb##10_im + \
        ma##22_re * mb##20_re - ma##22_im * mb##20_im; \
    mc##20_im = \
        ma##20_re * mb##00_im + ma##20_im * mb##00_re + \
        ma##21_re * mb##10_im + ma##21_im * mb##10_re + \
        ma##22_re * mb##20_im + ma##22_im * mb##20_re; \
    mc##01_re = \
        ma##00_re * mb##01_re - ma##00_im * mb##01_im + \
        ma##01_re * mb##11_re - ma##01_im * mb##11_im + \
        ma##02_re * mb##21_re - ma##02_im * mb##21_im; \
    mc##01_im = \
        ma##00_re * mb##01_im + ma##00_im * mb##01_re + \
        ma##01_re * mb##11_im + ma##01_im * mb##11_re + \
        ma##02_re * mb##21_im + ma##02_im * mb##21_re; \
    mc##11_re = \
        ma##10_re * mb##01_re - ma##10_im * mb##01_im + \
        ma##11_re * mb##11_re - ma##11_im * mb##11_im + \
        ma##12_re * mb##21_re - ma##12_im * mb##21_im; \
    mc##11_im = \
        ma##10_re * mb##01_im + ma##10_im * mb##01_re + \
        ma##11_re * mb##11_im + ma##11_im * mb##11_re + \
        ma##12_re * mb##21_im + ma##12_im * mb##21_re; \
    mc##21_re = \
        ma##20_re * mb##01_re - ma##20_im * mb##01_im + \
        ma##21_re * mb##11_re - ma##21_im * mb##11_im + \
        ma##22_re * mb##21_re - ma##22_im * mb##21_im; \
    mc##21_im = \
        ma##20_re * mb##01_im + ma##20_im * mb##01_re + \
        ma##21_re * mb##11_im + ma##21_im * mb##11_re + \
        ma##22_re * mb##21_im + ma##22_im * mb##21_re; \
    mc##02_re = \
        ma##00_re * mb##02_re - ma##00_im * mb##02_im + \
        ma##01_re * mb##12_re - ma##01_im * mb##12_im + \
        ma##02_re * mb##22_re - ma##02_im * mb##22_im; \
    mc##02_im = \
        ma##00_re * mb##02_im + ma##00_im * mb##02_re + \
        ma##01_re * mb##12_im + ma##01_im * mb##12_re + \
        ma##02_re * mb##22_im + ma##02_im * mb##22_re; \
    mc##12_re = \
        ma##10_re * mb##02_re - ma##10_im * mb##02_im + \
        ma##11_re * mb##12_re - ma##11_im * mb##12_im + \
        ma##12_re * mb##22_re - ma##12_im * mb##22_im; \
    mc##12_im = \
        ma##10_re * mb##02_im + ma##10_im * mb##02_re + \
        ma##11_re * mb##12_im + ma##11_im * mb##12_re + \
        ma##12_re * mb##22_im + ma##12_im * mb##22_re; \
    mc##22_re = \
        ma##20_re * mb##02_re - ma##20_im * mb##02_im + \
        ma##21_re * mb##12_re - ma##21_im * mb##12_im + \
        ma##22_re * mb##22_re - ma##22_im * mb##22_im; \
    mc##22_im = \
        ma##20_re * mb##02_im + ma##20_im * mb##02_re + \
        ma##21_re * mb##12_im + ma##21_im * mb##12_re + \
        ma##22_re * mb##22_im + ma##22_im * mb##22_re; \
}
00256 
00257 
00258 
/*
 * mc = ma * adjoint(mb) for 3x3 complex matrices ("NA": normal times adjoint).
 *
 * ma, mb, mc are name prefixes. The second operand is read through the names
 * <mb>TIJ_re/_im, so a matching set of "T" aliases (conjugate-transpose view,
 * like linkaT.. / stapleT.. in this header) must exist for mb. The result is
 *   mc[I][J] = sum over K of ma[I][K] * mbT[K][J]     (complex arithmetic).
 *
 * mc must not name the same matrix as ma or mb: entries of mc are stored while
 * entries of ma and mb are still going to be read.
 *
 * The 18 assignments are enclosed in a brace block so that they stay together
 * when the macro is the body of an unbraced if/for/while. Call sites may be
 * written with or without a trailing ';'.
 */
#define MULT_SU3_NA(ma, mb, mc) { \
    mc##00_re = \
        ma##00_re * mb##T00_re - ma##00_im * mb##T00_im + \
        ma##01_re * mb##T10_re - ma##01_im * mb##T10_im + \
        ma##02_re * mb##T20_re - ma##02_im * mb##T20_im; \
    mc##00_im = \
        ma##00_re * mb##T00_im + ma##00_im * mb##T00_re + \
        ma##01_re * mb##T10_im + ma##01_im * mb##T10_re + \
        ma##02_re * mb##T20_im + ma##02_im * mb##T20_re; \
    mc##10_re = \
        ma##10_re * mb##T00_re - ma##10_im * mb##T00_im + \
        ma##11_re * mb##T10_re - ma##11_im * mb##T10_im + \
        ma##12_re * mb##T20_re - ma##12_im * mb##T20_im; \
    mc##10_im = \
        ma##10_re * mb##T00_im + ma##10_im * mb##T00_re + \
        ma##11_re * mb##T10_im + ma##11_im * mb##T10_re + \
        ma##12_re * mb##T20_im + ma##12_im * mb##T20_re; \
    mc##20_re = \
        ma##20_re * mb##T00_re - ma##20_im * mb##T00_im + \
        ma##21_re * mb##T10_re - ma##21_im * mb##T10_im + \
        ma##22_re * mb##T20_re - ma##22_im * mb##T20_im; \
    mc##20_im = \
        ma##20_re * mb##T00_im + ma##20_im * mb##T00_re + \
        ma##21_re * mb##T10_im + ma##21_im * mb##T10_re + \
        ma##22_re * mb##T20_im + ma##22_im * mb##T20_re; \
    mc##01_re = \
        ma##00_re * mb##T01_re - ma##00_im * mb##T01_im + \
        ma##01_re * mb##T11_re - ma##01_im * mb##T11_im + \
        ma##02_re * mb##T21_re - ma##02_im * mb##T21_im; \
    mc##01_im = \
        ma##00_re * mb##T01_im + ma##00_im * mb##T01_re + \
        ma##01_re * mb##T11_im + ma##01_im * mb##T11_re + \
        ma##02_re * mb##T21_im + ma##02_im * mb##T21_re; \
    mc##11_re = \
        ma##10_re * mb##T01_re - ma##10_im * mb##T01_im + \
        ma##11_re * mb##T11_re - ma##11_im * mb##T11_im + \
        ma##12_re * mb##T21_re - ma##12_im * mb##T21_im; \
    mc##11_im = \
        ma##10_re * mb##T01_im + ma##10_im * mb##T01_re + \
        ma##11_re * mb##T11_im + ma##11_im * mb##T11_re + \
        ma##12_re * mb##T21_im + ma##12_im * mb##T21_re; \
    mc##21_re = \
        ma##20_re * mb##T01_re - ma##20_im * mb##T01_im + \
        ma##21_re * mb##T11_re - ma##21_im * mb##T11_im + \
        ma##22_re * mb##T21_re - ma##22_im * mb##T21_im; \
    mc##21_im = \
        ma##20_re * mb##T01_im + ma##20_im * mb##T01_re + \
        ma##21_re * mb##T11_im + ma##21_im * mb##T11_re + \
        ma##22_re * mb##T21_im + ma##22_im * mb##T21_re; \
    mc##02_re = \
        ma##00_re * mb##T02_re - ma##00_im * mb##T02_im + \
        ma##01_re * mb##T12_re - ma##01_im * mb##T12_im + \
        ma##02_re * mb##T22_re - ma##02_im * mb##T22_im; \
    mc##02_im = \
        ma##00_re * mb##T02_im + ma##00_im * mb##T02_re + \
        ma##01_re * mb##T12_im + ma##01_im * mb##T12_re + \
        ma##02_re * mb##T22_im + ma##02_im * mb##T22_re; \
    mc##12_re = \
        ma##10_re * mb##T02_re - ma##10_im * mb##T02_im + \
        ma##11_re * mb##T12_re - ma##11_im * mb##T12_im + \
        ma##12_re * mb##T22_re - ma##12_im * mb##T22_im; \
    mc##12_im = \
        ma##10_re * mb##T02_im + ma##10_im * mb##T02_re + \
        ma##11_re * mb##T12_im + ma##11_im * mb##T12_re + \
        ma##12_re * mb##T22_im + ma##12_im * mb##T22_re; \
    mc##22_re = \
        ma##20_re * mb##T02_re - ma##20_im * mb##T02_im + \
        ma##21_re * mb##T12_re - ma##21_im * mb##T12_im + \
        ma##22_re * mb##T22_re - ma##22_im * mb##T22_im; \
    mc##22_im = \
        ma##20_re * mb##T02_im + ma##20_im * mb##T02_re + \
        ma##21_re * mb##T12_im + ma##21_im * mb##T12_re + \
        ma##22_re * mb##T22_im + ma##22_im * mb##T22_re; \
}
00332 
00333 
00334 
/*
 * mc = adjoint(ma) * mb for 3x3 complex matrices ("AN": adjoint times normal).
 *
 * ma, mb, mc are name prefixes. The first operand is read through the names
 * <ma>TIJ_re/_im, so a matching set of "T" aliases (conjugate-transpose view,
 * like linkaT.. / stapleT.. in this header) must exist for ma. The result is
 *   mc[I][J] = sum over K of maT[I][K] * mb[K][J]     (complex arithmetic).
 *
 * mc must not name the same matrix as ma or mb: entries of mc are stored while
 * entries of ma and mb are still going to be read.
 *
 * The 18 assignments are enclosed in a brace block so that they stay together
 * when the macro is the body of an unbraced if/for/while. Call sites may be
 * written with or without a trailing ';'.
 */
#define MULT_SU3_AN(ma, mb, mc) { \
    mc##00_re = \
        ma##T00_re * mb##00_re - ma##T00_im * mb##00_im + \
        ma##T01_re * mb##10_re - ma##T01_im * mb##10_im + \
        ma##T02_re * mb##20_re - ma##T02_im * mb##20_im; \
    mc##00_im = \
        ma##T00_re * mb##00_im + ma##T00_im * mb##00_re + \
        ma##T01_re * mb##10_im + ma##T01_im * mb##10_re + \
        ma##T02_re * mb##20_im + ma##T02_im * mb##20_re; \
    mc##10_re = \
        ma##T10_re * mb##00_re - ma##T10_im * mb##00_im + \
        ma##T11_re * mb##10_re - ma##T11_im * mb##10_im + \
        ma##T12_re * mb##20_re - ma##T12_im * mb##20_im; \
    mc##10_im = \
        ma##T10_re * mb##00_im + ma##T10_im * mb##00_re + \
        ma##T11_re * mb##10_im + ma##T11_im * mb##10_re + \
        ma##T12_re * mb##20_im + ma##T12_im * mb##20_re; \
    mc##20_re = \
        ma##T20_re * mb##00_re - ma##T20_im * mb##00_im + \
        ma##T21_re * mb##10_re - ma##T21_im * mb##10_im + \
        ma##T22_re * mb##20_re - ma##T22_im * mb##20_im; \
    mc##20_im = \
        ma##T20_re * mb##00_im + ma##T20_im * mb##00_re + \
        ma##T21_re * mb##10_im + ma##T21_im * mb##10_re + \
        ma##T22_re * mb##20_im + ma##T22_im * mb##20_re; \
    mc##01_re = \
        ma##T00_re * mb##01_re - ma##T00_im * mb##01_im + \
        ma##T01_re * mb##11_re - ma##T01_im * mb##11_im + \
        ma##T02_re * mb##21_re - ma##T02_im * mb##21_im; \
    mc##01_im = \
        ma##T00_re * mb##01_im + ma##T00_im * mb##01_re + \
        ma##T01_re * mb##11_im + ma##T01_im * mb##11_re + \
        ma##T02_re * mb##21_im + ma##T02_im * mb##21_re; \
    mc##11_re = \
        ma##T10_re * mb##01_re - ma##T10_im * mb##01_im + \
        ma##T11_re * mb##11_re - ma##T11_im * mb##11_im + \
        ma##T12_re * mb##21_re - ma##T12_im * mb##21_im; \
    mc##11_im = \
        ma##T10_re * mb##01_im + ma##T10_im * mb##01_re + \
        ma##T11_re * mb##11_im + ma##T11_im * mb##11_re + \
        ma##T12_re * mb##21_im + ma##T12_im * mb##21_re; \
    mc##21_re = \
        ma##T20_re * mb##01_re - ma##T20_im * mb##01_im + \
        ma##T21_re * mb##11_re - ma##T21_im * mb##11_im + \
        ma##T22_re * mb##21_re - ma##T22_im * mb##21_im; \
    mc##21_im = \
        ma##T20_re * mb##01_im + ma##T20_im * mb##01_re + \
        ma##T21_re * mb##11_im + ma##T21_im * mb##11_re + \
        ma##T22_re * mb##21_im + ma##T22_im * mb##21_re; \
    mc##02_re = \
        ma##T00_re * mb##02_re - ma##T00_im * mb##02_im + \
        ma##T01_re * mb##12_re - ma##T01_im * mb##12_im + \
        ma##T02_re * mb##22_re - ma##T02_im * mb##22_im; \
    mc##02_im = \
        ma##T00_re * mb##02_im + ma##T00_im * mb##02_re + \
        ma##T01_re * mb##12_im + ma##T01_im * mb##12_re + \
        ma##T02_re * mb##22_im + ma##T02_im * mb##22_re; \
    mc##12_re = \
        ma##T10_re * mb##02_re - ma##T10_im * mb##02_im + \
        ma##T11_re * mb##12_re - ma##T11_im * mb##12_im + \
        ma##T12_re * mb##22_re - ma##T12_im * mb##22_im; \
    mc##12_im = \
        ma##T10_re * mb##02_im + ma##T10_im * mb##02_re + \
        ma##T11_re * mb##12_im + ma##T11_im * mb##12_re + \
        ma##T12_re * mb##22_im + ma##T12_im * mb##22_re; \
    mc##22_re = \
        ma##T20_re * mb##02_re - ma##T20_im * mb##02_im + \
        ma##T21_re * mb##12_re - ma##T21_im * mb##12_im + \
        ma##T22_re * mb##22_re - ma##T22_im * mb##22_im; \
    mc##22_im = \
        ma##T20_re * mb##02_im + ma##T20_im * mb##02_re + \
        ma##T21_re * mb##12_im + ma##T21_im * mb##12_re + \
        ma##T22_re * mb##22_im + ma##T22_im * mb##22_re; \
}
00408 
/*
 * Assign the same scalar `value` to all 18 real components (<a>IJ_re and
 * <a>IJ_im, I, J in 0..2) of the 3x3 complex matrix whose name prefix is `a`.
 * `value` is evaluated once per component, so it should be free of side effects.
 *
 * The assignments are enclosed in a brace block so that they stay together
 * under an unbraced if/for/while; a trailing ';' at the call site is optional.
 * The definition ends on its last line: there is no dangling line continuation
 * that would pull the following source line into the macro.
 */
#define SET_SU3_MATRIX(a, value) {              \
    a##00_re = (value);                         \
    a##00_im = (value);                         \
    a##01_re = (value);                         \
    a##01_im = (value);                         \
    a##02_re = (value);                         \
    a##02_im = (value);                         \
    a##10_re = (value);                         \
    a##10_im = (value);                         \
    a##11_re = (value);                         \
    a##11_im = (value);                         \
    a##12_re = (value);                         \
    a##12_im = (value);                         \
    a##20_re = (value);                         \
    a##20_im = (value);                         \
    a##21_re = (value);                         \
    a##21_im = (value);                         \
    a##22_re = (value);                         \
    a##22_im = (value);                         \
}
00428 
/*
 * mc = ma + s * mb, component by component, for 3x3 complex matrices.
 *
 * ma, mb, mc are name prefixes (<m>IJ_re / <m>IJ_im, I, J in 0..2); `s` is a
 * scalar that multiplies the real and the imaginary part of every entry of mb.
 * Each output component depends only on the same component of ma and mb, so
 * mc may name the same matrix as ma or mb.
 *
 * `s` is parenthesized so that an expression argument such as `x + y` scales
 * mb as a whole. The assignments are enclosed in a brace block so that they
 * stay together under an unbraced if/for/while; a trailing ';' at the call
 * site is optional.
 */
#define SCALAR_MULT_ADD_SU3_MATRIX(ma, mb, s, mc) {     \
    mc##00_re = ma##00_re + mb##00_re * (s);            \
    mc##00_im = ma##00_im + mb##00_im * (s);            \
    mc##01_re = ma##01_re + mb##01_re * (s);            \
    mc##01_im = ma##01_im + mb##01_im * (s);            \
    mc##02_re = ma##02_re + mb##02_re * (s);            \
    mc##02_im = ma##02_im + mb##02_im * (s);            \
    mc##10_re = ma##10_re + mb##10_re * (s);            \
    mc##10_im = ma##10_im + mb##10_im * (s);            \
    mc##11_re = ma##11_re + mb##11_re * (s);            \
    mc##11_im = ma##11_im + mb##11_im * (s);            \
    mc##12_re = ma##12_re + mb##12_re * (s);            \
    mc##12_im = ma##12_im + mb##12_im * (s);            \
    mc##20_re = ma##20_re + mb##20_re * (s);            \
    mc##20_im = ma##20_im + mb##20_im * (s);            \
    mc##21_re = ma##21_re + mb##21_re * (s);            \
    mc##21_im = ma##21_im + mb##21_im * (s);            \
    mc##22_re = ma##22_re + mb##22_re * (s);            \
    mc##22_im = ma##22_im + mb##22_im * (s);            \
}
00448 
/*
 * mc = ma - s * mb, component by component, for 3x3 complex matrices.
 *
 * ma, mb, mc are name prefixes (<m>IJ_re / <m>IJ_im, I, J in 0..2); `s` is a
 * scalar that multiplies the real and the imaginary part of every entry of mb.
 * Each output component depends only on the same component of ma and mb, so
 * mc may name the same matrix as ma or mb.
 *
 * `s` is parenthesized so that an expression argument such as `x + y` scales
 * mb as a whole. The assignments are enclosed in a brace block so that they
 * stay together under an unbraced if/for/while; a trailing ';' at the call
 * site is optional.
 */
#define SCALAR_MULT_SUB_SU3_MATRIX(ma, mb, s, mc) {     \
    mc##00_re = ma##00_re - mb##00_re * (s);            \
    mc##00_im = ma##00_im - mb##00_im * (s);            \
    mc##01_re = ma##01_re - mb##01_re * (s);            \
    mc##01_im = ma##01_im - mb##01_im * (s);            \
    mc##02_re = ma##02_re - mb##02_re * (s);            \
    mc##02_im = ma##02_im - mb##02_im * (s);            \
    mc##10_re = ma##10_re - mb##10_re * (s);            \
    mc##10_im = ma##10_im - mb##10_im * (s);            \
    mc##11_re = ma##11_re - mb##11_re * (s);            \
    mc##11_im = ma##11_im - mb##11_im * (s);            \
    mc##12_re = ma##12_re - mb##12_re * (s);            \
    mc##12_im = ma##12_im - mb##12_im * (s);            \
    mc##20_re = ma##20_re - mb##20_re * (s);            \
    mc##20_im = ma##20_im - mb##20_im * (s);            \
    mc##21_re = ma##21_re - mb##21_re * (s);            \
    mc##21_im = ma##21_im - mb##21_im * (s);            \
    mc##22_re = ma##22_re - mb##22_re * (s);            \
    mc##22_im = ma##22_im - mb##22_im * (s);            \
}
00468 
00469 
/*
 * Component names for the compressed matrix "ah", stored in ten scalar slots:
 * the .x/.y fields of AH0..AH4, which must be in scope wherever these names
 * are used (presumably 2-component vector variables - confirm).
 *   - complex entries above the diagonal: 01, 02, 12 (real and imaginary part)
 *   - imaginary parts of the three diagonal entries: 00, 11, 22
 *   - ahspace (AH4.y): the remaining slot; MAKE_ANTI_HERMITIAN sets it to 0
 * The entries that are not stored are rebuilt by UNCOMPRESS_ANTI_HERMITIAN.
 */
00470 #define ah01_re AH0.x
00471 #define ah01_im AH0.y
00472 #define ah02_re AH1.x
00473 #define ah02_im AH1.y
00474 #define ah12_re AH2.x
00475 #define ah12_im AH2.y
00476 #define ah00_im AH3.x
00477 #define ah11_im AH3.y
00478 #define ah22_im AH4.x
00479 #define ahspace AH4.y
00480 
/*
 * Expand the compressed anti-Hermitian matrix `ah` into the full 3x3 complex
 * matrix `m` (both are name prefixes).
 *
 *   - diagonal entries get real part 0 and the stored imaginary part;
 *   - entries above the diagonal (01, 02, 12) are copied;
 *   - entries below the diagonal are rebuilt as minus the complex conjugate
 *     of the mirrored entry: m[J][I] = (-re, +im) of m[I][J].
 *
 * The assignments are enclosed in a brace block so that they stay together
 * under an unbraced if/for/while; a trailing ';' at the call site is optional.
 */
#define UNCOMPRESS_ANTI_HERMITIAN(ah, m) {      \
    m##00_re = 0;                               \
    m##00_im = ah##00_im;                       \
    m##11_re = 0;                               \
    m##11_im = ah##11_im;                       \
    m##22_re = 0;                               \
    m##22_im = ah##22_im;                       \
    m##01_re = ah##01_re;                       \
    m##01_im = ah##01_im;                       \
    m##10_re = -ah##01_re;                      \
    m##10_im = ah##01_im;                       \
    m##02_re = ah##02_re;                       \
    m##02_im = ah##02_im;                       \
    m##20_re = -ah##02_re;                      \
    m##20_im = ah##02_im;                       \
    m##12_re = ah##12_re;                       \
    m##12_im = ah##12_im;                       \
    m##21_re = -ah##12_re;                      \
    m##21_im = ah##12_im;                       \
}
00500 
00501 
/*
 * Build the compressed matrix `ah` from the full 3x3 complex matrix `m`
 * (both are name prefixes):
 *
 *   - the imaginary parts of the diagonal have their average subtracted, so
 *     the three stored diagonal values add up to (approximately) zero;
 *   - the entries above the diagonal become (m[I][J] - conj(m[J][I])) / 2,
 *     i.e. re = (re_IJ - re_JI)/2 and im = (im_IJ + im_JI)/2;
 *   - the spare slot <ah>space is cleared.
 *
 * The scratch variable takes its type from <ah>space. __typeof__ is used
 * instead of the bare `typeof` keyword so that the macro also compiles in
 * strict ISO modes (e.g. -std=c11), where `typeof` is not a keyword.
 * The literals 0.33333333333333333 and 0.5 have type double, so the products
 * are evaluated in double before being stored.
 */
#define MAKE_ANTI_HERMITIAN(m, ah) do {                                 \
        __typeof__(ah##space) temp;                                     \
        temp = (m##00_im + m##11_im + m##22_im)*0.33333333333333333;    \
        ah##00_im  = (m##00_im - temp);                                 \
        ah##11_im  = (m##11_im - temp);                                 \
        ah##22_im  = (m##22_im - temp);                                 \
        ah##01_re = (m##01_re - m##10_re)*0.5;                          \
        ah##02_re = (m##02_re - m##20_re)*0.5;                          \
        ah##12_re = (m##12_re - m##21_re)*0.5;                          \
        ah##01_im = (m##01_im + m##10_im)*0.5;                          \
        ah##02_im = (m##02_im + m##20_im)*0.5;                          \
        ah##12_im = (m##12_im + m##21_im)*0.5;                          \
        ah##space = 0;                                                  \
    } while (0)
00516 
00517 
/*
 * Load the five elements of a compressed anti-Hermitian matrix into
 * var0 .. var4.
 *
 *   src : array (first macro) or texture reference (second macro) to read from
 *   dir : direction index; each direction occupies Vhx5 elements
 *   idx : site index within the direction
 *   var : name prefix of the five destination variables
 *
 * Element k is read at offset idx + dir*Vhx5 + k*Vh, written here with the
 * precomputed multiples Vh, Vhx2, Vhx3, Vhx4, which (together with Vhx5) must
 * be visible where the macro is expanded.
 *
 * idx, dir and src are parenthesized so that expression arguments such as
 * `mu + 1` are scaled as a whole instead of being split by operator precedence.
 * The macro declares a local named start_pos; do not pass an expression that
 * itself refers to a variable of that name.
 */
#define LOAD_ANTI_HERMITIAN_SINGLE(src, dir, idx, var) do {             \
        int start_pos = (idx) + (dir)*Vhx5;                             \
        var##0 = (src)[start_pos];                                      \
        var##1 = (src)[start_pos + Vh];                                 \
        var##2 = (src)[start_pos + Vhx2];                               \
        var##3 = (src)[start_pos + Vhx3];                               \
        var##4 = (src)[start_pos + Vhx4];                               \
    } while (0)

/* Same as LOAD_ANTI_HERMITIAN_SINGLE, but each element is fetched with tex1Dfetch(). */
#define LOAD_ANTI_HERMITIAN_SINGLE_TEX(src, dir, idx, var) do {         \
        int start_pos = (idx) + (dir)*Vhx5;                             \
        var##0 = tex1Dfetch(src, start_pos);                            \
        var##1 = tex1Dfetch(src, start_pos + Vh);                       \
        var##2 = tex1Dfetch(src, start_pos + Vhx2);                     \
        var##3 = tex1Dfetch(src, start_pos + Vhx3);                     \
        var##4 = tex1Dfetch(src, start_pos + Vhx4);                     \
    } while (0)
00535 
/*
 * Store the five elements var0 .. var4 of a compressed anti-Hermitian matrix
 * into `mem`.
 *
 *   mem : destination array
 *   dir : direction index; each direction occupies Vhx5 elements
 *   idx : site index within the direction
 *   var : name prefix of the five source variables
 *
 * Element k is written at offset idx + dir*Vhx5 + k*Vh, using the precomputed
 * multiples Vh, Vhx2, Vhx3, Vhx4, which (together with Vhx5) must be visible
 * where the macro is expanded.
 *
 * idx, dir and mem are parenthesized so that expression arguments such as
 * `mu + 1` are scaled as a whole instead of being split by operator precedence.
 * The macro declares a local named start_ps; do not pass an expression that
 * itself refers to a variable of that name.
 */
#define WRITE_ANTI_HERMITIAN_SINGLE(mem, dir, idx, var) do {    \
        int start_ps = (idx) + (dir)*Vhx5;                      \
        (mem)[start_ps] = var##0;                               \
        (mem)[start_ps + Vh] = var##1;                          \
        (mem)[start_ps + Vhx2] = var##2;                        \
        (mem)[start_ps + Vhx3] = var##3;                        \
        (mem)[start_ps + Vhx4] = var##4;                        \
    } while (0)
00544 
/*
 * Variant of WRITE_ANTI_HERMITIAN_SINGLE that ignores `var` and stores the
 * fixed float2 values {1,1}, {2,2}, {3,3}, {4,4}, {5,5} at the five element
 * positions idx + dir*Vhx5 + k*Vh (k = 0..4).
 * NOTE(review): this looks like a debugging aid for checking the memory
 * layout - confirm before relying on it.
 *
 * The `var` parameter is kept so that the macro can be swapped in for
 * WRITE_ANTI_HERMITIAN_SINGLE at a call site without editing the arguments.
 * idx, dir and mem are parenthesized so that expression arguments such as
 * `mu + 1` are scaled as a whole instead of being split by operator precedence.
 */
#define WRITE_ANTI_HERMITIAN_SINGLE_A(mem, dir, idx, var) do {          \
        int start_ps = (idx) + (dir)*Vhx5;                              \
        (mem)[start_ps] = (float2){1,1};                                \
        (mem)[start_ps + Vh] = (float2){2,2};                           \
        (mem)[start_ps + Vhx2] = (float2){3,3};                         \
        (mem)[start_ps + Vhx3] = (float2){4,4};                         \
        (mem)[start_ps + Vhx4] = (float2){5,5};                         \
    } while (0)
00553 
00554 
/*
 * Copy the 3x3 complex matrix `a` into `b` (both are name prefixes):
 * every component <b>IJ_re / <b>IJ_im receives <a>IJ_re / <a>IJ_im.
 * Note the argument order: source first, destination second.
 *
 * The assignments are enclosed in a brace block so that they stay together
 * under an unbraced if/for/while; a trailing ';' at the call site is optional.
 */
#define COPY_SU3_MATRIX(a, b) {         \
    b##00_re = a##00_re;                \
    b##00_im = a##00_im;                \
    b##01_re = a##01_re;                \
    b##01_im = a##01_im;                \
    b##02_re = a##02_re;                \
    b##02_im = a##02_im;                \
    b##10_re = a##10_re;                \
    b##10_im = a##10_im;                \
    b##11_re = a##11_re;                \
    b##11_im = a##11_im;                \
    b##12_re = a##12_re;                \
    b##12_im = a##12_im;                \
    b##20_re = a##20_re;                \
    b##20_im = a##20_im;                \
    b##21_re = a##21_re;                \
    b##21_im = a##21_im;                \
    b##22_re = a##22_re;                \
    b##22_im = a##22_im;                \
}
00574 
/*
 * Write the conjugate transpose of the 3x3 complex matrix `a` into `b`:
 * bRC_re = aCR_re and bRC_im = -aCR_im for every row R, column C in 0..2.
 * Each line below handles the real and imaginary part of one element of b.
 *
 * The assignments run in row-major order of `b` and read `a` as they go,
 * so `a` and `b` must name different matrices.  The expansion is a plain
 * sequence of statements, not a single statement.
 */
#define SU3_ADJOINT(a, b)                               \
    b##00_re = a##00_re;  b##00_im = -a##00_im;         \
    b##01_re = a##10_re;  b##01_im = -a##10_im;         \
    b##02_re = a##20_re;  b##02_im = -a##20_im;         \
    b##10_re = a##01_re;  b##10_im = -a##01_im;         \
    b##11_re = a##11_re;  b##11_im = -a##11_im;         \
    b##12_re = a##21_re;  b##12_im = -a##21_im;         \
    b##20_re = a##02_re;  b##20_im = -a##02_im;         \
    b##21_re = a##12_re;  b##21_im = -a##12_im;         \
    b##22_re = a##22_re;  b##22_im = -a##22_im;
00594 
/*
 * Overwrite the 3x3 complex matrix with prefix `a` by the identity:
 * the real parts of the diagonal elements (00, 11, 22) become 1.0 and
 * every other real or imaginary component becomes 0.
 * Each line below handles the real and imaginary part of one element.
 *
 * The expansion is a plain sequence of statements, not a single statement.
 */
#define SET_UNIT_SU3_MATRIX(a)                  \
    a##00_re = 1.0;  a##00_im = 0;              \
    a##01_re = 0;    a##01_im = 0;              \
    a##02_re = 0;    a##02_im = 0;              \
    a##10_re = 0;    a##10_im = 0;              \
    a##11_re = 1.0;  a##11_im = 0;              \
    a##12_re = 0;    a##12_im = 0;              \
    a##20_re = 0;    a##20_im = 0;              \
    a##21_re = 0;    a##21_im = 0;              \
    a##22_re = 1.0;  a##22_im = 0;
00614 
/*
 * Assign (not accumulate) the conjugated complex product: a = conj(b) * conj(c)
 *
 *   a_re =   b_re*c_re - b_im*c_im
 *   a_im = -(b_re*c_im) - b_im*c_re
 *
 * `a`, `b`, `c` are name prefixes; the macro appends _re / _im to each.
 * Because the paste only touches the last token, `b` and `c` may carry a
 * leading sign (e.g. `-m02`, `+m11`).
 *
 * The body is wrapped in do { } while (0) so the whole assignment acts as
 * one statement, e.g. under an unbraced `if`.  As before, the caller
 * supplies the trailing ';'.
 */
#define ACC_CONJ_PROD_ASSIGN(a, b, c) do {      \
    a##_re = b##_re * c##_re;                   \
    a##_re -= b##_im * c##_im;                  \
    a##_im = - b##_re * c##_im;                 \
    a##_im -= b##_im * c##_re;                  \
  } while (0)
00621 
00622 
/*
 * Rebuild the third row (elements 20, 21, 22) of the 3x3 complex matrix
 * with prefix `var` from its first two rows, then multiply every rebuilt
 * component by `sign`.
 *
 * Each element is started with ACC_CONJ_PROD_ASSIGN and completed with
 * ACC_CONJ_PROD (defined elsewhere in this header; presumably the
 * accumulating counterpart -- confirm), using these operand pairs:
 *   20 : (+01, +12) then (-02, +11)
 *   21 : (+02, +10) then (-00, +12)
 *   22 : (+00, +11) then (-01, +10)
 *
 * `dir` and `idx` are accepted but not used by this macro.
 * The expansion is a plain sequence of statements, not a single statement.
 */
#define RECONSTRUCT_LINK_12(dir, idx, sign, var)                        \
    ACC_CONJ_PROD_ASSIGN(var##20, +var##01, +var##12);                  \
    ACC_CONJ_PROD(var##20, -var##02, +var##11);                         \
    var##20_re *= sign;                                                 \
    var##20_im *= sign;                                                 \
    ACC_CONJ_PROD_ASSIGN(var##21, +var##02, +var##10);                  \
    ACC_CONJ_PROD(var##21, -var##00, +var##12);                         \
    var##21_re *= sign;                                                 \
    var##21_im *= sign;                                                 \
    ACC_CONJ_PROD_ASSIGN(var##22, +var##00, +var##11);                  \
    ACC_CONJ_PROD(var##22, -var##01, +var##10);                         \
    var##22_re *= sign;                                                 \
    var##22_im *= sign;
00632 
/*
 * Set `new_mem_idx` to the index reached by stepping forward from `idx`
 * in direction `mydir` (0..3), shifted right by one bit.
 *
 * For direction d the coordinate x<d+1> is compared with its last value
 * (X1m1, X2m1, X3m1, X4m1): on that boundary the index moves back by
 * X1m1 / X2X1mX1 / X3X2X1mX2X1 / X4X3X2X1mX3X2X1 (presumably a periodic
 * wrap-around); otherwise it advances by 1 / X1 / X2X1 / X3X2X1.
 *
 * Reads x1..x4 and the X* constants, and writes new_mem_idx, from the
 * scope where the macro is expanded.  Any other `mydir` leaves
 * new_mem_idx untouched.
 *
 * `idx` is parenthesised so expression arguments (e.g. `sid << 1`) are
 * not regrouped by the surrounding + and -.
 */
#define COMPUTE_NEW_IDX_PLUS(mydir, idx) do {                           \
        switch(mydir){                                                  \
        case 0:                                                         \
            new_mem_idx = ( (x1==X1m1)?(idx)-X1m1:(idx)+1)>> 1;         \
            break;                                                      \
        case 1:                                                         \
            new_mem_idx = ( (x2==X2m1)?(idx)-X2X1mX1:(idx)+X1) >> 1;    \
            break;                                                      \
        case 2:                                                         \
            new_mem_idx = ( (x3==X3m1)?(idx)-X3X2X1mX2X1:(idx)+X2X1) >> 1; \
            break;                                                      \
        case 3:                                                         \
            new_mem_idx = ( (x4==X4m1)?(idx)-X4X3X2X1mX3X2X1:(idx)+X3X2X1) >> 1; \
            break;                                                      \
        default:                                                        \
            break;                                                      \
        }                                                               \
    }while(0)
00649 
/*
 * Set `new_mem_idx` to the index reached by stepping backward from `idx`
 * in direction `mydir` (0..3).
 *
 * For direction d the coordinate x<d+1> is compared with 0: on that
 * boundary the index moves forward by X1m1 / X2X1mX1 / X3X2X1mX2X1 /
 * X4X3X2X1mX3X2X1 (presumably a periodic wrap-around); otherwise it
 * retreats by 1 / X1 / X2X1 / X3X2X1.
 *
 * Both branches are now computed from the `idx` argument.  The previous
 * text used an outer variable `X` in the non-boundary branch, which only
 * agreed with the boundary branch when the caller passed `X` as `idx`.
 * `idx` is also parenthesised so expression arguments are safe.
 *
 * Reads x1..x4 and the X* constants, and writes new_mem_idx, from the
 * scope where the macro is expanded.  Any other `mydir` leaves
 * new_mem_idx untouched.
 *
 * NOTE(review): unlike COMPUTE_NEW_IDX_PLUS this macro does not shift the
 * result right by one, so it is identical to COMPUTE_NEW_FULL_IDX_MINUS.
 * Confirm against the callers whether a `>> 1` is missing.
 */
#define COMPUTE_NEW_IDX_MINUS(mydir, idx) do {                          \
        switch(mydir){                                                  \
        case 0:                                                         \
            new_mem_idx = ( (x1==0)?(idx)+X1m1:(idx)-1);                \
            break;                                                      \
        case 1:                                                         \
            new_mem_idx = ( (x2==0)?(idx)+X2X1mX1:(idx)-X1);            \
            break;                                                      \
        case 2:                                                         \
            new_mem_idx = ( (x3==0)?(idx)+X3X2X1mX2X1:(idx)-X2X1);      \
            break;                                                      \
        case 3:                                                         \
            new_mem_idx = ( (x4==0)?(idx)+X4X3X2X1mX3X2X1:(idx)-X3X2X1); \
            break;                                                      \
        default:                                                        \
            break;                                                      \
        }                                                               \
    }while(0)
00666 
00667 
/*
 * Set `new_mem_idx` to the index reached by stepping forward from `idx`
 * in direction `mydir` (0..3).  Same selection logic as
 * COMPUTE_NEW_IDX_PLUS but without the final right shift.
 *
 * For direction d the coordinate x<d+1> is compared with its last value
 * (X1m1, X2m1, X3m1, X4m1): on that boundary the index moves back by
 * X1m1 / X2X1mX1 / X3X2X1mX2X1 / X4X3X2X1mX3X2X1 (presumably a periodic
 * wrap-around); otherwise it advances by 1 / X1 / X2X1 / X3X2X1.
 *
 * Reads x1..x4 and the X* constants, and writes new_mem_idx, from the
 * scope where the macro is expanded.  Any other `mydir` leaves
 * new_mem_idx untouched.
 *
 * `idx` is parenthesised so expression arguments are not regrouped by
 * the surrounding + and -.
 */
#define COMPUTE_NEW_FULL_IDX_PLUS(mydir, idx) do {                      \
        switch(mydir){                                                  \
        case 0:                                                         \
            new_mem_idx = ( (x1==X1m1)?(idx)-X1m1:(idx)+1);             \
            break;                                                      \
        case 1:                                                         \
            new_mem_idx = ( (x2==X2m1)?(idx)-X2X1mX1:(idx)+X1);         \
            break;                                                      \
        case 2:                                                         \
            new_mem_idx = ( (x3==X3m1)?(idx)-X3X2X1mX2X1:(idx)+X2X1);   \
            break;                                                      \
        case 3:                                                         \
            new_mem_idx = ( (x4==X4m1)?(idx)-X4X3X2X1mX3X2X1:(idx)+X3X2X1); \
            break;                                                      \
        default:                                                        \
            break;                                                      \
        }                                                               \
    }while(0)
00684     
/*
 * Set `new_mem_idx` to the index reached by stepping backward from `idx`
 * in direction `mydir` (0..3).
 *
 * For direction d the coordinate x<d+1> is compared with 0: on that
 * boundary the index moves forward by X1m1 / X2X1mX1 / X3X2X1mX2X1 /
 * X4X3X2X1mX3X2X1 (presumably a periodic wrap-around); otherwise it
 * retreats by 1 / X1 / X2X1 / X3X2X1.
 *
 * Both branches are now computed from the `idx` argument, matching
 * COMPUTE_NEW_FULL_IDX_PLUS.  The previous text used an outer variable
 * `X` in the non-boundary branch, which only agreed with the boundary
 * branch when the caller passed `X` as `idx`.  `idx` is also
 * parenthesised so expression arguments are safe.
 *
 * Reads x1..x4 and the X* constants, and writes new_mem_idx, from the
 * scope where the macro is expanded.  Any other `mydir` leaves
 * new_mem_idx untouched.
 */
#define COMPUTE_NEW_FULL_IDX_MINUS(mydir, idx) do {                     \
        switch(mydir){                                                  \
        case 0:                                                         \
            new_mem_idx = ( (x1==0)?(idx)+X1m1:(idx)-1);                \
            break;                                                      \
        case 1:                                                         \
            new_mem_idx = ( (x2==0)?(idx)+X2X1mX1:(idx)-X1);            \
            break;                                                      \
        case 2:                                                         \
            new_mem_idx = ( (x3==0)?(idx)+X3X2X1mX2X1:(idx)-X2X1);      \
            break;                                                      \
        case 3:                                                         \
            new_mem_idx = ( (x4==0)?(idx)+X4X3X2X1mX3X2X1:(idx)-X3X2X1); \
            break;                                                      \
        default:                                                        \
            break;                                                      \
        }                                                               \
    }while(0)
00701 
00702 
00703 #endif
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines