Go to the documentation of this file. 4 #define COMPLEX_ADD_TO(a, b) \ 8 #define COMPLEX_PRODUCT(a, b, c) \ 9 a##_re = b##_re*c##_re; \ 10 a##_re -= b##_im*c##_im; \ 11 a##_im = b##_re*c##_im; \ 12 a##_im += b##_im*c##_re 14 #define COMPLEX_CONJUGATE_PRODUCT(a, b, c) \ 15 a##_re = b##_re*c##_re; \ 16 a##_re -= b##_im*c##_im; \ 17 a##_im = -b##_re*c##_im; \ 18 a##_im -= b##_im*c##_re 21 #define COMPLEX_DOT_PRODUCT(a, b, c) \ 22 a##_re = b##_re*c##_re; \ 23 a##_re += b##_im*c##_im; \ 24 a##_im = b##_re*c##_im; \ 25 a##_im -= b##_im*c##_re 28 #define COMPLEX_NORM(a, b) \ 32 #define ACC_COMPLEX_PROD(a, b, c) \ 33 a##_re += b##_re*c##_re; \ 34 a##_re -= b##_im*c##_im; \ 35 a##_im += b##_re*c##_im; \ 36 a##_im += b##_im*c##_re 39 #define ACC_CONJ_PROD(a, b, c) \ 40 a##_re += b##_re * c##_re; \ 41 a##_re -= b##_im * c##_im; \ 42 a##_im -= b##_re * c##_im; \ 43 a##_im -= b##_im * c##_re 45 #define READ_GAUGE_MATRIX_18_FLOAT2_TEX(G, gauge, dir, idx, stride) \ 46 float2 G##0 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+0)*stride); \ 47 float2 G##1 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+1)*stride); \ 48 float2 G##2 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+2)*stride); \ 49 float2 G##3 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+3)*stride); \ 50 float2 G##4 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+4)*stride); \ 51 float2 G##5 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+5)*stride); \ 52 float2 G##6 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+6)*stride); \ 53 float2 G##7 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+7)*stride); \ 54 float2 G##8 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+8)*stride); \ 56 #define READ_GAUGE_MATRIX_18_SHORT2_TEX(G, gauge, dir, idx, stride) \ 57 float2 G##0 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+0)*stride); \ 58 float2 G##1 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+1)*stride); \ 59 float2 G##2 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+2)*stride); \ 60 float2 G##3 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+3)*stride); \ 61 float2 G##4 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+4)*stride); \ 62 float2 G##5 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+5)*stride); \ 63 float2 G##6 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+6)*stride); \ 64 float2 G##7 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+7)*stride); \ 65 float2 G##8 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+8)*stride); \ 67 #define READ_GAUGE_MATRIX_12_FLOAT4_TEX(G, gauge, dir, idx, stride) \ 68 float4 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+0)*stride); \ 69 float4 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+1)*stride); \ 70 float4 G##2 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+2)*stride); \ 71 float4 G##3 = make_float4(0,0,0,0); \ 72 float4 G##4 = make_float4(0,0,0,0); 74 #define READ_GAUGE_MATRIX_12_SHORT4_TEX(G, gauge, dir, idx, stride) \ 75 float4 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+0)*stride); \ 76 float4 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+1)*stride); \ 77 float4 G##2 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+2)*stride); \ 78 float4 G##3 = make_float4(0,0,0,0); \ 79 float4 G##4 = make_float4(0,0,0,0); 82 #define READ_GAUGE_MATRIX_8_FLOAT4_TEX(G, gauge, dir, idx, stride) \ 83 float4 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+0)*stride); \ 84 float4 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+1)*stride); \ 85 float4 G##2 = make_float4(0,0,0,0); \ 86 float4 G##3 = make_float4(0,0,0,0); \ 87 float4 G##4 = make_float4(0,0,0,0); \ 88 (G##3).z = (G##0).x; \ 91 #define READ_GAUGE_MATRIX_8_SHORT4_TEX(G, gauge, dir, idx, stride) \ 92 float4 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+0)*stride); \ 93 float4 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+1)*stride); \ 94 float4 G##2 = make_float4(0,0,0,0); \ 95 float4 G##3 = make_float4(0,0,0,0); \ 96 float4 G##4 = make_float4(0,0,0,0); \ 97 (G##3).z = (G##0).x = M_PI*(G##0).x; \ 98 (G##3).w = (G##0).y = M_PI*(G##0).y; 100 #define READ_GAUGE_MATRIX_18_DOUBLE2(G, gauge_, dir, idx, stride) \ 101 double2 *gauge = (double2*)gauge_; \ 102 double2 G##0 = gauge[idx + ((dir/2)*9+0)*stride]; \ 103 double2 G##1 = gauge[idx + ((dir/2)*9+1)*stride]; \ 104 double2 G##2 = gauge[idx + ((dir/2)*9+2)*stride]; \ 105 double2 G##3 = gauge[idx + ((dir/2)*9+3)*stride]; \ 106 double2 G##4 = gauge[idx + ((dir/2)*9+4)*stride]; \ 107 double2 G##5 = gauge[idx + ((dir/2)*9+5)*stride]; \ 108 double2 G##6 = gauge[idx + ((dir/2)*9+6)*stride]; \ 109 double2 G##7 = gauge[idx + ((dir/2)*9+7)*stride]; \ 110 double2 G##8 = gauge[idx + ((dir/2)*9+8)*stride]; \ 112 #define READ_GAUGE_MATRIX_18_FLOAT2(G, gauge_, dir, idx, stride) \ 113 float2 *gauge = (float2*)gauge_; \ 114 float2 G##0 = ((float2*)gauge)[idx + ((dir/2)*9+0)*stride]; \ 115 float2 G##1 = ((float2*)gauge)[idx + ((dir/2)*9+1)*stride]; \ 116 float2 G##2 = ((float2*)gauge)[idx + ((dir/2)*9+2)*stride]; \ 117 float2 G##3 = ((float2*)gauge)[idx + ((dir/2)*9+3)*stride]; \ 118 float2 G##4 = ((float2*)gauge)[idx + ((dir/2)*9+4)*stride]; \ 119 float2 G##5 = ((float2*)gauge)[idx + ((dir/2)*9+5)*stride]; \ 120 float2 G##6 = ((float2*)gauge)[idx + ((dir/2)*9+6)*stride]; \ 121 float2 G##7 = ((float2*)gauge)[idx + ((dir/2)*9+7)*stride]; \ 122 float2 G##8 = ((float2*)gauge)[idx + ((dir/2)*9+8)*stride]; \ 124 #define READ_GAUGE_MATRIX_18_SHORT2(G, gauge_, dir, idx, stride) \ 125 short2 *gauge = (short2*)gauge_; \ 126 float2 G##0 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+0)*stride]); \ 127 float2 G##1 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+1)*stride]); \ 128 float2 G##2 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+2)*stride]); \ 129 float2 G##3 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+3)*stride]); \ 130 float2 G##4 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+4)*stride]); \ 131 float2 G##5 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+5)*stride]); \ 132 float2 G##6 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+6)*stride]); \ 133 float2 G##7 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+7)*stride]); \ 134 float2 G##8 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+8)*stride]); \ 136 #define READ_GAUGE_MATRIX_12_DOUBLE2(G, gauge_, dir, idx, stride) \ 137 double2 *gauge = (double2*)gauge_; \ 138 double2 G##0 = gauge[idx + ((dir/2)*6+0)*stride]; \ 139 double2 G##1 = gauge[idx + ((dir/2)*6+1)*stride]; \ 140 double2 G##2 = gauge[idx + ((dir/2)*6+2)*stride]; \ 141 double2 G##3 = gauge[idx + ((dir/2)*6+3)*stride]; \ 142 double2 G##4 = gauge[idx + ((dir/2)*6+4)*stride]; \ 143 double2 G##5 = gauge[idx + ((dir/2)*6+5)*stride]; \ 144 double2 G##6 = make_double2(0,0); \ 145 double2 G##7 = make_double2(0,0); \ 146 double2 G##8 = make_double2(0,0); 148 #define READ_GAUGE_MATRIX_12_FLOAT4(G, gauge_, dir, idx, stride)\ 149 float4 *gauge = (float4*)gauge_; \ 150 float4 G##0 = gauge[idx + ((dir/2)*3+0)*stride]; \ 151 float4 G##1 = gauge[idx + ((dir/2)*3+1)*stride]; \ 152 float4 G##2 = gauge[idx + ((dir/2)*3+2)*stride]; \ 153 float4 G##3 = make_float4(0,0,0,0); \ 154 float4 G##4 = make_float4(0,0,0,0); 156 #define READ_GAUGE_MATRIX_12_SHORT4(G, gauge_, dir, idx, stride) \ 157 short4 *gauge = (short4*)gauge_; \ 158 float4 G##0 = short42float4(gauge[idx + ((dir/2)*3+0)*stride]); \ 159 float4 G##1 = short42float4(gauge[idx + ((dir/2)*3+1)*stride]); \ 160 float4 G##2 = short42float4(gauge[idx + ((dir/2)*3+2)*stride]); \ 161 float4 G##3 = make_float4(0,0,0,0); \ 162 float4 G##4 = make_float4(0,0,0,0); 165 #define READ_GAUGE_MATRIX_8_DOUBLE2(G, gauge_, dir, idx, stride) \ 166 double2 *gauge = (double2*)gauge_; \ 167 double2 G##0 = gauge[idx + ((dir/2)*4+0)*stride]; \ 168 double2 G##1 = gauge[idx + ((dir/2)*4+1)*stride]; \ 169 double2 G##2 = gauge[idx + ((dir/2)*4+2)*stride]; \ 170 double2 G##3 = gauge[idx + ((dir/2)*4+3)*stride]; \ 171 double2 G##4 = make_double2(0,0); \ 172 double2 G##5 = make_double2(0,0); \ 173 double2 G##6 = make_double2(0,0); \ 174 double2 G##7 = make_double2(0,0); \ 175 double2 G##8 = make_double2(0,0); \ 176 (G##7).x = (G##0).x; \ 180 #define READ_GAUGE_MATRIX_8_FLOAT4(G, gauge_, dir, idx, stride) \ 181 float4 *gauge = (float4*)gauge_; \ 182 float4 G##0 = gauge[idx + ((dir/2)*2+0)*stride]; \ 183 float4 G##1 = gauge[idx + ((dir/2)*2+1)*stride]; \ 184 float4 G##2 = make_float4(0,0,0,0); \ 185 float4 G##3 = make_float4(0,0,0,0); \ 186 float4 G##4 = make_float4(0,0,0,0); \ 187 (G##3).z = (G##0).x; \ 190 #define READ_GAUGE_MATRIX_8_SHORT4(G, gauge_, dir, idx, stride) \ 191 short4 *gauge = (short4*)gauge_; \ 192 float4 G##0 = short42float4(gauge[idx + ((dir/2)*2+0)*stride]); \ 193 float4 G##1 = short42float4(gauge[idx + ((dir/2)*2+1)*stride]); \ 194 float4 G##2 = make_float4(0,0,0,0); \ 195 float4 G##3 = make_float4(0,0,0,0); \ 196 float4 G##4 = make_float4(0,0,0,0); \ 197 (G##3).z = (G##0).x = M_PI*(G##0).x; \ 198 (G##3).w = (G##0).y = M_PI*(G##0).y; 201 #define READ_GAUGE_PHASE_DOUBLE(P, phase, dir, idx, stride){ \ 202 P = 2.*M_PI*((double*)phase)[idx + (dir/2)*stride]; \ 205 #define READ_GAUGE_PHASE_FLOAT(P, phase, dir, idx, stride){ \ 206 P = 2.f*M_PI*((float*)phase)[idx + (dir/2)*stride]; \ 209 #define READ_GAUGE_PHASE_SHORT(P, phase, dir, idx, stride){ \ 210 P = 2.f*M_PI*short2float(((short*)phase)[idx + (dir/2)*stride]); \ 215 #define ASSN_GAUGE_MATRIX_18_FLOAT2_TEX(G, gauge, dir, idx, stride) \ 216 G##0 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+0)*stride); \ 217 G##1 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+1)*stride); \ 218 G##2 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+2)*stride); \ 219 G##3 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+3)*stride); \ 220 G##4 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+4)*stride); \ 221 G##5 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+5)*stride); \ 222 G##6 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+6)*stride); \ 223 G##7 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+7)*stride); \ 224 G##8 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+8)*stride); \ 226 #define ASSN_GAUGE_MATRIX_18_SHORT2_TEX(G, gauge, dir, idx, stride) \ 227 G##0 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+0)*stride); \ 228 G##1 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+1)*stride); \ 229 G##2 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+2)*stride); \ 230 G##3 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+3)*stride); \ 231 G##4 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+4)*stride); \ 232 G##5 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+5)*stride); \ 233 G##6 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+6)*stride); \ 234 G##7 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+7)*stride); \ 235 G##8 = TEX1DFETCH(float2, (gauge), idx + ((dir/2)*9+8)*stride); \ 237 #define ASSN_GAUGE_MATRIX_12_FLOAT4_TEX(G, gauge, dir, idx, stride) \ 238 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+0)*stride); \ 239 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+1)*stride); \ 240 G##2 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+2)*stride); \ 241 G##3 = make_float4(0,0,0,0); \ 242 G##4 = make_float4(0,0,0,0); 244 #define ASSN_GAUGE_MATRIX_12_SHORT4_TEX(G, gauge, dir, idx, stride) \ 245 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+0)*stride); \ 246 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+1)*stride); \ 247 G##2 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*3+2)*stride); \ 248 G##3 = make_float4(0,0,0,0); \ 249 G##4 = make_float4(0,0,0,0); 252 #define ASSN_GAUGE_MATRIX_8_FLOAT4_TEX(G, gauge, dir, idx, stride) \ 253 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+0)*stride); \ 254 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+1)*stride); \ 255 G##2 = make_float4(0,0,0,0); \ 256 G##3 = make_float4(0,0,0,0); \ 257 G##4 = make_float4(0,0,0,0); \ 258 (G##3).z = (G##0).x; \ 261 #define ASSN_GAUGE_MATRIX_8_SHORT4_TEX(G, gauge, dir, idx, stride) \ 262 G##0 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+0)*stride); \ 263 G##1 = TEX1DFETCH(float4, (gauge), idx + ((dir/2)*2+1)*stride); \ 264 G##2 = make_float4(0,0,0,0); \ 265 G##3 = make_float4(0,0,0,0); \ 266 G##4 = make_float4(0,0,0,0); \ 267 (G##3).z = (G##0).x = M_PI*(G##0).x; \ 268 (G##3).w = (G##0).y = M_PI*(G##0).y; 270 #define ASSN_GAUGE_MATRIX_18_DOUBLE2(G, gauge_, dir, idx, stride) \ 271 double2 *gauge = (double2*)gauge_; \ 272 G##0 = gauge[idx + ((dir/2)*9+0)*stride]; \ 273 G##1 = gauge[idx + ((dir/2)*9+1)*stride]; \ 274 G##2 = gauge[idx + ((dir/2)*9+2)*stride]; \ 275 G##3 = gauge[idx + ((dir/2)*9+3)*stride]; \ 276 G##4 = gauge[idx + ((dir/2)*9+4)*stride]; \ 277 G##5 = gauge[idx + ((dir/2)*9+5)*stride]; \ 278 G##6 = gauge[idx + ((dir/2)*9+6)*stride]; \ 279 G##7 = gauge[idx + ((dir/2)*9+7)*stride]; \ 280 G##8 = gauge[idx + ((dir/2)*9+8)*stride]; \ 282 #define ASSN_GAUGE_MATRIX_18_FLOAT2(G, gauge_, dir, idx, stride) \ 283 float2 *gauge = (float2*)gauge_; \ 284 G##0 = ((float2*)gauge)[idx + ((dir/2)*9+0)*stride]; \ 285 G##1 = ((float2*)gauge)[idx + ((dir/2)*9+1)*stride]; \ 286 G##2 = ((float2*)gauge)[idx + ((dir/2)*9+2)*stride]; \ 287 G##3 = ((float2*)gauge)[idx + ((dir/2)*9+3)*stride]; \ 288 G##4 = ((float2*)gauge)[idx + ((dir/2)*9+4)*stride]; \ 289 G##5 = ((float2*)gauge)[idx + ((dir/2)*9+5)*stride]; \ 290 G##6 = ((float2*)gauge)[idx + ((dir/2)*9+6)*stride]; \ 291 G##7 = ((float2*)gauge)[idx + ((dir/2)*9+7)*stride]; \ 292 G##8 = ((float2*)gauge)[idx + ((dir/2)*9+8)*stride]; \ 294 #define ASSN_GAUGE_MATRIX_18_SHORT2(G, gauge_, dir, idx, stride) \ 295 short2 *gauge = (short2*)gauge_; \ 296 G##0 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+0)*stride]); \ 297 G##1 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+1)*stride]); \ 298 G##2 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+2)*stride]); \ 299 G##3 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+3)*stride]); \ 300 G##4 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+4)*stride]); \ 301 G##5 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+5)*stride]); \ 302 G##6 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+6)*stride]); \ 303 G##7 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+7)*stride]); \ 304 G##8 = short22float2(((short2*)gauge)[idx + ((dir/2)*9+8)*stride]); \ 306 #define ASSN_GAUGE_MATRIX_12_DOUBLE2(G, gauge_, dir, idx, stride) \ 307 double2 *gauge = (double2*)gauge_; \ 308 G##0 = gauge[idx + ((dir/2)*6+0)*stride]; \ 309 G##1 = gauge[idx + ((dir/2)*6+1)*stride]; \ 310 G##2 = gauge[idx + ((dir/2)*6+2)*stride]; \ 311 G##3 = gauge[idx + ((dir/2)*6+3)*stride]; \ 312 G##4 = gauge[idx + ((dir/2)*6+4)*stride]; \ 313 G##5 = gauge[idx + ((dir/2)*6+5)*stride]; \ 314 G##6 = make_double2(0,0); \ 315 G##7 = make_double2(0,0); \ 316 G##8 = make_double2(0,0); 318 #define ASSN_GAUGE_MATRIX_12_FLOAT4(G, gauge_, dir, idx, stride) \ 319 float4 *gauge = (float4*)gauge_; \ 320 G##0 = gauge[idx + ((dir/2)*3+0)*stride]; \ 321 G##1 = gauge[idx + ((dir/2)*3+1)*stride]; \ 322 G##2 = gauge[idx + ((dir/2)*3+2)*stride]; \ 323 G##3 = make_float4(0,0,0,0); \ 324 G##4 = make_float4(0,0,0,0); 326 #define ASSN_GAUGE_MATRIX_12_SHORT4(G, gauge_, dir, idx, stride) \ 327 short4 *gauge = (short4*)gauge_; \ 328 G##0 = short42float4(gauge[idx + ((dir/2)*3+0)*stride]); \ 329 G##1 = short42float4(gauge[idx + ((dir/2)*3+1)*stride]); \ 330 G##2 = short42float4(gauge[idx + ((dir/2)*3+2)*stride]); \ 331 G##3 = make_float4(0,0,0,0); \ 332 G##4 = make_float4(0,0,0,0); 335 #define ASSN_GAUGE_MATRIX_8_DOUBLE2(G, gauge_, dir, idx, stride) \ 336 double2 *gauge = (double2*)gauge_; \ 337 G##0 = gauge[idx + ((dir/2)*4+0)*stride]; \ 338 G##1 = gauge[idx + ((dir/2)*4+1)*stride]; \ 339 G##2 = gauge[idx + ((dir/2)*4+2)*stride]; \ 340 G##3 = gauge[idx + ((dir/2)*4+3)*stride]; \ 341 G##4 = make_double2(0,0); \ 342 G##5 = make_double2(0,0); \ 343 G##6 = make_double2(0,0); \ 344 G##7 = make_double2(0,0); \ 345 G##8 = make_double2(0,0); \ 346 (G##7).x = (G##0).x; \ 350 #define ASSN_GAUGE_MATRIX_8_FLOAT4(G, gauge_, dir, idx, stride) \ 351 float4 *gauge = (float4*)gauge_; \ 352 G##0 = gauge[idx + ((dir/2)*2+0)*stride]; \ 353 G##1 = gauge[idx + ((dir/2)*2+1)*stride]; \ 354 G##2 = make_float4(0,0,0,0); \ 355 G##3 = make_float4(0,0,0,0); \ 356 G##4 = make_float4(0,0,0,0); \ 357 (G##3).z = (G##0).x; \ 360 #define ASSN_GAUGE_MATRIX_8_SHORT4(G, gauge_, dir, idx, stride) \ 361 short4 *gauge = (short4*)gauge_; \ 362 G##0 = short42float4(gauge[idx + ((dir/2)*2+0)*stride]); \ 363 G##1 = short42float4(gauge[idx + ((dir/2)*2+1)*stride]); \ 364 G##2 = make_float4(0,0,0,0); \ 365 G##3 = make_float4(0,0,0,0); \ 366 G##4 = make_float4(0,0,0,0); \ 367 (G##3).z = (G##0).x = M_PI*(G##0).x; \ 368 (G##3).w = (G##0).y = M_PI*(G##0).y; 373 #define RESCALE2(G, max) \ 374 (G##0).x *= max; (G##0).y *= max; (G##1).x *= max; (G##1).y *= max; \ 375 (G##2).x *= max; (G##2).y *= max; (G##3).x *= max; (G##3).y *= max; \ 376 (G##4).x *= max; (G##4).y *= max; (G##5).x *= max; (G##5).y *= max; \ 377 (G##6).x *= max; (G##6).y *= max; (G##7).x *= max; (G##7).y *= max; \ 378 (G##8).x *= max; (G##8).y *= max; 380 #define RESCALE4(G, max) \ 381 (G##0).x *= max; (G##0).y *= max; (G##0).z *= max; (G##0).w *= max; \ 382 (G##1).x *= max; (G##1).y *= max; (G##1).z *= max; (G##1).w *= max; \ 383 (G##2).x *= max; (G##2).y *= max; (G##2).z *= max; (G##2).w *= max; \ 384 (G##3).x *= max; (G##3).y *= max; (G##3).z *= max; (G##3).w *= max; \ 385 (G##4).x *= max; (G##4).y *= max; 392 #define RECONSTRUCT_MATRIX_18_DOUBLE(dir) 393 #define RECONSTRUCT_MATRIX_18_SINGLE(dir) 396 #define do_boundary ga_idx >= param.dc.X4X3X2X1hmX3X2X1h 398 #define do_boundary ( (param.Pt0 && (ga_idx >= param.dc.Vh)) || ( param.PtNm1 && (ga_idx >= param.dc.X4X3X2X1hmX3X2X1h) && (ga_idx < param.dc.Vh) ) ) 401 #define RECONSTRUCT_MATRIX_12_DOUBLE(dir) \ 402 ACC_CONJ_PROD(g20, +g01, +g12); \ 403 ACC_CONJ_PROD(g20, -g02, +g11); \ 404 ACC_CONJ_PROD(g21, +g02, +g10); \ 405 ACC_CONJ_PROD(g21, -g00, +g12); \ 406 ACC_CONJ_PROD(g22, +g00, +g11); \ 407 ACC_CONJ_PROD(g22, -g01, +g10); \ 408 double u0 = (dir < 6 ? param.anisotropy : (do_boundary ? param.t_boundary : 1)); \ 409 G6.x*=u0; G6.y*=u0; G7.x*=u0; G7.y*=u0; G8.x*=u0; G8.y*=u0; 411 #define RECONSTRUCT_MATRIX_12_SINGLE(dir) \ 412 ACC_CONJ_PROD(g20, +g01, +g12); \ 413 ACC_CONJ_PROD(g20, -g02, +g11); \ 414 ACC_CONJ_PROD(g21, +g02, +g10); \ 415 ACC_CONJ_PROD(g21, -g00, +g12); \ 416 ACC_CONJ_PROD(g22, +g00, +g11); \ 417 ACC_CONJ_PROD(g22, -g01, +g10); \ 418 float u0 = (dir < 6 ? param.anisotropy_f : (do_boundary ? param.t_boundary_f : 1)); \ 419 G3.x*=u0; G3.y*=u0; G3.z*=u0; G3.w*=u0; G4.x*=u0; G4.y*=u0; 421 #define RECONSTRUCT_MATRIX_8_DOUBLE(dir) \ 422 double row_sum = g01_re*g01_re; \ 423 row_sum += g01_im*g01_im; \ 424 row_sum += g02_re*g02_re; \ 425 row_sum += g02_im*g02_im; \ 426 double u0 = (dir < 6 ? param.anisotropy : (do_boundary ? param.t_boundary : 1)); \ 427 double u02_inv = 1.0 / (u0*u0); \ 428 double column_sum = u02_inv - row_sum; \ 429 double U00_mag = sqrt((column_sum > 0 ? column_sum : 0)); \ 430 sincos(g21_re, &g00_im, &g00_re); \ 433 column_sum += g10_re*g10_re; \ 434 column_sum += g10_im*g10_im; \ 435 sincos(g21_im, &g20_im, &g20_re); \ 436 double U20_mag = sqrt(((u02_inv - column_sum) > 0 ? (u02_inv-column_sum) : 0)); \ 439 double r_inv2 = 1.0 / (u0*row_sum); \ 441 COMPLEX_DOT_PRODUCT(A, g00, g10); \ 442 A_re *= u0; A_im *= u0; \ 443 COMPLEX_CONJUGATE_PRODUCT(g11, g20, g02); \ 444 ACC_COMPLEX_PROD(g11, A, g01); \ 447 COMPLEX_CONJUGATE_PRODUCT(g12, g20, g01); \ 448 ACC_COMPLEX_PROD(g12, -A, g02); \ 451 COMPLEX_DOT_PRODUCT(A, g00, g20); \ 452 A_re *= u0; A_im *= u0; \ 453 COMPLEX_CONJUGATE_PRODUCT(g21, g10, g02); \ 454 ACC_COMPLEX_PROD(g21, -A, g01); \ 457 COMPLEX_CONJUGATE_PRODUCT(g22, g10, g01); \ 458 ACC_COMPLEX_PROD(g22, A, g02); \ 462 #define RECONSTRUCT_MATRIX_8_SINGLE(dir) \ 463 float row_sum = g01_re*g01_re; \ 464 row_sum += g01_im*g01_im; \ 465 row_sum += g02_re*g02_re; \ 466 row_sum += g02_im*g02_im; \ 467 __sincosf(g21_re, &g00_im, &g00_re); \ 468 __sincosf(g21_im, &g20_im, &g20_re); \ 469 float2 u0_2 = (dir < 6 ? param.An2 : (do_boundary ? param.TB2 : param.No2)); \ 470 float column_sum = u0_2.y - row_sum; \ 471 float U00_mag = column_sum * rsqrtf((column_sum > 0 ? column_sum : 1e14)); \ 474 column_sum += g10_re*g10_re; \ 475 column_sum += g10_im*g10_im; \ 476 column_sum = u0_2.y - column_sum; \ 477 float U20_mag = column_sum * rsqrtf((column_sum > 0 ? column_sum : 1e14)); \ 480 float r_inv2 = __fdividef(1.0f, u0_2.x*row_sum); \ 482 COMPLEX_DOT_PRODUCT(A, g00, g10); \ 483 A_re *= u0_2.x; A_im *= u0_2.x; \ 484 COMPLEX_CONJUGATE_PRODUCT(g11, g20, g02); \ 485 ACC_COMPLEX_PROD(g11, A, g01); \ 488 COMPLEX_CONJUGATE_PRODUCT(g12, g20, g01); \ 489 ACC_COMPLEX_PROD(g12, -A, g02); \ 492 COMPLEX_DOT_PRODUCT(A, g00, g20); \ 493 A_re *= u0_2.x; A_im *= u0_2.x; \ 494 COMPLEX_CONJUGATE_PRODUCT(g21, g10, g02); \ 495 ACC_COMPLEX_PROD(g21, -A, g01); \ 498 COMPLEX_CONJUGATE_PRODUCT(g22, g10, g01); \ 499 ACC_COMPLEX_PROD(g22, A, g02); \ 507 #define RECONSTRUCT_GAUGE_MATRIX_12_SINGLE(dir, gauge, idx, sign) \ 508 ACC_CONJ_PROD(gauge##20, +gauge##01, +gauge##12); \ 509 ACC_CONJ_PROD(gauge##20, -gauge##02, +gauge##11); \ 510 ACC_CONJ_PROD(gauge##21, +gauge##02, +gauge##10); \ 511 ACC_CONJ_PROD(gauge##21, -gauge##00, +gauge##12); \ 512 ACC_CONJ_PROD(gauge##22, +gauge##00, +gauge##11); \ 513 ACC_CONJ_PROD(gauge##22, -gauge##01, +gauge##10); \ 514 {float u0 = param.coeff_f*sign; \ 515 gauge##20_re *=u0;gauge##20_im *=u0; gauge##21_re *=u0; gauge##21_im *=u0; \ 516 gauge##22_re *=u0;gauge##22_im *=u0;} 518 #define RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE(dir, gauge, idx, sign) \ 519 ACC_CONJ_PROD(gauge##20, +gauge##01, +gauge##12); \ 520 ACC_CONJ_PROD(gauge##20, -gauge##02, +gauge##11); \ 521 ACC_CONJ_PROD(gauge##21, +gauge##02, +gauge##10); \ 522 ACC_CONJ_PROD(gauge##21, -gauge##00, +gauge##12); \ 523 ACC_CONJ_PROD(gauge##22, +gauge##00, +gauge##11); \ 524 ACC_CONJ_PROD(gauge##22, -gauge##01, +gauge##10); \ 525 {double u0 = param.coeff* sign; \ 526 gauge##20_re *=u0;gauge##20_im *=u0; gauge##21_re *=u0; gauge##21_im *=u0; \ 527 gauge##22_re *=u0;gauge##22_im *=u0;} 530 #define RECONSTRUCT_GAUGE_MATRIX_13_SINGLE(dir, gauge, idx, sign) { \ 531 RECONSTRUCT_GAUGE_MATRIX_12_SINGLE(dir, gauge, idx, sign) \ 532 float exp_i3phase_re, exp_i3phase_im; \ 533 sincosf(3.f*PHASE, &exp_i3phase_im, &exp_i3phase_re); \ 535 COMPLEX_PRODUCT(A, exp_i3phase, gauge##20); \ 536 gauge##20_re = A_re; \ 537 gauge##20_im = A_im; \ 538 COMPLEX_PRODUCT(A, exp_i3phase, gauge##21); \ 539 gauge##21_re = A_re; \ 540 gauge##21_im = A_im; \ 541 COMPLEX_PRODUCT(A, exp_i3phase, gauge##22); \ 542 gauge##22_re = A_re; \ 543 gauge##22_im = A_im; \ 547 #define RECONSTRUCT_GAUGE_MATRIX_13_DOUBLE(dir, gauge, idx, sign) { \ 548 RECONSTRUCT_GAUGE_MATRIX_12_DOUBLE(dir, gauge, idx, sign) \ 549 double exp_i3phase_re, exp_i3phase_im; \ 550 sincos(3.*PHASE, &exp_i3phase_im, &exp_i3phase_re); \ 552 COMPLEX_PRODUCT(A, exp_i3phase, gauge##20); \ 553 gauge##20_re = A_re; \ 554 gauge##20_im = A_im; \ 555 COMPLEX_PRODUCT(A, exp_i3phase, gauge##21); \ 556 gauge##21_re = A_re; \ 557 gauge##21_im = A_im; \ 558 COMPLEX_PRODUCT(A, exp_i3phase, gauge##22); \ 559 gauge##22_re = A_re; \ 560 gauge##22_im = A_im; \ 566 #define RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE(dir, gauge, idx, sign) \ 567 double row_sum = gauge##01_re*gauge##01_re + gauge##01_im*gauge##01_im; \ 568 row_sum += gauge##02_re*gauge##02_re + gauge##02_im*gauge##02_im; \ 569 double u0 = param.coeff*sign; \ 570 double u02_inv = 1.0 / (u0*u0); \ 571 double column_sum = u02_inv - row_sum; \ 572 double U00_mag = sqrt(column_sum); \ 573 sincos(gauge##21_re, &gauge##00_im, &gauge##00_re); \ 574 gauge##00_re *= U00_mag; \ 575 gauge##00_im *= U00_mag; \ 576 column_sum += gauge##10_re*gauge##10_re; \ 577 column_sum += gauge##10_im*gauge##10_im; \ 578 sincos(gauge##21_im, &gauge##20_im, &gauge##20_re); \ 579 double U20_mag = sqrt(u02_inv - column_sum); \ 580 gauge##20_re *= U20_mag; \ 581 gauge##20_im *= U20_mag; \ 582 double r_inv2 = 1.0 / (u0*row_sum); \ 584 COMPLEX_DOT_PRODUCT(A, gauge##00, gauge##10); \ 585 A_re *= u0; A_im *= u0; \ 586 COMPLEX_CONJUGATE_PRODUCT(gauge##11, gauge##20, gauge##02); \ 587 ACC_COMPLEX_PROD(gauge##11, A, gauge##01); \ 588 gauge##11_re *= -r_inv2; \ 589 gauge##11_im *= -r_inv2; \ 590 COMPLEX_CONJUGATE_PRODUCT(gauge##12, gauge##20, gauge##01); \ 591 ACC_COMPLEX_PROD(gauge##12, -A, gauge##02); \ 592 gauge##12_re *= r_inv2; \ 593 gauge##12_im *= r_inv2; \ 594 COMPLEX_DOT_PRODUCT(A, gauge##00, gauge##20); \ 595 A_re *= u0; A_im *= u0; \ 596 COMPLEX_CONJUGATE_PRODUCT(gauge##21, gauge##10, gauge##02); \ 597 ACC_COMPLEX_PROD(gauge##21, -A, gauge##01); \ 598 gauge##21_re *= r_inv2; \ 599 gauge##21_im *= r_inv2; \ 600 COMPLEX_CONJUGATE_PRODUCT(gauge##22, gauge##10, gauge##01); \ 601 ACC_COMPLEX_PROD(gauge##22, A, gauge##02); \ 602 gauge##22_re *= -r_inv2; \ 603 gauge##22_im *= -r_inv2; 605 #define RECONSTRUCT_GAUGE_MATRIX_8_SINGLE(dir, gauge, idx, sign) { \ 606 float row_sum = gauge##01_re*gauge##01_re + gauge##01_im*gauge##01_im; \ 607 row_sum += gauge##02_re*gauge##02_re + gauge##02_im*gauge##02_im; \ 608 float u0 = param.coeff_f*sign; \ 609 float u02_inv = __fdividef(1.f, u0*u0); \ 610 float column_sum = u02_inv - row_sum; \ 611 float U00_mag = sqrtf(column_sum > 0 ?column_sum:0); \ 612 __sincosf(gauge##21_re, &gauge##00_im, &gauge##00_re); \ 613 gauge##00_re *= U00_mag; \ 614 gauge##00_im *= U00_mag; \ 615 column_sum += gauge##10_re*gauge##10_re; \ 616 column_sum += gauge##10_im*gauge##10_im; \ 617 __sincosf(gauge##21_im, &gauge##20_im, &gauge##20_re); \ 618 float U20_mag = sqrtf( (u02_inv - column_sum)>0? (u02_inv - column_sum): 0); \ 619 gauge##20_re *= U20_mag; \ 620 gauge##20_im *= U20_mag; \ 621 float r_inv2 = __fdividef(1.0f, u0*row_sum); \ 623 COMPLEX_DOT_PRODUCT(A, gauge##00, gauge##10); \ 624 A_re *= u0; A_im *= u0; \ 625 COMPLEX_CONJUGATE_PRODUCT(gauge##11, gauge##20, gauge##02); \ 626 ACC_COMPLEX_PROD(gauge##11, A, gauge##01); \ 627 gauge##11_re *= -r_inv2; \ 628 gauge##11_im *= -r_inv2; \ 629 COMPLEX_CONJUGATE_PRODUCT(gauge##12, gauge##20, gauge##01); \ 630 ACC_COMPLEX_PROD(gauge##12, -A, gauge##02); \ 631 gauge##12_re *= r_inv2; \ 632 gauge##12_im *= r_inv2; \ 633 COMPLEX_DOT_PRODUCT(A, gauge##00, gauge##20); \ 634 A_re *= u0; A_im *= u0; \ 635 COMPLEX_CONJUGATE_PRODUCT(gauge##21, gauge##10, gauge##02); \ 636 ACC_COMPLEX_PROD(gauge##21, -A, gauge##01); \ 637 gauge##21_re *= r_inv2; \ 638 gauge##21_im *= r_inv2; \ 639 COMPLEX_CONJUGATE_PRODUCT(gauge##22, gauge##10, gauge##01); \ 640 ACC_COMPLEX_PROD(gauge##22, A, gauge##02); \ 641 gauge##22_re *= -r_inv2; \ 642 gauge##22_im *= -r_inv2;} 646 #define RECONSTRUCT_GAUGE_MATRIX_9_SINGLE(dir, gauge, idx, sign) { \ 647 RECONSTRUCT_GAUGE_MATRIX_8_SINGLE(dir, gauge, idx, sign) \ 648 float exp_iphase_re, exp_iphase_im; \ 649 __sincosf(PHASE, &exp_iphase_im, &exp_iphase_re); \ 651 COMPLEX_PRODUCT(B, exp_iphase, gauge##00); \ 652 gauge##00_re = B_re; \ 653 gauge##00_im = B_im; \ 654 COMPLEX_PRODUCT(B, exp_iphase, gauge##01); \ 655 gauge##01_re = B_re; \ 656 gauge##01_im = B_im; \ 657 COMPLEX_PRODUCT(B, exp_iphase, gauge##02); \ 658 gauge##02_re = B_re; \ 659 gauge##02_im = B_im; \ 660 COMPLEX_PRODUCT(B, exp_iphase, gauge##10); \ 661 gauge##10_re = B_re; \ 662 gauge##10_im = B_im; \ 663 COMPLEX_PRODUCT(B, exp_iphase, gauge##11); \ 664 gauge##11_re = B_re; \ 665 gauge##11_im = B_im; \ 666 COMPLEX_PRODUCT(B, exp_iphase, gauge##12); \ 667 gauge##12_re = B_re; \ 668 gauge##12_im = B_im; \ 669 COMPLEX_PRODUCT(B, exp_iphase, gauge##20); \ 670 gauge##20_re = B_re; \ 671 gauge##20_im = B_im; \ 672 COMPLEX_PRODUCT(B, exp_iphase, gauge##21); \ 673 gauge##21_re = B_re; \ 674 gauge##21_im = B_im; \ 675 COMPLEX_PRODUCT(B, exp_iphase, gauge##22); \ 676 gauge##22_re = B_re; \ 677 gauge##22_im = B_im; \ 681 #define RECONSTRUCT_GAUGE_MATRIX_9_DOUBLE(dir, gauge, idx, sign) { \ 682 RECONSTRUCT_GAUGE_MATRIX_8_DOUBLE(dir, gauge, idx, sign) \ 683 double exp_iphase_re, exp_iphase_im; \ 684 sincos(PHASE, &exp_iphase_im, &exp_iphase_re); \ 686 COMPLEX_PRODUCT(B, exp_iphase, gauge##00); \ 687 gauge##00_re = B_re; \ 688 gauge##00_im = B_im; \ 689 COMPLEX_PRODUCT(B, exp_iphase, gauge##01); \ 690 gauge##01_re = B_re; \ 691 gauge##01_im = B_im; \ 692 COMPLEX_PRODUCT(B, exp_iphase, gauge##02); \ 693 gauge##02_re = B_re; \ 694 gauge##02_im = B_im; \ 695 COMPLEX_PRODUCT(B, exp_iphase, gauge##10); \ 696 gauge##10_re = B_re; \ 697 gauge##10_im = B_im; \ 698 COMPLEX_PRODUCT(B, exp_iphase, gauge##11); \ 699 gauge##11_re = B_re; \ 700 gauge##11_im = B_im; \ 701 COMPLEX_PRODUCT(B, exp_iphase, gauge##12); \ 702 gauge##12_re = B_re; \ 703 gauge##12_im = B_im; \ 704 COMPLEX_PRODUCT(B, exp_iphase, gauge##20); \ 705 gauge##20_re = B_re; \ 706 gauge##20_im = B_im; \ 707 COMPLEX_PRODUCT(B, exp_iphase, gauge##21); \ 708 gauge##21_re = B_re; \ 709 gauge##21_im = B_im; \ 710 COMPLEX_PRODUCT(B, exp_iphase, gauge##22); \ 711 gauge##22_re = B_re; \ 712 gauge##22_im = B_im; \ 719 #ifdef FERMI_NO_DBLE_TEX 720 #define READ_GAUGE_MATRIX_18_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 721 READ_GAUGE_MATRIX_18_DOUBLE2(G, gauge, dir, idx, stride) 722 #define READ_GAUGE_MATRIX_12_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 723 READ_GAUGE_MATRIX_12_DOUBLE2(G, gauge, dir, idx, stride) 724 #define READ_GAUGE_MATRIX_8_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 725 READ_GAUGE_MATRIX_8_DOUBLE2(G, gauge, dir, idx, stride) 726 #define READ_GAUGE_PHASE_DOUBLE_TEX(P, phase, dir, idx, stride) \ 727 READ_GAUGE_PHASE_DOUBLE(P, phase, dir, idx, stride) 731 #define ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 732 ASSN_GAUGE_MATRIX_18_DOUBLE2(G, gauge, dir, idx, stride) 733 #define ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 734 ASSN_GAUGE_MATRIX_12_DOUBLE2(G, gauge, dir, idx, stride) 735 #define ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 736 ASSN_GAUGE_MATRIX_8_DOUBLE2(G, gauge, dir, idx, stride) 740 #define READ_GAUGE_MATRIX_18_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 741 double2 G##0 = fetch_double2((gauge), idx + ((dir/2)*9+0)*stride); \ 742 double2 G##1 = fetch_double2((gauge), idx + ((dir/2)*9+1)*stride); \ 743 double2 G##2 = fetch_double2((gauge), idx + ((dir/2)*9+2)*stride); \ 744 double2 G##3 = fetch_double2((gauge), idx + ((dir/2)*9+3)*stride); \ 745 double2 G##4 = fetch_double2((gauge), idx + ((dir/2)*9+4)*stride); \ 746 double2 G##5 = fetch_double2((gauge), idx + ((dir/2)*9+5)*stride); \ 747 double2 G##6 = fetch_double2((gauge), idx + ((dir/2)*9+6)*stride); \ 748 double2 G##7 = fetch_double2((gauge), idx + ((dir/2)*9+7)*stride); \ 749 double2 G##8 = fetch_double2((gauge), idx + ((dir/2)*9+8)*stride); \ 751 #define READ_GAUGE_MATRIX_12_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 752 double2 G##0 = fetch_double2((gauge), idx + ((dir/2)*6+0)*stride); \ 753 double2 G##1 = fetch_double2((gauge), idx + ((dir/2)*6+1)*stride); \ 754 double2 G##2 = fetch_double2((gauge), idx + ((dir/2)*6+2)*stride); \ 755 double2 G##3 = fetch_double2((gauge), idx + ((dir/2)*6+3)*stride); \ 756 double2 G##4 = fetch_double2((gauge), idx + ((dir/2)*6+4)*stride); \ 757 double2 G##5 = fetch_double2((gauge), idx + ((dir/2)*6+5)*stride); \ 758 double2 G##6 = make_double2(0,0); \ 759 double2 G##7 = make_double2(0,0); \ 760 double2 G##8 = make_double2(0,0); \ 763 #define READ_GAUGE_MATRIX_8_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 764 double2 G##0 = fetch_double2((gauge), idx + ((dir/2)*4+0)*stride); \ 765 double2 G##1 = fetch_double2((gauge), idx + ((dir/2)*4+1)*stride); \ 766 double2 G##2 = fetch_double2((gauge), idx + ((dir/2)*4+2)*stride); \ 767 double2 G##3 = fetch_double2((gauge), idx + ((dir/2)*4+3)*stride); \ 768 double2 G##4 = make_double2(0,0); \ 769 double2 G##5 = make_double2(0,0); \ 770 double2 G##6 = make_double2(0,0); \ 771 double2 G##7 = make_double2(0,0); \ 772 double2 G##8 = make_double2(0,0); \ 773 (G##7).x = (G##0).x; \ 779 #define READ_GAUGE_PHASE_FLOAT_TEX(P, phase, dir, idx, stride) { \ 780 P = 2.f*M_PI*TEX1DFETCH(float, (phase), idx + (dir/2)*stride); \ 783 #define READ_GAUGE_PHASE_SHORT_TEX(P, phase, dir, idx, stride) READ_GAUGE_PHASE_FLOAT_TEX(P, phase, dir, idx, stride) 785 #define READ_GAUGE_PHASE_DOUBLE_TEX(P, phase, dir, idx, stride) { \ 786 P = 2*M_PI*fetch_double((phase), idx + (dir/2)*stride); \ 791 #define ASSN_GAUGE_MATRIX_18_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 792 G##0 = fetch_double2((gauge), idx + ((dir/2)*9+0)*stride); \ 793 G##1 = fetch_double2((gauge), idx + ((dir/2)*9+1)*stride); \ 794 G##2 = fetch_double2((gauge), idx + ((dir/2)*9+2)*stride); \ 795 G##3 = fetch_double2((gauge), idx + ((dir/2)*9+3)*stride); \ 796 G##4 = fetch_double2((gauge), idx + ((dir/2)*9+4)*stride); \ 797 G##5 = fetch_double2((gauge), idx + ((dir/2)*9+5)*stride); \ 798 G##6 = fetch_double2((gauge), idx + ((dir/2)*9+6)*stride); \ 799 G##7 = fetch_double2((gauge), idx + ((dir/2)*9+7)*stride); \ 800 G##8 = fetch_double2((gauge), idx + ((dir/2)*9+8)*stride); \ 802 #define ASSN_GAUGE_MATRIX_12_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 803 G##0 = fetch_double2((gauge), idx + ((dir/2)*6+0)*stride); \ 804 G##1 = fetch_double2((gauge), idx + ((dir/2)*6+1)*stride); \ 805 G##2 = fetch_double2((gauge), idx + ((dir/2)*6+2)*stride); \ 806 G##3 = fetch_double2((gauge), idx + ((dir/2)*6+3)*stride); \ 807 G##4 = fetch_double2((gauge), idx + ((dir/2)*6+4)*stride); \ 808 G##5 = fetch_double2((gauge), idx + ((dir/2)*6+5)*stride); \ 809 G##6 = make_double2(0,0); \ 810 G##7 = make_double2(0,0); \ 811 G##8 = make_double2(0,0); \ 814 #define ASSN_GAUGE_MATRIX_8_DOUBLE2_TEX(G, gauge, dir, idx, stride) \ 815 G##0 = fetch_double2((gauge), idx + ((dir/2)*4+0)*stride); \ 816 G##1 = fetch_double2((gauge), idx + ((dir/2)*4+1)*stride); \ 817 G##2 = fetch_double2((gauge), idx + ((dir/2)*4+2)*stride); \ 818 G##3 = fetch_double2((gauge), idx + ((dir/2)*4+3)*stride); \ 819 G##4 = make_double2(0,0); \ 820 G##5 = make_double2(0,0); \ 821 G##6 = make_double2(0,0); \ 822 G##7 = make_double2(0,0); \ 823 G##8 = make_double2(0,0); \ 824 (G##7).x = (G##0).x; \