|
QUDA v0.3.2
A library for QCD on GPUs
|
// *** CUDA DSLASH DAGGER ***
//
// Register-name aliases and output-spinor storage for the dslash-dagger
// kernel body.  This fragment only defines macros, declares local variables
// and pulls in helper headers.  Because it declares bare locals it is
// presumably #included inside a kernel function body -- confirm at the
// include site.
//
// Names referenced here but defined elsewhere: I0.., G0.., C0.. (the macros
// below select their .x/.y/.z/.w components), the pointer `s`, and
// SHARED_STRIDE, which must be defined before any of o00_re..o10_im is
// expanded.

// number of output components per thread addressed through `s` (o00..o10)
#define SHARED_FLOATS_PER_THREAD 8

// input spinor
// iSC_re / iSC_im: first digit S = 0..3, second digit C = 0..2
// (presumably spin and colour indices -- confirm against io_spinor.h)
#ifdef SPINOR_DOUBLE
// two components per register: I0..I11, fields .x/.y
#define spinorFloat double
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I1.x
#define i01_im I1.y
#define i02_re I2.x
#define i02_im I2.y
#define i10_re I3.x
#define i10_im I3.y
#define i11_re I4.x
#define i11_im I4.y
#define i12_re I5.x
#define i12_im I5.y
#define i20_re I6.x
#define i20_im I6.y
#define i21_re I7.x
#define i21_im I7.y
#define i22_re I8.x
#define i22_im I8.y
#define i30_re I9.x
#define i30_im I9.y
#define i31_re I10.x
#define i31_im I10.y
#define i32_re I11.x
#define i32_im I11.y

#else
// four components per register: I0..I5, fields .x/.y/.z/.w
#define spinorFloat float
#define i00_re I0.x
#define i00_im I0.y
#define i01_re I0.z
#define i01_im I0.w
#define i02_re I1.x
#define i02_im I1.y
#define i10_re I1.z
#define i10_im I1.w
#define i11_re I2.x
#define i11_im I2.y
#define i12_re I2.z
#define i12_im I2.w
#define i20_re I3.x
#define i20_im I3.y
#define i21_re I3.z
#define i21_im I3.w
#define i22_re I4.x
#define i22_im I4.y
#define i30_re I4.z
#define i30_im I4.w
#define i31_re I5.x
#define i31_im I5.y
#define i32_re I5.z
#define i32_im I5.w
#endif // SPINOR_DOUBLE

// gauge link
// gRC_re / gRC_im: row R, column C of the 3x3 complex link matrix
#ifdef GAUGE_FLOAT2
// two components per register: G0..G8, plus G9 as scratch
#define g00_re G0.x
#define g00_im G0.y
#define g01_re G1.x
#define g01_im G1.y
#define g02_re G2.x
#define g02_im G2.y
#define g10_re G3.x
#define g10_im G3.y
#define g11_re G4.x
#define g11_im G4.y
#define g12_re G5.x
#define g12_im G5.y
#define g20_re G6.x
#define g20_im G6.y
#define g21_re G7.x
#define g21_im G7.y
#define g22_re G8.x
#define g22_im G8.y
// temporaries
#define A_re G9.x
#define A_im G9.y

#else
// four components per register: G0..G4; the unused half of G4 is scratch
#define g00_re G0.x
#define g00_im G0.y
#define g01_re G0.z
#define g01_im G0.w
#define g02_re G1.x
#define g02_im G1.y
#define g10_re G1.z
#define g10_im G1.w
#define g11_re G2.x
#define g11_im G2.y
#define g12_re G2.z
#define g12_im G2.w
#define g20_re G3.x
#define g20_im G3.y
#define g21_re G3.z
#define g21_im G3.w
#define g22_re G4.x
#define g22_im G4.y
// temporaries
#define A_re G4.z
#define A_im G4.w

#endif // GAUGE_FLOAT2

// conjugated gauge link
// gTrc = conj(g_cr): indices swapped, imaginary part negated
#define gT00_re (+g00_re)
#define gT00_im (-g00_im)
#define gT01_re (+g10_re)
#define gT01_im (-g10_im)
#define gT02_re (+g20_re)
#define gT02_im (-g20_im)
#define gT10_re (+g01_re)
#define gT10_im (-g01_im)
#define gT11_re (+g11_re)
#define gT11_im (-g11_im)
#define gT12_re (+g21_re)
#define gT12_im (-g21_im)
#define gT20_re (+g02_re)
#define gT20_im (-g02_im)
#define gT21_re (+g12_re)
#define gT21_im (-g12_im)
#define gT22_re (+g22_re)
#define gT22_im (-g22_im)

// first chiral block of inverted clover term
// Only the six real diagonal entries and the entries cAB_CD with AB after CD
// are given registers; the remaining entries are derived by conjugation
// further down.
#ifdef CLOVER_DOUBLE
// two components per register: C0..C17
#define c00_00_re C0.x
#define c01_01_re C0.y
#define c02_02_re C1.x
#define c10_10_re C1.y
#define c11_11_re C2.x
#define c12_12_re C2.y
#define c01_00_re C3.x
#define c01_00_im C3.y
#define c02_00_re C4.x
#define c02_00_im C4.y
#define c10_00_re C5.x
#define c10_00_im C5.y
#define c11_00_re C6.x
#define c11_00_im C6.y
#define c12_00_re C7.x
#define c12_00_im C7.y
#define c02_01_re C8.x
#define c02_01_im C8.y
#define c10_01_re C9.x
#define c10_01_im C9.y
#define c11_01_re C10.x
#define c11_01_im C10.y
#define c12_01_re C11.x
#define c12_01_im C11.y
#define c10_02_re C12.x
#define c10_02_im C12.y
#define c11_02_re C13.x
#define c11_02_im C13.y
#define c12_02_re C14.x
#define c12_02_im C14.y
#define c11_10_re C15.x
#define c11_10_im C15.y
#define c12_10_re C16.x
#define c12_10_im C16.y
#define c12_11_re C17.x
#define c12_11_im C17.y
#else
// four components per register: C0..C8
#define c00_00_re C0.x
#define c01_01_re C0.y
#define c02_02_re C0.z
#define c10_10_re C0.w
#define c11_11_re C1.x
#define c12_12_re C1.y
#define c01_00_re C1.z
#define c01_00_im C1.w
#define c02_00_re C2.x
#define c02_00_im C2.y
#define c10_00_re C2.z
#define c10_00_im C2.w
#define c11_00_re C3.x
#define c11_00_im C3.y
#define c12_00_re C3.z
#define c12_00_im C3.w
#define c02_01_re C4.x
#define c02_01_im C4.y
#define c10_01_re C4.z
#define c10_01_im C4.w
#define c11_01_re C5.x
#define c11_01_im C5.y
#define c12_01_re C5.z
#define c12_01_im C5.w
#define c10_02_re C6.x
#define c10_02_im C6.y
#define c11_02_re C6.z
#define c11_02_im C6.w
#define c12_02_re C7.x
#define c12_02_im C7.y
#define c11_10_re C7.z
#define c11_10_im C7.w
#define c12_10_re C8.x
#define c12_10_im C8.y
#define c12_11_re C8.z
#define c12_11_im C8.w
#endif // CLOVER_DOUBLE

// remaining entries of the first block: cAB_CD = conj(cCD_AB)
#define c00_01_re (+c01_00_re)
#define c00_01_im (-c01_00_im)
#define c00_02_re (+c02_00_re)
#define c00_02_im (-c02_00_im)
#define c01_02_re (+c02_01_re)
#define c01_02_im (-c02_01_im)
#define c00_10_re (+c10_00_re)
#define c00_10_im (-c10_00_im)
#define c01_10_re (+c10_01_re)
#define c01_10_im (-c10_01_im)
#define c02_10_re (+c10_02_re)
#define c02_10_im (-c10_02_im)
#define c00_11_re (+c11_00_re)
#define c00_11_im (-c11_00_im)
#define c01_11_re (+c11_01_re)
#define c01_11_im (-c11_01_im)
#define c02_11_re (+c11_02_re)
#define c02_11_im (-c11_02_im)
#define c10_11_re (+c11_10_re)
#define c10_11_im (-c11_10_im)
#define c00_12_re (+c12_00_re)
#define c00_12_im (-c12_00_im)
#define c01_12_re (+c12_01_re)
#define c01_12_im (-c12_01_im)
#define c02_12_re (+c12_02_re)
#define c02_12_im (-c12_02_im)
#define c10_12_re (+c12_10_re)
#define c10_12_im (-c12_10_im)
#define c11_12_re (+c12_11_re)
#define c11_12_im (-c12_11_im)

// second chiral block of inverted clover term
// Every name below aliases the corresponding first-block name, so the second
// block reads the same C registers (C0..C17 in double, C0..C8 otherwise).
// Presumably the registers are reloaded between the two blocks -- confirm in
// read_clover.h.
#define c20_20_re c00_00_re
#define c21_20_re c01_00_re
#define c21_20_im c01_00_im
#define c22_20_re c02_00_re
#define c22_20_im c02_00_im
#define c30_20_re c10_00_re
#define c30_20_im c10_00_im
#define c31_20_re c11_00_re
#define c31_20_im c11_00_im
#define c32_20_re c12_00_re
#define c32_20_im c12_00_im
#define c20_21_re c00_01_re
#define c20_21_im c00_01_im
#define c21_21_re c01_01_re
#define c22_21_re c02_01_re
#define c22_21_im c02_01_im
#define c30_21_re c10_01_re
#define c30_21_im c10_01_im
#define c31_21_re c11_01_re
#define c31_21_im c11_01_im
#define c32_21_re c12_01_re
#define c32_21_im c12_01_im
#define c20_22_re c00_02_re
#define c20_22_im c00_02_im
#define c21_22_re c01_02_re
#define c21_22_im c01_02_im
#define c22_22_re c02_02_re
#define c30_22_re c10_02_re
#define c30_22_im c10_02_im
#define c31_22_re c11_02_re
#define c31_22_im c11_02_im
#define c32_22_re c12_02_re
#define c32_22_im c12_02_im
#define c20_30_re c00_10_re
#define c20_30_im c00_10_im
#define c21_30_re c01_10_re
#define c21_30_im c01_10_im
#define c22_30_re c02_10_re
#define c22_30_im c02_10_im
#define c30_30_re c10_10_re
#define c31_30_re c11_10_re
#define c31_30_im c11_10_im
#define c32_30_re c12_10_re
#define c32_30_im c12_10_im
#define c20_31_re c00_11_re
#define c20_31_im c00_11_im
#define c21_31_re c01_11_re
#define c21_31_im c01_11_im
#define c22_31_re c02_11_re
#define c22_31_im c02_11_im
#define c30_31_re c10_11_re
#define c30_31_im c10_11_im
#define c31_31_re c11_11_re
#define c32_31_re c12_11_re
#define c32_31_im c12_11_im
#define c20_32_re c00_12_re
#define c20_32_im c00_12_im
#define c21_32_re c01_12_re
#define c21_32_im c01_12_im
#define c22_32_re c02_12_re
#define c22_32_im c02_12_im
#define c30_32_re c10_12_re
#define c30_32_im c10_12_im
#define c31_32_re c11_12_re
#define c31_32_im c11_12_im
#define c32_32_re c12_12_re

// output spinor
// The first SHARED_FLOATS_PER_THREAD (8) components are addressed through
// the pointer `s`, one SHARED_STRIDE apart; the other 16 are plain volatile
// locals.
#define o00_re s[0*SHARED_STRIDE]
#define o00_im s[1*SHARED_STRIDE]
#define o01_re s[2*SHARED_STRIDE]
#define o01_im s[3*SHARED_STRIDE]
#define o02_re s[4*SHARED_STRIDE]
#define o02_im s[5*SHARED_STRIDE]
#define o10_re s[6*SHARED_STRIDE]
#define o10_im s[7*SHARED_STRIDE]
volatile spinorFloat o11_re;
volatile spinorFloat o11_im;
volatile spinorFloat o12_re;
volatile spinorFloat o12_im;
volatile spinorFloat o20_re;
volatile spinorFloat o20_im;
volatile spinorFloat o21_re;
volatile spinorFloat o21_im;
volatile spinorFloat o22_re;
volatile spinorFloat o22_im;
volatile spinorFloat o30_re;
volatile spinorFloat o30_im;
volatile spinorFloat o31_re;
volatile spinorFloat o31_im;
volatile spinorFloat o32_re;
volatile spinorFloat o32_im;



// helper headers; presumably the source of READ_GAUGE_MATRIX,
// RECONSTRUCT_GAUGE_MATRIX and READ_SPINOR used by the kernel body -- confirm
#include "read_gauge.h"
#include "read_clover.h"
#include "io_spinor.h"
00337 00338 int sid = blockIdx.x*blockDim.x + threadIdx.x; 00339 int z1 = FAST_INT_DIVIDE(sid, X1h); 00340 int x1h = sid - z1*X1h; 00341 int z2 = FAST_INT_DIVIDE(z1, X2); 00342 int x2 = z1 - z2*X2; 00343 int x4 = FAST_INT_DIVIDE(z2, X3); 00344 int x3 = z2 - x4*X3; 00345 int x1odd = (x2 + x3 + x4 + oddBit) & 1; 00346 int x1 = 2*x1h + x1odd; 00347 int X = 2*sid + x1odd; 00348 00349 #ifdef SPINOR_DOUBLE 00350 #if (__CUDA_ARCH__ >= 200) 00351 #define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi 00352 #else 00353 #define SHARED_STRIDE 8 // to avoid bank conflicts on G80 and GT200 00354 #endif 00355 extern __shared__ spinorFloat sd_data[]; 00356 volatile spinorFloat *s = sd_data + SHARED_FLOATS_PER_THREAD*SHARED_STRIDE*(threadIdx.x/SHARED_STRIDE) 00357 + (threadIdx.x % SHARED_STRIDE); 00358 #else 00359 #if (__CUDA_ARCH__ >= 200) 00360 #define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi 00361 #else 00362 #define SHARED_STRIDE 16 // to avoid bank conflicts on G80 and GT200 00363 #endif 00364 extern __shared__ spinorFloat ss_data[]; 00365 volatile spinorFloat *s = ss_data + SHARED_FLOATS_PER_THREAD*SHARED_STRIDE*(threadIdx.x/SHARED_STRIDE) 00366 + (threadIdx.x % SHARED_STRIDE); 00367 #endif 00368 00369 o00_re = o00_im = 0; 00370 o01_re = o01_im = 0; 00371 o02_re = o02_im = 0; 00372 o10_re = o10_im = 0; 00373 o11_re = o11_im = 0; 00374 o12_re = o12_im = 0; 00375 o20_re = o20_im = 0; 00376 o21_re = o21_im = 0; 00377 o22_re = o22_im = 0; 00378 o30_re = o30_im = 0; 00379 o31_re = o31_im = 0; 00380 o32_re = o32_im = 0; 00381 00382 { 00383 // Projector P0+ 00384 // 1 0 0 i 00385 // 0 1 i 0 00386 // 0 -i 1 0 00387 // -i 0 0 1 00388 00389 int sp_idx = ((x1==X1m1) ? 
X-X1m1 : X+1) >> 1; 00390 int ga_idx = sid; 00391 00392 // read gauge matrix from device memory 00393 READ_GAUGE_MATRIX(GAUGE0TEX, 0); 00394 00395 // read spinor from device memory 00396 READ_SPINOR(SPINORTEX); 00397 00398 // reconstruct gauge matrix 00399 RECONSTRUCT_GAUGE_MATRIX(0); 00400 00401 // project spinor into half spinors 00402 spinorFloat a0_re = +i00_re-i30_im; 00403 spinorFloat a0_im = +i00_im+i30_re; 00404 spinorFloat a1_re = +i01_re-i31_im; 00405 spinorFloat a1_im = +i01_im+i31_re; 00406 spinorFloat a2_re = +i02_re-i32_im; 00407 spinorFloat a2_im = +i02_im+i32_re; 00408 00409 spinorFloat b0_re = +i10_re-i20_im; 00410 spinorFloat b0_im = +i10_im+i20_re; 00411 spinorFloat b1_re = +i11_re-i21_im; 00412 spinorFloat b1_im = +i11_im+i21_re; 00413 spinorFloat b2_re = +i12_re-i22_im; 00414 spinorFloat b2_im = +i12_im+i22_re; 00415 00416 // multiply row 0 00417 spinorFloat A0_re = 0; 00418 A0_re += g00_re * a0_re; 00419 A0_re -= g00_im * a0_im; 00420 A0_re += g01_re * a1_re; 00421 A0_re -= g01_im * a1_im; 00422 A0_re += g02_re * a2_re; 00423 A0_re -= g02_im * a2_im; 00424 spinorFloat A0_im = 0; 00425 A0_im += g00_re * a0_im; 00426 A0_im += g00_im * a0_re; 00427 A0_im += g01_re * a1_im; 00428 A0_im += g01_im * a1_re; 00429 A0_im += g02_re * a2_im; 00430 A0_im += g02_im * a2_re; 00431 spinorFloat B0_re = 0; 00432 B0_re += g00_re * b0_re; 00433 B0_re -= g00_im * b0_im; 00434 B0_re += g01_re * b1_re; 00435 B0_re -= g01_im * b1_im; 00436 B0_re += g02_re * b2_re; 00437 B0_re -= g02_im * b2_im; 00438 spinorFloat B0_im = 0; 00439 B0_im += g00_re * b0_im; 00440 B0_im += g00_im * b0_re; 00441 B0_im += g01_re * b1_im; 00442 B0_im += g01_im * b1_re; 00443 B0_im += g02_re * b2_im; 00444 B0_im += g02_im * b2_re; 00445 00446 // multiply row 1 00447 spinorFloat A1_re = 0; 00448 A1_re += g10_re * a0_re; 00449 A1_re -= g10_im * a0_im; 00450 A1_re += g11_re * a1_re; 00451 A1_re -= g11_im * a1_im; 00452 A1_re += g12_re * a2_re; 00453 A1_re -= g12_im * a2_im; 00454 spinorFloat 
A1_im = 0; 00455 A1_im += g10_re * a0_im; 00456 A1_im += g10_im * a0_re; 00457 A1_im += g11_re * a1_im; 00458 A1_im += g11_im * a1_re; 00459 A1_im += g12_re * a2_im; 00460 A1_im += g12_im * a2_re; 00461 spinorFloat B1_re = 0; 00462 B1_re += g10_re * b0_re; 00463 B1_re -= g10_im * b0_im; 00464 B1_re += g11_re * b1_re; 00465 B1_re -= g11_im * b1_im; 00466 B1_re += g12_re * b2_re; 00467 B1_re -= g12_im * b2_im; 00468 spinorFloat B1_im = 0; 00469 B1_im += g10_re * b0_im; 00470 B1_im += g10_im * b0_re; 00471 B1_im += g11_re * b1_im; 00472 B1_im += g11_im * b1_re; 00473 B1_im += g12_re * b2_im; 00474 B1_im += g12_im * b2_re; 00475 00476 // multiply row 2 00477 spinorFloat A2_re = 0; 00478 A2_re += g20_re * a0_re; 00479 A2_re -= g20_im * a0_im; 00480 A2_re += g21_re * a1_re; 00481 A2_re -= g21_im * a1_im; 00482 A2_re += g22_re * a2_re; 00483 A2_re -= g22_im * a2_im; 00484 spinorFloat A2_im = 0; 00485 A2_im += g20_re * a0_im; 00486 A2_im += g20_im * a0_re; 00487 A2_im += g21_re * a1_im; 00488 A2_im += g21_im * a1_re; 00489 A2_im += g22_re * a2_im; 00490 A2_im += g22_im * a2_re; 00491 spinorFloat B2_re = 0; 00492 B2_re += g20_re * b0_re; 00493 B2_re -= g20_im * b0_im; 00494 B2_re += g21_re * b1_re; 00495 B2_re -= g21_im * b1_im; 00496 B2_re += g22_re * b2_re; 00497 B2_re -= g22_im * b2_im; 00498 spinorFloat B2_im = 0; 00499 B2_im += g20_re * b0_im; 00500 B2_im += g20_im * b0_re; 00501 B2_im += g21_re * b1_im; 00502 B2_im += g21_im * b1_re; 00503 B2_im += g22_re * b2_im; 00504 B2_im += g22_im * b2_re; 00505 00506 o00_re += A0_re; 00507 o00_im += A0_im; 00508 o10_re += B0_re; 00509 o10_im += B0_im; 00510 o20_re += B0_im; 00511 o20_im -= B0_re; 00512 o30_re += A0_im; 00513 o30_im -= A0_re; 00514 00515 o01_re += A1_re; 00516 o01_im += A1_im; 00517 o11_re += B1_re; 00518 o11_im += B1_im; 00519 o21_re += B1_im; 00520 o21_im -= B1_re; 00521 o31_re += A1_im; 00522 o31_im -= A1_re; 00523 00524 o02_re += A2_re; 00525 o02_im += A2_im; 00526 o12_re += B2_re; 00527 o12_im += B2_im; 
00528 o22_re += B2_im; 00529 o22_im -= B2_re; 00530 o32_re += A2_im; 00531 o32_im -= A2_re; 00532 00533 } 00534 00535 { 00536 // Projector P0- 00537 // 1 0 0 -i 00538 // 0 1 -i 0 00539 // 0 i 1 0 00540 // i 0 0 1 00541 00542 int sp_idx = ((x1==0) ? X+X1m1 : X-1) >> 1; 00543 int ga_idx = sp_idx; 00544 00545 // read gauge matrix from device memory 00546 READ_GAUGE_MATRIX(GAUGE1TEX, 1); 00547 00548 // read spinor from device memory 00549 READ_SPINOR(SPINORTEX); 00550 00551 // reconstruct gauge matrix 00552 RECONSTRUCT_GAUGE_MATRIX(1); 00553 00554 // project spinor into half spinors 00555 spinorFloat a0_re = +i00_re+i30_im; 00556 spinorFloat a0_im = +i00_im-i30_re; 00557 spinorFloat a1_re = +i01_re+i31_im; 00558 spinorFloat a1_im = +i01_im-i31_re; 00559 spinorFloat a2_re = +i02_re+i32_im; 00560 spinorFloat a2_im = +i02_im-i32_re; 00561 00562 spinorFloat b0_re = +i10_re+i20_im; 00563 spinorFloat b0_im = +i10_im-i20_re; 00564 spinorFloat b1_re = +i11_re+i21_im; 00565 spinorFloat b1_im = +i11_im-i21_re; 00566 spinorFloat b2_re = +i12_re+i22_im; 00567 spinorFloat b2_im = +i12_im-i22_re; 00568 00569 // multiply row 0 00570 spinorFloat A0_re = 0; 00571 A0_re += gT00_re * a0_re; 00572 A0_re -= gT00_im * a0_im; 00573 A0_re += gT01_re * a1_re; 00574 A0_re -= gT01_im * a1_im; 00575 A0_re += gT02_re * a2_re; 00576 A0_re -= gT02_im * a2_im; 00577 spinorFloat A0_im = 0; 00578 A0_im += gT00_re * a0_im; 00579 A0_im += gT00_im * a0_re; 00580 A0_im += gT01_re * a1_im; 00581 A0_im += gT01_im * a1_re; 00582 A0_im += gT02_re * a2_im; 00583 A0_im += gT02_im * a2_re; 00584 spinorFloat B0_re = 0; 00585 B0_re += gT00_re * b0_re; 00586 B0_re -= gT00_im * b0_im; 00587 B0_re += gT01_re * b1_re; 00588 B0_re -= gT01_im * b1_im; 00589 B0_re += gT02_re * b2_re; 00590 B0_re -= gT02_im * b2_im; 00591 spinorFloat B0_im = 0; 00592 B0_im += gT00_re * b0_im; 00593 B0_im += gT00_im * b0_re; 00594 B0_im += gT01_re * b1_im; 00595 B0_im += gT01_im * b1_re; 00596 B0_im += gT02_re * b2_im; 00597 B0_im += 
gT02_im * b2_re; 00598 00599 // multiply row 1 00600 spinorFloat A1_re = 0; 00601 A1_re += gT10_re * a0_re; 00602 A1_re -= gT10_im * a0_im; 00603 A1_re += gT11_re * a1_re; 00604 A1_re -= gT11_im * a1_im; 00605 A1_re += gT12_re * a2_re; 00606 A1_re -= gT12_im * a2_im; 00607 spinorFloat A1_im = 0; 00608 A1_im += gT10_re * a0_im; 00609 A1_im += gT10_im * a0_re; 00610 A1_im += gT11_re * a1_im; 00611 A1_im += gT11_im * a1_re; 00612 A1_im += gT12_re * a2_im; 00613 A1_im += gT12_im * a2_re; 00614 spinorFloat B1_re = 0; 00615 B1_re += gT10_re * b0_re; 00616 B1_re -= gT10_im * b0_im; 00617 B1_re += gT11_re * b1_re; 00618 B1_re -= gT11_im * b1_im; 00619 B1_re += gT12_re * b2_re; 00620 B1_re -= gT12_im * b2_im; 00621 spinorFloat B1_im = 0; 00622 B1_im += gT10_re * b0_im; 00623 B1_im += gT10_im * b0_re; 00624 B1_im += gT11_re * b1_im; 00625 B1_im += gT11_im * b1_re; 00626 B1_im += gT12_re * b2_im; 00627 B1_im += gT12_im * b2_re; 00628 00629 // multiply row 2 00630 spinorFloat A2_re = 0; 00631 A2_re += gT20_re * a0_re; 00632 A2_re -= gT20_im * a0_im; 00633 A2_re += gT21_re * a1_re; 00634 A2_re -= gT21_im * a1_im; 00635 A2_re += gT22_re * a2_re; 00636 A2_re -= gT22_im * a2_im; 00637 spinorFloat A2_im = 0; 00638 A2_im += gT20_re * a0_im; 00639 A2_im += gT20_im * a0_re; 00640 A2_im += gT21_re * a1_im; 00641 A2_im += gT21_im * a1_re; 00642 A2_im += gT22_re * a2_im; 00643 A2_im += gT22_im * a2_re; 00644 spinorFloat B2_re = 0; 00645 B2_re += gT20_re * b0_re; 00646 B2_re -= gT20_im * b0_im; 00647 B2_re += gT21_re * b1_re; 00648 B2_re -= gT21_im * b1_im; 00649 B2_re += gT22_re * b2_re; 00650 B2_re -= gT22_im * b2_im; 00651 spinorFloat B2_im = 0; 00652 B2_im += gT20_re * b0_im; 00653 B2_im += gT20_im * b0_re; 00654 B2_im += gT21_re * b1_im; 00655 B2_im += gT21_im * b1_re; 00656 B2_im += gT22_re * b2_im; 00657 B2_im += gT22_im * b2_re; 00658 00659 o00_re += A0_re; 00660 o00_im += A0_im; 00661 o10_re += B0_re; 00662 o10_im += B0_im; 00663 o20_re -= B0_im; 00664 o20_im += B0_re; 00665 
o30_re -= A0_im; 00666 o30_im += A0_re; 00667 00668 o01_re += A1_re; 00669 o01_im += A1_im; 00670 o11_re += B1_re; 00671 o11_im += B1_im; 00672 o21_re -= B1_im; 00673 o21_im += B1_re; 00674 o31_re -= A1_im; 00675 o31_im += A1_re; 00676 00677 o02_re += A2_re; 00678 o02_im += A2_im; 00679 o12_re += B2_re; 00680 o12_im += B2_im; 00681 o22_re -= B2_im; 00682 o22_im += B2_re; 00683 o32_re -= A2_im; 00684 o32_im += A2_re; 00685 00686 } 00687 00688 { 00689 // Projector P1+ 00690 // 1 0 0 1 00691 // 0 1 -1 0 00692 // 0 -1 1 0 00693 // 1 0 0 1 00694 00695 int sp_idx = ((x2==X2m1) ? X-X2X1mX1 : X+X1) >> 1; 00696 int ga_idx = sid; 00697 00698 // read gauge matrix from device memory 00699 READ_GAUGE_MATRIX(GAUGE0TEX, 2); 00700 00701 // read spinor from device memory 00702 READ_SPINOR(SPINORTEX); 00703 00704 // reconstruct gauge matrix 00705 RECONSTRUCT_GAUGE_MATRIX(2); 00706 00707 // project spinor into half spinors 00708 spinorFloat a0_re = +i00_re+i30_re; 00709 spinorFloat a0_im = +i00_im+i30_im; 00710 spinorFloat a1_re = +i01_re+i31_re; 00711 spinorFloat a1_im = +i01_im+i31_im; 00712 spinorFloat a2_re = +i02_re+i32_re; 00713 spinorFloat a2_im = +i02_im+i32_im; 00714 00715 spinorFloat b0_re = +i10_re-i20_re; 00716 spinorFloat b0_im = +i10_im-i20_im; 00717 spinorFloat b1_re = +i11_re-i21_re; 00718 spinorFloat b1_im = +i11_im-i21_im; 00719 spinorFloat b2_re = +i12_re-i22_re; 00720 spinorFloat b2_im = +i12_im-i22_im; 00721 00722 // multiply row 0 00723 spinorFloat A0_re = 0; 00724 A0_re += g00_re * a0_re; 00725 A0_re -= g00_im * a0_im; 00726 A0_re += g01_re * a1_re; 00727 A0_re -= g01_im * a1_im; 00728 A0_re += g02_re * a2_re; 00729 A0_re -= g02_im * a2_im; 00730 spinorFloat A0_im = 0; 00731 A0_im += g00_re * a0_im; 00732 A0_im += g00_im * a0_re; 00733 A0_im += g01_re * a1_im; 00734 A0_im += g01_im * a1_re; 00735 A0_im += g02_re * a2_im; 00736 A0_im += g02_im * a2_re; 00737 spinorFloat B0_re = 0; 00738 B0_re += g00_re * b0_re; 00739 B0_re -= g00_im * b0_im; 00740 B0_re += 
g01_re * b1_re; 00741 B0_re -= g01_im * b1_im; 00742 B0_re += g02_re * b2_re; 00743 B0_re -= g02_im * b2_im; 00744 spinorFloat B0_im = 0; 00745 B0_im += g00_re * b0_im; 00746 B0_im += g00_im * b0_re; 00747 B0_im += g01_re * b1_im; 00748 B0_im += g01_im * b1_re; 00749 B0_im += g02_re * b2_im; 00750 B0_im += g02_im * b2_re; 00751 00752 // multiply row 1 00753 spinorFloat A1_re = 0; 00754 A1_re += g10_re * a0_re; 00755 A1_re -= g10_im * a0_im; 00756 A1_re += g11_re * a1_re; 00757 A1_re -= g11_im * a1_im; 00758 A1_re += g12_re * a2_re; 00759 A1_re -= g12_im * a2_im; 00760 spinorFloat A1_im = 0; 00761 A1_im += g10_re * a0_im; 00762 A1_im += g10_im * a0_re; 00763 A1_im += g11_re * a1_im; 00764 A1_im += g11_im * a1_re; 00765 A1_im += g12_re * a2_im; 00766 A1_im += g12_im * a2_re; 00767 spinorFloat B1_re = 0; 00768 B1_re += g10_re * b0_re; 00769 B1_re -= g10_im * b0_im; 00770 B1_re += g11_re * b1_re; 00771 B1_re -= g11_im * b1_im; 00772 B1_re += g12_re * b2_re; 00773 B1_re -= g12_im * b2_im; 00774 spinorFloat B1_im = 0; 00775 B1_im += g10_re * b0_im; 00776 B1_im += g10_im * b0_re; 00777 B1_im += g11_re * b1_im; 00778 B1_im += g11_im * b1_re; 00779 B1_im += g12_re * b2_im; 00780 B1_im += g12_im * b2_re; 00781 00782 // multiply row 2 00783 spinorFloat A2_re = 0; 00784 A2_re += g20_re * a0_re; 00785 A2_re -= g20_im * a0_im; 00786 A2_re += g21_re * a1_re; 00787 A2_re -= g21_im * a1_im; 00788 A2_re += g22_re * a2_re; 00789 A2_re -= g22_im * a2_im; 00790 spinorFloat A2_im = 0; 00791 A2_im += g20_re * a0_im; 00792 A2_im += g20_im * a0_re; 00793 A2_im += g21_re * a1_im; 00794 A2_im += g21_im * a1_re; 00795 A2_im += g22_re * a2_im; 00796 A2_im += g22_im * a2_re; 00797 spinorFloat B2_re = 0; 00798 B2_re += g20_re * b0_re; 00799 B2_re -= g20_im * b0_im; 00800 B2_re += g21_re * b1_re; 00801 B2_re -= g21_im * b1_im; 00802 B2_re += g22_re * b2_re; 00803 B2_re -= g22_im * b2_im; 00804 spinorFloat B2_im = 0; 00805 B2_im += g20_re * b0_im; 00806 B2_im += g20_im * b0_re; 00807 B2_im += 
g21_re * b1_im; 00808 B2_im += g21_im * b1_re; 00809 B2_im += g22_re * b2_im; 00810 B2_im += g22_im * b2_re; 00811 00812 o00_re += A0_re; 00813 o00_im += A0_im; 00814 o10_re += B0_re; 00815 o10_im += B0_im; 00816 o20_re -= B0_re; 00817 o20_im -= B0_im; 00818 o30_re += A0_re; 00819 o30_im += A0_im; 00820 00821 o01_re += A1_re; 00822 o01_im += A1_im; 00823 o11_re += B1_re; 00824 o11_im += B1_im; 00825 o21_re -= B1_re; 00826 o21_im -= B1_im; 00827 o31_re += A1_re; 00828 o31_im += A1_im; 00829 00830 o02_re += A2_re; 00831 o02_im += A2_im; 00832 o12_re += B2_re; 00833 o12_im += B2_im; 00834 o22_re -= B2_re; 00835 o22_im -= B2_im; 00836 o32_re += A2_re; 00837 o32_im += A2_im; 00838 00839 } 00840 00841 { 00842 // Projector P1- 00843 // 1 0 0 -1 00844 // 0 1 1 0 00845 // 0 1 1 0 00846 // -1 0 0 1 00847 00848 int sp_idx = ((x2==0) ? X+X2X1mX1 : X-X1) >> 1; 00849 int ga_idx = sp_idx; 00850 00851 // read gauge matrix from device memory 00852 READ_GAUGE_MATRIX(GAUGE1TEX, 3); 00853 00854 // read spinor from device memory 00855 READ_SPINOR(SPINORTEX); 00856 00857 // reconstruct gauge matrix 00858 RECONSTRUCT_GAUGE_MATRIX(3); 00859 00860 // project spinor into half spinors 00861 spinorFloat a0_re = +i00_re-i30_re; 00862 spinorFloat a0_im = +i00_im-i30_im; 00863 spinorFloat a1_re = +i01_re-i31_re; 00864 spinorFloat a1_im = +i01_im-i31_im; 00865 spinorFloat a2_re = +i02_re-i32_re; 00866 spinorFloat a2_im = +i02_im-i32_im; 00867 00868 spinorFloat b0_re = +i10_re+i20_re; 00869 spinorFloat b0_im = +i10_im+i20_im; 00870 spinorFloat b1_re = +i11_re+i21_re; 00871 spinorFloat b1_im = +i11_im+i21_im; 00872 spinorFloat b2_re = +i12_re+i22_re; 00873 spinorFloat b2_im = +i12_im+i22_im; 00874 00875 // multiply row 0 00876 spinorFloat A0_re = 0; 00877 A0_re += gT00_re * a0_re; 00878 A0_re -= gT00_im * a0_im; 00879 A0_re += gT01_re * a1_re; 00880 A0_re -= gT01_im * a1_im; 00881 A0_re += gT02_re * a2_re; 00882 A0_re -= gT02_im * a2_im; 00883 spinorFloat A0_im = 0; 00884 A0_im += gT00_re * a0_im; 
00885 A0_im += gT00_im * a0_re; 00886 A0_im += gT01_re * a1_im; 00887 A0_im += gT01_im * a1_re; 00888 A0_im += gT02_re * a2_im; 00889 A0_im += gT02_im * a2_re; 00890 spinorFloat B0_re = 0; 00891 B0_re += gT00_re * b0_re; 00892 B0_re -= gT00_im * b0_im; 00893 B0_re += gT01_re * b1_re; 00894 B0_re -= gT01_im * b1_im; 00895 B0_re += gT02_re * b2_re; 00896 B0_re -= gT02_im * b2_im; 00897 spinorFloat B0_im = 0; 00898 B0_im += gT00_re * b0_im; 00899 B0_im += gT00_im * b0_re; 00900 B0_im += gT01_re * b1_im; 00901 B0_im += gT01_im * b1_re; 00902 B0_im += gT02_re * b2_im; 00903 B0_im += gT02_im * b2_re; 00904 00905 // multiply row 1 00906 spinorFloat A1_re = 0; 00907 A1_re += gT10_re * a0_re; 00908 A1_re -= gT10_im * a0_im; 00909 A1_re += gT11_re * a1_re; 00910 A1_re -= gT11_im * a1_im; 00911 A1_re += gT12_re * a2_re; 00912 A1_re -= gT12_im * a2_im; 00913 spinorFloat A1_im = 0; 00914 A1_im += gT10_re * a0_im; 00915 A1_im += gT10_im * a0_re; 00916 A1_im += gT11_re * a1_im; 00917 A1_im += gT11_im * a1_re; 00918 A1_im += gT12_re * a2_im; 00919 A1_im += gT12_im * a2_re; 00920 spinorFloat B1_re = 0; 00921 B1_re += gT10_re * b0_re; 00922 B1_re -= gT10_im * b0_im; 00923 B1_re += gT11_re * b1_re; 00924 B1_re -= gT11_im * b1_im; 00925 B1_re += gT12_re * b2_re; 00926 B1_re -= gT12_im * b2_im; 00927 spinorFloat B1_im = 0; 00928 B1_im += gT10_re * b0_im; 00929 B1_im += gT10_im * b0_re; 00930 B1_im += gT11_re * b1_im; 00931 B1_im += gT11_im * b1_re; 00932 B1_im += gT12_re * b2_im; 00933 B1_im += gT12_im * b2_re; 00934 00935 // multiply row 2 00936 spinorFloat A2_re = 0; 00937 A2_re += gT20_re * a0_re; 00938 A2_re -= gT20_im * a0_im; 00939 A2_re += gT21_re * a1_re; 00940 A2_re -= gT21_im * a1_im; 00941 A2_re += gT22_re * a2_re; 00942 A2_re -= gT22_im * a2_im; 00943 spinorFloat A2_im = 0; 00944 A2_im += gT20_re * a0_im; 00945 A2_im += gT20_im * a0_re; 00946 A2_im += gT21_re * a1_im; 00947 A2_im += gT21_im * a1_re; 00948 A2_im += gT22_re * a2_im; 00949 A2_im += gT22_im * a2_re; 00950 
spinorFloat B2_re = 0; 00951 B2_re += gT20_re * b0_re; 00952 B2_re -= gT20_im * b0_im; 00953 B2_re += gT21_re * b1_re; 00954 B2_re -= gT21_im * b1_im; 00955 B2_re += gT22_re * b2_re; 00956 B2_re -= gT22_im * b2_im; 00957 spinorFloat B2_im = 0; 00958 B2_im += gT20_re * b0_im; 00959 B2_im += gT20_im * b0_re; 00960 B2_im += gT21_re * b1_im; 00961 B2_im += gT21_im * b1_re; 00962 B2_im += gT22_re * b2_im; 00963 B2_im += gT22_im * b2_re; 00964 00965 o00_re += A0_re; 00966 o00_im += A0_im; 00967 o10_re += B0_re; 00968 o10_im += B0_im; 00969 o20_re += B0_re; 00970 o20_im += B0_im; 00971 o30_re -= A0_re; 00972 o30_im -= A0_im; 00973 00974 o01_re += A1_re; 00975 o01_im += A1_im; 00976 o11_re += B1_re; 00977 o11_im += B1_im; 00978 o21_re += B1_re; 00979 o21_im += B1_im; 00980 o31_re -= A1_re; 00981 o31_im -= A1_im; 00982 00983 o02_re += A2_re; 00984 o02_im += A2_im; 00985 o12_re += B2_re; 00986 o12_im += B2_im; 00987 o22_re += B2_re; 00988 o22_im += B2_im; 00989 o32_re -= A2_re; 00990 o32_im -= A2_im; 00991 00992 } 00993 00994 { 00995 // Projector P2+ 00996 // 1 0 i 0 00997 // 0 1 0 -i 00998 // -i 0 1 0 00999 // 0 i 0 1 01000 01001 int sp_idx = ((x3==X3m1) ? 
X-X3X2X1mX2X1 : X+X2X1) >> 1; 01002 int ga_idx = sid; 01003 01004 // read gauge matrix from device memory 01005 READ_GAUGE_MATRIX(GAUGE0TEX, 4); 01006 01007 // read spinor from device memory 01008 READ_SPINOR(SPINORTEX); 01009 01010 // reconstruct gauge matrix 01011 RECONSTRUCT_GAUGE_MATRIX(4); 01012 01013 // project spinor into half spinors 01014 spinorFloat a0_re = +i00_re-i20_im; 01015 spinorFloat a0_im = +i00_im+i20_re; 01016 spinorFloat a1_re = +i01_re-i21_im; 01017 spinorFloat a1_im = +i01_im+i21_re; 01018 spinorFloat a2_re = +i02_re-i22_im; 01019 spinorFloat a2_im = +i02_im+i22_re; 01020 01021 spinorFloat b0_re = +i10_re+i30_im; 01022 spinorFloat b0_im = +i10_im-i30_re; 01023 spinorFloat b1_re = +i11_re+i31_im; 01024 spinorFloat b1_im = +i11_im-i31_re; 01025 spinorFloat b2_re = +i12_re+i32_im; 01026 spinorFloat b2_im = +i12_im-i32_re; 01027 01028 // multiply row 0 01029 spinorFloat A0_re = 0; 01030 A0_re += g00_re * a0_re; 01031 A0_re -= g00_im * a0_im; 01032 A0_re += g01_re * a1_re; 01033 A0_re -= g01_im * a1_im; 01034 A0_re += g02_re * a2_re; 01035 A0_re -= g02_im * a2_im; 01036 spinorFloat A0_im = 0; 01037 A0_im += g00_re * a0_im; 01038 A0_im += g00_im * a0_re; 01039 A0_im += g01_re * a1_im; 01040 A0_im += g01_im * a1_re; 01041 A0_im += g02_re * a2_im; 01042 A0_im += g02_im * a2_re; 01043 spinorFloat B0_re = 0; 01044 B0_re += g00_re * b0_re; 01045 B0_re -= g00_im * b0_im; 01046 B0_re += g01_re * b1_re; 01047 B0_re -= g01_im * b1_im; 01048 B0_re += g02_re * b2_re; 01049 B0_re -= g02_im * b2_im; 01050 spinorFloat B0_im = 0; 01051 B0_im += g00_re * b0_im; 01052 B0_im += g00_im * b0_re; 01053 B0_im += g01_re * b1_im; 01054 B0_im += g01_im * b1_re; 01055 B0_im += g02_re * b2_im; 01056 B0_im += g02_im * b2_re; 01057 01058 // multiply row 1 01059 spinorFloat A1_re = 0; 01060 A1_re += g10_re * a0_re; 01061 A1_re -= g10_im * a0_im; 01062 A1_re += g11_re * a1_re; 01063 A1_re -= g11_im * a1_im; 01064 A1_re += g12_re * a2_re; 01065 A1_re -= g12_im * a2_im; 01066 
spinorFloat A1_im = 0; 01067 A1_im += g10_re * a0_im; 01068 A1_im += g10_im * a0_re; 01069 A1_im += g11_re * a1_im; 01070 A1_im += g11_im * a1_re; 01071 A1_im += g12_re * a2_im; 01072 A1_im += g12_im * a2_re; 01073 spinorFloat B1_re = 0; 01074 B1_re += g10_re * b0_re; 01075 B1_re -= g10_im * b0_im; 01076 B1_re += g11_re * b1_re; 01077 B1_re -= g11_im * b1_im; 01078 B1_re += g12_re * b2_re; 01079 B1_re -= g12_im * b2_im; 01080 spinorFloat B1_im = 0; 01081 B1_im += g10_re * b0_im; 01082 B1_im += g10_im * b0_re; 01083 B1_im += g11_re * b1_im; 01084 B1_im += g11_im * b1_re; 01085 B1_im += g12_re * b2_im; 01086 B1_im += g12_im * b2_re; 01087 01088 // multiply row 2 01089 spinorFloat A2_re = 0; 01090 A2_re += g20_re * a0_re; 01091 A2_re -= g20_im * a0_im; 01092 A2_re += g21_re * a1_re; 01093 A2_re -= g21_im * a1_im; 01094 A2_re += g22_re * a2_re; 01095 A2_re -= g22_im * a2_im; 01096 spinorFloat A2_im = 0; 01097 A2_im += g20_re * a0_im; 01098 A2_im += g20_im * a0_re; 01099 A2_im += g21_re * a1_im; 01100 A2_im += g21_im * a1_re; 01101 A2_im += g22_re * a2_im; 01102 A2_im += g22_im * a2_re; 01103 spinorFloat B2_re = 0; 01104 B2_re += g20_re * b0_re; 01105 B2_re -= g20_im * b0_im; 01106 B2_re += g21_re * b1_re; 01107 B2_re -= g21_im * b1_im; 01108 B2_re += g22_re * b2_re; 01109 B2_re -= g22_im * b2_im; 01110 spinorFloat B2_im = 0; 01111 B2_im += g20_re * b0_im; 01112 B2_im += g20_im * b0_re; 01113 B2_im += g21_re * b1_im; 01114 B2_im += g21_im * b1_re; 01115 B2_im += g22_re * b2_im; 01116 B2_im += g22_im * b2_re; 01117 01118 o00_re += A0_re; 01119 o00_im += A0_im; 01120 o10_re += B0_re; 01121 o10_im += B0_im; 01122 o20_re += A0_im; 01123 o20_im -= A0_re; 01124 o30_re -= B0_im; 01125 o30_im += B0_re; 01126 01127 o01_re += A1_re; 01128 o01_im += A1_im; 01129 o11_re += B1_re; 01130 o11_im += B1_im; 01131 o21_re += A1_im; 01132 o21_im -= A1_re; 01133 o31_re -= B1_im; 01134 o31_im += B1_re; 01135 01136 o02_re += A2_re; 01137 o02_im += A2_im; 01138 o12_re += B2_re; 01139 o12_im += 
B2_im;  // completes the `o12_im +=` update begun on the previous line
o22_re += A2_im;
o22_im -= A2_re;
o32_re -= B2_im;
o32_im += B2_re;

}

// ---------------------------------------------------------------------------
// Helper macros used only by the P2- section below; removed right after it.
//   DSLASH_CZERO(z)       declares the accumulator pair z_re / z_im, both 0
//   DSLASH_CMAC(z, g, v)  complex multiply-accumulate z += g * v; each of
//                         z_re and z_im receives its terms in the same order
//                         as in the fully unrolled form
//   DSLASH_P2M_OUT(c)     adds A<c>, B<c> to o0c, o1c and i*A<c>, -i*B<c>
//                         to o2c, o3c
// ---------------------------------------------------------------------------
#define DSLASH_CZERO(z) spinorFloat z##_re = 0; spinorFloat z##_im = 0
#define DSLASH_CMAC(z, g, v)     \
  z##_re += g##_re * v##_re;     \
  z##_re -= g##_im * v##_im;     \
  z##_im += g##_re * v##_im;     \
  z##_im += g##_im * v##_re
#define DSLASH_P2M_OUT(c)                               \
  o0##c##_re += A##c##_re;  o0##c##_im += A##c##_im;    \
  o1##c##_re += B##c##_re;  o1##c##_im += B##c##_im;    \
  o2##c##_re -= A##c##_im;  o2##c##_im += A##c##_re;    \
  o3##c##_re += B##c##_im;  o3##c##_im -= B##c##_re

{
  // Projector P2-
  // 1 0 -i 0
  // 0 1 0 i
  // i 0 1 0
  // 0 -i 0 1

  int sp_idx = ((x3==0) ? X+X3X2X1mX2X1 : X-X2X1) >> 1;
  int ga_idx = sp_idx;

  // read gauge matrix from device memory
  READ_GAUGE_MATRIX(GAUGE1TEX, 5);

  // read spinor from device memory
  READ_SPINOR(SPINORTEX);

  // reconstruct gauge matrix
  RECONSTRUCT_GAUGE_MATRIX(5);

  // project spinor into half spinors:
  //   a_c = i0c - i * i2c,   b_c = i1c + i * i3c   (c = 0, 1, 2)
  spinorFloat a0_re = i00_re + i20_im;
  spinorFloat a0_im = i00_im - i20_re;
  spinorFloat a1_re = i01_re + i21_im;
  spinorFloat a1_im = i01_im - i21_re;
  spinorFloat a2_re = i02_re + i22_im;
  spinorFloat a2_im = i02_im - i22_re;

  spinorFloat b0_re = i10_re - i30_im;
  spinorFloat b0_im = i10_im + i30_re;
  spinorFloat b1_re = i11_re - i31_im;
  spinorFloat b1_im = i11_im + i31_re;
  spinorFloat b2_re = i12_re - i32_im;
  spinorFloat b2_im = i12_im + i32_re;

  // multiply row 0 (gT0x entries)
  DSLASH_CZERO(A0);
  DSLASH_CMAC(A0, gT00, a0);
  DSLASH_CMAC(A0, gT01, a1);
  DSLASH_CMAC(A0, gT02, a2);
  DSLASH_CZERO(B0);
  DSLASH_CMAC(B0, gT00, b0);
  DSLASH_CMAC(B0, gT01, b1);
  DSLASH_CMAC(B0, gT02, b2);

  // multiply row 1 (gT1x entries)
  DSLASH_CZERO(A1);
  DSLASH_CMAC(A1, gT10, a0);
  DSLASH_CMAC(A1, gT11, a1);
  DSLASH_CMAC(A1, gT12, a2);
  DSLASH_CZERO(B1);
  DSLASH_CMAC(B1, gT10, b0);
  DSLASH_CMAC(B1, gT11, b1);
  DSLASH_CMAC(B1, gT12, b2);

  // multiply row 2 (gT2x entries)
  DSLASH_CZERO(A2);
  DSLASH_CMAC(A2, gT20, a0);
  DSLASH_CMAC(A2, gT21, a1);
  DSLASH_CMAC(A2, gT22, a2);
  DSLASH_CZERO(B2);
  DSLASH_CMAC(B2, gT20, b0);
  DSLASH_CMAC(B2, gT21, b1);
  DSLASH_CMAC(B2, gT22, b2);

  // accumulate into the output spinor, one colour index at a time
  DSLASH_P2M_OUT(0);

  DSLASH_P2M_OUT(1);

  DSLASH_P2M_OUT(2);

}

#undef DSLASH_P2M_OUT
#undef DSLASH_CMAC
#undef DSLASH_CZERO

{
  // Projector P3+
  // 2 0 0 0
  // 0 2 0 0
  // 0 0 0 0
  // 0 0 0 0

  int sp_idx = ((x4==X4m1) ? X-X4X3X2X1mX3X2X1 : X+X3X2X1) >> 1;
  int ga_idx = sid;

  if (gauge_fixed && ga_idx < X4X3X2X1hmX3X2X1h) {
    // read spinor from device memory
    READ_SPINOR_UP(SPINORTEX);

    // project spinor into half spinors (twice the i0c / i1c components)
    spinorFloat a0_re = 2*i00_re;
    spinorFloat a0_im = 2*i00_im;
    spinorFloat a1_re = 2*i01_re;
    spinorFloat a1_im = 2*i01_im;
    spinorFloat a2_re = 2*i02_re;
    spinorFloat a2_im = 2*i02_im;

    spinorFloat b0_re = 2*i10_re;
    spinorFloat b0_im = 2*i10_im;
    spinorFloat b1_re = 2*i11_re;
    spinorFloat b1_im = 2*i11_im;
    spinorFloat b2_re = 2*i12_re;
    spinorFloat b2_im = 2*i12_im;

    // identity gauge matrix: the half spinors are accumulated unchanged
    o00_re += a0_re;  o00_im += a0_im;
    o10_re += b0_re;  o10_im += b0_im;

    o01_re += a1_re;  o01_im += a1_im;
    o11_re += b1_re;  o11_im += b1_im;

    o02_re += a2_re;  o02_im += a2_im;
    o12_re += b2_re;  o12_im += b2_im;

  }
  else
{
// Helper macros used only inside this branch; removed before it closes.
//   DSLASH_CZERO(z)       declares the accumulator pair z_re / z_im, both 0
//   DSLASH_CMAC(z, g, v)  complex multiply-accumulate z += g * v; each of
//                         z_re and z_im receives its terms in the same order
//                         as in the fully unrolled form
#define DSLASH_CZERO(z) spinorFloat z##_re = 0; spinorFloat z##_im = 0
#define DSLASH_CMAC(z, g, v)     \
  z##_re += g##_re * v##_re;     \
  z##_re -= g##_im * v##_im;     \
  z##_im += g##_re * v##_im;     \
  z##_im += g##_im * v##_re

    // read gauge matrix from device memory
    READ_GAUGE_MATRIX(GAUGE0TEX, 6);

    // read spinor from device memory
    READ_SPINOR_UP(SPINORTEX);

    // reconstruct gauge matrix
    RECONSTRUCT_GAUGE_MATRIX(6);

    // project spinor into half spinors (twice the i0c / i1c components)
    spinorFloat a0_re = 2*i00_re;
    spinorFloat a0_im = 2*i00_im;
    spinorFloat a1_re = 2*i01_re;
    spinorFloat a1_im = 2*i01_im;
    spinorFloat a2_re = 2*i02_re;
    spinorFloat a2_im = 2*i02_im;

    spinorFloat b0_re = 2*i10_re;
    spinorFloat b0_im = 2*i10_im;
    spinorFloat b1_re = 2*i11_re;
    spinorFloat b1_im = 2*i11_im;
    spinorFloat b2_re = 2*i12_re;
    spinorFloat b2_im = 2*i12_im;

    // multiply row 0 (g0x entries)
    DSLASH_CZERO(A0);
    DSLASH_CMAC(A0, g00, a0);
    DSLASH_CMAC(A0, g01, a1);
    DSLASH_CMAC(A0, g02, a2);
    DSLASH_CZERO(B0);
    DSLASH_CMAC(B0, g00, b0);
    DSLASH_CMAC(B0, g01, b1);
    DSLASH_CMAC(B0, g02, b2);

    // multiply row 1 (g1x entries)
    DSLASH_CZERO(A1);
    DSLASH_CMAC(A1, g10, a0);
    DSLASH_CMAC(A1, g11, a1);
    DSLASH_CMAC(A1, g12, a2);
    DSLASH_CZERO(B1);
    DSLASH_CMAC(B1, g10, b0);
    DSLASH_CMAC(B1, g11, b1);
    DSLASH_CMAC(B1, g12, b2);

    // multiply row 2 (g2x entries)
    DSLASH_CZERO(A2);
    DSLASH_CMAC(A2, g20, a0);
    DSLASH_CMAC(A2, g21, a1);
    DSLASH_CMAC(A2, g22, a2);
    DSLASH_CZERO(B2);
    DSLASH_CMAC(B2, g20, b0);
    DSLASH_CMAC(B2, g21, b1);
    DSLASH_CMAC(B2, g22, b2);

#undef DSLASH_CMAC
#undef DSLASH_CZERO

    // only the o0c / o1c output components receive a contribution here
    o00_re += A0_re;  o00_im += A0_im;
    o10_re += B0_re;  o10_im += B0_im;

    o01_re += A1_re;  o01_im += A1_im;
    o11_re += B1_re;  o11_im += B1_im;

    o02_re += A2_re;  o02_im += A2_im;
    o12_re += B2_re;  o12_im += B2_im;

  }
}

{
  // Projector P3-
  // 0 0 0 0
  // 0 0 0 0
  // 0 0 2 0
  // 0 0 0 2

  int sp_idx = ((x4==0) ?
X+X4X3X2X1mX3X2X1 : X-X3X2X1) >> 1;  // rest of the `int sp_idx = ((x4==0) ?` initialiser from the previous line
  int ga_idx = sp_idx;

  if (gauge_fixed && ga_idx < X4X3X2X1hmX3X2X1h) {
    // read spinor from device memory
    READ_SPINOR_DOWN(SPINORTEX);

    // project spinor into half spinors (twice the i2c / i3c components)
    spinorFloat a0_re = 2*i20_re;
    spinorFloat a0_im = 2*i20_im;
    spinorFloat a1_re = 2*i21_re;
    spinorFloat a1_im = 2*i21_im;
    spinorFloat a2_re = 2*i22_re;
    spinorFloat a2_im = 2*i22_im;

    spinorFloat b0_re = 2*i30_re;
    spinorFloat b0_im = 2*i30_im;
    spinorFloat b1_re = 2*i31_re;
    spinorFloat b1_im = 2*i31_im;
    spinorFloat b2_re = 2*i32_re;
    spinorFloat b2_im = 2*i32_im;

    // identity gauge matrix: the half spinors are accumulated unchanged
    o20_re += a0_re;  o20_im += a0_im;
    o30_re += b0_re;  o30_im += b0_im;

    o21_re += a1_re;  o21_im += a1_im;
    o31_re += b1_re;  o31_im += b1_im;

    o22_re += a2_re;  o22_im += a2_im;
    o32_re += b2_re;  o32_im += b2_im;

  }
  else {
// Helper macros used only inside this branch; removed before it closes.
//   DSLASH_CZERO(z)       declares the accumulator pair z_re / z_im, both 0
//   DSLASH_CMAC(z, g, v)  complex multiply-accumulate z += g * v; each of
//                         z_re and z_im receives its terms in the same order
//                         as in the fully unrolled form
#define DSLASH_CZERO(z) spinorFloat z##_re = 0; spinorFloat z##_im = 0
#define DSLASH_CMAC(z, g, v)     \
  z##_re += g##_re * v##_re;     \
  z##_re -= g##_im * v##_im;     \
  z##_im += g##_re * v##_im;     \
  z##_im += g##_im * v##_re

    // read gauge matrix from device memory
    READ_GAUGE_MATRIX(GAUGE1TEX, 7);

    // read spinor from device memory
    READ_SPINOR_DOWN(SPINORTEX);

    // reconstruct gauge matrix
    RECONSTRUCT_GAUGE_MATRIX(7);

    // project spinor into half spinors (twice the i2c / i3c components)
    spinorFloat a0_re = 2*i20_re;
    spinorFloat a0_im = 2*i20_im;
    spinorFloat a1_re = 2*i21_re;
    spinorFloat a1_im = 2*i21_im;
    spinorFloat a2_re = 2*i22_re;
    spinorFloat a2_im = 2*i22_im;

    spinorFloat b0_re = 2*i30_re;
    spinorFloat b0_im = 2*i30_im;
    spinorFloat b1_re = 2*i31_re;
    spinorFloat b1_im = 2*i31_im;
    spinorFloat b2_re = 2*i32_re;
    spinorFloat b2_im = 2*i32_im;

    // multiply row 0 (gT0x entries)
    DSLASH_CZERO(A0);
    DSLASH_CMAC(A0, gT00, a0);
    DSLASH_CMAC(A0, gT01, a1);
    DSLASH_CMAC(A0, gT02, a2);
    DSLASH_CZERO(B0);
    DSLASH_CMAC(B0, gT00, b0);
    DSLASH_CMAC(B0, gT01, b1);
    DSLASH_CMAC(B0, gT02, b2);

    // multiply row 1 (gT1x entries)
    DSLASH_CZERO(A1);
    DSLASH_CMAC(A1, gT10, a0);
    DSLASH_CMAC(A1, gT11, a1);
    DSLASH_CMAC(A1, gT12, a2);
    DSLASH_CZERO(B1);
    DSLASH_CMAC(B1, gT10, b0);
    DSLASH_CMAC(B1, gT11, b1);
    DSLASH_CMAC(B1, gT12, b2);

    // multiply row 2 (gT2x entries)
    DSLASH_CZERO(A2);
    DSLASH_CMAC(A2, gT20, a0);
    DSLASH_CMAC(A2, gT21, a1);
    DSLASH_CMAC(A2, gT22, a2);
    DSLASH_CZERO(B2);
    DSLASH_CMAC(B2, gT20, b0);
    DSLASH_CMAC(B2, gT21, b1);
    DSLASH_CMAC(B2, gT22, b2);

#undef DSLASH_CMAC
#undef DSLASH_CZERO

    // only the o2c / o3c output components receive a contribution here
    o20_re += A0_re;  o20_im += A0_im;
    o30_re += B0_re;  o30_im += B0_im;

    o21_re += A1_re;  o21_im += A1_im;
    o31_re += B1_re;  o31_im += B1_im;

    o22_re += A2_re;  o22_im += A2_im;
    o32_re += B2_re;  o32_im += B2_im;

  }
}

#ifdef DSLASH_CLOVER

// change to chiral basis
{
  // colour index 0: build the four rotated components first, then overwrite o*0
  spinorFloat rot0_re = -o10_re - o30_re;
  spinorFloat rot0_im = -o10_im - o30_im;
  spinorFloat rot1_re = o00_re + o20_re;
  spinorFloat rot1_im = o00_im + o20_im;
  spinorFloat rot2_re = -o10_re + o30_re;
  spinorFloat rot2_im = -o10_im + o30_im;
  spinorFloat rot3_re = o00_re - o20_re;
  spinorFloat rot3_im = o00_im - o20_im;

  o00_re = rot0_re;  o00_im = rot0_im;
  o10_re = rot1_re;  o10_im = rot1_im;
  o20_re = rot2_re;  o20_im = rot2_im;
  o30_re = rot3_re;  o30_im = rot3_im;
}
{
  // colour index 1 (this block continues on the following line)
  spinorFloat a01_re = -o11_re - o31_re;
  spinorFloat a01_im = -o11_im - o31_im;
  spinorFloat a11_re = o01_re + o21_re;
  spinorFloat a11_im =
o01_im + o21_im;  // right-hand side of the `spinorFloat a11_im =` declaration begun on the previous line
  spinorFloat a21_re = -o11_re + o31_re;
  spinorFloat a21_im = -o11_im + o31_im;
  spinorFloat a31_re = o01_re - o21_re;
  spinorFloat a31_im = o01_im - o21_im;

  o01_re = a01_re;  o01_im = a01_im;
  o11_re = a11_re;  o11_im = a11_im;
  o21_re = a21_re;  o21_im = a21_im;
  o31_re = a31_re;  o31_im = a31_im;
}
{
  // colour index 2: build the four rotated components first, then overwrite o*2
  spinorFloat rot0_re = -o12_re - o32_re;
  spinorFloat rot0_im = -o12_im - o32_im;
  spinorFloat rot1_re = o02_re + o22_re;
  spinorFloat rot1_im = o02_im + o22_im;
  spinorFloat rot2_re = -o12_re + o32_re;
  spinorFloat rot2_im = -o12_im + o32_im;
  spinorFloat rot3_re = o02_re - o22_re;
  spinorFloat rot3_im = o02_im - o22_im;

  o02_re = rot0_re;  o02_im = rot0_im;
  o12_re = rot1_re;  o12_im = rot1_im;
  o22_re = rot2_re;  o22_im = rot2_im;
  o32_re = rot3_re;  o32_im = rot3_im;
}

// ---------------------------------------------------------------------------
// Helper macros for the clover rows below; removed again further down.
//   DSLASH_CZERO(z)       declares the accumulator pair z_re / z_im, both 0
//   DSLASH_CMAC(z, c, v)  z += c * v with a complex coefficient c
//   DSLASH_RMAC(z, c, v)  z += c_re * v for the diagonal entries, where only
//                         the real part of the coefficient is used
// Each of z_re and z_im receives its terms in the same order as in the fully
// unrolled form.
// ---------------------------------------------------------------------------
#define DSLASH_CZERO(z) spinorFloat z##_re = 0; spinorFloat z##_im = 0
#define DSLASH_CMAC(z, c, v)     \
  z##_re += c##_re * v##_re;     \
  z##_re -= c##_im * v##_im;     \
  z##_im += c##_re * v##_im;     \
  z##_im += c##_im * v##_re
#define DSLASH_RMAC(z, c, v)     \
  z##_re += c##_re * v##_re;     \
  z##_im += c##_re * v##_im

// apply first chiral block
{
  READ_CLOVER(CLOVERTEX, 0)

  DSLASH_CZERO(a00);
  DSLASH_CZERO(a01);
  DSLASH_CZERO(a02);
  DSLASH_CZERO(a10);
  DSLASH_CZERO(a11);
  DSLASH_CZERO(a12);

  DSLASH_RMAC(a00, c00_00, o00);
  DSLASH_CMAC(a00, c00_01, o01);
  DSLASH_CMAC(a00, c00_02, o02);
  DSLASH_CMAC(a00, c00_10, o10);
  DSLASH_CMAC(a00, c00_11, o11);
  DSLASH_CMAC(a00, c00_12, o12);

  DSLASH_CMAC(a01, c01_00, o00);
  DSLASH_RMAC(a01, c01_01, o01);
  DSLASH_CMAC(a01, c01_02, o02);
  DSLASH_CMAC(a01, c01_10, o10);
  DSLASH_CMAC(a01, c01_11, o11);
  DSLASH_CMAC(a01, c01_12, o12);

  DSLASH_CMAC(a02, c02_00, o00);
  DSLASH_CMAC(a02, c02_01, o01);
  DSLASH_RMAC(a02, c02_02, o02);
  DSLASH_CMAC(a02, c02_10, o10);
  DSLASH_CMAC(a02, c02_11, o11);
  DSLASH_CMAC(a02, c02_12, o12);

  DSLASH_CMAC(a10, c10_00, o00);
  DSLASH_CMAC(a10, c10_01, o01);
  DSLASH_CMAC(a10, c10_02, o02);
  DSLASH_RMAC(a10, c10_10, o10);
  DSLASH_CMAC(a10, c10_11, o11);
  DSLASH_CMAC(a10, c10_12, o12);

  DSLASH_CMAC(a11, c11_00, o00);
  DSLASH_CMAC(a11, c11_01, o01);
  DSLASH_CMAC(a11, c11_02, o02);
  DSLASH_CMAC(a11, c11_10, o10);
  DSLASH_RMAC(a11, c11_11, o11);
  DSLASH_CMAC(a11, c11_12, o12);

  DSLASH_CMAC(a12, c12_00, o00);
  DSLASH_CMAC(a12, c12_01, o01);
  DSLASH_CMAC(a12, c12_02, o02);
  DSLASH_CMAC(a12, c12_10, o10);
  DSLASH_CMAC(a12, c12_11, o11);
  DSLASH_RMAC(a12, c12_12, o12);

  // overwrite the o0c / o1c components with the block's result
  o00_re = a00_re;  o00_im = a00_im;
  o01_re = a01_re;  o01_im = a01_im;
  o02_re = a02_re;  o02_im = a02_im;
  o10_re = a10_re;  o10_im = a10_im;
  o11_re = a11_re;  o11_im = a11_im;
  o12_re = a12_re;  o12_im = a12_im;

}

// apply second chiral block
{
  READ_CLOVER(CLOVERTEX, 1)

  DSLASH_CZERO(a20);
  DSLASH_CZERO(a21);
  DSLASH_CZERO(a22);
  DSLASH_CZERO(a30);
  DSLASH_CZERO(a31);
  DSLASH_CZERO(a32);

  DSLASH_RMAC(a20, c20_20, o20);
  DSLASH_CMAC(a20, c20_21, o21);
  DSLASH_CMAC(a20, c20_22, o22);
  DSLASH_CMAC(a20, c20_30, o30);
  DSLASH_CMAC(a20, c20_31, o31);
  DSLASH_CMAC(a20, c20_32, o32);

  DSLASH_CMAC(a21, c21_20, o20);
  DSLASH_RMAC(a21, c21_21, o21);
  DSLASH_CMAC(a21, c21_22, o22);

#undef DSLASH_RMAC
#undef DSLASH_CMAC
#undef DSLASH_CZERO

  // the c21_30 term is split across the line boundary, so it is written out
  a21_re += c21_30_re * o30_re;
  a21_re -=
c21_30_im * o30_im;  // right-hand side of the `a21_re -=` update begun on the previous line
  a21_im += c21_30_re * o30_im;
  a21_im += c21_30_im * o30_re;

// Helper macros for the remaining clover rows; removed after the last row.
//   DSLASH_CMAC(z, c, v)  z += c * v with a complex coefficient c
//   DSLASH_RMAC(z, c, v)  z += c_re * v for the diagonal entries, where only
//                         the real part of the coefficient is used
// Each of z_re and z_im receives its terms in the same order as in the fully
// unrolled form.
#define DSLASH_CMAC(z, c, v)     \
  z##_re += c##_re * v##_re;     \
  z##_re -= c##_im * v##_im;     \
  z##_im += c##_re * v##_im;     \
  z##_im += c##_im * v##_re
#define DSLASH_RMAC(z, c, v)     \
  z##_re += c##_re * v##_re;     \
  z##_im += c##_re * v##_im

  DSLASH_CMAC(a21, c21_31, o31);
  DSLASH_CMAC(a21, c21_32, o32);

  DSLASH_CMAC(a22, c22_20, o20);
  DSLASH_CMAC(a22, c22_21, o21);
  DSLASH_RMAC(a22, c22_22, o22);
  DSLASH_CMAC(a22, c22_30, o30);
  DSLASH_CMAC(a22, c22_31, o31);
  DSLASH_CMAC(a22, c22_32, o32);

  DSLASH_CMAC(a30, c30_20, o20);
  DSLASH_CMAC(a30, c30_21, o21);
  DSLASH_CMAC(a30, c30_22, o22);
  DSLASH_RMAC(a30, c30_30, o30);
  DSLASH_CMAC(a30, c30_31, o31);
  DSLASH_CMAC(a30, c30_32, o32);

  DSLASH_CMAC(a31, c31_20, o20);
  DSLASH_CMAC(a31, c31_21, o21);
  DSLASH_CMAC(a31, c31_22, o22);
  DSLASH_CMAC(a31, c31_30, o30);
  DSLASH_RMAC(a31, c31_31, o31);
  DSLASH_CMAC(a31, c31_32, o32);

  DSLASH_CMAC(a32, c32_20, o20);
  DSLASH_CMAC(a32, c32_21, o21);
  DSLASH_CMAC(a32, c32_22, o22);
  DSLASH_CMAC(a32, c32_30, o30);
  DSLASH_CMAC(a32, c32_31, o31);
  DSLASH_RMAC(a32, c32_32, o32);

#undef DSLASH_RMAC
#undef DSLASH_CMAC

  // overwrite the o2c / o3c components with the block's result
  o20_re = a20_re;  o20_im = a20_im;
  o21_re = a21_re;  o21_im = a21_im;
  o22_re = a22_re;  o22_im = a22_im;
  o30_re = a30_re;  o30_im = a30_im;
  o31_re = a31_re;  o31_im = a31_im;
  o32_re = a32_re;  o32_im = a32_im;

}

// change back from chiral basis
// (note: required factor of 1/2 is included in clover term normalization)
//
// DSLASH_FROM_CHIRAL(c) handles colour index c in its own scope: it forms the
// four recombined components from the current o*c values and then stores them.
#define DSLASH_FROM_CHIRAL(c) {                            \
    spinorFloat a0##c##_re =  o1##c##_re + o3##c##_re;     \
    spinorFloat a0##c##_im =  o1##c##_im + o3##c##_im;     \
    spinorFloat a1##c##_re = -o0##c##_re - o2##c##_re;     \
    spinorFloat a1##c##_im = -o0##c##_im - o2##c##_im;     \
    spinorFloat a2##c##_re =  o1##c##_re - o3##c##_re;     \
    spinorFloat a2##c##_im =  o1##c##_im - o3##c##_im;     \
    spinorFloat a3##c##_re = -o0##c##_re + o2##c##_re;     \
    spinorFloat a3##c##_im = -o0##c##_im + o2##c##_im;     \
                                                           \
    o0##c##_re = a0##c##_re;  o0##c##_im = a0##c##_im;     \
    o1##c##_re = a1##c##_re;  o1##c##_im = a1##c##_im;     \
    o2##c##_re = a2##c##_re;  o2##c##_im = a2##c##_im;     \
    o3##c##_re = a3##c##_re;  o3##c##_im = a3##c##_im;     \
  }
DSLASH_FROM_CHIRAL(0)
DSLASH_FROM_CHIRAL(1)
DSLASH_FROM_CHIRAL(2)
#undef DSLASH_FROM_CHIRAL
#endif // DSLASH_CLOVER


#ifdef DSLASH_XPAY
READ_ACCUM(ACCUMTEX)
#ifdef SPINOR_DOUBLE
// out = a*out + acc, with acc.x / acc.y supplying the real / imaginary part
#define DSLASH_XPAY_PAIR(out, acc)        \
  out##_re = a*out##_re + acc.x;          \
  out##_im = a*out##_im + acc.y
DSLASH_XPAY_PAIR(o00, accum0);
DSLASH_XPAY_PAIR(o01, accum1);
DSLASH_XPAY_PAIR(o02, accum2);
DSLASH_XPAY_PAIR(o10, accum3);
#undef DSLASH_XPAY_PAIR
o11_re = a*o11_re + accum4.x;   // SPINOR_DOUBLE branch, continued: out = a*out + accum
o11_im = a*o11_im + accum4.y;
o12_re = a*o12_re + accum5.x;
o12_im = a*o12_im + accum5.y;
o20_re = a*o20_re + accum6.x;
o20_im = a*o20_im + accum6.y;
o21_re = a*o21_re + accum7.x;
o21_im = a*o21_im + accum7.y;
o22_re = a*o22_re + accum8.x;
o22_im = a*o22_im + accum8.y;
o30_re = a*o30_re + accum9.x;
o30_im = a*o30_im + accum9.y;
o31_re = a*o31_re + accum10.x;
o31_im = a*o31_im + accum10.y;
o32_re = a*o32_re + accum11.x;
o32_im = a*o32_im + accum11.y;
#else
// non-double branch: the accumulator is read through .x/.y/.z/.w, so each
// accumN supplies two complex components
o00_re = a*o00_re + accum0.x;
o00_im = a*o00_im + accum0.y;
o01_re = a*o01_re + accum0.z;
o01_im = a*o01_im + accum0.w;
o02_re = a*o02_re + accum1.x;
o02_im = a*o02_im + accum1.y;
o10_re = a*o10_re + accum1.z;
o10_im = a*o10_im + accum1.w;
o11_re = a*o11_re + accum2.x;
o11_im = a*o11_im + accum2.y;
o12_re = a*o12_re + accum2.z;
o12_im = a*o12_im + accum2.w;
o20_re = a*o20_re + accum3.x;
o20_im = a*o20_im + accum3.y;
o21_re = a*o21_re + accum3.z;
o21_im = a*o21_im + accum3.w;
o22_re = a*o22_re + accum4.x;
o22_im = a*o22_im + accum4.y;
o30_re = a*o30_re + accum4.z;
o30_im = a*o30_im + accum4.w;
o31_re = a*o31_re + accum5.x;
o31_im = a*o31_im + accum5.y;
o32_re = a*o32_re + accum5.z;
o32_im = a*o32_im + accum5.w;
#endif // SPINOR_DOUBLE
#endif // DSLASH_XPAY


// write spinor field back to device memory
WRITE_SPINOR();

// undefine to prevent warning when precision is changed
#undef spinorFloat
#undef SHARED_STRIDE

#undef A_re
#undef A_im

// gauge link entries
#undef g00_re
#undef g00_im
#undef g01_re
#undef g01_im
#undef g02_re
#undef g02_im
#undef g10_re
#undef g10_im
#undef g11_re
#undef g11_im
#undef g12_re
#undef g12_im
#undef g20_re
#undef g20_im
#undef g21_re
#undef g21_im
#undef g22_re
#undef g22_im

// input spinor components
#undef i00_re
#undef i00_im
#undef i01_re
#undef i01_im
#undef i02_re
#undef i02_im
#undef i10_re
#undef i10_im
#undef i11_re
#undef i11_im
#undef i12_re
#undef i12_im
#undef i20_re
#undef i20_im
#undef i21_re
#undef i21_im
#undef i22_re
#undef i22_im
#undef i30_re
#undef i30_im
#undef i31_re
#undef i31_im
#undef i32_re
#undef i32_im

// clover entries: the six diagonal names, then each off-diagonal name once.
// The original list repeated the 30 off-diagonal #undefs after every diagonal
// name; repeating #undef on an already-undefined name has no effect, so only
// one copy is kept.
#undef c00_00_re
#undef c01_01_re
#undef c02_02_re
#undef c10_10_re
#undef c11_11_re
#undef c12_12_re
#undef c01_00_re
#undef c01_00_im
#undef c02_00_re
#undef c02_00_im
#undef c10_00_re
#undef c10_00_im
#undef c11_00_re
#undef c11_00_im
#undef c12_00_re
#undef c12_00_im
#undef c02_01_re
#undef c02_01_im
#undef c10_01_re
#undef c10_01_im
#undef c11_01_re
#undef c11_01_im
#undef c12_01_re
#undef c12_01_im
#undef c10_02_re
#undef c10_02_im
#undef c11_02_re
#undef c11_02_im
#undef c12_02_re
#undef c12_02_im
#undef c11_10_re
#undef c11_10_im
#undef c12_10_re
#undef c12_10_im
#undef c12_11_re
#undef c12_11_im

// output spinor components (the list continues on the following line)
#undef o00_re
#undef o00_im
#undef o01_re
#undef o01_im
#undef o02_re
#undef o02_im
#undef o10_re
#undef o10_im
1.7.3