|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 //J dslash_dagger_dwf_core.h 00002 //J Ver. 09.10.a 00003 00004 // goto HERE to continue checking 00005 00006 00007 //J Q. Where do the diagonal components 00008 //J += (m0-5) psi(x,s) 00009 //J get performed? Not in this hopping 00010 //J file. Here, m0 is the dwf barrier 00011 //J height, related to Andrew P.'s documentation mdwf.pdf 00012 //J by m0= -M5. 00013 //J A. They get carried out using the xpay 00014 //J operations in dslash_dwf_cuda.cu. 00015 //J These are defined in the dslash_dwf_post.h that is 00016 //J included at the end of this file. 00017 // 00018 00019 //J Carry out the 4d operations with this include. 00020 // It does not undefine things. That comes 00021 // at the end of this file, through another include. 00022 //#include "dslash_dagger_core_ante.h" 00023 00024 // *** CUDA DSLASH DAGGER *** 00025 00026 //#define SHARED_FLOATS_PER_THREAD 0 // FIXME 00027 #define SHARED_BYTES_DOUBLE (BLOCK_DIM*SHARED_FLOATS_PER_THREAD*sizeof(double)) 00028 00029 #define SHARED_BYTES_SINGLE (BLOCK_DIM*SHARED_FLOATS_PER_THREAD*sizeof(float)) 00030 00031 // input spinor 00032 #ifdef SPINOR_DOUBLE 00033 #define spinorFloat double 00034 #define i00_re I0.x 00035 #define i00_im I0.y 00036 #define i01_re I1.x 00037 #define i01_im I1.y 00038 #define i02_re I2.x 00039 #define i02_im I2.y 00040 #define i10_re I3.x 00041 #define i10_im I3.y 00042 #define i11_re I4.x 00043 #define i11_im I4.y 00044 #define i12_re I5.x 00045 #define i12_im I5.y 00046 #define i20_re I6.x 00047 #define i20_im I6.y 00048 #define i21_re I7.x 00049 #define i21_im I7.y 00050 #define i22_re I8.x 00051 #define i22_im I8.y 00052 #define i30_re I9.x 00053 #define i30_im I9.y 00054 #define i31_re I10.x 00055 #define i31_im I10.y 00056 #define i32_re I11.x 00057 #define i32_im I11.y 00058 00059 #else 00060 #define spinorFloat float 00061 #define i00_re I0.x 00062 #define i00_im I0.y 00063 #define i01_re I0.z 00064 #define i01_im I0.w 00065 #define i02_re I1.x 00066 #define i02_im I1.y 00067 #define i10_re I1.z 00068 #define i10_im I1.w 00069 #define i11_re I2.x 00070 #define i11_im I2.y 00071 #define i12_re I2.z 00072 #define i12_im I2.w 00073 #define i20_re I3.x 00074 #define i20_im I3.y 00075 #define i21_re I3.z 00076 #define i21_im I3.w 00077 #define i22_re I4.x 00078 #define i22_im I4.y 00079 #define i30_re I4.z 00080 #define i30_im I4.w 00081 #define i31_re I5.x 00082 #define i31_im I5.y 00083 #define i32_re I5.z 00084 #define i32_im I5.w 00085 #endif 00086 00087 // gauge link 00088 #ifdef GAUGE_FLOAT2 00089 #define g00_re G0.x 00090 #define g00_im G0.y 00091 #define g01_re G1.x 00092 #define g01_im G1.y 00093 #define g02_re G2.x 00094 #define g02_im G2.y 00095 #define g10_re G3.x 00096 #define g10_im G3.y 00097 #define g11_re G4.x 00098 #define g11_im G4.y 00099 #define g12_re G5.x 00100 #define g12_im G5.y 00101 #define g20_re G6.x 00102 #define g20_im G6.y 00103 #define g21_re G7.x 00104 #define g21_im G7.y 00105 #define g22_re G8.x 00106 #define g22_im G8.y 00107 // temporaries 00108 #define A_re G9.x 00109 #define A_im G9.y 00110 00111 #else 00112 #define g00_re G0.x 00113 #define g00_im G0.y 00114 #define g01_re G0.z 00115 #define g01_im G0.w 00116 #define g02_re G1.x 00117 #define g02_im G1.y 00118 #define g10_re G1.z 00119 #define g10_im G1.w 00120 #define g11_re G2.x 00121 #define g11_im G2.y 00122 #define g12_re G2.z 00123 #define g12_im G2.w 00124 #define g20_re G3.x 00125 #define g20_im G3.y 00126 #define g21_re G3.z 00127 #define g21_im G3.w 00128 #define g22_re G4.x 00129 #define g22_im G4.y 00130 // temporaries 00131 #define A_re G4.z 00132 #define A_im G4.w 00133 00134 #endif 00135 00136 // conjugated gauge link 00137 #define gT00_re (+g00_re) 00138 #define gT00_im (-g00_im) 00139 #define gT01_re (+g10_re) 00140 #define gT01_im (-g10_im) 00141 #define gT02_re (+g20_re) 00142 #define gT02_im (-g20_im) 00143 #define gT10_re (+g01_re) 00144 #define gT10_im (-g01_im) 00145 #define gT11_re (+g11_re) 00146 #define gT11_im (-g11_im) 00147 #define gT12_re (+g21_re) 00148 #define gT12_im (-g21_im) 00149 #define gT20_re (+g02_re) 00150 #define gT20_im (-g02_im) 00151 #define gT21_re (+g12_re) 00152 #define gT21_im (-g12_im) 00153 #define gT22_re (+g22_re) 00154 #define gT22_im (-g22_im) 00155 00156 00157 // output spinor 00158 volatile spinorFloat o00_re; 00159 volatile spinorFloat o00_im; 00160 volatile spinorFloat o01_re; 00161 volatile spinorFloat o01_im; 00162 volatile spinorFloat o02_re; 00163 volatile spinorFloat o02_im; 00164 volatile spinorFloat o10_re; 00165 volatile spinorFloat o10_im; 00166 volatile spinorFloat o11_re; 00167 volatile spinorFloat o11_im; 00168 volatile spinorFloat o12_re; 00169 volatile spinorFloat o12_im; 00170 volatile spinorFloat o20_re; 00171 volatile spinorFloat o20_im; 00172 volatile spinorFloat o21_re; 00173 volatile spinorFloat o21_im; 00174 volatile spinorFloat o22_re; 00175 volatile spinorFloat o22_im; 00176 volatile spinorFloat o30_re; 00177 volatile spinorFloat o30_im; 00178 volatile spinorFloat o31_re; 00179 volatile spinorFloat o31_im; 00180 volatile spinorFloat o32_re; 00181 volatile spinorFloat o32_im; 00182 00183 00184 00185 #include "read_gauge.h" 00186 //#include "read_clover.h" 00187 #include "io_spinor.h" 00188 00189 int sid = BLOCK_DIM*blockIdx.x + threadIdx.x; 00190 int boundaryCrossings = sid/X1h + sid/(X2*X1h) + sid/(X3*X2*X1h) + sid/(X4*X3*X2*X1h); 00191 int boundaryCrossings4d = sid/X1h + sid/(X2*X1h) + sid/(X3*X2*X1h); 00192 int X = 2*sid + (boundaryCrossings + oddBit) % 2; 00193 int xs = X/(X4*X3*X2*X1); 00194 int x4 = (X/(X3*X2*X1)) % X4; 00195 int x3 = (X/(X2*X1)) % X3; 00196 int x2 = (X/X1) % X2; 00197 int x1 = X % X1; 00198 00199 o00_re = o00_im = 0; 00200 o01_re = o01_im = 0; 00201 o02_re = o02_im = 0; 00202 o10_re = o10_im = 0; 00203 o11_re = o11_im = 0; 00204 o12_re = o12_im = 0; 00205 o20_re = o20_im = 0; 00206 o21_re = o21_im = 0; 00207 o22_re = o22_im = 0; 00208 o30_re = o30_im = 0; 00209 o31_re = o31_im = 0; 00210 o32_re = o32_im = 0; 00211 00212 { 00213 // Projector P0+ 00214 // 1 0 0 i 00215 // 0 1 i 0 00216 // 0 -i 1 0 00217 // -i 0 0 1 00218 00219 int sp_idx = ((x1==X1-1) ? X-(X1-1) : X+1) / 2; 00220 int ga_idx = sid % Vh; 00221 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00222 // read gauge matrix from device memory 00223 READ_GAUGE_MATRIX(GAUGE0TEX, 0); 00224 00225 // read spinor from device memory 00226 READ_SPINOR(SPINORTEX); 00227 00228 // reconstruct gauge matrix 00229 RECONSTRUCT_GAUGE_MATRIX(0); 00230 00231 // project spinor into half spinors 00232 spinorFloat a0_re = +i00_re-i30_im; 00233 spinorFloat a0_im = +i00_im+i30_re; 00234 spinorFloat a1_re = +i01_re-i31_im; 00235 spinorFloat a1_im = +i01_im+i31_re; 00236 spinorFloat a2_re = +i02_re-i32_im; 00237 spinorFloat a2_im = +i02_im+i32_re; 00238 00239 spinorFloat b0_re = +i10_re-i20_im; 00240 spinorFloat b0_im = +i10_im+i20_re; 00241 spinorFloat b1_re = +i11_re-i21_im; 00242 spinorFloat b1_im = +i11_im+i21_re; 00243 spinorFloat b2_re = +i12_re-i22_im; 00244 spinorFloat b2_im = +i12_im+i22_re; 00245 00246 // multiply row 0 00247 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00248 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00249 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00250 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00251 00252 // multiply row 1 00253 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00254 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00255 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00256 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00257 00258 // multiply row 2 00259 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00260 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00261 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00262 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00263 00264 o00_re += A0_re; 00265 o00_im += A0_im; 00266 o10_re += B0_re; 00267 o10_im += B0_im; 00268 o20_re += B0_im; 00269 o20_im -= B0_re; 00270 o30_re += A0_im; 00271 o30_im -= A0_re; 00272 00273 o01_re += A1_re; 00274 o01_im += A1_im; 00275 o11_re += B1_re; 00276 o11_im += B1_im; 00277 o21_re += B1_im; 00278 o21_im -= B1_re; 00279 o31_re += A1_im; 00280 o31_im -= A1_re; 00281 00282 o02_re += A2_re; 00283 o02_im += A2_im; 00284 o12_re += B2_re; 00285 o12_im += B2_im; 00286 o22_re += B2_im; 00287 o22_im -= B2_re; 00288 o32_re += A2_im; 00289 o32_im -= A2_re; 00290 00291 } else { 00292 00293 // read gauge matrix from device memory 00294 READ_GAUGE_MATRIX(GAUGE1TEX, 0); 00295 00296 // read spinor from device memory 00297 READ_SPINOR(SPINORTEX); 00298 00299 // reconstruct gauge matrix 00300 RECONSTRUCT_GAUGE_MATRIX(0); 00301 00302 // project spinor into half spinors 00303 spinorFloat a0_re = +i00_re-i30_im; 00304 spinorFloat a0_im = +i00_im+i30_re; 00305 spinorFloat a1_re = +i01_re-i31_im; 00306 spinorFloat a1_im = +i01_im+i31_re; 00307 spinorFloat a2_re = +i02_re-i32_im; 00308 spinorFloat a2_im = +i02_im+i32_re; 00309 00310 spinorFloat b0_re = +i10_re-i20_im; 00311 spinorFloat b0_im = +i10_im+i20_re; 00312 spinorFloat b1_re = +i11_re-i21_im; 00313 spinorFloat b1_im = +i11_im+i21_re; 00314 spinorFloat b2_re = +i12_re-i22_im; 00315 spinorFloat b2_im = +i12_im+i22_re; 00316 00317 // multiply row 0 00318 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00319 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00320 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00321 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00322 00323 // multiply row 1 00324 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00325 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00326 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00327 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00328 00329 // multiply row 2 00330 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00331 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00332 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00333 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00334 00335 o00_re += A0_re; 00336 o00_im += A0_im; 00337 o10_re += B0_re; 00338 o10_im += B0_im; 00339 o20_re += B0_im; 00340 o20_im -= B0_re; 00341 o30_re += A0_im; 00342 o30_im -= A0_re; 00343 00344 o01_re += A1_re; 00345 o01_im += A1_im; 00346 o11_re += B1_re; 00347 o11_im += B1_im; 00348 o21_re += B1_im; 00349 o21_im -= B1_re; 00350 o31_re += A1_im; 00351 o31_im -= A1_re; 00352 00353 o02_re += A2_re; 00354 o02_im += A2_im; 00355 o12_re += B2_re; 00356 o12_im += B2_im; 00357 o22_re += B2_im; 00358 o22_im -= B2_re; 00359 o32_re += A2_im; 00360 o32_im -= A2_re; 00361 } 00362 } 00363 00364 { 00365 // Projector P0- 00366 // 1 0 0 -i 00367 // 0 1 -i 0 00368 // 0 i 1 0 00369 // i 0 0 1 00370 00371 int sp_idx = ((x1==0) ? X+(X1-1) : X-1) / 2; 00372 int ga_idx = sp_idx % Vh; 00373 00374 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00375 // read gauge matrix from device memory 00376 READ_GAUGE_MATRIX(GAUGE1TEX, 1); 00377 00378 // read spinor from device memory 00379 READ_SPINOR(SPINORTEX); 00380 00381 // reconstruct gauge matrix 00382 RECONSTRUCT_GAUGE_MATRIX(1); 00383 00384 // project spinor into half spinors 00385 spinorFloat a0_re = +i00_re+i30_im; 00386 spinorFloat a0_im = +i00_im-i30_re; 00387 spinorFloat a1_re = +i01_re+i31_im; 00388 spinorFloat a1_im = +i01_im-i31_re; 00389 spinorFloat a2_re = +i02_re+i32_im; 00390 spinorFloat a2_im = +i02_im-i32_re; 00391 00392 spinorFloat b0_re = +i10_re+i20_im; 00393 spinorFloat b0_im = +i10_im-i20_re; 00394 spinorFloat b1_re = +i11_re+i21_im; 00395 spinorFloat b1_im = +i11_im-i21_re; 00396 spinorFloat b2_re = +i12_re+i22_im; 00397 spinorFloat b2_im = +i12_im-i22_re; 00398 00399 // multiply row 0 00400 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00401 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00402 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00403 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00404 00405 // multiply row 1 00406 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00407 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00408 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00409 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00410 00411 // multiply row 2 00412 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00413 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00414 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00415 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00416 00417 o00_re += A0_re; 00418 o00_im += A0_im; 00419 o10_re += B0_re; 00420 o10_im += B0_im; 00421 o20_re -= B0_im; 00422 o20_im += B0_re; 00423 o30_re -= A0_im; 00424 o30_im += A0_re; 00425 00426 o01_re += A1_re; 00427 o01_im += A1_im; 00428 o11_re += B1_re; 00429 o11_im += B1_im; 00430 o21_re -= B1_im; 00431 o21_im += B1_re; 00432 o31_re -= A1_im; 00433 o31_im += A1_re; 00434 00435 o02_re += A2_re; 00436 o02_im += A2_im; 00437 o12_re += B2_re; 00438 o12_im += B2_im; 00439 o22_re -= B2_im; 00440 o22_im += B2_re; 00441 o32_re -= A2_im; 00442 o32_im += A2_re; 00443 } else { 00444 // read gauge matrix from device memory 00445 READ_GAUGE_MATRIX(GAUGE0TEX, 1); 00446 00447 // read spinor from device memory 00448 READ_SPINOR(SPINORTEX); 00449 00450 // reconstruct gauge matrix 00451 RECONSTRUCT_GAUGE_MATRIX(1); 00452 00453 // project spinor into half spinors 00454 spinorFloat a0_re = +i00_re+i30_im; 00455 spinorFloat a0_im = +i00_im-i30_re; 00456 spinorFloat a1_re = +i01_re+i31_im; 00457 spinorFloat a1_im = +i01_im-i31_re; 00458 spinorFloat a2_re = +i02_re+i32_im; 00459 spinorFloat a2_im = +i02_im-i32_re; 00460 00461 spinorFloat b0_re = +i10_re+i20_im; 00462 spinorFloat b0_im = +i10_im-i20_re; 00463 spinorFloat b1_re = +i11_re+i21_im; 00464 spinorFloat b1_im = +i11_im-i21_re; 00465 spinorFloat b2_re = +i12_re+i22_im; 00466 spinorFloat b2_im = +i12_im-i22_re; 00467 00468 // multiply row 0 00469 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00470 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00471 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00472 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00473 00474 // multiply row 1 00475 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00476 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00477 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00478 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00479 00480 // multiply row 2 00481 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00482 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00483 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00484 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00485 00486 o00_re += A0_re; 00487 o00_im += A0_im; 00488 o10_re += B0_re; 00489 o10_im += B0_im; 00490 o20_re -= B0_im; 00491 o20_im += B0_re; 00492 o30_re -= A0_im; 00493 o30_im += A0_re; 00494 00495 o01_re += A1_re; 00496 o01_im += A1_im; 00497 o11_re += B1_re; 00498 o11_im += B1_im; 00499 o21_re -= B1_im; 00500 o21_im += B1_re; 00501 o31_re -= A1_im; 00502 o31_im += A1_re; 00503 00504 o02_re += A2_re; 00505 o02_im += A2_im; 00506 o12_re += B2_re; 00507 o12_im += B2_im; 00508 o22_re -= B2_im; 00509 o22_im += B2_re; 00510 o32_re -= A2_im; 00511 o32_im += A2_re; 00512 } 00513 } 00514 00515 { 00516 // Projector P1+ 00517 // 1 0 0 1 00518 // 0 1 -1 0 00519 // 0 -1 1 0 00520 // 1 0 0 1 00521 00522 int sp_idx = ((x2==X2-1) ? X-(X2-1)*X1 : X+X1) / 2; 00523 int ga_idx = sid % Vh; 00524 00525 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00526 00527 // read gauge matrix from device memory 00528 READ_GAUGE_MATRIX(GAUGE0TEX, 2); 00529 00530 // read spinor from device memory 00531 READ_SPINOR(SPINORTEX); 00532 00533 // reconstruct gauge matrix 00534 RECONSTRUCT_GAUGE_MATRIX(2); 00535 00536 // project spinor into half spinors 00537 spinorFloat a0_re = +i00_re+i30_re; 00538 spinorFloat a0_im = +i00_im+i30_im; 00539 spinorFloat a1_re = +i01_re+i31_re; 00540 spinorFloat a1_im = +i01_im+i31_im; 00541 spinorFloat a2_re = +i02_re+i32_re; 00542 spinorFloat a2_im = +i02_im+i32_im; 00543 00544 spinorFloat b0_re = +i10_re-i20_re; 00545 spinorFloat b0_im = +i10_im-i20_im; 00546 spinorFloat b1_re = +i11_re-i21_re; 00547 spinorFloat b1_im = +i11_im-i21_im; 00548 spinorFloat b2_re = +i12_re-i22_re; 00549 spinorFloat b2_im = +i12_im-i22_im; 00550 00551 // multiply row 0 00552 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00553 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00554 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00555 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00556 00557 // multiply row 1 00558 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00559 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00560 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00561 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00562 00563 // multiply row 2 00564 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00565 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00566 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00567 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00568 00569 o00_re += A0_re; 00570 o00_im += A0_im; 00571 o10_re += B0_re; 00572 o10_im += B0_im; 00573 o20_re -= B0_re; 00574 o20_im -= B0_im; 00575 o30_re += A0_re; 00576 o30_im += A0_im; 00577 00578 o01_re += A1_re; 00579 o01_im += A1_im; 00580 o11_re += B1_re; 00581 o11_im += B1_im; 00582 o21_re -= B1_re; 00583 o21_im -= B1_im; 00584 o31_re += A1_re; 00585 o31_im += A1_im; 00586 00587 o02_re += A2_re; 00588 o02_im += A2_im; 00589 o12_re += B2_re; 00590 o12_im += B2_im; 00591 o22_re -= B2_re; 00592 o22_im -= B2_im; 00593 o32_re += A2_re; 00594 o32_im += A2_im; 00595 00596 } else { 00597 00598 // read gauge matrix from device memory 00599 READ_GAUGE_MATRIX(GAUGE1TEX, 2); 00600 00601 // read spinor from device memory 00602 READ_SPINOR(SPINORTEX); 00603 00604 // reconstruct gauge matrix 00605 RECONSTRUCT_GAUGE_MATRIX(2); 00606 00607 // project spinor into half spinors 00608 spinorFloat a0_re = +i00_re+i30_re; 00609 spinorFloat a0_im = +i00_im+i30_im; 00610 spinorFloat a1_re = +i01_re+i31_re; 00611 spinorFloat a1_im = +i01_im+i31_im; 00612 spinorFloat a2_re = +i02_re+i32_re; 00613 spinorFloat a2_im = +i02_im+i32_im; 00614 00615 spinorFloat b0_re = +i10_re-i20_re; 00616 spinorFloat b0_im = +i10_im-i20_im; 00617 spinorFloat b1_re = +i11_re-i21_re; 00618 spinorFloat b1_im = +i11_im-i21_im; 00619 spinorFloat b2_re = +i12_re-i22_re; 00620 spinorFloat b2_im = +i12_im-i22_im; 00621 00622 // multiply row 0 00623 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00624 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00625 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00626 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00627 00628 // multiply row 1 00629 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00630 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00631 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00632 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00633 00634 // multiply row 2 00635 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00636 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00637 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00638 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00639 00640 o00_re += A0_re; 00641 o00_im += A0_im; 00642 o10_re += B0_re; 00643 o10_im += B0_im; 00644 o20_re -= B0_re; 00645 o20_im -= B0_im; 00646 o30_re += A0_re; 00647 o30_im += A0_im; 00648 00649 o01_re += A1_re; 00650 o01_im += A1_im; 00651 o11_re += B1_re; 00652 o11_im += B1_im; 00653 o21_re -= B1_re; 00654 o21_im -= B1_im; 00655 o31_re += A1_re; 00656 o31_im += A1_im; 00657 00658 o02_re += A2_re; 00659 o02_im += A2_im; 00660 o12_re += B2_re; 00661 o12_im += B2_im; 00662 o22_re -= B2_re; 00663 o22_im -= B2_im; 00664 o32_re += A2_re; 00665 o32_im += A2_im; 00666 00667 } 00668 } 00669 00670 { 00671 // Projector P1- 00672 // 1 0 0 -1 00673 // 0 1 1 0 00674 // 0 1 1 0 00675 // -1 0 0 1 00676 00677 int sp_idx = ((x2==0) ? X+(X2-1)*X1 : X-X1) / 2; 00678 int ga_idx = sp_idx % Vh; 00679 00680 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00681 00682 // read gauge matrix from device memory 00683 READ_GAUGE_MATRIX(GAUGE1TEX, 3); 00684 00685 // read spinor from device memory 00686 READ_SPINOR(SPINORTEX); 00687 00688 // reconstruct gauge matrix 00689 RECONSTRUCT_GAUGE_MATRIX(3); 00690 00691 // project spinor into half spinors 00692 spinorFloat a0_re = +i00_re-i30_re; 00693 spinorFloat a0_im = +i00_im-i30_im; 00694 spinorFloat a1_re = +i01_re-i31_re; 00695 spinorFloat a1_im = +i01_im-i31_im; 00696 spinorFloat a2_re = +i02_re-i32_re; 00697 spinorFloat a2_im = +i02_im-i32_im; 00698 00699 spinorFloat b0_re = +i10_re+i20_re; 00700 spinorFloat b0_im = +i10_im+i20_im; 00701 spinorFloat b1_re = +i11_re+i21_re; 00702 spinorFloat b1_im = +i11_im+i21_im; 00703 spinorFloat b2_re = +i12_re+i22_re; 00704 spinorFloat b2_im = +i12_im+i22_im; 00705 00706 // multiply row 0 00707 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00708 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00709 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00710 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00711 00712 // multiply row 1 00713 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00714 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00715 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00716 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00717 00718 // multiply row 2 00719 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00720 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00721 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00722 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00723 00724 o00_re += A0_re; 00725 o00_im += A0_im; 00726 o10_re += B0_re; 00727 o10_im += B0_im; 00728 o20_re += B0_re; 00729 o20_im += B0_im; 00730 o30_re -= A0_re; 00731 o30_im -= A0_im; 00732 00733 o01_re += A1_re; 00734 o01_im += A1_im; 00735 o11_re += B1_re; 00736 o11_im += B1_im; 00737 o21_re += B1_re; 00738 o21_im += B1_im; 00739 o31_re -= A1_re; 00740 o31_im -= A1_im; 00741 00742 o02_re += A2_re; 00743 o02_im += A2_im; 00744 o12_re += B2_re; 00745 o12_im += B2_im; 00746 o22_re += B2_re; 00747 o22_im += B2_im; 00748 o32_re -= A2_re; 00749 o32_im -= A2_im; 00750 } else { 00751 // read gauge matrix from device memory 00752 READ_GAUGE_MATRIX(GAUGE0TEX, 3); 00753 00754 // read spinor from device memory 00755 READ_SPINOR(SPINORTEX); 00756 00757 // reconstruct gauge matrix 00758 RECONSTRUCT_GAUGE_MATRIX(3); 00759 00760 // project spinor into half spinors 00761 spinorFloat a0_re = +i00_re-i30_re; 00762 spinorFloat a0_im = +i00_im-i30_im; 00763 spinorFloat a1_re = +i01_re-i31_re; 00764 spinorFloat a1_im = +i01_im-i31_im; 00765 spinorFloat a2_re = +i02_re-i32_re; 00766 spinorFloat a2_im = +i02_im-i32_im; 00767 00768 spinorFloat b0_re = +i10_re+i20_re; 00769 spinorFloat b0_im = +i10_im+i20_im; 00770 spinorFloat b1_re = +i11_re+i21_re; 00771 spinorFloat b1_im = +i11_im+i21_im; 00772 spinorFloat b2_re = +i12_re+i22_re; 00773 spinorFloat b2_im = +i12_im+i22_im; 00774 00775 // multiply row 0 00776 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00777 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00778 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00779 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00780 00781 // multiply row 1 00782 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00783 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00784 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00785 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00786 00787 // multiply row 2 00788 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00789 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00790 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00791 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00792 00793 o00_re += A0_re; 00794 o00_im += A0_im; 00795 o10_re += B0_re; 00796 o10_im += B0_im; 00797 o20_re += B0_re; 00798 o20_im += B0_im; 00799 o30_re -= A0_re; 00800 o30_im -= A0_im; 00801 00802 o01_re += A1_re; 00803 o01_im += A1_im; 00804 o11_re += B1_re; 00805 o11_im += B1_im; 00806 o21_re += B1_re; 00807 o21_im += B1_im; 00808 o31_re -= A1_re; 00809 o31_im -= A1_im; 00810 00811 o02_re += A2_re; 00812 o02_im += A2_im; 00813 o12_re += B2_re; 00814 o12_im += B2_im; 00815 o22_re += B2_re; 00816 o22_im += B2_im; 00817 o32_re -= A2_re; 00818 o32_im -= A2_im; 00819 } 00820 } 00821 00822 { 00823 // Projector P2+ 00824 // 1 0 i 0 00825 // 0 1 0 -i 00826 // -i 0 1 0 00827 // 0 i 0 1 00828 00829 int sp_idx = ((x3==X3-1) ? X-(X3-1)*X2*X1 : X+X2*X1) / 2; 00830 int ga_idx = sid % Vh; 00831 00832 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00833 00834 // read gauge matrix from device memory 00835 READ_GAUGE_MATRIX(GAUGE0TEX, 4); 00836 00837 // read spinor from device memory 00838 READ_SPINOR(SPINORTEX); 00839 00840 // reconstruct gauge matrix 00841 RECONSTRUCT_GAUGE_MATRIX(4); 00842 00843 // project spinor into half spinors 00844 spinorFloat a0_re = +i00_re-i20_im; 00845 spinorFloat a0_im = +i00_im+i20_re; 00846 spinorFloat a1_re = +i01_re-i21_im; 00847 spinorFloat a1_im = +i01_im+i21_re; 00848 spinorFloat a2_re = +i02_re-i22_im; 00849 spinorFloat a2_im = +i02_im+i22_re; 00850 00851 spinorFloat b0_re = +i10_re+i30_im; 00852 spinorFloat b0_im = +i10_im-i30_re; 00853 spinorFloat b1_re = +i11_re+i31_im; 00854 spinorFloat b1_im = +i11_im-i31_re; 00855 spinorFloat b2_re = +i12_re+i32_im; 00856 spinorFloat b2_im = +i12_im-i32_re; 00857 00858 // multiply row 0 00859 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00860 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00861 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00862 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00863 00864 // multiply row 1 00865 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00866 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00867 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00868 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00869 00870 // multiply row 2 00871 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00872 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00873 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00874 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00875 00876 o00_re += A0_re; 00877 o00_im += A0_im; 00878 o10_re += B0_re; 00879 o10_im += B0_im; 00880 o20_re += A0_im; 00881 o20_im -= A0_re; 00882 o30_re -= B0_im; 00883 o30_im += B0_re; 00884 00885 o01_re += A1_re; 00886 o01_im += A1_im; 00887 o11_re += B1_re; 00888 o11_im += B1_im; 00889 o21_re += A1_im; 00890 o21_im -= A1_re; 00891 o31_re -= B1_im; 00892 o31_im += B1_re; 00893 00894 o02_re += A2_re; 00895 o02_im += A2_im; 00896 o12_re += B2_re; 00897 o12_im += B2_im; 00898 o22_re += A2_im; 00899 o22_im -= A2_re; 00900 o32_re -= B2_im; 00901 o32_im += B2_re; 00902 00903 } else { 00904 00905 // read gauge matrix from device memory 00906 READ_GAUGE_MATRIX(GAUGE1TEX, 4); 00907 00908 // read spinor from device memory 00909 READ_SPINOR(SPINORTEX); 00910 00911 // reconstruct gauge matrix 00912 RECONSTRUCT_GAUGE_MATRIX(4); 00913 00914 // project spinor into half spinors 00915 spinorFloat a0_re = +i00_re-i20_im; 00916 spinorFloat a0_im = +i00_im+i20_re; 00917 spinorFloat a1_re = +i01_re-i21_im; 00918 spinorFloat a1_im = +i01_im+i21_re; 00919 spinorFloat a2_re = +i02_re-i22_im; 00920 spinorFloat a2_im = +i02_im+i22_re; 00921 00922 spinorFloat b0_re = +i10_re+i30_im; 00923 spinorFloat b0_im = +i10_im-i30_re; 00924 spinorFloat b1_re = +i11_re+i31_im; 00925 spinorFloat b1_im = +i11_im-i31_re; 00926 spinorFloat b2_re = +i12_re+i32_im; 00927 spinorFloat b2_im = +i12_im-i32_re; 00928 00929 // multiply row 0 00930 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00931 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00932 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00933 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00934 00935 // multiply row 1 00936 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00937 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00938 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00939 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00940 00941 // multiply row 2 00942 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00943 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00944 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00945 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00946 00947 o00_re += A0_re; 00948 o00_im += A0_im; 00949 o10_re += B0_re; 00950 o10_im += B0_im; 00951 o20_re += A0_im; 00952 o20_im -= A0_re; 00953 o30_re -= B0_im; 00954 o30_im += B0_re; 00955 00956 o01_re += A1_re; 00957 o01_im += A1_im; 00958 o11_re += B1_re; 00959 o11_im += B1_im; 00960 o21_re += A1_im; 00961 o21_im -= A1_re; 00962 o31_re -= B1_im; 00963 o31_im += B1_re; 00964 00965 o02_re += A2_re; 00966 o02_im += A2_im; 00967 o12_re += B2_re; 00968 o12_im += B2_im; 00969 o22_re += A2_im; 00970 o22_im -= A2_re; 00971 o32_re -= B2_im; 00972 o32_im += B2_re; 00973 } 00974 } 00975 00976 { 00977 // Projector P2- 00978 // 1 0 -i 0 00979 // 0 1 0 i 00980 // i 0 1 0 00981 // 0 -i 0 1 00982 00983 int sp_idx = ((x3==0) ? X+(X3-1)*X2*X1 : X-X2*X1) / 2; 00984 int ga_idx = sp_idx % Vh; 00985 00986 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00987 00988 // read gauge matrix from device memory 00989 READ_GAUGE_MATRIX(GAUGE1TEX, 5); 00990 00991 // read spinor from device memory 00992 READ_SPINOR(SPINORTEX); 00993 00994 // reconstruct gauge matrix 00995 RECONSTRUCT_GAUGE_MATRIX(5); 00996 00997 // project spinor into half spinors 00998 spinorFloat a0_re = +i00_re+i20_im; 00999 spinorFloat a0_im = +i00_im-i20_re; 01000 spinorFloat a1_re = +i01_re+i21_im; 01001 spinorFloat a1_im = +i01_im-i21_re; 01002 spinorFloat a2_re = +i02_re+i22_im; 01003 spinorFloat a2_im = +i02_im-i22_re; 01004 01005 spinorFloat b0_re = +i10_re-i30_im; 01006 spinorFloat b0_im = +i10_im+i30_re; 01007 spinorFloat b1_re = +i11_re-i31_im; 01008 spinorFloat b1_im = +i11_im+i31_re; 01009 spinorFloat b2_re = +i12_re-i32_im; 01010 spinorFloat b2_im = +i12_im+i32_re; 01011 01012 // multiply row 0 01013 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01014 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01015 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01016 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01017 01018 // multiply row 1 01019 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01020 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01021 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01022 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01023 01024 // multiply row 2 01025 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01026 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01027 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01028 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01029 01030 o00_re += A0_re; 01031 o00_im += A0_im; 01032 o10_re += B0_re; 01033 o10_im += B0_im; 01034 o20_re -= A0_im; 01035 o20_im += A0_re; 01036 o30_re += B0_im; 01037 o30_im -= B0_re; 01038 01039 o01_re += A1_re; 01040 o01_im += A1_im; 01041 o11_re += B1_re; 01042 o11_im += B1_im; 01043 o21_re -= A1_im; 01044 o21_im += A1_re; 01045 o31_re += B1_im; 01046 o31_im -= B1_re; 01047 01048 o02_re += A2_re; 01049 o02_im += A2_im; 01050 o12_re += B2_re; 01051 o12_im += B2_im; 01052 o22_re -= A2_im; 01053 o22_im += A2_re; 01054 o32_re += B2_im; 01055 o32_im -= B2_re; 01056 01057 } else { 01058 01059 // read gauge matrix from device memory 01060 READ_GAUGE_MATRIX(GAUGE0TEX, 5); 01061 01062 // read spinor from device memory 01063 READ_SPINOR(SPINORTEX); 01064 01065 // reconstruct gauge matrix 01066 RECONSTRUCT_GAUGE_MATRIX(5); 01067 01068 // project spinor into half spinors 01069 spinorFloat a0_re = +i00_re+i20_im; 01070 spinorFloat a0_im = +i00_im-i20_re; 01071 spinorFloat a1_re = +i01_re+i21_im; 01072 spinorFloat a1_im = +i01_im-i21_re; 01073 spinorFloat a2_re = +i02_re+i22_im; 01074 spinorFloat a2_im = +i02_im-i22_re; 01075 01076 spinorFloat b0_re = +i10_re-i30_im; 01077 spinorFloat b0_im = +i10_im+i30_re; 01078 spinorFloat b1_re = +i11_re-i31_im; 01079 spinorFloat b1_im = +i11_im+i31_re; 01080 spinorFloat b2_re = +i12_re-i32_im; 01081 spinorFloat b2_im = +i12_im+i32_re; 01082 01083 // multiply row 0 01084 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01085 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01086 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01087 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01088 01089 // multiply row 1 01090 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01091 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01092 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01093 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01094 01095 // multiply row 2 01096 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01097 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01098 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01099 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01100 01101 o00_re += A0_re; 01102 o00_im += A0_im; 01103 o10_re += B0_re; 01104 o10_im += B0_im; 01105 o20_re -= A0_im; 01106 o20_im += A0_re; 01107 o30_re += B0_im; 01108 o30_im -= B0_re; 01109 01110 o01_re += A1_re; 01111 o01_im += A1_im; 01112 o11_re += B1_re; 01113 o11_im += B1_im; 01114 o21_re -= A1_im; 01115 o21_im += A1_re; 01116 o31_re += B1_im; 01117 o31_im -= B1_re; 01118 01119 o02_re += A2_re; 01120 o02_im += A2_im; 01121 o12_re += B2_re; 01122 o12_im += B2_im; 01123 o22_re -= A2_im; 01124 o22_im += A2_re; 01125 o32_re += B2_im; 01126 o32_im -= B2_re; 01127 01128 } 01129 } 01130 01131 { 01132 // Projector P3+ 01133 // 2 0 0 0 01134 // 0 2 0 0 01135 // 0 0 0 0 01136 // 0 0 0 0 01137 01138 int sp_idx = ((x4==X4-1) ? X-(X4-1)*X3*X2*X1 : X+X3*X2*X1) / 2; 01139 int ga_idx = sid % Vh; 01140 01141 if (gauge_fixed && ga_idx < (X4-1)*X1h*X2*X3) { 01142 // read spinor from device memory 01143 READ_SPINOR_UP(SPINORTEX); 01144 01145 // project spinor into half spinors 01146 spinorFloat a0_re = +2*i00_re; 01147 spinorFloat a0_im = +2*i00_im; 01148 spinorFloat a1_re = +2*i01_re; 01149 spinorFloat a1_im = +2*i01_im; 01150 spinorFloat a2_re = +2*i02_re; 01151 spinorFloat a2_im = +2*i02_im; 01152 01153 spinorFloat b0_re = +2*i10_re; 01154 spinorFloat b0_im = +2*i10_im; 01155 spinorFloat b1_re = +2*i11_re; 01156 spinorFloat b1_im = +2*i11_im; 01157 spinorFloat b2_re = +2*i12_re; 01158 spinorFloat b2_im = +2*i12_im; 01159 01160 // identity gauge matrix 01161 spinorFloat A0_re = a0_re; spinorFloat A0_im = a0_im; 01162 spinorFloat B0_re = b0_re; spinorFloat B0_im = b0_im; 01163 spinorFloat A1_re = a1_re; spinorFloat A1_im = a1_im; 01164 spinorFloat B1_re = b1_re; spinorFloat B1_im = b1_im; 01165 spinorFloat A2_re = a2_re; spinorFloat A2_im = a2_im; 01166 spinorFloat B2_re = b2_re; spinorFloat B2_im = b2_im; 01167 01168 o00_re += A0_re; 01169 o00_im += A0_im; 01170 o10_re += B0_re; 01171 o10_im += B0_im; 01172 01173 o01_re += A1_re; 01174 o01_im += A1_im; 01175 o11_re += B1_re; 01176 o11_im += B1_im; 01177 01178 o02_re += A2_re; 01179 o02_im += A2_im; 01180 o12_re += B2_re; 01181 o12_im += B2_im; 01182 01183 } else { 01184 01185 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 01186 01187 // read gauge matrix from device memory 01188 READ_GAUGE_MATRIX(GAUGE0TEX, 6); 01189 01190 // read spinor from device memory 01191 READ_SPINOR_UP(SPINORTEX); 01192 01193 // reconstruct gauge matrix 01194 RECONSTRUCT_GAUGE_MATRIX(6); 01195 01196 // project spinor into half spinors 01197 spinorFloat a0_re = +2*i00_re; 01198 spinorFloat a0_im = +2*i00_im; 01199 spinorFloat a1_re = +2*i01_re; 01200 spinorFloat a1_im = +2*i01_im; 01201 spinorFloat a2_re = +2*i02_re; 01202 spinorFloat a2_im = +2*i02_im; 01203 01204 spinorFloat b0_re = +2*i10_re; 01205 spinorFloat b0_im = +2*i10_im; 01206 spinorFloat b1_re = +2*i11_re; 01207 spinorFloat b1_im = +2*i11_im; 01208 spinorFloat b2_re = +2*i12_re; 01209 spinorFloat b2_im = +2*i12_im; 01210 01211 // multiply row 0 01212 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 01213 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 01214 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 01215 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 01216 01217 // multiply row 1 01218 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 01219 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 01220 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 01221 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 01222 01223 // multiply row 2 01224 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 01225 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 01226 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 01227 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 01228 01229 o00_re += A0_re; 01230 o00_im += A0_im; 01231 o10_re += B0_re; 01232 o10_im += B0_im; 01233 01234 o01_re += A1_re; 01235 o01_im += A1_im; 01236 o11_re += B1_re; 01237 o11_im += B1_im; 01238 01239 o02_re += A2_re; 01240 o02_im += A2_im; 01241 o12_re += B2_re; 01242 o12_im += B2_im; 01243 } else { 01244 01245 // read gauge matrix from device memory 01246 READ_GAUGE_MATRIX(GAUGE1TEX, 6); 01247 01248 // read spinor from device memory 01249 READ_SPINOR_UP(SPINORTEX); 01250 01251 // reconstruct gauge matrix 01252 RECONSTRUCT_GAUGE_MATRIX(6); 01253 01254 // project spinor into half spinors 01255 spinorFloat a0_re = +2*i00_re; 01256 spinorFloat a0_im = +2*i00_im; 01257 spinorFloat a1_re = +2*i01_re; 01258 spinorFloat a1_im = +2*i01_im; 01259 spinorFloat a2_re = +2*i02_re; 01260 spinorFloat a2_im = +2*i02_im; 01261 01262 spinorFloat b0_re = +2*i10_re; 01263 spinorFloat b0_im = +2*i10_im; 01264 spinorFloat b1_re = +2*i11_re; 01265 spinorFloat b1_im = +2*i11_im; 01266 spinorFloat b2_re = +2*i12_re; 01267 spinorFloat b2_im = +2*i12_im; 01268 01269 // multiply row 0 01270 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 01271 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 01272 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 01273 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 01274 01275 // multiply row 1 01276 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 01277 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 01278 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 01279 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 01280 01281 // multiply row 2 01282 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 01283 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 01284 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 01285 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 01286 01287 o00_re += A0_re; 01288 o00_im += A0_im; 01289 o10_re += B0_re; 01290 o10_im += B0_im; 01291 01292 o01_re += A1_re; 01293 o01_im += A1_im; 01294 o11_re += B1_re; 01295 o11_im += B1_im; 01296 01297 o02_re += A2_re; 01298 o02_im += A2_im; 01299 o12_re += B2_re; 01300 o12_im += B2_im; 01301 01302 } 01303 } 01304 } 01305 01306 { 01307 // Projector P3- 01308 // 0 0 0 0 01309 // 0 0 0 0 01310 // 0 0 2 0 01311 // 0 0 0 2 01312 01313 int sp_idx = ((x4==0) ? X+(X4-1)*X3*X2*X1 : X-X3*X2*X1) / 2; 01314 int ga_idx = sp_idx % Vh; 01315 01316 if (gauge_fixed && ga_idx < (X4-1)*X1h*X2*X3) { 01317 // read spinor from device memory 01318 READ_SPINOR_DOWN(SPINORTEX); 01319 01320 // project spinor into half spinors 01321 spinorFloat a0_re = +2*i20_re; 01322 spinorFloat a0_im = +2*i20_im; 01323 spinorFloat a1_re = +2*i21_re; 01324 spinorFloat a1_im = +2*i21_im; 01325 spinorFloat a2_re = +2*i22_re; 01326 spinorFloat a2_im = +2*i22_im; 01327 01328 spinorFloat b0_re = +2*i30_re; 01329 spinorFloat b0_im = +2*i30_im; 01330 spinorFloat b1_re = +2*i31_re; 01331 spinorFloat b1_im = +2*i31_im; 01332 spinorFloat b2_re = +2*i32_re; 01333 spinorFloat b2_im = +2*i32_im; 01334 01335 // identity gauge matrix 01336 spinorFloat A0_re = a0_re; spinorFloat A0_im = a0_im; 01337 spinorFloat B0_re = b0_re; spinorFloat B0_im = b0_im; 01338 spinorFloat A1_re = a1_re; spinorFloat A1_im = a1_im; 01339 spinorFloat B1_re = b1_re; spinorFloat B1_im = b1_im; 01340 spinorFloat A2_re = a2_re; spinorFloat A2_im = a2_im; 01341 spinorFloat B2_re = b2_re; spinorFloat B2_im = b2_im; 01342 01343 o20_re += A0_re; 01344 o20_im += A0_im; 01345 o30_re += B0_re; 01346 o30_im += B0_im; 01347 01348 o21_re += A1_re; 01349 o21_im += A1_im; 01350 o31_re += B1_re; 01351 o31_im += B1_im; 01352 01353 o22_re += A2_re; 01354 o22_im += A2_im; 01355 o32_re += B2_re; 01356 o32_im += B2_im; 01357 01358 } else { 01359 01360 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 01361 01362 // read gauge matrix from device memory 01363 READ_GAUGE_MATRIX(GAUGE1TEX, 7); 01364 01365 // read spinor from device memory 01366 READ_SPINOR_DOWN(SPINORTEX); 01367 01368 // reconstruct gauge matrix 01369 RECONSTRUCT_GAUGE_MATRIX(7); 01370 01371 // project spinor into half spinors 01372 spinorFloat a0_re = +2*i20_re; 01373 spinorFloat a0_im = +2*i20_im; 01374 spinorFloat a1_re = +2*i21_re; 01375 spinorFloat a1_im = +2*i21_im; 01376 spinorFloat a2_re = +2*i22_re; 01377 spinorFloat a2_im = +2*i22_im; 01378 01379 spinorFloat b0_re = +2*i30_re; 01380 spinorFloat b0_im = +2*i30_im; 01381 spinorFloat b1_re = +2*i31_re; 01382 spinorFloat b1_im = +2*i31_im; 01383 spinorFloat b2_re = +2*i32_re; 01384 spinorFloat b2_im = +2*i32_im; 01385 01386 // multiply row 0 01387 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01388 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01389 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01390 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01391 01392 // multiply row 1 01393 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01394 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01395 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01396 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01397 01398 // multiply row 2 01399 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01400 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01401 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01402 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01403 01404 o20_re += A0_re; 01405 o20_im += A0_im; 01406 o30_re += B0_re; 01407 o30_im += B0_im; 01408 01409 o21_re += A1_re; 01410 o21_im += A1_im; 01411 o31_re += B1_re; 01412 o31_im += B1_im; 01413 01414 o22_re += A2_re; 01415 o22_im += A2_im; 01416 o32_re += B2_re; 01417 o32_im += B2_im; 01418 01419 } else { 01420 01421 // read gauge matrix from device memory 01422 READ_GAUGE_MATRIX(GAUGE0TEX, 7); 01423 01424 // read spinor from device memory 01425 READ_SPINOR_DOWN(SPINORTEX); 01426 01427 // reconstruct gauge matrix 01428 RECONSTRUCT_GAUGE_MATRIX(7); 01429 01430 // project spinor into half spinors 01431 spinorFloat a0_re = +2*i20_re; 01432 spinorFloat a0_im = +2*i20_im; 01433 spinorFloat a1_re = +2*i21_re; 01434 spinorFloat a1_im = +2*i21_im; 01435 spinorFloat a2_re = +2*i22_re; 01436 spinorFloat a2_im = +2*i22_im; 01437 01438 spinorFloat b0_re = +2*i30_re; 01439 spinorFloat b0_im = +2*i30_im; 01440 spinorFloat b1_re = +2*i31_re; 01441 spinorFloat b1_im = +2*i31_im; 01442 spinorFloat b2_re = +2*i32_re; 01443 spinorFloat b2_im = +2*i32_im; 01444 01445 // multiply row 0 01446 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01447 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01448 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01449 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01450 01451 // multiply row 1 01452 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01453 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01454 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01455 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01456 01457 // multiply row 2 01458 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01459 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01460 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01461 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01462 01463 o20_re += A0_re; 01464 o20_im += A0_im; 01465 o30_re += B0_re; 01466 o30_im += B0_im; 01467 01468 o21_re += A1_re; 01469 o21_im += A1_im; 01470 o31_re += B1_re; 01471 o31_im += B1_im; 01472 01473 o22_re += A2_re; 01474 o22_im += A2_im; 01475 o32_re += B2_re; 01476 o32_im += B2_im; 01477 01478 } 01479 } 01480 } 01481 01482 01483 01484 01485 //J ---------------------------------- 01486 //J --- DWF code for 5th dimension --- 01487 //J ---------------------------------- 01488 // 01489 //J Begin scope. 01490 { 01491 //J TODO Insert/check handler for s-direction here. 01492 01493 //J Decided to not change to chiral basis. Then: 01494 // 2 P_+ = 2 P_R = 1 1 01495 // 1 1 01496 // --- Begin right-handed spinor projection. --- 01497 { 01498 //J We are right-handed, so for the dslash_dagger we hop backwards. If we are at 01499 //J boundary in s-direction, special 01500 //J things will need to be done. xs is defined in dslash_dagger_core_ante.h. 01501 //J See near Line 328. N is the 4d volume; cf. quda.h. 01502 //J Cf. hand-written notes 8/6/09 for check of logic. 01503 //J The logic sets xs to the s-coordinate of the output 01504 //J spinor, which is accumulated by this thread. 01505 //J I.e., it uses the thread index to determine xs. 01506 int sp_idx = ((xs==0) ? X+(Ls-1)*2*Vh : X-2*Vh) / 2; 01507 // --- Read spinor from device memory. --- 01508 //J Q. How does it know which direction to hop in? 01509 //J A. It uses sp_idx as the origin and picks up 0*Vh_5d ... 5*Vh_5d 01510 //J offsets in the READ_SPINOR_UP that is below. 01511 //J This has to do with the "concurrency" optimization. 01512 //J Q. Where does Vh_5d get set and does it know about the dwf 01513 //J modification? Does it care? 01514 //J 01515 // 01516 READ_SPINOR(SPINORTEX); 01517 01518 if (xs != 0) { 01519 //J OK, now the input spinor should be at: 01520 //J 0 < s <= Ls-1 01521 // 01522 //J Project spinor into half spinors, i.e., this is the term 01523 //J " + 2 P_R psi(s-1) " 01524 01525 //J ------------------------------------ 01526 //J --- Dirac index 0, Colors 0,1,2. --- 01527 //J ------------------------------------ 01528 //J dagger takes P_R instead of P_L 01529 o00_re += i00_re+i20_re; //ok 01530 o00_im += i00_im+i20_im; //ok 01531 o01_re += i01_re+i21_re; //ok 01532 o01_im += i01_im+i21_im; //ok 01533 o02_re += i02_re+i22_re; //ok 01534 o02_im += i02_im+i22_im; //ok 01535 01536 //J ------------------------------------- 01537 //J --- Dirac index 1, Colors 0,1,2. --- 01538 //J ------------------------------------- 01539 o10_re += i10_re+i30_re; //ok 01540 o10_im += i10_im+i30_im; //ok 01541 o11_re += i11_re+i31_re; //ok 01542 o11_im += i11_im+i31_im; //ok 01543 o12_re += i12_re+i32_re; //ok 01544 o12_im += i12_im+i32_im; //ok 01545 01546 //J ------------------------------------ 01547 //J --- Dirac index 2, Colors 0,1,2. --- 01548 //J ------------------------------------ 01549 o20_re += i00_re+i20_re; //ok 01550 o20_im += i00_im+i20_im; //ok 01551 o21_re += i01_re+i21_re; //ok 01552 o21_im += i01_im+i21_im; //ok 01553 o22_re += i02_re+i22_re; //ok 01554 o22_im += i02_im+i22_im; //ok 01555 01556 //J ------------------------------------- 01557 //J --- Dirac index 3, Colors 0,1,2. --- 01558 //J ------------------------------------- 01559 // color 0 (second index) 01560 o30_re += i10_re+i30_re; //ok 01561 o30_im += i10_im+i30_im; //ok 01562 // color 1 (second index) 01563 o31_re += i11_re+i31_re; //ok 01564 o31_im += i11_im+i31_im; //ok 01565 // color 2 (second index) 01566 o32_re += i12_re+i32_re; //ok 01567 o32_im += i12_im+i32_im; //ok 01568 01569 } // End (x,0) < (x,s) <= (x,Ls-1). 01570 else { 01571 //J LH boundary s=0, backwards hop to Ls-1. 01572 //J Term to add: -mferm*P_R*psi(x,Ls-1) 01573 //J With any luck, sp_idx is linear equiv. to "(x,Ls-1)" 01574 //J Above, we set: 01575 //J sp_idx= (X+(Ls-1)*X4*X3*X2*X1)/2 (*). 01576 //J efs: do some case examples where xs=0 comes out of 01577 //J dslash_ante_core.h procedure, and check that sp_idx is 01578 //J really coming out correct (and in permissable range) 01579 //J in the operation (*). 01580 //J We need mferm to get passed. A modification 01581 //J was made to DD_PARAM2 in the C preprocessing file 01582 //J dslash_dwf_def.h, adding 01583 //J an extra argument to the kernel declarations. 01584 // 01585 //J --- Dirac index 0, Colors 0,1,2. --- 01586 // color 0 (second index) 01587 o00_re += -mferm*(i00_re+i20_re); //ok 01588 o00_im += -mferm*(i00_im+i20_im); //ok 01589 // color 1 01590 o01_re += -mferm*(i01_re+i21_re); //ok 01591 o01_im += -mferm*(i01_im+i21_im); //ok 01592 // color 2 01593 o02_re += -mferm*(i02_re+i22_re); //ok 01594 o02_im += -mferm*(i02_im+i22_im); //ok 01595 01596 //J --- Dirac index 1, Colors 0,1,2. --- 01597 // color 0 01598 o10_re += -mferm*(i10_re+i30_re); //ok 01599 o10_im += -mferm*(i10_im+i30_im); //ok 01600 // color 1 01601 o11_re += -mferm*(i11_re+i31_re); //ok 01602 o11_im += -mferm*(i11_im+i31_im); //ok 01603 // color 2 01604 o12_re += -mferm*(i12_re+i32_re); //ok 01605 o12_im += -mferm*(i12_im+i32_im); //ok 01606 01607 //J --- Dirac index 2, Colors 0,1,2. --- 01608 // color 0 (second index) 01609 o20_re += -mferm*(i00_re+i20_re); //ok 01610 o20_im += -mferm*(i00_im+i20_im); //ok 01611 // color 1 01612 o21_re += -mferm*(i01_re+i21_re); //ok 01613 o21_im += -mferm*(i01_im+i21_im); //ok 01614 // color 2 01615 o22_re += -mferm*(i02_re+i22_re); //ok 01616 o22_im += -mferm*(i02_im+i22_im); //ok 01617 01618 //J --- Dirac index 3, Colors 0,1,2. --- 01619 // color 0 01620 o30_re += -mferm*(i10_re+i30_re); //ok 01621 o30_im += -mferm*(i10_im+i30_im); //ok 01622 // color 1 01623 o31_re += -mferm*(i11_re+i31_re); //ok 01624 o31_im += -mferm*(i11_im+i31_im); //ok 01625 // color 2 01626 o32_re += -mferm*(i12_re+i32_re); //ok 01627 o32_im += -mferm*(i12_im+i32_im); //ok 01628 01629 } // End (x,s)=(x,0) 01630 } 01631 // --- End of right-handed spinor projection. --- 01632 01633 // In the GPU Dirac matrix basis: 01634 // 2 P_- = 2 P_L = 1 -1 01635 // -1 1 01636 //J Begin scope for 2 P_L projection of forward-hopped spinor. 01637 { 01638 //J For P_L spinor, dslash_dagger, we hop forwards. 01639 01640 //J This bit mimics what is done for x4==X4-1 in dslash_core_ante.h. 01641 //J 01642 //J Checked logic w/ case examples. 01643 //J Cf. hand-written notes 8/6/09 for check of logic. 01644 int sp_idx = ((xs==(Ls-1)) ? X-(Ls-1)*2*Vh : X+2*Vh) / 2; 01645 01646 //J Read spinor from device memory. 01647 // 01648 READ_SPINOR(SPINORTEX); 01649 01650 // 01651 // 01652 if ( xs < (Ls-1) ) { 01653 //J Case of not at RH boundary. Then we just do += P_L psi(s+1). 01654 01655 //J ------------------------------------ 01656 //J --- Dirac index 0, Colors 0,1,2. --- 01657 //J ------------------------------------ 01658 // color 0 (second index) 01659 o00_re += i00_re-i20_re; //ok 01660 o00_im += i00_im-i20_im; //ok 01661 // color 1 (second index) 01662 o01_re += i01_re-i21_re; //ok 01663 o01_im += i01_im-i21_im; //ok 01664 // color 2 (second index) 01665 o02_re += i02_re-i22_re; //ok 01666 o02_im += i02_im-i22_im; //ok 01667 01668 //J ------------------------------------- 01669 //J --- Dirac index 1, Colors 0,1,2. --- 01670 //J ------------------------------------- 01671 // color 0 (second index) 01672 o10_re += i10_re-i30_re; //ok 01673 o10_im += i10_im-i30_im; //ok 01674 // color 1 (second index) 01675 o11_re += i11_re-i31_re; //ok 01676 o11_im += i11_im-i31_im; //ok 01677 // color 2 (second index) 01678 o12_re += i12_re-i32_re; //ok 01679 o12_im += i12_im-i32_im; //ok 01680 01681 //J ------------------------------------ 01682 //J --- Dirac index 2, Colors 0,1,2. --- 01683 //J ------------------------------------ 01684 // color 0 (second index) 01685 o20_re += -i00_re+i20_re; //ok 01686 o20_im += -i00_im+i20_im; //ok 01687 // color 1 (second index) 01688 o21_re += -i01_re+i21_re; //ok 01689 o21_im += -i01_im+i21_im; //ok 01690 // color 2 (second index) 01691 o22_re += -i02_re+i22_re; //ok 01692 o22_im += -i02_im+i22_im; //ok 01693 01694 //J ------------------------------------- 01695 //J --- Dirac index 3, Colors 0,1,2. --- 01696 //J ------------------------------------- 01697 // color 0 (second index) 01698 o30_re += -i10_re+i30_re; //ok 01699 o30_im += -i10_im+i30_im; //ok 01700 // color 1 (second index) 01701 o31_re += -i11_re+i31_re; //ok 01702 o31_im += -i11_im+i31_im; //ok 01703 // color 2 (second index) 01704 o32_re += -i12_re+i32_re; //ok 01705 o32_im += -i12_im+i32_im; //ok 01706 01707 } // End (x,0) <= (x,s) < (x,Ls-1). 01708 else { 01709 //J RH boundary s=Ls-1, forwards hop to s=0. 01710 //J Term to add: -mferm*P_L*psi(x,0) 01711 01712 //J --- Dirac index 0, Colors 0,1,2. --- 01713 // color 0 (second index) 01714 o00_re += -mferm*(i00_re-i20_re); //ok 01715 o00_im += -mferm*(i00_im-i20_im); //ok 01716 // color 1 01717 o01_re += -mferm*(i01_re-i21_re); //ok 01718 o01_im += -mferm*(i01_im-i21_im); //ok 01719 // color 2 01720 o02_re += -mferm*(i02_re-i22_re); //ok 01721 o02_im += -mferm*(i02_im-i22_im); //ok 01722 01723 //J --- Dirac index 1, Colors 0,1,2. --- 01724 // color 0 01725 o10_re += -mferm*(i10_re-i30_re); //ok 01726 o10_im += -mferm*(i10_im-i30_im); //ok 01727 // color 1 01728 o11_re += -mferm*(i11_re-i31_re); //ok 01729 o11_im += -mferm*(i11_im-i31_im); //ok 01730 // color 2 01731 o12_re += -mferm*(i12_re-i32_re); //ok 01732 o12_im += -mferm*(i12_im-i32_im); //ok 01733 01734 //J --- Dirac index 2, Colors 0,1,2. --- 01735 // color 0 (second index) 01736 o20_re += -mferm*(-i00_re+i20_re); //ok 01737 o20_im += -mferm*(-i00_im+i20_im); //ok 01738 // color 1 01739 o21_re += -mferm*(-i01_re+i21_re); //ok 01740 o21_im += -mferm*(-i01_im+i21_im); //ok 01741 // color 2 01742 o22_re += -mferm*(-i02_re+i22_re); //ok 01743 o22_im += -mferm*(-i02_im+i22_im); //ok 01744 01745 //J --- Dirac index 3, Colors 0,1,2. --- 01746 // color 0 01747 o30_re += -mferm*(-i10_re+i30_re); //ok 01748 o30_im += -mferm*(-i10_im+i30_im); //ok 01749 // color 1 01750 o31_re += -mferm*(-i11_re+i31_re); //ok 01751 o31_im += -mferm*(-i11_im+i31_im); //ok 01752 // color 2 01753 o32_re += -mferm*(-i12_re+i32_re); //ok 01754 o32_im += -mferm*(-i12_im+i32_im); //ok 01755 // 01756 } // End (x,s)=(x,Ls-1) 01757 } 01758 // ----- end dwf s-direction ---- 01759 01760 } // end s-direction block 01761 01762 01763 // Perform the DSLASH_XPAY operations. 01764 // Undefine all the macros. TODO Make sure that this 01765 // is working right for the diagonal terms of DWF. 01766 //#include "dslash_dagger_core_post.h" 01767 01768 01769 #ifdef DSLASH_XPAY 01770 READ_ACCUM(ACCUMTEX) 01771 #ifdef SPINOR_DOUBLE 01772 o00_re = a*o00_re + accum0.x; 01773 o00_im = a*o00_im + accum0.y; 01774 o01_re = a*o01_re + accum1.x; 01775 o01_im = a*o01_im + accum1.y; 01776 o02_re = a*o02_re + accum2.x; 01777 o02_im = a*o02_im + accum2.y; 01778 o10_re = a*o10_re + accum3.x; 01779 o10_im = a*o10_im + accum3.y; 01780 o11_re = a*o11_re + accum4.x; 01781 o11_im = a*o11_im + accum4.y; 01782 o12_re = a*o12_re + accum5.x; 01783 o12_im = a*o12_im + accum5.y; 01784 o20_re = a*o20_re + accum6.x; 01785 o20_im = a*o20_im + accum6.y; 01786 o21_re = a*o21_re + accum7.x; 01787 o21_im = a*o21_im + accum7.y; 01788 o22_re = a*o22_re + accum8.x; 01789 o22_im = a*o22_im + accum8.y; 01790 o30_re = a*o30_re + accum9.x; 01791 o30_im = a*o30_im + accum9.y; 01792 o31_re = a*o31_re + accum10.x; 01793 o31_im = a*o31_im + accum10.y; 01794 o32_re = a*o32_re + accum11.x; 01795 o32_im = a*o32_im + accum11.y; 01796 #else 01797 o00_re = a*o00_re + accum0.x; 01798 o00_im = a*o00_im + accum0.y; 01799 o01_re = a*o01_re + accum0.z; 01800 o01_im = a*o01_im + accum0.w; 01801 o02_re = a*o02_re + accum1.x; 01802 o02_im = a*o02_im + accum1.y; 01803 o10_re = a*o10_re + accum1.z; 01804 o10_im = a*o10_im + accum1.w; 01805 o11_re = a*o11_re + accum2.x; 01806 o11_im = a*o11_im + accum2.y; 01807 o12_re = a*o12_re + accum2.z; 01808 o12_im = a*o12_im + accum2.w; 01809 o20_re = a*o20_re + accum3.x; 01810 o20_im = a*o20_im + accum3.y; 01811 o21_re = a*o21_re + accum3.z; 01812 o21_im = a*o21_im + accum3.w; 01813 o22_re = a*o22_re + accum4.x; 01814 o22_im = a*o22_im + accum4.y; 01815 o30_re = a*o30_re + accum4.z; 01816 o30_im = a*o30_im + accum4.w; 01817 o31_re = a*o31_re + accum5.x; 01818 o31_im = a*o31_im + accum5.y; 01819 o32_re = a*o32_re + accum5.z; 01820 o32_im = a*o32_im + accum5.w; 01821 #endif // DD_SPREC 01822 #endif // DSLASH_XPAY 01823 01824 01825 // write spinor field back to device memory 01826 WRITE_SPINOR(); 01827 01828 // undefine to prevent warning when precision is changed 01829 #undef spinorFloat 01830 #undef A_re 01831 #undef A_im 01832 01833 #undef g00_re 01834 #undef g00_im 01835 #undef g01_re 01836 #undef g01_im 01837 #undef g02_re 01838 #undef g02_im 01839 #undef g10_re 01840 #undef g10_im 01841 #undef g11_re 01842 #undef g11_im 01843 #undef g12_re 01844 #undef g12_im 01845 #undef g20_re 01846 #undef g20_im 01847 #undef g21_re 01848 #undef g21_im 01849 #undef g22_re 01850 #undef g22_im 01851 01852 #undef i00_re 01853 #undef i00_im 01854 #undef i01_re 01855 #undef i01_im 01856 #undef i02_re 01857 #undef i02_im 01858 #undef i10_re 01859 #undef i10_im 01860 #undef i11_re 01861 #undef i11_im 01862 #undef i12_re 01863 #undef i12_im 01864 #undef i20_re 01865 #undef i20_im 01866 #undef i21_re 01867 #undef i21_im 01868 #undef i22_re 01869 #undef i22_im 01870 #undef i30_re 01871 #undef i30_im 01872 #undef i31_re 01873 #undef i31_im 01874 #undef i32_re 01875 #undef i32_im 01876
1.7.3