|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 //J dslash_dwf_core.h 00002 //J Ver. 09.08.a 00003 // 00004 // 5D PreConditioning (5DPC) code. I.e., the hopping in s direction is 00005 // included in "dslash". 4DPC would require the inverse of the s direction 00006 // hopping to be coded into XpayD, in place of kappa. 00007 // 00008 // 00009 00010 //J Carry out the 4d operations with this include. 00011 // It does not undefine things. That comes 00012 // at the end of this file, through another include. 00013 //#include "dslash_core_ante.h" 00014 00015 //#define SHARED_FLOATS_PER_THREAD 0 // FIXME 00016 #define SHARED_BYTES_DOUBLE (BLOCK_DIM*SHARED_FLOATS_PER_THREAD*sizeof(double)) 00017 00018 #define SHARED_BYTES_SINGLE (BLOCK_DIM*SHARED_FLOATS_PER_THREAD*sizeof(float)) 00019 00020 //J I0,...,I11 correspond to the input spinor. 00021 //J The texture fetch operations that get it from device memory are 00022 //J located in io_spinor.h. 00023 // 00024 #ifdef SPINOR_DOUBLE 00025 #define spinorFloat double 00026 //J --- Dirac index 0, Colors 0,1,2. --- 00027 #define i00_re I0.x 00028 #define i00_im I0.y 00029 #define i01_re I1.x 00030 #define i01_im I1.y 00031 #define i02_re I2.x 00032 #define i02_im I2.y 00033 //J --- Dirac index 1, Colors 0,1,2. --- 00034 #define i10_re I3.x 00035 #define i10_im I3.y 00036 #define i11_re I4.x 00037 #define i11_im I4.y 00038 #define i12_re I5.x 00039 #define i12_im I5.y 00040 //J --- Dirac index 2, Colors 0,1,2. --- 00041 #define i20_re I6.x 00042 #define i20_im I6.y 00043 #define i21_re I7.x 00044 #define i21_im I7.y 00045 #define i22_re I8.x 00046 #define i22_im I8.y 00047 //J --- Dirac index 3, Colors 0,1,2. --- 00048 #define i30_re I9.x 00049 #define i30_im I9.y 00050 #define i31_re I10.x 00051 #define i31_im I10.y 00052 #define i32_re I11.x 00053 #define i32_im I11.y 00054 00055 #else 00056 #define spinorFloat float 00057 #define i00_re I0.x 00058 #define i00_im I0.y 00059 #define i01_re I0.z 00060 #define i01_im I0.w 00061 #define i02_re I1.x 00062 #define i02_im I1.y 00063 #define i10_re I1.z 00064 #define i10_im I1.w 00065 #define i11_re I2.x 00066 #define i11_im I2.y 00067 #define i12_re I2.z 00068 #define i12_im I2.w 00069 #define i20_re I3.x 00070 #define i20_im I3.y 00071 #define i21_re I3.z 00072 #define i21_im I3.w 00073 #define i22_re I4.x 00074 #define i22_im I4.y 00075 #define i30_re I4.z 00076 #define i30_im I4.w 00077 #define i31_re I5.x 00078 #define i31_im I5.y 00079 #define i32_re I5.z 00080 #define i32_im I5.w 00081 #endif 00082 00083 // gauge link 00084 #ifdef GAUGE_FLOAT2 00085 #define g00_re G0.x 00086 #define g00_im G0.y 00087 #define g01_re G1.x 00088 #define g01_im G1.y 00089 #define g02_re G2.x 00090 #define g02_im G2.y 00091 #define g10_re G3.x 00092 #define g10_im G3.y 00093 #define g11_re G4.x 00094 #define g11_im G4.y 00095 #define g12_re G5.x 00096 #define g12_im G5.y 00097 #define g20_re G6.x 00098 #define g20_im G6.y 00099 #define g21_re G7.x 00100 #define g21_im G7.y 00101 #define g22_re G8.x 00102 #define g22_im G8.y 00103 // temporaries 00104 #define A_re G9.x 00105 #define A_im G9.y 00106 00107 #else 00108 // Color row 0, color cols 0,1,2. 00109 #define g00_re G0.x 00110 #define g00_im G0.y 00111 #define g01_re G0.z 00112 #define g01_im G0.w 00113 #define g02_re G1.x 00114 #define g02_im G1.y 00115 // Color row 1, color cols 0,1,2. 00116 #define g10_re G1.z 00117 #define g10_im G1.w 00118 #define g11_re G2.x 00119 #define g11_im G2.y 00120 #define g12_re G2.z 00121 #define g12_im G2.w 00122 // Colors row 2, color cols 0,1,2. 00123 // These are reconstructed from rows 0 and 1. 00124 #define g20_re G3.x 00125 #define g20_im G3.y 00126 #define g21_re G3.z 00127 #define g21_im G3.w 00128 #define g22_re G4.x 00129 #define g22_im G4.y 00130 // 00131 // temporaries 00132 #define A_re G4.z 00133 #define A_im G4.w 00134 00135 #endif 00136 00137 // conjugated gauge link 00138 #define gT00_re (+g00_re) 00139 #define gT00_im (-g00_im) 00140 #define gT01_re (+g10_re) 00141 #define gT01_im (-g10_im) 00142 #define gT02_re (+g20_re) 00143 #define gT02_im (-g20_im) 00144 #define gT10_re (+g01_re) 00145 #define gT10_im (-g01_im) 00146 #define gT11_re (+g11_re) 00147 #define gT11_im (-g11_im) 00148 #define gT12_re (+g21_re) 00149 #define gT12_im (-g21_im) 00150 #define gT20_re (+g02_re) 00151 #define gT20_im (-g02_im) 00152 #define gT21_re (+g12_re) 00153 #define gT21_im (-g12_im) 00154 #define gT22_re (+g22_re) 00155 #define gT22_im (-g22_im) 00156 00157 00158 // output spinor 00159 //J According to what I understand from Ron, these sit in device memory 00160 //J and get loaded into registers when they are used, then immediately 00161 //J sent back to device mem. Isn't that very inefficient? Do we leave 00162 //J it to the GPU to avoid IO conflicts when this is done? 00163 volatile spinorFloat o00_re; 00164 volatile spinorFloat o00_im; 00165 volatile spinorFloat o01_re; 00166 volatile spinorFloat o01_im; 00167 volatile spinorFloat o02_re; 00168 volatile spinorFloat o02_im; 00169 volatile spinorFloat o10_re; 00170 volatile spinorFloat o10_im; 00171 volatile spinorFloat o11_re; 00172 volatile spinorFloat o11_im; 00173 volatile spinorFloat o12_re; 00174 volatile spinorFloat o12_im; 00175 volatile spinorFloat o20_re; 00176 volatile spinorFloat o20_im; 00177 volatile spinorFloat o21_re; 00178 volatile spinorFloat o21_im; 00179 volatile spinorFloat o22_re; 00180 volatile spinorFloat o22_im; 00181 volatile spinorFloat o30_re; 00182 volatile spinorFloat o30_im; 00183 volatile spinorFloat o31_re; 00184 volatile spinorFloat o31_im; 00185 volatile spinorFloat o32_re; 00186 volatile spinorFloat o32_im; 00187 00188 00189 //J Go up for these. At this point hoping that no changes 00190 //J will be needed for the gauge/clover code. But need to check. 00191 #include "read_gauge.h" 00192 //J Change to a dwf version which sets N and Vh to dwf values. 00193 #include "io_spinor.h" 00194 00195 //J --- INDEX LOGIC --- 00196 //J BLOCK_DIM is defined in dslash_quda.h. There it is 00197 //J set to 64. Why? GRID_DIM 00198 //J is also defined there. Where is that used? 00199 //J Introduce global thread index sid. Each thread is 00200 //J associated with a pair of sites. They are paired 00201 //J in the x1 direction. They are ordered red/black or 00202 //J black/red, depending on the number of boundary crossings. 00203 int sid = BLOCK_DIM*blockIdx.x + threadIdx.x; 00204 //J Boundary crossings has to do with the red/black checkerboard. 00205 //J We keep track of how many of those boundaries we cross in going 00206 //J from the origin to the location associated with THIS thread. 00207 //J Add a dimension relative to original code, so we get an extra term here. 00208 //J Note that X1,..,X4 and X1h are defined in quda.h. 00209 00210 // Will work with 5d checkerboard (for spinors) b/c this is 5DPC code. 00211 // Because of this, some gymnastics are required when accessing 00212 // the gauge links. I.e., they should be thought of as living 00213 // on the xs=0 slice, when determining their parity. 00214 00215 int boundaryCrossings = sid/X1h + sid/(X2*X1h) + sid/(X3*X2*X1h) + sid/(X4*X3*X2*X1h); 00216 // Gauge fields have 4d parity. E.g., if xs is odd, we need to grab 00217 // opposite parity link if we're supposed to use U_\mu(x). 00218 int boundaryCrossings4d = sid/X1h + sid/(X2*X1h) + sid/(X3*X2*X1h); 00219 // We will use the difference between boundaryCrossings and boundaryCrossings4d to determine 00220 // which gaugetex is used in the operations below. 00221 00222 //J Define the linear index X to checkerboard sites on the lattice. 00223 int X = 2*sid + (boundaryCrossings + oddBit) % 2; 00224 //J Coordinates of the checkerboard (sublattice) site. 00225 //J This is for the output spinor. 00226 int xs = X/(X4*X3*X2*X1); 00227 int x4 = (X/(X3*X2*X1)) % X4; 00228 int x3 = (X/(X2*X1)) % X3; 00229 int x2 = (X/X1) % X2; 00230 int x1 = X % X1; 00231 00232 // Initialize output spinor to zero. 00233 o00_re = o00_im = 0; 00234 o01_re = o01_im = 0; 00235 o02_re = o02_im = 0; 00236 o10_re = o10_im = 0; 00237 o11_re = o11_im = 0; 00238 o12_re = o12_im = 0; 00239 o20_re = o20_im = 0; 00240 o21_re = o21_im = 0; 00241 o22_re = o22_im = 0; 00242 o30_re = o30_im = 0; 00243 o31_re = o31_im = 0; 00244 o32_re = o32_im = 0; 00245 00246 //ok 00247 // Hopping operations in x1 direction. 00248 { 00249 // Projector P0- 00250 // 1 0 0 -i 00251 // 0 1 -i 0 00252 // 0 i 1 0 00253 // i 0 0 1 00254 00255 //J Hop forward? Yes, and this is for the input spinor. 00256 int sp_idx = ((x1==X1-1) ? X-(X1-1) : X+1) / 2; 00257 //J Link not hopped. But need to project to 4d lattice. 00258 int ga_idx = (sid % Vh); 00259 00260 // read gauge matrix from device memory 00261 // GAUGE0TEX is defined in dslash_dwf_def.h. 00262 // 00263 //J *** Note: The two options of this if statement are 00264 //J not put into if {} else {} prior to all the computation 00265 //J code that follows because READ_GAUGE_MATRIX declares 00266 //J G0, G1 and they would only have scope inside the {} 00267 //J if we did that. Then they would be invisible to the 00268 //J calculation code that follows. That is why I did this 00269 //J ugly code repetition. *** 00270 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00271 // gauge field same parity. 00272 READ_GAUGE_MATRIX(GAUGE0TEX, 0); 00273 // read spinor from device memory 00274 READ_SPINOR(SPINORTEX); 00275 00276 // reconstruct gauge matrix 00277 RECONSTRUCT_GAUGE_MATRIX(0); 00278 00279 // project spinor into half spinors 00280 //J Here we are using P0- on the input spinor. 00281 //J spinor row 0, colors 0,1,2 00282 spinorFloat a0_re = +i00_re+i30_im; 00283 spinorFloat a0_im = +i00_im-i30_re; 00284 spinorFloat a1_re = +i01_re+i31_im; 00285 spinorFloat a1_im = +i01_im-i31_re; 00286 spinorFloat a2_re = +i02_re+i32_im; 00287 spinorFloat a2_im = +i02_im-i32_re; 00288 00289 //J spinor row 1, colors 0,1,2. 00290 spinorFloat b0_re = +i10_re+i20_im; 00291 spinorFloat b0_im = +i10_im-i20_re; 00292 spinorFloat b1_re = +i11_re+i21_im; 00293 spinorFloat b1_im = +i11_im-i21_re; 00294 spinorFloat b2_re = +i12_re+i22_im; 00295 spinorFloat b2_im = +i12_im-i22_re; 00296 00297 //J multiply by links, to get color row 0 00298 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00299 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00300 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00301 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00302 00303 // multiply row 1 00304 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00305 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00306 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00307 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00308 00309 // multiply row 2 00310 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00311 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00312 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00313 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00314 00315 //J Add results to output spinor. 00316 //J color 0 00317 o00_re += A0_re; 00318 o00_im += A0_im; 00319 o10_re += B0_re; 00320 o10_im += B0_im; 00321 //J Q. Why are we assigning spinor row 2 with what 00322 //J looks like spinor row 1 results? A. B/c in P0- row 2 00323 //J differs from row 1 by a factor of i. 00324 o20_re -= B0_im; 00325 o20_im += B0_re; 00326 //J Similarly here. 00327 o30_re -= A0_im; 00328 o30_im += A0_re; 00329 00330 o01_re += A1_re; 00331 o01_im += A1_im; 00332 o11_re += B1_re; 00333 o11_im += B1_im; 00334 o21_re -= B1_im; 00335 o21_im += B1_re; 00336 o31_re -= A1_im; 00337 o31_im += A1_re; 00338 00339 o02_re += A2_re; 00340 o02_im += A2_im; 00341 o12_re += B2_re; 00342 o12_im += B2_im; 00343 o22_re -= B2_im; 00344 o22_im += B2_re; 00345 o32_re -= A2_im; 00346 o32_im += A2_re; 00347 00348 } else { 00349 // gauge field opposite parity. 00350 READ_GAUGE_MATRIX(GAUGE1TEX, 0); 00351 // read spinor from device memory 00352 READ_SPINOR(SPINORTEX); 00353 00354 // reconstruct gauge matrix 00355 RECONSTRUCT_GAUGE_MATRIX(0); 00356 00357 // project spinor into half spinors 00358 //J Here we are using P0- on the input spinor. 00359 //J spinor row 0, colors 0,1,2 00360 spinorFloat a0_re = +i00_re+i30_im; 00361 spinorFloat a0_im = +i00_im-i30_re; 00362 spinorFloat a1_re = +i01_re+i31_im; 00363 spinorFloat a1_im = +i01_im-i31_re; 00364 spinorFloat a2_re = +i02_re+i32_im; 00365 spinorFloat a2_im = +i02_im-i32_re; 00366 00367 //J spinor row 1, colors 0,1,2. 00368 spinorFloat b0_re = +i10_re+i20_im; 00369 spinorFloat b0_im = +i10_im-i20_re; 00370 spinorFloat b1_re = +i11_re+i21_im; 00371 spinorFloat b1_im = +i11_im-i21_re; 00372 spinorFloat b2_re = +i12_re+i22_im; 00373 spinorFloat b2_im = +i12_im-i22_re; 00374 00375 //J multiply by links, to get color row 0 00376 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00377 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00378 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00379 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00380 00381 // multiply row 1 00382 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00383 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00384 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00385 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00386 00387 // multiply row 2 00388 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00389 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00390 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00391 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00392 00393 //J Add results to output spinor. 00394 //J color 0 00395 o00_re += A0_re; 00396 o00_im += A0_im; 00397 o10_re += B0_re; 00398 o10_im += B0_im; 00399 //J Q. Why are we assigning spinor row 2 with what 00400 //J looks like spinor row 1 results? A. B/c in P0- row 2 00401 //J differs from row 1 by a factor of i. 00402 o20_re -= B0_im; 00403 o20_im += B0_re; 00404 //J Similarly here. 00405 o30_re -= A0_im; 00406 o30_im += A0_re; 00407 00408 o01_re += A1_re; 00409 o01_im += A1_im; 00410 o11_re += B1_re; 00411 o11_im += B1_im; 00412 o21_re -= B1_im; 00413 o21_im += B1_re; 00414 o31_re -= A1_im; 00415 o31_im += A1_re; 00416 00417 o02_re += A2_re; 00418 o02_im += A2_im; 00419 o12_re += B2_re; 00420 o12_im += B2_im; 00421 o22_re -= B2_im; 00422 o22_im += B2_re; 00423 o32_re -= A2_im; 00424 o32_im += A2_re; 00425 00426 } 00427 } 00428 00429 { 00430 // Projector P0+ 00431 // 1 0 0 i 00432 // 0 1 i 0 00433 // 0 -i 1 0 00434 // -i 0 0 1 00435 00436 //J (1+gamma1) Udag(x-1hat) psi(x-1hat), 00437 //J but relabeling 1-->0. 00438 //J Thus, hop backwards. sp_idx is used in io_spinor.h to 00439 //J read in the input spinor. 00440 int sp_idx = ((x1==0) ? X+(X1-1) : X-1) / 2; 00441 //J Link also hops. 00442 // ** HERE : need gymnastics because xs=0 for gauge parity. ** 00443 int ga_idx = sp_idx % Vh; 00444 00445 // read gauge matrix from device memory 00446 // NB: Here GAUGE1TEX is used, which is different from in P0- above! 00447 // I.e., parity is handled by having 2 bindings. 00448 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00449 // gauge field opposite parity. 00450 READ_GAUGE_MATRIX(GAUGE1TEX, 1); 00451 // read spinor from device memory 00452 //J The relevant code from io_spinor.h. 00453 //J Which one to use is determined near Line 105 00454 //J of dslash_dwf_def.h. 00455 READ_SPINOR(SPINORTEX); 00456 00457 // reconstruct gauge matrix 00458 RECONSTRUCT_GAUGE_MATRIX(1); 00459 00460 // project spinor into half spinors 00461 spinorFloat a0_re = +i00_re-i30_im; 00462 spinorFloat a0_im = +i00_im+i30_re; 00463 spinorFloat a1_re = +i01_re-i31_im; 00464 spinorFloat a1_im = +i01_im+i31_re; 00465 spinorFloat a2_re = +i02_re-i32_im; 00466 spinorFloat a2_im = +i02_im+i32_re; 00467 00468 spinorFloat b0_re = +i10_re-i20_im; 00469 spinorFloat b0_im = +i10_im+i20_re; 00470 spinorFloat b1_re = +i11_re-i21_im; 00471 spinorFloat b1_im = +i11_im+i21_re; 00472 spinorFloat b2_re = +i12_re-i22_im; 00473 spinorFloat b2_im = +i12_im+i22_re; 00474 00475 // multiply row 0 00476 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00477 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00478 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00479 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00480 00481 // multiply row 1 00482 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00483 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00484 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00485 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00486 00487 // multiply row 2 00488 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00489 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00490 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00491 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00492 00493 o00_re += A0_re; 00494 o00_im += A0_im; 00495 o10_re += B0_re; 00496 o10_im += B0_im; 00497 o20_re += B0_im; 00498 o20_im -= B0_re; 00499 o30_re += A0_im; 00500 o30_im -= A0_re; 00501 00502 o01_re += A1_re; 00503 o01_im += A1_im; 00504 o11_re += B1_re; 00505 o11_im += B1_im; 00506 o21_re += B1_im; 00507 o21_im -= B1_re; 00508 o31_re += A1_im; 00509 o31_im -= A1_re; 00510 00511 o02_re += A2_re; 00512 o02_im += A2_im; 00513 o12_re += B2_re; 00514 o12_im += B2_im; 00515 o22_re += B2_im; 00516 o22_im -= B2_re; 00517 o32_re += A2_im; 00518 o32_im -= A2_re; 00519 } else { 00520 // gauge field same parity. 00521 READ_GAUGE_MATRIX(GAUGE0TEX, 1); 00522 // read spinor from device memory 00523 //J The relevant code from io_spinor.h. 00524 //J Which one to use is determined near Line 105 00525 //J of dslash_dwf_def.h. 00526 READ_SPINOR(SPINORTEX); 00527 00528 // reconstruct gauge matrix 00529 RECONSTRUCT_GAUGE_MATRIX(1); 00530 00531 // project spinor into half spinors 00532 spinorFloat a0_re = +i00_re-i30_im; 00533 spinorFloat a0_im = +i00_im+i30_re; 00534 spinorFloat a1_re = +i01_re-i31_im; 00535 spinorFloat a1_im = +i01_im+i31_re; 00536 spinorFloat a2_re = +i02_re-i32_im; 00537 spinorFloat a2_im = +i02_im+i32_re; 00538 00539 spinorFloat b0_re = +i10_re-i20_im; 00540 spinorFloat b0_im = +i10_im+i20_re; 00541 spinorFloat b1_re = +i11_re-i21_im; 00542 spinorFloat b1_im = +i11_im+i21_re; 00543 spinorFloat b2_re = +i12_re-i22_im; 00544 spinorFloat b2_im = +i12_im+i22_re; 00545 00546 // multiply row 0 00547 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00548 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00549 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00550 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00551 00552 // multiply row 1 00553 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00554 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00555 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00556 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00557 00558 // multiply row 2 00559 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00560 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00561 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00562 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00563 00564 o00_re += A0_re; 00565 o00_im += A0_im; 00566 o10_re += B0_re; 00567 o10_im += B0_im; 00568 o20_re += B0_im; 00569 o20_im -= B0_re; 00570 o30_re += A0_im; 00571 o30_im -= A0_re; 00572 00573 o01_re += A1_re; 00574 o01_im += A1_im; 00575 o11_re += B1_re; 00576 o11_im += B1_im; 00577 o21_re += B1_im; 00578 o21_im -= B1_re; 00579 o31_re += A1_im; 00580 o31_im -= A1_re; 00581 00582 o02_re += A2_re; 00583 o02_im += A2_im; 00584 o12_re += B2_re; 00585 o12_im += B2_im; 00586 o22_re += B2_im; 00587 o22_im -= B2_re; 00588 o32_re += A2_im; 00589 o32_im -= A2_re; 00590 } 00591 00592 00593 } 00594 00595 { 00596 // Projector P1- 00597 // 1 0 0 -1 00598 // 0 1 1 0 00599 // 0 1 1 0 00600 // -1 0 0 1 00601 00602 int sp_idx = ((x2==X2-1) ? X-(X2-1)*X1 : X+X1) / 2; 00603 int ga_idx = sid % Vh; 00604 00605 // read gauge matrix from device memory 00606 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00607 // gauge field same parity. 00608 READ_GAUGE_MATRIX(GAUGE0TEX, 2); 00609 // read spinor from device memory 00610 READ_SPINOR(SPINORTEX); 00611 00612 // reconstruct gauge matrix 00613 RECONSTRUCT_GAUGE_MATRIX(2); 00614 00615 // project spinor into half spinors 00616 spinorFloat a0_re = +i00_re-i30_re; 00617 spinorFloat a0_im = +i00_im-i30_im; 00618 spinorFloat a1_re = +i01_re-i31_re; 00619 spinorFloat a1_im = +i01_im-i31_im; 00620 spinorFloat a2_re = +i02_re-i32_re; 00621 spinorFloat a2_im = +i02_im-i32_im; 00622 00623 spinorFloat b0_re = +i10_re+i20_re; 00624 spinorFloat b0_im = +i10_im+i20_im; 00625 spinorFloat b1_re = +i11_re+i21_re; 00626 spinorFloat b1_im = +i11_im+i21_im; 00627 spinorFloat b2_re = +i12_re+i22_re; 00628 spinorFloat b2_im = +i12_im+i22_im; 00629 00630 // multiply row 0 00631 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00632 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00633 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00634 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00635 00636 // multiply row 1 00637 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00638 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00639 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00640 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00641 00642 // multiply row 2 00643 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00644 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00645 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00646 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00647 00648 o00_re += A0_re; 00649 o00_im += A0_im; 00650 o10_re += B0_re; 00651 o10_im += B0_im; 00652 o20_re += B0_re; 00653 o20_im += B0_im; 00654 o30_re -= A0_re; 00655 o30_im -= A0_im; 00656 00657 o01_re += A1_re; 00658 o01_im += A1_im; 00659 o11_re += B1_re; 00660 o11_im += B1_im; 00661 o21_re += B1_re; 00662 o21_im += B1_im; 00663 o31_re -= A1_re; 00664 o31_im -= A1_im; 00665 00666 o02_re += A2_re; 00667 o02_im += A2_im; 00668 o12_re += B2_re; 00669 o12_im += B2_im; 00670 o22_re += B2_re; 00671 o22_im += B2_im; 00672 o32_re -= A2_re; 00673 o32_im -= A2_im; 00674 } else { 00675 // gauge field opposite parity. 00676 READ_GAUGE_MATRIX(GAUGE1TEX, 2); 00677 // read spinor from device memory 00678 READ_SPINOR(SPINORTEX); 00679 00680 // reconstruct gauge matrix 00681 RECONSTRUCT_GAUGE_MATRIX(2); 00682 00683 // project spinor into half spinors 00684 spinorFloat a0_re = +i00_re-i30_re; 00685 spinorFloat a0_im = +i00_im-i30_im; 00686 spinorFloat a1_re = +i01_re-i31_re; 00687 spinorFloat a1_im = +i01_im-i31_im; 00688 spinorFloat a2_re = +i02_re-i32_re; 00689 spinorFloat a2_im = +i02_im-i32_im; 00690 00691 spinorFloat b0_re = +i10_re+i20_re; 00692 spinorFloat b0_im = +i10_im+i20_im; 00693 spinorFloat b1_re = +i11_re+i21_re; 00694 spinorFloat b1_im = +i11_im+i21_im; 00695 spinorFloat b2_re = +i12_re+i22_re; 00696 spinorFloat b2_im = +i12_im+i22_im; 00697 00698 // multiply row 0 00699 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00700 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00701 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00702 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00703 00704 // multiply row 1 00705 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00706 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00707 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00708 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00709 00710 // multiply row 2 00711 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00712 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00713 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00714 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00715 00716 o00_re += A0_re; 00717 o00_im += A0_im; 00718 o10_re += B0_re; 00719 o10_im += B0_im; 00720 o20_re += B0_re; 00721 o20_im += B0_im; 00722 o30_re -= A0_re; 00723 o30_im -= A0_im; 00724 00725 o01_re += A1_re; 00726 o01_im += A1_im; 00727 o11_re += B1_re; 00728 o11_im += B1_im; 00729 o21_re += B1_re; 00730 o21_im += B1_im; 00731 o31_re -= A1_re; 00732 o31_im -= A1_im; 00733 00734 o02_re += A2_re; 00735 o02_im += A2_im; 00736 o12_re += B2_re; 00737 o12_im += B2_im; 00738 o22_re += B2_re; 00739 o22_im += B2_im; 00740 o32_re -= A2_re; 00741 o32_im -= A2_im; 00742 } 00743 //READ_GAUGE_MATRIX(GAUGE0TEX, 2); 00744 00745 00746 } 00747 00748 { 00749 // Projector P1+ 00750 // 1 0 0 1 00751 // 0 1 -1 0 00752 // 0 -1 1 0 00753 // 1 0 0 1 00754 00755 int sp_idx = ((x2==0) ? X+(X2-1)*X1 : X-X1) / 2; 00756 int ga_idx = sp_idx % Vh; 00757 00758 // read gauge matrix from device memory 00759 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00760 // gauge field opposite parity. 00761 READ_GAUGE_MATRIX(GAUGE1TEX, 3); 00762 // read spinor from device memory 00763 READ_SPINOR(SPINORTEX); 00764 00765 // reconstruct gauge matrix 00766 RECONSTRUCT_GAUGE_MATRIX(3); 00767 00768 // project spinor into half spinors 00769 spinorFloat a0_re = +i00_re+i30_re; 00770 spinorFloat a0_im = +i00_im+i30_im; 00771 spinorFloat a1_re = +i01_re+i31_re; 00772 spinorFloat a1_im = +i01_im+i31_im; 00773 spinorFloat a2_re = +i02_re+i32_re; 00774 spinorFloat a2_im = +i02_im+i32_im; 00775 00776 spinorFloat b0_re = +i10_re-i20_re; 00777 spinorFloat b0_im = +i10_im-i20_im; 00778 spinorFloat b1_re = +i11_re-i21_re; 00779 spinorFloat b1_im = +i11_im-i21_im; 00780 spinorFloat b2_re = +i12_re-i22_re; 00781 spinorFloat b2_im = +i12_im-i22_im; 00782 00783 // multiply row 0 00784 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00785 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00786 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00787 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00788 00789 // multiply row 1 00790 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00791 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00792 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00793 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00794 00795 // multiply row 2 00796 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00797 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00798 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00799 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00800 00801 o00_re += A0_re; 00802 o00_im += A0_im; 00803 o10_re += B0_re; 00804 o10_im += B0_im; 00805 o20_re -= B0_re; 00806 o20_im -= B0_im; 00807 o30_re += A0_re; 00808 o30_im += A0_im; 00809 00810 o01_re += A1_re; 00811 o01_im += A1_im; 00812 o11_re += B1_re; 00813 o11_im += B1_im; 00814 o21_re -= B1_re; 00815 o21_im -= B1_im; 00816 o31_re += A1_re; 00817 o31_im += A1_im; 00818 00819 o02_re += A2_re; 00820 o02_im += A2_im; 00821 o12_re += B2_re; 00822 o12_im += B2_im; 00823 o22_re -= B2_re; 00824 o22_im -= B2_im; 00825 o32_re += A2_re; 00826 o32_im += A2_im; 00827 00828 } else { 00829 // gauge field same parity. 00830 READ_GAUGE_MATRIX(GAUGE0TEX, 3); 00831 // read spinor from device memory 00832 READ_SPINOR(SPINORTEX); 00833 00834 // reconstruct gauge matrix 00835 RECONSTRUCT_GAUGE_MATRIX(3); 00836 00837 // project spinor into half spinors 00838 spinorFloat a0_re = +i00_re+i30_re; 00839 spinorFloat a0_im = +i00_im+i30_im; 00840 spinorFloat a1_re = +i01_re+i31_re; 00841 spinorFloat a1_im = +i01_im+i31_im; 00842 spinorFloat a2_re = +i02_re+i32_re; 00843 spinorFloat a2_im = +i02_im+i32_im; 00844 00845 spinorFloat b0_re = +i10_re-i20_re; 00846 spinorFloat b0_im = +i10_im-i20_im; 00847 spinorFloat b1_re = +i11_re-i21_re; 00848 spinorFloat b1_im = +i11_im-i21_im; 00849 spinorFloat b2_re = +i12_re-i22_re; 00850 spinorFloat b2_im = +i12_im-i22_im; 00851 00852 // multiply row 0 00853 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 00854 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 00855 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 00856 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 00857 00858 // multiply row 1 00859 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 00860 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 00861 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 00862 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 00863 00864 // multiply row 2 00865 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 00866 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 00867 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 00868 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 00869 00870 o00_re += A0_re; 00871 o00_im += A0_im; 00872 o10_re += B0_re; 00873 o10_im += B0_im; 00874 o20_re -= B0_re; 00875 o20_im -= B0_im; 00876 o30_re += A0_re; 00877 o30_im += A0_im; 00878 00879 o01_re += A1_re; 00880 o01_im += A1_im; 00881 o11_re += B1_re; 00882 o11_im += B1_im; 00883 o21_re -= B1_re; 00884 o21_im -= B1_im; 00885 o31_re += A1_re; 00886 o31_im += A1_im; 00887 00888 o02_re += A2_re; 00889 o02_im += A2_im; 00890 o12_re += B2_re; 00891 o12_im += B2_im; 00892 o22_re -= B2_re; 00893 o22_im -= B2_im; 00894 o32_re += A2_re; 00895 o32_im += A2_im; 00896 00897 } 00898 // READ_GAUGE_MATRIX(GAUGE1TEX, 3); 00899 00900 } 00901 00902 { 00903 // Projector P2- 00904 // 1 0 -i 0 00905 // 0 1 0 i 00906 // i 0 1 0 00907 // 0 -i 0 1 00908 00909 int sp_idx = ((x3==X3-1) ? X-(X3-1)*X2*X1 : X+X2*X1) / 2; 00910 int ga_idx = sid % Vh; 00911 00912 // read gauge matrix from device memory 00913 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 00914 // gauge field same parity. 00915 READ_GAUGE_MATRIX(GAUGE0TEX, 4); 00916 // read spinor from device memory 00917 READ_SPINOR(SPINORTEX); 00918 00919 // reconstruct gauge matrix 00920 RECONSTRUCT_GAUGE_MATRIX(4); 00921 00922 // project spinor into half spinors 00923 spinorFloat a0_re = +i00_re+i20_im; 00924 spinorFloat a0_im = +i00_im-i20_re; 00925 spinorFloat a1_re = +i01_re+i21_im; 00926 spinorFloat a1_im = +i01_im-i21_re; 00927 spinorFloat a2_re = +i02_re+i22_im; 00928 spinorFloat a2_im = +i02_im-i22_re; 00929 00930 spinorFloat b0_re = +i10_re-i30_im; 00931 spinorFloat b0_im = +i10_im+i30_re; 00932 spinorFloat b1_re = +i11_re-i31_im; 00933 spinorFloat b1_im = +i11_im+i31_re; 00934 spinorFloat b2_re = +i12_re-i32_im; 00935 spinorFloat b2_im = +i12_im+i32_re; 00936 00937 // multiply row 0 00938 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 00939 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 00940 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 00941 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 00942 00943 // multiply row 1 00944 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 00945 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 00946 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 00947 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 00948 00949 // multiply row 2 00950 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 00951 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 00952 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 00953 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 00954 00955 o00_re += A0_re; 00956 o00_im += A0_im; 00957 o10_re += B0_re; 00958 o10_im += B0_im; 00959 o20_re -= A0_im; 00960 o20_im += A0_re; 00961 o30_re += B0_im; 00962 o30_im -= B0_re; 00963 00964 o01_re += A1_re; 00965 o01_im += A1_im; 00966 o11_re += B1_re; 00967 o11_im += B1_im; 00968 o21_re -= A1_im; 00969 o21_im += A1_re; 00970 o31_re += B1_im; 00971 o31_im -= B1_re; 00972 00973 o02_re += A2_re; 00974 o02_im += A2_im; 00975 o12_re += B2_re; 00976 o12_im += B2_im; 00977 o22_re -= A2_im; 00978 o22_im += A2_re; 00979 o32_re += B2_im; 00980 o32_im -= B2_re; 00981 } else { 00982 // gauge field opp parity. 00983 READ_GAUGE_MATRIX(GAUGE1TEX, 4); 00984 // read spinor from device memory 00985 READ_SPINOR(SPINORTEX); 00986 00987 // reconstruct gauge matrix 00988 RECONSTRUCT_GAUGE_MATRIX(4); 00989 00990 // project spinor into half spinors 00991 spinorFloat a0_re = +i00_re+i20_im; 00992 spinorFloat a0_im = +i00_im-i20_re; 00993 spinorFloat a1_re = +i01_re+i21_im; 00994 spinorFloat a1_im = +i01_im-i21_re; 00995 spinorFloat a2_re = +i02_re+i22_im; 00996 spinorFloat a2_im = +i02_im-i22_re; 00997 00998 spinorFloat b0_re = +i10_re-i30_im; 00999 spinorFloat b0_im = +i10_im+i30_re; 01000 spinorFloat b1_re = +i11_re-i31_im; 01001 spinorFloat b1_im = +i11_im+i31_re; 01002 spinorFloat b2_re = +i12_re-i32_im; 01003 spinorFloat b2_im = +i12_im+i32_re; 01004 01005 // multiply row 0 01006 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 01007 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 01008 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 01009 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 01010 01011 // multiply row 1 01012 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 01013 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 01014 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 01015 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 01016 01017 // multiply row 2 01018 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 01019 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 01020 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 01021 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 01022 01023 o00_re += A0_re; 01024 o00_im += A0_im; 01025 o10_re += B0_re; 01026 o10_im += B0_im; 01027 o20_re -= A0_im; 01028 o20_im += A0_re; 01029 o30_re += B0_im; 01030 o30_im -= B0_re; 01031 01032 o01_re += A1_re; 01033 o01_im += A1_im; 01034 o11_re += B1_re; 01035 o11_im += B1_im; 01036 o21_re -= A1_im; 01037 o21_im += A1_re; 01038 o31_re += B1_im; 01039 o31_im -= B1_re; 01040 01041 o02_re += A2_re; 01042 o02_im += A2_im; 01043 o12_re += B2_re; 01044 o12_im += B2_im; 01045 o22_re -= A2_im; 01046 o22_im += A2_re; 01047 o32_re += B2_im; 01048 o32_im -= B2_re; 01049 } 01050 // READ_GAUGE_MATRIX(GAUGE0TEX, 4); 01051 01052 01053 } 01054 01055 { 01056 // Projector P2+ 01057 // 1 0 i 0 01058 // 0 1 0 -i 01059 // -i 0 1 0 01060 // 0 i 0 1 01061 01062 int sp_idx = ((x3==0) ? X+(X3-1)*X2*X1 : X-X2*X1) / 2; 01063 int ga_idx = sp_idx % Vh; 01064 01065 // read gauge matrix from device memory 01066 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 01067 // gauge field opposite parity. 01068 READ_GAUGE_MATRIX(GAUGE1TEX, 5); 01069 // read spinor from device memory 01070 READ_SPINOR(SPINORTEX); 01071 01072 // reconstruct gauge matrix 01073 RECONSTRUCT_GAUGE_MATRIX(5); 01074 01075 // project spinor into half spinors 01076 spinorFloat a0_re = +i00_re-i20_im; 01077 spinorFloat a0_im = +i00_im+i20_re; 01078 spinorFloat a1_re = +i01_re-i21_im; 01079 spinorFloat a1_im = +i01_im+i21_re; 01080 spinorFloat a2_re = +i02_re-i22_im; 01081 spinorFloat a2_im = +i02_im+i22_re; 01082 01083 spinorFloat b0_re = +i10_re+i30_im; 01084 spinorFloat b0_im = +i10_im-i30_re; 01085 spinorFloat b1_re = +i11_re+i31_im; 01086 spinorFloat b1_im = +i11_im-i31_re; 01087 spinorFloat b2_re = +i12_re+i32_im; 01088 spinorFloat b2_im = +i12_im-i32_re; 01089 01090 // multiply row 0 01091 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01092 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01093 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01094 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01095 01096 // multiply row 1 01097 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01098 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01099 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01100 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01101 01102 // multiply row 2 01103 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01104 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01105 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01106 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01107 01108 o00_re += A0_re; 01109 o00_im += A0_im; 01110 o10_re += B0_re; 01111 o10_im += B0_im; 01112 o20_re += A0_im; 01113 o20_im -= A0_re; 01114 o30_re -= B0_im; 01115 o30_im += B0_re; 01116 01117 o01_re += A1_re; 01118 o01_im += A1_im; 01119 o11_re += B1_re; 01120 o11_im += B1_im; 01121 o21_re += A1_im; 01122 o21_im -= A1_re; 01123 o31_re -= B1_im; 01124 o31_im += B1_re; 01125 01126 o02_re += A2_re; 01127 o02_im += A2_im; 01128 o12_re += B2_re; 01129 o12_im += B2_im; 01130 o22_re += A2_im; 01131 o22_im -= A2_re; 01132 o32_re -= B2_im; 01133 o32_im += B2_re; 01134 01135 } else { 01136 // gauge field same parity. 01137 READ_GAUGE_MATRIX(GAUGE0TEX, 5); 01138 // read spinor from device memory 01139 READ_SPINOR(SPINORTEX); 01140 01141 // reconstruct gauge matrix 01142 RECONSTRUCT_GAUGE_MATRIX(5); 01143 01144 // project spinor into half spinors 01145 spinorFloat a0_re = +i00_re-i20_im; 01146 spinorFloat a0_im = +i00_im+i20_re; 01147 spinorFloat a1_re = +i01_re-i21_im; 01148 spinorFloat a1_im = +i01_im+i21_re; 01149 spinorFloat a2_re = +i02_re-i22_im; 01150 spinorFloat a2_im = +i02_im+i22_re; 01151 01152 spinorFloat b0_re = +i10_re+i30_im; 01153 spinorFloat b0_im = +i10_im-i30_re; 01154 spinorFloat b1_re = +i11_re+i31_im; 01155 spinorFloat b1_im = +i11_im-i31_re; 01156 spinorFloat b2_re = +i12_re+i32_im; 01157 spinorFloat b2_im = +i12_im-i32_re; 01158 01159 // multiply row 0 01160 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01161 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01162 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01163 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01164 01165 // multiply row 1 01166 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01167 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01168 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01169 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01170 01171 // multiply row 2 01172 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01173 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01174 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01175 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01176 01177 o00_re += A0_re; 01178 o00_im += A0_im; 01179 o10_re += B0_re; 01180 o10_im += B0_im; 01181 o20_re += A0_im; 01182 o20_im -= A0_re; 01183 o30_re -= B0_im; 01184 o30_im += B0_re; 01185 01186 o01_re += A1_re; 01187 o01_im += A1_im; 01188 o11_re += B1_re; 01189 o11_im += B1_im; 01190 o21_re += A1_im; 01191 o21_im -= A1_re; 01192 o31_re -= B1_im; 01193 o31_im += B1_re; 01194 01195 o02_re += A2_re; 01196 o02_im += A2_im; 01197 o12_re += B2_re; 01198 o12_im += B2_im; 01199 o22_re += A2_im; 01200 o22_im -= A2_re; 01201 o32_re -= B2_im; 01202 o32_im += B2_re; 01203 01204 } 01205 // READ_GAUGE_MATRIX(GAUGE1TEX, 5); 01206 01207 } 01208 01209 { 01210 // Projector P3- 01211 // 0 0 0 0 01212 // 0 0 0 0 01213 // 0 0 2 0 01214 // 0 0 0 2 01215 01216 int sp_idx = ((x4==X4-1) ? X-(X4-1)*X3*X2*X1 : X+X3*X2*X1) / 2; 01217 int ga_idx = sid % Vh; 01218 01219 if (gauge_fixed && ga_idx < (X4-1)*X1h*X2*X3) { 01220 // read spinor from device memory 01221 READ_SPINOR_DOWN(SPINORTEX); 01222 01223 // project spinor into half spinors 01224 spinorFloat a0_re = +2*i20_re; 01225 spinorFloat a0_im = +2*i20_im; 01226 spinorFloat a1_re = +2*i21_re; 01227 spinorFloat a1_im = +2*i21_im; 01228 spinorFloat a2_re = +2*i22_re; 01229 spinorFloat a2_im = +2*i22_im; 01230 01231 spinorFloat b0_re = +2*i30_re; 01232 spinorFloat b0_im = +2*i30_im; 01233 spinorFloat b1_re = +2*i31_re; 01234 spinorFloat b1_im = +2*i31_im; 01235 spinorFloat b2_re = +2*i32_re; 01236 spinorFloat b2_im = +2*i32_im; 01237 01238 // identity gauge matrix 01239 spinorFloat A0_re = a0_re; spinorFloat A0_im = a0_im; 01240 spinorFloat B0_re = b0_re; spinorFloat B0_im = b0_im; 01241 spinorFloat A1_re = a1_re; spinorFloat A1_im = a1_im; 01242 spinorFloat B1_re = b1_re; spinorFloat B1_im = b1_im; 01243 spinorFloat A2_re = a2_re; spinorFloat A2_im = a2_im; 01244 spinorFloat B2_re = b2_re; spinorFloat B2_im = b2_im; 01245 01246 o20_re += A0_re; 01247 o20_im += A0_im; 01248 o30_re += B0_re; 01249 o30_im += B0_im; 01250 01251 o21_re += A1_re; 01252 o21_im += A1_im; 01253 o31_re += B1_re; 01254 o31_im += B1_im; 01255 01256 o22_re += A2_re; 01257 o22_im += A2_im; 01258 o32_re += B2_re; 01259 o32_im += B2_im; 01260 01261 } 01262 else { 01263 // read gauge matrix from device memory 01264 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 01265 // gauge field same parity. 01266 READ_GAUGE_MATRIX(GAUGE0TEX, 6); 01267 // read spinor from device memory 01268 READ_SPINOR_DOWN(SPINORTEX); 01269 01270 // reconstruct gauge matrix 01271 RECONSTRUCT_GAUGE_MATRIX(6); 01272 01273 // project spinor into half spinors 01274 spinorFloat a0_re = +2*i20_re; 01275 spinorFloat a0_im = +2*i20_im; 01276 spinorFloat a1_re = +2*i21_re; 01277 spinorFloat a1_im = +2*i21_im; 01278 spinorFloat a2_re = +2*i22_re; 01279 spinorFloat a2_im = +2*i22_im; 01280 01281 spinorFloat b0_re = +2*i30_re; 01282 spinorFloat b0_im = +2*i30_im; 01283 spinorFloat b1_re = +2*i31_re; 01284 spinorFloat b1_im = +2*i31_im; 01285 spinorFloat b2_re = +2*i32_re; 01286 spinorFloat b2_im = +2*i32_im; 01287 01288 // multiply row 0 01289 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 01290 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 01291 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 01292 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 01293 01294 // multiply row 1 01295 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 01296 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 01297 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 01298 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 01299 01300 // multiply row 2 01301 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 01302 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 01303 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 01304 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 01305 01306 o20_re += A0_re; 01307 o20_im += A0_im; 01308 o30_re += B0_re; 01309 o30_im += B0_im; 01310 01311 o21_re += A1_re; 01312 o21_im += A1_im; 01313 o31_re += B1_re; 01314 o31_im += B1_im; 01315 01316 o22_re += A2_re; 01317 o22_im += A2_im; 01318 o32_re += B2_re; 01319 o32_im += B2_im; 01320 01321 } else { 01322 // gauge field opp parity. 01323 READ_GAUGE_MATRIX(GAUGE1TEX, 6); 01324 // read spinor from device memory 01325 READ_SPINOR_DOWN(SPINORTEX); 01326 01327 // reconstruct gauge matrix 01328 RECONSTRUCT_GAUGE_MATRIX(6); 01329 01330 // project spinor into half spinors 01331 spinorFloat a0_re = +2*i20_re; 01332 spinorFloat a0_im = +2*i20_im; 01333 spinorFloat a1_re = +2*i21_re; 01334 spinorFloat a1_im = +2*i21_im; 01335 spinorFloat a2_re = +2*i22_re; 01336 spinorFloat a2_im = +2*i22_im; 01337 01338 spinorFloat b0_re = +2*i30_re; 01339 spinorFloat b0_im = +2*i30_im; 01340 spinorFloat b1_re = +2*i31_re; 01341 spinorFloat b1_im = +2*i31_im; 01342 spinorFloat b2_re = +2*i32_re; 01343 spinorFloat b2_im = +2*i32_im; 01344 01345 // multiply row 0 01346 spinorFloat A0_re = + (g00_re * a0_re - g00_im * a0_im) + (g01_re * a1_re - g01_im * a1_im) + (g02_re * a2_re - g02_im * a2_im); 01347 spinorFloat A0_im = + (g00_re * a0_im + g00_im * a0_re) + (g01_re * a1_im + g01_im * a1_re) + (g02_re * a2_im + g02_im * a2_re); 01348 spinorFloat B0_re = + (g00_re * b0_re - g00_im * b0_im) + (g01_re * b1_re - g01_im * b1_im) + (g02_re * b2_re - g02_im * b2_im); 01349 spinorFloat B0_im = + (g00_re * b0_im + g00_im * b0_re) + (g01_re * b1_im + g01_im * b1_re) + (g02_re * b2_im + g02_im * b2_re); 01350 01351 // multiply row 1 01352 spinorFloat A1_re = + (g10_re * a0_re - g10_im * a0_im) + (g11_re * a1_re - g11_im * a1_im) + (g12_re * a2_re - g12_im * a2_im); 01353 spinorFloat A1_im = + (g10_re * a0_im + g10_im * a0_re) + (g11_re * a1_im + g11_im * a1_re) + (g12_re * a2_im + g12_im * a2_re); 01354 spinorFloat B1_re = + (g10_re * b0_re - g10_im * b0_im) + (g11_re * b1_re - g11_im * b1_im) + (g12_re * b2_re - g12_im * b2_im); 01355 spinorFloat B1_im = + (g10_re * b0_im + g10_im * b0_re) + (g11_re * b1_im + g11_im * b1_re) + (g12_re * b2_im + g12_im * b2_re); 01356 01357 // multiply row 2 01358 spinorFloat A2_re = + (g20_re * a0_re - g20_im * a0_im) + (g21_re * a1_re - g21_im * a1_im) + (g22_re * a2_re - g22_im * a2_im); 01359 spinorFloat A2_im = + (g20_re * a0_im + g20_im * a0_re) + (g21_re * a1_im + g21_im * a1_re) + (g22_re * a2_im + g22_im * a2_re); 01360 spinorFloat B2_re = + (g20_re * b0_re - g20_im * b0_im) + (g21_re * b1_re - g21_im * b1_im) + (g22_re * b2_re - g22_im * b2_im); 01361 spinorFloat B2_im = + (g20_re * b0_im + g20_im * b0_re) + (g21_re * b1_im + g21_im * b1_re) + (g22_re * b2_im + g22_im * b2_re); 01362 01363 o20_re += A0_re; 01364 o20_im += A0_im; 01365 o30_re += B0_re; 01366 o30_im += B0_im; 01367 01368 o21_re += A1_re; 01369 o21_im += A1_im; 01370 o31_re += B1_re; 01371 o31_im += B1_im; 01372 01373 o22_re += A2_re; 01374 o22_im += A2_im; 01375 o32_re += B2_re; 01376 o32_im += B2_im; 01377 01378 } 01379 //READ_GAUGE_MATRIX(GAUGE0TEX, 6); 01380 01381 } 01382 } 01383 01384 { 01385 // Projector P3+ 01386 // 2 0 0 0 01387 // 0 2 0 0 01388 // 0 0 0 0 01389 // 0 0 0 0 01390 01391 int sp_idx = ((x4==0) ? X+(X4-1)*X3*X2*X1 : X-X3*X2*X1) / 2; 01392 int ga_idx = sp_idx % Vh; 01393 01394 if (gauge_fixed && ga_idx < (X4-1)*X1h*X2*X3) { 01395 // read spinor from device memory 01396 READ_SPINOR_UP(SPINORTEX); 01397 01398 // project spinor into half spinors 01399 spinorFloat a0_re = +2*i00_re; 01400 spinorFloat a0_im = +2*i00_im; 01401 spinorFloat a1_re = +2*i01_re; 01402 spinorFloat a1_im = +2*i01_im; 01403 spinorFloat a2_re = +2*i02_re; 01404 spinorFloat a2_im = +2*i02_im; 01405 01406 spinorFloat b0_re = +2*i10_re; 01407 spinorFloat b0_im = +2*i10_im; 01408 spinorFloat b1_re = +2*i11_re; 01409 spinorFloat b1_im = +2*i11_im; 01410 spinorFloat b2_re = +2*i12_re; 01411 spinorFloat b2_im = +2*i12_im; 01412 01413 // identity gauge matrix 01414 spinorFloat A0_re = a0_re; spinorFloat A0_im = a0_im; 01415 spinorFloat B0_re = b0_re; spinorFloat B0_im = b0_im; 01416 spinorFloat A1_re = a1_re; spinorFloat A1_im = a1_im; 01417 spinorFloat B1_re = b1_re; spinorFloat B1_im = b1_im; 01418 spinorFloat A2_re = a2_re; spinorFloat A2_im = a2_im; 01419 spinorFloat B2_re = b2_re; spinorFloat B2_im = b2_im; 01420 01421 o00_re += A0_re; 01422 o00_im += A0_im; 01423 o10_re += B0_re; 01424 o10_im += B0_im; 01425 01426 o01_re += A1_re; 01427 o01_im += A1_im; 01428 o11_re += B1_re; 01429 o11_im += B1_im; 01430 01431 o02_re += A2_re; 01432 o02_im += A2_im; 01433 o12_re += B2_re; 01434 o12_im += B2_im; 01435 01436 } 01437 else { 01438 // read gauge matrix from device memory 01439 if ( !( (boundaryCrossings-boundaryCrossings4d) % 2) ) { 01440 // gauge field opposite parity. 01441 READ_GAUGE_MATRIX(GAUGE1TEX, 7); 01442 // read spinor from device memory 01443 READ_SPINOR_UP(SPINORTEX); 01444 01445 // reconstruct gauge matrix 01446 RECONSTRUCT_GAUGE_MATRIX(7); 01447 01448 // project spinor into half spinors 01449 spinorFloat a0_re = +2*i00_re; 01450 spinorFloat a0_im = +2*i00_im; 01451 spinorFloat a1_re = +2*i01_re; 01452 spinorFloat a1_im = +2*i01_im; 01453 spinorFloat a2_re = +2*i02_re; 01454 spinorFloat a2_im = +2*i02_im; 01455 01456 spinorFloat b0_re = +2*i10_re; 01457 spinorFloat b0_im = +2*i10_im; 01458 spinorFloat b1_re = +2*i11_re; 01459 spinorFloat b1_im = +2*i11_im; 01460 spinorFloat b2_re = +2*i12_re; 01461 spinorFloat b2_im = +2*i12_im; 01462 01463 // multiply row 0 01464 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01465 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01466 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01467 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01468 01469 // multiply row 1 01470 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01471 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01472 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01473 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01474 01475 // multiply row 2 01476 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01477 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01478 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01479 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01480 01481 o00_re += A0_re; 01482 o00_im += A0_im; 01483 o10_re += B0_re; 01484 o10_im += B0_im; 01485 01486 o01_re += A1_re; 01487 o01_im += A1_im; 01488 o11_re += B1_re; 01489 o11_im += B1_im; 01490 01491 o02_re += A2_re; 01492 o02_im += A2_im; 01493 o12_re += B2_re; 01494 o12_im += B2_im; 01495 } else { 01496 // gauge field same parity. 01497 READ_GAUGE_MATRIX(GAUGE0TEX, 7); 01498 // read spinor from device memory 01499 READ_SPINOR_UP(SPINORTEX); 01500 01501 // reconstruct gauge matrix 01502 RECONSTRUCT_GAUGE_MATRIX(7); 01503 01504 // project spinor into half spinors 01505 spinorFloat a0_re = +2*i00_re; 01506 spinorFloat a0_im = +2*i00_im; 01507 spinorFloat a1_re = +2*i01_re; 01508 spinorFloat a1_im = +2*i01_im; 01509 spinorFloat a2_re = +2*i02_re; 01510 spinorFloat a2_im = +2*i02_im; 01511 01512 spinorFloat b0_re = +2*i10_re; 01513 spinorFloat b0_im = +2*i10_im; 01514 spinorFloat b1_re = +2*i11_re; 01515 spinorFloat b1_im = +2*i11_im; 01516 spinorFloat b2_re = +2*i12_re; 01517 spinorFloat b2_im = +2*i12_im; 01518 01519 // multiply row 0 01520 spinorFloat A0_re = + (gT00_re * a0_re - gT00_im * a0_im) + (gT01_re * a1_re - gT01_im * a1_im) + (gT02_re * a2_re - gT02_im * a2_im); 01521 spinorFloat A0_im = + (gT00_re * a0_im + gT00_im * a0_re) + (gT01_re * a1_im + gT01_im * a1_re) + (gT02_re * a2_im + gT02_im * a2_re); 01522 spinorFloat B0_re = + (gT00_re * b0_re - gT00_im * b0_im) + (gT01_re * b1_re - gT01_im * b1_im) + (gT02_re * b2_re - gT02_im * b2_im); 01523 spinorFloat B0_im = + (gT00_re * b0_im + gT00_im * b0_re) + (gT01_re * b1_im + gT01_im * b1_re) + (gT02_re * b2_im + gT02_im * b2_re); 01524 01525 // multiply row 1 01526 spinorFloat A1_re = + (gT10_re * a0_re - gT10_im * a0_im) + (gT11_re * a1_re - gT11_im * a1_im) + (gT12_re * a2_re - gT12_im * a2_im); 01527 spinorFloat A1_im = + (gT10_re * a0_im + gT10_im * a0_re) + (gT11_re * a1_im + gT11_im * a1_re) + (gT12_re * a2_im + gT12_im * a2_re); 01528 spinorFloat B1_re = + (gT10_re * b0_re - gT10_im * b0_im) + (gT11_re * b1_re - gT11_im * b1_im) + (gT12_re * b2_re - gT12_im * b2_im); 01529 spinorFloat B1_im = + (gT10_re * b0_im + gT10_im * b0_re) + (gT11_re * b1_im + gT11_im * b1_re) + (gT12_re * b2_im + gT12_im * b2_re); 01530 01531 // multiply row 2 01532 spinorFloat A2_re = + (gT20_re * a0_re - gT20_im * a0_im) + (gT21_re * a1_re - gT21_im * a1_im) + (gT22_re * a2_re - gT22_im * a2_im); 01533 spinorFloat A2_im = + (gT20_re * a0_im + gT20_im * a0_re) + (gT21_re * a1_im + gT21_im * a1_re) + (gT22_re * a2_im + gT22_im * a2_re); 01534 spinorFloat B2_re = + (gT20_re * b0_re - gT20_im * b0_im) + (gT21_re * b1_re - gT21_im * b1_im) + (gT22_re * b2_re - gT22_im * b2_im); 01535 spinorFloat B2_im = + (gT20_re * b0_im + gT20_im * b0_re) + (gT21_re * b1_im + gT21_im * b1_re) + (gT22_re * b2_im + gT22_im * b2_re); 01536 01537 o00_re += A0_re; 01538 o00_im += A0_im; 01539 o10_re += B0_re; 01540 o10_im += B0_im; 01541 01542 o01_re += A1_re; 01543 o01_im += A1_im; 01544 o11_re += B1_re; 01545 o11_im += B1_im; 01546 01547 o02_re += A2_re; 01548 o02_im += A2_im; 01549 o12_re += B2_re; 01550 o12_im += B2_im; 01551 } 01552 // READ_GAUGE_MATRIX(GAUGE1TEX, 7); 01553 01554 01555 } 01556 } 01557 01558 01559 01560 01561 //J ---------------------------------- 01562 //J --- DWF code for 5th dimension --- 01563 //J ---------------------------------- 01564 // 01565 //J Begin scope. 01566 { 01567 //J TODO Insert/check handler for s-direction here. 01568 01569 //J Decided to not change to chiral basis. Then: 01570 // 2 P_- = 2 P_L = 1 -1 01571 // -1 1 01572 //J Begin scope for 2 P_L projection of back-hopped spinor. 01573 { 01574 //J We are left-handed, so hop backwards. If we are at 01575 //J boundary in s-direction, special 01576 //J things will need to be done. xs is defined in dslash_core_ante.h. 01577 //J See near Line 328. N is the 4d volume; cf. quda.h. 01578 //J Cf. hand-written notes 8/6/09 for check of logic. 01579 //J The logic sets xs to the s-coordinate of the output 01580 //J spinor, which is accumulated by this thread. 01581 //J I.e., it uses the thread index to determine xs. 01582 int sp_idx = ((xs==0) ? X+(Ls-1)*2*Vh : X-2*Vh) / 2; 01583 // --- Read spinor from device memory. --- 01584 // 01585 READ_SPINOR(SPINORTEX); 01586 01587 if (xs != 0) { 01588 //J OK, now the input spinor should be at: 01589 //J 0 < s <= Ls-1 01590 // 01591 //J Project spinor into half spinors, i.e., this is the term 01592 //J " + P_L psi(s-1) " 01593 01594 //J ------------------------------------ 01595 //J --- Dirac index 0, Colors 0,1,2. --- 01596 //J ------------------------------------ 01597 //ok 01598 o00_re += i00_re-i20_re; 01599 o00_im += i00_im-i20_im; 01600 o01_re += i01_re-i21_re; 01601 o01_im += i01_im-i21_im; 01602 o02_re += i02_re-i22_re; 01603 o02_im += i02_im-i22_im; 01604 01605 //J ------------------------------------- 01606 //J --- Dirac index 1, Colors 0,1,2. --- 01607 //J ------------------------------------- 01608 //ok 01609 o10_re += i10_re-i30_re; 01610 o10_im += i10_im-i30_im; 01611 o11_re += i11_re-i31_re; 01612 o11_im += i11_im-i31_im; 01613 o12_re += i12_re-i32_re; 01614 o12_im += i12_im-i32_im; 01615 01616 //J ------------------------------------ 01617 //J --- Dirac index 2, Colors 0,1,2. --- 01618 //J ------------------------------------ 01619 //ok 01620 o20_re += -i00_re+i20_re; 01621 o20_im += -i00_im+i20_im; 01622 o21_re += -i01_re+i21_re; 01623 o21_im += -i01_im+i21_im; 01624 o22_re += -i02_re+i22_re; 01625 o22_im += -i02_im+i22_im; 01626 01627 //J ------------------------------------- 01628 //J --- Dirac index 3, Colors 0,1,2. --- 01629 //J ------------------------------------- 01630 //ok 01631 o30_re += -i10_re+i30_re; 01632 o30_im += -i10_im+i30_im; 01633 o31_re += -i11_re+i31_re; 01634 o31_im += -i11_im+i31_im; 01635 o32_re += -i12_re+i32_re; 01636 o32_im += -i12_im+i32_im; 01637 01638 } // End (x,0) < (x,s) <= (x,Ls-1). 01639 else { 01640 //J LH boundary s=0, backwards hop to Ls-1. 01641 //J Term to add: -mferm*P_L*psi(x,Ls-1) 01642 //J With any luck, sp_idx is linear equiv. to "(x,Ls-1)" 01643 //J Above, we set: 01644 //J sp_idx= (X+(Ls-1)*X4*X3*X2*X1)/2 (*). 01645 //J efs: do some case examples where xs=0 comes out of 01646 //J dslash_ante_core.h procedure, and check that sp_idx is 01647 //J really coming out correct (and in permissable range) 01648 //J in the operation (*). 01649 //J We need mferm to get passed. A modification 01650 //J was made to DD_PARAM2 in the C preprocessing file 01651 //J dslash_dwf_def.h, adding 01652 //J an extra argument to the kernel declarations. 01653 // 01654 //J --- Dirac index 0, Colors 0,1,2. --- 01655 // color 0 (second index) 01656 o00_re += -mferm*(i00_re-i20_re); 01657 o00_im += -mferm*(i00_im-i20_im); 01658 // color 1 01659 o01_re += -mferm*(i01_re-i21_re); 01660 o01_im += -mferm*(i01_im-i21_im); 01661 // color 2 01662 o02_re += -mferm*(i02_re-i22_re); 01663 o02_im += -mferm*(i02_im-i22_im); 01664 01665 //J --- Dirac index 1, Colors 0,1,2. --- 01666 // color 0 01667 o10_re += -mferm*(i10_re-i30_re); 01668 o10_im += -mferm*(i10_im-i30_im); 01669 // color 1 01670 o11_re += -mferm*(i11_re-i31_re); 01671 o11_im += -mferm*(i11_im-i31_im); 01672 // color 2 01673 o12_re += -mferm*(i12_re-i32_re); 01674 o12_im += -mferm*(i12_im-i32_im); 01675 01676 //J --- Dirac index 2, Colors 0,1,2. --- 01677 // color 0 (second index) 01678 o20_re += -mferm*(-i00_re+i20_re); 01679 o20_im += -mferm*(-i00_im+i20_im); 01680 // color 1 01681 o21_re += -mferm*(-i01_re+i21_re); 01682 o21_im += -mferm*(-i01_im+i21_im); 01683 // color 2 01684 o22_re += -mferm*(-i02_re+i22_re); 01685 o22_im += -mferm*(-i02_im+i22_im); 01686 01687 //J --- Dirac index 3, Colors 0,1,2. --- 01688 // color 0 01689 o30_re += -mferm*(-i10_re+i30_re); 01690 o30_im += -mferm*(-i10_im+i30_im); 01691 // color 1 01692 o31_re += -mferm*(-i11_re+i31_re); 01693 o31_im += -mferm*(-i11_im+i31_im); 01694 // color 2 01695 o32_re += -mferm*(-i12_re+i32_re); 01696 o32_im += -mferm*(-i12_im+i32_im); 01697 01698 } // End (x,s)=(x,0) 01699 } 01700 // --- End of left-handed spinor projection. --- 01701 01702 // 2 P_+ = 2 P_R = 1 1 01703 // 1 1 01704 // --- Begin right-handed spinor projection. --- 01705 { 01706 //J For P_R spinor, we hop forwards. Thus: 01707 01708 //J This bit mimics what is done for x4==X4-1 in dslash_core_ante.h. 01709 //J 01710 //J TODO Check logic w/ case examples. 01711 //J Cf. hand-written notes 8/6/09 for check of logic. 01712 int sp_idx = ((xs==(Ls-1)) ? X-(Ls-1)*2*Vh : X+2*Vh) / 2; 01713 01714 //J Read spinor from device memory. 01715 // 01716 READ_SPINOR(SPINORTEX); 01717 01718 // HERE 01719 // 01720 if ( xs < (Ls-1) ) { 01721 //J Case of not at RH boundary. Then we just do += P_R psi(s+1). 01722 01723 //J ------------------------------------ 01724 //J --- Dirac index 0, Colors 0,1,2. --- 01725 //J ------------------------------------ 01726 o00_re += i00_re+i20_re; 01727 o00_im += i00_im+i20_im; 01728 o01_re += i01_re+i21_re; 01729 o01_im += i01_im+i21_im; 01730 o02_re += i02_re+i22_re; 01731 o02_im += i02_im+i22_im; 01732 01733 //J ------------------------------------- 01734 //J --- Dirac index 1, Colors 0,1,2. --- 01735 //J ------------------------------------- 01736 o10_re += i10_re+i30_re; 01737 o10_im += i10_im+i30_im; 01738 o11_re += i11_re+i31_re; 01739 o11_im += i11_im+i31_im; 01740 o12_re += i12_re+i32_re; 01741 o12_im += i12_im+i32_im; 01742 01743 //J ------------------------------------ 01744 //J --- Dirac index 2, Colors 0,1,2. --- 01745 //J ------------------------------------ 01746 o20_re += i00_re+i20_re; 01747 o20_im += i00_im+i20_im; 01748 o21_re += i01_re+i21_re; 01749 o21_im += i01_im+i21_im; 01750 o22_re += i02_re+i22_re; 01751 o22_im += i02_im+i22_im; 01752 01753 //J ------------------------------------- 01754 //J --- Dirac index 3, Colors 0,1,2. --- 01755 //J ------------------------------------- 01756 o30_re += i10_re+i30_re; 01757 o30_im += i10_im+i30_im; 01758 o31_re += i11_re+i31_re; 01759 o31_im += i11_im+i31_im; 01760 o32_re += i12_re+i32_re; 01761 o32_im += i12_im+i32_im; 01762 01763 } // End (x,0) <= (x,s) < (x,Ls-1). 01764 else { 01765 //J RH boundary s=Ls-1, forwards hop to s=0. 01766 //J Term to add: -mferm*P_R*psi(x,0) 01767 01768 //J --- Dirac index 0, Colors 0,1,2. --- 01769 // color 0 (second index) 01770 o00_re += -mferm*(i00_re+i20_re); 01771 o00_im += -mferm*(i00_im+i20_im); 01772 // color 1 01773 o01_re += -mferm*(i01_re+i21_re); 01774 o01_im += -mferm*(i01_im+i21_im); 01775 // color 2 01776 o02_re += -mferm*(i02_re+i22_re); 01777 o02_im += -mferm*(i02_im+i22_im); 01778 01779 //J --- Dirac index 1, Colors 0,1,2. --- 01780 // color 0 01781 o10_re += -mferm*(i10_re+i30_re); 01782 o10_im += -mferm*(i10_im+i30_im); 01783 // color 1 01784 o11_re += -mferm*(i11_re+i31_re); 01785 o11_im += -mferm*(i11_im+i31_im); 01786 // color 2 01787 o12_re += -mferm*(i12_re+i32_re); 01788 o12_im += -mferm*(i12_im+i32_im); 01789 01790 //J --- Dirac index 2, Colors 0,1,2. --- 01791 // color 0 (second index) 01792 o20_re += -mferm*(i00_re+i20_re); 01793 o20_im += -mferm*(i00_im+i20_im); 01794 // color 1 01795 o21_re += -mferm*(i01_re+i21_re); 01796 o21_im += -mferm*(i01_im+i21_im); 01797 // color 2 01798 o22_re += -mferm*(i02_re+i22_re); 01799 o22_im += -mferm*(i02_im+i22_im); 01800 01801 //J --- Dirac index 3, Colors 0,1,2. --- 01802 // color 0 01803 o30_re += -mferm*(i10_re+i30_re); 01804 o30_im += -mferm*(i10_im+i30_im); 01805 // color 1 01806 o31_re += -mferm*(i11_re+i31_re); 01807 o31_im += -mferm*(i11_im+i31_im); 01808 // color 2 01809 o32_re += -mferm*(i12_re+i32_re); 01810 o32_im += -mferm*(i12_im+i32_im); 01811 // 01812 } // End (x,s)=(x,Ls-1) 01813 } 01814 // ----- end dwf s-direction ---- 01815 01816 } // end s-direction block 01817 01818 01819 // Perform the DSLASH_XPAY operations. 01820 // Undefine all the macros. TODO Make sure that this 01821 // is working right for the diagonal terms of DWF. 01822 //#include "dslash_core_post.h" 01823 01824 //J The "a" variable is an argument passed within xpay functions. Look at 01825 //J the macro DD_PARAM2 in dslash_dwf_def.h. 01826 //J READ_ACCUM is defined in io_spinor.h. In truth, it points 01827 //J to READ_ACCUM_SINGLE() in the case of single precision, etc. 01828 //J It performs tex1dfetch() calls on ACCUMTEX. ACCUMTEX stands for 01829 //J accumTexSingle, which gets bound in dslash_quda.cu prior to calling 01830 //J the kernel that is defined below. That texture is a cache of the "x" 01831 //J argument to dslashXpayS_dwf_Cuda. 01832 01833 // out_spinor = a*out_spinor + x 01834 // This is useful if we have, say 01835 // -kappa^2 * D_{eo} psi_o + psi_e 01836 // such as occurs in Lines near 666 of dslash_quda.cu. 01837 01838 #ifdef DSLASH_XPAY 01839 READ_ACCUM(ACCUMTEX) 01840 #ifdef SPINOR_DOUBLE 01841 //J This all looks like diagonal terms, 01842 //J out = a*out+in. 01843 //J Q. Why use "accum" rather than I0...I11? Look at io_spinor.h. 01844 // Also note that in dslash_quda.cu, a bunch of binding is performed 01845 // that may be related to what sits in "accum". 01846 o00_re = a*o00_re + accum0.x; 01847 o00_im = a*o00_im + accum0.y; 01848 o01_re = a*o01_re + accum1.x; 01849 o01_im = a*o01_im + accum1.y; 01850 o02_re = a*o02_re + accum2.x; 01851 o02_im = a*o02_im + accum2.y; 01852 o10_re = a*o10_re + accum3.x; 01853 o10_im = a*o10_im + accum3.y; 01854 o11_re = a*o11_re + accum4.x; 01855 o11_im = a*o11_im + accum4.y; 01856 o12_re = a*o12_re + accum5.x; 01857 o12_im = a*o12_im + accum5.y; 01858 o20_re = a*o20_re + accum6.x; 01859 o20_im = a*o20_im + accum6.y; 01860 o21_re = a*o21_re + accum7.x; 01861 o21_im = a*o21_im + accum7.y; 01862 o22_re = a*o22_re + accum8.x; 01863 o22_im = a*o22_im + accum8.y; 01864 o30_re = a*o30_re + accum9.x; 01865 o30_im = a*o30_im + accum9.y; 01866 o31_re = a*o31_re + accum10.x; 01867 o31_im = a*o31_im + accum10.y; 01868 o32_re = a*o32_re + accum11.x; 01869 o32_im = a*o32_im + accum11.y; 01870 #else 01871 o00_re = a*o00_re + accum0.x; 01872 o00_im = a*o00_im + accum0.y; 01873 o01_re = a*o01_re + accum0.z; 01874 o01_im = a*o01_im + accum0.w; 01875 o02_re = a*o02_re + accum1.x; 01876 o02_im = a*o02_im + accum1.y; 01877 o10_re = a*o10_re + accum1.z; 01878 o10_im = a*o10_im + accum1.w; 01879 o11_re = a*o11_re + accum2.x; 01880 o11_im = a*o11_im + accum2.y; 01881 o12_re = a*o12_re + accum2.z; 01882 o12_im = a*o12_im + accum2.w; 01883 o20_re = a*o20_re + accum3.x; 01884 o20_im = a*o20_im + accum3.y; 01885 o21_re = a*o21_re + accum3.z; 01886 o21_im = a*o21_im + accum3.w; 01887 o22_re = a*o22_re + accum4.x; 01888 o22_im = a*o22_im + accum4.y; 01889 o30_re = a*o30_re + accum4.z; 01890 o30_im = a*o30_im + accum4.w; 01891 o31_re = a*o31_re + accum5.x; 01892 o31_im = a*o31_im + accum5.y; 01893 o32_re = a*o32_re + accum5.z; 01894 o32_im = a*o32_im + accum5.w; 01895 #endif // DD_SPREC 01896 #endif // DSLASH_XPAY 01897 01898 01899 // write spinor field back to device memory 01900 WRITE_SPINOR(); 01901 01902 // undefine to prevent warning when precision is changed 01903 #undef spinorFloat 01904 #undef A_re 01905 #undef A_im 01906 01907 #undef g00_re 01908 #undef g00_im 01909 #undef g01_re 01910 #undef g01_im 01911 #undef g02_re 01912 #undef g02_im 01913 #undef g10_re 01914 #undef g10_im 01915 #undef g11_re 01916 #undef g11_im 01917 #undef g12_re 01918 #undef g12_im 01919 #undef g20_re 01920 #undef g20_im 01921 #undef g21_re 01922 #undef g21_im 01923 #undef g22_re 01924 #undef g22_im 01925 01926 #undef i00_re 01927 #undef i00_im 01928 #undef i01_re 01929 #undef i01_im 01930 #undef i02_re 01931 #undef i02_im 01932 #undef i10_re 01933 #undef i10_im 01934 #undef i11_re 01935 #undef i11_im 01936 #undef i12_re 01937 #undef i12_im 01938 #undef i20_re 01939 #undef i20_im 01940 #undef i21_re 01941 #undef i21_im 01942 #undef i22_re 01943 #undef i22_im 01944 #undef i30_re 01945 #undef i30_im 01946 #undef i31_re 01947 #undef i31_im 01948 #undef i32_re 01949 #undef i32_im 01950 01951 01952
1.7.3