QUDA v0.3.2
A library for QCD on GPUs

quda/lib/dslash_core/wilson_dslash_core.h

Go to the documentation of this file.
00001 // *** CUDA DSLASH ***
00002 // SHARED_FLOATS_PER_THREAD scales the shared-memory offset of each group of threads (see the definition of s below); the eight outputs o00_re..o10_im are the values addressed through s[].
00003 #define SHARED_FLOATS_PER_THREAD 8
00004 
00005 // input spinor: iSC_re / iSC_im name the real / imaginary parts held in the I registers (S = 0..3, C = 0..2; presumably spin and colour indices). The I registers are not declared in this listing; presumably READ_SPINOR fills them.
00006 #ifdef SPINOR_DOUBLE // one complex value per register (.x, .y): I0..I11
00007 #define spinorFloat double
00008 #define i00_re I0.x
00009 #define i00_im I0.y
00010 #define i01_re I1.x
00011 #define i01_im I1.y
00012 #define i02_re I2.x
00013 #define i02_im I2.y
00014 #define i10_re I3.x
00015 #define i10_im I3.y
00016 #define i11_re I4.x
00017 #define i11_im I4.y
00018 #define i12_re I5.x
00019 #define i12_im I5.y
00020 #define i20_re I6.x
00021 #define i20_im I6.y
00022 #define i21_re I7.x
00023 #define i21_im I7.y
00024 #define i22_re I8.x
00025 #define i22_im I8.y
00026 #define i30_re I9.x
00027 #define i30_im I9.y
00028 #define i31_re I10.x
00029 #define i31_im I10.y
00030 #define i32_re I11.x
00031 #define i32_im I11.y
00032 
00033 #else // SPINOR_DOUBLE not defined: two complex values per register (.x, .y, .z, .w): I0..I5
00034 #define spinorFloat float
00035 #define i00_re I0.x
00036 #define i00_im I0.y
00037 #define i01_re I0.z
00038 #define i01_im I0.w
00039 #define i02_re I1.x
00040 #define i02_im I1.y
00041 #define i10_re I1.z
00042 #define i10_im I1.w
00043 #define i11_re I2.x
00044 #define i11_im I2.y
00045 #define i12_re I2.z
00046 #define i12_im I2.w
00047 #define i20_re I3.x
00048 #define i20_im I3.y
00049 #define i21_re I3.z
00050 #define i21_im I3.w
00051 #define i22_re I4.x
00052 #define i22_im I4.y
00053 #define i30_re I4.z
00054 #define i30_im I4.w
00055 #define i31_re I5.x
00056 #define i31_im I5.y
00057 #define i32_re I5.z
00058 #define i32_im I5.w
00059 #endif // SPINOR_DOUBLE
00060 
00061 // gauge link: gRC_re / gRC_im are the real / imaginary parts of entry (row R, column C) of a 3x3 complex matrix held in the G registers (the G registers are not declared in this listing)
00062 #ifdef GAUGE_FLOAT2 // one complex value per register (.x, .y): G0..G8
00063 #define g00_re G0.x
00064 #define g00_im G0.y
00065 #define g01_re G1.x
00066 #define g01_im G1.y
00067 #define g02_re G2.x
00068 #define g02_im G2.y
00069 #define g10_re G3.x
00070 #define g10_im G3.y
00071 #define g11_re G4.x
00072 #define g11_im G4.y
00073 #define g12_re G5.x
00074 #define g12_im G5.y
00075 #define g20_re G6.x
00076 #define g20_im G6.y
00077 #define g21_re G7.x
00078 #define g21_im G7.y
00079 #define g22_re G8.x
00080 #define g22_im G8.y
00081 // temporaries: alias G9, which no matrix entry above uses (not referenced elsewhere in this listing; presumably used by RECONSTRUCT_GAUGE_MATRIX)
00082 #define A_re G9.x
00083 #define A_im G9.y
00084 
00085 #else // GAUGE_FLOAT2 not defined: two complex values per register (.x, .y, .z, .w): G0..G4
00086 #define g00_re G0.x
00087 #define g00_im G0.y
00088 #define g01_re G0.z
00089 #define g01_im G0.w
00090 #define g02_re G1.x
00091 #define g02_im G1.y
00092 #define g10_re G1.z
00093 #define g10_im G1.w
00094 #define g11_re G2.x
00095 #define g11_im G2.y
00096 #define g12_re G2.z
00097 #define g12_im G2.w
00098 #define g20_re G3.x
00099 #define g20_im G3.y
00100 #define g21_re G3.z
00101 #define g21_im G3.w
00102 #define g22_re G4.x
00103 #define g22_im G4.y
00104 // temporaries: alias the two components of G4 left over after g22 (G4.z, G4.w)
00105 #define A_re G4.z
00106 #define A_im G4.w
00107 
00108 #endif // GAUGE_FLOAT2
00109 
00110 // conjugated gauge link: gTrc is the complex conjugate of g(c,r), i.e. the conjugate transpose of the matrix defined by the g macros
00111 #define gT00_re (+g00_re)
00112 #define gT00_im (-g00_im)
00113 #define gT01_re (+g10_re)
00114 #define gT01_im (-g10_im)
00115 #define gT02_re (+g20_re)
00116 #define gT02_im (-g20_im)
00117 #define gT10_re (+g01_re)
00118 #define gT10_im (-g01_im)
00119 #define gT11_re (+g11_re)
00120 #define gT11_im (-g11_im)
00121 #define gT12_re (+g21_re)
00122 #define gT12_im (-g21_im)
00123 #define gT20_re (+g02_re)
00124 #define gT20_im (-g02_im)
00125 #define gT21_re (+g12_re)
00126 #define gT21_im (-g12_im)
00127 #define gT22_re (+g22_re)
00128 #define gT22_im (-g22_im)
00129 
00130 // first chiral block of inverted clover term: six real diagonal entries (cAB_AB_re) followed by fifteen complex off-diagonal entries, mapped onto the C registers (not declared in this listing)
00131 #ifdef CLOVER_DOUBLE // two values per register (.x, .y): C0..C17
00132 #define c00_00_re C0.x
00133 #define c01_01_re C0.y
00134 #define c02_02_re C1.x
00135 #define c10_10_re C1.y
00136 #define c11_11_re C2.x
00137 #define c12_12_re C2.y
00138 #define c01_00_re C3.x
00139 #define c01_00_im C3.y
00140 #define c02_00_re C4.x
00141 #define c02_00_im C4.y
00142 #define c10_00_re C5.x
00143 #define c10_00_im C5.y
00144 #define c11_00_re C6.x
00145 #define c11_00_im C6.y
00146 #define c12_00_re C7.x
00147 #define c12_00_im C7.y
00148 #define c02_01_re C8.x
00149 #define c02_01_im C8.y
00150 #define c10_01_re C9.x
00151 #define c10_01_im C9.y
00152 #define c11_01_re C10.x
00153 #define c11_01_im C10.y
00154 #define c12_01_re C11.x
00155 #define c12_01_im C11.y
00156 #define c10_02_re C12.x
00157 #define c10_02_im C12.y
00158 #define c11_02_re C13.x
00159 #define c11_02_im C13.y
00160 #define c12_02_re C14.x
00161 #define c12_02_im C14.y
00162 #define c11_10_re C15.x
00163 #define c11_10_im C15.y
00164 #define c12_10_re C16.x
00165 #define c12_10_im C16.y
00166 #define c12_11_re C17.x
00167 #define c12_11_im C17.y
00168 #else // CLOVER_DOUBLE not defined: four values per register (.x, .y, .z, .w): C0..C8
00169 #define c00_00_re C0.x
00170 #define c01_01_re C0.y
00171 #define c02_02_re C0.z
00172 #define c10_10_re C0.w
00173 #define c11_11_re C1.x
00174 #define c12_12_re C1.y
00175 #define c01_00_re C1.z
00176 #define c01_00_im C1.w
00177 #define c02_00_re C2.x
00178 #define c02_00_im C2.y
00179 #define c10_00_re C2.z
00180 #define c10_00_im C2.w
00181 #define c11_00_re C3.x
00182 #define c11_00_im C3.y
00183 #define c12_00_re C3.z
00184 #define c12_00_im C3.w
00185 #define c02_01_re C4.x
00186 #define c02_01_im C4.y
00187 #define c10_01_re C4.z
00188 #define c10_01_im C4.w
00189 #define c11_01_re C5.x
00190 #define c11_01_im C5.y
00191 #define c12_01_re C5.z
00192 #define c12_01_im C5.w
00193 #define c10_02_re C6.x
00194 #define c10_02_im C6.y
00195 #define c11_02_re C6.z
00196 #define c11_02_im C6.w
00197 #define c12_02_re C7.x
00198 #define c12_02_im C7.y
00199 #define c11_10_re C7.z
00200 #define c11_10_im C7.w
00201 #define c12_10_re C8.x
00202 #define c12_10_im C8.y
00203 #define c12_11_re C8.z
00204 #define c12_11_im C8.w
00205 #endif // CLOVER_DOUBLE
00206 // remaining off-diagonal clover entries: each is the complex conjugate of the stored entry with its two index pairs swapped (cXX_YY = conj(cYY_XX))
00207 #define c00_01_re (+c01_00_re)
00208 #define c00_01_im (-c01_00_im)
00209 #define c00_02_re (+c02_00_re)
00210 #define c00_02_im (-c02_00_im)
00211 #define c01_02_re (+c02_01_re)
00212 #define c01_02_im (-c02_01_im)
00213 #define c00_10_re (+c10_00_re)
00214 #define c00_10_im (-c10_00_im)
00215 #define c01_10_re (+c10_01_re)
00216 #define c01_10_im (-c10_01_im)
00217 #define c02_10_re (+c10_02_re)
00218 #define c02_10_im (-c10_02_im)
00219 #define c00_11_re (+c11_00_re)
00220 #define c00_11_im (-c11_00_im)
00221 #define c01_11_re (+c11_01_re)
00222 #define c01_11_im (-c11_01_im)
00223 #define c02_11_re (+c11_02_re)
00224 #define c02_11_im (-c11_02_im)
00225 #define c10_11_re (+c11_10_re)
00226 #define c10_11_im (-c11_10_im)
00227 #define c00_12_re (+c12_00_re)
00228 #define c00_12_im (-c12_00_im)
00229 #define c01_12_re (+c12_01_re)
00230 #define c01_12_im (-c12_01_im)
00231 #define c02_12_re (+c12_02_re)
00232 #define c02_12_im (-c12_02_im)
00233 #define c10_12_re (+c12_10_re)
00234 #define c10_12_im (-c12_10_im)
00235 #define c11_12_re (+c12_11_re)
00236 #define c11_12_im (-c12_11_im)
00237 
00238 // second chiral block of inverted clover term: the c2x / c3x names alias the c0x / c1x names, so they resolve to the same C registers (C0..C17 with CLOVER_DOUBLE, C0..C8 otherwise)
00239 #define c20_20_re c00_00_re
00240 #define c21_20_re c01_00_re
00241 #define c21_20_im c01_00_im
00242 #define c22_20_re c02_00_re
00243 #define c22_20_im c02_00_im
00244 #define c30_20_re c10_00_re
00245 #define c30_20_im c10_00_im
00246 #define c31_20_re c11_00_re
00247 #define c31_20_im c11_00_im
00248 #define c32_20_re c12_00_re
00249 #define c32_20_im c12_00_im
00250 #define c20_21_re c00_01_re
00251 #define c20_21_im c00_01_im
00252 #define c21_21_re c01_01_re
00253 #define c22_21_re c02_01_re
00254 #define c22_21_im c02_01_im
00255 #define c30_21_re c10_01_re
00256 #define c30_21_im c10_01_im
00257 #define c31_21_re c11_01_re
00258 #define c31_21_im c11_01_im
00259 #define c32_21_re c12_01_re
00260 #define c32_21_im c12_01_im
00261 #define c20_22_re c00_02_re
00262 #define c20_22_im c00_02_im
00263 #define c21_22_re c01_02_re
00264 #define c21_22_im c01_02_im
00265 #define c22_22_re c02_02_re
00266 #define c30_22_re c10_02_re
00267 #define c30_22_im c10_02_im
00268 #define c31_22_re c11_02_re
00269 #define c31_22_im c11_02_im
00270 #define c32_22_re c12_02_re
00271 #define c32_22_im c12_02_im
00272 #define c20_30_re c00_10_re
00273 #define c20_30_im c00_10_im
00274 #define c21_30_re c01_10_re
00275 #define c21_30_im c01_10_im
00276 #define c22_30_re c02_10_re
00277 #define c22_30_im c02_10_im
00278 #define c30_30_re c10_10_re
00279 #define c31_30_re c11_10_re
00280 #define c31_30_im c11_10_im
00281 #define c32_30_re c12_10_re
00282 #define c32_30_im c12_10_im
00283 #define c20_31_re c00_11_re
00284 #define c20_31_im c00_11_im
00285 #define c21_31_re c01_11_re
00286 #define c21_31_im c01_11_im
00287 #define c22_31_re c02_11_re
00288 #define c22_31_im c02_11_im
00289 #define c30_31_re c10_11_re
00290 #define c30_31_im c10_11_im
00291 #define c31_31_re c11_11_re
00292 #define c32_31_re c12_11_re
00293 #define c32_31_im c12_11_im
00294 #define c20_32_re c00_12_re
00295 #define c20_32_im c00_12_im
00296 #define c21_32_re c01_12_re
00297 #define c21_32_im c01_12_im
00298 #define c22_32_re c02_12_re
00299 #define c22_32_im c02_12_im
00300 #define c30_32_re c10_12_re
00301 #define c30_32_im c10_12_im
00302 #define c31_32_re c11_12_re
00303 #define c31_32_im c11_12_im
00304 #define c32_32_re c12_12_re
00305 
00306 // output spinor: the first eight values (o00_re..o10_im) are addressed through the shared-memory pointer s at multiples of SHARED_STRIDE; the remaining sixteen are volatile local variables
00307 #define o00_re s[0*SHARED_STRIDE]
00308 #define o00_im s[1*SHARED_STRIDE]
00309 #define o01_re s[2*SHARED_STRIDE]
00310 #define o01_im s[3*SHARED_STRIDE]
00311 #define o02_re s[4*SHARED_STRIDE]
00312 #define o02_im s[5*SHARED_STRIDE]
00313 #define o10_re s[6*SHARED_STRIDE]
00314 #define o10_im s[7*SHARED_STRIDE]
00315 volatile spinorFloat o11_re;
00316 volatile spinorFloat o11_im;
00317 volatile spinorFloat o12_re;
00318 volatile spinorFloat o12_im;
00319 volatile spinorFloat o20_re;
00320 volatile spinorFloat o20_im;
00321 volatile spinorFloat o21_re;
00322 volatile spinorFloat o21_im;
00323 volatile spinorFloat o22_re;
00324 volatile spinorFloat o22_im;
00325 volatile spinorFloat o30_re;
00326 volatile spinorFloat o30_im;
00327 volatile spinorFloat o31_re;
00328 volatile spinorFloat o31_im;
00329 volatile spinorFloat o32_re;
00330 volatile spinorFloat o32_im;
00331 
00332 
00333 
00334 #include "read_gauge.h"
00335 #include "read_clover.h"
00336 #include "io_spinor.h"
00337 // site indices: sid is the global thread index, split below into x1h (fastest), x2, x3, x4. FAST_INT_DIVIDE, X1h, X2, X3 and oddBit are defined outside this listing; FAST_INT_DIVIDE is presumably an integer division.
00338 int sid = blockIdx.x*blockDim.x + threadIdx.x;
00339 int z1 = FAST_INT_DIVIDE(sid, X1h);
00340 int x1h = sid - z1*X1h; // what is left of sid after removing z1 whole rows of X1h
00341 int z2 = FAST_INT_DIVIDE(z1, X2);
00342 int x2 = z1 - z2*X2;
00343 int x4 = FAST_INT_DIVIDE(z2, X3);
00344 int x3 = z2 - x4*X3;
00345 int x1odd = (x2 + x3 + x4 + oddBit) & 1; // lowest bit of x2+x3+x4+oddBit
00346 int x1 = 2*x1h + x1odd;
00347 int X = 2*sid + x1odd; // so X >> 1 == sid, matching the ">> 1" applied to the neighbour indices in the blocks below
00348 // s is this thread's base pointer into the extern __shared__ array: threads are taken SHARED_STRIDE at a time, each such group is offset by SHARED_FLOATS_PER_THREAD*SHARED_STRIDE values, and a thread's k-th value is s[k*SHARED_STRIDE]
00349 #ifdef SPINOR_DOUBLE
00350 #if (__CUDA_ARCH__ >= 200)
00351 #define SHARED_STRIDE 16 // to avoid bank conflicts on Fermi
00352 #else
00353 #define SHARED_STRIDE  8 // to avoid bank conflicts on G80 and GT200
00354 #endif
00355 extern __shared__ spinorFloat sd_data[];
00356 volatile spinorFloat *s = sd_data + SHARED_FLOATS_PER_THREAD*SHARED_STRIDE*(threadIdx.x/SHARED_STRIDE)
00357                                   + (threadIdx.x % SHARED_STRIDE);
00358 #else // SPINOR_DOUBLE not defined: same layout with doubled strides and a separately named array
00359 #if (__CUDA_ARCH__ >= 200)
00360 #define SHARED_STRIDE 32 // to avoid bank conflicts on Fermi
00361 #else
00362 #define SHARED_STRIDE 16 // to avoid bank conflicts on G80 and GT200
00363 #endif
00364 extern __shared__ spinorFloat ss_data[];
00365 volatile spinorFloat *s = ss_data + SHARED_FLOATS_PER_THREAD*SHARED_STRIDE*(threadIdx.x/SHARED_STRIDE)
00366                                   + (threadIdx.x % SHARED_STRIDE);
00367 #endif // SPINOR_DOUBLE
00368 // clear all 24 output values (eight through s[], sixteen volatile locals) before the projector blocks accumulate into them
00369 o00_re = o00_im = 0;
00370 o01_re = o01_im = 0;
00371 o02_re = o02_im = 0;
00372 o10_re = o10_im = 0;
00373 o11_re = o11_im = 0;
00374 o12_re = o12_im = 0;
00375 o20_re = o20_im = 0;
00376 o21_re = o21_im = 0;
00377 o22_re = o22_im = 0;
00378 o30_re = o30_im = 0;
00379 o31_re = o31_im = 0;
00380 o32_re = o32_im = 0;
00381 
00382 {
00383     // Projector P0-: 4x4 matrix (rows listed below) applied to the first index of the input spinor read in this block
00384     // 1 0 0 -i 
00385     // 0 1 -i 0 
00386     // 0 i 1 0 
00387     // i 0 0 1 
00388     // sp_idx = (X+1)>>1, or (X-X1m1)>>1 when x1 == X1m1 (presumably the wrap at the upper x1 boundary; X1m1 is defined outside this listing)
00389     int sp_idx = ((x1==X1m1) ? X-X1m1 : X+1) >> 1;
00390     int ga_idx = sid; // gauge index is this thread's own sid
00391     
00392     // read gauge matrix from device memory (macro defined outside this listing; presumably uses ga_idx)
00393     READ_GAUGE_MATRIX(GAUGE0TEX, 0);
00394     
00395     // read spinor from device memory (macro defined outside this listing; presumably uses sp_idx)
00396     READ_SPINOR(SPINORTEX);
00397     
00398     // reconstruct gauge matrix
00399     RECONSTRUCT_GAUGE_MATRIX(0);
00400     
00401     // project spinor into half spinors: a = i0 - i*i3 and b = i1 - i*i2 (rows 0 and 1 of P0-), for each second index 0..2
00402     spinorFloat a0_re = +i00_re+i30_im;
00403     spinorFloat a0_im = +i00_im-i30_re;
00404     spinorFloat a1_re = +i01_re+i31_im;
00405     spinorFloat a1_im = +i01_im-i31_re;
00406     spinorFloat a2_re = +i02_re+i32_im;
00407     spinorFloat a2_im = +i02_im-i32_re;
00408     
00409     spinorFloat b0_re = +i10_re+i20_im;
00410     spinorFloat b0_im = +i10_im-i20_re;
00411     spinorFloat b1_re = +i11_re+i21_im;
00412     spinorFloat b1_im = +i11_im-i21_re;
00413     spinorFloat b2_re = +i12_re+i22_im;
00414     spinorFloat b2_im = +i12_im-i22_re;
00415     
00416     // multiply row 0: A0 = sum over c of g0c * a_c, B0 = sum over c of g0c * b_c (complex products written out in real and imaginary parts)
00417     spinorFloat A0_re = 0;
00418     A0_re += g00_re * a0_re;
00419     A0_re -= g00_im * a0_im;
00420     A0_re += g01_re * a1_re;
00421     A0_re -= g01_im * a1_im;
00422     A0_re += g02_re * a2_re;
00423     A0_re -= g02_im * a2_im;
00424     spinorFloat A0_im = 0;
00425     A0_im += g00_re * a0_im;
00426     A0_im += g00_im * a0_re;
00427     A0_im += g01_re * a1_im;
00428     A0_im += g01_im * a1_re;
00429     A0_im += g02_re * a2_im;
00430     A0_im += g02_im * a2_re;
00431     spinorFloat B0_re = 0;
00432     B0_re += g00_re * b0_re;
00433     B0_re -= g00_im * b0_im;
00434     B0_re += g01_re * b1_re;
00435     B0_re -= g01_im * b1_im;
00436     B0_re += g02_re * b2_re;
00437     B0_re -= g02_im * b2_im;
00438     spinorFloat B0_im = 0;
00439     B0_im += g00_re * b0_im;
00440     B0_im += g00_im * b0_re;
00441     B0_im += g01_re * b1_im;
00442     B0_im += g01_im * b1_re;
00443     B0_im += g02_re * b2_im;
00444     B0_im += g02_im * b2_re;
00445     
00446     // multiply row 1: A1 and B1 from matrix row g10, g11, g12
00447     spinorFloat A1_re = 0;
00448     A1_re += g10_re * a0_re;
00449     A1_re -= g10_im * a0_im;
00450     A1_re += g11_re * a1_re;
00451     A1_re -= g11_im * a1_im;
00452     A1_re += g12_re * a2_re;
00453     A1_re -= g12_im * a2_im;
00454     spinorFloat A1_im = 0;
00455     A1_im += g10_re * a0_im;
00456     A1_im += g10_im * a0_re;
00457     A1_im += g11_re * a1_im;
00458     A1_im += g11_im * a1_re;
00459     A1_im += g12_re * a2_im;
00460     A1_im += g12_im * a2_re;
00461     spinorFloat B1_re = 0;
00462     B1_re += g10_re * b0_re;
00463     B1_re -= g10_im * b0_im;
00464     B1_re += g11_re * b1_re;
00465     B1_re -= g11_im * b1_im;
00466     B1_re += g12_re * b2_re;
00467     B1_re -= g12_im * b2_im;
00468     spinorFloat B1_im = 0;
00469     B1_im += g10_re * b0_im;
00470     B1_im += g10_im * b0_re;
00471     B1_im += g11_re * b1_im;
00472     B1_im += g11_im * b1_re;
00473     B1_im += g12_re * b2_im;
00474     B1_im += g12_im * b2_re;
00475     
00476     // multiply row 2: A2 and B2 from matrix row g20, g21, g22
00477     spinorFloat A2_re = 0;
00478     A2_re += g20_re * a0_re;
00479     A2_re -= g20_im * a0_im;
00480     A2_re += g21_re * a1_re;
00481     A2_re -= g21_im * a1_im;
00482     A2_re += g22_re * a2_re;
00483     A2_re -= g22_im * a2_im;
00484     spinorFloat A2_im = 0;
00485     A2_im += g20_re * a0_im;
00486     A2_im += g20_im * a0_re;
00487     A2_im += g21_re * a1_im;
00488     A2_im += g21_im * a1_re;
00489     A2_im += g22_re * a2_im;
00490     A2_im += g22_im * a2_re;
00491     spinorFloat B2_re = 0;
00492     B2_re += g20_re * b0_re;
00493     B2_re -= g20_im * b0_im;
00494     B2_re += g21_re * b1_re;
00495     B2_re -= g21_im * b1_im;
00496     B2_re += g22_re * b2_re;
00497     B2_re -= g22_im * b2_im;
00498     spinorFloat B2_im = 0;
00499     B2_im += g20_re * b0_im;
00500     B2_im += g20_im * b0_re;
00501     B2_im += g21_re * b1_im;
00502     B2_im += g21_im * b1_re;
00503     B2_im += g22_re * b2_im;
00504     B2_im += g22_im * b2_re;
00505     // accumulate into the output: o0 += A, o1 += B, o2 += i*B, o3 += i*A (rows 2 and 3 of P0- expressed through a and b)
00506     o00_re += A0_re;
00507     o00_im += A0_im;
00508     o10_re += B0_re;
00509     o10_im += B0_im;
00510     o20_re -= B0_im;
00511     o20_im += B0_re;
00512     o30_re -= A0_im;
00513     o30_im += A0_re;
00514     
00515     o01_re += A1_re;
00516     o01_im += A1_im;
00517     o11_re += B1_re;
00518     o11_im += B1_im;
00519     o21_re -= B1_im;
00520     o21_im += B1_re;
00521     o31_re -= A1_im;
00522     o31_im += A1_re;
00523     
00524     o02_re += A2_re;
00525     o02_im += A2_im;
00526     o12_re += B2_re;
00527     o12_im += B2_im;
00528     o22_re -= B2_im;
00529     o22_im += B2_re;
00530     o32_re -= A2_im;
00531     o32_im += A2_re;
00532     
00533 }
00534 
00535 {
00536     // Projector P0+: 4x4 matrix (rows listed below) applied to the first index of the input spinor read in this block
00537     // 1 0 0 i 
00538     // 0 1 i 0 
00539     // 0 -i 1 0 
00540     // -i 0 0 1 
00541     // sp_idx = (X-1)>>1, or (X+X1m1)>>1 when x1 == 0 (presumably the wrap at the lower x1 boundary; X1m1 is defined outside this listing)
00542     int sp_idx = ((x1==0)    ? X+X1m1 : X-1) >> 1;
00543     int ga_idx = sp_idx; // gauge index follows the spinor index here; the matrix is applied below through the gT (conjugate-transpose) macros
00544     
00545     // read gauge matrix from device memory (macro defined outside this listing; presumably uses ga_idx)
00546     READ_GAUGE_MATRIX(GAUGE1TEX, 1);
00547     
00548     // read spinor from device memory (macro defined outside this listing; presumably uses sp_idx)
00549     READ_SPINOR(SPINORTEX);
00550     
00551     // reconstruct gauge matrix
00552     RECONSTRUCT_GAUGE_MATRIX(1);
00553     
00554     // project spinor into half spinors: a = i0 + i*i3 and b = i1 + i*i2 (rows 0 and 1 of P0+), for each second index 0..2
00555     spinorFloat a0_re = +i00_re-i30_im;
00556     spinorFloat a0_im = +i00_im+i30_re;
00557     spinorFloat a1_re = +i01_re-i31_im;
00558     spinorFloat a1_im = +i01_im+i31_re;
00559     spinorFloat a2_re = +i02_re-i32_im;
00560     spinorFloat a2_im = +i02_im+i32_re;
00561     
00562     spinorFloat b0_re = +i10_re-i20_im;
00563     spinorFloat b0_im = +i10_im+i20_re;
00564     spinorFloat b1_re = +i11_re-i21_im;
00565     spinorFloat b1_im = +i11_im+i21_re;
00566     spinorFloat b2_re = +i12_re-i22_im;
00567     spinorFloat b2_im = +i12_im+i22_re;
00568     
00569     // multiply row 0: A0 = sum over c of gT0c * a_c, B0 = sum over c of gT0c * b_c (complex products written out in real and imaginary parts)
00570     spinorFloat A0_re = 0;
00571     A0_re += gT00_re * a0_re;
00572     A0_re -= gT00_im * a0_im;
00573     A0_re += gT01_re * a1_re;
00574     A0_re -= gT01_im * a1_im;
00575     A0_re += gT02_re * a2_re;
00576     A0_re -= gT02_im * a2_im;
00577     spinorFloat A0_im = 0;
00578     A0_im += gT00_re * a0_im;
00579     A0_im += gT00_im * a0_re;
00580     A0_im += gT01_re * a1_im;
00581     A0_im += gT01_im * a1_re;
00582     A0_im += gT02_re * a2_im;
00583     A0_im += gT02_im * a2_re;
00584     spinorFloat B0_re = 0;
00585     B0_re += gT00_re * b0_re;
00586     B0_re -= gT00_im * b0_im;
00587     B0_re += gT01_re * b1_re;
00588     B0_re -= gT01_im * b1_im;
00589     B0_re += gT02_re * b2_re;
00590     B0_re -= gT02_im * b2_im;
00591     spinorFloat B0_im = 0;
00592     B0_im += gT00_re * b0_im;
00593     B0_im += gT00_im * b0_re;
00594     B0_im += gT01_re * b1_im;
00595     B0_im += gT01_im * b1_re;
00596     B0_im += gT02_re * b2_im;
00597     B0_im += gT02_im * b2_re;
00598     
00599     // multiply row 1: A1 and B1 from matrix row gT10, gT11, gT12
00600     spinorFloat A1_re = 0;
00601     A1_re += gT10_re * a0_re;
00602     A1_re -= gT10_im * a0_im;
00603     A1_re += gT11_re * a1_re;
00604     A1_re -= gT11_im * a1_im;
00605     A1_re += gT12_re * a2_re;
00606     A1_re -= gT12_im * a2_im;
00607     spinorFloat A1_im = 0;
00608     A1_im += gT10_re * a0_im;
00609     A1_im += gT10_im * a0_re;
00610     A1_im += gT11_re * a1_im;
00611     A1_im += gT11_im * a1_re;
00612     A1_im += gT12_re * a2_im;
00613     A1_im += gT12_im * a2_re;
00614     spinorFloat B1_re = 0;
00615     B1_re += gT10_re * b0_re;
00616     B1_re -= gT10_im * b0_im;
00617     B1_re += gT11_re * b1_re;
00618     B1_re -= gT11_im * b1_im;
00619     B1_re += gT12_re * b2_re;
00620     B1_re -= gT12_im * b2_im;
00621     spinorFloat B1_im = 0;
00622     B1_im += gT10_re * b0_im;
00623     B1_im += gT10_im * b0_re;
00624     B1_im += gT11_re * b1_im;
00625     B1_im += gT11_im * b1_re;
00626     B1_im += gT12_re * b2_im;
00627     B1_im += gT12_im * b2_re;
00628     
00629     // multiply row 2: A2 and B2 from matrix row gT20, gT21, gT22
00630     spinorFloat A2_re = 0;
00631     A2_re += gT20_re * a0_re;
00632     A2_re -= gT20_im * a0_im;
00633     A2_re += gT21_re * a1_re;
00634     A2_re -= gT21_im * a1_im;
00635     A2_re += gT22_re * a2_re;
00636     A2_re -= gT22_im * a2_im;
00637     spinorFloat A2_im = 0;
00638     A2_im += gT20_re * a0_im;
00639     A2_im += gT20_im * a0_re;
00640     A2_im += gT21_re * a1_im;
00641     A2_im += gT21_im * a1_re;
00642     A2_im += gT22_re * a2_im;
00643     A2_im += gT22_im * a2_re;
00644     spinorFloat B2_re = 0;
00645     B2_re += gT20_re * b0_re;
00646     B2_re -= gT20_im * b0_im;
00647     B2_re += gT21_re * b1_re;
00648     B2_re -= gT21_im * b1_im;
00649     B2_re += gT22_re * b2_re;
00650     B2_re -= gT22_im * b2_im;
00651     spinorFloat B2_im = 0;
00652     B2_im += gT20_re * b0_im;
00653     B2_im += gT20_im * b0_re;
00654     B2_im += gT21_re * b1_im;
00655     B2_im += gT21_im * b1_re;
00656     B2_im += gT22_re * b2_im;
00657     B2_im += gT22_im * b2_re;
00658     // accumulate into the output: o0 += A, o1 += B, o2 += -i*B, o3 += -i*A (rows 2 and 3 of P0+ expressed through a and b)
00659     o00_re += A0_re;
00660     o00_im += A0_im;
00661     o10_re += B0_re;
00662     o10_im += B0_im;
00663     o20_re += B0_im;
00664     o20_im -= B0_re;
00665     o30_re += A0_im;
00666     o30_im -= A0_re;
00667     
00668     o01_re += A1_re;
00669     o01_im += A1_im;
00670     o11_re += B1_re;
00671     o11_im += B1_im;
00672     o21_re += B1_im;
00673     o21_im -= B1_re;
00674     o31_re += A1_im;
00675     o31_im -= A1_re;
00676     
00677     o02_re += A2_re;
00678     o02_im += A2_im;
00679     o12_re += B2_re;
00680     o12_im += B2_im;
00681     o22_re += B2_im;
00682     o22_im -= B2_re;
00683     o32_re += A2_im;
00684     o32_im -= A2_re;
00685     
00686 }
00687 
00688 {
00689     // Projector P1-: 4x4 matrix (rows listed below) applied to the first index of the input spinor read in this block
00690     // 1 0 0 -1 
00691     // 0 1 1 0 
00692     // 0 1 1 0 
00693     // -1 0 0 1 
00694     // sp_idx = (X+X1)>>1, or (X-X2X1mX1)>>1 when x2 == X2m1 (presumably the wrap at the upper x2 boundary; X1, X2m1 and X2X1mX1 are defined outside this listing)
00695     int sp_idx = ((x2==X2m1) ? X-X2X1mX1 : X+X1) >> 1;
00696     int ga_idx = sid; // gauge index is this thread's own sid
00697     
00698     // read gauge matrix from device memory (macro defined outside this listing; presumably uses ga_idx)
00699     READ_GAUGE_MATRIX(GAUGE0TEX, 2);
00700     
00701     // read spinor from device memory (macro defined outside this listing; presumably uses sp_idx)
00702     READ_SPINOR(SPINORTEX);
00703     
00704     // reconstruct gauge matrix
00705     RECONSTRUCT_GAUGE_MATRIX(2);
00706     
00707     // project spinor into half spinors: a = i0 - i3 and b = i1 + i2 (rows 0 and 1 of P1-), for each second index 0..2
00708     spinorFloat a0_re = +i00_re-i30_re;
00709     spinorFloat a0_im = +i00_im-i30_im;
00710     spinorFloat a1_re = +i01_re-i31_re;
00711     spinorFloat a1_im = +i01_im-i31_im;
00712     spinorFloat a2_re = +i02_re-i32_re;
00713     spinorFloat a2_im = +i02_im-i32_im;
00714     
00715     spinorFloat b0_re = +i10_re+i20_re;
00716     spinorFloat b0_im = +i10_im+i20_im;
00717     spinorFloat b1_re = +i11_re+i21_re;
00718     spinorFloat b1_im = +i11_im+i21_im;
00719     spinorFloat b2_re = +i12_re+i22_re;
00720     spinorFloat b2_im = +i12_im+i22_im;
00721     
00722     // multiply row 0: A0 = sum over c of g0c * a_c, B0 = sum over c of g0c * b_c (complex products written out in real and imaginary parts)
00723     spinorFloat A0_re = 0;
00724     A0_re += g00_re * a0_re;
00725     A0_re -= g00_im * a0_im;
00726     A0_re += g01_re * a1_re;
00727     A0_re -= g01_im * a1_im;
00728     A0_re += g02_re * a2_re;
00729     A0_re -= g02_im * a2_im;
00730     spinorFloat A0_im = 0;
00731     A0_im += g00_re * a0_im;
00732     A0_im += g00_im * a0_re;
00733     A0_im += g01_re * a1_im;
00734     A0_im += g01_im * a1_re;
00735     A0_im += g02_re * a2_im;
00736     A0_im += g02_im * a2_re;
00737     spinorFloat B0_re = 0;
00738     B0_re += g00_re * b0_re;
00739     B0_re -= g00_im * b0_im;
00740     B0_re += g01_re * b1_re;
00741     B0_re -= g01_im * b1_im;
00742     B0_re += g02_re * b2_re;
00743     B0_re -= g02_im * b2_im;
00744     spinorFloat B0_im = 0;
00745     B0_im += g00_re * b0_im;
00746     B0_im += g00_im * b0_re;
00747     B0_im += g01_re * b1_im;
00748     B0_im += g01_im * b1_re;
00749     B0_im += g02_re * b2_im;
00750     B0_im += g02_im * b2_re;
00751     
00752     // multiply row 1: A1 and B1 from matrix row g10, g11, g12
00753     spinorFloat A1_re = 0;
00754     A1_re += g10_re * a0_re;
00755     A1_re -= g10_im * a0_im;
00756     A1_re += g11_re * a1_re;
00757     A1_re -= g11_im * a1_im;
00758     A1_re += g12_re * a2_re;
00759     A1_re -= g12_im * a2_im;
00760     spinorFloat A1_im = 0;
00761     A1_im += g10_re * a0_im;
00762     A1_im += g10_im * a0_re;
00763     A1_im += g11_re * a1_im;
00764     A1_im += g11_im * a1_re;
00765     A1_im += g12_re * a2_im;
00766     A1_im += g12_im * a2_re;
00767     spinorFloat B1_re = 0;
00768     B1_re += g10_re * b0_re;
00769     B1_re -= g10_im * b0_im;
00770     B1_re += g11_re * b1_re;
00771     B1_re -= g11_im * b1_im;
00772     B1_re += g12_re * b2_re;
00773     B1_re -= g12_im * b2_im;
00774     spinorFloat B1_im = 0;
00775     B1_im += g10_re * b0_im;
00776     B1_im += g10_im * b0_re;
00777     B1_im += g11_re * b1_im;
00778     B1_im += g11_im * b1_re;
00779     B1_im += g12_re * b2_im;
00780     B1_im += g12_im * b2_re;
00781     
00782     // multiply row 2: A2 and B2 from matrix row g20, g21, g22
00783     spinorFloat A2_re = 0;
00784     A2_re += g20_re * a0_re;
00785     A2_re -= g20_im * a0_im;
00786     A2_re += g21_re * a1_re;
00787     A2_re -= g21_im * a1_im;
00788     A2_re += g22_re * a2_re;
00789     A2_re -= g22_im * a2_im;
00790     spinorFloat A2_im = 0;
00791     A2_im += g20_re * a0_im;
00792     A2_im += g20_im * a0_re;
00793     A2_im += g21_re * a1_im;
00794     A2_im += g21_im * a1_re;
00795     A2_im += g22_re * a2_im;
00796     A2_im += g22_im * a2_re;
00797     spinorFloat B2_re = 0;
00798     B2_re += g20_re * b0_re;
00799     B2_re -= g20_im * b0_im;
00800     B2_re += g21_re * b1_re;
00801     B2_re -= g21_im * b1_im;
00802     B2_re += g22_re * b2_re;
00803     B2_re -= g22_im * b2_im;
00804     spinorFloat B2_im = 0;
00805     B2_im += g20_re * b0_im;
00806     B2_im += g20_im * b0_re;
00807     B2_im += g21_re * b1_im;
00808     B2_im += g21_im * b1_re;
00809     B2_im += g22_re * b2_im;
00810     B2_im += g22_im * b2_re;
00811     // accumulate into the output: o0 += A, o1 += B, o2 += B, o3 -= A (rows 2 and 3 of P1- expressed through a and b)
00812     o00_re += A0_re;
00813     o00_im += A0_im;
00814     o10_re += B0_re;
00815     o10_im += B0_im;
00816     o20_re += B0_re;
00817     o20_im += B0_im;
00818     o30_re -= A0_re;
00819     o30_im -= A0_im;
00820     
00821     o01_re += A1_re;
00822     o01_im += A1_im;
00823     o11_re += B1_re;
00824     o11_im += B1_im;
00825     o21_re += B1_re;
00826     o21_im += B1_im;
00827     o31_re -= A1_re;
00828     o31_im -= A1_im;
00829     
00830     o02_re += A2_re;
00831     o02_im += A2_im;
00832     o12_re += B2_re;
00833     o12_im += B2_im;
00834     o22_re += B2_re;
00835     o22_im += B2_im;
00836     o32_re -= A2_re;
00837     o32_im -= A2_im;
00838     
00839 }
00840 
00841 {
00842     // Projector P1+: 4x4 matrix (rows listed below) applied to the first index of the input spinor read in this block
00843     // 1 0 0 1 
00844     // 0 1 -1 0 
00845     // 0 -1 1 0 
00846     // 1 0 0 1 
00847     // sp_idx = (X-X1)>>1, or (X+X2X1mX1)>>1 when x2 == 0 (presumably the wrap at the lower x2 boundary; X1 and X2X1mX1 are defined outside this listing)
00848     int sp_idx = ((x2==0)    ? X+X2X1mX1 : X-X1) >> 1;
00849     int ga_idx = sp_idx; // gauge index follows the spinor index here; the matrix is applied below through the gT (conjugate-transpose) macros
00850     
00851     // read gauge matrix from device memory (macro defined outside this listing; presumably uses ga_idx)
00852     READ_GAUGE_MATRIX(GAUGE1TEX, 3);
00853     
00854     // read spinor from device memory (macro defined outside this listing; presumably uses sp_idx)
00855     READ_SPINOR(SPINORTEX);
00856     
00857     // reconstruct gauge matrix
00858     RECONSTRUCT_GAUGE_MATRIX(3);
00859     
00860     // project spinor into half spinors: a = i0 + i3 and b = i1 - i2 (rows 0 and 1 of P1+), for each second index 0..2
00861     spinorFloat a0_re = +i00_re+i30_re;
00862     spinorFloat a0_im = +i00_im+i30_im;
00863     spinorFloat a1_re = +i01_re+i31_re;
00864     spinorFloat a1_im = +i01_im+i31_im;
00865     spinorFloat a2_re = +i02_re+i32_re;
00866     spinorFloat a2_im = +i02_im+i32_im;
00867     
00868     spinorFloat b0_re = +i10_re-i20_re;
00869     spinorFloat b0_im = +i10_im-i20_im;
00870     spinorFloat b1_re = +i11_re-i21_re;
00871     spinorFloat b1_im = +i11_im-i21_im;
00872     spinorFloat b2_re = +i12_re-i22_re;
00873     spinorFloat b2_im = +i12_im-i22_im;
00874     
00875     // multiply row 0: A0 = sum over c of gT0c * a_c, B0 = sum over c of gT0c * b_c (complex products written out in real and imaginary parts)
00876     spinorFloat A0_re = 0;
00877     A0_re += gT00_re * a0_re;
00878     A0_re -= gT00_im * a0_im;
00879     A0_re += gT01_re * a1_re;
00880     A0_re -= gT01_im * a1_im;
00881     A0_re += gT02_re * a2_re;
00882     A0_re -= gT02_im * a2_im;
00883     spinorFloat A0_im = 0;
00884     A0_im += gT00_re * a0_im;
00885     A0_im += gT00_im * a0_re;
00886     A0_im += gT01_re * a1_im;
00887     A0_im += gT01_im * a1_re;
00888     A0_im += gT02_re * a2_im;
00889     A0_im += gT02_im * a2_re;
00890     spinorFloat B0_re = 0;
00891     B0_re += gT00_re * b0_re;
00892     B0_re -= gT00_im * b0_im;
00893     B0_re += gT01_re * b1_re;
00894     B0_re -= gT01_im * b1_im;
00895     B0_re += gT02_re * b2_re;
00896     B0_re -= gT02_im * b2_im;
00897     spinorFloat B0_im = 0;
00898     B0_im += gT00_re * b0_im;
00899     B0_im += gT00_im * b0_re;
00900     B0_im += gT01_re * b1_im;
00901     B0_im += gT01_im * b1_re;
00902     B0_im += gT02_re * b2_im;
00903     B0_im += gT02_im * b2_re;
00904     
00905     // multiply row 1: A1 and B1 from matrix row gT10, gT11, gT12
00906     spinorFloat A1_re = 0;
00907     A1_re += gT10_re * a0_re;
00908     A1_re -= gT10_im * a0_im;
00909     A1_re += gT11_re * a1_re;
00910     A1_re -= gT11_im * a1_im;
00911     A1_re += gT12_re * a2_re;
00912     A1_re -= gT12_im * a2_im;
00913     spinorFloat A1_im = 0;
00914     A1_im += gT10_re * a0_im;
00915     A1_im += gT10_im * a0_re;
00916     A1_im += gT11_re * a1_im;
00917     A1_im += gT11_im * a1_re;
00918     A1_im += gT12_re * a2_im;
00919     A1_im += gT12_im * a2_re;
00920     spinorFloat B1_re = 0;
00921     B1_re += gT10_re * b0_re;
00922     B1_re -= gT10_im * b0_im;
00923     B1_re += gT11_re * b1_re;
00924     B1_re -= gT11_im * b1_im;
00925     B1_re += gT12_re * b2_re;
00926     B1_re -= gT12_im * b2_im;
00927     spinorFloat B1_im = 0;
00928     B1_im += gT10_re * b0_im;
00929     B1_im += gT10_im * b0_re;
00930     B1_im += gT11_re * b1_im;
00931     B1_im += gT11_im * b1_re;
00932     B1_im += gT12_re * b2_im;
00933     B1_im += gT12_im * b2_re;
00934     
00935     // multiply row 2: A2 and B2 from matrix row gT20, gT21, gT22
00936     spinorFloat A2_re = 0;
00937     A2_re += gT20_re * a0_re;
00938     A2_re -= gT20_im * a0_im;
00939     A2_re += gT21_re * a1_re;
00940     A2_re -= gT21_im * a1_im;
00941     A2_re += gT22_re * a2_re;
00942     A2_re -= gT22_im * a2_im;
00943     spinorFloat A2_im = 0;
00944     A2_im += gT20_re * a0_im;
00945     A2_im += gT20_im * a0_re;
00946     A2_im += gT21_re * a1_im;
00947     A2_im += gT21_im * a1_re;
00948     A2_im += gT22_re * a2_im;
00949     A2_im += gT22_im * a2_re;
00950     spinorFloat B2_re = 0;
00951     B2_re += gT20_re * b0_re;
00952     B2_re -= gT20_im * b0_im;
00953     B2_re += gT21_re * b1_re;
00954     B2_re -= gT21_im * b1_im;
00955     B2_re += gT22_re * b2_re;
00956     B2_re -= gT22_im * b2_im;
00957     spinorFloat B2_im = 0;
00958     B2_im += gT20_re * b0_im;
00959     B2_im += gT20_im * b0_re;
00960     B2_im += gT21_re * b1_im;
00961     B2_im += gT21_im * b1_re;
00962     B2_im += gT22_re * b2_im;
00963     B2_im += gT22_im * b2_re;
00964     // accumulate into the output: o0 += A, o1 += B, o2 -= B, o3 += A (rows 2 and 3 of P1+ expressed through a and b)
00965     o00_re += A0_re;
00966     o00_im += A0_im;
00967     o10_re += B0_re;
00968     o10_im += B0_im;
00969     o20_re -= B0_re;
00970     o20_im -= B0_im;
00971     o30_re += A0_re;
00972     o30_im += A0_im;
00973     
00974     o01_re += A1_re;
00975     o01_im += A1_im;
00976     o11_re += B1_re;
00977     o11_im += B1_im;
00978     o21_re -= B1_re;
00979     o21_im -= B1_im;
00980     o31_re += A1_re;
00981     o31_im += A1_im;
00982     
00983     o02_re += A2_re;
00984     o02_im += A2_im;
00985     o12_re += B2_re;
00986     o12_im += B2_im;
00987     o22_re -= B2_re;
00988     o22_im -= B2_im;
00989     o32_re += A2_re;
00990     o32_im += A2_im;
00991     
00992 }
00993 
// Hopping contribution for gauge direction index 4 (projector P2-).
// The neighbour spinor is projected onto two 3-colour half spinors (a, b),
// each is multiplied by the 3x3 complex gauge matrix g**, and the products
// (A, B) are accumulated into all four spin rows of the output o**.
00994 {
00995     // Projector P2-
00996     // 1 0 -i 0 
00997     // 0 1 0 i 
00998     // i 0 1 0 
00999     // 0 -i 0 1 
01000     
    // Neighbour spinor index: X+X2X1, or X-X3X2X1mX2X1 when x3 sits on its
    // upper bound X3m1. The >>1 halves the index (presumably full-lattice ->
    // checkerboard index; confirm against the kernel prologue).
01001     int sp_idx = ((x3==X3m1) ? X-X3X2X1mX2X1 : X+X2X1) >> 1;
    // The gauge matrix is indexed by sid (presumably this thread's own site).
01002     int ga_idx = sid;
01003     
01004     // read gauge matrix from device memory
01005     READ_GAUGE_MATRIX(GAUGE0TEX, 4);
01006     
01007     // read spinor from device memory
01008     READ_SPINOR(SPINORTEX);
01009     
01010     // reconstruct gauge matrix
01011     RECONSTRUCT_GAUGE_MATRIX(4);
01012     
01013     // project spinor into half spinors
    // Per colour: a = i0 - i*i2 and b = i1 + i*i3 (first two projector rows).
01014     spinorFloat a0_re = +i00_re+i20_im;
01015     spinorFloat a0_im = +i00_im-i20_re;
01016     spinorFloat a1_re = +i01_re+i21_im;
01017     spinorFloat a1_im = +i01_im-i21_re;
01018     spinorFloat a2_re = +i02_re+i22_im;
01019     spinorFloat a2_im = +i02_im-i22_re;
01020     
01021     spinorFloat b0_re = +i10_re-i30_im;
01022     spinorFloat b0_im = +i10_im+i30_re;
01023     spinorFloat b1_re = +i11_re-i31_im;
01024     spinorFloat b1_im = +i11_im+i31_re;
01025     spinorFloat b2_re = +i12_re-i32_im;
01026     spinorFloat b2_im = +i12_im+i32_re;
01027     
    // A = g*a and B = g*b, one matrix row at a time. Each complex product is
    // expanded as re = g_re*x_re - g_im*x_im, im = g_re*x_im + g_im*x_re.
01028     // multiply row 0
01029     spinorFloat A0_re = 0;
01030     A0_re += g00_re * a0_re;
01031     A0_re -= g00_im * a0_im;
01032     A0_re += g01_re * a1_re;
01033     A0_re -= g01_im * a1_im;
01034     A0_re += g02_re * a2_re;
01035     A0_re -= g02_im * a2_im;
01036     spinorFloat A0_im = 0;
01037     A0_im += g00_re * a0_im;
01038     A0_im += g00_im * a0_re;
01039     A0_im += g01_re * a1_im;
01040     A0_im += g01_im * a1_re;
01041     A0_im += g02_re * a2_im;
01042     A0_im += g02_im * a2_re;
01043     spinorFloat B0_re = 0;
01044     B0_re += g00_re * b0_re;
01045     B0_re -= g00_im * b0_im;
01046     B0_re += g01_re * b1_re;
01047     B0_re -= g01_im * b1_im;
01048     B0_re += g02_re * b2_re;
01049     B0_re -= g02_im * b2_im;
01050     spinorFloat B0_im = 0;
01051     B0_im += g00_re * b0_im;
01052     B0_im += g00_im * b0_re;
01053     B0_im += g01_re * b1_im;
01054     B0_im += g01_im * b1_re;
01055     B0_im += g02_re * b2_im;
01056     B0_im += g02_im * b2_re;
01057     
01058     // multiply row 1
01059     spinorFloat A1_re = 0;
01060     A1_re += g10_re * a0_re;
01061     A1_re -= g10_im * a0_im;
01062     A1_re += g11_re * a1_re;
01063     A1_re -= g11_im * a1_im;
01064     A1_re += g12_re * a2_re;
01065     A1_re -= g12_im * a2_im;
01066     spinorFloat A1_im = 0;
01067     A1_im += g10_re * a0_im;
01068     A1_im += g10_im * a0_re;
01069     A1_im += g11_re * a1_im;
01070     A1_im += g11_im * a1_re;
01071     A1_im += g12_re * a2_im;
01072     A1_im += g12_im * a2_re;
01073     spinorFloat B1_re = 0;
01074     B1_re += g10_re * b0_re;
01075     B1_re -= g10_im * b0_im;
01076     B1_re += g11_re * b1_re;
01077     B1_re -= g11_im * b1_im;
01078     B1_re += g12_re * b2_re;
01079     B1_re -= g12_im * b2_im;
01080     spinorFloat B1_im = 0;
01081     B1_im += g10_re * b0_im;
01082     B1_im += g10_im * b0_re;
01083     B1_im += g11_re * b1_im;
01084     B1_im += g11_im * b1_re;
01085     B1_im += g12_re * b2_im;
01086     B1_im += g12_im * b2_re;
01087     
01088     // multiply row 2
01089     spinorFloat A2_re = 0;
01090     A2_re += g20_re * a0_re;
01091     A2_re -= g20_im * a0_im;
01092     A2_re += g21_re * a1_re;
01093     A2_re -= g21_im * a1_im;
01094     A2_re += g22_re * a2_re;
01095     A2_re -= g22_im * a2_im;
01096     spinorFloat A2_im = 0;
01097     A2_im += g20_re * a0_im;
01098     A2_im += g20_im * a0_re;
01099     A2_im += g21_re * a1_im;
01100     A2_im += g21_im * a1_re;
01101     A2_im += g22_re * a2_im;
01102     A2_im += g22_im * a2_re;
01103     spinorFloat B2_re = 0;
01104     B2_re += g20_re * b0_re;
01105     B2_re -= g20_im * b0_im;
01106     B2_re += g21_re * b1_re;
01107     B2_re -= g21_im * b1_im;
01108     B2_re += g22_re * b2_re;
01109     B2_re -= g22_im * b2_im;
01110     spinorFloat B2_im = 0;
01111     B2_im += g20_re * b0_im;
01112     B2_im += g20_im * b0_re;
01113     B2_im += g21_re * b1_im;
01114     B2_im += g21_im * b1_re;
01115     B2_im += g22_re * b2_im;
01116     B2_im += g22_im * b2_re;
01117     
    // Accumulate per colour: o0 += A, o1 += B, o2 += i*A, o3 += -i*B.
    // The lower two spin rows reuse A and B instead of a second multiply.
01118     o00_re += A0_re;
01119     o00_im += A0_im;
01120     o10_re += B0_re;
01121     o10_im += B0_im;
01122     o20_re -= A0_im;
01123     o20_im += A0_re;
01124     o30_re += B0_im;
01125     o30_im -= B0_re;
01126     
01127     o01_re += A1_re;
01128     o01_im += A1_im;
01129     o11_re += B1_re;
01130     o11_im += B1_im;
01131     o21_re -= A1_im;
01132     o21_im += A1_re;
01133     o31_re += B1_im;
01134     o31_im -= B1_re;
01135     
01136     o02_re += A2_re;
01137     o02_im += A2_im;
01138     o12_re += B2_re;
01139     o12_im += B2_im;
01140     o22_re -= A2_im;
01141     o22_im += A2_re;
01142     o32_re += B2_im;
01143     o32_im -= B2_re;
01144     
01145 }
01146 
// Hopping contribution for gauge direction index 5 (projector P2+).
// Same structure as the P2- block, but the gauge matrix is read at the
// neighbour's index and applied through the gT** accessors (defined outside
// this chunk -- presumably the conjugate transpose of g**; confirm).
01147 {
01148     // Projector P2+
01149     // 1 0 i 0 
01150     // 0 1 0 -i 
01151     // -i 0 1 0 
01152     // 0 i 0 1 
01153     
    // Neighbour spinor index: X-X2X1, or X+X3X2X1mX2X1 when x3 == 0.
    // The >>1 halves the index (presumably full-lattice -> checkerboard
    // index; confirm against the kernel prologue).
01154     int sp_idx = ((x3==0)    ? X+X3X2X1mX2X1 : X-X2X1) >> 1;
    // The gauge matrix is taken at the same index as the neighbour spinor.
01155     int ga_idx = sp_idx;
01156     
01157     // read gauge matrix from device memory
01158     READ_GAUGE_MATRIX(GAUGE1TEX, 5);
01159     
01160     // read spinor from device memory
01161     READ_SPINOR(SPINORTEX);
01162     
01163     // reconstruct gauge matrix
01164     RECONSTRUCT_GAUGE_MATRIX(5);
01165     
01166     // project spinor into half spinors
    // Per colour: a = i0 + i*i2 and b = i1 - i*i3 (first two projector rows).
01167     spinorFloat a0_re = +i00_re-i20_im;
01168     spinorFloat a0_im = +i00_im+i20_re;
01169     spinorFloat a1_re = +i01_re-i21_im;
01170     spinorFloat a1_im = +i01_im+i21_re;
01171     spinorFloat a2_re = +i02_re-i22_im;
01172     spinorFloat a2_im = +i02_im+i22_re;
01173     
01174     spinorFloat b0_re = +i10_re+i30_im;
01175     spinorFloat b0_im = +i10_im-i30_re;
01176     spinorFloat b1_re = +i11_re+i31_im;
01177     spinorFloat b1_im = +i11_im-i31_re;
01178     spinorFloat b2_re = +i12_re+i32_im;
01179     spinorFloat b2_im = +i12_im-i32_re;
01180     
    // A = gT*a and B = gT*b, one matrix row at a time. Each complex product
    // is expanded as re = g_re*x_re - g_im*x_im, im = g_re*x_im + g_im*x_re.
01181     // multiply row 0
01182     spinorFloat A0_re = 0;
01183     A0_re += gT00_re * a0_re;
01184     A0_re -= gT00_im * a0_im;
01185     A0_re += gT01_re * a1_re;
01186     A0_re -= gT01_im * a1_im;
01187     A0_re += gT02_re * a2_re;
01188     A0_re -= gT02_im * a2_im;
01189     spinorFloat A0_im = 0;
01190     A0_im += gT00_re * a0_im;
01191     A0_im += gT00_im * a0_re;
01192     A0_im += gT01_re * a1_im;
01193     A0_im += gT01_im * a1_re;
01194     A0_im += gT02_re * a2_im;
01195     A0_im += gT02_im * a2_re;
01196     spinorFloat B0_re = 0;
01197     B0_re += gT00_re * b0_re;
01198     B0_re -= gT00_im * b0_im;
01199     B0_re += gT01_re * b1_re;
01200     B0_re -= gT01_im * b1_im;
01201     B0_re += gT02_re * b2_re;
01202     B0_re -= gT02_im * b2_im;
01203     spinorFloat B0_im = 0;
01204     B0_im += gT00_re * b0_im;
01205     B0_im += gT00_im * b0_re;
01206     B0_im += gT01_re * b1_im;
01207     B0_im += gT01_im * b1_re;
01208     B0_im += gT02_re * b2_im;
01209     B0_im += gT02_im * b2_re;
01210     
01211     // multiply row 1
01212     spinorFloat A1_re = 0;
01213     A1_re += gT10_re * a0_re;
01214     A1_re -= gT10_im * a0_im;
01215     A1_re += gT11_re * a1_re;
01216     A1_re -= gT11_im * a1_im;
01217     A1_re += gT12_re * a2_re;
01218     A1_re -= gT12_im * a2_im;
01219     spinorFloat A1_im = 0;
01220     A1_im += gT10_re * a0_im;
01221     A1_im += gT10_im * a0_re;
01222     A1_im += gT11_re * a1_im;
01223     A1_im += gT11_im * a1_re;
01224     A1_im += gT12_re * a2_im;
01225     A1_im += gT12_im * a2_re;
01226     spinorFloat B1_re = 0;
01227     B1_re += gT10_re * b0_re;
01228     B1_re -= gT10_im * b0_im;
01229     B1_re += gT11_re * b1_re;
01230     B1_re -= gT11_im * b1_im;
01231     B1_re += gT12_re * b2_re;
01232     B1_re -= gT12_im * b2_im;
01233     spinorFloat B1_im = 0;
01234     B1_im += gT10_re * b0_im;
01235     B1_im += gT10_im * b0_re;
01236     B1_im += gT11_re * b1_im;
01237     B1_im += gT11_im * b1_re;
01238     B1_im += gT12_re * b2_im;
01239     B1_im += gT12_im * b2_re;
01240     
01241     // multiply row 2
01242     spinorFloat A2_re = 0;
01243     A2_re += gT20_re * a0_re;
01244     A2_re -= gT20_im * a0_im;
01245     A2_re += gT21_re * a1_re;
01246     A2_re -= gT21_im * a1_im;
01247     A2_re += gT22_re * a2_re;
01248     A2_re -= gT22_im * a2_im;
01249     spinorFloat A2_im = 0;
01250     A2_im += gT20_re * a0_im;
01251     A2_im += gT20_im * a0_re;
01252     A2_im += gT21_re * a1_im;
01253     A2_im += gT21_im * a1_re;
01254     A2_im += gT22_re * a2_im;
01255     A2_im += gT22_im * a2_re;
01256     spinorFloat B2_re = 0;
01257     B2_re += gT20_re * b0_re;
01258     B2_re -= gT20_im * b0_im;
01259     B2_re += gT21_re * b1_re;
01260     B2_re -= gT21_im * b1_im;
01261     B2_re += gT22_re * b2_re;
01262     B2_re -= gT22_im * b2_im;
01263     spinorFloat B2_im = 0;
01264     B2_im += gT20_re * b0_im;
01265     B2_im += gT20_im * b0_re;
01266     B2_im += gT21_re * b1_im;
01267     B2_im += gT21_im * b1_re;
01268     B2_im += gT22_re * b2_im;
01269     B2_im += gT22_im * b2_re;
01270     
    // Accumulate per colour: o0 += A, o1 += B, o2 += -i*A, o3 += i*B.
    // The lower two spin rows reuse A and B instead of a second multiply.
01271     o00_re += A0_re;
01272     o00_im += A0_im;
01273     o10_re += B0_re;
01274     o10_im += B0_im;
01275     o20_re += A0_im;
01276     o20_im -= A0_re;
01277     o30_re -= B0_im;
01278     o30_im += B0_re;
01279     
01280     o01_re += A1_re;
01281     o01_im += A1_im;
01282     o11_re += B1_re;
01283     o11_im += B1_im;
01284     o21_re += A1_im;
01285     o21_im -= A1_re;
01286     o31_re -= B1_im;
01287     o31_im += B1_re;
01288     
01289     o02_re += A2_re;
01290     o02_im += A2_im;
01291     o12_re += B2_re;
01292     o12_im += B2_im;
01293     o22_re += A2_im;
01294     o22_im -= A2_re;
01295     o32_re -= B2_im;
01296     o32_im += B2_re;
01297     
01298 }
01299 
// Hopping contribution for gauge direction index 6 (projector P3-).
// The projector is diagonal, so only spin rows 2 and 3 of the input are used
// (scaled by 2) and only o2*/o3* are updated. When gauge_fixed is set and
// ga_idx is below X4X3X2X1hmX3X2X1h, the gauge matrix is not read and is
// treated as the identity.
01300 {
01301     // Projector P3-
01302     // 0 0 0 0 
01303     // 0 0 0 0 
01304     // 0 0 2 0 
01305     // 0 0 0 2 
01306     
    // Neighbour spinor index: X+X3X2X1, or X-X4X3X2X1mX3X2X1 when x4 sits on
    // its upper bound X4m1. The >>1 halves the index (presumably full-lattice
    // -> checkerboard index; confirm against the kernel prologue).
01307     int sp_idx = ((x4==X4m1) ? X-X4X3X2X1mX3X2X1 : X+X3X2X1) >> 1;
    // The gauge matrix is indexed by sid (presumably this thread's own site).
01308     int ga_idx = sid;
01309     
    // Shortcut path: no gauge read, no matrix multiply.
01310     if (gauge_fixed && ga_idx < X4X3X2X1hmX3X2X1h) {
01311         // read spinor from device memory
        // Only the i2*/i3* components are used below (READ_SPINOR_DOWN
        // presumably loads just the lower two spin rows; confirm).
01312         READ_SPINOR_DOWN(SPINORTEX);
01313         
01314         // project spinor into half spinors
01315         spinorFloat a0_re = +2*i20_re;
01316         spinorFloat a0_im = +2*i20_im;
01317         spinorFloat a1_re = +2*i21_re;
01318         spinorFloat a1_im = +2*i21_im;
01319         spinorFloat a2_re = +2*i22_re;
01320         spinorFloat a2_im = +2*i22_im;
01321         
01322         spinorFloat b0_re = +2*i30_re;
01323         spinorFloat b0_im = +2*i30_im;
01324         spinorFloat b1_re = +2*i31_re;
01325         spinorFloat b1_im = +2*i31_im;
01326         spinorFloat b2_re = +2*i32_re;
01327         spinorFloat b2_im = +2*i32_im;
01328         
01329         // identity gauge matrix
01330         spinorFloat A0_re = a0_re; spinorFloat A0_im = a0_im;
01331         spinorFloat B0_re = b0_re; spinorFloat B0_im = b0_im;
01332         spinorFloat A1_re = a1_re; spinorFloat A1_im = a1_im;
01333         spinorFloat B1_re = b1_re; spinorFloat B1_im = b1_im;
01334         spinorFloat A2_re = a2_re; spinorFloat A2_im = a2_im;
01335         spinorFloat B2_re = b2_re; spinorFloat B2_im = b2_im;
01336         
        // Accumulate per colour: o2 += A, o3 += B.
01337         o20_re += A0_re;
01338         o20_im += A0_im;
01339         o30_re += B0_re;
01340         o30_im += B0_im;
01341         
01342         o21_re += A1_re;
01343         o21_im += A1_im;
01344         o31_re += B1_re;
01345         o31_im += B1_im;
01346         
01347         o22_re += A2_re;
01348         o22_im += A2_im;
01349         o32_re += B2_re;
01350         o32_im += B2_im;
01351         
01352     }
    // General path: read the gauge matrix and apply it to both half spinors.
01353     else {
01354         // read gauge matrix from device memory
01355         READ_GAUGE_MATRIX(GAUGE0TEX, 6);
01356         
01357         // read spinor from device memory
01358         READ_SPINOR_DOWN(SPINORTEX);
01359         
01360         // reconstruct gauge matrix
01361         RECONSTRUCT_GAUGE_MATRIX(6);
01362         
01363         // project spinor into half spinors
01364         spinorFloat a0_re = +2*i20_re;
01365         spinorFloat a0_im = +2*i20_im;
01366         spinorFloat a1_re = +2*i21_re;
01367         spinorFloat a1_im = +2*i21_im;
01368         spinorFloat a2_re = +2*i22_re;
01369         spinorFloat a2_im = +2*i22_im;
01370         
01371         spinorFloat b0_re = +2*i30_re;
01372         spinorFloat b0_im = +2*i30_im;
01373         spinorFloat b1_re = +2*i31_re;
01374         spinorFloat b1_im = +2*i31_im;
01375         spinorFloat b2_re = +2*i32_re;
01376         spinorFloat b2_im = +2*i32_im;
01377         
        // A = g*a and B = g*b, one matrix row at a time. Each complex product
        // is expanded as re = g_re*x_re - g_im*x_im, im = g_re*x_im + g_im*x_re.
01378         // multiply row 0
01379         spinorFloat A0_re = 0;
01380         A0_re += g00_re * a0_re;
01381         A0_re -= g00_im * a0_im;
01382         A0_re += g01_re * a1_re;
01383         A0_re -= g01_im * a1_im;
01384         A0_re += g02_re * a2_re;
01385         A0_re -= g02_im * a2_im;
01386         spinorFloat A0_im = 0;
01387         A0_im += g00_re * a0_im;
01388         A0_im += g00_im * a0_re;
01389         A0_im += g01_re * a1_im;
01390         A0_im += g01_im * a1_re;
01391         A0_im += g02_re * a2_im;
01392         A0_im += g02_im * a2_re;
01393         spinorFloat B0_re = 0;
01394         B0_re += g00_re * b0_re;
01395         B0_re -= g00_im * b0_im;
01396         B0_re += g01_re * b1_re;
01397         B0_re -= g01_im * b1_im;
01398         B0_re += g02_re * b2_re;
01399         B0_re -= g02_im * b2_im;
01400         spinorFloat B0_im = 0;
01401         B0_im += g00_re * b0_im;
01402         B0_im += g00_im * b0_re;
01403         B0_im += g01_re * b1_im;
01404         B0_im += g01_im * b1_re;
01405         B0_im += g02_re * b2_im;
01406         B0_im += g02_im * b2_re;
01407         
01408         // multiply row 1
01409         spinorFloat A1_re = 0;
01410         A1_re += g10_re * a0_re;
01411         A1_re -= g10_im * a0_im;
01412         A1_re += g11_re * a1_re;
01413         A1_re -= g11_im * a1_im;
01414         A1_re += g12_re * a2_re;
01415         A1_re -= g12_im * a2_im;
01416         spinorFloat A1_im = 0;
01417         A1_im += g10_re * a0_im;
01418         A1_im += g10_im * a0_re;
01419         A1_im += g11_re * a1_im;
01420         A1_im += g11_im * a1_re;
01421         A1_im += g12_re * a2_im;
01422         A1_im += g12_im * a2_re;
01423         spinorFloat B1_re = 0;
01424         B1_re += g10_re * b0_re;
01425         B1_re -= g10_im * b0_im;
01426         B1_re += g11_re * b1_re;
01427         B1_re -= g11_im * b1_im;
01428         B1_re += g12_re * b2_re;
01429         B1_re -= g12_im * b2_im;
01430         spinorFloat B1_im = 0;
01431         B1_im += g10_re * b0_im;
01432         B1_im += g10_im * b0_re;
01433         B1_im += g11_re * b1_im;
01434         B1_im += g11_im * b1_re;
01435         B1_im += g12_re * b2_im;
01436         B1_im += g12_im * b2_re;
01437         
01438         // multiply row 2
01439         spinorFloat A2_re = 0;
01440         A2_re += g20_re * a0_re;
01441         A2_re -= g20_im * a0_im;
01442         A2_re += g21_re * a1_re;
01443         A2_re -= g21_im * a1_im;
01444         A2_re += g22_re * a2_re;
01445         A2_re -= g22_im * a2_im;
01446         spinorFloat A2_im = 0;
01447         A2_im += g20_re * a0_im;
01448         A2_im += g20_im * a0_re;
01449         A2_im += g21_re * a1_im;
01450         A2_im += g21_im * a1_re;
01451         A2_im += g22_re * a2_im;
01452         A2_im += g22_im * a2_re;
01453         spinorFloat B2_re = 0;
01454         B2_re += g20_re * b0_re;
01455         B2_re -= g20_im * b0_im;
01456         B2_re += g21_re * b1_re;
01457         B2_re -= g21_im * b1_im;
01458         B2_re += g22_re * b2_re;
01459         B2_re -= g22_im * b2_im;
01460         spinorFloat B2_im = 0;
01461         B2_im += g20_re * b0_im;
01462         B2_im += g20_im * b0_re;
01463         B2_im += g21_re * b1_im;
01464         B2_im += g21_im * b1_re;
01465         B2_im += g22_re * b2_im;
01466         B2_im += g22_im * b2_re;
01467         
        // Accumulate per colour: o2 += A, o3 += B.
01468         o20_re += A0_re;
01469         o20_im += A0_im;
01470         o30_re += B0_re;
01471         o30_im += B0_im;
01472         
01473         o21_re += A1_re;
01474         o21_im += A1_im;
01475         o31_re += B1_re;
01476         o31_im += B1_im;
01477         
01478         o22_re += A2_re;
01479         o22_im += A2_im;
01480         o32_re += B2_re;
01481         o32_im += B2_im;
01482         
01483     }
01484 }
01485 
// Hopping contribution for gauge direction index 7 (projector P3+).
// The projector is diagonal, so only spin rows 0 and 1 of the input are used
// (scaled by 2) and only o0*/o1* are updated. When gauge_fixed is set and
// ga_idx is below X4X3X2X1hmX3X2X1h, the gauge matrix is not read and is
// treated as the identity. The general path applies the gT** accessors
// (defined outside this chunk -- presumably the conjugate transpose of g**;
// confirm).
01486 {
01487     // Projector P3+
01488     // 2 0 0 0 
01489     // 0 2 0 0 
01490     // 0 0 0 0 
01491     // 0 0 0 0 
01492     
    // Neighbour spinor index: X-X3X2X1, or X+X4X3X2X1mX3X2X1 when x4 == 0.
    // The >>1 halves the index (presumably full-lattice -> checkerboard
    // index; confirm against the kernel prologue).
01493     int sp_idx = ((x4==0)    ? X+X4X3X2X1mX3X2X1 : X-X3X2X1) >> 1;
    // The gauge matrix is taken at the same index as the neighbour spinor.
01494     int ga_idx = sp_idx;
01495     
    // Shortcut path: no gauge read, no matrix multiply.
01496     if (gauge_fixed && ga_idx < X4X3X2X1hmX3X2X1h) {
01497         // read spinor from device memory
        // Only the i0*/i1* components are used below (READ_SPINOR_UP
        // presumably loads just the upper two spin rows; confirm).
01498         READ_SPINOR_UP(SPINORTEX);
01499         
01500         // project spinor into half spinors
01501         spinorFloat a0_re = +2*i00_re;
01502         spinorFloat a0_im = +2*i00_im;
01503         spinorFloat a1_re = +2*i01_re;
01504         spinorFloat a1_im = +2*i01_im;
01505         spinorFloat a2_re = +2*i02_re;
01506         spinorFloat a2_im = +2*i02_im;
01507         
01508         spinorFloat b0_re = +2*i10_re;
01509         spinorFloat b0_im = +2*i10_im;
01510         spinorFloat b1_re = +2*i11_re;
01511         spinorFloat b1_im = +2*i11_im;
01512         spinorFloat b2_re = +2*i12_re;
01513         spinorFloat b2_im = +2*i12_im;
01514         
01515         // identity gauge matrix
01516         spinorFloat A0_re = a0_re; spinorFloat A0_im = a0_im;
01517         spinorFloat B0_re = b0_re; spinorFloat B0_im = b0_im;
01518         spinorFloat A1_re = a1_re; spinorFloat A1_im = a1_im;
01519         spinorFloat B1_re = b1_re; spinorFloat B1_im = b1_im;
01520         spinorFloat A2_re = a2_re; spinorFloat A2_im = a2_im;
01521         spinorFloat B2_re = b2_re; spinorFloat B2_im = b2_im;
01522         
        // Accumulate per colour: o0 += A, o1 += B.
01523         o00_re += A0_re;
01524         o00_im += A0_im;
01525         o10_re += B0_re;
01526         o10_im += B0_im;
01527         
01528         o01_re += A1_re;
01529         o01_im += A1_im;
01530         o11_re += B1_re;
01531         o11_im += B1_im;
01532         
01533         o02_re += A2_re;
01534         o02_im += A2_im;
01535         o12_re += B2_re;
01536         o12_im += B2_im;
01537         
01538     }
    // General path: read the gauge matrix and apply it to both half spinors.
01539     else {
01540         // read gauge matrix from device memory
01541         READ_GAUGE_MATRIX(GAUGE1TEX, 7);
01542         
01543         // read spinor from device memory
01544         READ_SPINOR_UP(SPINORTEX);
01545         
01546         // reconstruct gauge matrix
01547         RECONSTRUCT_GAUGE_MATRIX(7);
01548         
01549         // project spinor into half spinors
01550         spinorFloat a0_re = +2*i00_re;
01551         spinorFloat a0_im = +2*i00_im;
01552         spinorFloat a1_re = +2*i01_re;
01553         spinorFloat a1_im = +2*i01_im;
01554         spinorFloat a2_re = +2*i02_re;
01555         spinorFloat a2_im = +2*i02_im;
01556         
01557         spinorFloat b0_re = +2*i10_re;
01558         spinorFloat b0_im = +2*i10_im;
01559         spinorFloat b1_re = +2*i11_re;
01560         spinorFloat b1_im = +2*i11_im;
01561         spinorFloat b2_re = +2*i12_re;
01562         spinorFloat b2_im = +2*i12_im;
01563         
        // A = gT*a and B = gT*b, one matrix row at a time. Each complex product
        // is expanded as re = g_re*x_re - g_im*x_im, im = g_re*x_im + g_im*x_re.
01564         // multiply row 0
01565         spinorFloat A0_re = 0;
01566         A0_re += gT00_re * a0_re;
01567         A0_re -= gT00_im * a0_im;
01568         A0_re += gT01_re * a1_re;
01569         A0_re -= gT01_im * a1_im;
01570         A0_re += gT02_re * a2_re;
01571         A0_re -= gT02_im * a2_im;
01572         spinorFloat A0_im = 0;
01573         A0_im += gT00_re * a0_im;
01574         A0_im += gT00_im * a0_re;
01575         A0_im += gT01_re * a1_im;
01576         A0_im += gT01_im * a1_re;
01577         A0_im += gT02_re * a2_im;
01578         A0_im += gT02_im * a2_re;
01579         spinorFloat B0_re = 0;
01580         B0_re += gT00_re * b0_re;
01581         B0_re -= gT00_im * b0_im;
01582         B0_re += gT01_re * b1_re;
01583         B0_re -= gT01_im * b1_im;
01584         B0_re += gT02_re * b2_re;
01585         B0_re -= gT02_im * b2_im;
01586         spinorFloat B0_im = 0;
01587         B0_im += gT00_re * b0_im;
01588         B0_im += gT00_im * b0_re;
01589         B0_im += gT01_re * b1_im;
01590         B0_im += gT01_im * b1_re;
01591         B0_im += gT02_re * b2_im;
01592         B0_im += gT02_im * b2_re;
01593         
01594         // multiply row 1
01595         spinorFloat A1_re = 0;
01596         A1_re += gT10_re * a0_re;
01597         A1_re -= gT10_im * a0_im;
01598         A1_re += gT11_re * a1_re;
01599         A1_re -= gT11_im * a1_im;
01600         A1_re += gT12_re * a2_re;
01601         A1_re -= gT12_im * a2_im;
01602         spinorFloat A1_im = 0;
01603         A1_im += gT10_re * a0_im;
01604         A1_im += gT10_im * a0_re;
01605         A1_im += gT11_re * a1_im;
01606         A1_im += gT11_im * a1_re;
01607         A1_im += gT12_re * a2_im;
01608         A1_im += gT12_im * a2_re;
01609         spinorFloat B1_re = 0;
01610         B1_re += gT10_re * b0_re;
01611         B1_re -= gT10_im * b0_im;
01612         B1_re += gT11_re * b1_re;
01613         B1_re -= gT11_im * b1_im;
01614         B1_re += gT12_re * b2_re;
01615         B1_re -= gT12_im * b2_im;
01616         spinorFloat B1_im = 0;
01617         B1_im += gT10_re * b0_im;
01618         B1_im += gT10_im * b0_re;
01619         B1_im += gT11_re * b1_im;
01620         B1_im += gT11_im * b1_re;
01621         B1_im += gT12_re * b2_im;
01622         B1_im += gT12_im * b2_re;
01623         
01624         // multiply row 2
01625         spinorFloat A2_re = 0;
01626         A2_re += gT20_re * a0_re;
01627         A2_re -= gT20_im * a0_im;
01628         A2_re += gT21_re * a1_re;
01629         A2_re -= gT21_im * a1_im;
01630         A2_re += gT22_re * a2_re;
01631         A2_re -= gT22_im * a2_im;
01632         spinorFloat A2_im = 0;
01633         A2_im += gT20_re * a0_im;
01634         A2_im += gT20_im * a0_re;
01635         A2_im += gT21_re * a1_im;
01636         A2_im += gT21_im * a1_re;
01637         A2_im += gT22_re * a2_im;
01638         A2_im += gT22_im * a2_re;
01639         spinorFloat B2_re = 0;
01640         B2_re += gT20_re * b0_re;
01641         B2_re -= gT20_im * b0_im;
01642         B2_re += gT21_re * b1_re;
01643         B2_re -= gT21_im * b1_im;
01644         B2_re += gT22_re * b2_re;
01645         B2_re -= gT22_im * b2_im;
01646         spinorFloat B2_im = 0;
01647         B2_im += gT20_re * b0_im;
01648         B2_im += gT20_im * b0_re;
01649         B2_im += gT21_re * b1_im;
01650         B2_im += gT21_im * b1_re;
01651         B2_im += gT22_re * b2_im;
01652         B2_im += gT22_im * b2_re;
01653         
        // Accumulate per colour: o0 += A, o1 += B.
01654         o00_re += A0_re;
01655         o00_im += A0_im;
01656         o10_re += B0_re;
01657         o10_im += B0_im;
01658         
01659         o01_re += A1_re;
01660         o01_im += A1_im;
01661         o11_re += B1_re;
01662         o11_im += B1_im;
01663         
01664         o02_re += A2_re;
01665         o02_im += A2_im;
01666         o12_re += B2_re;
01667         o12_im += B2_im;
01668         
01669     }
01670 }
01671 
01672 #ifdef DSLASH_CLOVER
01673 
01674 // change to chiral basis
// Basis change for colour component 0 of the output spinor. The four spin
// rows are recombined as
//   o0 <- -o1 - o3,  o1 <- o0 + o2,  o2 <- -o1 + o3,  o3 <- o0 - o2.
// Temporaries hold the new values so every right-hand side sees the old o.
01675 {
01676     spinorFloat a00_re = -o10_re - o30_re;
01677     spinorFloat a00_im = -o10_im - o30_im;
01678     spinorFloat a10_re =  o00_re + o20_re;
01679     spinorFloat a10_im =  o00_im + o20_im;
01680     spinorFloat a20_re = -o10_re + o30_re;
01681     spinorFloat a20_im = -o10_im + o30_im;
01682     spinorFloat a30_re =  o00_re - o20_re;
01683     spinorFloat a30_im =  o00_im - o20_im;
01684     
    // Write the recombined values back over the output registers.
01685     o00_re = a00_re;  o00_im = a00_im;
01686     o10_re = a10_re;  o10_im = a10_im;
01687     o20_re = a20_re;  o20_im = a20_im;
01688     o30_re = a30_re;  o30_im = a30_im;
01689 }
// Basis change for colour component 1 of the output spinor. The four spin
// rows are recombined as
//   o0 <- -o1 - o3,  o1 <- o0 + o2,  o2 <- -o1 + o3,  o3 <- o0 - o2.
// Temporaries hold the new values so every right-hand side sees the old o.
01690 {
01691     spinorFloat a01_re = -o11_re - o31_re;
01692     spinorFloat a01_im = -o11_im - o31_im;
01693     spinorFloat a11_re =  o01_re + o21_re;
01694     spinorFloat a11_im =  o01_im + o21_im;
01695     spinorFloat a21_re = -o11_re + o31_re;
01696     spinorFloat a21_im = -o11_im + o31_im;
01697     spinorFloat a31_re =  o01_re - o21_re;
01698     spinorFloat a31_im =  o01_im - o21_im;
01699     
    // Write the recombined values back over the output registers.
01700     o01_re = a01_re;  o01_im = a01_im;
01701     o11_re = a11_re;  o11_im = a11_im;
01702     o21_re = a21_re;  o21_im = a21_im;
01703     o31_re = a31_re;  o31_im = a31_im;
01704 }
// Basis change for colour component 2 of the output spinor. The four spin
// rows are recombined as
//   o0 <- -o1 - o3,  o1 <- o0 + o2,  o2 <- -o1 + o3,  o3 <- o0 - o2.
// Temporaries hold the new values so every right-hand side sees the old o.
01705 {
01706     spinorFloat a02_re = -o12_re - o32_re;
01707     spinorFloat a02_im = -o12_im - o32_im;
01708     spinorFloat a12_re =  o02_re + o22_re;
01709     spinorFloat a12_im =  o02_im + o22_im;
01710     spinorFloat a22_re = -o12_re + o32_re;
01711     spinorFloat a22_im = -o12_im + o32_im;
01712     spinorFloat a32_re =  o02_re - o22_re;
01713     spinorFloat a32_im =  o02_im - o22_im;
01714     
    // Write the recombined values back over the output registers.
01715     o02_re = a02_re;  o02_im = a02_im;
01716     o12_re = a12_re;  o12_im = a12_im;
01717     o22_re = a22_re;  o22_im = a22_im;
01718     o32_re = a32_re;  o32_im = a32_im;
01719 }
01720 
01721 // apply first chiral block
// Applies the first clover block to the upper half of the output spinor.
// The six complex components (o00, o01, o02, o10, o11, o12) are multiplied
// by the 6x6 matrix whose entries are exposed as cRC_R'C'_re / _im; results
// are built in temporaries a** and copied back once all six rows are done.
01722 {
    // Loads clover data for block 0. The c**_** names used below are defined
    // outside this chunk (presumably accessors over the loaded data; confirm).
01723     READ_CLOVER(CLOVERTEX, 0)
01724     
01725     spinorFloat a00_re = 0; spinorFloat a00_im = 0;
01726     spinorFloat a01_re = 0; spinorFloat a01_im = 0;
01727     spinorFloat a02_re = 0; spinorFloat a02_im = 0;
01728     spinorFloat a10_re = 0; spinorFloat a10_im = 0;
01729     spinorFloat a11_re = 0; spinorFloat a11_im = 0;
01730     spinorFloat a12_re = 0; spinorFloat a12_im = 0;
01731     
    // Row for a00. The diagonal entry (c00_00) contributes through its real
    // part only; off-diagonal entries use the full complex product
    // re += c_re*o_re - c_im*o_im, im += c_re*o_im + c_im*o_re.
01732     a00_re += c00_00_re * o00_re;
01733     a00_im += c00_00_re * o00_im;
01734     a00_re += c00_01_re * o01_re;
01735     a00_re -= c00_01_im * o01_im;
01736     a00_im += c00_01_re * o01_im;
01737     a00_im += c00_01_im * o01_re;
01738     a00_re += c00_02_re * o02_re;
01739     a00_re -= c00_02_im * o02_im;
01740     a00_im += c00_02_re * o02_im;
01741     a00_im += c00_02_im * o02_re;
01742     a00_re += c00_10_re * o10_re;
01743     a00_re -= c00_10_im * o10_im;
01744     a00_im += c00_10_re * o10_im;
01745     a00_im += c00_10_im * o10_re;
01746     a00_re += c00_11_re * o11_re;
01747     a00_re -= c00_11_im * o11_im;
01748     a00_im += c00_11_re * o11_im;
01749     a00_im += c00_11_im * o11_re;
01750     a00_re += c00_12_re * o12_re;
01751     a00_re -= c00_12_im * o12_im;
01752     a00_im += c00_12_re * o12_im;
01753     a00_im += c00_12_im * o12_re;
01754     
    // Row for a01 (diagonal entry c01_01 is real-only).
01755     a01_re += c01_00_re * o00_re;
01756     a01_re -= c01_00_im * o00_im;
01757     a01_im += c01_00_re * o00_im;
01758     a01_im += c01_00_im * o00_re;
01759     a01_re += c01_01_re * o01_re;
01760     a01_im += c01_01_re * o01_im;
01761     a01_re += c01_02_re * o02_re;
01762     a01_re -= c01_02_im * o02_im;
01763     a01_im += c01_02_re * o02_im;
01764     a01_im += c01_02_im * o02_re;
01765     a01_re += c01_10_re * o10_re;
01766     a01_re -= c01_10_im * o10_im;
01767     a01_im += c01_10_re * o10_im;
01768     a01_im += c01_10_im * o10_re;
01769     a01_re += c01_11_re * o11_re;
01770     a01_re -= c01_11_im * o11_im;
01771     a01_im += c01_11_re * o11_im;
01772     a01_im += c01_11_im * o11_re;
01773     a01_re += c01_12_re * o12_re;
01774     a01_re -= c01_12_im * o12_im;
01775     a01_im += c01_12_re * o12_im;
01776     a01_im += c01_12_im * o12_re;
01777     
    // Row for a02 (diagonal entry c02_02 is real-only).
01778     a02_re += c02_00_re * o00_re;
01779     a02_re -= c02_00_im * o00_im;
01780     a02_im += c02_00_re * o00_im;
01781     a02_im += c02_00_im * o00_re;
01782     a02_re += c02_01_re * o01_re;
01783     a02_re -= c02_01_im * o01_im;
01784     a02_im += c02_01_re * o01_im;
01785     a02_im += c02_01_im * o01_re;
01786     a02_re += c02_02_re * o02_re;
01787     a02_im += c02_02_re * o02_im;
01788     a02_re += c02_10_re * o10_re;
01789     a02_re -= c02_10_im * o10_im;
01790     a02_im += c02_10_re * o10_im;
01791     a02_im += c02_10_im * o10_re;
01792     a02_re += c02_11_re * o11_re;
01793     a02_re -= c02_11_im * o11_im;
01794     a02_im += c02_11_re * o11_im;
01795     a02_im += c02_11_im * o11_re;
01796     a02_re += c02_12_re * o12_re;
01797     a02_re -= c02_12_im * o12_im;
01798     a02_im += c02_12_re * o12_im;
01799     a02_im += c02_12_im * o12_re;
01800     
    // Row for a10 (diagonal entry c10_10 is real-only).
01801     a10_re += c10_00_re * o00_re;
01802     a10_re -= c10_00_im * o00_im;
01803     a10_im += c10_00_re * o00_im;
01804     a10_im += c10_00_im * o00_re;
01805     a10_re += c10_01_re * o01_re;
01806     a10_re -= c10_01_im * o01_im;
01807     a10_im += c10_01_re * o01_im;
01808     a10_im += c10_01_im * o01_re;
01809     a10_re += c10_02_re * o02_re;
01810     a10_re -= c10_02_im * o02_im;
01811     a10_im += c10_02_re * o02_im;
01812     a10_im += c10_02_im * o02_re;
01813     a10_re += c10_10_re * o10_re;
01814     a10_im += c10_10_re * o10_im;
01815     a10_re += c10_11_re * o11_re;
01816     a10_re -= c10_11_im * o11_im;
01817     a10_im += c10_11_re * o11_im;
01818     a10_im += c10_11_im * o11_re;
01819     a10_re += c10_12_re * o12_re;
01820     a10_re -= c10_12_im * o12_im;
01821     a10_im += c10_12_re * o12_im;
01822     a10_im += c10_12_im * o12_re;
01823     
    // Row for a11 (diagonal entry c11_11 is real-only).
01824     a11_re += c11_00_re * o00_re;
01825     a11_re -= c11_00_im * o00_im;
01826     a11_im += c11_00_re * o00_im;
01827     a11_im += c11_00_im * o00_re;
01828     a11_re += c11_01_re * o01_re;
01829     a11_re -= c11_01_im * o01_im;
01830     a11_im += c11_01_re * o01_im;
01831     a11_im += c11_01_im * o01_re;
01832     a11_re += c11_02_re * o02_re;
01833     a11_re -= c11_02_im * o02_im;
01834     a11_im += c11_02_re * o02_im;
01835     a11_im += c11_02_im * o02_re;
01836     a11_re += c11_10_re * o10_re;
01837     a11_re -= c11_10_im * o10_im;
01838     a11_im += c11_10_re * o10_im;
01839     a11_im += c11_10_im * o10_re;
01840     a11_re += c11_11_re * o11_re;
01841     a11_im += c11_11_re * o11_im;
01842     a11_re += c11_12_re * o12_re;
01843     a11_re -= c11_12_im * o12_im;
01844     a11_im += c11_12_re * o12_im;
01845     a11_im += c11_12_im * o12_re;
01846     
    // Row for a12 (diagonal entry c12_12 is real-only).
01847     a12_re += c12_00_re * o00_re;
01848     a12_re -= c12_00_im * o00_im;
01849     a12_im += c12_00_re * o00_im;
01850     a12_im += c12_00_im * o00_re;
01851     a12_re += c12_01_re * o01_re;
01852     a12_re -= c12_01_im * o01_im;
01853     a12_im += c12_01_re * o01_im;
01854     a12_im += c12_01_im * o01_re;
01855     a12_re += c12_02_re * o02_re;
01856     a12_re -= c12_02_im * o02_im;
01857     a12_im += c12_02_re * o02_im;
01858     a12_im += c12_02_im * o02_re;
01859     a12_re += c12_10_re * o10_re;
01860     a12_re -= c12_10_im * o10_im;
01861     a12_im += c12_10_re * o10_im;
01862     a12_im += c12_10_im * o10_re;
01863     a12_re += c12_11_re * o11_re;
01864     a12_re -= c12_11_im * o11_im;
01865     a12_im += c12_11_re * o11_im;
01866     a12_im += c12_11_im * o11_re;
01867     a12_re += c12_12_re * o12_re;
01868     a12_im += c12_12_re * o12_im;
01869     
    // Copy back only after every row is complete: each row reads all six
    // of the original o values.
01870     o00_re = a00_re;  o00_im = a00_im;
01871     o01_re = a01_re;  o01_im = a01_im;
01872     o02_re = a02_re;  o02_im = a02_im;
01873     o10_re = a10_re;  o10_im = a10_im;
01874     o11_re = a11_re;  o11_im = a11_im;
01875     o12_re = a12_re;  o12_im = a12_im;
01876     
01877 }
01878 
01879 // apply second chiral block: (o20..o32) <- C * (o20..o32), a 6x6 complex matrix-vector product with the c2x_yy / c3x_yy coefficients (presumably made available by READ_CLOVER(CLOVERTEX, 1) -- confirm against its definition)
01880 {
01881     READ_CLOVER(CLOVERTEX, 1)
01882     // accumulators start at zero so every term below can be written uniformly as += / -=
01883     spinorFloat a20_re = 0; spinorFloat a20_im = 0;
01884     spinorFloat a21_re = 0; spinorFloat a21_im = 0;
01885     spinorFloat a22_re = 0; spinorFloat a22_im = 0;
01886     spinorFloat a30_re = 0; spinorFloat a30_im = 0;
01887     spinorFloat a31_re = 0; spinorFloat a31_im = 0;
01888     spinorFloat a32_re = 0; spinorFloat a32_im = 0;
01889     // one group per output row: off-diagonal entries are full complex multiplies (4 real ops); the diagonal entry contributes only its real part (2 real ops)
01890     a20_re += c20_20_re * o20_re;
01891     a20_im += c20_20_re * o20_im;
01892     a20_re += c20_21_re * o21_re;
01893     a20_re -= c20_21_im * o21_im;
01894     a20_im += c20_21_re * o21_im;
01895     a20_im += c20_21_im * o21_re;
01896     a20_re += c20_22_re * o22_re;
01897     a20_re -= c20_22_im * o22_im;
01898     a20_im += c20_22_re * o22_im;
01899     a20_im += c20_22_im * o22_re;
01900     a20_re += c20_30_re * o30_re;
01901     a20_re -= c20_30_im * o30_im;
01902     a20_im += c20_30_re * o30_im;
01903     a20_im += c20_30_im * o30_re;
01904     a20_re += c20_31_re * o31_re;
01905     a20_re -= c20_31_im * o31_im;
01906     a20_im += c20_31_re * o31_im;
01907     a20_im += c20_31_im * o31_re;
01908     a20_re += c20_32_re * o32_re;
01909     a20_re -= c20_32_im * o32_im;
01910     a20_im += c20_32_re * o32_im;
01911     a20_im += c20_32_im * o32_re;
01912     
01913     a21_re += c21_20_re * o20_re;
01914     a21_re -= c21_20_im * o20_im;
01915     a21_im += c21_20_re * o20_im;
01916     a21_im += c21_20_im * o20_re;
01917     a21_re += c21_21_re * o21_re;
01918     a21_im += c21_21_re * o21_im;
01919     a21_re += c21_22_re * o22_re;
01920     a21_re -= c21_22_im * o22_im;
01921     a21_im += c21_22_re * o22_im;
01922     a21_im += c21_22_im * o22_re;
01923     a21_re += c21_30_re * o30_re;
01924     a21_re -= c21_30_im * o30_im;
01925     a21_im += c21_30_re * o30_im;
01926     a21_im += c21_30_im * o30_re;
01927     a21_re += c21_31_re * o31_re;
01928     a21_re -= c21_31_im * o31_im;
01929     a21_im += c21_31_re * o31_im;
01930     a21_im += c21_31_im * o31_re;
01931     a21_re += c21_32_re * o32_re;
01932     a21_re -= c21_32_im * o32_im;
01933     a21_im += c21_32_re * o32_im;
01934     a21_im += c21_32_im * o32_re;
01935     
01936     a22_re += c22_20_re * o20_re;
01937     a22_re -= c22_20_im * o20_im;
01938     a22_im += c22_20_re * o20_im;
01939     a22_im += c22_20_im * o20_re;
01940     a22_re += c22_21_re * o21_re;
01941     a22_re -= c22_21_im * o21_im;
01942     a22_im += c22_21_re * o21_im;
01943     a22_im += c22_21_im * o21_re;
01944     a22_re += c22_22_re * o22_re;
01945     a22_im += c22_22_re * o22_im;
01946     a22_re += c22_30_re * o30_re;
01947     a22_re -= c22_30_im * o30_im;
01948     a22_im += c22_30_re * o30_im;
01949     a22_im += c22_30_im * o30_re;
01950     a22_re += c22_31_re * o31_re;
01951     a22_re -= c22_31_im * o31_im;
01952     a22_im += c22_31_re * o31_im;
01953     a22_im += c22_31_im * o31_re;
01954     a22_re += c22_32_re * o32_re;
01955     a22_re -= c22_32_im * o32_im;
01956     a22_im += c22_32_re * o32_im;
01957     a22_im += c22_32_im * o32_re;
01958     
01959     a30_re += c30_20_re * o20_re;
01960     a30_re -= c30_20_im * o20_im;
01961     a30_im += c30_20_re * o20_im;
01962     a30_im += c30_20_im * o20_re;
01963     a30_re += c30_21_re * o21_re;
01964     a30_re -= c30_21_im * o21_im;
01965     a30_im += c30_21_re * o21_im;
01966     a30_im += c30_21_im * o21_re;
01967     a30_re += c30_22_re * o22_re;
01968     a30_re -= c30_22_im * o22_im;
01969     a30_im += c30_22_re * o22_im;
01970     a30_im += c30_22_im * o22_re;
01971     a30_re += c30_30_re * o30_re;
01972     a30_im += c30_30_re * o30_im;
01973     a30_re += c30_31_re * o31_re;
01974     a30_re -= c30_31_im * o31_im;
01975     a30_im += c30_31_re * o31_im;
01976     a30_im += c30_31_im * o31_re;
01977     a30_re += c30_32_re * o32_re;
01978     a30_re -= c30_32_im * o32_im;
01979     a30_im += c30_32_re * o32_im;
01980     a30_im += c30_32_im * o32_re;
01981     
01982     a31_re += c31_20_re * o20_re;
01983     a31_re -= c31_20_im * o20_im;
01984     a31_im += c31_20_re * o20_im;
01985     a31_im += c31_20_im * o20_re;
01986     a31_re += c31_21_re * o21_re;
01987     a31_re -= c31_21_im * o21_im;
01988     a31_im += c31_21_re * o21_im;
01989     a31_im += c31_21_im * o21_re;
01990     a31_re += c31_22_re * o22_re;
01991     a31_re -= c31_22_im * o22_im;
01992     a31_im += c31_22_re * o22_im;
01993     a31_im += c31_22_im * o22_re;
01994     a31_re += c31_30_re * o30_re;
01995     a31_re -= c31_30_im * o30_im;
01996     a31_im += c31_30_re * o30_im;
01997     a31_im += c31_30_im * o30_re;
01998     a31_re += c31_31_re * o31_re;
01999     a31_im += c31_31_re * o31_im;
02000     a31_re += c31_32_re * o32_re;
02001     a31_re -= c31_32_im * o32_im;
02002     a31_im += c31_32_re * o32_im;
02003     a31_im += c31_32_im * o32_re;
02004     
02005     a32_re += c32_20_re * o20_re;
02006     a32_re -= c32_20_im * o20_im;
02007     a32_im += c32_20_re * o20_im;
02008     a32_im += c32_20_im * o20_re;
02009     a32_re += c32_21_re * o21_re;
02010     a32_re -= c32_21_im * o21_im;
02011     a32_im += c32_21_re * o21_im;
02012     a32_im += c32_21_im * o21_re;
02013     a32_re += c32_22_re * o22_re;
02014     a32_re -= c32_22_im * o22_im;
02015     a32_im += c32_22_re * o22_im;
02016     a32_im += c32_22_im * o22_re;
02017     a32_re += c32_30_re * o30_re;
02018     a32_re -= c32_30_im * o30_im;
02019     a32_im += c32_30_re * o30_im;
02020     a32_im += c32_30_im * o30_re;
02021     a32_re += c32_31_re * o31_re;
02022     a32_re -= c32_31_im * o31_im;
02023     a32_im += c32_31_re * o31_im;
02024     a32_im += c32_31_im * o31_re;
02025     a32_re += c32_32_re * o32_re;
02026     a32_im += c32_32_re * o32_im;
02027     // copy back only now: every row above reads all six input components, so o2x/o3x must stay untouched until all rows are done
02028     o20_re = a20_re;  o20_im = a20_im;
02029     o21_re = a21_re;  o21_im = a21_im;
02030     o22_re = a22_re;  o22_im = a22_im;
02031     o30_re = a30_re;  o30_im = a30_im;
02032     o31_re = a31_re;  o31_im = a31_im;
02033     o32_re = a32_re;  o32_im = a32_im;
02034     
02035 }
02036 
02037 // change back from chiral basis: each output is a signed sum/difference of two components of o (this block handles the components whose second index is 0)
02038 // (note: required factor of 1/2 is included in clover term normalization, so no scaling is applied here)
02039 {
02040     spinorFloat a00_re =  o10_re + o30_re;
02041     spinorFloat a00_im =  o10_im + o30_im;
02042     spinorFloat a10_re = -o00_re - o20_re;
02043     spinorFloat a10_im = -o00_im - o20_im;
02044     spinorFloat a20_re =  o10_re - o30_re;
02045     spinorFloat a20_im =  o10_im - o30_im;
02046     spinorFloat a30_re = -o00_re + o20_re;
02047     spinorFloat a30_im = -o00_im + o20_im;
02048     // temporaries are required: each o component written below is also an input to another output above
02049     o00_re = a00_re;  o00_im = a00_im;
02050     o10_re = a10_re;  o10_im = a10_im;
02051     o20_re = a20_re;  o20_im = a20_im;
02052     o30_re = a30_re;  o30_im = a30_im;
02053 }
02054 {
02055     spinorFloat a01_re =  o11_re + o31_re;
02056     spinorFloat a01_im =  o11_im + o31_im;
02057     spinorFloat a11_re = -o01_re - o21_re;
02058     spinorFloat a11_im = -o01_im - o21_im;
02059     spinorFloat a21_re =  o11_re - o31_re;
02060     spinorFloat a21_im =  o11_im - o31_im;
02061     spinorFloat a31_re = -o01_re + o21_re;
02062     spinorFloat a31_im = -o01_im + o21_im;
02063     // same sign pattern as the preceding block, applied to the components whose second index is 1; results go through temporaries because inputs and outputs overlap
02064     o01_re = a01_re;  o01_im = a01_im;
02065     o11_re = a11_re;  o11_im = a11_im;
02066     o21_re = a21_re;  o21_im = a21_im;
02067     o31_re = a31_re;  o31_im = a31_im;
02068 }
02069 {
02070     spinorFloat a02_re =  o12_re + o32_re;
02071     spinorFloat a02_im =  o12_im + o32_im;
02072     spinorFloat a12_re = -o02_re - o22_re;
02073     spinorFloat a12_im = -o02_im - o22_im;
02074     spinorFloat a22_re =  o12_re - o32_re;
02075     spinorFloat a22_im =  o12_im - o32_im;
02076     spinorFloat a32_re = -o02_re + o22_re;
02077     spinorFloat a32_im = -o02_im + o22_im;
02078     // same sign pattern again, applied to the components whose second index is 2; results go through temporaries because inputs and outputs overlap
02079     o02_re = a02_re;  o02_im = a02_im;
02080     o12_re = a12_re;  o12_im = a12_im;
02081     o22_re = a22_re;  o22_im = a22_im;
02082     o32_re = a32_re;  o32_im = a32_im;
02083 }
02084 #endif // DSLASH_CLOVER
02085 
02086 // xpay step (only when DSLASH_XPAY is defined): o = a*o + accum for all 24 real components; accum0..N are presumably provided by READ_ACCUM(ACCUMTEX) -- confirm against its definition
02087 #ifdef DSLASH_XPAY
02088     READ_ACCUM(ACCUMTEX)
02089 #ifdef SPINOR_DOUBLE // one complex number per accum vector: .x = real part, .y = imaginary part (accum0..accum11)
02090     o00_re = a*o00_re + accum0.x;
02091     o00_im = a*o00_im + accum0.y;
02092     o01_re = a*o01_re + accum1.x;
02093     o01_im = a*o01_im + accum1.y;
02094     o02_re = a*o02_re + accum2.x;
02095     o02_im = a*o02_im + accum2.y;
02096     o10_re = a*o10_re + accum3.x;
02097     o10_im = a*o10_im + accum3.y;
02098     o11_re = a*o11_re + accum4.x;
02099     o11_im = a*o11_im + accum4.y;
02100     o12_re = a*o12_re + accum5.x;
02101     o12_im = a*o12_im + accum5.y;
02102     o20_re = a*o20_re + accum6.x;
02103     o20_im = a*o20_im + accum6.y;
02104     o21_re = a*o21_re + accum7.x;
02105     o21_im = a*o21_im + accum7.y;
02106     o22_re = a*o22_re + accum8.x;
02107     o22_im = a*o22_im + accum8.y;
02108     o30_re = a*o30_re + accum9.x;
02109     o30_im = a*o30_im + accum9.y;
02110     o31_re = a*o31_re + accum10.x;
02111     o31_im = a*o31_im + accum10.y;
02112     o32_re = a*o32_re + accum11.x;
02113     o32_im = a*o32_im + accum11.y;
02114 #else // !SPINOR_DOUBLE: two complex numbers per accum vector (.x/.y then .z/.w), so only accum0..accum5 are used
02115     o00_re = a*o00_re + accum0.x;
02116     o00_im = a*o00_im + accum0.y;
02117     o01_re = a*o01_re + accum0.z;
02118     o01_im = a*o01_im + accum0.w;
02119     o02_re = a*o02_re + accum1.x;
02120     o02_im = a*o02_im + accum1.y;
02121     o10_re = a*o10_re + accum1.z;
02122     o10_im = a*o10_im + accum1.w;
02123     o11_re = a*o11_re + accum2.x;
02124     o11_im = a*o11_im + accum2.y;
02125     o12_re = a*o12_re + accum2.z;
02126     o12_im = a*o12_im + accum2.w;
02127     o20_re = a*o20_re + accum3.x;
02128     o20_im = a*o20_im + accum3.y;
02129     o21_re = a*o21_re + accum3.z;
02130     o21_im = a*o21_im + accum3.w;
02131     o22_re = a*o22_re + accum4.x;
02132     o22_im = a*o22_im + accum4.y;
02133     o30_re = a*o30_re + accum4.z;
02134     o30_im = a*o30_im + accum4.w;
02135     o31_re = a*o31_re + accum5.x;
02136     o31_im = a*o31_im + accum5.y;
02137     o32_re = a*o32_re + accum5.z;
02138     o32_im = a*o32_im + accum5.w;
02139 #endif // SPINOR_DOUBLE
02140 #endif // DSLASH_XPAY
02141 
02142 
02143     // write spinor field back to device memory (WRITE_SPINOR is a macro defined outside this file; presumably it stores the o** components computed above -- confirm)
02144     WRITE_SPINOR();
02145 
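02146 // undefine the helper macros to prevent warnings when precision is changed (note: the off-diagonal clover names below are #undef'd several times; the repeats are redundant but harmless, since #undef of a name that is not defined is a no-op)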
02147 #undef spinorFloat
02148 #undef SHARED_STRIDE
02149 
02150 #undef A_re
02151 #undef A_im
02152 
02153 #undef g00_re
02154 #undef g00_im
02155 #undef g01_re
02156 #undef g01_im
02157 #undef g02_re
02158 #undef g02_im
02159 #undef g10_re
02160 #undef g10_im
02161 #undef g11_re
02162 #undef g11_im
02163 #undef g12_re
02164 #undef g12_im
02165 #undef g20_re
02166 #undef g20_im
02167 #undef g21_re
02168 #undef g21_im
02169 #undef g22_re
02170 #undef g22_im
02171 
02172 #undef i00_re
02173 #undef i00_im
02174 #undef i01_re
02175 #undef i01_im
02176 #undef i02_re
02177 #undef i02_im
02178 #undef i10_re
02179 #undef i10_im
02180 #undef i11_re
02181 #undef i11_im
02182 #undef i12_re
02183 #undef i12_im
02184 #undef i20_re
02185 #undef i20_im
02186 #undef i21_re
02187 #undef i21_im
02188 #undef i22_re
02189 #undef i22_im
02190 #undef i30_re
02191 #undef i30_im
02192 #undef i31_re
02193 #undef i31_im
02194 #undef i32_re
02195 #undef i32_im
02196 
02197 #undef c00_00_re
02198 #undef c01_00_re
02199 #undef c01_00_im
02200 #undef c02_00_re
02201 #undef c02_00_im
02202 #undef c10_00_re
02203 #undef c10_00_im
02204 #undef c11_00_re
02205 #undef c11_00_im
02206 #undef c12_00_re
02207 #undef c12_00_im
02208 #undef c02_01_re
02209 #undef c02_01_im
02210 #undef c10_01_re
02211 #undef c10_01_im
02212 #undef c11_01_re
02213 #undef c11_01_im
02214 #undef c12_01_re
02215 #undef c12_01_im
02216 #undef c10_02_re
02217 #undef c10_02_im
02218 #undef c11_02_re
02219 #undef c11_02_im
02220 #undef c12_02_re
02221 #undef c12_02_im
02222 #undef c11_10_re
02223 #undef c11_10_im
02224 #undef c12_10_re
02225 #undef c12_10_im
02226 #undef c12_11_re
02227 #undef c12_11_im
02228 #undef c01_01_re
02229 #undef c01_00_re
02230 #undef c01_00_im
02231 #undef c02_00_re
02232 #undef c02_00_im
02233 #undef c10_00_re
02234 #undef c10_00_im
02235 #undef c11_00_re
02236 #undef c11_00_im
02237 #undef c12_00_re
02238 #undef c12_00_im
02239 #undef c02_01_re
02240 #undef c02_01_im
02241 #undef c10_01_re
02242 #undef c10_01_im
02243 #undef c11_01_re
02244 #undef c11_01_im
02245 #undef c12_01_re
02246 #undef c12_01_im
02247 #undef c10_02_re
02248 #undef c10_02_im
02249 #undef c11_02_re
02250 #undef c11_02_im
02251 #undef c12_02_re
02252 #undef c12_02_im
02253 #undef c11_10_re
02254 #undef c11_10_im
02255 #undef c12_10_re
02256 #undef c12_10_im
02257 #undef c12_11_re
02258 #undef c12_11_im
02259 #undef c02_02_re
02260 #undef c01_00_re
02261 #undef c01_00_im
02262 #undef c02_00_re
02263 #undef c02_00_im
02264 #undef c10_00_re
02265 #undef c10_00_im
02266 #undef c11_00_re
02267 #undef c11_00_im
02268 #undef c12_00_re
02269 #undef c12_00_im
02270 #undef c02_01_re
02271 #undef c02_01_im
02272 #undef c10_01_re
02273 #undef c10_01_im
02274 #undef c11_01_re
02275 #undef c11_01_im
02276 #undef c12_01_re
02277 #undef c12_01_im
02278 #undef c10_02_re
02279 #undef c10_02_im
02280 #undef c11_02_re
02281 #undef c11_02_im
02282 #undef c12_02_re
02283 #undef c12_02_im
02284 #undef c11_10_re
02285 #undef c11_10_im
02286 #undef c12_10_re
02287 #undef c12_10_im
02288 #undef c12_11_re
02289 #undef c12_11_im
02290 #undef c10_10_re
02291 #undef c01_00_re
02292 #undef c01_00_im
02293 #undef c02_00_re
02294 #undef c02_00_im
02295 #undef c10_00_re
02296 #undef c10_00_im
02297 #undef c11_00_re
02298 #undef c11_00_im
02299 #undef c12_00_re
02300 #undef c12_00_im
02301 #undef c02_01_re
02302 #undef c02_01_im
02303 #undef c10_01_re
02304 #undef c10_01_im
02305 #undef c11_01_re
02306 #undef c11_01_im
02307 #undef c12_01_re
02308 #undef c12_01_im
02309 #undef c10_02_re
02310 #undef c10_02_im
02311 #undef c11_02_re
02312 #undef c11_02_im
02313 #undef c12_02_re
02314 #undef c12_02_im
02315 #undef c11_10_re
02316 #undef c11_10_im
02317 #undef c12_10_re
02318 #undef c12_10_im
02319 #undef c12_11_re
02320 #undef c12_11_im
02321 #undef c11_11_re
02322 #undef c01_00_re
02323 #undef c01_00_im
02324 #undef c02_00_re
02325 #undef c02_00_im
02326 #undef c10_00_re
02327 #undef c10_00_im
02328 #undef c11_00_re
02329 #undef c11_00_im
02330 #undef c12_00_re
02331 #undef c12_00_im
02332 #undef c02_01_re
02333 #undef c02_01_im
02334 #undef c10_01_re
02335 #undef c10_01_im
02336 #undef c11_01_re
02337 #undef c11_01_im
02338 #undef c12_01_re
02339 #undef c12_01_im
02340 #undef c10_02_re
02341 #undef c10_02_im
02342 #undef c11_02_re
02343 #undef c11_02_im
02344 #undef c12_02_re
02345 #undef c12_02_im
02346 #undef c11_10_re
02347 #undef c11_10_im
02348 #undef c12_10_re
02349 #undef c12_10_im
02350 #undef c12_11_re
02351 #undef c12_11_im
02352 #undef c12_12_re
02353 #undef c01_00_re
02354 #undef c01_00_im
02355 #undef c02_00_re
02356 #undef c02_00_im
02357 #undef c10_00_re
02358 #undef c10_00_im
02359 #undef c11_00_re
02360 #undef c11_00_im
02361 #undef c12_00_re
02362 #undef c12_00_im
02363 #undef c02_01_re
02364 #undef c02_01_im
02365 #undef c10_01_re
02366 #undef c10_01_im
02367 #undef c11_01_re
02368 #undef c11_01_im
02369 #undef c12_01_re
02370 #undef c12_01_im
02371 #undef c10_02_re
02372 #undef c10_02_im
02373 #undef c11_02_re
02374 #undef c11_02_im
02375 #undef c12_02_re
02376 #undef c12_02_im
02377 #undef c11_10_re
02378 #undef c11_10_im
02379 #undef c12_10_re
02380 #undef c12_10_im
02381 #undef c12_11_re
02382 #undef c12_11_im
02383 
02384 #undef o00_re
02385 #undef o00_im
02386 #undef o01_re
02387 #undef o01_im
02388 #undef o02_re
02389 #undef o02_im
02390 #undef o10_re
02391 #undef o10_im
02392 
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Defines