QUDA v0.4.0
A library for QCD on GPUs
quda/lib/dslash_core/wilson_pack_face_dagger_core.h
Go to the documentation of this file.
00001 // input spinor: i<a><b>_re / i<a><b>_im (a = 0..3, b = 0..2) alias fields of the I* variables; NOTE(review): a/b are presumably spin/color indices and I* are not declared in this file -- confirm where they come from
00002 #ifdef SPINOR_DOUBLE // layout: one complex number per variable, I0..I11, with .x = real part and .y = imaginary part
00003 #define spinorFloat double // scalar type used below for the a*/b* half-spinor temporaries
00004 #define i00_re I0.x
00005 #define i00_im I0.y
00006 #define i01_re I1.x
00007 #define i01_im I1.y
00008 #define i02_re I2.x
00009 #define i02_im I2.y
00010 #define i10_re I3.x
00011 #define i10_im I3.y
00012 #define i11_re I4.x
00013 #define i11_im I4.y
00014 #define i12_re I5.x
00015 #define i12_im I5.y
00016 #define i20_re I6.x
00017 #define i20_im I6.y
00018 #define i21_re I7.x
00019 #define i21_im I7.y
00020 #define i22_re I8.x
00021 #define i22_im I8.y
00022 #define i30_re I9.x
00023 #define i30_im I9.y
00024 #define i31_re I10.x
00025 #define i31_im I10.y
00026 #define i32_re I11.x
00027 #define i32_im I11.y
00028 #else // layout: two complex numbers per variable, I0..I5, stored as (.x, .y) and (.z, .w) = (real, imaginary)
00029 #define spinorFloat float // scalar type used below for the a*/b* half-spinor temporaries
00030 #define i00_re I0.x
00031 #define i00_im I0.y
00032 #define i01_re I0.z
00033 #define i01_im I0.w
00034 #define i02_re I1.x
00035 #define i02_im I1.y
00036 #define i10_re I1.z
00037 #define i10_im I1.w
00038 #define i11_re I2.x
00039 #define i11_im I2.y
00040 #define i12_re I2.z
00041 #define i12_im I2.w
00042 #define i20_re I3.x
00043 #define i20_im I3.y
00044 #define i21_re I3.z
00045 #define i21_im I3.w
00046 #define i22_re I4.x
00047 #define i22_im I4.y
00048 #define i30_re I4.z
00049 #define i30_im I4.w
00050 #define i31_re I5.x
00051 #define i31_im I5.y
00052 #define i32_re I5.z
00053 #define i32_im I5.w
00054 #endif // SPINOR_DOUBLE
00055 
00056 #include "io_spinor.h"
00057 
00058 if (face_num) { // face_num != 0; NOTE(review): presumably selects one of the two faces along dim -- confirm against the including kernel
00059   // Every case: load the spinor, form six complex half-spinor components (a0..a2, b0..b2), then call WRITE_HALF_SPINOR.
00060   switch(dim) {
00061   case 0:
00062     {
00063       // read spinor from device memory (READ_SPINOR is not defined in this file; the i??_re/_im aliases are read afterwards)
00064       READ_SPINOR(SPINORTEX, sp_stride, idx, idx);
00065       
00066       spinorFloat a0_re, a0_im;
00067       spinorFloat a1_re, a1_im;
00068       spinorFloat a2_re, a2_im;
00069       spinorFloat b0_re, b0_im;
00070       spinorFloat b1_re, b1_im;
00071       spinorFloat b2_re, b2_im;
00072       
00073       // project spinor into half spinors: a_c = i0c - i*i3c, b_c = i1c - i*i2c  (c = 0..2, complex arithmetic)
00074       a0_re = +i00_re+i30_im;
00075       a0_im = +i00_im-i30_re;
00076       a1_re = +i01_re+i31_im;
00077       a1_im = +i01_im-i31_re;
00078       a2_re = +i02_re+i32_im;
00079       a2_im = +i02_im-i32_re;
00080       b0_re = +i10_re+i20_im;
00081       b0_im = +i10_im-i20_re;
00082       b1_re = +i11_re+i21_im;
00083       b1_im = +i11_im-i21_re;
00084       b2_re = +i12_re+i22_im;
00085       b2_im = +i12_im-i22_re;
00086       
00087       // write half spinor back to device memory (a*/b* are not passed explicitly; the macro presumably picks them up by name -- do not rename them)
00088       WRITE_HALF_SPINOR(face_volume, face_idx);
00089     }
00090     break;
00091   case 1:
00092     {
00093       // read spinor from device memory
00094       READ_SPINOR(SPINORTEX, sp_stride, idx, idx);
00095       
00096       spinorFloat a0_re, a0_im;
00097       spinorFloat a1_re, a1_im;
00098       spinorFloat a2_re, a2_im;
00099       spinorFloat b0_re, b0_im;
00100       spinorFloat b1_re, b1_im;
00101       spinorFloat b2_re, b2_im;
00102       
00103       // project spinor into half spinors: a_c = i0c - i3c, b_c = i1c + i2c  (c = 0..2)
00104       a0_re = +i00_re-i30_re;
00105       a0_im = +i00_im-i30_im;
00106       a1_re = +i01_re-i31_re;
00107       a1_im = +i01_im-i31_im;
00108       a2_re = +i02_re-i32_re;
00109       a2_im = +i02_im-i32_im;
00110       b0_re = +i10_re+i20_re;
00111       b0_im = +i10_im+i20_im;
00112       b1_re = +i11_re+i21_re;
00113       b1_im = +i11_im+i21_im;
00114       b2_re = +i12_re+i22_re;
00115       b2_im = +i12_im+i22_im;
00116       
00117       // write half spinor back to device memory
00118       WRITE_HALF_SPINOR(face_volume, face_idx);
00119     }
00120     break;
00121   case 2:
00122     {
00123       // read spinor from device memory
00124       READ_SPINOR(SPINORTEX, sp_stride, idx, idx);
00125       
00126       spinorFloat a0_re, a0_im;
00127       spinorFloat a1_re, a1_im;
00128       spinorFloat a2_re, a2_im;
00129       spinorFloat b0_re, b0_im;
00130       spinorFloat b1_re, b1_im;
00131       spinorFloat b2_re, b2_im;
00132       
00133       // project spinor into half spinors: a_c = i0c - i*i2c, b_c = i1c + i*i3c  (c = 0..2, complex arithmetic)
00134       a0_re = +i00_re+i20_im;
00135       a0_im = +i00_im-i20_re;
00136       a1_re = +i01_re+i21_im;
00137       a1_im = +i01_im-i21_re;
00138       a2_re = +i02_re+i22_im;
00139       a2_im = +i02_im-i22_re;
00140       b0_re = +i10_re-i30_im;
00141       b0_im = +i10_im+i30_re;
00142       b1_re = +i11_re-i31_im;
00143       b1_im = +i11_im+i31_re;
00144       b2_re = +i12_re-i32_im;
00145       b2_im = +i12_im+i32_re;
00146       
00147       // write half spinor back to device memory
00148       WRITE_HALF_SPINOR(face_volume, face_idx);
00149     }
00150     break;
00151   case 3:
00152     {
00153       // read spinor from device memory via READ_SPINOR_DOWN; only i2c/i3c are used below -- NOTE(review): presumably the macro loads just those components, confirm
00154       READ_SPINOR_DOWN(SPINORTEX, sp_stride, idx, idx);
00155       
00156       spinorFloat a0_re, a0_im;
00157       spinorFloat a1_re, a1_im;
00158       spinorFloat a2_re, a2_im;
00159       spinorFloat b0_re, b0_im;
00160       spinorFloat b1_re, b1_im;
00161       spinorFloat b2_re, b2_im;
00162       
00163       // project spinor into half spinors: a_c = 2*i2c, b_c = 2*i3c  (c = 0..2; no mixing of components in this case)
00164       a0_re = +2*i20_re;
00165       a0_im = +2*i20_im;
00166       a1_re = +2*i21_re;
00167       a1_im = +2*i21_im;
00168       a2_re = +2*i22_re;
00169       a2_im = +2*i22_im;
00170       b0_re = +2*i30_re;
00171       b0_im = +2*i30_im;
00172       b1_re = +2*i31_re;
00173       b1_im = +2*i31_im;
00174       b2_re = +2*i32_re;
00175       b2_im = +2*i32_im;
00176       
00177       // write half spinor back to device memory
00178       WRITE_HALF_SPINOR(face_volume, face_idx);
00179     }
00180     break;
00181   }
00182   
00183 } else { // face_num == 0: cases 0-2 use the same component pairs with the sign of the second term flipped; case 3 doubles i0c/i1c instead of i2c/i3c
00184   
00185   switch(dim) {
00186   case 0:
00187     {
00188       // read spinor from device memory
00189       READ_SPINOR(SPINORTEX, sp_stride, idx, idx);
00190       
00191       spinorFloat a0_re, a0_im;
00192       spinorFloat a1_re, a1_im;
00193       spinorFloat a2_re, a2_im;
00194       spinorFloat b0_re, b0_im;
00195       spinorFloat b1_re, b1_im;
00196       spinorFloat b2_re, b2_im;
00197       
00198       // project spinor into half spinors: a_c = i0c + i*i3c, b_c = i1c + i*i2c  (c = 0..2, complex arithmetic)
00199       a0_re = +i00_re-i30_im;
00200       a0_im = +i00_im+i30_re;
00201       a1_re = +i01_re-i31_im;
00202       a1_im = +i01_im+i31_re;
00203       a2_re = +i02_re-i32_im;
00204       a2_im = +i02_im+i32_re;
00205       b0_re = +i10_re-i20_im;
00206       b0_im = +i10_im+i20_re;
00207       b1_re = +i11_re-i21_im;
00208       b1_im = +i11_im+i21_re;
00209       b2_re = +i12_re-i22_im;
00210       b2_im = +i12_im+i22_re;
00211       
00212       // write half spinor back to device memory
00213       WRITE_HALF_SPINOR(face_volume, face_idx);
00214     }
00215     break;
00216   case 1:
00217     {
00218       // read spinor from device memory
00219       READ_SPINOR(SPINORTEX, sp_stride, idx, idx);
00220       
00221       spinorFloat a0_re, a0_im;
00222       spinorFloat a1_re, a1_im;
00223       spinorFloat a2_re, a2_im;
00224       spinorFloat b0_re, b0_im;
00225       spinorFloat b1_re, b1_im;
00226       spinorFloat b2_re, b2_im;
00227       
00228       // project spinor into half spinors: a_c = i0c + i3c, b_c = i1c - i2c  (c = 0..2)
00229       a0_re = +i00_re+i30_re;
00230       a0_im = +i00_im+i30_im;
00231       a1_re = +i01_re+i31_re;
00232       a1_im = +i01_im+i31_im;
00233       a2_re = +i02_re+i32_re;
00234       a2_im = +i02_im+i32_im;
00235       b0_re = +i10_re-i20_re;
00236       b0_im = +i10_im-i20_im;
00237       b1_re = +i11_re-i21_re;
00238       b1_im = +i11_im-i21_im;
00239       b2_re = +i12_re-i22_re;
00240       b2_im = +i12_im-i22_im;
00241       
00242       // write half spinor back to device memory
00243       WRITE_HALF_SPINOR(face_volume, face_idx);
00244     }
00245     break;
00246   case 2:
00247     {
00248       // read spinor from device memory
00249       READ_SPINOR(SPINORTEX, sp_stride, idx, idx);
00250       
00251       spinorFloat a0_re, a0_im;
00252       spinorFloat a1_re, a1_im;
00253       spinorFloat a2_re, a2_im;
00254       spinorFloat b0_re, b0_im;
00255       spinorFloat b1_re, b1_im;
00256       spinorFloat b2_re, b2_im;
00257       
00258       // project spinor into half spinors: a_c = i0c + i*i2c, b_c = i1c - i*i3c  (c = 0..2, complex arithmetic)
00259       a0_re = +i00_re-i20_im;
00260       a0_im = +i00_im+i20_re;
00261       a1_re = +i01_re-i21_im;
00262       a1_im = +i01_im+i21_re;
00263       a2_re = +i02_re-i22_im;
00264       a2_im = +i02_im+i22_re;
00265       b0_re = +i10_re+i30_im;
00266       b0_im = +i10_im-i30_re;
00267       b1_re = +i11_re+i31_im;
00268       b1_im = +i11_im-i31_re;
00269       b2_re = +i12_re+i32_im;
00270       b2_im = +i12_im-i32_re;
00271       
00272       // write half spinor back to device memory
00273       WRITE_HALF_SPINOR(face_volume, face_idx);
00274     }
00275     break;
00276   case 3:
00277     {
00278       // read spinor from device memory via READ_SPINOR_UP; only i0c/i1c are used below -- NOTE(review): presumably the macro loads just those components, confirm
00279       READ_SPINOR_UP(SPINORTEX, sp_stride, idx, idx);
00280       
00281       spinorFloat a0_re, a0_im;
00282       spinorFloat a1_re, a1_im;
00283       spinorFloat a2_re, a2_im;
00284       spinorFloat b0_re, b0_im;
00285       spinorFloat b1_re, b1_im;
00286       spinorFloat b2_re, b2_im;
00287       
00288       // project spinor into half spinors: a_c = 2*i0c, b_c = 2*i1c  (c = 0..2; no mixing of components in this case)
00289       a0_re = +2*i00_re;
00290       a0_im = +2*i00_im;
00291       a1_re = +2*i01_re;
00292       a1_im = +2*i01_im;
00293       a2_re = +2*i02_re;
00294       a2_im = +2*i02_im;
00295       b0_re = +2*i10_re;
00296       b0_im = +2*i10_im;
00297       b1_re = +2*i11_re;
00298       b1_im = +2*i11_im;
00299       b2_re = +2*i12_re;
00300       b2_im = +2*i12_im;
00301       
00302       // write half spinor back to device memory
00303       WRITE_HALF_SPINOR(face_volume, face_idx);
00304     }
00305     break;
00306   }
00307   
00308 }
00309 
00310 // undefine to prevent warning when precision is changed (i.e. presumably when this fragment is included again with a different SPINOR_DOUBLE setting)
00311 #undef spinorFloat
00312 #undef SHARED_STRIDE // not defined anywhere in this file; NOTE(review): presumably set by the includer or by io_spinor.h -- confirm
00313 // drop the i<a><b>_re/_im input-spinor aliases defined at the top of this file
00314 #undef i00_re
00315 #undef i00_im
00316 #undef i01_re
00317 #undef i01_im
00318 #undef i02_re
00319 #undef i02_im
00320 #undef i10_re
00321 #undef i10_im
00322 #undef i11_re
00323 #undef i11_im
00324 #undef i12_re
00325 #undef i12_im
00326 #undef i20_re
00327 #undef i20_im
00328 #undef i21_re
00329 #undef i21_im
00330 #undef i22_re
00331 #undef i22_im
00332 #undef i30_re
00333 #undef i30_im
00334 #undef i31_re
00335 #undef i31_im
00336 #undef i32_re
00337 #undef i32_im
00338 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines