// QUDA v0.4.0 -- A library for QCD on GPUs
00001 // input spinor 00002 #ifdef SPINOR_DOUBLE 00003 #define spinorFloat double 00004 #define i00_re I0.x 00005 #define i00_im I0.y 00006 #define i01_re I1.x 00007 #define i01_im I1.y 00008 #define i02_re I2.x 00009 #define i02_im I2.y 00010 #define i10_re I3.x 00011 #define i10_im I3.y 00012 #define i11_re I4.x 00013 #define i11_im I4.y 00014 #define i12_re I5.x 00015 #define i12_im I5.y 00016 #define i20_re I6.x 00017 #define i20_im I6.y 00018 #define i21_re I7.x 00019 #define i21_im I7.y 00020 #define i22_re I8.x 00021 #define i22_im I8.y 00022 #define i30_re I9.x 00023 #define i30_im I9.y 00024 #define i31_re I10.x 00025 #define i31_im I10.y 00026 #define i32_re I11.x 00027 #define i32_im I11.y 00028 #else 00029 #define spinorFloat float 00030 #define i00_re I0.x 00031 #define i00_im I0.y 00032 #define i01_re I0.z 00033 #define i01_im I0.w 00034 #define i02_re I1.x 00035 #define i02_im I1.y 00036 #define i10_re I1.z 00037 #define i10_im I1.w 00038 #define i11_re I2.x 00039 #define i11_im I2.y 00040 #define i12_re I2.z 00041 #define i12_im I2.w 00042 #define i20_re I3.x 00043 #define i20_im I3.y 00044 #define i21_re I3.z 00045 #define i21_im I3.w 00046 #define i22_re I4.x 00047 #define i22_im I4.y 00048 #define i30_re I4.z 00049 #define i30_im I4.w 00050 #define i31_re I5.x 00051 #define i31_im I5.y 00052 #define i32_re I5.z 00053 #define i32_im I5.w 00054 #endif // SPINOR_DOUBLE 00055 00056 #include "io_spinor.h" 00057 00058 if (face_num) { 00059 00060 switch(dim) { 00061 case 0: 00062 { 00063 // read spinor from device memory 00064 READ_SPINOR(SPINORTEX, sp_stride, idx, idx); 00065 00066 spinorFloat a0_re, a0_im; 00067 spinorFloat a1_re, a1_im; 00068 spinorFloat a2_re, a2_im; 00069 spinorFloat b0_re, b0_im; 00070 spinorFloat b1_re, b1_im; 00071 spinorFloat b2_re, b2_im; 00072 00073 // project spinor into half spinors 00074 a0_re = +i00_re+i30_im; 00075 a0_im = +i00_im-i30_re; 00076 a1_re = +i01_re+i31_im; 00077 a1_im = +i01_im-i31_re; 00078 a2_re = 
+i02_re+i32_im; 00079 a2_im = +i02_im-i32_re; 00080 b0_re = +i10_re+i20_im; 00081 b0_im = +i10_im-i20_re; 00082 b1_re = +i11_re+i21_im; 00083 b1_im = +i11_im-i21_re; 00084 b2_re = +i12_re+i22_im; 00085 b2_im = +i12_im-i22_re; 00086 00087 // write half spinor back to device memory 00088 WRITE_HALF_SPINOR(face_volume, face_idx); 00089 } 00090 break; 00091 case 1: 00092 { 00093 // read spinor from device memory 00094 READ_SPINOR(SPINORTEX, sp_stride, idx, idx); 00095 00096 spinorFloat a0_re, a0_im; 00097 spinorFloat a1_re, a1_im; 00098 spinorFloat a2_re, a2_im; 00099 spinorFloat b0_re, b0_im; 00100 spinorFloat b1_re, b1_im; 00101 spinorFloat b2_re, b2_im; 00102 00103 // project spinor into half spinors 00104 a0_re = +i00_re-i30_re; 00105 a0_im = +i00_im-i30_im; 00106 a1_re = +i01_re-i31_re; 00107 a1_im = +i01_im-i31_im; 00108 a2_re = +i02_re-i32_re; 00109 a2_im = +i02_im-i32_im; 00110 b0_re = +i10_re+i20_re; 00111 b0_im = +i10_im+i20_im; 00112 b1_re = +i11_re+i21_re; 00113 b1_im = +i11_im+i21_im; 00114 b2_re = +i12_re+i22_re; 00115 b2_im = +i12_im+i22_im; 00116 00117 // write half spinor back to device memory 00118 WRITE_HALF_SPINOR(face_volume, face_idx); 00119 } 00120 break; 00121 case 2: 00122 { 00123 // read spinor from device memory 00124 READ_SPINOR(SPINORTEX, sp_stride, idx, idx); 00125 00126 spinorFloat a0_re, a0_im; 00127 spinorFloat a1_re, a1_im; 00128 spinorFloat a2_re, a2_im; 00129 spinorFloat b0_re, b0_im; 00130 spinorFloat b1_re, b1_im; 00131 spinorFloat b2_re, b2_im; 00132 00133 // project spinor into half spinors 00134 a0_re = +i00_re+i20_im; 00135 a0_im = +i00_im-i20_re; 00136 a1_re = +i01_re+i21_im; 00137 a1_im = +i01_im-i21_re; 00138 a2_re = +i02_re+i22_im; 00139 a2_im = +i02_im-i22_re; 00140 b0_re = +i10_re-i30_im; 00141 b0_im = +i10_im+i30_re; 00142 b1_re = +i11_re-i31_im; 00143 b1_im = +i11_im+i31_re; 00144 b2_re = +i12_re-i32_im; 00145 b2_im = +i12_im+i32_re; 00146 00147 // write half spinor back to device memory 00148 
WRITE_HALF_SPINOR(face_volume, face_idx); 00149 } 00150 break; 00151 case 3: 00152 { 00153 // read spinor from device memory 00154 READ_SPINOR_DOWN(SPINORTEX, sp_stride, idx, idx); 00155 00156 spinorFloat a0_re, a0_im; 00157 spinorFloat a1_re, a1_im; 00158 spinorFloat a2_re, a2_im; 00159 spinorFloat b0_re, b0_im; 00160 spinorFloat b1_re, b1_im; 00161 spinorFloat b2_re, b2_im; 00162 00163 // project spinor into half spinors 00164 a0_re = +2*i20_re; 00165 a0_im = +2*i20_im; 00166 a1_re = +2*i21_re; 00167 a1_im = +2*i21_im; 00168 a2_re = +2*i22_re; 00169 a2_im = +2*i22_im; 00170 b0_re = +2*i30_re; 00171 b0_im = +2*i30_im; 00172 b1_re = +2*i31_re; 00173 b1_im = +2*i31_im; 00174 b2_re = +2*i32_re; 00175 b2_im = +2*i32_im; 00176 00177 // write half spinor back to device memory 00178 WRITE_HALF_SPINOR(face_volume, face_idx); 00179 } 00180 break; 00181 } 00182 00183 } else { 00184 00185 switch(dim) { 00186 case 0: 00187 { 00188 // read spinor from device memory 00189 READ_SPINOR(SPINORTEX, sp_stride, idx, idx); 00190 00191 spinorFloat a0_re, a0_im; 00192 spinorFloat a1_re, a1_im; 00193 spinorFloat a2_re, a2_im; 00194 spinorFloat b0_re, b0_im; 00195 spinorFloat b1_re, b1_im; 00196 spinorFloat b2_re, b2_im; 00197 00198 // project spinor into half spinors 00199 a0_re = +i00_re-i30_im; 00200 a0_im = +i00_im+i30_re; 00201 a1_re = +i01_re-i31_im; 00202 a1_im = +i01_im+i31_re; 00203 a2_re = +i02_re-i32_im; 00204 a2_im = +i02_im+i32_re; 00205 b0_re = +i10_re-i20_im; 00206 b0_im = +i10_im+i20_re; 00207 b1_re = +i11_re-i21_im; 00208 b1_im = +i11_im+i21_re; 00209 b2_re = +i12_re-i22_im; 00210 b2_im = +i12_im+i22_re; 00211 00212 // write half spinor back to device memory 00213 WRITE_HALF_SPINOR(face_volume, face_idx); 00214 } 00215 break; 00216 case 1: 00217 { 00218 // read spinor from device memory 00219 READ_SPINOR(SPINORTEX, sp_stride, idx, idx); 00220 00221 spinorFloat a0_re, a0_im; 00222 spinorFloat a1_re, a1_im; 00223 spinorFloat a2_re, a2_im; 00224 spinorFloat b0_re, b0_im; 
00225 spinorFloat b1_re, b1_im; 00226 spinorFloat b2_re, b2_im; 00227 00228 // project spinor into half spinors 00229 a0_re = +i00_re+i30_re; 00230 a0_im = +i00_im+i30_im; 00231 a1_re = +i01_re+i31_re; 00232 a1_im = +i01_im+i31_im; 00233 a2_re = +i02_re+i32_re; 00234 a2_im = +i02_im+i32_im; 00235 b0_re = +i10_re-i20_re; 00236 b0_im = +i10_im-i20_im; 00237 b1_re = +i11_re-i21_re; 00238 b1_im = +i11_im-i21_im; 00239 b2_re = +i12_re-i22_re; 00240 b2_im = +i12_im-i22_im; 00241 00242 // write half spinor back to device memory 00243 WRITE_HALF_SPINOR(face_volume, face_idx); 00244 } 00245 break; 00246 case 2: 00247 { 00248 // read spinor from device memory 00249 READ_SPINOR(SPINORTEX, sp_stride, idx, idx); 00250 00251 spinorFloat a0_re, a0_im; 00252 spinorFloat a1_re, a1_im; 00253 spinorFloat a2_re, a2_im; 00254 spinorFloat b0_re, b0_im; 00255 spinorFloat b1_re, b1_im; 00256 spinorFloat b2_re, b2_im; 00257 00258 // project spinor into half spinors 00259 a0_re = +i00_re-i20_im; 00260 a0_im = +i00_im+i20_re; 00261 a1_re = +i01_re-i21_im; 00262 a1_im = +i01_im+i21_re; 00263 a2_re = +i02_re-i22_im; 00264 a2_im = +i02_im+i22_re; 00265 b0_re = +i10_re+i30_im; 00266 b0_im = +i10_im-i30_re; 00267 b1_re = +i11_re+i31_im; 00268 b1_im = +i11_im-i31_re; 00269 b2_re = +i12_re+i32_im; 00270 b2_im = +i12_im-i32_re; 00271 00272 // write half spinor back to device memory 00273 WRITE_HALF_SPINOR(face_volume, face_idx); 00274 } 00275 break; 00276 case 3: 00277 { 00278 // read spinor from device memory 00279 READ_SPINOR_UP(SPINORTEX, sp_stride, idx, idx); 00280 00281 spinorFloat a0_re, a0_im; 00282 spinorFloat a1_re, a1_im; 00283 spinorFloat a2_re, a2_im; 00284 spinorFloat b0_re, b0_im; 00285 spinorFloat b1_re, b1_im; 00286 spinorFloat b2_re, b2_im; 00287 00288 // project spinor into half spinors 00289 a0_re = +2*i00_re; 00290 a0_im = +2*i00_im; 00291 a1_re = +2*i01_re; 00292 a1_im = +2*i01_im; 00293 a2_re = +2*i02_re; 00294 a2_im = +2*i02_im; 00295 b0_re = +2*i10_re; 00296 b0_im = 
+2*i10_im; 00297 b1_re = +2*i11_re; 00298 b1_im = +2*i11_im; 00299 b2_re = +2*i12_re; 00300 b2_im = +2*i12_im; 00301 00302 // write half spinor back to device memory 00303 WRITE_HALF_SPINOR(face_volume, face_idx); 00304 } 00305 break; 00306 } 00307 00308 } 00309 00310 // undefine to prevent warning when precision is changed 00311 #undef spinorFloat 00312 #undef SHARED_STRIDE 00313 00314 #undef i00_re 00315 #undef i00_im 00316 #undef i01_re 00317 #undef i01_im 00318 #undef i02_re 00319 #undef i02_im 00320 #undef i10_re 00321 #undef i10_im 00322 #undef i11_re 00323 #undef i11_im 00324 #undef i12_re 00325 #undef i12_im 00326 #undef i20_re 00327 #undef i20_im 00328 #undef i21_re 00329 #undef i21_im 00330 #undef i22_re 00331 #undef i22_im 00332 #undef i30_re 00333 #undef i30_im 00334 #undef i31_re 00335 #undef i31_im 00336 #undef i32_re 00337 #undef i32_im 00338