|
QUDA v0.3.2
A library for QCD on GPUs
|
00001 #include <stdio.h> 00002 #include <stdlib.h> 00003 #include <math.h> 00004 00005 #include <util_quda.h> 00006 00007 #include <test_util.h> 00008 #include <blas_reference.h> 00009 #include <twisted_mass_dslash_reference.h> 00010 00011 int Z[4]; 00012 int V; 00013 int Vh; 00014 00015 void setDims(int *X) { 00016 V = 1; 00017 for (int d=0; d< 4; d++) { 00018 V *= X[d]; 00019 Z[d] = X[d]; 00020 } 00021 Vh = V/2; 00022 } 00023 00024 template <typename Float> 00025 void sum(Float *dst, Float *a, Float *b, int cnt) { 00026 for (int i = 0; i < cnt; i++) 00027 dst[i] = a[i] + b[i]; 00028 } 00029 00030 // performs the operation y[i] = x[i] + a*y[i] 00031 template <typename Float> 00032 void xpay(Float *x, Float a, Float *y, int len) { 00033 for (int i=0; i<len; i++) y[i] = x[i] + a*y[i]; 00034 } 00035 00036 00037 00038 template <typename Float> 00039 Float *gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd) { 00040 Float **gaugeField; 00041 int j; 00042 00043 if (dir % 2 == 0) { 00044 j = i; 00045 gaugeField = (oddBit ? gaugeOdd : gaugeEven); 00046 } 00047 else { 00048 switch (dir) { 00049 case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -1); break; 00050 case 3: j = neighborIndex(i, oddBit, 0, 0, -1, 0); break; 00051 case 5: j = neighborIndex(i, oddBit, 0, -1, 0, 0); break; 00052 case 7: j = neighborIndex(i, oddBit, -1, 0, 0, 0); break; 00053 default: j = -1; break; 00054 } 00055 gaugeField = (oddBit ? gaugeEven : gaugeOdd); 00056 } 00057 00058 return &gaugeField[dir/2][j*(3*3*2)]; 00059 } 00060 00061 template <typename Float> 00062 Float *spinorNeighbor(int i, int dir, int oddBit, Float *spinorField) { 00063 int j; 00064 switch (dir) { 00065 case 0: j = neighborIndex(i, oddBit, 0, 0, 0, +1); break; 00066 case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -1); break; 00067 case 2: j = neighborIndex(i, oddBit, 0, 0, +1, 0); break; 00068 case 3: j = neighborIndex(i, oddBit, 0, 0, -1, 0); break; 00069 case 4: j = neighborIndex(i, oddBit, 0, +1, 0, 0); break; 00070 case 5: j = neighborIndex(i, oddBit, 0, -1, 0, 0); break; 00071 case 6: j = neighborIndex(i, oddBit, +1, 0, 0, 0); break; 00072 case 7: j = neighborIndex(i, oddBit, -1, 0, 0, 0); break; 00073 default: j = -1; break; 00074 } 00075 00076 return &spinorField[j*(4*3*2)]; 00077 } 00078 00079 template <typename sFloat, typename gFloat> 00080 void dot(sFloat* res, gFloat* a, sFloat* b) { 00081 res[0] = res[1] = 0; 00082 for (int m = 0; m < 3; m++) { 00083 sFloat a_re = a[2*m+0]; 00084 sFloat a_im = a[2*m+1]; 00085 sFloat b_re = b[2*m+0]; 00086 sFloat b_im = b[2*m+1]; 00087 res[0] += a_re * b_re - a_im * b_im; 00088 res[1] += a_re * b_im + a_im * b_re; 00089 } 00090 } 00091 00092 template <typename Float> 00093 void su3Transpose(Float *res, Float *mat) { 00094 for (int m = 0; m < 3; m++) { 00095 for (int n = 0; n < 3; n++) { 00096 res[m*(3*2) + n*(2) + 0] = + mat[n*(3*2) + m*(2) + 0]; 00097 res[m*(3*2) + n*(2) + 1] = - mat[n*(3*2) + m*(2) + 1]; 00098 } 00099 } 00100 } 00101 00102 template <typename sFloat, typename gFloat> 00103 void su3Mul(sFloat *res, gFloat *mat, sFloat *vec) { 00104 for (int n = 0; n < 3; n++) dot(&res[n*(2)], &mat[n*(3*2)], vec); 00105 } 00106 00107 template <typename sFloat, typename gFloat> 00108 void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec) { 00109 gFloat matT[3*3*2]; 00110 su3Transpose(matT, mat); 00111 su3Mul(res, matT, vec); 00112 } 00113 00114 const double projector[8][4][4][2] = { 00115 { 00116 {{1,0}, {0,0}, {0,0}, {0,-1}}, 00117 {{0,0}, {1,0}, {0,-1}, {0,0}}, 00118 {{0,0}, {0,1}, {1,0}, {0,0}}, 00119 {{0,1}, {0,0}, {0,0}, {1,0}} 00120 }, 00121 { 00122 {{1,0}, {0,0}, {0,0}, {0,1}}, 00123 {{0,0}, {1,0}, {0,1}, {0,0}}, 00124 {{0,0}, {0,-1}, {1,0}, {0,0}}, 00125 {{0,-1}, {0,0}, {0,0}, {1,0}} 00126 }, 00127 { 00128 {{1,0}, {0,0}, {0,0}, {1,0}}, 00129 {{0,0}, {1,0}, {-1,0}, {0,0}}, 00130 {{0,0}, {-1,0}, {1,0}, {0,0}}, 00131 {{1,0}, {0,0}, {0,0}, {1,0}} 00132 }, 00133 { 00134 {{1,0}, {0,0}, {0,0}, {-1,0}}, 00135 {{0,0}, {1,0}, {1,0}, {0,0}}, 00136 {{0,0}, {1,0}, {1,0}, {0,0}}, 00137 {{-1,0}, {0,0}, {0,0}, {1,0}} 00138 }, 00139 { 00140 {{1,0}, {0,0}, {0,-1}, {0,0}}, 00141 {{0,0}, {1,0}, {0,0}, {0,1}}, 00142 {{0,1}, {0,0}, {1,0}, {0,0}}, 00143 {{0,0}, {0,-1}, {0,0}, {1,0}} 00144 }, 00145 { 00146 {{1,0}, {0,0}, {0,1}, {0,0}}, 00147 {{0,0}, {1,0}, {0,0}, {0,-1}}, 00148 {{0,-1}, {0,0}, {1,0}, {0,0}}, 00149 {{0,0}, {0,1}, {0,0}, {1,0}} 00150 }, 00151 { 00152 {{1,0}, {0,0}, {-1,0}, {0,0}}, 00153 {{0,0}, {1,0}, {0,0}, {-1,0}}, 00154 {{-1,0}, {0,0}, {1,0}, {0,0}}, 00155 {{0,0}, {-1,0}, {0,0}, {1,0}} 00156 }, 00157 { 00158 {{1,0}, {0,0}, {1,0}, {0,0}}, 00159 {{0,0}, {1,0}, {0,0}, {1,0}}, 00160 {{1,0}, {0,0}, {1,0}, {0,0}}, 00161 {{0,0}, {1,0}, {0,0}, {1,0}} 00162 } 00163 }; 00164 00165 00166 // todo pass projector 00167 template <typename Float> 00168 void multiplySpinorByDiracProjector(Float *res, int projIdx, Float *spinorIn) { 00169 for (int i=0; i<4*3*2; i++) res[i] = 0.0; 00170 00171 for (int s = 0; s < 4; s++) { 00172 for (int t = 0; t < 4; t++) { 00173 Float projRe = projector[projIdx][s][t][0]; 00174 Float projIm = projector[projIdx][s][t][1]; 00175 00176 for (int m = 0; m < 3; m++) { 00177 Float spinorRe = spinorIn[t*(3*2) + m*(2) + 0]; 00178 Float spinorIm = spinorIn[t*(3*2) + m*(2) + 1]; 00179 res[s*(3*2) + m*(2) + 0] += projRe*spinorRe - projIm*spinorIm; 00180 res[s*(3*2) + m*(2) + 1] += projRe*spinorIm + projIm*spinorRe; 00181 } 00182 } 00183 } 00184 } 00185 00186 00187 // 00188 // dslashReference() 00189 // 00190 // if oddBit is zero: calculate odd parity spinor elements (using even parity spinor) 00191 // if oddBit is one: calculate even parity spinor elements 00192 // 00193 // if daggerBit is zero: perform ordinary dslash operator 00194 // if daggerBit is one: perform hermitian conjugate of dslash 00195 // 00196 template <typename sFloat, typename gFloat> 00197 void dslashReference(sFloat *res, gFloat **gaugeFull, sFloat *spinorField, int oddBit, int daggerBit) { 00198 for (int i=0; i<Vh*4*3*2; i++) res[i] = 0.0; 00199 00200 gFloat *gaugeEven[4], *gaugeOdd[4]; 00201 for (int dir = 0; dir < 4; dir++) { 00202 gaugeEven[dir] = gaugeFull[dir]; 00203 gaugeOdd[dir] = gaugeFull[dir]+Vh*gaugeSiteSize; 00204 } 00205 00206 for (int i = 0; i < Vh; i++) { 00207 for (int dir = 0; dir < 8; dir++) { 00208 gFloat *gauge = gaugeLink(i, dir, oddBit, gaugeEven, gaugeOdd); 00209 sFloat *spinor = spinorNeighbor(i, dir, oddBit, spinorField); 00210 00211 sFloat projectedSpinor[4*3*2], gaugedSpinor[4*3*2]; 00212 int projIdx = 2*(dir/2)+(dir+daggerBit)%2; 00213 multiplySpinorByDiracProjector(projectedSpinor, projIdx, spinor); 00214 00215 for (int s = 0; s < 4; s++) { 00216 if (dir % 2 == 0) 00217 su3Mul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]); 00218 else 00219 su3Tmul(&gaugedSpinor[s*(3*2)], gauge, &projectedSpinor[s*(3*2)]); 00220 } 00221 00222 sum(&res[i*(4*3*2)], &res[i*(4*3*2)], gaugedSpinor, 4*3*2); 00223 } 00224 } 00225 } 00226 00227 // applies b*(1 + i*a*gamma_5) 00228 template <typename sFloat> 00229 void twistGamma5(sFloat *out, sFloat *in, const int dagger, const sFloat kappa, const sFloat mu, 00230 const QudaTwistFlavorType flavor, const int V, QudaTwistGamma5Type twist) { 00231 00232 sFloat a=0.0,b=0.0; 00233 if (twist == QUDA_TWIST_GAMMA5_DIRECT) { // applying the twist 00234 a = 2.0 * kappa * mu * flavor; // mu already includes the flavor 00235 b = 1.0; 00236 } else if (twist == QUDA_TWIST_GAMMA5_INVERSE) { // applying the inverse twist 00237 a = -2.0 * kappa * mu * flavor; 00238 b = 1.0 / (1.0 + a*a); 00239 } else { 00240 printf("Twist type %d not defined\n", twist); 00241 exit(0); 00242 } 00243 00244 if (dagger) a *= -1.0; 00245 00246 for(int i = 0; i < V; i++) { 00247 sFloat tmp[24]; 00248 for(int s = 0; s < 4; s++) 00249 for(int c = 0; c < 3; c++) { 00250 sFloat a5 = ((s / 2) ? -1.0 : +1.0) * a; 00251 tmp[s * 6 + c * 2 + 0] = b* (in[i * 24 + s * 6 + c * 2 + 0] - a5*in[i * 24 + s * 6 + c * 2 + 1]); 00252 tmp[s * 6 + c * 2 + 1] = b* (in[i * 24 + s * 6 + c * 2 + 1] + a5*in[i * 24 + s * 6 + c * 2 + 0]); 00253 } 00254 00255 for (int j=0; j<24; j++) out[i*24+j] = tmp[j]; 00256 } 00257 00258 } 00259 00260 // this actually applies the preconditioned dslash, e.g., D_ee^{-1} D_eo or D_oo^{-1} D_oe 00261 void dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, 00262 QudaTwistFlavorType flavor, int oddBit, int daggerBit, 00263 QudaPrecision sPrecision, QudaPrecision gPrecision) { 00264 00265 if (!daggerBit) { 00266 if (sPrecision == QUDA_DOUBLE_PRECISION) { 00267 if (gPrecision == QUDA_DOUBLE_PRECISION) { 00268 dslashReference((double*)res, (double**)gaugeFull, (double*)spinorField, oddBit, daggerBit); 00269 } else { 00270 dslashReference((double*)res, (float**)gaugeFull, (double*)spinorField, oddBit, daggerBit); 00271 } 00272 twistGamma5((double*)res, (double*)res, daggerBit, kappa, mu, 00273 flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00274 } else { 00275 if (gPrecision == QUDA_DOUBLE_PRECISION) { 00276 dslashReference((float*)res, (double**)gaugeFull, (float*)spinorField, oddBit, daggerBit); 00277 } else { 00278 dslashReference((float*)res, (float**)gaugeFull, (float*)spinorField, oddBit, daggerBit); 00279 } 00280 twistGamma5((float*)res, (float*)res, daggerBit, (float)kappa, (float)mu, 00281 flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00282 } 00283 } else { 00284 if (sPrecision == QUDA_DOUBLE_PRECISION) { 00285 twistGamma5((double*)spinorField, (double*)spinorField, daggerBit, kappa, mu, 00286 flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00287 if (gPrecision == QUDA_DOUBLE_PRECISION) { 00288 dslashReference((double*)res, (double**)gaugeFull, (double*)spinorField, oddBit, daggerBit); 00289 } else { 00290 dslashReference((double*)res, (float**)gaugeFull, (double*)spinorField, oddBit, daggerBit); 00291 } 00292 twistGamma5((double*)spinorField, (double*)spinorField, daggerBit, kappa, mu, 00293 flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT); 00294 } else { 00295 twistGamma5((float*)spinorField, (float*)spinorField, daggerBit, (float)kappa, (float)mu, 00296 flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00297 if (gPrecision == QUDA_DOUBLE_PRECISION) { 00298 dslashReference((float*)res, (double**)gaugeFull, (float*)spinorField, oddBit, daggerBit); 00299 } else { 00300 dslashReference((float*)res, (float**)gaugeFull, (float*)spinorField, oddBit, daggerBit); 00301 } 00302 twistGamma5((float*)spinorField, (float*)spinorField, daggerBit, (float)kappa, (float)mu, 00303 flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT); 00304 } 00305 } 00306 } 00307 00308 template <typename sFloat, typename gFloat> 00309 void Mat(sFloat *out, gFloat **gauge, sFloat *in, sFloat kappa, sFloat mu, 00310 QudaTwistFlavorType flavor, int daggerBit) { 00311 00312 sFloat *inEven = in; 00313 sFloat *inOdd = in + Vh*spinorSiteSize; 00314 sFloat *outEven = out; 00315 sFloat *outOdd = out + Vh*spinorSiteSize; 00316 00317 sFloat *tmp = (sFloat*)malloc(V*spinorSiteSize*sizeof(sFloat)); 00318 00319 // full dslash operator 00320 dslashReference(outOdd, gauge, inEven, 1, daggerBit); 00321 dslashReference(outEven, gauge, inOdd, 0, daggerBit); 00322 // apply the twist term 00323 twistGamma5(tmp, in, daggerBit, kappa, mu, flavor, V, QUDA_TWIST_GAMMA5_DIRECT); 00324 00325 // combine 00326 xpay(tmp, -kappa, out, V*spinorSiteSize); 00327 00328 free(tmp); 00329 } 00330 00331 void mat(void *out, void **gauge, void *in, double kappa, double mu, 00332 QudaTwistFlavorType flavor, int dagger_bit, 00333 QudaPrecision sPrecision, QudaPrecision gPrecision) { 00334 00335 if (sPrecision == QUDA_DOUBLE_PRECISION) 00336 if (gPrecision == QUDA_DOUBLE_PRECISION) 00337 Mat((double*)out, (double**)gauge, (double*)in, (double)kappa, (double)mu, flavor, dagger_bit); 00338 else 00339 Mat((double*)out, (float**)gauge, (double*)in, (double)kappa, (double)mu, flavor, dagger_bit); 00340 else 00341 if (gPrecision == QUDA_DOUBLE_PRECISION) 00342 Mat((float*)out, (double**)gauge, (float*)in, (float)kappa, (float)mu, flavor, dagger_bit); 00343 else 00344 Mat((float*)out, (float**)gauge, (float*)in, (float)kappa, (float)mu, flavor, dagger_bit); 00345 } 00346 00347 template <typename Float> 00348 double norm2(Float *v, int len) { 00349 double sum=0.0; 00350 for (int i=0; i<len; i++) sum += v[i]*v[i]; 00351 return sum; 00352 } 00353 00354 // Apply the even-odd preconditioned Dirac operator 00355 template <typename sFloat, typename gFloat> 00356 void MatPC(sFloat *outEven, gFloat **gauge, sFloat *inEven, sFloat kappa, sFloat mu, 00357 QudaTwistFlavorType flavor, int daggerBit, QudaMatPCType matpc_type) { 00358 00359 sFloat *tmp = (sFloat*)malloc(Vh*spinorSiteSize*sizeof(sFloat)); 00360 00361 if (!daggerBit) { 00362 if (matpc_type == QUDA_MATPC_EVEN_EVEN) { 00363 dslashReference(tmp, gauge, inEven, 1, daggerBit); 00364 twistGamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00365 dslashReference(outEven, gauge, tmp, 0, daggerBit); 00366 twistGamma5(outEven, outEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00367 } else if (matpc_type == QUDA_MATPC_ODD_ODD) { 00368 dslashReference(tmp, gauge, inEven, 0, daggerBit); 00369 twistGamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00370 dslashReference(outEven, gauge, tmp, 1, daggerBit); 00371 twistGamma5(outEven, outEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00372 } 00373 } else { 00374 if (matpc_type == QUDA_MATPC_EVEN_EVEN) { 00375 twistGamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00376 dslashReference(tmp, gauge, inEven, 1, daggerBit); 00377 twistGamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00378 dslashReference(outEven, gauge, tmp, 0, daggerBit); 00379 twistGamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT); 00380 } else if (matpc_type == QUDA_MATPC_ODD_ODD) { 00381 twistGamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00382 dslashReference(tmp, gauge, inEven, 0, daggerBit); 00383 twistGamma5(tmp, tmp, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_INVERSE); 00384 dslashReference(outEven, gauge, tmp, 1, daggerBit); 00385 twistGamma5(inEven, inEven, daggerBit, kappa, mu, flavor, Vh, QUDA_TWIST_GAMMA5_DIRECT); // undo 00386 } 00387 } 00388 // lastly apply the kappa term 00389 sFloat kappa2 = -kappa*kappa; 00390 xpay(inEven, kappa2, outEven, Vh*spinorSiteSize); 00391 free(tmp); 00392 00393 } 00394 00395 void matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, 00396 QudaMatPCType matpc_type, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision) { 00397 00398 if (matpc_type != QUDA_MATPC_EVEN_EVEN && matpc_type != QUDA_MATPC_ODD_ODD) { 00399 printf("Only symmetric preconditioning is implemented in reference\n"); 00400 exit(-1); 00401 } 00402 00403 if (sPrecision == QUDA_DOUBLE_PRECISION) 00404 if (gPrecision == QUDA_DOUBLE_PRECISION) 00405 MatPC((double*)outEven, (double**)gauge, (double*)inEven, (double)kappa, (double)mu, 00406 flavor, dagger_bit, matpc_type); 00407 else 00408 MatPC((double*)outEven, (float**)gauge, (double*)inEven, (double)kappa, (double)mu, 00409 flavor, dagger_bit, matpc_type); 00410 else 00411 if (gPrecision == QUDA_DOUBLE_PRECISION) 00412 MatPC((float*)outEven, (double**)gauge, (float*)inEven, (float)kappa, (float)mu, 00413 flavor, dagger_bit, matpc_type); 00414 else 00415 MatPC((float*)outEven, (float**)gauge, (float*)inEven, (float)kappa, (float)mu, 00416 flavor, dagger_bit, matpc_type); 00417 }
1.7.3