QUDA v0.4.0
A library for QCD on GPUs
|
00001 int Z[4]; 00002 int V; 00003 int Vh; 00004 int Vs_t; 00005 int Vsh_x, Vsh_y, Vsh_z, Vsh_t; 00006 int faceVolume[4]; 00007 00008 void setDims(int *X) { 00009 V = 1; 00010 for (int d=0; d< 4; d++) { 00011 V *= X[d]; 00012 Z[d] = X[d]; 00013 00014 faceVolume[d] = 1; 00015 for (int i=0; i<4; i++) { 00016 if (i==d) continue; 00017 faceVolume[d] *= X[i]; 00018 } 00019 } 00020 Vh = V/2; 00021 00022 Vs_t = Z[0]*Z[1]*Z[2]; 00023 Vsh_t = Vs_t/2; 00024 } 00025 00026 template <typename Float> 00027 void sum(Float *dst, Float *a, Float *b, int cnt) { 00028 for (int i = 0; i < cnt; i++) 00029 dst[i] = a[i] + b[i]; 00030 } 00031 template <typename Float> 00032 void sub(Float *dst, Float *a, Float *b, int cnt) { 00033 for (int i = 0; i < cnt; i++) 00034 dst[i] = a[i] - b[i]; 00035 } 00036 // performs the operation y[i] = x[i] + a*y[i] 00037 template <typename Float> 00038 void xpay(Float *x, Float a, Float *y, int len) { 00039 for (int i=0; i<len; i++) y[i] = x[i] + a*y[i]; 00040 } 00041 // performs the operation y[i] = a*x[i] - y[i] 00042 template <typename Float> 00043 void axmy(Float *x, Float a, Float *y, int len) { 00044 for (int i=0; i<len; i++) y[i] = a*x[i] - y[i]; 00045 } 00046 00047 template <typename Float> 00048 static double norm2(Float *v, int len) { 00049 double sum=0.0; 00050 for (int i=0; i<len; i++) sum += v[i]*v[i]; 00051 return sum; 00052 } 00053 00054 template <typename Float> 00055 void negx(Float *x, int len) { 00056 for (int i=0; i<len; i++) x[i] = -x[i]; 00057 } 00058 00059 template <typename sFloat, typename gFloat> 00060 void dot(sFloat* res, gFloat* a, sFloat* b) { 00061 res[0] = res[1] = 0; 00062 for (int m = 0; m < 3; m++) { 00063 sFloat a_re = a[2*m+0]; 00064 sFloat a_im = a[2*m+1]; 00065 sFloat b_re = b[2*m+0]; 00066 sFloat b_im = b[2*m+1]; 00067 res[0] += a_re * b_re - a_im * b_im; 00068 res[1] += a_re * b_im + a_im * b_re; 00069 } 00070 } 00071 00072 template <typename Float> 00073 void su3Transpose(Float *res, Float *mat) { 00074 for (int m = 0; m < 3; m++) { 00075 for (int n = 0; n < 3; n++) { 00076 res[m*(3*2) + n*(2) + 0] = + mat[n*(3*2) + m*(2) + 0]; 00077 res[m*(3*2) + n*(2) + 1] = - mat[n*(3*2) + m*(2) + 1]; 00078 } 00079 } 00080 } 00081 00082 00083 template <typename sFloat, typename gFloat> 00084 void su3Mul(sFloat *res, gFloat *mat, sFloat *vec) { 00085 for (int n = 0; n < 3; n++) dot(&res[n*(2)], &mat[n*(3*2)], vec); 00086 } 00087 00088 template <typename sFloat, typename gFloat> 00089 void su3Tmul(sFloat *res, gFloat *mat, sFloat *vec) { 00090 gFloat matT[3*3*2]; 00091 su3Transpose(matT, mat); 00092 su3Mul(res, matT, vec); 00093 } 00094 00095 00096 // i represents a "half index" into an even or odd "half lattice". 00097 // when oddBit={0,1} the half lattice is {even,odd}. 00098 // 00099 // the displacements, such as dx, refer to the full lattice coordinates. 00100 // 00101 // neighborIndex() takes a "half index", displaces it, and returns the 00102 // new "half index", which can be an index into either the even or odd lattices. 00103 // displacements of magnitude one always interchange odd and even lattices. 00104 // 00105 00106 00107 template <typename Float> 00108 Float *gaugeLink(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, int nbr_distance) { 00109 Float **gaugeField; 00110 int j; 00111 int d = nbr_distance; 00112 if (dir % 2 == 0) { 00113 j = i; 00114 gaugeField = (oddBit ? gaugeOdd : gaugeEven); 00115 } 00116 else { 00117 switch (dir) { 00118 case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -d); break; 00119 case 3: j = neighborIndex(i, oddBit, 0, 0, -d, 0); break; 00120 case 5: j = neighborIndex(i, oddBit, 0, -d, 0, 0); break; 00121 case 7: j = neighborIndex(i, oddBit, -d, 0, 0, 0); break; 00122 default: j = -1; break; 00123 } 00124 gaugeField = (oddBit ? gaugeEven : gaugeOdd); 00125 } 00126 00127 return &gaugeField[dir/2][j*(3*3*2)]; 00128 } 00129 00130 template <typename Float> 00131 Float *spinorNeighbor(int i, int dir, int oddBit, Float *spinorField, int neighbor_distance) 00132 { 00133 int j; 00134 int nb = neighbor_distance; 00135 switch (dir) { 00136 case 0: j = neighborIndex(i, oddBit, 0, 0, 0, +nb); break; 00137 case 1: j = neighborIndex(i, oddBit, 0, 0, 0, -nb); break; 00138 case 2: j = neighborIndex(i, oddBit, 0, 0, +nb, 0); break; 00139 case 3: j = neighborIndex(i, oddBit, 0, 0, -nb, 0); break; 00140 case 4: j = neighborIndex(i, oddBit, 0, +nb, 0, 0); break; 00141 case 5: j = neighborIndex(i, oddBit, 0, -nb, 0, 0); break; 00142 case 6: j = neighborIndex(i, oddBit, +nb, 0, 0, 0); break; 00143 case 7: j = neighborIndex(i, oddBit, -nb, 0, 0, 0); break; 00144 default: j = -1; break; 00145 } 00146 00147 return &spinorField[j*(mySpinorSiteSize)]; 00148 } 00149 00150 00151 #ifdef MULTI_GPU 00152 00153 int 00154 x4_mg(int i, int oddBit) 00155 { 00156 int Y = fullLatticeIndex(i, oddBit); 00157 int x4 = Y/(Z[2]*Z[1]*Z[0]); 00158 return x4; 00159 } 00160 00161 template <typename Float> 00162 Float *gaugeLink_mg4dir(int i, int dir, int oddBit, Float **gaugeEven, Float **gaugeOdd, 00163 Float** ghostGaugeEven, Float** ghostGaugeOdd, int n_ghost_faces, int nbr_distance) { 00164 Float **gaugeField; 00165 int j; 00166 int d = nbr_distance; 00167 if (dir % 2 == 0) { 00168 j = i; 00169 gaugeField = (oddBit ? gaugeOdd : gaugeEven); 00170 } 00171 else { 00172 00173 int Y = fullLatticeIndex(i, oddBit); 00174 int x4 = Y/(Z[2]*Z[1]*Z[0]); 00175 int x3 = (Y/(Z[1]*Z[0])) % Z[2]; 00176 int x2 = (Y/Z[0]) % Z[1]; 00177 int x1 = Y % Z[0]; 00178 int X1= Z[0]; 00179 int X2= Z[1]; 00180 int X3= Z[2]; 00181 int X4= Z[3]; 00182 Float* ghostGaugeField; 00183 00184 switch (dir) { 00185 case 1: 00186 { //-X direction 00187 int new_x1 = (x1 - d + X1 )% X1; 00188 if (x1 -d < 0){ 00189 ghostGaugeField = (oddBit?ghostGaugeEven[0]: ghostGaugeOdd[0]); 00190 int offset = (n_ghost_faces + x1 -d)*X4*X3*X2/2 + (x4*X3*X2 + x3*X2+x2)/2; 00191 return &ghostGaugeField[offset*(3*3*2)]; 00192 } 00193 j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2; 00194 break; 00195 } 00196 case 3: 00197 { //-Y direction 00198 int new_x2 = (x2 - d + X2 )% X2; 00199 if (x2 -d < 0){ 00200 ghostGaugeField = (oddBit?ghostGaugeEven[1]: ghostGaugeOdd[1]); 00201 int offset = (n_ghost_faces + x2 -d)*X4*X3*X1/2 + (x4*X3*X1 + x3*X1+x1)/2; 00202 return &ghostGaugeField[offset*(3*3*2)]; 00203 } 00204 j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2; 00205 break; 00206 00207 } 00208 case 5: 00209 { //-Z direction 00210 int new_x3 = (x3 - d + X3 )% X3; 00211 if (x3 -d < 0){ 00212 ghostGaugeField = (oddBit?ghostGaugeEven[2]: ghostGaugeOdd[2]); 00213 int offset = (n_ghost_faces + x3 -d)*X4*X2*X1/2 + (x4*X2*X1 + x2*X1+x1)/2; 00214 return &ghostGaugeField[offset*(3*3*2)]; 00215 } 00216 j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2; 00217 break; 00218 } 00219 case 7: 00220 { //-T direction 00221 int new_x4 = (x4 - d + X4)% X4; 00222 if (x4 -d < 0){ 00223 ghostGaugeField = (oddBit?ghostGaugeEven[3]: ghostGaugeOdd[3]); 00224 int offset = (n_ghost_faces + x4 -d)*X1*X2*X3/2 + (x3*X2*X1 + x2*X1+x1)/2; 00225 return &ghostGaugeField[offset*(3*3*2)]; 00226 } 00227 j = (new_x4*(X3*X2*X1) + x3*(X2*X1) + x2*(X1) + x1) / 2; 00228 break; 00229 }//7 00230 00231 default: j = -1; printf("ERROR: wrong dir \n"); exit(1); 00232 } 00233 gaugeField = (oddBit ? gaugeEven : gaugeOdd); 00234 00235 } 00236 00237 return &gaugeField[dir/2][j*(3*3*2)]; 00238 } 00239 00240 template <typename Float> 00241 Float *spinorNeighbor_mg4dir(int i, int dir, int oddBit, Float *spinorField, Float** fwd_nbr_spinor, 00242 Float** back_nbr_spinor, int neighbor_distance, int nFace) 00243 { 00244 int j; 00245 int nb = neighbor_distance; 00246 int Y = fullLatticeIndex(i, oddBit); 00247 int x4 = Y/(Z[2]*Z[1]*Z[0]); 00248 int x3 = (Y/(Z[1]*Z[0])) % Z[2]; 00249 int x2 = (Y/Z[0]) % Z[1]; 00250 int x1 = Y % Z[0]; 00251 int X1= Z[0]; 00252 int X2= Z[1]; 00253 int X3= Z[2]; 00254 int X4= Z[3]; 00255 00256 switch (dir) { 00257 case 0://+X 00258 { 00259 int new_x1 = (x1 + nb)% X1; 00260 if(x1+nb >=X1){ 00261 int offset = ( x1 + nb -X1)*X4*X3*X2/2+(x4*X3*X2 + x3*X2+x2)/2; 00262 return fwd_nbr_spinor[0] + offset*mySpinorSiteSize; 00263 } 00264 j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2; 00265 break; 00266 00267 } 00268 case 1://-X 00269 { 00270 int new_x1 = (x1 - nb + X1)% X1; 00271 if(x1 - nb < 0){ 00272 int offset = ( x1+nFace- nb)*X4*X3*X2/2+(x4*X3*X2 + x3*X2+x2)/2; 00273 return back_nbr_spinor[0] + offset*mySpinorSiteSize; 00274 } 00275 j = (x4*X3*X2*X1 + x3*X2*X1 + x2*X1 + new_x1) / 2; 00276 break; 00277 } 00278 case 2://+Y 00279 { 00280 int new_x2 = (x2 + nb)% X2; 00281 if(x2+nb >=X2){ 00282 int offset = ( x2 + nb -X2)*X4*X3*X1/2+(x4*X3*X1 + x3*X1+x1)/2; 00283 return fwd_nbr_spinor[1] + offset*mySpinorSiteSize; 00284 } 00285 j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2; 00286 break; 00287 } 00288 case 3:// -Y 00289 { 00290 int new_x2 = (x2 - nb + X2)% X2; 00291 if(x2 - nb < 0){ 00292 int offset = ( x2 + nFace -nb)*X4*X3*X1/2+(x4*X3*X1 + x3*X1+x1)/2; 00293 return back_nbr_spinor[1] + offset*mySpinorSiteSize; 00294 } 00295 j = (x4*X3*X2*X1 + x3*X2*X1 + new_x2*X1 + x1) / 2; 00296 break; 00297 } 00298 case 4://+Z 00299 { 00300 int new_x3 = (x3 + nb)% X3; 00301 if(x3+nb >=X3){ 00302 int offset = ( x3 + nb -X3)*X4*X2*X1/2+(x4*X2*X1 + x2*X1+x1)/2; 00303 return fwd_nbr_spinor[2] + offset*mySpinorSiteSize; 00304 } 00305 j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2; 00306 break; 00307 } 00308 case 5://-Z 00309 { 00310 int new_x3 = (x3 - nb + X3)% X3; 00311 if(x3 - nb < 0){ 00312 int offset = ( x3 + nFace -nb)*X4*X2*X1/2+(x4*X2*X1 + x2*X1+x1)/2; 00313 return back_nbr_spinor[2] + offset*mySpinorSiteSize; 00314 } 00315 j = (x4*X3*X2*X1 + new_x3*X2*X1 + x2*X1 + x1) / 2; 00316 break; 00317 } 00318 case 6://+T 00319 { 00320 j = neighborIndex_mg(i, oddBit, +nb, 0, 0, 0); 00321 int x4 = x4_mg(i, oddBit); 00322 if ( (x4 + nb) >= Z[3]){ 00323 int offset = (x4+nb - Z[3])*Vsh_t; 00324 return &fwd_nbr_spinor[3][(offset+j)*mySpinorSiteSize]; 00325 } 00326 break; 00327 } 00328 case 7://-T 00329 { 00330 j = neighborIndex_mg(i, oddBit, -nb, 0, 0, 0); 00331 int x4 = x4_mg(i, oddBit); 00332 if ( (x4 - nb) < 0){ 00333 int offset = ( x4 - nb +nFace)*Vsh_t; 00334 return &back_nbr_spinor[3][(offset+j)*mySpinorSiteSize]; 00335 } 00336 break; 00337 } 00338 default: j = -1; printf("ERROR: wrong dir\n"); exit(1); 00339 } 00340 00341 return &spinorField[j*(mySpinorSiteSize)]; 00342 } 00343 00344 #endif