// Early-out helper: expands to `if(err) return;`. It relies on a variable named
// `err` being in scope at the point of use, and the value-less `return` means it
// is only usable inside functions returning void.
// NOTE(review): the expansion is a bare `if` that already ends in ';', so placing
// it directly before an `else` would bind that `else` to the macro's `if`.
13 #define RETURN_IF_ERR if(err) return;
/**
 * Returns the direction paired with `dir`, computed as 7 - dir
 * (so 0<->7, 1<->6, 2<->5, 3<->4).
 */
static int OPP_DIR(int dir)
{
  const int last_dir = 7;
  return last_dir - dir;
}
24 static const int result = 1;
30 static const int result = -1;
34 template<
class T,
class U>
79 typedef std::complex<float>
Type;
85 typedef std::complex<float>
Type;
91 typedef std::complex<float>
Type;
97 typedef std::complex<double>
Type;
103 typedef std::complex<double>
Type;
109 typedef std::complex<double>
Type;
115 typedef std::complex<double>
Type;
121 typedef std::complex<double>
Type;
127 typedef std::complex<double>
Type;
130 template<
int N,
class T>
145 template<
int N,
class T>
148 for(
int i=0; i<N; ++i){
149 for(
int j=0; j<N; ++j){
150 data[i][j] =
static_cast<T
>(0);
155 template<
int N,
class T>
158 for(
int i=0; i<N; ++i){
159 for(
int j=0; j<N; ++j){
160 data[i][j] =
mat.data[i][j];
165 template<
int N,
class T>
171 template<
int N,
class T>
177 template<
int N,
class T>
180 for(
int i=0; i<N; ++i){
181 for(
int j=0; j<N; ++j){
182 data[i][j] +=
mat.data[i][j];
188 template<
int N,
class T>
191 for(
int i=0; i<N; ++i){
192 for(
int j=0; j<N; ++j){
193 data[i][j] -=
mat.data[i][j];
199 template<
int N,
class T>
207 template<
int N,
class T>
215 template<
int N,
class T>
219 for(
int i=0; i<N; ++i){
220 for(
int j=0; j<N; ++j){
221 result(i,j) =
static_cast<T
>(0);
222 for(
int k=0; k<N; ++k){
223 result(i,j) += a(i,k)*b(k,j);
230 template<
int N,
class T>
234 for(
int i=0; i<N; ++i){
235 for(
int j=0; j<N; ++j){
242 template<
int N,
class T>
246 for(
int i=0; i<N; ++i){
247 for(
int j=0; j<N; ++j){
248 result(i,j) =
mat(j,i);
254 template<
int N,
class T,
class U>
260 for(
int i=0; i<N; ++i){
261 for(
int j=0; j<N; ++j){
268 template<
int N,
class T,
class U>
274 template<
int N,
class T>
279 for(
int i=0; i<N; ++i){
280 id(i,i) =
static_cast<T
>(1);
286 template<
int N,
class T>
296 template<
int N,
class T>
299 for(
int i=0; i<N; ++i){
300 for(
int j=0; j<N; ++j){
303 if(i<N-1) os << std::endl;
314 const int half_volume;
// Reads one site's 3x3 complex matrix out of `field` into `*mat`.
// The definition treats the site's data as 18 consecutive Reals starting at
// (oddBit*hfv + half_lattice_index)*18 and consumes them in (i,j) row-major
// order, real part first and imaginary part second.
// NOTE(review): `hfv` is a local of the definition; confirm which half-volume
// (plain or padded) it holds.
318 void loadMatrixFromField(
const Real*
const field,
int oddBit,
int half_lattice_index,
Matrix<3, std::complex<Real> >*
const mat)
const;
// Direction-aware overload: fills `*mat` with the 3x3 complex matrix for
// (half_lattice_index, dir, oddBit). Each real and imaginary component is
// fetched through getData() with a running offset, in (i,j) row-major order.
320 void loadMatrixFromField(
const Real*
const field,
int oddBit,
int dir,
int half_lattice_index,
Matrix<3, std::complex<Real> >*
const mat)
const;
// Overwrites one site's matrix in `field` with `mat`.
// The definition writes real then imaginary parts of mat(i,j), row-major, into
// the 18 Reals starting at (oddBit*hfv + half_lattice_index)*18.
322 void storeMatrixToField(
const Matrix<3, std::complex<Real> >&
mat,
int oddBit,
int half_lattice_index, Real*
const field)
const;
// Accumulates coeff*mat into one site's matrix in the output field (the last,
// unnamed parameter). The definition adds coeff*real and coeff*imag of mat(i,j),
// row-major, onto the 18 Reals starting at (oddBit*hfv + half_lattice_index)*18.
324 void addMatrixToField(
const Matrix<3, std::complex<Real> >&
mat,
int oddBit,
int half_lattice_index, Real coeff, Real*
const)
const;
// Direction-aware overload: accumulates coeff*mat into the matrix stored for
// (half_lattice_index, dir, oddBit). Each component is added through addData()
// with a running offset, real part then imaginary part, in (i,j) row-major order.
326 void addMatrixToField(
const Matrix<3, std::complex<Real> >&
mat,
int oddBit,
int dir,
int half_lattice_index, Real coeff, Real*
const)
const;
// Writes a coeff-scaled projection of `mat` into the momentum record located at
// ((oddBit*half_volume + half_lattice_index)*4 + dir)*10 in the output field.
// Entries 0-5 hold 0.5*coeff times (real-part difference, imaginary-part sum) of
// the element pairs (0,1)/(1,0), (0,2)/(2,0) and (1,2)/(2,1).
// Entries 6-8 hold coeff times each diagonal imaginary part minus the mean of
// the three diagonal imaginary parts.
// NOTE(review): the record stride is 10 Reals; confirm how entry 9 is handled.
328 void storeMatrixToMomentumField(
const Matrix<3, std::complex<Real> >&
mat,
int oddBit,
int dir,
int half_lattice_index, Real coeff, Real*
const)
const;
// Returns a single Real component (`offset` within an 18-Real matrix) for site
// `idx`, direction `dir` and parity `oddBit`.
// The definition has two addressing forms:
//   field[(4*hfv*oddBit + 4*idx + dir)*18 + offset]            (one flat array)
//   ((Real**)field)[dir][(hfv*oddBit + idx)*18 + offset]       (per-direction arrays)
// NOTE(review): confirm which condition selects between the two forms.
329 Real getData(
const Real*
const field,
int idx,
int dir,
int oddBit,
int offset,
int hfv)
const;
// Adds the unnamed Real argument to a single component of `field`, addressed by
// (idx, dir, oddBit, offset, hfv). The definition uses one of two forms:
//   field[(4*hfv*oddBit + 4*idx + dir)*18 + offset] += v
//   ((Real**)field)[dir][(hfv*oddBit + idx)*18 + offset] += v
// NOTE(review): confirm which condition selects between the two forms.
330 void addData(Real*
const field,
int idx,
int dir,
int oddBit,
int offset, Real,
int hfv)
const;
// Index conversion: the definition splits the incoming half-lattice index into
// coordinates x1..x4 (x1 rebuilt as 2*x1h + parity bit), subtracts 2 from each,
// and recombines them with the X1..X3 extents, halving the result.
// NOTE(review): presumably maps a padded ("ex") half index to the unpadded one,
// with E*/X* derived from `dim` — confirm against the definition.
331 int half_idx_conversion_ex2normal(
int half_lattice_index,
const int*
dim,
int oddBit)
const ;
// Index conversion in the opposite direction: the definition splits the
// half-lattice index into coordinates x1..x4 using X1h, adds 2 to each, and
// recombines them with the E1..E3 extents, halving the result.
// NOTE(review): presumably maps an unpadded half index to the padded ("ex") one,
// with E*/X* derived from `dim` — confirm against the definition.
332 int half_idx_conversion_normal2ex(
int half_lattice_index,
const int*
dim,
int oddBit)
const ;
349 int sid = half_lattice_index_ex;
352 int x1h = sid - za*
E1h;
357 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
358 int x1 = 2*x1h + x1odd;
360 int idx = ((x4-2)*X3*X2*X1 + (x3-2)*X2*X1+(x2-2)*X1+(x1-2))/2;
379 int sid = half_lattice_index;
382 int x1h = sid - za*X1h;
387 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
388 int x1 = 2*x1h + x1odd;
390 int idx = ((x4+2)*
E3*
E2*
E1 + (x3+2)*
E2*
E1+(x2+2)*
E1+(x1+2))/2;
399 return field[(4*hfv*oddBit +4*idx + dir)*18+offset];
401 return ((Real**)field)[dir][(hfv*oddBit+idx)*18 +offset];
408 field[(4*hfv*oddBit +4*idx + dir)*18+offset] += v;
410 ((Real**)field)[dir][(hfv*oddBit+idx)*18 +offset] += v;
419 int half_lattice_index,
420 Matrix<3, std::complex<Real> >*
const mat
430 for(
int i=0; i<3; ++i){
431 for(
int j=0; j<3; ++j){
432 (*mat)(i,j) = (*(field + (oddBit*hfv + half_lattice_index)*18 + offset++));
433 (*mat)(i,j) += std::complex<Real>(0, *(field + (oddBit*hfv + half_lattice_index)*18 + offset++));
443 int half_lattice_index,
444 Matrix<3, std::complex<Real> >*
const mat
455 for(
int i=0; i<3; ++i){
456 for(
int j=0; j<3; ++j){
457 (*mat)(i,j) = (getData(field, half_lattice_index, dir, oddBit, offset++, hfv));
458 (*mat)(i,j) += std::complex<Real>(0, getData(field, half_lattice_index, dir, oddBit, offset++, hfv));
467 int half_lattice_index,
468 Real*
const field)
const
477 for(
int i=0; i<3; ++i){
478 for(
int j=0; j<3; ++j){
479 *(field + (oddBit*hfv + half_lattice_index)*18 + offset++) = (
mat)(i,j).real();
480 *(field + (oddBit*hfv + half_lattice_index)*18 + offset++) = (
mat)(i,j).imag();
489 int half_lattice_index,
491 Real*
const field)
const
498 Real*
const local_field = field + (oddBit*hfv + half_lattice_index)*18;
502 for(
int i=0; i<3; ++i){
503 for(
int j=0; j<3; ++j){
504 local_field[offset++] += coeff*
mat(i,j).real();
505 local_field[offset++] += coeff*
mat(i,j).imag();
516 int half_lattice_index,
518 Real*
const field)
const
529 for(
int i=0; i<3; ++i){
530 for(
int j=0; j<3; ++j){
532 addData(field, half_lattice_index, dir, oddBit, offset++, coeff*
mat(i,j).real(), hfv);
535 addData(field, half_lattice_index, dir, oddBit, offset++, coeff*
mat(i,j).imag(), hfv);
546 int half_lattice_index,
548 Real*
const field)
const
550 Real*
const mom_field = field + ((oddBit*half_volume + half_lattice_index)*4 + dir)*10;
551 mom_field[0] = (
mat(0,1).real() -
mat(1,0).real())*0.5*coeff;
552 mom_field[1] = (
mat(0,1).imag() +
mat(1,0).imag())*0.5*coeff;
554 mom_field[2] = (
mat(0,2).real() -
mat(2,0).real())*0.5*coeff;
555 mom_field[3] = (
mat(0,2).imag() +
mat(2,0).imag())*0.5*coeff;
557 mom_field[4] = (
mat(1,2).real() -
mat(2,1).real())*0.5*coeff;
558 mom_field[5] = (
mat(1,2).imag() +
mat(2,1).imag())*0.5*coeff;
560 const Real temp = (
mat(0,0).imag() +
mat(1,1).imag() +
mat(2,2).imag())*0.3333333333333333333;
561 mom_field[6] = (
mat(0,0).imag() - temp)*coeff;
562 mom_field[7] = (
mat(1,1).imag() - temp)*coeff;
563 mom_field[8] = (
mat(2,2).imag() - temp)*coeff;
// Decomposes a half-lattice index into four coordinates written to coord[0..3].
// coord[0] is rebuilt as 2*x1h + x1odd, where
// x1odd = (coord[1] + coord[2] + coord[3] + oddBit) & 1.
// The definition has two variants: one divides by extents local_dim[d]+4, the
// other by local_dim[d].
// NOTE(review): confirm which condition selects between the two variants.
579 void getCoordsFromHalfIndex(
int half_index,
int coord[4]);
// Decomposes a full-lattice index into four coordinates written to coord[0..3].
// One variant of the definition divides successively by D1..D3 = local_dim[d]+4.
// NOTE(review): a variant using the plain local_dim extents may also exist —
// confirm against the definition.
580 void getCoordsFromFullIndex(
int full_index,
int coord[4]);
// Stores `half_lattice_index` in half_index, fills full_coord through
// getCoordsFromHalfIndex(), and sets
// full_index = 2*half_lattice_index + x1odd (parity bit of the other three
// coordinates plus oddBit).
581 void cache(
int half_lattice_index);
// Full-lattice index for a half-lattice index. The definition calls
// cache(half_lattice_index) whenever the argument differs from the stored
// half_index.
// NOTE(review): presumably it then returns the cached full_index — confirm.
587 int getFullFromHalfIndex(
int half_lattice_index);
// Returns the full-lattice index of the site one step from `full_lattice_index`
// along `dir` (both orientations of each of the four axes are handled).
// The definition has two variants:
//  - stepping by strides of the padded extents (local_dim[d]+4); when `err` is
//    non-null, *err is set to 1 if the site already sits on the boundary in that
//    direction;
//  - stepping by local_dim strides with periodic wrap-around at the boundary.
// A `dir` that matches no case is reported through errorQuda().
// `err` is optional and defaults to NULL.
// NOTE(review): confirm which condition selects between the two variants.
588 int getNeighborFromFullIndex(
int full_lattice_index,
int dir,
int* err=NULL);
594 for(
int dir=0; dir<4; ++dir){
595 local_dim[dir] =
dim[dir];
596 volume *= local_dim[dir];
606 int E1 = local_dim[0]+4;
607 int E2 = local_dim[1]+4;
608 int E3 = local_dim[2]+4;
612 int z1 = half_lattice_index/
E1h;
613 int x1h = half_lattice_index - z1*
E1h;
615 coord[1] = z1 - z2*
E2;
617 coord[2] = z2 - coord[3]*
E3;
618 int x1odd = (coord[1] + coord[2] + coord[3] + oddBit) & 1;
619 coord[0] = 2*x1h + x1odd;
621 int half_dim_0 = local_dim[0]/2;
622 int z1 = half_lattice_index/half_dim_0;
623 int x1h = half_lattice_index - z1*half_dim_0;
624 int z2 = z1/local_dim[1];
625 coord[1] = z1 - z2*local_dim[1];
626 coord[3] = z2/local_dim[2];
627 coord[2] = z2 - coord[3]*local_dim[2];
628 int x1odd = (coord[1] + coord[2] + coord[3] + oddBit) & 1;
629 coord[0] = 2*x1h + x1odd;
638 int D1=local_dim[0]+4;
639 int D2=local_dim[1]+4;
640 int D3=local_dim[2]+4;
651 int z1 = full_lattice_index/D1;
652 coord[0] = full_lattice_index - z1*D1;
654 coord[1] = z1 - z2*D2;
656 coord[2] = z2 - coord[3]*D3;
666 half_index = half_lattice_index;
667 getCoordsFromHalfIndex(half_lattice_index, full_coord);
668 int x1odd = (full_coord[1] + full_coord[2] + full_coord[3] + oddBit) & 1;
669 full_index = 2*half_lattice_index + x1odd;
676 if(half_index != half_lattice_index) cache(half_lattice_index);
688 getCoordsFromFullIndex(full_lattice_index, coord);
690 int E1 = local_dim[0] + 4;
691 int E2 = local_dim[1] + 4;
692 int E3 = local_dim[2] + 4;
693 int E4 = local_dim[3] + 4;
696 neighbor_index = full_lattice_index + 1;
697 if(err && (coord[0] ==
E1-1) ) *err = 1;
700 neighbor_index = full_lattice_index +
E1;
701 if(err && (coord[1] ==
E2-1) ) *err = 1;
704 neighbor_index = full_lattice_index +
E2*
E1;
705 if(err && (coord[2] ==
E3-1) ) *err = 1;
708 neighbor_index = full_lattice_index +
E3*
E2*
E1;
709 if(err && (coord[3] ==
E4-1) ) *err = 1;
712 neighbor_index = full_lattice_index - 1;
713 if(err && (coord[0] == 0) ) *err = 1;
716 neighbor_index = full_lattice_index -
E1;
717 if(err && (coord[1] == 0) ) *err = 1;
720 neighbor_index = full_lattice_index -
E2*
E1;
721 if(err && (coord[2] == 0) ) *err = 1;
724 neighbor_index = full_lattice_index -
E3*
E2*
E1;
725 if(err && (coord[3] == 0) ) *err = 1;
728 errorQuda(
"Neighbor index could not be determined\n");
736 neighbor_index = (coord[0] == local_dim[0]-1) ? full_lattice_index + 1 - local_dim[0] : full_lattice_index + 1;
739 neighbor_index = (coord[1] == local_dim[1]-1) ? full_lattice_index + local_dim[0]*(1 - local_dim[1]) : full_lattice_index + local_dim[0];
742 neighbor_index = (coord[2] == local_dim[2]-1) ? full_lattice_index + local_dim[0]*local_dim[1]*(1 - local_dim[2]) : full_lattice_index + local_dim[0]*local_dim[1];
745 neighbor_index = (coord[3] == local_dim[3]-1) ? full_lattice_index + local_dim[0]*local_dim[1]*local_dim[2]*(1-local_dim[3]) : full_lattice_index + local_dim[0]*local_dim[1]*local_dim[2];
748 neighbor_index = (coord[0] == 0) ? full_lattice_index - 1 + local_dim[0] : full_lattice_index - 1;
751 neighbor_index = (coord[1] == 0) ? full_lattice_index - local_dim[0]*(1 - local_dim[1]) : full_lattice_index - local_dim[0];
754 neighbor_index = (coord[2] == 0) ? full_lattice_index - local_dim[0]*local_dim[1]*(1 - local_dim[2]) : full_lattice_index - local_dim[0]*local_dim[1];
757 neighbor_index = (coord[3] == 0) ? full_lattice_index - local_dim[0]*local_dim[1]*local_dim[2]*(1 - local_dim[3]) : full_lattice_index - local_dim[0]*local_dim[1]*local_dim[2];
760 errorQuda(
"Neighbor index could not be determined\n");
766 return neighbor_index;
776 template<
class Real,
int oddBit>
778 int half_lattice_index,
779 const Real*
const oprod,
789 int idx = half_lattice_index;
799 const Real*
const oprod,
804 for(
int dir=0; dir<4; ++dir) volume *=
dim[dir];
805 const int half_volume = volume/2;
807 for(
int site=0; site<half_volume; ++site){
808 computeOneLinkSite<Real,0>(
dim, site,
815 for(
int site=0; site<half_volume; ++site){
816 computeOneLinkSite<Real,1>(
dim, site,
833 template<
class Real,
int oddBit>
836 const Real*
const oprod,
837 const Real*
const Qprev,
838 const Real*
const link,
849 const bool sig_positive = (
GOES_FORWARDS(sig)) ?
true :
false;
853 int point_b, point_c, point_d;
854 int ad_link_nbr_idx, ab_link_nbr_idx, bc_link_nbr_idx;
859 point_d = new_mem_idx >> 1;
862 point_c = new_mem_idx >> 1;
865 point_b = new_mem_idx >> 1;
867 ad_link_nbr_idx = (mu_positive) ? point_d : half_lattice_index;
868 bc_link_nbr_idx = (mu_positive) ? point_c : point_b;
869 ab_link_nbr_idx = (sig_positive) ? half_lattice_index : point_b;
895 colorMatY =
conj(colorMatY);
901 colorMatW = (!mu_positive) ? bc_link*colorMatY :
conj(bc_link)*colorMatY;
904 colorMatY = (sig_positive) ? ab_link*colorMatW :
conj(ab_link)*colorMatW;
911 ad_link =
conj(ad_link);
916 if(sig_positive) colorMatY = colorMatW*ad_link;
919 if(
Qmu || sig_positive){
921 colorMatX= colorMatY*ad_link;
924 if(sig_positive) colorMatY = colorMatW*colorMatX;
927 if(sig_positive) ls.
addMatrixToField(colorMatY, oddBit, sig, half_lattice_index, coeff, newOprod);
935 const Real*
const oprod,
936 const Real*
const Qprev,
937 const Real*
const link,
948 for(
int dir=0; dir<4; ++dir) volume *=
dim[dir];
950 const int loop_count =
Vh_ex;
952 const int loop_count = volume/2;
958 for(
int site=0; site<loop_count; ++site){
959 computeMiddleLinkSite<Real, 0>(site,
dim,
966 for(
int site=0; site<loop_count; ++site){
967 computeMiddleLinkSite<Real,1>(site,
dim,
979 template<
class Real,
int oddBit>
982 const Real*
const P3,
983 const Real*
const Qprod,
984 const Real*
const link,
986 Real coeff, Real accumu_coeff,
994 const bool sig_positive = (
GOES_FORWARDS(sig)) ?
true :
false;
1003 point_d = new_mem_idx >> 1;
1004 ad_link_nbr_idx = (mu_positive) ? point_d : half_lattice_index;
1014 ad_link_nbr_idx = point_d;
1017 ad_link_nbr_idx = half_lattice_index;
1020 colorMatW = (mu_positive) ? ad_link*colorMatY :
conj(ad_link)*colorMatY;
1025 Real mycoeff = ( (sig_positive && oddBit) || (!sig_positive && !oddBit) ) ? coeff : -coeff;
1030 colorMatW = colorMatY*colorMatX;
1031 if(!oddBit){ mycoeff = -mycoeff; }
1034 colorMatW =
conj(colorMatX)*
conj(colorMatY);
1035 if(oddBit){ mycoeff = -mycoeff; }
1042 if(!oddBit){ mycoeff = -mycoeff; }
1045 if(oddBit){ mycoeff = -mycoeff; }
1046 colorMatW =
conj(colorMatY);
1056 template<
class Real>
1058 const Real*
const P3,
1059 const Real*
const Qprod,
1060 const Real*
const link,
1062 Real coeff, Real accumu_coeff,
1064 Real*
const newOprod
1069 for(
int dir=0; dir<4; ++dir) volume *=
dim[dir];
1071 const int loop_count =
Vh_ex;
1073 const int loop_count = volume/2;
1077 for(
int site=0; site<loop_count; ++site){
1078 computeSideLinkSite<Real,0>(site,
dim,
1081 coeff, accumu_coeff,
1082 ls, shortP, newOprod);
1085 for(
int site=0; site<loop_count; ++site){
1086 computeSideLinkSite<Real,1>(site,
dim,
1089 coeff, accumu_coeff,
1090 ls, shortP, newOprod);
1100 template<
class Real,
int oddBit>
1103 const Real*
const oprod,
1104 const Real*
const Qprev,
1105 const Real*
const link,
1107 Real coeff, Real accumu_coeff,
1110 Real*
const newOprod)
1114 const bool sig_positive = (
GOES_FORWARDS(sig)) ?
true :
false;
1120 int ab_link_nbr_idx, point_b, point_c, point_d;
1127 point_d = new_mem_idx >> 1;
1130 point_c = new_mem_idx >> 1;
1133 point_b = new_mem_idx >> 1;
1134 ab_link_nbr_idx = (sig_positive) ? half_lattice_index : point_b;
1138 Real mycoeff = ( (sig_positive && oddBit) || (!sig_positive && !oddBit) ) ? coeff : -coeff;
1146 colorMatZ =
conj(bc_link)*colorMatY;
1150 colorMatY = colorMatX*ad_link;
1151 colorMatW = colorMatZ*colorMatY;
1160 colorMatY = (sig_positive) ? ab_link*colorMatZ :
conj(ab_link)*colorMatZ;
1161 colorMatW = colorMatY*colorMatX;
1163 colorMatW = ad_link*colorMatY;
1172 if(sig_positive) colorMatW = colorMatX*
conj(ad_link);
1173 colorMatZ = bc_link*colorMatY;
1175 colorMatY = colorMatZ*colorMatW;
1185 colorMatY = (sig_positive) ? ab_link*colorMatZ :
conj(ab_link)*colorMatZ;
1186 colorMatW =
conj(colorMatX)*
conj(colorMatY);
1189 colorMatW =
conj(ad_link)*colorMatY;
1197 template<
class Real>
1199 const Real*
const oprod,
1200 const Real*
const Qprev,
1201 const Real*
const link,
1203 Real coeff, Real accumu_coeff,
1205 Real*
const newOprod)
1208 for(
int dir=0; dir<4; ++dir) volume *=
dim[dir];
1210 const int loop_count =
Vh_ex;
1212 const int loop_count = volume/2;
1216 for(
int site=0; site<loop_count; ++site){
1218 computeAllLinkSite<Real,0>(site,
dim,
1221 coeff, accumu_coeff,
1226 for(
int site=0; site<loop_count; ++site){
1227 computeAllLinkSite<Real, 1>(site,
dim,
1230 coeff, accumu_coeff,
// Short aliases for the six slots of `tempmat` (the caller allocates six
// buffers, tempmat[0..5]).
// NOTE(review): these are object-like macros with very short names; any later
// identifier spelled Pmu, P3, P5, Pnumu, Qmu or Qnumu is rewritten by the
// preprocessor unless the macros are #undef'd — confirm that is intended.
1238 #define Pmu tempmat[0]
1239 #define P3 tempmat[1]
1240 #define P5 tempmat[2]
1241 #define Pnumu tempmat[3]
1242 #define Qmu tempmat[4]
1243 #define Qnumu tempmat[5]
1245 template<
class Real>
1257 template<
class Real>
1265 Real OneLink, ThreeSt, FiveSt, SevenSt, Lepage, coeff;
1267 OneLink = staple_coeff.
one;
1268 ThreeSt = staple_coeff.
three;
1269 FiveSt = staple_coeff.
five;
1270 SevenSt = staple_coeff.
seven;
1271 Lepage = staple_coeff.
lepage;
1273 for(
int sig=0; sig<4; ++sig){
1282 for(
int sig=0; sig<8; ++sig){
1283 for(
int mu=0;
mu<8; ++
mu){
1286 computeMiddleLinkField<Real>(
dim,
1292 for(
int nu=0; nu<8; ++nu){
1294 || nu==sig || nu==
OPP_DIR(sig) )
continue;
1296 computeMiddleLinkField<Real>(
dim,
1298 sig, nu, staple_coeff.
five,
1303 for(
int rho=0; rho<8; ++rho){
1304 if( rho == sig || rho ==
OPP_DIR(sig)
1306 || rho == nu || rho ==
OPP_DIR(nu) )
1311 if(FiveSt != 0)coeff = SevenSt/FiveSt;
else coeff = 0;
1312 computeAllLinkField<Real>(
dim,
1314 sig, rho, staple_coeff.
seven, coeff,
1320 if(ThreeSt != 0)coeff = FiveSt/ThreeSt;
else coeff = 0;
1321 computeSideLinkField<Real>(
dim,
1323 sig, nu, -FiveSt, coeff,
1331 if(staple_coeff.
lepage != 0.){
1332 computeMiddleLinkField<Real>(
dim,
1338 if(ThreeSt != 0)coeff = Lepage/ThreeSt;
else coeff = 0;
1339 computeSideLinkField<Real>(
dim,
1341 sig,
mu, -Lepage, coeff,
1346 computeSideLinkField<Real>(
dim,
1348 sig,
mu, ThreeSt, 0.,
1372 for(
int dir=0; dir<4; ++dir) volume *=
param.
X[dir];
1382 for(
int i=0; i<6; ++i) tempmat[i] = malloc(len*18*
sizeof(
double));
1384 for(
int i=0; i<6; ++i) tempmat[i] = malloc(len*18*
sizeof(
float));
1388 act_path_coeff.
one = path_coeff[0];
1389 act_path_coeff.
naik = path_coeff[1];
1390 act_path_coeff.
three = path_coeff[2];
1391 act_path_coeff.
five = path_coeff[3];
1392 act_path_coeff.
seven = path_coeff[4];
1393 act_path_coeff.
lepage = path_coeff[5];
1396 doHisqStaplesForceCPU<double>(
param.
X,
1405 doHisqStaplesForceCPU<float>(
param.
X,
1416 for(
int i=0; i<6; ++i){
1424 template<
class Real,
int oddBit>
1427 const Real*
const oprod,
1428 const Real*
const link,
1429 int sig, Real coeff,
1440 int point_a, point_b, point_c, point_d, point_e;
1444 int idx = half_lattice_index;
1451 point_d = new_mem_idx >> 1;
1454 point_e = new_mem_idx >> 1;
1457 point_b = new_mem_idx >> 1;
1460 point_a = new_mem_idx >> 1;
1471 colorMatV = de_link*ef_link*colorMatZ
1472 - de_link*colorMatY*bc_link
1473 + colorMatX*ab_link*bc_link;
1480 template<
class Real>
1482 const Real*
const oprod,
1483 const Real*
const link,
1484 int sig, Real coeff,
1488 for(
int dir=0; dir<4; ++dir) volume *=
dim[dir];
1489 const int half_volume = volume/2;
1492 for(
int site=0; site<half_volume; ++site){
1493 computeLongLinkSite<Real,0>(site,
1502 for(
int site=0; site<half_volume; ++site){
1503 computeLongLinkSite<Real,1>(site,
1520 for(
int sig=0; sig<4; ++sig){
1522 computeLongLinkField<float>(
param.
X,
1528 computeLongLinkField<double>(
param.
X,
1532 (
double*)newOprod->
Gauge_p());
1541 template<
class Real,
int oddBit>
1544 const Real*
const oprod,
1545 const Real*
const link,
1555 int idx = half_lattice_index_ex;
1557 int idx = half_lattice_index;
1562 const Real coeff = (oddBit) ? -1 : 1;
1563 colorMatY = linkW*colorMatX;
1569 template <
class Real>
1571 const Real*
const oprod,
1572 const Real*
const link,
1577 const int half_volume = volume/2;
1581 for(
int site=0; site<half_volume; ++site){
1582 completeForceSite<Real,0>(site,
1590 for(
int site=0; site<half_volume; ++site){
1591 completeForceSite<Real,1>(site,
1607 for(
int sig=0; sig<4; ++sig){
1609 completeForceField<float>(
param.
X,
1615 completeForceField<double>(
param.
X,
void storeMatrixToMomentumField(const Matrix< 3, std::complex< Real > > &mat, int oddBit, int dir, int half_lattice_index, Real coeff, Real *const) const
Real getData(const Real *const field, int idx, int dir, int oddBit, int offset, int hfv) const
void loadMatrixFromField(const Real *const field, int oddBit, int half_lattice_index, Matrix< 3, std::complex< Real > > *const mat) const
int half_idx_conversion_ex2normal(int half_lattice_index, const int *dim, int oddBit) const
int half_idx_conversion_normal2ex(int half_lattice_index, const int *dim, int oddBit) const
void storeMatrixToField(const Matrix< 3, std::complex< Real > > &mat, int oddBit, int half_lattice_index, Real *const field) const
void addData(Real *const field, int idx, int dir, int oddBit, int offset, Real, int hfv) const
void addMatrixToField(const Matrix< 3, std::complex< Real > > &mat, int oddBit, int half_lattice_index, Real coeff, Real *const) const
T & operator()(int i, int j)
Matrix & operator-=(const Matrix< N, T > &mat)
Matrix & operator+=(const Matrix< N, T > &mat)
const T & operator()(int i, int j) const
__device__ __host__ Matrix()
__device__ __host__ T const & operator()(int i, int j) const
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void completeForceField(const int dim[4], const Real *const oprod, const Real *const link, int sig, Real *const mom)
void computeLongLinkField(const int dim[4], const Real *const oprod, const Real *const link, int sig, Real coeff, Real *const output)
void computeAllLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, int sig, int mu, Real coeff, Real accumu_coeff, Real *const shortP, Real *const newOprod)
void computeAllLinkSite(int half_lattice_index, const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, int sig, int mu, Real coeff, Real accumu_coeff, const LoadStore< Real > &ls, Real *const shortP, Real *const newOprod)
void completeForceSite(int half_lattice_index, const int dim[4], const Real *const oprod, const Real *const link, int sig, const LoadStore< Real > &ls, Real *const mom)
void computeSideLinkSite(int half_lattice_index, const int dim[4], const Real *const P3, const Real *const Qprod, const Real *const link, int sig, int mu, Real coeff, Real accumu_coeff, const LoadStore< Real > &ls, Real *const shortP, Real *const newOprod)
void doHisqStaplesForceCPU(const int dim[4], PathCoefficients< double > staple_coeff, Real *oprod, Real *link, Real **tempmat, Real *newOprod)
void computeLongLinkSite(int half_lattice_index, const int dim[4], const Real *const oprod, const Real *const link, int sig, Real coeff, const LoadStore< Real > &ls, Real *const output)
void hisqStaplesForceCPU(const double *path_coeff, const QudaGaugeParam &param, cpuGaugeField &oprod, cpuGaugeField &link, cpuGaugeField *newOprod)
void computeMiddleLinkField(const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, int sig, int mu, Real coeff, Real *const Pmu, Real *const P3, Real *const Qmu, Real *const newOprod)
Matrix< N, T > transpose(const Matrix< N, std::complex< T > > &mat)
void computeMiddleLinkSite(int half_lattice_index, const int dim[4], const Real *const oprod, const Real *const Qprev, const Real *const link, int sig, int mu, Real coeff, const LoadStore< Real > &ls, Real *const Pmu, Real *const P3, Real *const Qmu, Real *const newOprod)
void computeOneLinkField(const int dim[4], const Real *const oprod, int sig, Real coeff, Real *const output)
void computeSideLinkField(const int dim[4], const Real *const P3, const Real *const Qprod, const Real *const link, int sig, int mu, Real coeff, Real accumu_coeff, Real *const shortP, Real *const newOprod)
void hisqCompleteForceCPU(const QudaGaugeParam &param, cpuGaugeField &oprod, cpuGaugeField &link, cpuGaugeField *mom)
Matrix< N, std::complex< T > > conj(const Matrix< N, std::complex< T > > &mat)
void hisqLongLinkForceCPU(double coeff, const QudaGaugeParam &param, cpuGaugeField &oprod, cpuGaugeField &link, cpuGaugeField *newOprod)
void computeOneLinkSite(const int dim[4], int half_lattice_index, const Real *const oprod, int sig, Real coeff, const LoadStore< Real > &ls, Real *const output)
#define GOES_FORWARDS(dir)
__host__ __device__ ValueType conj(ValueType x)
__host__ __device__ float4 operator+=(float4 &x, const float4 &y)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator*(const S &a, const ColorSpinor< Float, Nc, Ns > &x)
Compute the scalar-vector product y = a * x.
__host__ __device__ float4 operator-=(float4 &x, const float4 &y)
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator+(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor addition operator.
__device__ __host__ ColorSpinor< Float, Nc, Ns > operator-(const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y)
ColorSpinor subtraction operator.
std::ostream & operator<<(std::ostream &output, const CloverFieldParam &param)
Main header file for the QUDA library.
Matrix< 3, std::complex< Real > > Type
Matrix< N, T > operator()() const
int getNeighborFromFullIndex(int full_lattice_index, int dir, int *err=NULL)
Locator(const int dim[4])
int getFullFromHalfIndex(int half_lattice_index)
Matrix< N, T > operator()() const