53 char *grid_size_env = getenv(
"QUDA_TEST_GRID_SIZE");
55 std::stringstream grid_list(grid_size_env);
59 while (grid_list >> dim) {
60 if (i >= 4)
errorQuda(
"Unexpected grid size array length");
62 if (grid_list.peek() ==
',') grid_list.ignore();
71 for (
int i = 1; i < 4; i++) {
80 for (
int i = 2; i >= 0; i--) {
88 void initComms(
int argc,
char **argv,
int *
const commDims)
92 #if defined(QMP_COMMS) 93 QMP_thread_level_t tl;
94 QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);
98 int map[] = {3, 2, 1, 0};
99 QMP_declare_logical_topology_map(commDims, 4, map, 4);
101 int map[] = { 0, 1, 2, 3 };
102 QMP_declare_logical_topology_map(commDims, 4, map, 4);
104 #elif defined(MPI_COMMS) 113 printfQuda(
"Rank order is %s major (%s running fastest)\n",
130 #if defined(QMP_COMMS) 131 QMP_finalize_msg_passing();
132 #elif defined(MPI_COMMS) 142 #if defined(QMP_COMMS) 143 rank = QMP_get_node_number();
144 #elif defined(MPI_COMMS) 145 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
148 srand(17*rank + 137);
153 for (
int d=0; d< 4; d++) {
158 for (
int i=0; i<4; i++) {
165 Vs_x = X[1]*X[2]*X[3];
166 Vs_y = X[0]*X[2]*X[3];
167 Vs_z = X[0]*X[1]*X[3];
168 Vs_t = X[0]*X[1]*X[2];
176 E1=X[0]+4;
E2=X[1]+4;
E3=X[2]+4;
E4=X[3]+4;
190 for (
int d = 0; d < 4; d++) {
195 for (
int i=0; i<4; i++) {
217 template <
typename Float>
219 printfQuda(
"{(%f %f) (%f %f) (%f %f)}\n", v[0], v[1], v[2], v[3], v[4], v[5]);
248 int x4 = Y/(
Z[2]*
Z[1]*
Z[0]);
249 int x3 = (Y/(
Z[1]*
Z[0])) %
Z[2];
250 int x2 = (Y/
Z[0]) %
Z[1];
252 return (x4+x3+x2+x1) % 2;
256 template <
typename Float>
263 template <
typename Float>
265 a[0] = b[0]*c[0] - b[1]*c[1];
266 a[1] = b[0]*c[1] + b[1]*c[0];
270 template <
typename Float>
272 a[0] = b[0]*c[0] - b[1]*c[1];
273 a[1] = -b[0]*c[1] - b[1]*c[0];
277 template <
typename Float>
279 a[0] = b[0]*c[0] + b[1]*c[1];
280 a[1] = b[0]*c[1] - b[1]*c[0];
284 template <
typename Float>
286 a[0] += sign*(b[0]*c[0] - b[1]*c[1]);
287 a[1] += sign*(b[0]*c[1] + b[1]*c[0]);
291 template <
typename Float>
293 a[0] += b[0]*c[0] + b[1]*c[1];
294 a[1] += b[0]*c[1] - b[1]*c[0];
297 template <
typename Float>
299 a[0] += sign * (b[0]*c[0] - b[1]*c[1]);
300 a[1] -= sign * (b[0]*c[1] + b[1]*c[0]);
303 template <
typename Float>
315 template <
typename Float>
317 mat[0] =
atan2(mat[1], mat[0]);
318 mat[1] =
atan2(mat[13], mat[12]);
319 for (
int i=8; i<18; i++) mat[i] = 0.0;
336 template <
typename Float>
338 Float *u = &mat[0*(3*2)];
339 Float *v = &mat[1*(3*2)];
340 Float *w = &mat[2*(3*2)];
341 w[0] = 0.0; w[1] = 0.0; w[2] = 0.0; w[3] = 0.0; w[4] = 0.0; w[5] = 0.0;
350 w[0]*=u0; w[1]*=u0; w[2]*=u0; w[3]*=u0; w[4]*=u0; w[5]*=u0;
353 template <
typename Float>
357 row_sum += mat[2]*mat[2];
358 row_sum += mat[3]*mat[3];
359 row_sum += mat[4]*mat[4];
360 row_sum += mat[5]*mat[5];
363 Float U00_mag =
sqrt(1.f/(u0*u0) - row_sum);
368 mat[0] = U00_mag *
cos(mat[14]);
369 mat[1] = U00_mag *
sin(mat[14]);
371 Float column_sum = 0.0;
372 for (
int i=0; i<2; i++) column_sum += mat[i]*mat[i];
373 for (
int i=6; i<8; i++) column_sum += mat[i]*mat[i];
374 Float U20_mag =
sqrt(1.f/(u0*u0) - column_sum);
376 mat[12] = U20_mag *
cos(mat[15]);
377 mat[13] = U20_mag *
sin(mat[15]);
382 Float r_inv2 = 1.0/(u0*row_sum);
422 template <
typename Float>
424 for (
int i = 0; i < len; i++) {
425 double diff = fabs(a[i] - b[i]);
426 if (diff > epsilon) {
427 printfQuda(
"ERROR: i=%d, a[%d]=%f, b[%d]=%f\n", i, i, a[i], i, b[i]);
436 else return compareFloats((
float*)a, (
float*)b, len, epsilon);
441 int za = index/(dim[0]>>1);
443 int x2 = za - zb*dim[1];
445 int x3 = zb - x4*dim[2];
447 return 2*index + ((x2 + x3 + x4 + oddBit) & 1);
471 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
473 int X = 2 * sid + x1odd;
490 int x4 = Y/(
Z[2]*
Z[1]*
Z[0]);
491 int x3 = (Y/(
Z[1]*
Z[0])) %
Z[2];
492 int x2 = (Y/
Z[0]) %
Z[1];
497 x4 = (x4+dx4+
Z[3]) %
Z[3];
498 x3 = (x3+dx3+
Z[2]) %
Z[2];
499 x2 = (x2+dx2+
Z[1]) %
Z[1];
500 x1 = (x1+dx1+
Z[0]) %
Z[0];
502 return (x4*(
Z[2]*
Z[1]*
Z[0]) + x3*(Z[1]*Z[0]) + x2*(Z[0]) + x1) / 2;
511 x[3] = fullIndex/(dim[2]*dim[1]*dim[0]);
512 x[2] = (fullIndex/(dim[1]*dim[0])) % dim[2];
513 x[1] = (fullIndex/dim[0]) % dim[1];
514 x[0] = fullIndex % dim[0];
516 for(
int dir=0; dir<4; ++dir)
517 x[dir] = (x[dir]+dx[dir]+dim[dir]) % dim[dir];
519 return (((x[3]*dim[2] + x[2])*dim[1] + x[1])*dim[0] + x[0])/2;
528 int x4 = Y/(
Z[2]*
Z[1]*
Z[0]);
529 int x3 = (Y/(
Z[1]*
Z[0])) %
Z[2];
530 int x2 = (Y/
Z[0]) %
Z[1];
533 int ghost_x4 = x4+ dx4;
537 x4 = (x4+dx4+
Z[3]) %
Z[3];
538 x3 = (x3+dx3+
Z[2]) %
Z[2];
539 x2 = (x2+dx2+
Z[1]) %
Z[1];
540 x1 = (x1+dx1+
Z[0]) %
Z[0];
543 ret = (x4*(
Z[2]*
Z[1]*
Z[0]) + x3*(
Z[1]*
Z[0]) + x2*(Z[0]) + x1) / 2;
545 ret = (x3 * (
Z[1] *
Z[0]) + x2 * (
Z[0]) + x1) / 2;
565 int nbr_half_idx =
neighborIndex(half_idx, oddBit, dx4,dx3,dx2,dx1);
566 int oddBitChanged = (dx4+dx3+dx2+dx1)%2;
570 int ret = nbr_half_idx;
572 ret =
Vh + nbr_half_idx;
581 const int volume = dim[0]*dim[1]*dim[2]*dim[3];
582 const int halfVolume = volume/2;
584 int halfIndex =
index;
586 if(index >= halfVolume){
588 halfIndex = index - halfVolume;
591 int neighborHalfIndex =
neighborIndex(dim, halfIndex, oddBit, dx);
593 int oddBitChanged = (dx[0]+dx[1]+dx[2]+dx[3])%2;
598 return neighborHalfIndex + oddBit*halfVolume;
612 int x4 = Y/(
Z[2]*
Z[1]*
Z[0]);
613 int x3 = (Y/(
Z[1]*
Z[0])) %
Z[2];
614 int x2 = (Y/
Z[0]) %
Z[1];
616 int ghost_x4 = x4+ dx4;
618 x4 = (x4+dx4+
Z[3]) %
Z[3];
619 x3 = (x3+dx3+
Z[2]) %
Z[2];
620 x2 = (x2+dx2+
Z[1]) %
Z[1];
621 x1 = (x1+dx1+
Z[0]) %
Z[0];
623 if ( ghost_x4 >= 0 && ghost_x4 <
Z[3]){
624 ret = (x4*(
Z[2]*
Z[1]*
Z[0]) + x3*(
Z[1]*
Z[0]) + x2*(Z[0]) + x1) / 2;
626 ret = (x3 * (
Z[1] *
Z[0]) + x2 * (
Z[0]) + x1) / 2;
630 int oddBitChanged = (dx4+dx3+dx2+dx1)%2;
649 if (i >=
Vh || i < 0) {printf(
"i out of range in fullLatticeIndex_4d"); exit(-1);}
668 int x1odd = (x2 + x3 + x4 + oddBit) & 1;
670 int X = 2 * sid + x1odd;
683 int boundaryCrossings = i/(
Z[0]/2) + i/(
Z[1]*
Z[0]/2) + i/(
Z[2]*
Z[1]*
Z[0]/2) + i/(
Z[3]*
Z[2]*
Z[1]*
Z[0]/2);
684 return 2*i + (boundaryCrossings + oddBit) % 2;
688 int boundaryCrossings = i/(
Z[0]/2) + i/(
Z[1]*
Z[0]/2) + i/(
Z[2]*
Z[1]*
Z[0]/2);
689 return 2*i + (boundaryCrossings + oddBit) % 2;
702 int x4 = Y/(
Z[2]*
Z[1]*
Z[0]);
707 template <
typename Float>
710 for (
int d = 0; d < 3; d++) {
718 for (
int j = (
Z[0]/2)*
Z[1]*
Z[2]*(
Z[3]-1); j <
Vh; j++) {
720 gauge[3][j*gaugeSiteSize+i] *= -1.0;
721 gauge[3][(Vh+j)*gaugeSiteSize+i] *= -1.0;
731 Float *even = gauge[dir];
733 for (
int i = 0; i< iMax; i++) {
734 for (
int m = 0; m < 3; m++) {
735 for (
int n = 0; n < 3; n++) {
736 even[i*(3*3*2) + m*(3*2) + n*(2) + 0] = (m==n) ? 1 : 0;
737 even[i*(3*3*2) + m*(3*2) + n*(2) + 1] = 0.0;
738 odd [i*(3*3*2) + m*(3*2) + n*(2) + 0] = (m==n) ? 1 : 0;
739 odd [i*(3*3*2) + m*(3*2) + n*(2) + 1] = 0.0;
746 template <
typename Float>
749 int X1h=param->
X[0]/2;
757 for(
int d=0; d<4; d++){
765 for (
int d = 0; d < 3; d++) {
768 for (
int i = 0; i <
Vh; i++) {
771 int i4 = index /(X3*X2*
X1);
772 int i3 = (index - i4*(X3*X2*
X1))/(X2*
X1);
773 int i2 = (index - i4*(X3*X2*
X1) - i3*(X2*X1))/X1;
774 int i1 = index - i4*(X3*X2*
X1) - i3*(X2*X1) - i2*
X1;
784 if ((i4+i1) % 2 == 1){
789 if ( (i4+i1+i2) % 2 == 1){
794 for (
int j=0; j < 18; j++) {
799 for (
int i = 0; i <
Vh; i++) {
801 int i4 = index /(X3*X2*
X1);
802 int i3 = (index - i4*(X3*X2*
X1))/(X2*
X1);
803 int i2 = (index - i4*(X3*X2*
X1) - i3*(X2*X1))/X1;
804 int i1 = index - i4*(X3*X2*
X1) - i3*(X2*X1) - i2*
X1;
814 if ((i4+i1) % 2 == 1){
819 if ( (i4+i1+i2) % 2 == 1){
824 for (
int j=0; j<18; j++){
833 for (
int j = 0; j <
Vh; j++) {
836 if (j >= (X4-3)*X1h*X2*
X3 ){
840 if (j >= (X4-1)*X1h*X2*
X3 ){
845 for (
int i=0; i<18; i++) {
860 errorQuda(
"Invalid type %d for applyGaugeFieldScaling_long\n", local_prec);
864 template <
typename Float>
866 Float *resOdd[4], *resEven[4];
867 for (
int dir = 0; dir < 4; dir++) {
868 resEven[dir] = res[dir];
872 for (
int dir = 0; dir < 4; dir++) {
873 for (
int i = 0; i <
Vh; i++) {
874 for (
int m = 0; m < 3; m++) {
875 for (
int n = 0; n < 3; n++) {
876 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = (m==n) ? 1 : 0;
877 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 1] = 0.0;
878 resOdd[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = (m==n) ? 1 : 0;
879 resOdd[dir][i*(3*3*2) + m*(3*2) + n*(2) + 1] = 0.0;
889 template <
typename Float>
892 for (
int i=0; i<len; i++) sum +=
norm(a[i]);
893 for (
int i=0; i<len; i++) a[i] /=
sqrt(sum);
897 template <
typename Float>
899 complex<double>
dot = 0.0;
900 for (
int i=0; i<len; i++) dot +=
conj(a[i])*b[i];
901 for (
int i=0; i<len; i++) b[i] -= (complex<Float>)dot*a[i];
904 template <
typename Float>
907 Float *resOdd[4], *resEven[4];
908 for (
int dir = 0; dir < 4; dir++) {
909 resEven[dir] = res[dir];
913 for (
int dir = 0; dir < 4; dir++) {
914 for (
int i = 0; i <
Vh; i++) {
915 for (
int m = 1; m < 3; m++) {
916 for (
int n = 0; n < 3; n++) {
917 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = rand() / (Float)RAND_MAX;
918 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 1] = rand() / (Float)RAND_MAX;
919 resOdd[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = rand() / (Float)RAND_MAX;
920 resOdd[dir][i * (3 * 3 * 2) + m * (3 * 2) + n * (2) + 1] = rand() / (Float)RAND_MAX;
923 normalize((complex<Float>*)(resEven[dir] + (i*3+1)*3*2), 3);
924 orthogonalize((complex<Float>*)(resEven[dir] + (i*3+1)*3*2), (complex<Float>*)(resEven[dir] + (i*3+2)*3*2), 3);
925 normalize((complex<Float>*)(resEven[dir] + (i*3 + 2)*3*2), 3);
927 normalize((complex<Float>*)(resOdd[dir] + (i*3+1)*3*2), 3);
928 orthogonalize((complex<Float>*)(resOdd[dir] + (i*3+1)*3*2), (complex<Float>*)(resOdd[dir] + (i*3+2)*3*2), 3);
929 normalize((complex<Float>*)(resOdd[dir] + (i*3 + 2)*3*2), 3);
932 Float *w = resEven[dir]+(i*3+0)*3*2;
933 Float *u = resEven[dir]+(i*3+1)*3*2;
934 Float *v = resEven[dir]+(i*3+2)*3*2;
936 for (
int n = 0; n < 6; n++) w[n] = 0.0;
946 Float *w = resOdd[dir]+(i*3+0)*3*2;
947 Float *u = resOdd[dir]+(i*3+1)*3*2;
948 Float *v = resOdd[dir]+(i*3+2)*3*2;
950 for (
int n = 0; n < 6; n++) w[n] = 0.0;
967 for (
int dir = 0; dir < 4; dir++) {
968 for (
int i = 0; i <
Vh; i++) {
969 for (
int m = 0; m < 3; m++) {
970 for (
int n = 0; n < 3; n++) {
971 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] =1.0* rand() / (Float)RAND_MAX;
972 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 1] = 2.0* rand() / (Float)RAND_MAX;
973 resOdd[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = 3.0*rand() / (Float)RAND_MAX;
974 resOdd[dir][i*(3*3*2) + m*(3*2) + n*(2) + 1] = 4.0*rand() / (Float)RAND_MAX;
984 Float *resOdd[4], *resEven[4];
985 for (
int dir = 0; dir < 4; dir++) {
986 resEven[dir] = res[dir];
990 for (
int dir = 0; dir < 4; dir++) {
991 for (
int i = 0; i <
Vh; i++) {
992 for (
int m = 1; m < 3; m++) {
993 for (
int n = 0; n < 3; n++) {
994 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = rand() / (Float)RAND_MAX;
995 resEven[dir][i*(3*3*2) + m*(3*2) + n*(2) + 1] = rand() / (Float)RAND_MAX;
996 resOdd[dir][i*(3*3*2) + m*(3*2) + n*(2) + 0] = rand() / (Float)RAND_MAX;
997 resOdd[dir][i * (3 * 3 * 2) + m * (3 * 2) + n * (2) + 1] = rand() / (Float)RAND_MAX;
1000 normalize((complex<Float>*)(resEven[dir] + (i*3+1)*3*2), 3);
1001 orthogonalize((complex<Float>*)(resEven[dir] + (i*3+1)*3*2), (complex<Float>*)(resEven[dir] + (i*3+2)*3*2), 3);
1002 normalize((complex<Float>*)(resEven[dir] + (i*3 + 2)*3*2), 3);
1004 normalize((complex<Float>*)(resOdd[dir] + (i*3+1)*3*2), 3);
1005 orthogonalize((complex<Float>*)(resOdd[dir] + (i*3+1)*3*2), (complex<Float>*)(resOdd[dir] + (i*3+2)*3*2), 3);
1006 normalize((complex<Float>*)(resOdd[dir] + (i*3 + 2)*3*2), 3);
1009 Float *w = resEven[dir]+(i*3+0)*3*2;
1010 Float *u = resEven[dir]+(i*3+1)*3*2;
1011 Float *v = resEven[dir]+(i*3+2)*3*2;
1013 for (
int n = 0; n < 6; n++) w[n] = 0.0;
1023 Float *w = resOdd[dir]+(i*3+0)*3*2;
1024 Float *u = resOdd[dir]+(i*3+1)*3*2;
1025 Float *v = resOdd[dir]+(i*3+2)*3*2;
1027 for (
int n = 0; n < 6; n++) w[n] = 0.0;
1051 }
else if (type == 1) {
1099 const double phase = (M_PI * rand()) / RAND_MAX;
1100 const complex<double> z =
polar(1.0, phase);
1101 for (
int dir = 0; dir < 4; ++dir) {
1102 for (
int i = 0; i <
V; ++i) {
1105 complex<double> *l = (complex<double> *)(&(((
double *)longlink[dir])[i * gaugeSiteSize + j]));
1108 complex<float> *l = (complex<float> *)(&(((
float *)longlink[dir])[i * gaugeSiteSize + j]));
1116 if (type == 3)
return;
1120 for (
int dir = 0; dir < 4; ++dir) {
1121 for (
int i = 0; i <
V; ++i) {
1124 ((
double *)longlink[dir])[i * gaugeSiteSize + j] = 0.0;
1125 ((
double *)longlink[dir])[i * gaugeSiteSize + j + 1] = 0.0;
1127 ((
float *)longlink[dir])[i * gaugeSiteSize + j] = 0.0;
1128 ((
float *)longlink[dir])[i * gaugeSiteSize + j + 1] = 0.0;
1137 template <
typename Float>
1140 Float c = 2.0 * norm / RAND_MAX;
1142 for(
int i = 0; i <
V; i++) {
1143 for (
int j = 0; j < 72; j++) {
1144 res[i*72 + j] = c*rand() -
norm;
1148 for (
int ch=0; ch<2; ch++) {
1149 res[i*72 + 3 + 36*ch] = -res[i*72 + 0 + 36*ch];
1150 res[i*72 + 4 + 36*ch] = -res[i*72 + 1 + 36*ch];
1151 res[i*72 + 5 + 36*ch] = -res[i*72 + 2 + 36*ch];
1152 res[i*72 + 30 + 36*ch] = -res[i*72 + 6 + 36*ch];
1153 res[i*72 + 31 + 36*ch] = -res[i*72 + 7 + 36*ch];
1154 res[i*72 + 32 + 36*ch] = -res[i*72 + 8 + 36*ch];
1155 res[i*72 + 33 + 36*ch] = -res[i*72 + 9 + 36*ch];
1156 res[i*72 + 34 + 36*ch] = -res[i*72 + 16 + 36*ch];
1157 res[i*72 + 35 + 36*ch] = -res[i*72 + 17 + 36*ch];
1160 for (
int j = 0; j<6; j++) {
1161 res[i*72 + j] += diag;
1162 res[i*72 + j+36] += diag;
1185 template <
typename Float>
1188 const int fail_check = 17;
1189 int fail[4][fail_check];
1191 for (
int d=0; d<4; d++)
for (
int i=0; i<fail_check; i++) fail[d][i] = 0;
1192 for (
int d=0; d<4; d++)
for (
int i=0; i<18; i++) iter[d][i] = 0;
1194 for (
int d=0; d<4; d++) {
1195 for (
int eo=0; eo<2; eo++) {
1196 for (
int i=0; i<
Vh; i++) {
1197 int ga_idx = (eo*Vh+i);
1198 for (
int j=0; j<18; j++) {
1199 double diff = fabs(newG[d][ga_idx*18+j] - oldG[d][ga_idx*18+j]);
1201 for (
int f=0; f<fail_check; f++) if (diff >
pow(10.0,-(f+1))) fail[d][f]++;
1202 if (diff > epsilon) iter[d][j]++;
1208 printf(
"Component fails (X, Y, Z, T)\n");
1209 for (
int i=0; i<18; i++) printf(
"%d fails = (%8d, %8d, %8d, %8d)\n", i, iter[0][i], iter[1][i], iter[2][i], iter[3][i]);
1211 printf(
"\nDeviation Failures = (X, Y, Z, T)\n");
1212 for (
int f=0; f<fail_check; f++) {
1213 printf(
"%e Failures = (%9d, %9d, %9d, %9d) = (%6.5f, %6.5f, %6.5f, %6.5f)\n",
pow(10.0, -(f + 1)), fail[0][f],
1214 fail[1][f], fail[2][f], fail[3][f], fail[0][f] / (
double)(
V * 18), fail[1][f] / (
double)(
V * 18),
1215 fail[2][f] / (
double)(
V * 18), fail[3][f] / (
double)(
V * 18));
1222 checkGauge((
double**)oldG, (
double**)newG, epsilon);
1224 checkGauge((
float**)oldG, (
float**)newG, epsilon);
1238 for(
int i=0;i <
V;i++){
1239 for(
int dir =
XUP; dir <=
TUP; dir++){
1253 int i4 = full_idx /(X3*X2*
X1);
1254 int i3 = (full_idx - i4*(X3*X2*
X1))/(X2*
X1);
1255 int i2 = (full_idx - i4*(X3*X2*
X1) - i3*(X2*X1))/X1;
1256 int i1 = full_idx - i4 * (X3 * X2 *
X1) - i3 * (X2 * X1) - i2 *
X1;
1261 if ( (i4 & 1) != 0){
1267 if ( ((i4+i1) & 1) != 0){
1273 if ( ((i4+i1+i2) & 1) != 0){
1285 printf(
"ERROR: wrong dir(%d)\n", dir);
1292 double* mylink = (
double*)link[dir];
1295 mylink[12] *= coeff;
1296 mylink[13] *= coeff;
1297 mylink[14] *= coeff;
1298 mylink[15] *= coeff;
1299 mylink[16] *= coeff;
1300 mylink[17] *= coeff;
1305 float* mylink = (
float*)link[dir];
1308 mylink[12] *= coeff;
1309 mylink[13] *= coeff;
1310 mylink[14] *= coeff;
1311 mylink[15] *= coeff;
1312 mylink[16] *= coeff;
1313 mylink[17] *= coeff;
1320 for(
int dir= 0;dir < 4;dir++){
1323 float* f = (
float*)link[dir];
1324 if (f[i] != f[i] || (fabsf(f[i]) > 1.e+3) ){
1325 fprintf(stderr,
"ERROR: %dth: bad number(%f) in function %s \n",i, f[i], __FUNCTION__);
1329 double* f = (
double*)link[dir];
1330 if (f[i] != f[i] || (fabs(f[i]) > 1.e+3)){
1331 fprintf(stderr,
"ERROR: %dth: bad number(%f) in function %s \n",i, f[i], __FUNCTION__);
1347 param.
nSpin = nSpin;
1354 for (
int d = 0; d < 4; d++) param.
x[d] = x[d];
1360 template <
typename Float>
1362 const int fail_check = 16;
1363 int fail[fail_check];
1364 for (
int f=0; f<fail_check; f++) fail[f] = 0;
1367 for (
int i=0; i<18; i++) iter[i] = 0;
1369 for(
int dir=0;dir < 4; dir++){
1370 for (
int i=0; i<len; i++) {
1371 for (
int j=0; j<18; j++) {
1373 double diff = fabs(linkA[dir][is]-linkB[dir][is]);
1374 for (
int f=0; f<fail_check; f++)
1375 if (diff >
pow(10.0,-(f+1))) fail[f]++;
1377 if (diff > 1e-3) iter[j]++;
1382 for (
int i=0; i<18; i++)
printfQuda(
"%d fails = %d\n", i, iter[i]);
1384 int accuracy_level = 0;
1385 for(
int f =0; f < fail_check; f++){
1391 for (
int f=0; f<fail_check; f++) {
1392 printfQuda(
"%e Failures: %d / %d = %e\n",
pow(10.0,-(f+1)), fail[f], 4*len*18, fail[f] / (
double)(4*len*18));
1395 return accuracy_level;
1404 ret =
compareLink((
double**)linkA, (
double**)linkB, len);
1406 ret =
compareLink((
float**)linkA, (
float**)linkB, len);
1417 for(
int i=0; i < 3;i++){
1423 for(
int i=0;i < 3;i++){
1458 fprintf(stderr,
"Error: malloc failed for temp in function %s\n", __FUNCTION__);
1462 for(
int i=0;i <
V;i++){
1464 for(
int dir=0;dir < 4;dir++){
1465 double *thismom = (
double *)mom;
1467 thismom[(4 * i + dir) * momSiteSize + k] = 1.0 * rand() / RAND_MAX;
1468 if (k==momSiteSize-1) thismom[ (4*i+dir)*momSiteSize + k ]= 0.0;
1472 for(
int dir=0;dir < 4;dir++){
1473 float* thismom=(
float*)mom;
1475 thismom[(4 * i + dir) * momSiteSize + k] = 1.0 * rand() / RAND_MAX;
1476 if (k==momSiteSize-1) thismom[ (4*i+dir)*momSiteSize + k ]= 0.0;
1489 for(
int i=0;i <
V;i++){
1491 for(
int dir=0;dir < 4;dir++){
1492 double* thishw = (
double*)hw;
1494 thishw[ (4*i+dir)*hwSiteSize + k ]= 1.0* rand() /RAND_MAX;
1498 for(
int dir=0;dir < 4;dir++){
1499 float* thishw=(
float*)hw;
1501 thishw[ (4*i+dir)*hwSiteSize + k ]= 1.0* rand() /RAND_MAX;
1511 template <
typename Float>
1513 const int fail_check = 16;
1514 int fail[fail_check];
1515 for (
int f=0; f<fail_check; f++) fail[f] = 0;
1520 for (
int i=0; i<len; i++) {
1521 for (
int j=0; j<momSiteSize-1; j++) {
1522 int is = i*momSiteSize+j;
1523 double diff = fabs(momA[is]-momB[is]);
1524 for (
int f=0; f<fail_check; f++)
1525 if (diff >
pow(10.0,-(f+1))) fail[f]++;
1527 if (diff > 1e-3) iter[j]++;
1531 int accuracy_level = 0;
1532 for(
int f =0; f < fail_check; f++){
1534 accuracy_level =f+1;
1540 for (
int f=0; f<fail_check; f++) {
1541 printfQuda(
"%e Failures: %d / %d = %e\n",
pow(10.0,-(f+1)), fail[f], len*9, fail[f]/(
double)(len*9));
1544 return accuracy_level;
1552 printfQuda(
"(%9f,%9f) (%9f,%9f)\n", thismom[6], thismom[7], thismom[8], thismom[9]);
1556 printfQuda(
"(%9f,%9f) (%9f,%9f)\n", thismom[6], thismom[7], thismom[8], thismom[9]);
1583 ret =
compare_mom((
double*)momA, (
double*)momB, len);
1585 ret =
compare_mom((
float*)momA, (
float*)momB, len);
1785 printf(
"Usage: %s [options]\n", argv[0]);
1786 printf(
"Common options: \n");
1788 printf(
" --device <n> # Set the CUDA device to use (default 0, single GPU only)\n");
1792 printf(
" --verbosity <silent/summurize/verbose> # The the verbosity on the top level of QUDA( default summarize)\n");
1793 printf(
" --prec <double/single/half> # Precision in GPU\n");
1794 printf(
" --prec-sloppy <double/single/half> # Sloppy precision in GPU\n");
1795 printf(
" --prec-refine <double/single/half> # Sloppy precision for refinement in GPU\n");
1796 printf(
" --prec-precondition <double/single/half> # Preconditioner precision in GPU\n");
1797 printf(
" --prec-ritz <double/single/half> # Eigenvector precision in GPU\n");
1798 printf(
" --recon <8/9/12/13/18> # Link reconstruction type\n");
1799 printf(
" --recon-sloppy <8/9/12/13/18> # Sloppy link reconstruction type\n");
1800 printf(
" --recon-precondition <8/9/12/13/18> # Preconditioner link reconstruction type\n");
1801 printf(
" --dagger # Set the dagger to 1 (default 0)\n");
1802 printf(
" --dim <n> # Set space-time dimension (X Y Z T)\n");
1803 printf(
" --sdim <n> # Set space dimension(X/Y/Z) size\n");
1804 printf(
" --xdim <n> # Set X dimension size(default 24)\n");
1805 printf(
" --ydim <n> # Set X dimension size(default 24)\n");
1806 printf(
" --zdim <n> # Set X dimension size(default 24)\n");
1807 printf(
" --tdim <n> # Set T dimension size(default 24)\n");
1808 printf(
" --Lsdim <n> # Set Ls dimension size(default 16)\n");
1809 printf(
" --gridsize <x y z t> # Set the grid size in all four dimension (default 1 1 1 1)\n");
1810 printf(
" --xgridsize <n> # Set grid size in X dimension (default 1)\n");
1811 printf(
" --ygridsize <n> # Set grid size in Y dimension (default 1)\n");
1812 printf(
" --zgridsize <n> # Set grid size in Z dimension (default 1)\n");
1813 printf(
" --tgridsize <n> # Set grid size in T dimension (default 1)\n");
1814 printf(
" --partition <mask> # Set the communication topology (X=1, Y=2, Z=4, T=8, and combinations of these)\n");
1815 printf(
" --rank-order <col/row> # Set the [t][z][y][x] rank order as either column major (t fastest, default) or row major (x fastest)\n");
1816 printf(
" --dslash-type <type> # Set the dslash type, the following values are valid\n" 1817 " wilson/clover/twisted-mass/twisted-clover/staggered\n" 1818 " /asqtad/domain-wall/domain-wall-4d/mobius/laplace\n");
1819 printf(
" --laplace3D <n> # Restrict laplace operator to omit the t dimension (n=3), or include all dims (n=4) (default 4)\n");
1820 printf(
" --flavor <type> # Set the twisted mass flavor type (singlet (default), deg-doublet, nondeg-doublet)\n");
1821 printf(
" --load-gauge file # Load gauge field \"file\" for the test (requires QIO)\n");
1822 printf(
" --save-gauge file # Save gauge field \"file\" for the test (requires QIO, " 1823 "heatbath test only)\n");
1824 printf(
" --unit-gauge <true/false> # Generate a unit valued gauge field in the tests. If false, a " 1825 "random gauge is generated (default false)\n");
1826 printf(
" --gaussian-sigma <sigma> # Width of the Gaussian noise used for random gauge field " 1827 "contruction (default 0.2)\n");
1828 printf(
" --niter <n> # The number of iterations to perform (default 10)\n");
1829 printf(
" --ngcrkrylov <n> # The number of inner iterations to use for GCR, BiCGstab-l, CA-CG (default 10)\n");
1830 printf(
" --ca-basis-type <power/chebyshev> # The basis to use for CA-CG (default power)\n");
1831 printf(
" --cheby-basis-eig-min # Conservative estimate of smallest eigenvalue for Chebyshev basis CA-CG (default 0)\n");
1832 printf(
" --cheby-basis-eig-max # Conservative estimate of largest eigenvalue for Chebyshev basis CA-CG (default is to guess with power iterations)\n");
1833 printf(
" --pipeline <n> # The pipeline length for fused operations in GCR, BiCGstab-l (default 0, no pipelining)\n");
1834 printf(
" --solution-pipeline <n> # The pipeline length for fused solution accumulation (default 0, no pipelining)\n");
1835 printf(
" --inv-type <cg/bicgstab/gcr> # The type of solver to use (default cg)\n");
1836 printf(
" --precon-type <mr/ (unspecified)> # The type of solver to use (default none (=unspecified)).\n");
1837 printf(
" --multishift <true/false> # Whether to do a multi-shift solver test or not (default false)\n");
1838 printf(
" --mass # Mass of Dirac operator (default 0.1)\n");
1839 printf(
" --kappa # Kappa of Dirac operator (default 0.12195122... [equiv to mass])\n");
1840 printf(
" --mu # Twisted-Mass chiral twist of Dirac operator (default 0.1)\n");
1842 " --epsilon # Twisted-Mass flavor twist of Dirac operator (default 0.01)\n");
1843 printf(
" --tadpole-coeff # Tadpole coefficient for HISQ fermions (default 1.0, recommended [Plaq]^1/4)\n");
1844 printf(
" --epsilon-naik # Epsilon factor on Naik term (default 0.0, suggested non-zero -0.1)\n");
1845 printf(
" --compute-clover # Compute the clover field or use random numbers (default false)\n");
1846 printf(
" --compute-fat-long # Compute the fat/long field or use random numbers (default false)\n");
1847 printf(
" --clover-coeff # Clover coefficient (default 1.0)\n");
1848 printf(
" --anisotropy # Temporal anisotropy factor (default 1.0)\n");
1849 printf(
" --mass-normalization # Mass normalization (kappa (default) / mass / asym-mass)\n");
1850 printf(
" --matpc # Matrix preconditioning type (even-even, odd-odd, even-even-asym, odd-odd-asym) \n");
1851 printf(
" --solve-type # The type of solve to do (direct, direct-pc, normop, " 1852 "normop-pc, normerr, normerr-pc) \n");
1853 printf(
" --solution-type # The solution we desire (mat (default), mat-dag-mat, mat-pc, " 1854 "mat-pc-dag-mat-pc (default for multi-shift))\n");
1855 printf(
" --tol <resid_tol> # Set L2 residual tolerance\n");
1856 printf(
" --tolhq <resid_hq_tol> # Set heavy-quark residual tolerance\n");
1857 printf(
" --reliable-delta <delta> # Set reliable update delta factor\n");
1858 printf(
" --test # Test method (different for each test)\n");
1859 printf(
" --verify <true/false> # Verify the GPU results using CPU results (default true)\n");
1862 printf(
" --mg-low-mode-check <true/false> # Measure how well the null vector subspace overlaps with the " 1863 "low eigenmode subspace (default false)\n");
1864 printf(
" --mg-oblique-proj-check <true/false> # Measure how well the null vector subspace adjusts the low " 1865 "eigenmode subspace (default false)\n");
1866 printf(
" --mg-nvec <level nvec> # Number of null-space vectors to define the multigrid " 1867 "transfer operator on a given level\n" 1868 " If using the eigensolver of the coarsest level then this " 1869 "dictates the size of the deflation space.\n");
1870 printf(
" --mg-gpu-prolongate <true/false> # Whether to do the multigrid transfer operators on the GPU (default false)\n");
1871 printf(
" --mg-levels <2+> # The number of multigrid levels to do (default 2)\n");
1872 printf(
" --mg-nu-pre <level 1-20> # The number of pre-smoother applications to do at a given multigrid level (default 2)\n");
1873 printf(
" --mg-nu-post <level 1-20> # The number of post-smoother applications to do at a given multigrid level (default 2)\n");
1874 printf(
" --mg-coarse-solve-type <level solve> # The type of solve to do on each level (direct, direct-pc) (default = solve_type)\n");
1875 printf(
" --mg-smoother-solve-type <level solve> # The type of solve to do in smoother (direct, direct-pc (default) )\n");
1876 printf(
" --mg-solve-location <level cpu/cuda> # The location where the multigrid solver will run (default cuda)\n");
1877 printf(
" --mg-setup-location <level cpu/cuda> # The location where the multigrid setup will be computed (default cuda)\n");
1878 printf(
" --mg-setup-inv <level inv> # The inverter to use for the setup of multigrid (default bicgstab)\n");
1879 printf(
" --mg-setup-maxiter <level iter> # The maximum number of solver iterations to use when relaxing on a null space vector (default 500)\n");
1880 printf(
" --mg-setup-maxiter-refresh <level iter> # The maximum number of solver iterations to use when refreshing the pre-existing null space vectors (default 100)\n");
1881 printf(
" --mg-setup-iters <level iter> # The number of setup iterations to use for the multigrid (default 1)\n");
1882 printf(
" --mg-setup-tol <level tol> # The tolerance to use for the setup of multigrid (default 5e-6)\n");
1883 printf(
" --mg-setup-ca-basis-type <level power/chebyshev> # The basis to use for CA-CG setup of multigrid(default power)\n");
1884 printf(
" --mg-setup-ca-basis-size <level size> # The basis size to use for CA-CG setup of multigrid (default 4)\n");
1885 printf(
" --mg-setup-cheby-basis-eig-min <level val> # Conservative estimate of smallest eigenvalue for Chebyshev basis CA-CG in setup of multigrid (default 0)\n");
1886 printf(
" --mg-setup-cheby-basis-eig-max <level val> # Conservative estimate of largest eigenvalue for Chebyshev basis CA-CG in setup of multigrid (default is to guess with power iterations)\n");
1887 printf(
" --mg-setup-type <null/test> # The type of setup to use for the multigrid (default null)\n");
1888 printf(
" --mg-pre-orth <true/false> # If orthonormalize the vector before inverting in the setup of multigrid (default false)\n");
1889 printf(
" --mg-post-orth <true/false> # If orthonormalize the vector after inverting in the setup of multigrid (default true)\n");
1890 printf(
" --mg-n-block-ortho <level n> # The number of times to run Gram-Schmidt during block " 1891 "orthonormalization (default 1)\n");
1892 printf(
" --mg-omega # The over/under relaxation factor for the smoother of multigrid (default 0.85)\n");
1893 printf(
" --mg-coarse-solver <level gcr/etc.> # The solver to wrap the V cycle on each level (default gcr, only for levels 1+)\n");
1894 printf(
" --mg-coarse-solver-tol <level gcr/etc.> # The coarse solver tolerance for each level (default 0.25, only for levels 1+)\n");
1895 printf(
" --mg-coarse-solver-maxiter <level n> # The coarse solver maxiter for each level (default 100)\n");
1896 printf(
" --mg-coarse-solver-ca-basis-type <level power/chebyshev> # The basis to use for CA-CG setup of multigrid(default power)\n");
1897 printf(
" --mg-coarse-solver-ca-basis-size <level size> # The basis size to use for CA-CG setup of multigrid (default 4)\n");
1898 printf(
" --mg-coarse-solver-cheby-basis-eig-min <level val> # Conservative estimate of smallest eigenvalue for Chebyshev basis CA-CG in setup of multigrid (default 0)\n");
1899 printf(
" --mg-coarse-solver-cheby-basis-eig-max <level val> # Conservative estimate of largest eigenvalue for Chebyshev basis CA-CG in setup of multigrid (default is to guess with power iterations)\n");
1900 printf(
" --mg-smoother <level mr/etc.> # The smoother to use for multigrid (default mr)\n");
1901 printf(
" --mg-smoother-tol <level resid_tol> # The smoother tolerance to use for each multigrid (default 0.25)\n");
1902 printf(
" --mg-smoother-halo-prec # The smoother halo precision (applies to all levels - defaults to null_precision)\n");
1903 printf(
" --mg-schwarz-type <level false/add/mul> # Whether to use Schwarz preconditioning (requires MR smoother and GCR setup solver) (default false)\n");
1904 printf(
" --mg-schwarz-cycle <level cycle> # The number of Schwarz cycles to apply per smoother application (default=1)\n");
1905 printf(
" --mg-block-size <level x y z t> # Set the geometric block size for the each multigrid level's transfer operator (default 4 4 4 4)\n");
1906 printf(
" --mg-mu-factor <level factor> # Set the multiplicative factor for the twisted mass mu parameter on each level (default 1)\n");
1907 printf(
" --mg-generate-nullspace <true/false> # Generate the null-space vector dynamically (default true, if set false and mg-load-vec isn't set, creates free-field null vectors)\n");
1908 printf(
" --mg-generate-all-levels <true/talse> # true=generate null-space on all levels, false=generate on level 0 and create other levels from that (default true)\n");
1909 printf(
" --mg-load-vec <level file> # Load the vectors \"file\" for the multigrid_test (requires " 1911 printf(
" --mg-save-vec <level file> # Save the generated null-space vectors \"file\" from the " 1912 "multigrid_test (requires QIO)\n");
1913 printf(
" --mg-verbosity <level verb> # The verbosity to use on each level of the multigrid (default " 1917 printf(
" --df-nev <nev> # Set number of eigenvectors computed within a single solve cycle (default 8)\n");
1918 printf(
" --df-max-search-dim <dim> # Set the size of eigenvector search space (default 64)\n");
1919 printf(
" --df-deflation-grid <n> # Set maximum number of cycles needed to compute eigenvectors(default 1)\n");
1920 printf(
" --df-eigcg-max-restarts <n> # Set how many iterative refinement cycles will be solved with eigCG within a single physical right hand site solve (default 4)\n");
1921 printf(
" --df-tol-restart <tol> # Set tolerance for the first restart in the initCG solver(default 5e-5)\n");
1922 printf(
" --df-tol-inc <tol> # Set tolerance for the subsequent restarts in the initCG solver (default 1e-2)\n");
1923 printf(
" --df-max-restart-num <n> # Set maximum number of the initCG restarts in the deflation stage (default 3)\n");
1924 printf(
" --df-tol-eigenval <tol> # Set maximum eigenvalue residual norm (default 1e-1)\n");
1926 printf(
" --solver-ext-lib-type <eigen/magma> # Set external library for the solvers (default Eigen library)\n");
1927 printf(
" --df-ext-lib-type <eigen/magma> # Set external library for the deflation methods (default Eigen library)\n");
1928 printf(
" --df-location-ritz <host/cuda> # Set memory location for the ritz vectors (default cuda memory location)\n");
1929 printf(
" --df-mem-type-ritz <device/pinned/mapped> # Set memory type for the ritz vectors (default device memory type)\n");
1932 printf(
" --eig-nEv <n> # The size of eigenvector search space in the eigensolver\n");
1933 printf(
" --eig-nKr <n> # The size of the Krylov subspace to use in the eigensolver\n");
1934 printf(
" --eig-nConv <n> # The number of converged eigenvalues requested\n");
1935 printf(
" --eig-require-convergence <true/false> # If true, the solver will error out if convergence is not " 1936 "attained. If false, a warning will be given (default true)\n");
1937 printf(
" --eig-check-interval <n> # Perform a convergence check every nth restart/iteration in " 1938 "the IRAM,IRLM/lanczos,arnoldi eigensolver\n");
1939 printf(
" --eig-max-restarts <n> # Perform n iterations of the restart in the eigensolver\n");
1940 printf(
" --eig-tol <tol> # The tolerance to use in the eigensolver\n");
1941 printf(
" --eig-use-poly-acc <true/false> # Use Chebyshev polynomial acceleration in the eigensolver\n");
1942 printf(
" --eig-poly-deg <n> # Set the degree of the Chebyshev polynomial acceleration in " 1943 "the eigensolver\n");
1944 printf(
" --eig-amin <Float> # The minimum in the polynomial acceleration\n");
1945 printf(
" --eig-amax <Float> # The maximum in the polynomial acceleration\n");
1946 printf(
" --eig-use-normop <true/false> # Solve the MdagM problem instead of M (MMdag if " 1947 "eig-use-dagger == true) (default false)\n");
1948 printf(
" --eig-use-dagger <true/false> # Solve the Mdag problem instead of M (MMdag if " 1949 "eig-use-normop == true) (default false)\n");
1950 printf(
" --eig-compute-svd <true/false> # Solve the MdagM problem, use to compute SVD of M (default " 1952 printf(
" --eig-spectrum <SR/LR/SM/LM/SI/LI> # The spectrum part to be calulated. S=smallest L=largest " 1953 "R=real M=modulus I=imaginary\n");
1954 printf(
" --eig-type <eigensolver> # The type of eigensolver to use (default trlm)\n");
1955 printf(
" --eig-QUDA-logfile <file_name> # The filename storing the stdout from the QUDA eigensolver\n");
1956 printf(
" --eig-arpack-check <true/false> # Cross check the device data against ARPACK (requires ARPACK, " 1957 "default false)\n");
1958 printf(
" --eig-load-vec <file> # Load eigenvectors to <file> (requires QIO)\n");
1959 printf(
" --eig-save-vec <file> # Save eigenvectors to <file> (requires QIO)\n");
1962 printf(
" --mg-eig <level> <true/false> # Use the eigensolver on this level (default false)\n");
1963 printf(
" --mg-eig-nEv <level> <n> # The size of eigenvector search space in the " 1965 printf(
" --mg-eig-nKr <level> <n> # The size of the Krylov subspace to use in the " 1967 printf(
" --mg-eig-require-convergence <level> <true/false> # If true, the solver will error out if convergence is " 1968 "not attained. If false, a warning will be given (default true)\n");
1969 printf(
" --mg-eig-check-interval <level> <n> # Perform a convergence check every nth " 1970 "restart/iteration (only used in Implicit Restart types)\n");
1971 printf(
" --mg-eig-max-restarts <level> <n> # Perform a maximun of n restarts in eigensolver " 1973 printf(
" --mg-eig-use-normop <level> <true/false> # Solve the MdagM problem instead of M (MMdag if " 1974 "eig-use-dagger == true) (default false)\n");
1975 printf(
" --mg-eig-use-dagger <level> <true/false> # Solve the MMdag problem instead of M (MMdag if " 1976 "eig-use-normop == true) (default false)\n");
1978 " --mg-eig-tol <level> <tol> # The tolerance to use in the eigensolver (default 1e-6)\n");
1979 printf(
" --mg-eig-use-poly-acc <level> <true/false> # Use Chebyshev polynomial acceleration in the " 1980 "eigensolver (default true)\n");
1981 printf(
" --mg-eig-poly-deg <level> <n> # Set the degree of the Chebyshev polynomial (default " 1983 printf(
" --mg-eig-amin <level> <Float> # The minimum in the polynomial acceleration (default " 1985 printf(
" --mg-eig-amax <level> <Float> # The maximum in the polynomial acceleration (default " 1987 printf(
" --mg-eig-spectrum <level> <SR/LR/SM/LM/SI/LI> # The spectrum part to be calulated. S=smallest " 1988 "L=largest R=real M=modulus I=imaginary (default SR)\n");
1989 printf(
" --mg-eig-coarse-guess <true/false> # If deflating on the coarse grid, optionaly use an " 1990 "initial guess (default = false)\n");
1991 printf(
" --mg-eig-type <level> <eigensolver> # The type of eigensolver to use (default trlm)\n");
1994 printf(
" --nsrc <n> # How many spinors to apply the dslash to simultaneusly (experimental for staggered only)\n");
1995 printf(
" --msrc <n> # Used for testing non-square block blas routines where nsrc defines the other dimension\n");
1996 printf(
" --heatbath-beta <beta> # Beta value used in heatbath test (default 6.2)\n");
1997 printf(
" --heatbath-warmup-steps <n> # Number of warmup steps in heatbath test (default 10)\n");
1998 printf(
" --heatbath-num-steps <n> # Number of measurement steps in heatbath test (default 10)\n");
1999 printf(
" --heatbath-num-hb-per-step <n> # Number of heatbath hits per heatbath step (default 5)\n");
2000 printf(
" --heatbath-num-or-per-step <n> # Number of overrelaxation hits per heatbath step (default 5)\n");
2001 printf(
" --heatbath-coldstart <true/false> # Whether to use a cold or hot start in heatbath test (default false)\n");
2003 printf(
" --contraction-type <open/dr/dp> # Whether to leave spin elemental open, or use a gamma basis " 2004 "and contract on spin (default open)\n");
2006 printf(
" --help # Print out this message\n");
2012 char msg[]=
"single";
2014 printf(
"Note: this program is %s GPU build\n", msg);
2024 char msg[]=
"single";
2031 if( strcmp(argv[i],
"--help")== 0){
2035 if( strcmp(argv[i],
"--verify") == 0){
2036 if (i + 1 >= argc) {
usage(argv); }
2038 if (strcmp(argv[i+1],
"true") == 0){
2040 }
else if (strcmp(argv[i+1],
"false") == 0){
2043 fprintf(stderr,
"ERROR: invalid verify type\n");
2052 if (strcmp(argv[i],
"--mg-low-mode-check") == 0) {
2053 if (i + 1 >= argc) {
usage(argv); }
2055 if (strcmp(argv[i + 1],
"true") == 0) {
2057 }
else if (strcmp(argv[i + 1],
"false") == 0) {
2060 fprintf(stderr,
"ERROR: invalid low_mode_check type (true/false)\n");
2069 if (strcmp(argv[i],
"--mg-oblique-proj-check") == 0) {
2070 if (i + 1 >= argc) {
usage(argv); }
2072 if (strcmp(argv[i + 1],
"true") == 0) {
2074 }
else if (strcmp(argv[i + 1],
"false") == 0) {
2077 fprintf(stderr,
"ERROR: invalid oblique_proj_check type (true/false)\n");
2086 if( strcmp(argv[i],
"--device") == 0){
2090 device = atoi(argv[i+1]);
2091 if (device < 0 || device > 16){
2092 printf(
"ERROR: Invalid CUDA device number (%d)\n",
device);
2100 if( strcmp(argv[i],
"--verbosity") == 0){
2110 if( strcmp(argv[i],
"--prec") == 0){
2111 if (i + 1 >= argc) {
usage(argv); }
2118 if( strcmp(argv[i],
"--prec-sloppy") == 0){
2119 if (i + 1 >= argc) {
usage(argv); }
2126 if( strcmp(argv[i],
"--prec-refine") == 0){
2127 if (i + 1 >= argc) {
usage(argv); }
2134 if( strcmp(argv[i],
"--prec-precondition") == 0){
2144 if( strcmp(argv[i],
"--prec-null") == 0){
2154 if( strcmp(argv[i],
"--prec-ritz") == 0){
2164 if( strcmp(argv[i],
"--recon") == 0){
2174 if( strcmp(argv[i],
"--recon-sloppy") == 0){
2184 if( strcmp(argv[i],
"--recon-precondition") == 0){
2194 if( strcmp(argv[i],
"--dim") == 0){
2198 xdim= atoi(argv[i+1]);
2199 if (xdim < 0 || xdim > 512){
2200 printf(
"ERROR: invalid X dimension (%d)\n",
xdim);
2208 ydim= atoi(argv[i+1]);
2209 if (ydim < 0 || ydim > 512){
2210 printf(
"ERROR: invalid Y dimension (%d)\n",
ydim);
2218 zdim= atoi(argv[i+1]);
2219 if (zdim < 0 || zdim > 512){
2220 printf(
"ERROR: invalid Z dimension (%d)\n",
zdim);
2228 tdim= atoi(argv[i+1]);
2229 if (tdim < 0 || tdim > 512){
2230 printf(
"ERROR: invalid T dimension (%d)\n",
tdim);
2239 if( strcmp(argv[i],
"--xdim") == 0){
2243 xdim= atoi(argv[i+1]);
2244 if (xdim < 0 || xdim > 512){
2245 printf(
"ERROR: invalid X dimension (%d)\n",
xdim);
2253 if( strcmp(argv[i],
"--ydim") == 0){
2257 ydim= atoi(argv[i+1]);
2258 if (ydim < 0 || ydim > 512){
2259 printf(
"ERROR: invalid T dimension (%d)\n",
ydim);
2268 if( strcmp(argv[i],
"--zdim") == 0){
2272 zdim= atoi(argv[i+1]);
2273 if (zdim < 0 || zdim > 512){
2274 printf(
"ERROR: invalid T dimension (%d)\n",
zdim);
2282 if( strcmp(argv[i],
"--tdim") == 0){
2283 if (i + 1 >= argc) {
usage(argv); }
2284 tdim = atoi(argv[i+1]);
2285 if (tdim < 0 || tdim > 512){
2286 printf(
"Error: invalid t dimension");
2294 if( strcmp(argv[i],
"--sdim") == 0){
2295 if (i + 1 >= argc) {
usage(argv); }
2296 int sdim = atoi(argv[i+1]);
2297 if (sdim < 0 || sdim > 512){
2298 printf(
"ERROR: invalid S dimension\n");
2307 if( strcmp(argv[i],
"--Lsdim") == 0){
2308 if (i + 1 >= argc) {
usage(argv); }
2309 int Ls = atoi(argv[i+1]);
2310 if (Ls < 0 || Ls > 128){
2311 printf(
"ERROR: invalid Ls dimension\n");
2320 if( strcmp(argv[i],
"--dagger") == 0){
2326 if( strcmp(argv[i],
"--partition") == 0){
2331 int value = atoi(argv[i+1]);
2332 for(
int j=0; j < 4;j++){
2333 if (value & (1 << j)){
2339 printfQuda(
"WARNING: Ignoring --partition option since this is a single-GPU build.\n");
2346 if( strcmp(argv[i],
"--multishift") == 0){
2347 if (i + 1 >= argc) {
usage(argv); }
2349 if (strcmp(argv[i+1],
"true") == 0){
2351 }
else if (strcmp(argv[i+1],
"false") == 0){
2354 fprintf(stderr,
"ERROR: invalid multishift boolean\n");
2363 if( strcmp(argv[i],
"--gridsize") == 0){
2364 if (i + 4 >= argc) {
usage(argv); }
2365 int xsize = atoi(argv[i+1]);
2367 printf(
"ERROR: invalid X grid size");
2373 int ysize = atoi(argv[i+1]);
2375 printf(
"ERROR: invalid Y grid size");
2381 int zsize = atoi(argv[i+1]);
2383 printf(
"ERROR: invalid Z grid size");
2389 int tsize = atoi(argv[i+1]);
2391 printf(
"ERROR: invalid T grid size");
2401 if( strcmp(argv[i],
"--xgridsize") == 0){
2402 if (i + 1 >= argc) {
usage(argv); }
2403 int xsize = atoi(argv[i+1]);
2405 printf(
"ERROR: invalid X grid size");
2414 if( strcmp(argv[i],
"--ygridsize") == 0){
2415 if (i + 1 >= argc) {
usage(argv); }
2416 int ysize = atoi(argv[i+1]);
2418 printf(
"ERROR: invalid Y grid size");
2427 if( strcmp(argv[i],
"--zgridsize") == 0){
2428 if (i + 1 >= argc) {
usage(argv); }
2429 int zsize = atoi(argv[i+1]);
2431 printf(
"ERROR: invalid Z grid size");
2440 if( strcmp(argv[i],
"--tgridsize") == 0){
2441 if (i + 1 >= argc) {
usage(argv); }
2442 int tsize = atoi(argv[i+1]);
2444 printf(
"ERROR: invalid T grid size");
2453 if( strcmp(argv[i],
"--rank-order") == 0){
2463 if( strcmp(argv[i],
"--dslash-type") == 0){
2464 if (i + 1 >= argc) {
usage(argv); }
2471 if (strcmp(argv[i],
"--laplace3D") == 0) {
2472 if (i + 1 >= argc) {
usage(argv); }
2475 printf(
"ERROR: invalid transverse dim %d given. Please use 3 or 4 for t,none.\n",
laplace3D);
2483 if( strcmp(argv[i],
"--flavor") == 0){
2484 if (i + 1 >= argc) {
usage(argv); }
2491 if( strcmp(argv[i],
"--inv-type") == 0){
2492 if (i + 1 >= argc) {
usage(argv); }
2499 if (strcmp(argv[i],
"--precon-type") == 0) {
2500 if (i + 1 >= argc) {
usage(argv); }
2507 if (strcmp(argv[i],
"--mass") == 0) {
2508 if (i + 1 >= argc) {
usage(argv); }
2509 mass = atof(argv[i+1]);
2515 if( strcmp(argv[i],
"--kappa") == 0){
2519 kappa = atof(argv[i+1]);
2525 if( strcmp(argv[i],
"--compute-clover") == 0){
2529 if (strcmp(argv[i+1],
"true") == 0){
2531 }
else if (strcmp(argv[i+1],
"false") == 0){
2534 fprintf(stderr,
"ERROR: invalid compute_clover type\n");
2543 if( strcmp(argv[i],
"--clover-coeff") == 0){
2553 if( strcmp(argv[i],
"--mu") == 0){
2557 mu = atof(argv[i+1]);
2563 if (strcmp(argv[i],
"--epsilon") == 0) {
2564 if (i + 1 >= argc) {
usage(argv); }
2571 if( strcmp(argv[i],
"--compute-fat-long") == 0){
2575 if (strcmp(argv[i+1],
"true") == 0){
2577 }
else if (strcmp(argv[i+1],
"false") == 0){
2580 fprintf(stderr,
"ERROR: invalid compute_fatlong type\n");
2590 if( strcmp(argv[i],
"--epsilon-naik") == 0){
2600 if( strcmp(argv[i],
"--tadpole-coeff") == 0){
2610 if( strcmp(argv[i],
"--anisotropy") == 0){
2620 if( strcmp(argv[i],
"--tol") == 0){
2624 tol= atof(argv[i+1]);
2630 if( strcmp(argv[i],
"--tolhq") == 0){
2640 if( strcmp(argv[i],
"--reliable-delta") == 0){
2650 if( strcmp(argv[i],
"--alternative-reliable") == 0){
2654 if (strcmp(argv[i+1],
"true") == 0) {
2656 }
else if (strcmp(argv[i+1],
"false") == 0) {
2659 fprintf(stderr,
"ERROR: invalid multishift boolean\n");
2667 if( strcmp(argv[i],
"--mass-normalization") == 0){
2677 if( strcmp(argv[i],
"--matpc") == 0){
2687 if( strcmp(argv[i],
"--solve-type") == 0){
2697 if (strcmp(argv[i],
"--solution-type") == 0) {
2698 if (i + 1 >= argc) {
usage(argv); }
2705 if( strcmp(argv[i],
"--load-gauge") == 0){
2706 if (i + 1 >= argc) {
usage(argv); }
2713 if( strcmp(argv[i],
"--save-gauge") == 0){
2714 if (i + 1 >= argc) {
usage(argv); }
2721 if (strcmp(argv[i],
"--unit-gauge") == 0) {
2722 if (i + 1 >= argc) {
usage(argv); }
2724 if (strcmp(argv[i + 1],
"true") == 0) {
2726 }
else if (strcmp(argv[i + 1],
"false") == 0) {
2729 fprintf(stderr,
"ERROR: invalid unit-gauge type given (true/false)\n");
2738 if( strcmp(argv[i],
"--nsrc") == 0){
2742 Nsrc = atoi(argv[i+1]);
2743 if (Nsrc < 0 || Nsrc > 128){
2744 printf(
"ERROR: invalid number of sources (Nsrc=%d)\n",
Nsrc);
2752 if( strcmp(argv[i],
"--msrc") == 0){
2756 Msrc = atoi(argv[i+1]);
2757 if (Msrc < 1 || Msrc > 128){
2758 printf(
"ERROR: invalid number of sources (Msrc=%d)\n",
Msrc);
2766 if( strcmp(argv[i],
"--test") == 0){
2767 if (i + 1 >= argc) {
usage(argv); }
2774 if( strcmp(argv[i],
"--mg-nvec") == 0){
2778 int level = atoi(argv[i+1]);
2780 printf(
"ERROR: invalid multigrid level %d", level);
2785 nvec[level] = atoi(argv[i+1]);
2786 if (
nvec[level] < 0 ||
nvec[level] > 1024) {
2787 printf(
"ERROR: invalid number of vectors (%d)\n",
nvec[level]);
2795 if( strcmp(argv[i],
"--mg-levels") == 0){
2801 printf(
"ERROR: invalid number of multigrid levels (%d)\n",
mg_levels);
2809 if( strcmp(argv[i],
"--mg-nu-pre") == 0){
2813 int level = atoi(argv[i+1]);
2815 printf(
"ERROR: invalid multigrid level %d", level);
2820 nu_pre[level] = atoi(argv[i+1]);
2822 printf(
"ERROR: invalid pre-smoother applications value (nu_pre=%d)\n",
nu_pre[level]);
2830 if( strcmp(argv[i],
"--mg-nu-post") == 0){
2834 int level = atoi(argv[i+1]);
2836 printf(
"ERROR: invalid multigrid level %d", level);
2841 nu_post[level] = atoi(argv[i+1]);
2843 printf(
"ERROR: invalid pre-smoother applications value (nu_post=%d)\n",
nu_post[level]);
2851 if( strcmp(argv[i],
"--mg-coarse-solve-type") == 0){
2855 int level = atoi(argv[i+1]);
2857 printf(
"ERROR: invalid multigrid level %d", level);
2868 if( strcmp(argv[i],
"--mg-smoother-solve-type") == 0){
2872 int level = atoi(argv[i+1]);
2874 printf(
"ERROR: invalid multigrid level %d", level);
2885 if( strcmp(argv[i],
"--mg-solver-location") == 0){
2889 int level = atoi(argv[i+1]);
2892 printf(
"ERROR: invalid multigrid level %d", level);
2903 if( strcmp(argv[i],
"--mg-setup-location") == 0){
2907 int level = atoi(argv[i+1]);
2910 printf(
"ERROR: invalid multigrid level %d", level);
2921 if( strcmp(argv[i],
"--mg-setup-inv") == 0){
2925 int level = atoi(argv[i+1]);
2927 printf(
"ERROR: invalid multigrid level %d", level);
2938 if( strcmp(argv[i],
"--mg-setup-iters") == 0){
2942 int level = atoi(argv[i+1]);
2944 printf(
"ERROR: invalid multigrid level %d", level);
2955 if( strcmp(argv[i],
"--mg-setup-tol") == 0){
2959 int level = atoi(argv[i+1]);
2961 printf(
"ERROR: invalid multigrid level %d", level);
2973 if( strcmp(argv[i],
"--mg-setup-maxiter") == 0){
2977 int level = atoi(argv[i+1]);
2979 printf(
"ERROR: invalid multigrid level %d", level);
2990 if( strcmp(argv[i],
"--mg-setup-maxiter-refresh") == 0){
2994 int level = atoi(argv[i+1]);
2996 printf(
"ERROR: invalid multigrid level %d", level);
3007 if( strcmp(argv[i],
"--mg-setup-ca-basis-type") == 0){
3011 int level = atoi(argv[i+1]);
3013 printf(
"ERROR: invalid multigrid level %d", level);
3018 if (strcmp(argv[i+1],
"power") == 0){
3020 }
else if (strcmp(argv[i+1],
"chebyshev") == 0 || strcmp(argv[i+1],
"cheby") == 0){
3023 fprintf(stderr,
"ERROR: invalid value for basis ca cg type\n");
3032 if( strcmp(argv[i],
"--mg-setup-ca-basis-size") == 0){
3036 int level = atoi(argv[i+1]);
3038 printf(
"ERROR: invalid multigrid level %d", level);
3053 if( strcmp(argv[i],
"--mg-setup-cheby-basis-eig-min") == 0){
3057 int level = atoi(argv[i+1]);
3059 printf(
"ERROR: invalid multigrid level %d", level);
3066 printf(
"ERROR: invalid minimum eigenvalue for CA-CG (%f < 0.0)\n",
setup_ca_lambda_min[level]);
3074 if( strcmp(argv[i],
"--mg-setup-cheby-basis-eig-max") == 0){
3078 int level = atoi(argv[i+1]);
3080 printf(
"ERROR: invalid multigrid level %d", level);
3092 if( strcmp(argv[i],
"--mg-setup-type") == 0){
3097 if( strcmp(argv[i+1],
"test") == 0)
3099 else if( strcmp(argv[i+1],
"null")==0)
3102 fprintf(stderr,
"ERROR: invalid setup type\n");
3111 if( strcmp(argv[i],
"--mg-pre-orth") == 0){
3116 if (strcmp(argv[i+1],
"true") == 0){
3118 }
else if (strcmp(argv[i+1],
"false") == 0){
3121 fprintf(stderr,
"ERROR: invalid pre orthogonalize type\n");
3130 if( strcmp(argv[i],
"--mg-post-orth") == 0){
3135 if (strcmp(argv[i+1],
"true") == 0){
3137 }
else if (strcmp(argv[i+1],
"false") == 0){
3140 fprintf(stderr,
"ERROR: invalid post orthogonalize type\n");
3149 if (strcmp(argv[i],
"--mg-n-block-ortho") == 0) {
3150 if (i + 2 >= argc) {
usage(argv); }
3152 int level = atoi(argv[i + 1]);
3154 printf(
"ERROR: invalid multigrid level %d for number of block orthos", level);
3161 fprintf(stderr,
"ERROR: invalid number %d of block orthonormalizations",
n_block_ortho[level]);
3168 if( strcmp(argv[i],
"--mg-omega") == 0){
3173 omega = atof(argv[i+1]);
3179 if( strcmp(argv[i],
"--mg-verbosity") == 0){
3183 int level = atoi(argv[i+1]);
3185 printf(
"ERROR: invalid multigrid level %d", level);
3196 if( strcmp(argv[i],
"--mg-coarse-solver") == 0){
3200 int level = atoi(argv[i+1]);
3202 printf(
"ERROR: invalid multigrid level %d for coarse solver", level);
3213 if( strcmp(argv[i],
"--mg-smoother") == 0){
3217 int level = atoi(argv[i+1]);
3219 printf(
"ERROR: invalid multigrid level %d", level);
3230 if( strcmp(argv[i],
"--mg-smoother-tol") == 0){
3234 int level = atoi(argv[i+1]);
3236 printf(
"ERROR: invalid multigrid level %d", level);
3248 if( strcmp(argv[i],
"--mg-coarse-solver-tol") == 0){
3252 int level = atoi(argv[i+1]);
3254 printf(
"ERROR: invalid multigrid level %d for coarse solver", level);
3265 if( strcmp(argv[i],
"--mg-coarse-solver-maxiter") == 0){
3270 int level = atoi(argv[i+1]);
3272 printf(
"ERROR: invalid multigrid level %d for coarse solver", level);
3283 if( strcmp(argv[i],
"--mg-coarse-solver-ca-basis-type") == 0){
3287 int level = atoi(argv[i+1]);
3289 printf(
"ERROR: invalid multigrid level %d", level);
3294 if (strcmp(argv[i+1],
"power") == 0){
3296 }
else if (strcmp(argv[i+1],
"chebyshev") == 0 || strcmp(argv[i+1],
"cheby") == 0){
3299 fprintf(stderr,
"ERROR: invalid value for basis ca cg type\n");
3308 if( strcmp(argv[i],
"--mg-coarse-solver-ca-basis-size") == 0){
3312 int level = atoi(argv[i+1]);
3314 printf(
"ERROR: invalid multigrid level %d", level);
3329 if( strcmp(argv[i],
"--mg-coarse-solver-cheby-basis-eig-min") == 0){
3333 int level = atoi(argv[i+1]);
3335 printf(
"ERROR: invalid multigrid level %d", level);
3350 if( strcmp(argv[i],
"--mg-coarse-solver-cheby-basis-eig-max") == 0){
3354 int level = atoi(argv[i+1]);
3356 printf(
"ERROR: invalid multigrid level %d", level);
3370 if( strcmp(argv[i],
"--mg-smoother-halo-prec") == 0){
3381 if( strcmp(argv[i],
"--mg-schwarz-type") == 0){
3385 int level = atoi(argv[i+1]);
3387 printf(
"ERROR: invalid multigrid level %d", level);
3398 if( strcmp(argv[i],
"--mg-schwarz-cycle") == 0){
3402 int level = atoi(argv[i+1]);
3404 printf(
"ERROR: invalid multigrid level %d", level);
3411 printf(
"ERROR: invalid Schwarz cycle value requested %d for level %d",
3420 if( strcmp(argv[i],
"--mg-block-size") == 0){
3421 if (i + 5 >= argc) {
usage(argv); }
3422 int level = atoi(argv[i+1]);
3424 printf(
"ERROR: invalid multigrid level %d", level);
3429 int xsize = atoi(argv[i+1]);
3431 printf(
"ERROR: invalid X block size");
3437 int ysize = atoi(argv[i+1]);
3439 printf(
"ERROR: invalid Y block size");
3445 int zsize = atoi(argv[i+1]);
3447 printf(
"ERROR: invalid Z block size");
3453 int tsize = atoi(argv[i+1]);
3455 printf(
"ERROR: invalid T block size");
3465 if( strcmp(argv[i],
"--mg-mu-factor") == 0){
3469 int level = atoi(argv[i+1]);
3471 printf(
"ERROR: invalid multigrid level %d", level);
3476 double factor = atof(argv[i+1]);
3483 if( strcmp(argv[i],
"--mg-generate-nullspace") == 0){
3488 if (strcmp(argv[i+1],
"true") == 0){
3490 }
else if (strcmp(argv[i+1],
"false") == 0){
3493 fprintf(stderr,
"ERROR: invalid generate nullspace type\n");
3502 if( strcmp(argv[i],
"--mg-generate-all-levels") == 0){
3507 if (strcmp(argv[i+1],
"true") == 0){
3509 }
else if (strcmp(argv[i+1],
"false") == 0){
3512 fprintf(stderr,
"ERROR: invalid value for generate_all_levels type\n");
3521 if( strcmp(argv[i],
"--mg-load-vec") == 0){
3522 if (i + 2 >= argc) {
usage(argv); }
3523 int level = atoi(argv[i + 1]);
3525 printf(
"ERROR: invalid multigrid level %d", level);
3536 if( strcmp(argv[i],
"--mg-save-vec") == 0){
3540 int level = atoi(argv[i + 1]);
3542 printf(
"ERROR: invalid multigrid level %d", level);
3552 if( strcmp(argv[i],
"--df-nev") == 0){
3557 nev = atoi(argv[i+1]);
3563 if( strcmp(argv[i],
"--df-max-search-dim") == 0){
3574 if( strcmp(argv[i],
"--df-deflation-grid") == 0){
3586 if( strcmp(argv[i],
"--df-eigcg-max-restarts") == 0){
3597 if( strcmp(argv[i],
"--df-max-restart-num") == 0){
3608 if( strcmp(argv[i],
"--df-tol-restart") == 0){
3619 if( strcmp(argv[i],
"--df-tol-eigenval") == 0){
3630 if( strcmp(argv[i],
"--df-tol-inc") == 0){
3641 if( strcmp(argv[i],
"--solver-ext-lib-type") == 0){
3651 if( strcmp(argv[i],
"--df-ext-lib-type") == 0){
3661 if( strcmp(argv[i],
"--df-location-ritz") == 0){
3671 if( strcmp(argv[i],
"--df-mem-type-ritz") == 0){
3681 if (strcmp(argv[i],
"--eig-nEv") == 0) {
3682 if (i + 1 >= argc) {
usage(argv); }
3689 if (strcmp(argv[i],
"--eig-nKr") == 0) {
3690 if (i + 1 >= argc) {
usage(argv); }
3697 if (strcmp(argv[i],
"--eig-nConv") == 0) {
3698 if (i + 1 >= argc) {
usage(argv); }
3705 if (strcmp(argv[i],
"--eig-require-convergence") == 0) {
3706 if (i + 1 >= argc) {
usage(argv); }
3708 if (strcmp(argv[i + 1],
"true") == 0) {
3710 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3713 fprintf(stderr,
"ERROR: invalid value for require-convergence (true/false)\n");
3722 if (strcmp(argv[i],
"--eig-check-interval") == 0) {
3723 if (i + 1 >= argc) {
usage(argv); }
3730 if (strcmp(argv[i],
"--eig-max-restarts") == 0) {
3731 if (i + 1 >= argc) {
usage(argv); }
3738 if (strcmp(argv[i],
"--eig-tol") == 0) {
3739 if (i + 1 >= argc) {
usage(argv); }
3746 if (strcmp(argv[i],
"--eig-use-poly-acc") == 0) {
3747 if (i + 1 >= argc) {
usage(argv); }
3749 if (strcmp(argv[i + 1],
"true") == 0) {
3751 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3754 fprintf(stderr,
"ERROR: invalid value for use-poly-acc (true/false)\n");
3763 if (strcmp(argv[i],
"--eig-poly-deg") == 0) {
3764 if (i + 1 >= argc) {
usage(argv); }
3771 if (strcmp(argv[i],
"--eig-amin") == 0) {
3772 if (i + 1 >= argc) {
usage(argv); }
3779 if (strcmp(argv[i],
"--eig-amax") == 0) {
3780 if (i + 1 >= argc) {
usage(argv); }
3787 if (strcmp(argv[i],
"--eig-use-normop") == 0) {
3788 if (i + 1 >= argc) {
usage(argv); }
3790 if (strcmp(argv[i + 1],
"true") == 0) {
3792 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3795 fprintf(stderr,
"ERROR: invalid value for eig-use-normop (true/false)\n");
3804 if (strcmp(argv[i],
"--eig-use-dagger") == 0) {
3805 if (i + 1 >= argc) {
usage(argv); }
3807 if (strcmp(argv[i + 1],
"true") == 0) {
3809 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3812 fprintf(stderr,
"ERROR: invalid value for eig-use-dagger (true/false)\n");
3821 if (strcmp(argv[i],
"--eig-compute-svd") == 0) {
3822 if (i + 1 >= argc) {
usage(argv); }
3824 if (strcmp(argv[i + 1],
"true") == 0) {
3826 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3829 fprintf(stderr,
"ERROR: invalid value for eig-compute-svd (true/false)\n");
3838 if (strcmp(argv[i],
"--eig-spectrum") == 0) {
3839 if (i + 1 >= argc) {
usage(argv); }
3846 if (strcmp(argv[i],
"--eig-type") == 0) {
3847 if (i + 1 >= argc) {
usage(argv); }
3854 if (strcmp(argv[i],
"--eig-ARPACK-logfile") == 0) {
3855 if (i + 1 >= argc) {
usage(argv); }
3862 if (strcmp(argv[i],
"--eig-QUDA-logfile") == 0) {
3863 if (i + 1 >= argc) {
usage(argv); }
3870 if (strcmp(argv[i],
"--eig-arpack-check") == 0) {
3871 if (i + 1 >= argc) {
usage(argv); }
3873 if (strcmp(argv[i + 1],
"true") == 0) {
3875 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3878 fprintf(stderr,
"ERROR: invalid value for arpack-check (true/false)\n");
3887 if (strcmp(argv[i],
"--eig-load-vec") == 0) {
3888 if (i + 1 >= argc) {
usage(argv); }
3895 if (strcmp(argv[i],
"--eig-save-vec") == 0) {
3896 if (i + 1 >= argc) {
usage(argv); }
3903 if (strcmp(argv[i],
"--mg-eig") == 0) {
3904 if (i + 1 >= argc) {
usage(argv); }
3905 int level = atoi(argv[i + 1]);
3907 printf(
"ERROR: invalid multigrid level %d", level);
3912 if (strcmp(argv[i + 1],
"true") == 0) {
3914 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3917 fprintf(stderr,
"ERROR: invalid value for mg-eig %d (true/false)\n", level);
3926 if (strcmp(argv[i],
"--mg-eig-nEv") == 0) {
3927 if (i + 1 >= argc) {
usage(argv); }
3928 int level = atoi(argv[i + 1]);
3930 printf(
"ERROR: invalid multigrid level %d", level);
3942 if (strcmp(argv[i],
"--mg-eig-nKr") == 0) {
3943 if (i + 1 >= argc) {
usage(argv); }
3944 int level = atoi(argv[i + 1]);
3946 printf(
"ERROR: invalid multigrid level %d", level);
3958 if (strcmp(argv[i],
"--mg-eig-require-convergence") == 0) {
3959 if (i + 1 >= argc) {
usage(argv); }
3960 int level = atoi(argv[i + 1]);
3962 printf(
"ERROR: invalid multigrid level %d", level);
3967 if (strcmp(argv[i + 1],
"true") == 0) {
3969 }
else if (strcmp(argv[i + 1],
"false") == 0) {
3972 fprintf(stderr,
"ERROR: invalid value for mg-eig-require-convergence %d (true/false)\n", level);
3981 if (strcmp(argv[i],
"--mg-eig-check-interval") == 0) {
3982 if (i + 1 >= argc) {
usage(argv); }
3983 int level = atoi(argv[i + 1]);
3985 printf(
"ERROR: invalid multigrid level %d", level);
3997 if (strcmp(argv[i],
"--mg-eig-max-restarts") == 0) {
3998 if (i + 1 >= argc) {
usage(argv); }
3999 int level = atoi(argv[i + 1]);
4001 printf(
"ERROR: invalid multigrid level %d", level);
4013 if (strcmp(argv[i],
"--mg-eig-tol") == 0) {
4014 if (i + 1 >= argc) {
usage(argv); }
4015 int level = atoi(argv[i + 1]);
4017 printf(
"ERROR: invalid multigrid level %d", level);
4029 if (strcmp(argv[i],
"--mg-eig-use-poly-acc") == 0) {
4030 if (i + 1 >= argc) {
usage(argv); }
4031 int level = atoi(argv[i + 1]);
4033 printf(
"ERROR: invalid multigrid level %d", level);
4038 if (strcmp(argv[i + 1],
"true") == 0) {
4040 }
else if (strcmp(argv[i + 1],
"false") == 0) {
4043 fprintf(stderr,
"ERROR: invalid value for mg-eig-use-poly-acc %d (true/false)\n", level);
4052 if (strcmp(argv[i],
"--mg-eig-poly-deg") == 0) {
4053 if (i + 1 >= argc) {
usage(argv); }
4054 int level = atoi(argv[i + 1]);
4056 printf(
"ERROR: invalid multigrid level %d", level);
4068 if (strcmp(argv[i],
"--mg-eig-amin") == 0) {
4069 if (i + 1 >= argc) {
usage(argv); }
4070 int level = atoi(argv[i + 1]);
4072 printf(
"ERROR: invalid multigrid level %d", level);
4084 if (strcmp(argv[i],
"--mg-eig-amax") == 0) {
4085 if (i + 1 >= argc) {
usage(argv); }
4086 int level = atoi(argv[i + 1]);
4088 printf(
"ERROR: invalid multigrid level %d", level);
4100 if (strcmp(argv[i],
"--mg-eig-use-normop") == 0) {
4101 if (i + 1 >= argc) {
usage(argv); }
4102 int level = atoi(argv[i + 1]);
4104 printf(
"ERROR: invalid multigrid level %d", level);
4109 if (strcmp(argv[i + 1],
"true") == 0) {
4111 }
else if (strcmp(argv[i + 1],
"false") == 0) {
4114 fprintf(stderr,
"ERROR: invalid value for mg-eig-use-normop (true/false)\n");
4123 if (strcmp(argv[i],
"--mg-eig-use-dagger") == 0) {
4124 if (i + 1 >= argc) {
usage(argv); }
4125 int level = atoi(argv[i + 1]);
4127 printf(
"ERROR: invalid multigrid level %d", level);
4132 if (strcmp(argv[i + 1],
"true") == 0) {
4134 }
else if (strcmp(argv[i + 1],
"false") == 0) {
4137 fprintf(stderr,
"ERROR: invalid value for mg-eig-use-dagger (true/false)\n");
4146 if (strcmp(argv[i],
"--mg-eig-spectrum") == 0) {
4147 if (i + 1 >= argc) {
usage(argv); }
4148 int level = atoi(argv[i + 1]);
4150 printf(
"ERROR: invalid multigrid level %d", level);
4162 if (strcmp(argv[i],
"--mg-eig-type") == 0) {
4163 if (i + 1 >= argc) {
usage(argv); }
4164 int level = atoi(argv[i + 1]);
4166 printf(
"ERROR: invalid multigrid level %d", level);
4178 if (strcmp(argv[i],
"--mg-eig-coarse-guess") == 0) {
4179 if (i + 1 >= argc) {
usage(argv); }
4181 if (strcmp(argv[i + 1],
"true") == 0) {
4183 }
else if (strcmp(argv[i + 1],
"false") == 0) {
4186 fprintf(stderr,
"ERROR: invalid value for mg-eig-coarse-guess (true/false)\n");
4195 if( strcmp(argv[i],
"--niter") == 0){
4199 niter= atoi(argv[i+1]);
4200 if (niter < 1 || niter > 1e6){
4201 printf(
"ERROR: invalid number of iterations (%d)\n",
niter);
4209 if( strcmp(argv[i],
"--ngcrkrylov") == 0 || strcmp(argv[i],
"--ca-basis-size") == 0) {
4214 if (gcrNkrylov < 1 || gcrNkrylov > 1e6){
4215 printf(
"ERROR: invalid number of gcrkrylov iterations (%d)\n",
gcrNkrylov);
4223 if( strcmp(argv[i],
"--ca-basis-type") == 0){
4228 if (strcmp(argv[i+1],
"power") == 0){
4230 }
else if (strcmp(argv[i+1],
"chebyshev") == 0 || strcmp(argv[i+1],
"cheby") == 0){
4233 fprintf(stderr,
"ERROR: invalid value for basis ca cg type\n");
4242 if( strcmp(argv[i],
"--cheby-basis-eig-min") == 0){
4248 printf(
"ERROR: invalid minimum eigenvalue for CA-CG (%f < 0.0)\n",
ca_lambda_min);
4256 if( strcmp(argv[i],
"--cheby-basis-eig-max") == 0){
4267 if( strcmp(argv[i],
"--pipeline") == 0){
4272 if (pipeline < 0 || pipeline > 16){
4273 printf(
"ERROR: invalid pipeline length (%d)\n",
pipeline);
4281 if( strcmp(argv[i],
"--solution-pipeline") == 0){
4286 if (solution_accumulator_pipeline < 0 || solution_accumulator_pipeline > 16){
4295 if (strcmp(argv[i],
"--gaussian-sigma") == 0) {
4296 if (i + 1 >= argc) {
usage(argv); }
4298 if (gaussian_sigma < 0.0 || gaussian_sigma > 1.0) {
4307 if( strcmp(argv[i],
"--heatbath-beta") == 0){
4321 if( strcmp(argv[i],
"--heatbath-warmup-steps") == 0){
4335 if( strcmp(argv[i],
"--heatbath-num-steps") == 0){
4349 if( strcmp(argv[i],
"--heatbath-num-hb-per-step") == 0){
4363 if( strcmp(argv[i],
"--heatbath-num-or-per-step") == 0){
4377 if( strcmp(argv[i],
"--heatbath-coldstart") == 0){
4382 if (strcmp(argv[i+1],
"true") == 0){
4384 }
else if (strcmp(argv[i+1],
"false") == 0){
4387 fprintf(stderr,
"ERROR: invalid value for heatbath_coldstart type\n");
4396 if (strcmp(argv[i],
"--contract-type") == 0) {
4397 if (i + 1 >= argc) {
usage(argv); }
4404 if( strcmp(argv[i],
"--version") == 0){
4405 printf(
"This program is linked with QUDA library, version %s,",
get_quda_ver_str());
4406 printf(
" %s GPU build\n", msg);
4424 struct timeval endTime;
4425 gettimeofday(&endTime, NULL);
4427 long ds = endTime.tv_sec -
startTime.tv_sec;
4428 long dus = endTime.tv_usec -
startTime.tv_usec;
4429 return ds + 0.000001*dus;
void printGaugeElement(void *gauge, int X, QudaPrecision precision)
QudaPrecision prec_precondition
QudaCABasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL]
bool alternative_reliable
static void orthogonalize(complex< Float > *a, complex< Float > *b, int len)
int dimPartitioned(int dim)
void applyGaugeFieldScaling_long(Float **gauge, int Vh, QudaGaugeParam *param, QudaDslashType dslash_type)
enum QudaMassNormalization_s QudaMassNormalization
double setup_ca_lambda_max[QUDA_MAX_MG_LEVEL]
double heatbath_beta_value
void get_gridsize_from_env(int *const dims)
QudaSolutionType get_solution_type(char *s)
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
static void sum(Float *dst, Float *a, Float *b, int cnt)
void construct_gauge_field(void **gauge, int type, QudaPrecision precision, QudaGaugeParam *param)
static void constructUnitGaugeField(Float **res, QudaGaugeParam *param)
int compareLink(Float **linkA, Float **linkB, int len)
enum QudaPrecision_s QudaPrecision
char mg_vec_outfile[QUDA_MAX_MG_LEVEL][256]
int heatbath_num_heatbath_per_step
QudaPrecision prec_refinement_sloppy
_EXTERN_C_ int MPI_Init(int *argc, char ***argv)
QudaTwistFlavorType get_flavor_type(char *s)
void dw_setDims(int *X, const int L5)
static void printVector(Float *v)
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM]
__host__ __device__ ValueType norm(const complex< ValueType > &z)
Returns the magnitude of z squared.
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL]
void accumulateConjugateProduct(Float *a, Float *b, Float *c, int sign)
QudaEigType mg_eig_type[QUDA_MAX_MG_LEVEL]
int heatbath_warmup_steps
int setup_maxiter[QUDA_MAX_MG_LEVEL]
Matrix< N, std::complex< T > > conj(const Matrix< N, std::complex< T > > &mat)
void printSpinorElement(void *spinor, int X, QudaPrecision precision)
void createHwCPU(void *hw, QudaPrecision precision)
bool mg_eig_use_normop[QUDA_MAX_MG_LEVEL]
int nvec[QUDA_MAX_MG_LEVEL]
void createMomCPU(void *mom, QudaPrecision precision)
enum QudaSolveType_s QudaSolveType
__host__ __device__ ValueType sqrt(ValueType x)
void commDimPartitionedSet(int dir)
char eig_vec_outfile[256]
int process_command_line_option(int argc, char **argv, int *idx)
void complexDotProduct(Float *a, Float *b, Float *c)
QudaFieldLocation location_ritz
int mg_eig_nEv[QUDA_MAX_MG_LEVEL]
int fullLatticeIndex_5d_4dpc(int i, int oddBit)
static void applyGaugeFieldScaling(Float **gauge, int Vh, QudaGaugeParam *param)
QudaPrecision smoother_halo_prec
QudaContractType contract_type
static void applyStaggeredScaling(Float **res, QudaGaugeParam *param, int type)
static struct timeval startTime
void su3Construct8(Float *mat)
double setup_tol[QUDA_MAX_MG_LEVEL]
char mg_vec_infile[QUDA_MAX_MG_LEVEL][256]
void usage_extra(char **argv)
void constructUnitaryGaugeField(Float **res)
int gridsize_from_cmdline[4]
int compare_floats(void *a, void *b, int len, double epsilon, QudaPrecision precision)
int get_rank_order(char *s)
enum QudaEigType_s QudaEigType
void construct_spinor_source(void *v, int nSpin, int nColor, QudaPrecision precision, const int *const x, quda::RNG &rng)
double mu_factor[QUDA_MAX_MG_LEVEL]
static int compare_link(void **linkA, void **linkB, int len, QudaPrecision precision)
QudaVerbosity mg_verbosity[QUDA_MAX_MG_LEVEL]
QudaReconstructType get_recon(char *s)
QudaInverterType precon_type
bool mg_eig[QUDA_MAX_MG_LEVEL]
QudaFieldLocation get_location(char *s)
static void printMomElement(void *mom, int X, QudaPrecision precision)
bool mg_eig_require_convergence[QUDA_MAX_MG_LEVEL]
void createSiteLinkCPU(void **link, QudaPrecision precision, int phase)
char eig_QUDA_logfile[256]
std::map< TuneKey, TuneParam > map
QudaSiteSubset siteSubset
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL]
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata)
QudaSchwarzType get_schwarz_type(char *s)
int fullLatticeIndex(int dim[4], int index, int oddBit)
QudaSolveType get_solve_type(char *s)
QudaReconstructType link_recon_precondition
double coarse_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL]
QudaExtLibType get_solve_ext_lib_type(char *s)
QudaCABasis setup_ca_basis[QUDA_MAX_MG_LEVEL]
int compare_mom(Float *momA, Float *momB, int len)
double stopwatchReadSeconds()
QudaInverterType get_solver_type(char *s)
double mg_eig_tol[QUDA_MAX_MG_LEVEL]
int nu_pre[QUDA_MAX_MG_LEVEL]
static int dim_partitioned[4]
__host__ __device__ ValueType sin(ValueType x)
QudaFieldOrder fieldOrder
double coarse_solver_tol[QUDA_MAX_MG_LEVEL]
QudaInverterType setup_inv[QUDA_MAX_MG_LEVEL]
void setSpinorSiteSize(int n)
double mg_eig_amin[QUDA_MAX_MG_LEVEL]
QudaDslashType get_dslash_type(char *s)
double mg_eig_amax[QUDA_MAX_MG_LEVEL]
void complexAddTo(Float *a, Float *b)
QudaEigSpectrumType get_eig_spectrum_type(char *s)
QudaFieldLocation setup_location[QUDA_MAX_MG_LEVEL]
__host__ __device__ ValueType atan2(ValueType x, ValueType y)
Class declaration to initialize and hold CURAND RNG states.
QudaFieldLocation solver_location[QUDA_MAX_MG_LEVEL]
enum QudaMatPCType_s QudaMatPCType
QudaInverterType smoother_type[QUDA_MAX_MG_LEVEL]
static int compareFloats(Float *a, Float *b, int len, double epsilon)
bool eig_require_convergence
void accumulateComplexProduct(Float *a, Float *b, Float *c, Float sign)
QudaSchwarzType schwarz_type[QUDA_MAX_MG_LEVEL]
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
enum QudaSolutionType_s QudaSolutionType
QudaReconstructType link_recon_sloppy
int heatbath_num_overrelax_per_step
static void constructGaugeField(Float **res, QudaGaugeParam *param, QudaDslashType dslash_type=QUDA_WILSON_DSLASH)
bool mg_eig_use_dagger[QUDA_MAX_MG_LEVEL]
enum QudaSchwarzType_s QudaSchwarzType
QudaMatPCType get_matpc_type(char *s)
int mg_eig_nKr[QUDA_MAX_MG_LEVEL]
QudaEigSpectrumType eig_spectrum
int coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL]
int(* QudaCommsMap)(const int *coords, void *fdata)
enum QudaDagType_s QudaDagType
void su3Construct12(Float *mat)
static void su3Reconstruct8(Float *mat, int dir, int ga_idx, QudaGaugeParam *param)
void __attribute__((weak)) usage_extra(char **argv)
int mg_eig_check_interval[QUDA_MAX_MG_LEVEL]
void complexProduct(Float *a, Float *b, Float *c)
QudaEigType get_eig_type(char *s)
int neighborIndex(int i, int oddBit, int dx4, int dx3, int dx2, int dx1)
static void su3Reconstruct12(Float *mat, int dir, int ga_idx, QudaGaugeParam *param)
int setup_ca_basis_size[QUDA_MAX_MG_LEVEL]
int strong_check_mom(void *momA, void *momB, int len, QudaPrecision prec)
void check_gauge(void **oldG, void **newG, double epsilon, QudaPrecision precision)
static void printLinkElement(void *link, int X, QudaPrecision precision)
QudaTwistFlavorType twist_flavor
int mg_eig_poly_deg[QUDA_MAX_MG_LEVEL]
int strong_check_link(void **linkA, const char *msgA, void **linkB, const char *msgB, int len, QudaPrecision prec)
QudaMemoryType get_df_mem_type_ritz(char *s)
int nu_post[QUDA_MAX_MG_LEVEL]
int neighborIndex_mg(int i, int oddBit, int dx4, int dx3, int dx2, int dx1)
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
static int index(int ndim, const int *dims, const int *x)
void construct_clover_field(void *clover, double norm, double diag, QudaPrecision precision)
QudaDslashType dslash_type
QudaExtLibType solver_ext_lib
int coarse_solver_maxiter[QUDA_MAX_MG_LEVEL]
int neighborIndexFullLattice_mg(int i, int dx4, int dx3, int dx2, int dx1)
enum QudaFieldLocation_s QudaFieldLocation
double setup_ca_lambda_min[QUDA_MAX_MG_LEVEL]
int fullLatticeIndex_5d(int i, int oddBit)
static int commDim[QUDA_MAX_DIM]
cpuColorSpinorField * out
enum QudaEigSpectrumType_s QudaEigSpectrumType
int solution_accumulator_pipeline
QudaExtLibType deflation_ext_lib
enum QudaReconstructType_s QudaReconstructType
QudaSolutionType solution_type
enum QudaCABasis_s QudaCABasis
#define QUDA_MAX_MG_LEVEL
Maximum number of multi-grid levels. This number may be increased if needed.
enum QudaSetupType_s QudaSetupType
int fullLatticeIndex_4d(int i, int oddBit)
int x4_from_full_index(int i)
QudaSolveType coarse_solve_type[QUDA_MAX_MG_LEVEL]
QudaMassNormalization get_mass_normalization_type(char *s)
static void normalize(complex< Float > *a, int len)
int n_block_ortho[QUDA_MAX_MG_LEVEL]
int setup_maxiter_refresh[QUDA_MAX_MG_LEVEL]
char eig_arpack_logfile[256]
QudaInverterType inv_type
QudaMassNormalization normalization
enum QudaDslashType_s QudaDslashType
void accumulateComplexDotProduct(Float *a, Float *b, Float *c)
static int lex_rank_from_coords_t(const int *coords, void *fdata)
enum QudaContractType_s QudaContractType
__host__ __device__ ValueType cos(ValueType x)
__host__ __device__ complex< ValueType > polar(const ValueType &m, const ValueType &theta=0)
Returns the complex with magnitude m and angle theta in radians.
double coarse_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL]
int schwarz_cycle[QUDA_MAX_MG_LEVEL]
enum QudaVerbosity_s QudaVerbosity
int mg_eig_max_restarts[QUDA_MAX_MG_LEVEL]
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
QudaMemoryType mem_type_ritz
QudaEigSpectrumType mg_eig_spectrum[QUDA_MAX_MG_LEVEL]
static void constructCloverField(Float *res, double norm, double diag)
void initComms(int argc, char **argv, int *const commDims)
QudaVerbosity get_verbosity_type(char *s)
static int lex_rank_from_coords_x(const int *coords, void *fdata)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
void su3_construct(void *mat, QudaReconstructType reconstruct, QudaPrecision precision)
const char * get_quda_ver_str()
QudaContractType get_contract_type(char *s)
void su3_reconstruct(void *mat, int dir, int ga_idx, QudaReconstructType reconstruct, QudaPrecision precision, QudaGaugeParam *param)
QudaPrecision prec_sloppy
enum QudaInverterType_s QudaInverterType
QudaPrecision get_prec(QIO_Reader *infile)
enum QudaMemoryType_s QudaMemoryType
cpuColorSpinorField * spinor
int num_setup_iter[QUDA_MAX_MG_LEVEL]
static void checkGauge(Float **oldG, Float **newG, double epsilon)
int comm_dim_partitioned(int dim)
enum QudaExtLibType_s QudaExtLibType
void spinorNoise(ColorSpinorField &src, RNG &randstates, QudaNoiseType type)
Generate a random noise spinor. This variant allows the user to manage the RNG state.
void complexConjugateProduct(Float *a, Float *b, Float *c)
bool mg_eig_use_poly_acc[QUDA_MAX_MG_LEVEL]
double smoother_tol[QUDA_MAX_MG_LEVEL]
int neighborIndexFullLattice(int i, int dx4, int dx3, int dx2, int dx1)
static void dot(sFloat *res, gFloat *a, sFloat *b)
QudaReconstructType link_recon
enum QudaTwistFlavorType_s QudaTwistFlavorType