17 #if defined(QMP_COMMS)
19 #elif defined(MPI_COMMS)
/* MAX(a,b): expands to the larger of a and b, or to b when they compare equal.
 * Function-like macro: the selected argument is evaluated twice (once in the
 * comparison, once as the result), so do not pass expressions with side
 * effects such as i++ or a function call that mutates state. */
25 #define MAX(a,b) ((a)>(b)?(a):(b))
47 extern void usage(
char** );
54 printfQuda(
"prec sloppy_prec link_recon sloppy_link_recon S_dimension T_dimension Ls_dimension\n");
71 int main(
int argc,
char **argv)
74 for (
int i = 1; i < argc; i++){
78 printfQuda(
"ERROR: Invalid option:%s\n", argv[i]);
90 #if defined(QMP_COMMS)
91 QMP_thread_level_t tl;
92 QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);
93 #elif defined(MPI_COMMS)
94 MPI_Init(&argc, &argv);
124 gauge_param.
X[0] =
xdim;
125 gauge_param.
X[1] =
ydim;
126 gauge_param.
X[2] =
zdim;
127 gauge_param.
X[3] =
tdim;
146 double mass = -0.4125;
156 inv_param.
mass = 0.02;
158 kappa5 = 0.5/(5 + inv_param.
m5);
164 double offset[4] = {0.01, 0.02, 0.03, 0.04};
181 inv_param.
tol = 1e-7;
182 #if __COMPUTE_CAPABILITY__ >= 200
206 inv_param.
omega = 1.0;
226 int x_face_size = gauge_param.
X[1]*gauge_param.
X[2]*gauge_param.
X[3]/2;
227 int y_face_size = gauge_param.
X[0]*gauge_param.
X[2]*gauge_param.
X[3]/2;
228 int z_face_size = gauge_param.
X[0]*gauge_param.
X[1]*gauge_param.
X[3]/2;
229 int t_face_size = gauge_param.
X[0]*gauge_param.
X[1]*gauge_param.
X[2]/2;
230 int pad_size =
MAX(x_face_size, y_face_size);
231 pad_size =
MAX(pad_size, z_face_size);
232 pad_size =
MAX(pad_size, t_face_size);
233 gauge_param.
ga_pad = pad_size;
265 void *
gauge[4], *clover_inv=0, *clover=0;
290 int asymmetric = preconditioned &&
293 if (!preconditioned) {
296 }
else if (asymmetric) {
306 void *
spinorOut = NULL, **spinorOutMulti = NULL;
308 spinorOutMulti = (
void**)malloc(inv_param.
num_offset*
sizeof(
void *));
330 ((
float*)spinorIn)[0] = 1.0;
334 ((
double*)spinorIn)[0] = 1.0;
338 double time0 = -((double)clock());
358 time0 /= CLOCKS_PER_SEC;
360 printfQuda(
"Device memory used:\n Spinor: %f GiB\n Gauge: %f GiB\n",
363 printfQuda(
"\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n",
385 printfQuda(
"Domain wall not supported for multi-shift\n");
389 axpy(inv_param.
offset[i], spinorOutMulti[i], spinorCheck,
V*spinorSiteSize, inv_param.
cpu_prec);
390 mxpy(spinorIn, spinorCheck,
V*spinorSiteSize, inv_param.
cpu_prec);
391 double nrm2 =
norm_2(spinorCheck,
V*spinorSiteSize, inv_param.
cpu_prec);
393 double l2r = sqrt(nrm2 / src2);
395 printfQuda(
"Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
411 void *evenOut = spinorCheck;
412 void *oddOut = cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenOut + tm_offset): (
void*)((
float*)evenOut + tm_offset);
415 void *oddIn = cpu_prec ==
sizeof(double) ? (
void*)((
double*)evenIn + tm_offset): (
void*)((
float*)evenIn + tm_offset);
417 tm_ndeg_mat(evenOut, oddOut, gauge, evenIn, oddIn, inv_param.
kappa, inv_param.
mu, inv_param.
epsilon, 0, inv_param.
cpu_prec, gauge_param);
462 double nrm2 =
norm_2(spinorCheck,
V*spinorSiteSize*inv_param.
Ls, inv_param.
cpu_prec);
463 double src2 =
norm_2(spinorIn,
V*spinorSiteSize*inv_param.
Ls, inv_param.
cpu_prec);
464 double l2r = sqrt(nrm2 / src2);
466 printfQuda(
"Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
478 #if defined(QMP_COMMS)
479 QMP_finalize_msg_passing();
480 #elif defined(MPI_COMMS)