17 #if defined(QMP_COMMS)
19 #elif defined(MPI_COMMS)
// Function-like macro that yields the larger of a and b.
// Both arguments are parenthesized, but the selected one is expanded (and so
// evaluated) twice: do not pass expressions with side effects such as i++.
27 #define MAX(a,b) ((a)>(b)?(a):(b))
// NOTE(review): presumably the number of real values stored per spinor site
// (it equals the 1*3*2 per-site stride used by the random-spinor fill loop
// further down in this file) -- confirm against the call sites.
28 #define mySpinorSiteSize 6
30 extern void usage(
char** argv);
// File-scope pointer arrays; later in this file they are assigned from
// cpuFat->Ghost() and cpuLong->Ghost() respectively.
35 void** ghost_fatlink, **ghost_longlink;
// File-scope residual tolerance, defaulting to 1e-7.
// NOTE(review): presumably overridden by the --tol command-line option; the
// assignment itself is not visible in this excerpt -- confirm.
55 static double tol = 1e-7;
66 template<
typename Float>
68 for(
int i = 0; i <
Vh; i++) {
69 for (
int s = 0;
s < 1;
s++) {
70 for (
int m = 0; m < 3; m++) {
71 res[i*(1*3*2) +
s*(3*2) + m*(2) + 0] = rand() / (
Float)RAND_MAX;
72 res[i*(1*3*2) +
s*(3*2) + m*(2) + 1] = rand() / (
Float)RAND_MAX;
84 double mass,
double tol,
int maxiter,
double reliable_delta,
88 gaugeParam->
X[0] =
X1;
89 gaugeParam->
X[1] =
X2;
90 gaugeParam->
X[2] =
X3;
91 gaugeParam->
X[3] =
X4;
103 gaugeParam->
ga_pad = X1*X2*X3/2;
106 inv_param->
mass = mass;
110 inv_param->
tol = tol;
114 #if __COMPUTE_CAPABILITY__ >= 200
147 inv_param->
sp_pad = X1*X2*X3/2;
163 set_params(&gaugeParam, &inv_param,
189 ((
double*)
fatlink[
dir])[i] = 0.5 *rand()/RAND_MAX;
191 ((
float*)
fatlink[
dir])[i] = 0.5* rand()/RAND_MAX;
200 for(
int d = 0; d < 4; d++) {
201 csParam.
x[d] = gaugeParam.
X[d];
228 int fat_pad = tmp_value;
229 int link_pad = 3*tmp_value;
236 ghost_fatlink = (
void**)cpuFat->
Ghost();
242 ghost_longlink = (
void**)cpuLong->
Ghost();
245 gaugeParam.
ga_pad = fat_pad;
250 gaugeParam.
ga_pad = link_pad;
265 double time0 = -((double)clock());
278 time0 /= CLOCKS_PER_SEC;
298 time0 /= CLOCKS_PER_SEC;
314 errorQuda(
"full spinor not supported\n");
// Length of the masses[] table declared just below (12 initializers).
// NOTE(review): presumably the number of shifts used by the multishift
// solver path -- confirm against inv_param.num_offset.
320 #define NUM_OFFSETS 12
323 double masses[
NUM_OFFSETS] ={0.002, 0.0021, 0.0064, 0.070, 0.077, 0.081, 0.1, 0.11, 0.12, 0.13, 0.14, 0.205};
334 spinorOutArray[0] =
out;
340 outArray[i] = spinorOutArray[i]->
V();
341 inv_param.
offset[i] = 4*masses[i]*masses[i];
354 cudaDeviceSynchronize();
356 time0 /= CLOCKS_PER_SEC;
358 printfQuda(
"done: total time = %g secs, compute time = %g, %i iter / %g secs = %g gflops\n",
367 errorQuda(
"full parity not supported\n");
373 errorQuda(
"ERROR: invalid spinor parity \n");
377 printfQuda(
"%dth solution: mass=%f, ", i, masses[i]);
380 spinorOutArray[i], masses[i], 0, inv_param.
cpu_prec,
389 double l2r = sqrt(nrm2/src2);
391 printfQuda(
"Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
396 if (sqrt(nrm2/src2) > 10*inv_param.
tol_offset[i]){
401 for(
int i=1; i < inv_param.
num_offset;i++)
delete spinorOutArray[i];
413 double l2r = sqrt(nrm2/src2);
415 printfQuda(
"Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
418 printfQuda(
"done: total time = %g secs, compute time = %g secs, %i iter / %g secs = %g gflops, \n",
432 for(
int i=0;i < 4;i++){
442 if (cpuFat)
delete cpuFat;
454 printfQuda(
"prec sloppy_prec link_recon sloppy_link_recon test_type S_dimension T_dimension\n");
475 printfQuda(
" --tol <resid_tol> # Set residual tolerance\n");
477 printfQuda(
" 0: Even even spinor CG inverter\n");
478 printfQuda(
" 1: Odd odd spinor CG inverter\n");
479 printfQuda(
" 3: Even even spinor multishift CG inverter\n");
480 printfQuda(
" 4: Odd odd spinor multishift CG inverter\n");
481 printfQuda(
" --cpu_prec <double/single/half> # Set CPU precision\n");
485 int main(
int argc,
char** argv)
487 for (
int i = 1; i < argc; i++) {
493 if( strcmp(argv[i],
"--tol") == 0){
498 sscanf(argv[i+1],
"%f", &tmpf);
500 printf(
"ERROR: invalid tol(%f)\n", tmpf);
508 if( strcmp(argv[i],
"--cpu_prec") == 0){
517 printf(
"ERROR: Invalid option:%s\n", argv[i]);
529 #if defined(QMP_COMMS)
530 QMP_thread_level_t tl;
531 QMP_init_msg_passing(&argc, &argv, QMP_THREAD_SINGLE, &tl);
532 #elif defined(MPI_COMMS)
533 MPI_Init(&argc, &argv);
544 #if defined(QMP_COMMS)
545 QMP_finalize_msg_passing();
546 #elif defined(MPI_COMMS)