// Evaluates to the larger of the two arguments; yields `b` when the comparison `a > b` is false.
// Function-like macro: each argument appears twice in the expansion, so avoid
// passing expressions with side effects (e.g. MAX(i++, j)).
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
23 #if QUDA_NVTX_VERSION == 3
24 #include "nvtx3/nvToolsExt.h"
26 #include "nvToolsExt.h"
// Palette used to colour profiling ranges. Values are interpreted as ARGB
// (PUSH_RANGE sets colorType to NVTX_COLOR_ARGB) and are selected with
// colors[color_id % num_colors].
static const uint32_t colors[] = {
  0x0000ff00, // green
  0x000000ff, // blue
  0x00ffff00, // yellow
  0x00ff00ff, // magenta
  0x0000ffff, // cyan
  0x00ff0000, // red
  0x00ffffff  // white
};

// Number of palette entries. Derived from the array's own element size so the
// count stays correct even if the element type of `colors` is ever changed.
static const int num_colors = sizeof(colors) / sizeof(colors[0]);
32 #define PUSH_RANGE(name,cid) { \
34 color_id = color_id%num_colors;\
35 nvtxEventAttributes_t eventAttrib = {0}; \
36 eventAttrib.version = NVTX_VERSION; \
37 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
38 eventAttrib.colorType = NVTX_COLOR_ARGB; \
39 eventAttrib.color = colors[color_id]; \
40 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \
41 eventAttrib.message.ascii = name; \
42 nvtxRangePushEx(&eventAttrib); \
44 #define POP_RANGE nvtxRangePop();
46 #define PUSH_RANGE(name,cid)
// ---- File-scope state shared by the MILC interface entry points ----

// Checked on entry by the initialisation path, which returns immediately when
// this is true. NOTE(review): presumably set once QUDA has been initialised — confirm.
static bool initialized = false;

// Process-grid extent in each of the four directions (taken from the layout's
// machsize, or set to 1 per direction in the alternate branch).
static int gridDim[4];

// Per-process lattice extent in each direction (latsize divided by machsize).
static int localDim[4];

// When true, the solver entry points (re)load the gauge links before use.
// Set by invalidateGaugeQuda() and cleared after the links have been loaded.
static bool invalidate_quda_gauge = true;

// Set to true by the link-loading entry points. While it is false, the solver
// entry points call invalidateGaugeQuda() before returning.
static bool create_quda_gauge = false;

// Tracks whether a resident gauge field is currently held; cleared by
// invalidateGaugeQuda() and by the HISQ force routine.
static bool have_resident_gauge = false;

// When false, the force routines use/keep a resident momentum field instead of
// returning the result to the host.
static bool invalidate_quda_mom = true;

// When true, the multigrid solve path reloads the gauge links; cleared once the
// multigrid setup has been (re)built.
static bool invalidate_quda_mg = true;

// NOTE(review): presumably an opaque handle to a deflation preconditioner —
// its use is not visible in this part of the file; confirm.
static void *df_preconditioner = nullptr;
70 #define QUDAMILC_VERBOSE 1
81 #ifdef QUDAMILC_VERBOSE
84 printfQuda(
"QUDA_MILC_INTERFACE: %s (called) \n", func);
86 printfQuda(
"QUDA_MILC_INTERFACE: %s (return) \n", func);
98 if (initialized)
return;
100 qudamilc_called<true>(__func__);
103 qudamilc_called<false>(__func__);
108 qudamilc_called<true>(__func__);
110 qudamilc_called<false>(__func__);
112 #if defined(MULTI_GPU) && !defined(QMP_COMMS)
117 static int rankFromCoords(
const int *coords,
void *fdata)
119 int *dims =
static_cast<int *
>(fdata);
121 int rank = coords[3];
122 for (
int i = 2; i >= 0; i--) {
123 rank = dims[i] * rank + coords[i];
132 for(
int dir=0; dir<4; ++dir){ local_dim[dir] = input.
latsize[dir]; }
134 for(
int dir=0; dir<4; ++dir){ local_dim[dir] /= input.
machsize[dir]; }
136 for(
int dir=0; dir<4; ++dir){
137 if(local_dim[dir]%2 != 0){
138 printf(
"Error: Odd lattice dimensions are not supported\n");
143 for(
int dir=0; dir<4; ++dir) localDim[dir] = local_dim[dir];
146 for(
int dir=0; dir<4; ++dir) gridDim[dir] = input.
machsize[dir];
152 static int device = -1;
154 for(
int dir=0; dir<4; ++dir) gridDim[dir] = 1;
155 static int device = input.
device;
171 static bool initialized =
false;
173 if(initialized)
return;
174 qudamilc_called<true>(__func__);
176 #if defined(GPU_HISQ_FORCE) || defined(GPU_UNITARIZE)
179 const double unitarize_eps = 1e-14;
180 const double max_error = 1e-10;
183 #ifdef GPU_HISQ_FORCE
203 qudamilc_called<false>(__func__);
212 for(
int dir=0; dir<4; ++dir)
gParam.X[dir] =
dim[dir];
222 gParam.tadpole_coeff = 1.0;
231 static void invalidateGaugeQuda() {
232 qudamilc_called<true>(__func__);
234 invalidate_quda_gauge =
true;
235 have_resident_gauge =
false;
236 qudamilc_called<false>(__func__);
240 const double act_path_coeff[6],
void* inlink,
void* fatlink,
void* longlink)
242 qudamilc_called<true>(__func__);
254 invalidateGaugeQuda();
257 create_quda_gauge =
true;
258 qudamilc_called<false>(__func__);
264 const double act_path_coeff[6],
void* inlink,
void* fatlink,
void* ulink)
266 qudamilc_called<true>(__func__);
275 invalidateGaugeQuda();
278 create_quda_gauge =
true;
279 qudamilc_called<false>(__func__);
283 void qudaHisqForce(
int prec,
int num_terms,
int num_naik_terms,
double dt,
double** coeff,
void** quark_field,
284 const double level2_coeff[6],
const double fat7_coeff[6],
285 const void*
const w_link,
const void*
const v_link,
const void*
const u_link,
286 void*
const milc_momentum)
288 qudamilc_called<true>(__func__);
292 if (!invalidate_quda_mom) {
293 gParam.use_resident_mom =
true;
294 gParam.make_resident_mom =
true;
295 gParam.return_result_mom =
false;
297 gParam.use_resident_mom =
false;
298 gParam.make_resident_mom =
false;
299 gParam.return_result_mom =
true;
303 w_link, v_link, u_link,
304 quark_field, num_terms, num_naik_terms, coeff,
307 have_resident_gauge =
false;
308 qudamilc_called<false>(__func__);
314 const void*
const one_link_src[4],
const void*
const naik_src[4],
315 const void*
const link,
void*
const milc_momentum)
317 errorQuda(
"This interface has been removed and is no longer supported");
323 void** quark_field,
void* oprod[3])
325 errorQuda(
"This interface has been removed and is no longer supported");
330 qudamilc_called<true>(__func__);
333 void *gauge =
arg->site ?
arg->site :
arg->link;
334 void *mom =
arg->site ?
arg->site :
arg->mom;
344 if (want_gaugepipe) {
347 if (!have_resident_gauge) {
349 have_resident_gauge =
true;
356 if (!invalidate_quda_mom) {
365 qudamilc_called<false>(__func__);
378 qudamilc_called<true>(__func__);
384 qudaGaugeParam.
i_mu = i_mu;
388 qudamilc_called<false>(__func__);
394 qudamilc_called<true>(__func__);
398 void *gauge =
arg->site ?
arg->site :
arg->link;
407 if (!have_resident_gauge) {
415 have_resident_gauge =
false;
418 invalidateGaugeQuda();
419 qudamilc_called<false>(__func__);
428 qudamilc_called<true>(__func__);
433 void *mom =
arg->site ?
arg->site :
arg->mom;
441 invalidate_quda_mom =
false;
443 qudamilc_called<false>(__func__);
449 qudamilc_called<true>(__func__);
454 void *mom =
arg->site ?
arg->site :
arg->mom;
462 invalidate_quda_mom =
true;
464 qudamilc_called<false>(__func__);
469 qudamilc_called<true>(__func__);
474 void *mom =
arg->site ?
arg->site :
arg->mom;
480 if (!invalidate_quda_mom) {
483 invalidate_quda_mom =
false;
487 invalidate_quda_mom =
true;
492 qudamilc_called<false>(__func__);
497 static inline int opp(
int dir){
502 static void createGaugeForcePaths(
int **paths,
int dir,
int num_loop_types){
506 if (num_loop_types >= 1)
507 for(
int i=0; i<4; ++i){
509 paths[index][0] = i; paths[index][1] = opp(dir); paths[index++][2] = opp(i);
510 paths[index][0] = opp(i); paths[index][1] = opp(dir); paths[index++][2] = i;
514 if (num_loop_types >= 2)
515 for(
int i=0; i<4; ++i){
517 paths[index][0] = paths[index][1] = i; paths[index][2] = opp(dir); paths[index][3] = paths[index][4] = opp(i);
519 paths[index][0] = paths[index][1] = opp(i); paths[index][2] = opp(dir); paths[index][3] = paths[index][4] = i;
521 paths[index][0] = dir; paths[index][1] = i; paths[index][2] = paths[index][3] = opp(dir); paths[index][4] = opp(i);
523 paths[index][0] = dir; paths[index][1] = opp(i); paths[index][2] = paths[index][3] = opp(dir); paths[index][4] = i;
525 paths[index][0] = i; paths[index][1] = paths[index][2] = opp(dir); paths[index][3] = opp(i); paths[index][4] = dir;
527 paths[index][0] = opp(i); paths[index][1] = paths[index][2] = opp(dir); paths[index][3] = i; paths[index][4] = dir;
531 if (num_loop_types >= 3) {
533 for(
int i=0; i<4; ++i){
534 for(
int j=0; j<4; ++j){
535 if(i==dir || j==dir || i==j)
continue;
536 paths[index][0] = i; paths[index][1] = j; paths[index][2] = opp(dir); paths[index][3] = opp(i), paths[index][4] = opp(j);
538 paths[index][0] = i; paths[index][1] = opp(j); paths[index][2] = opp(dir); paths[index][3] = opp(i), paths[index][4] = j;
540 paths[index][0] = opp(i); paths[index][1] = j; paths[index][2] = opp(dir); paths[index][3] = i, paths[index][4] = opp(j);
542 paths[index][0] = opp(i); paths[index][1] = opp(j); paths[index][2] = opp(dir); paths[index][3] = i, paths[index][4] = j;
553 qudamilc_called<true>(__func__);
556 switch (num_loop_types) {
567 errorQuda(
"Invalid num_loop_types = %d\n", num_loop_types);
573 void *gauge =
arg->site ?
arg->site :
arg->link;
574 void *mom =
arg->site ?
arg->site :
arg->mom;
585 if (!have_resident_gauge) {
594 double *loop_coeff =
static_cast<double*
>(
safe_malloc(numPaths*
sizeof(
double)));
597 if (num_loop_types >= 1)
for(
int i= 0; i< 6; ++i) {
598 loop_coeff[i] = milc_loop_coeff[0];
601 if (num_loop_types >= 2)
for(
int i= 6; i<24; ++i) {
602 loop_coeff[i] = milc_loop_coeff[1];
605 if (num_loop_types >= 3)
for(
int i=24; i<48; ++i) {
606 loop_coeff[i] = milc_loop_coeff[2];
610 int** input_path_buf[4];
611 for(
int dir=0; dir<4; ++dir){
612 input_path_buf[dir] =
static_cast<int**
>(
safe_malloc(numPaths*
sizeof(
int*)));
613 for(
int i=0; i<numPaths; ++i){
616 createGaugeForcePaths(input_path_buf[dir], dir, num_loop_types);
619 if (!invalidate_quda_mom) {
641 loop_coeff, numPaths, max_length, eb3, &qudaGaugeParam);
643 for(
int dir=0; dir<4; ++dir){
644 for(
int i=0; i<numPaths; ++i)
host_free(input_path_buf[dir][i]);
651 qudamilc_called<false>(__func__);
660 static int getLinkPadding(
const int dim[4])
676 invertParam->
tol = target_residual;
677 invertParam->
tol_hq = target_residual_hq;
697 invertParam->
maxiter = maxiter;
716 invertParam->
gflops = 0.0;
746 const double target_residual_offset[],
const double target_residual_hq_offset[],
750 const double null_mass = -1;
756 for (
int i = 0; i < num_offset; ++i) {
757 invertParam->
offset[i] = offset[i];
758 invertParam->
tol_offset[i] = target_residual_offset[i];
759 invertParam->
tol_hq_offset[i] = target_residual_hq_offset[i];
766 char *reconstruct_env = getenv(
"QUDA_MILC_HISQ_RECONSTRUCT");
767 if (!reconstruct_env || strcmp(reconstruct_env,
"18") == 0) {
769 }
else if (strcmp(reconstruct_env,
"13") == 0) {
771 }
else if (strcmp(reconstruct_env,
"9") == 0) {
774 errorQuda(
"QUDA_MILC_HISQ_RECONSTRUCT=%s not supported", reconstruct_env);
779 char *reconstruct_sloppy_env = getenv(
"QUDA_MILC_HISQ_RECONSTRUCT_SLOPPY");
780 if (!reconstruct_sloppy_env) {
781 reconstruct_sloppy = reconstruct;
782 }
else if (strcmp(reconstruct_sloppy_env,
"18") == 0) {
784 }
else if (strcmp(reconstruct_sloppy_env,
"13") == 0) {
786 }
else if (strcmp(reconstruct_sloppy_env,
"9") == 0) {
789 errorQuda(
"QUDA_MILC_HISQ_RECONSTRUCT_SLOPPY=%s not supported", reconstruct_sloppy_env);
798 for (
int dir = 0; dir < 4; ++dir) fat_param.
X[dir] =
dim[dir];
813 if (longlink !=
nullptr) {
818 long_param = fat_param;
839 for (
int dir = 0; dir < 4; ++dir)
param->x[dir] =
dim[dir];
842 param->setPrecision(precision);
871 static size_t getColorVectorOffset(
QudaParity local_parity,
bool even_odd_exchange,
const int dim[4])
877 offset = even_odd_exchange ? volume*6/2 : 0;
879 offset = even_odd_exchange ? 0 : volume*6/2;
886 const double target_fermilab_residual[],
const void *
const fatlink,
887 const void *
const longlink,
void *source,
void **solutionArray,
double *
const final_residual,
888 double *
const final_fermilab_residual,
int *num_iters)
891 qudamilc_called<true>(__func__,
verbosity);
893 if (target_residual[0] == 0)
errorQuda(
"qudaMultishiftInvert: zeroth target residual cannot be zero\n");
897 static bool force_double_queried =
false;
898 static bool do_not_force_double =
false;
899 if (!force_double_queried) {
900 char *donotusedouble_env = getenv(
"QUDA_MILC_OVERRIDE_DOUBLE_MULTISHIFT");
901 if (donotusedouble_env && (!(strcmp(donotusedouble_env,
"0") == 0))) {
902 do_not_force_double =
true;
903 printfQuda(
"Disabling always using double as fine precision for MILC multishift\n");
905 force_double_queried =
true;
909 bool use_mixed_precision = (((quda_precision == 2) && inv_args.
mixed_precision)
918 default: device_precision_sloppy = device_precision;
930 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
937 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, num_offsets, offset,
950 setColorSpinorParams(localDim, host_precision, &
csParam);
956 if (invalidate_quda_gauge || !create_quda_gauge) {
958 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
959 invalidate_quda_gauge =
false;
964 void** sln_pointer = (
void**)malloc(num_offsets*
sizeof(
void*));
965 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
966 void* src_pointer =
static_cast<char*
>(source) + quark_offset;
968 for (
int i = 0; i < num_offsets; ++i) sln_pointer[i] = static_cast<char *>(solutionArray[i]) + quark_offset;
974 *num_iters = invertParam.
iter;
975 for (
int i = 0; i < num_offsets; ++i) {
980 if (!create_quda_gauge) invalidateGaugeQuda();
982 qudamilc_called<false>(__func__,
verbosity);
986 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
987 const void *
const longlink,
void *source,
void *solution,
double *
const final_residual,
988 double *
const final_fermilab_residual,
int *num_iters)
991 qudamilc_called<true>(__func__,
verbosity);
993 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
997 static bool force_double_queried =
false;
998 static bool do_not_force_double =
false;
999 if (!force_double_queried) {
1000 char *donotusedouble_env = getenv(
"QUDA_MILC_OVERRIDE_DOUBLE_MULTISHIFT");
1001 if (donotusedouble_env && (!(strcmp(donotusedouble_env,
"0") == 0))) {
1002 do_not_force_double =
true;
1003 printfQuda(
"Disabling always using double as fine precision for MILC multishift\n");
1005 force_double_queried =
true;
1014 default: device_precision_sloppy = device_precision;
1025 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
1033 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_residual,
1038 setColorSpinorParams(localDim, host_precision, &
csParam);
1043 if (invalidate_quda_gauge || !create_quda_gauge) {
1045 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
1046 invalidate_quda_gauge =
false;
1051 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
1053 invertQuda(
static_cast<char *
>(solution) + quark_offset,
static_cast<char *
>(source) + quark_offset, &invertParam);
1056 *num_iters = invertParam.
iter;
1057 *final_residual = invertParam.
true_res;
1058 *final_fermilab_residual = invertParam.
true_res_hq;
1060 if (!create_quda_gauge) invalidateGaugeQuda();
1062 qudamilc_called<false>(__func__,
verbosity);
1067 const void *
const longlink,
void* src,
void* dst,
int* num_iters)
1070 qudamilc_called<true>(__func__,
verbosity);
1079 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
1087 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, 0.0, 0, 0, 0, 0.0, local_parity,
1091 setColorSpinorParams(localDim, host_precision, &
csParam);
1096 if (invalidate_quda_gauge || !create_quda_gauge) {
1098 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
1099 invalidate_quda_gauge =
false;
1104 int src_offset = getColorVectorOffset(other_parity,
false, localDim);
1105 int dst_offset = getColorVectorOffset(local_parity,
false, localDim);
1107 dslashQuda(
static_cast<char*
>(dst) + dst_offset*host_precision,
1108 static_cast<char*
>(src) + src_offset*host_precision,
1109 &invertParam, local_parity);
1111 if (!create_quda_gauge) invalidateGaugeQuda();
1113 qudamilc_called<false>(__func__,
verbosity);
1117 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
1118 const void *
const longlink,
void **sourceArray,
void **solutionArray,
double *
const final_residual,
1119 double *
const final_fermilab_residual,
int *num_iters,
int num_src)
1122 qudamilc_called<true>(__func__,
verbosity);
1124 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
1134 default: device_precision_sloppy = device_precision;
1139 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
1147 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_residual,
1150 invertParam.
num_src = num_src;
1153 setColorSpinorParams(localDim, host_precision, &
csParam);
1158 if (invalidate_quda_gauge || !create_quda_gauge) {
1160 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
1161 invalidate_quda_gauge =
false;
1166 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
1167 void** sln_pointer = (
void**)malloc(num_src*
sizeof(
void*));
1168 void** src_pointer = (
void**)malloc(num_src*
sizeof(
void*));
1170 for (
int i = 0; i < num_src; ++i) sln_pointer[i] = static_cast<char *>(solutionArray[i]) + quark_offset;
1171 for (
int i = 0; i < num_src; ++i) src_pointer[i] = static_cast<char *>(sourceArray[i]) + quark_offset;
1179 *num_iters = invertParam.
iter;
1180 *final_residual = invertParam.
true_res;
1181 *final_fermilab_residual = invertParam.
true_res_hq;
1183 if (!create_quda_gauge) invalidateGaugeQuda();
1185 qudamilc_called<false>(__func__,
verbosity);
1189 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
1190 const void *
const longlink,
1195 const int last_rhs_flag,
1196 double *
const final_residual,
double *
const final_fermilab_residual,
int *num_iters)
1199 qudamilc_called<true>(__func__,
verbosity);
1201 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
1210 default: device_precision_sloppy = device_precision;
1215 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
1221 double& target_res = target_residual;
1222 double& target_res_hq = target_fermilab_residual;
1225 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_res, target_res_hq,
1231 invertParam.
n_ev = eig_args.
nev;
1239 invertParam.
rhs_idx = rhs_idx;
1242 errorQuda(
"Incorrect inverter type.\n");
1250 setColorSpinorParams(localDim, host_precision, &
csParam);
1255 if ((invalidate_quda_gauge || !create_quda_gauge) && (rhs_idx == 0)) {
1257 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
1258 invalidate_quda_gauge =
false;
1263 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
1269 invertQuda(
static_cast<char *
>(solution) + quark_offset,
static_cast<char *
>(source) + quark_offset, &invertParam);
1274 *num_iters = invertParam.
iter;
1275 *final_residual = invertParam.
true_res;
1276 *final_fermilab_residual = invertParam.
true_res_hq;
1278 if (!create_quda_gauge && last_rhs_flag) invalidateGaugeQuda();
1280 qudamilc_called<false>(__func__,
verbosity);
1354 deflate_max_restarts(50),
1356 deflate_use_poly_acc(false),
1357 deflate_a_min(1e-2),
1358 deflate_poly_deg(50)
1443 deflate_max_restarts = 50;
1444 deflate_use_poly_acc =
false;
1445 deflate_a_min = 1e-2;
1446 deflate_poly_deg = 20;
1451 if (strcmp(name,
"gcr") == 0) {
1453 }
else if (strcmp(name,
"cgnr") == 0) {
1455 }
else if (strcmp(name,
"cgne") == 0) {
1457 }
else if (strcmp(name,
"bicgstab") == 0) {
1459 }
else if (strcmp(name,
"ca-gcr") == 0) {
1468 if (strcmp(name,
"single") == 0) {
1470 }
else if (strcmp(name,
"half") == 0) {
1479 if (strcmp(name,
"direct") == 0) {
1481 }
else if (strcmp(name,
"direct-pc") == 0) {
1490 if (strcmp(name,
"silent") == 0) {
1492 }
else if (strcmp(name,
"summarize") == 0 || strcmp(name,
"false") == 0) {
1495 }
else if (strcmp(name,
"verbose") == 0 || strcmp(name,
"true") == 0) {
1498 }
else if (strcmp(name,
"debug") == 0) {
1506 bool update(std::vector<std::string> &input_line)
1512 if (strcmp(input_line[0].c_str(),
"mg_levels") == 0) {
1513 if (input_line.size() < 2) {
1516 mg_levels = atoi(input_line[1].c_str());
1519 }
else if (strcmp(input_line[0].c_str(),
"verify_results") == 0) {
1520 if (input_line.size() < 2) {
1526 }
else if (strcmp(input_line[0].c_str(),
"preconditioner_precision") == 0) {
1527 if (input_line.size() < 2) {
1530 preconditioner_precision = getQudaPrecision(input_line[1].c_str());
1533 }
else if (strcmp(input_line[0].c_str(),
"mg_verbosity") == 0) {
1534 if (input_line.size() < 3) {
1537 mg_verbosity[atoi(input_line[1].c_str())] = getQudaVerbosity(input_line[2].c_str());
1541 if (strcmp(input_line[0].c_str(),
"nvec") == 0) {
1542 if (input_line.size() < 3) {
1545 nvec[atoi(input_line[1].c_str())] = atoi(input_line[2].c_str());
1548 }
else if (strcmp(input_line[0].c_str(),
"geo_block_size") == 0) {
1549 if (input_line.size() < 6) {
1552 for (
int d = 0; d < 4; d++)
geo_block_size[atoi(input_line[1].c_str())][d] = atoi(input_line[2 + d].c_str());
1555 }
else if (strcmp(input_line[0].c_str(),
"setup_inv") == 0) {
1556 if (input_line.size() < 3) {
1559 setup_inv[atoi(input_line[1].c_str())] = getQudaInverterType(input_line[2].c_str());
1562 }
else if (strcmp(input_line[0].c_str(),
"setup_tol") == 0) {
1563 if (input_line.size() < 3) {
1566 setup_tol[atoi(input_line[1].c_str())] = atof(input_line[2].c_str());
1569 }
else if (strcmp(input_line[0].c_str(),
"setup_maxiter") == 0) {
1570 if (input_line.size() < 3) {
1573 setup_maxiter[atoi(input_line[1].c_str())] = atoi(input_line[2].c_str());
1576 }
else if (strcmp(input_line[0].c_str(),
"mg_vec_infile") == 0) {
1577 if (input_line.size() < 3) {
1580 strcpy(
mg_vec_infile[atoi(input_line[1].c_str())], input_line[2].c_str());
1583 }
else if (strcmp(input_line[0].c_str(),
"mg_vec_outfile") == 0) {
1584 if (input_line.size() < 3) {
1587 strcpy(
mg_vec_outfile[atoi(input_line[1].c_str())], input_line[2].c_str());
1591 if (strcmp(input_line[0].c_str(),
"coarse_solve_type") == 0) {
1592 if (input_line.size() < 3) {
1595 coarse_solve_type[atoi(input_line[1].c_str())] = getQudaSolveType(input_line[2].c_str());
1598 }
else if (strcmp(input_line[0].c_str(),
"coarse_solver") == 0) {
1599 if (input_line.size() < 3) {
1602 coarse_solver[atoi(input_line[1].c_str())] = getQudaInverterType(input_line[2].c_str());
1605 }
else if (strcmp(input_line[0].c_str(),
"coarse_solver_tol") == 0) {
1606 if (input_line.size() < 3) {
1612 }
else if (strcmp(input_line[0].c_str(),
"coarse_solver_maxiter") == 0) {
1613 if (input_line.size() < 3) {
1619 }
else if (strcmp(input_line[0].c_str(),
"smoother_type") == 0) {
1620 if (input_line.size() < 3) {
1623 smoother_type[atoi(input_line[1].c_str())] = getQudaInverterType(input_line[2].c_str());
1626 }
else if (strcmp(input_line[0].c_str(),
"nu_pre") == 0) {
1627 if (input_line.size() < 3) {
1630 nu_pre[atoi(input_line[1].c_str())] = atoi(input_line[2].c_str());
1633 }
else if (strcmp(input_line[0].c_str(),
"nu_post") == 0) {
1634 if (input_line.size() < 3) {
1637 nu_post[atoi(input_line[1].c_str())] = atoi(input_line[2].c_str());
1641 if (strcmp(input_line[0].c_str(),
"deflate_n_ev") == 0) {
1642 if (input_line.size() < 2) {
1645 deflate_n_ev = atoi(input_line[1].c_str());
1648 }
else if (strcmp(input_line[0].c_str(),
"deflate_n_kr") == 0) {
1649 if (input_line.size() < 2) {
1652 deflate_n_kr = atoi(input_line[1].c_str());
1655 }
else if (strcmp(input_line[0].c_str(),
"deflate_max_restarts") == 0) {
1656 if (input_line.size() < 2) {
1659 deflate_max_restarts = atoi(input_line[1].c_str());
1662 }
else if (strcmp(input_line[0].c_str(),
"deflate_tol") == 0) {
1663 if (input_line.size() < 2) {
1666 deflate_tol = atof(input_line[1].c_str());
1669 }
else if (strcmp(input_line[0].c_str(),
"deflate_use_poly_acc") == 0) {
1670 if (input_line.size() < 2) {
1673 deflate_use_poly_acc = input_line[1][0] ==
't' ? true :
false;
1676 }
else if (strcmp(input_line[0].c_str(),
"deflate_a_min") == 0) {
1677 if (input_line.size() < 2) {
1680 deflate_a_min = atof(input_line[1].c_str());
1683 }
else if (strcmp(input_line[0].c_str(),
"deflate_poly_deg") == 0) {
1684 if (input_line.size() < 2) {
1687 deflate_poly_deg = atoi(input_line[1].c_str());
1691 printf(
"Invalid option %s\n", input_line[0].c_str());
1695 if (error_code == 1) {
1696 printf(
"Input option %s has an invalid number of arguments\n", input_line[0].c_str());
1724 errorQuda(
"Only real spectrum type (LR or SR) can be passed to the a Lanczos type solver");
1729 mg_eig_param.
n_conv = input_struct.
nvec[level];
1746 mg_eig_param.
a_max = 0.0;
1759 QudaPrecision device_precision_sloppy,
double mass,
const char *
const mg_param_file)
1769 std::ifstream input_file(mg_param_file, std::ios_base::in);
1771 if (!input_file.is_open()) {
errorQuda(
"MILC interface MG input file %s does not exist!", mg_param_file); }
1775 std::vector<std::string> elements;
1776 while (!input_file.eof()) {
1781 input_file.getline(buffer, 1024);
1784 char *pch = strtok(buffer,
" \t");
1785 while (pch !=
nullptr) {
1787 pch = strtok(
nullptr,
" \t");
1791 if (elements.size() == 0 || elements[0][0] ==
'#')
continue;
1795 for (
auto elem : elements) { printf(
"%s ", elem.c_str()); }
1799 input_struct.
update(elements);
1861 for (
int i = 0; i < mg_param.
n_level; i++) {
1864 for (
int j = 0; j < 4; j++) {
1872 if (i == mg_param.
n_level - 1 && input_struct.
nvec[i] > 0) {
1897 mg_param.
n_vec[i] = (i == 0) ? 24 : input_struct.
nvec[i];
2006 }
else if (i == 1) {
2021 mg_param.
omega[i] = 0.85;
2048 for (
int i = 0; i < mg_param.
n_level; i++) {
2074 const void *
const fatlink,
const void *
const longlink,
const char *
const mg_param_file)
2077 qudamilc_called<true>(__func__,
verbosity);
2090 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
2114 invalidateGaugeQuda();
2116 if (invalidate_quda_gauge || !create_quda_gauge) {
2118 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
2119 invalidate_quda_gauge =
false;
2125 invalidate_quda_mg =
false;
2127 if (!create_quda_gauge) invalidateGaugeQuda();
2129 qudamilc_called<false>(__func__,
verbosity);
2131 return (
void *)mg_pack;
2135 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
2136 const void *
const longlink,
void *mg_pack_ptr,
int mg_rebuild_type,
void *source,
void *solution,
2137 double *
const final_residual,
double *
const final_fermilab_residual,
int *num_iters)
2140 qudamilc_called<true>(__func__,
verbosity);
2148 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
2157 setGaugeParams(fat_param, long_param, fatlink, longlink, localDim, host_precision, device_precision,
2174 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_residual,
2191 setColorSpinorParams(localDim, host_precision, &
csParam);
2195 invalidateGaugeQuda();
2196 invalidate_quda_mg =
true;
2202 invalidateGaugeQuda();
2203 invalidate_quda_mg =
true;
2206 if (invalidate_quda_gauge || !create_quda_gauge || invalidate_quda_mg) {
2208 if (longlink !=
nullptr)
loadGaugeQuda(
const_cast<void *
>(longlink), &long_param);
2209 invalidate_quda_gauge =
false;
2214 if (mg_rebuild_type == 1) {
2222 invalidate_quda_mg =
false;
2227 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
2233 invertQuda(
static_cast<char *
>(solution) + quark_offset,
static_cast<char *
>(source) + quark_offset, &invertParam);
2236 int cv_size = localDim[0] * localDim[1] * localDim[2] * localDim[3] * 3 * 2;
2238 auto soln = (
double *)(solution);
2239 for (
long i = 0; i < cv_size; i++) { soln[i] = -soln[i]; }
2241 auto soln = (
float *)(solution);
2242 for (
long i = 0; i < cv_size; i++) { soln[i] = -soln[i]; }
2246 *num_iters = invertParam.
iter;
2247 *final_residual = invertParam.
true_res;
2248 *final_fermilab_residual = invertParam.
true_res_hq;
2250 if (!create_quda_gauge) invalidateGaugeQuda();
2252 qudamilc_called<false>(__func__,
verbosity);
2258 qudamilc_called<true>(__func__,
verbosity);
2260 if (mg_pack_ptr != 0) {
2266 qudamilc_called<false>(__func__,
verbosity);
2269 static int clover_alloc = 0;
2273 qudamilc_called<true>(__func__);
2277 qudamilc_called<false>(__func__);
2284 qudamilc_called<true>(__func__);
2288 qudamilc_called<false>(__func__);
2294 qudamilc_called<true>(__func__);
2296 qudamilc_called<false>(__func__);
2306 qudamilc_called<true>(__func__);
2314 for (
int i=0; i<
nvec; ++i) invertParam.
offset[i] = 0.0;
2327 computeCloverForceQuda(mom, dt, x, p, coeff, -
kappa *
kappa, ck,
nvec, multiplicity, gauge, &qudaGaugeParam,
2329 qudamilc_called<false>(__func__);
2333 int external_precision,
int quda_precision)
2343 default: device_precision_sloppy = device_precision;
2346 for (
int dir = 0; dir < 4; ++dir) qudaGaugeParam.
X[dir] =
dim[dir];
2355 bool trivial_phase =
true;
2356 for(
int dir=0; dir<3; ++dir){
2371 qudaGaugeParam.
cpu_prec = host_precision;
2372 qudaGaugeParam.
cuda_prec = device_precision;
2376 qudaGaugeParam.
ga_pad = getLinkPadding(
dim);
2388 default: device_precision_sloppy = device_precision;
2404 invertParam.
cpu_prec = host_precision;
2405 invertParam.
cuda_prec = device_precision;
2425 const void* milc_link) {
2426 qudamilc_called<true>(__func__);
2428 setGaugeParams(qudaGaugeParam, localDim, inv_args, external_precision, quda_precision);
2430 loadGaugeQuda(
const_cast<void *
>(milc_link), &qudaGaugeParam);
2431 qudamilc_called<false>(__func__);
2436 qudamilc_called<true>(__func__);
2438 qudamilc_called<false>(__func__);
2445 qudamilc_called<true>(__func__);
2447 setInvertParam(invertParam, inv_args, external_precision, quda_precision, 0.0, 0.0);
2460 invertParam.
tol = 0.;
2465 if (clover_alloc == 0) {
2469 errorQuda(
"Clover term already allocated");
2473 if (compute_trlog) {
2474 trlog[0] = invertParam.
trlogA[0];
2475 trlog[1] = invertParam.
trlogA[1];
2477 qudamilc_called<false>(__func__);
2481 qudamilc_called<true>(__func__);
2482 if (clover_alloc==1) {
2486 errorQuda(
"Trying to free non-allocated clover term");
2488 qudamilc_called<false>(__func__);
2497 double target_residual,
2498 double target_fermilab_residual,
2501 void* cloverInverse,
2504 double*
const final_residual,
2505 double*
const final_fermilab_residual,
2508 qudamilc_called<true>(__func__);
2509 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaCloverInvert: requesting zero residual\n");
2513 if (clover || cloverInverse) {
2526 invertParam.
tol = target_residual;
2527 invertParam.
tol_hq = target_fermilab_residual;
2539 *num_iters = invertParam.
iter;
2540 *final_residual = invertParam.
true_res;
2541 *final_fermilab_residual = invertParam.
true_res_hq;
2545 qudamilc_called<false>(__func__);
2549 QudaInvertArgs_t inv_args,
double target_residual,
double target_fermilab_residual,
2552 void *cloverInverse,
2557 const int last_rhs_flag,
2558 double *
const final_residual,
double *
const final_fermilab_residual,
int *num_iters)
2560 qudamilc_called<true>(__func__);
2561 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaCloverInvert: requesting zero residual\n");
2563 if (link && (rhs_idx == 0))
qudaLoadGaugeField(external_precision, quda_precision, inv_args, link);
2565 if ( (clover || cloverInverse) && (rhs_idx == 0)) {
2578 invertParam.
tol = target_residual;
2579 invertParam.
tol_hq = target_fermilab_residual;
2592 invertParam.
n_ev = eig_args.
nev;
2601 invertParam.
rhs_idx = rhs_idx;
2618 *num_iters = invertParam.
iter;
2619 *final_residual = invertParam.
true_res;
2620 *final_fermilab_residual = invertParam.
true_res_hq;
2624 qudamilc_called<false>(__func__);
2631 double*
const offset,
2635 const double* target_residual_offset,
2636 const void* milc_link,
2638 void* milc_clover_inv,
2640 void** solutionArray,
2641 double*
const final_residual,
2645 qudamilc_called<true>(__func__,
verbosity);
2647 for (
int i = 0; i < num_offsets; ++i) {
2648 if (target_residual_offset[i] == 0)
errorQuda(
"qudaCloverMultishiftInvert: target residual cannot be zero\n");
2652 const bool use_mixed_precision = (((quda_precision==2) && inv_args.
mixed_precision) ||
2653 ((quda_precision==1) && (inv_args.
mixed_precision==2)) ) ? true :
false;
2659 for(
int i=0; i<num_offsets; ++i){
2660 invertParam.
offset[i] = offset[i];
2661 invertParam.
tol_offset[i] = target_residual_offset[i];
2663 invertParam.
tol = target_residual_offset[0];
2678 if (num_offsets==1 && offset[0] == 0) {
2680 char *quda_solver = getenv(
"QUDA_MILC_CLOVER_SOLVER");
2683 if (!quda_solver || strcmp(quda_solver,
"CHRONO_CG_SOLVER")==0) {
2688 }
else if (strcmp(quda_solver,
"BICGSTAB_SOLVER")==0){
2692 }
else if (strcmp(quda_solver,
"CG_SOLVER")==0){
2698 invertQuda(solutionArray[0], source, &invertParam);
2699 *final_residual = invertParam.
true_res;
2702 for (
int i=0; i<num_offsets; ++i) final_residual[i] = invertParam.
true_res_offset[i];
2706 *num_iters = invertParam.
iter;
2708 qudamilc_called<false>(__func__,
verbosity);
2711 void qudaGaugeFixingOVR(
int precision,
unsigned int gauge_dir,
int Nsteps,
int verbose_interval,
double relax_boost,
2712 double tolerance,
unsigned int reunit_interval,
unsigned int stopWtheta,
void *milc_sitelink)
2721 computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \
2722 &qudaGaugeParam, timeinfo);
2725 printfQuda(
"Time to Compute: %lf\n", timeinfo[1]);
2727 printfQuda(
"Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
2731 unsigned int gauge_dir,
2733 int verbose_interval,
2735 unsigned int autotune,
2737 unsigned int stopWtheta,
2750 &qudaGaugeParam, timeinfo);
2753 printfQuda(
"Time to Compute: %lf\n", timeinfo[1]);
2755 printfQuda(
"Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
QudaPrecision Precision() const
void comm_broadcast(void *data, size_t nbytes)
quda::mgarray< char[256]> mg_vec_outfile
quda::mgarray< QudaInverterType > coarse_solver
quda::mgarray< int > coarse_solver_maxiter
quda::mgarray< char[256]> mg_vec_infile
quda::mgarray< int > nu_post
quda::mgarray< int > nu_pre
quda::mgarray< QudaVerbosity > mg_verbosity
quda::mgarray< int > setup_maxiter
quda::mgarray< int > nvec
QudaMemoryType mem_type_ritz
QudaSolutionType solution_type
QudaExtLibType deflation_ext_lib
quda::mgarray< QudaInverterType > setup_inv
quda::mgarray< double > setup_tol
QudaFieldLocation location_ritz
quda::mgarray< QudaSolveType > coarse_solve_type
quda::mgarray< double > coarse_solver_tol
quda::mgarray< QudaInverterType > smoother_type
quda::mgarray< std::array< int, 4 > > geo_block_size
QudaInvertParam inv_param
@ QUDA_MG_CYCLE_RECURSIVE
@ QUDA_PACKED_CLOVER_ORDER
enum QudaSolveType_s QudaSolveType
enum QudaPrecision_s QudaPrecision
@ QUDA_STAGGERED_PHASE_MILC
@ QUDA_CLOVER_WILSON_DSLASH
@ QUDA_CUDA_FIELD_LOCATION
@ QUDA_CPU_FIELD_LOCATION
@ QUDA_KAPPA_NORMALIZATION
@ QUDA_MASS_NORMALIZATION
@ QUDA_USE_INIT_GUESS_YES
@ QUDA_PARITY_SITE_SUBSET
@ QUDA_DEGRAND_ROSSI_GAMMA_BASIS
@ QUDA_HEAVY_QUARK_RESIDUAL
@ QUDA_L2_RELATIVE_RESIDUAL
@ QUDA_TRANSFER_COARSE_KD
@ QUDA_TRANSFER_AGGREGATE
enum QudaSolutionType_s QudaSolutionType
enum QudaInverterType_s QudaInverterType
enum QudaFieldLocation_s QudaFieldLocation
enum QudaExtLibType_s QudaExtLibType
@ QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
@ QUDA_INC_EIGCG_INVERTER
@ QUDA_PRESERVE_SOURCE_NO
@ QUDA_PRESERVE_SOURCE_YES
@ QUDA_EVEN_ODD_SITE_ORDER
enum QudaMemoryType_s QudaMemoryType
enum QudaReconstructType_s QudaReconstructType
@ QUDA_MATPCDAG_MATPC_SOLUTION
@ QUDA_MILC_SITE_GAUGE_ORDER
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
enum QudaVerbosity_s QudaVerbosity
enum QudaParity_s QudaParity
enum QudaLinkType_s QudaLinkType
@ QUDA_COMPUTE_NULL_VECTOR_YES
cudaGaugeField * cudaGauge
QudaPrecision & cuda_prec
QudaPrecision & cuda_prec_sloppy
#define pool_pinned_malloc(size)
#define safe_malloc(size)
#define pool_pinned_free(ptr)
#define managed_free(ptr)
#define managed_malloc(size)
void qudaLoadCloverField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, void *milc_clover, void *milc_clover_inv, QudaSolutionType solution_type, QudaSolveType solve_type, QudaInverterType inverter, double clover_coeff, int compute_trlog, double *trlog)
void qudaLoadGaugeField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *milc_link)
void qudaSetLayout(QudaLayout_t input)
void setDeflationParam(QudaPrecision ritz_prec, QudaFieldLocation location_ritz, QudaMemoryType mem_type_ritz, QudaExtLibType deflation_ext_lib, char vec_infile[], char vec_outfile[], QudaEigParam *df_param)
void qudaMomLoad(int prec, QudaMILCSiteArg_t *arg)
void qudaUnitarizeSU3Phased(int prec, double tol, QudaMILCSiteArg_t *arg, int phase_in)
void setInvertParam(QudaInvertParam &invertParam, QudaInvertArgs_t &inv_args, int external_precision, int quda_precision, double kappa, double reliable_delta)
void qudamilc_called(const char *func, QudaVerbosity verb)
void qudaMultigridDestroy(void *mg_pack_ptr)
void qudaInit(QudaInitArgs_t input)
void qudaInvertMsrc(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, void **sourceArray, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters, int num_src)
void qudaHisqForce(int prec, int num_terms, int num_naik_terms, double dt, double **coeff, void **quark_field, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void *const milc_momentum)
void qudaUnitarizeSU3(int prec, double tol, QudaMILCSiteArg_t *arg)
void qudaGaugeForce(int precision, int num_loop_types, double milc_loop_coeff[3], double eb3, QudaMILCSiteArg_t *arg)
void qudaRephase(int prec, void *gauge, int flag, double i_mu)
void qudaGaugeFixingOVR(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double relax_boost, double tolerance, unsigned int reunit_interval, unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with overrelaxation with support for single and multi GPU.
void qudaMomSave(int prec, QudaMILCSiteArg_t *arg)
void * qudaAllocateManaged(size_t bytes)
void qudaFreeManaged(void *ptr)
void milcSetMultigridEigParam(QudaEigParam &mg_eig_param, mgInputStruct &input_struct, int level)
void qudaMultishiftInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, QudaInvertArgs_t inv_args, const double target_residual[], const double target_fermilab_residual[], const void *const fatlink, const void *const longlink, void *source, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void * qudaMultigridCreate(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, const void *const fatlink, const void *const longlink, const char *const mg_param_file)
void qudaSetMPICommHandle(void *mycomm)
void qudaFreeGaugeField()
void setGaugeParams(QudaGaugeParam &qudaGaugeParam, const int dim[4], QudaInvertArgs_t &inv_args, int external_precision, int quda_precision)
double qudaMomAction(int prec, QudaMILCSiteArg_t *arg)
void qudaComputeOprod(int prec, int num_terms, int num_naik_terms, double **coeff, double scale, void **quark_field, void *oprod[3])
void qudaGaugeFixingFFT(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double alpha, unsigned int autotune, double tolerance, unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with the steepest descent method with FFTs, with support for single GPU only.
void qudaFreePinned(void *ptr)
#define PUSH_RANGE(name, cid)
void qudaEigCGCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *link, void *clover, void *cloverInverse, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void qudaDslash(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *const fatlink, const void *const longlink, void *src, void *dst, int *num_iters)
void * qudaAllocatePinned(size_t bytes)
void qudaHisqParamsInit(QudaHisqParams_t params)
void qudaUpdateUPhased(int prec, double eps, QudaMILCSiteArg_t *arg, int phase_in)
void qudaAsqtadForce(int prec, const double act_path_coeff[6], const void *const one_link_src[4], const void *const naik_src[4], const void *const link, void *const milc_momentum)
void qudaUpdateU(int prec, double eps, QudaMILCSiteArg_t *arg)
void qudaEigCGInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void qudaLoadUnitarizedLink(int prec, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void *inlink, void *fatlink, void *ulink)
void qudaCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *link, void *clover, void *cloverInverse, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void qudaGaugeForcePhased(int precision, int num_loop_types, double milc_loop_coeff[3], double eb3, QudaMILCSiteArg_t *arg, int phase_in)
void qudaCloverForce(void *mom, double dt, void **x, void **p, double *coeff, double kappa, double ck, int nvec, double multiplicity, void *gauge, int precision, QudaInvertArgs_t inv_args)
void qudaDestroyGaugeField(void *gauge)
void qudaSaveGaugeField(void *gauge, void *inGauge)
void qudaFreeCloverField()
void qudaInvertMG(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, void *mg_pack_ptr, int mg_rebuild_type, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void qudaUpdateUPhasedPipeline(int prec, double eps, QudaMILCSiteArg_t *arg, int phase_in, int want_gaugepipe)
void qudaLoadKSLink(int prec, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void *inlink, void *fatlink, void *longlink)
void * qudaCreateGaugeField(void *gauge, int geometry, int precision)
void qudaInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, const double *target_residual_offset, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void **solutionArray, double *const final_residual, int *num_iters)
void milcSetMultigridParam(milcMultigridPack *mg_pack, QudaPrecision host_precision, QudaPrecision device_precision, QudaPrecision device_precision_sloppy, double mass, const char *const mg_param_file)
void start()
Start profiling.
void setUnitarizeForceConstants(double unitarize_eps, double hisq_force_filter, double max_det_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
Set the constant parameters for the force unitarization.
bool canReuseResidentGauge(QudaInvertParam *inv_param)
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Main header file for the QUDA library.
double momActionQuda(void *momentum, QudaGaugeParam *param)
void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, void *h_gauge, QudaGaugeParam *gauge_param)
Perform the solve like @invertQuda but for multiple rhs by splitting the comm grid into sub-partitions...
void * createGaugeFieldQuda(void *gauge, int geometry, QudaGaugeParam *param)
void destroyGaugeFieldQuda(void *gauge)
void destroyDeflationQuda(void *df_instance)
void momResidentQuda(void *mom, QudaGaugeParam *param)
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
void setMPICommHandleQuda(void *mycomm)
void * newMultigridQuda(QudaMultigridParam *param)
void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck, int nvector, double multiplicity, void *gauge, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param)
QudaGaugeParam newQudaGaugeParam(void)
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
void saveGaugeFieldQuda(void *outGauge, void *inGauge, QudaGaugeParam *param)
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
void initQuda(int device)
int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with the steepest descent method with FFTs, with support for single GPU only.
void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param)
QudaMultigridParam newQudaMultigridParam(void)
void updateGaugeFieldQuda(void *gauge, void *momentum, double dt, int conj_mom, int exact, QudaGaugeParam *param)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with overrelaxation with support for single and multi GPU.
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
void freeCloverQuda(void)
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
QudaInvertParam newQudaInvertParam(void)
QudaEigParam newQudaEigParam(void)
void updateMultigridQuda(void *mg_instance, QudaMultigridParam *param)
Updates the multigrid preconditioner for the new gauge / clover field.
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata)
void destroyMultigridQuda(void *mg_instance)
Free resources allocated by the multigrid solver.
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
void computeHISQForceQuda(void *momentum, double dt, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void **quark, int num, int num_naik, double **coeff, QudaGaugeParam *param)
void * newDeflationQuda(QudaEigParam *param)
void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param)
#define QUDA_MAX_MG_LEVEL
Maximum number of multi-grid levels. This number may be increased if needed.
QudaExtLibType deflation_ext_lib
QudaMemoryType mem_type_ritz
QudaFieldLocation location_ritz
QudaEigSpectrumType spectrum
QudaBoolean import_vectors
QudaBoolean io_parity_inflate
QudaPrecision cuda_prec_ritz
QudaFieldLocation location
QudaBoolean require_convergence
QudaInvertParam * invert_param
QudaMemoryType mem_type_ritz
QudaReconstructType reconstruct_precondition
QudaReconstructType reconstruct
QudaPrecision cuda_prec_precondition
QudaPrecision cuda_prec_refinement_sloppy
QudaPrecision cuda_prec_sloppy
QudaReconstructType reconstruct_sloppy
QudaGaugeFieldOrder gauge_order
QudaStaggeredPhase staggered_phase_type
QudaReconstructType reconstruct_refinement_sloppy
int staggered_phase_applied
double reunit_svd_abs_error
double reunit_svd_rel_error
int use_resident_solution
QudaInverterType solver_type
int make_resident_solution
int use_sloppy_partial_accumulator
QudaPrecision cuda_prec_refinement_sloppy
QudaSolutionType solution_type
QudaCloverFieldOrder clover_order
QudaMassNormalization mass_normalization
QudaPreserveSource preserve_source
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaResidualType residual_type
int make_resident_solution
QudaPrecision clover_cuda_prec
double reliable_delta_refinement
QudaPrecision clover_cpu_prec
QudaPrecision cuda_prec_ritz
int max_hq_res_restart_total
QudaDslashType dslash_type
QudaPrecision clover_cuda_prec_precondition
QudaVerbosity verbosity_precondition
double offset[QUDA_MAX_MULTI_SHIFT]
QudaInverterType inv_type
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
QudaPrecision clover_cuda_prec_sloppy
double tol_offset[QUDA_MAX_MULTI_SHIFT]
QudaInverterType inv_type_precondition
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
QudaFieldLocation input_location
QudaFieldLocation output_location
QudaPrecision cuda_prec_precondition
int use_resident_solution
QudaDiracFieldOrder dirac_order
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaUseInitGuess use_init_guess
QudaBoolean pre_orthonormalize
int smoother_schwarz_cycle[QUDA_MAX_MG_LEVEL]
double coarse_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL]
double setup_ca_lambda_max[QUDA_MAX_MG_LEVEL]
QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL]
double coarse_solver_tol[QUDA_MAX_MG_LEVEL]
QudaMultigridCycleType cycle_type[QUDA_MAX_MG_LEVEL]
double omega[QUDA_MAX_MG_LEVEL]
QudaBoolean thin_update_only
int setup_maxiter_refresh[QUDA_MAX_MG_LEVEL]
QudaPrecision precision_null[QUDA_MAX_MG_LEVEL]
char vec_infile[QUDA_MAX_MG_LEVEL][256]
int nu_post[QUDA_MAX_MG_LEVEL]
QudaBoolean use_eig_solver[QUDA_MAX_MG_LEVEL]
int coarse_solver_maxiter[QUDA_MAX_MG_LEVEL]
QudaEigParam * eig_param[QUDA_MAX_MG_LEVEL]
int n_vec[QUDA_MAX_MG_LEVEL]
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM]
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL]
int coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL]
QudaTransferType transfer_type[QUDA_MAX_MG_LEVEL]
QudaFieldLocation setup_location[QUDA_MAX_MG_LEVEL]
QudaBoolean post_orthonormalize
int num_setup_iter[QUDA_MAX_MG_LEVEL]
double mu_factor[QUDA_MAX_MG_LEVEL]
QudaCABasis setup_ca_basis[QUDA_MAX_MG_LEVEL]
QudaInverterType setup_inv_type[QUDA_MAX_MG_LEVEL]
QudaInverterType smoother[QUDA_MAX_MG_LEVEL]
int setup_maxiter[QUDA_MAX_MG_LEVEL]
int setup_ca_basis_size[QUDA_MAX_MG_LEVEL]
QudaBoolean preserve_deflation
double coarse_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL]
char vec_outfile[QUDA_MAX_MG_LEVEL][256]
QudaBoolean run_low_mode_check
double setup_ca_lambda_min[QUDA_MAX_MG_LEVEL]
double smoother_tol[QUDA_MAX_MG_LEVEL]
QudaCABasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL]
int nu_pre[QUDA_MAX_MG_LEVEL]
QudaBoolean vec_store[QUDA_MAX_MG_LEVEL]
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL]
QudaFieldLocation location[QUDA_MAX_MG_LEVEL]
QudaVerbosity verbosity[QUDA_MAX_MG_LEVEL]
QudaBoolean setup_minimize_memory
int spin_block_size[QUDA_MAX_MG_LEVEL]
int n_block_ortho[QUDA_MAX_MG_LEVEL]
QudaBoolean generate_all_levels
QudaComputeNullVector compute_null_vector
QudaSchwarzType smoother_schwarz_type[QUDA_MAX_MG_LEVEL]
QudaBoolean run_oblique_proj_check
QudaInvertParam * invert_param
double setup_tol[QUDA_MAX_MG_LEVEL]
QudaPrecision smoother_halo_precision[QUDA_MAX_MG_LEVEL]
QudaBoolean global_reduction[QUDA_MAX_MG_LEVEL]
QudaBoolean vec_load[QUDA_MAX_MG_LEVEL]
QudaEigParam mg_eig_param[QUDA_MAX_MG_LEVEL]
QudaMultigridParam mg_param
QudaInvertParam mg_inv_param
QudaPrecision preconditioner_precision
QudaReconstructType reconstruct
QudaVerbosity getVerbosity()