13 #define MAX(a,b) ((a)>(b)?(a):(b)) 15 #ifdef BUILD_MILC_INTERFACE 21 #include "nvToolsExt.h" 23 static const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
24 static const int num_colors =
sizeof(colors)/
sizeof(
uint32_t);
26 #define PUSH_RANGE(name,cid) { \ 28 color_id = color_id%num_colors;\ 29 nvtxEventAttributes_t eventAttrib = {0}; \ 30 eventAttrib.version = NVTX_VERSION; \ 31 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ 32 eventAttrib.colorType = NVTX_COLOR_ARGB; \ 33 eventAttrib.color = colors[color_id]; \ 34 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ 35 eventAttrib.message.ascii = name; \ 36 nvtxRangePushEx(&eventAttrib); \ 38 #define POP_RANGE nvtxRangePop(); 40 #define PUSH_RANGE(name,cid) 47 static int localDim[4];
49 static bool invalidate_quda_gauge =
true;
50 static bool create_quda_gauge =
false;
52 static bool invalidate_quda_mom =
true;
54 static void *df_preconditioner =
nullptr;
63 #define QUDAMILC_VERBOSE 1 66 #ifdef QUDAMILC_VERBOSE 82 void inline qudamilc_called(
const char *
func){
91 qudamilc_called<true>(__func__);
94 qudamilc_called<false>(__func__);
100 qudamilc_called<true>(__func__);
102 qudamilc_called<false>(__func__);
109 static int rankFromCoords(
const int *coords,
void *fdata)
111 int *dims =
static_cast<int *
>(fdata);
113 int rank = coords[3];
114 for (
int i = 2;
i >= 0;
i--) {
124 for(
int dir=0; dir<4; ++dir){ local_dim[dir] = input.
latsize[dir]; }
126 for(
int dir=0; dir<4; ++dir){ local_dim[dir] /= input.
machsize[dir]; }
128 for(
int dir=0; dir<4; ++dir){
129 if(local_dim[dir]%2 != 0){
130 printf(
"Error: Odd lattice dimensions are not supported\n");
135 for(
int dir=0; dir<4; ++dir) localDim[dir] = local_dim[dir];
142 for(
int dir=0; dir<4; ++dir)
gridDim[dir] = 1;
163 qudamilc_called<true>(__func__);
165 #if defined(GPU_HISQ_FORCE) || defined(GPU_UNITARIZE) 169 const double max_error = 1
e-10;
172 #ifdef GPU_HISQ_FORCE 189 #endif // UNITARIZE_GPU 192 qudamilc_called<false>(__func__);
201 for(
int dir=0; dir<4; ++dir)
gParam.X[dir] =
dim[dir];
211 gParam.tadpole_coeff = 1.0;
220 static void invalidateGaugeQuda() {
222 invalidate_quda_gauge =
true;
226 const double act_path_coeff[6],
void* inlink,
void*
fatlink,
void*
longlink)
228 qudamilc_called<true>(__func__);
238 qudamilc_called<false>(__func__);
241 invalidateGaugeQuda();
244 create_quda_gauge =
true;
245 qudamilc_called<false>(__func__);
251 const double act_path_coeff[6],
void* inlink,
void*
fatlink,
void* ulink)
253 qudamilc_called<true>(__func__);
260 qudamilc_called<false>(__func__);
263 invalidateGaugeQuda();
266 create_quda_gauge =
true;
267 qudamilc_called<false>(__func__);
272 const double level2_coeff[6],
const double fat7_coeff[6],
273 const void*
const w_link,
const void*
const v_link,
const void*
const u_link,
274 void*
const milc_momentum)
276 qudamilc_called<true>(__func__);
280 if (!invalidate_quda_mom) {
281 gParam.use_resident_mom =
true;
282 gParam.make_resident_mom =
true;
283 gParam.return_result_mom =
false;
285 gParam.use_resident_mom =
false;
286 gParam.make_resident_mom =
false;
287 gParam.return_result_mom =
true;
292 w_link, v_link, u_link,
293 quark_field, num_terms, num_naik_terms,
coeff,
295 qudamilc_called<false>(__func__);
301 const void*
const one_link_src[4],
const void*
const naik_src[4],
302 const void*
const link,
void*
const milc_momentum)
304 errorQuda(
"This interface has been removed and is no longer supported");
310 void** quark_field,
void* oprod[3])
312 errorQuda(
"This interface has been removed and is no longer supported");
318 qudamilc_called<true>(__func__);
322 void *gauge =
arg->site ?
arg->site :
arg->link;
323 void *mom =
arg->site ?
arg->site :
arg->mom;
330 if (!invalidate_quda_mom) {
339 qudamilc_called<false>(__func__);
345 qudamilc_called<true>(__func__);
356 qudamilc_called<false>(__func__);
362 qudamilc_called<true>(__func__);
367 void *gauge =
arg->site ?
arg->site :
arg->link;
373 qudamilc_called<false>(__func__);
379 qudamilc_called<true>(__func__);
386 if (invalidate_quda_mom) {
389 momParam.use_resident_mom =
false;
390 momParam.make_resident_mom =
true;
391 invalidate_quda_mom =
false;
394 momParam.use_resident_mom =
true;
395 momParam.make_resident_mom =
false;
396 invalidate_quda_mom =
true;
399 momParam.use_resident_mom =
false;
400 momParam.make_resident_mom =
false;
401 invalidate_quda_mom =
true;
406 qudamilc_called<false>(__func__);
411 static inline int opp(
int dir){
420 if (num_loop_types >= 1)
421 for(
int i=0;
i<4; ++
i){
428 if (num_loop_types >= 2)
429 for(
int i=0;
i<4; ++
i){
445 if (num_loop_types >= 3) {
447 for(
int i=0;
i<4; ++
i){
448 for(
int j=0; j<4; ++j){
449 if(
i==dir || j==dir ||
i==j)
continue;
467 double milc_loop_coeff[3],
471 qudamilc_called<true>(__func__);
474 switch (num_loop_types) {
485 errorQuda(
"Invalid num_loop_types = %d\n", num_loop_types);
491 void *gauge =
arg->site ?
arg->site :
arg->link;
492 void *mom =
arg->site ?
arg->site :
arg->mom;
499 double *loop_coeff =
static_cast<double*
>(
safe_malloc(numPaths*
sizeof(
double)));
502 if (num_loop_types >= 1)
for(
int i= 0;
i< 6; ++
i) {
503 loop_coeff[
i] = milc_loop_coeff[0];
506 if (num_loop_types >= 2)
for(
int i= 6;
i<24; ++
i) {
507 loop_coeff[
i] = milc_loop_coeff[1];
510 if (num_loop_types >= 3)
for(
int i=24;
i<48; ++
i) {
511 loop_coeff[
i] = milc_loop_coeff[2];
515 int** input_path_buf[4];
516 for(
int dir=0; dir<4; ++dir){
517 input_path_buf[dir] =
static_cast<int**
>(
safe_malloc(numPaths*
sizeof(
int*)));
518 for(
int i=0;
i<numPaths; ++
i){
524 if (!invalidate_quda_mom) {
548 for(
int dir=0; dir<4; ++dir){
549 for(
int i=0;
i<numPaths; ++
i)
host_free(input_path_buf[dir][
i]);
556 qudamilc_called<false>(__func__);
561 static int getFatLinkPadding(
const int dim[4])
571 static void setInvertParams(
const int dim[4],
577 double target_residual,
578 double target_residual_hq,
580 double reliable_delta,
589 invertParam->
tol = target_residual;
590 invertParam->
tol_hq =target_residual_hq;
594 invertParam->
maxiter = maxiter;
610 invertParam->
gflops = 0.0;
645 static void setInvertParams(
const int dim[4],
652 const double target_residual_offset[],
653 const double target_residual_hq_offset[],
655 double reliable_delta,
662 const double null_mass = -1;
663 const double null_residual = -1;
667 null_mass, null_residual, null_residual, maxiter, reliable_delta,
parity,
verbosity, inverter, invertParam);
670 for(
int i=0;
i<num_offset; ++
i){
681 static void setGaugeParams(
const int dim[4],
686 const double tadpole,
690 for(
int dir=0; dir<4; ++dir){
718 static void setColorSpinorParams(
const int dim[4],
727 for(
int dir=0; dir<4; ++dir)
param->x[dir] =
dim[dir];
730 param->precision = precision;
768 static size_t getColorVectorOffset(
QudaParity local_parity,
bool even_odd_exchange,
const int dim[4])
774 offset = even_odd_exchange ? volume*6/2 : 0;
776 offset = even_odd_exchange ? 0 : volume*6/2;
787 const double target_residual[],
788 const double target_fermilab_residual[],
791 const double tadpole,
793 void** solutionArray,
794 double*
const final_residual,
795 double*
const final_fermilab_residual,
800 qudamilc_called<true>(__func__,
verbosity);
802 if(target_residual[0] == 0){
803 errorQuda(
"qudaMultishiftInvert: zeroth target residual cannot be zero\n");
809 const bool use_mixed_precision = (((quda_precision==2) && inv_args.
mixed_precision) ||
810 ((quda_precision==1) && (inv_args.
mixed_precision==2)) ) ? true :
false;
815 default: device_precision_sloppy = device_precision;
818 QudaPrecision device_precision_precondition = device_precision_sloppy;
821 setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &
gaugeParam);
831 printfQuda(
"Using QUDA_L2_RELATIVE_RESIDUAL");
833 printfQuda(
"Using QUDA_HEAVY_QUARK_RESIDUAL");
840 const double reliable_delta = (use_mixed_precision ? 1
e-1 : 0.0);
841 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
842 num_offsets,
offset, target_residual, target_fermilab_residual,
847 setColorSpinorParams(localDim, host_precision, &
csParam);
850 if (*num_iters == -1) {
851 invalidateGaugeQuda();
855 char *quda_reconstruct =
getenv(
"QUDA_MILC_HISQ_RECONSTRUCT");
857 if (!quda_reconstruct ||
strcmp(quda_reconstruct,
"18")==0) {
859 }
else if (
strcmp(quda_reconstruct,
"13")==0) {
861 }
else if (
strcmp(quda_reconstruct,
"9")==0) {
864 errorQuda(
"reconstruct request %s not supported", quda_reconstruct);
868 if(invalidate_quda_gauge || !create_quda_gauge ){
869 const int fat_pad = getFatLinkPadding(localDim);
875 const int long_pad = 3*fat_pad;
880 invalidate_quda_gauge =
false;
883 void** sln_pointer = (
void**)
malloc(num_offsets*
sizeof(
void*));
884 int quark_offset = getColorVectorOffset(local_parity,
false,
gaugeParam.
X)*host_precision;
885 void* src_pointer =
static_cast<char*
>(source) + quark_offset;
887 for(
int i=0; i<num_offsets; ++i) sln_pointer[i] = static_cast<char*>(solutionArray[
i]) + quark_offset;
893 *num_iters = invertParam.
iter;
894 for(
int i=0;
i<num_offsets; ++
i){
899 if(!create_quda_gauge) invalidateGaugeQuda();
901 qudamilc_called<false>(__func__,
verbosity);
912 double target_residual,
913 double target_fermilab_residual,
916 const double tadpole,
919 double*
const final_residual,
920 double*
const final_fermilab_residual,
924 qudamilc_called<true>(__func__,
verbosity);
926 if(target_fermilab_residual == 0 && target_residual == 0){
927 errorQuda(
"qudaInvert: requesting zero residual\n");
939 default: device_precision_sloppy = device_precision;
942 QudaPrecision device_precision_precondition = device_precision_sloppy;
945 setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &
gaugeParam);
954 double& target_res = target_residual;
955 double& target_res_hq = target_fermilab_residual;
956 const double reliable_delta = 1
e-1;
958 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
964 setColorSpinorParams(localDim, host_precision, &
csParam);
966 const int fat_pad = getFatLinkPadding(localDim);
967 const int long_pad = 3*fat_pad;
971 invalidateGaugeQuda();
974 if(invalidate_quda_gauge || !create_quda_gauge){
985 invalidate_quda_gauge =
false;
991 int quark_offset = getColorVectorOffset(local_parity,
false,
gaugeParam.
X)*host_precision;
993 invertQuda(static_cast<char*>(solution) + quark_offset,
994 static_cast<char*>(source) + quark_offset,
998 *num_iters = invertParam.
iter;
999 *final_residual = invertParam.
true_res;
1000 *final_fermilab_residual = invertParam.
true_res_hq;
1002 if(!create_quda_gauge) invalidateGaugeQuda();
1004 qudamilc_called<false>(__func__,
verbosity);
1014 const double tadpole,
1020 qudamilc_called<true>(__func__,
verbosity);
1026 QudaPrecision device_precision_precondition = device_precision_sloppy;
1030 setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &
gaugeParam);
1036 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
1040 setColorSpinorParams(localDim, host_precision, &
csParam);
1042 const int fat_pad = getFatLinkPadding(localDim);
1043 const int long_pad = 3*fat_pad;
1047 invalidateGaugeQuda();
1050 if(invalidate_quda_gauge || !create_quda_gauge){
1061 invalidate_quda_gauge =
false;
1064 int src_offset = getColorVectorOffset(other_parity,
false,
gaugeParam.
X);
1065 int dst_offset = getColorVectorOffset(local_parity,
false,
gaugeParam.
X);
1067 dslashQuda(static_cast<char*>(dst) + dst_offset*host_precision,
1068 static_cast<char*>(
src) + src_offset*host_precision,
1069 &invertParam, local_parity);
1071 if(!create_quda_gauge) invalidateGaugeQuda();
1073 qudamilc_called<false>(__func__,
verbosity);
1082 double target_residual,
1083 double target_fermilab_residual,
1086 const double tadpole,
1088 void** solutionArray,
1089 double*
const final_residual,
1090 double*
const final_fermilab_residual,
1096 qudamilc_called<true>(__func__,
verbosity);
1098 if(target_fermilab_residual == 0 && target_residual == 0){
1099 errorQuda(
"qudaInvert: requesting zero residual\n");
1111 default: device_precision_sloppy = device_precision;
1114 QudaPrecision device_precision_precondition = device_precision_sloppy;
1117 setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &
gaugeParam);
1123 invertParam.
num_src = num_src;
1127 double& target_res = target_residual;
1128 double& target_res_hq = target_fermilab_residual;
1129 const double reliable_delta = 1
e-1;
1131 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
1139 setColorSpinorParams(localDim, host_precision, &
csParam);
1141 const int fat_pad = getFatLinkPadding(localDim);
1142 const int long_pad = 3*fat_pad;
1146 invalidateGaugeQuda();
1149 if(invalidate_quda_gauge || !create_quda_gauge){
1160 invalidate_quda_gauge =
false;
1163 int quark_offset = getColorVectorOffset(local_parity,
false,
gaugeParam.
X)*host_precision;
1164 void** sln_pointer = (
void**)
malloc(num_src*
sizeof(
void*));
1165 void** src_pointer = (
void**)
malloc(num_src*
sizeof(
void*));
1167 for(
int i=0; i<num_src; ++i) sln_pointer[i] = static_cast<char*>(solutionArray[
i]) + quark_offset;
1168 for(
int i=0; i<num_src; ++i) src_pointer[i] = static_cast<char*>(sourceArray[
i]) + quark_offset;
1176 *num_iters = invertParam.
iter;
1177 *final_residual = invertParam.
true_res;
1178 *final_fermilab_residual = invertParam.
true_res_hq;
1180 if(!create_quda_gauge) invalidateGaugeQuda();
1182 qudamilc_called<false>(__func__,
verbosity);
1191 double target_residual,
1192 double target_fermilab_residual,
1195 const double tadpole,
1200 const int last_rhs_flag,
1201 double*
const final_residual,
1202 double*
const final_fermilab_residual,
1207 qudamilc_called<true>(__func__,
verbosity);
1209 if(target_fermilab_residual == 0 && target_residual == 0){
1210 errorQuda(
"qudaInvert: requesting zero residual\n");
1221 default: device_precision_sloppy = device_precision;
1224 QudaPrecision device_precision_precondition = device_precision_sloppy;
1227 setGaugeParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition, tadpole, &
gaugeParam);
1236 double& target_res = target_residual;
1237 double& target_res_hq = target_fermilab_residual;
1238 const double reliable_delta = 1
e-1;
1240 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, device_precision_precondition,
1249 invertParam.
nev = eig_args.
nev;
1258 invertParam.
rhs_idx = rhs_idx;
1268 setColorSpinorParams(localDim, host_precision, &
csParam);
1270 if((invalidate_quda_gauge || !create_quda_gauge) && (rhs_idx == 0)){
1272 const int fat_pad = getFatLinkPadding(localDim);
1273 const int long_pad = 3*fat_pad;
1286 invalidate_quda_gauge =
false;
1289 int quark_offset = getColorVectorOffset(local_parity,
false,
gaugeParam.
X)*host_precision;
1295 invertQuda(static_cast<char*>(solution) + quark_offset,
1296 static_cast<char*>(source) + quark_offset,
1302 *num_iters = invertParam.
iter;
1303 *final_residual = invertParam.
true_res;
1304 *final_fermilab_residual = invertParam.
true_res_hq;
1306 if(!create_quda_gauge && last_rhs_flag) invalidateGaugeQuda();
1308 qudamilc_called<false>(__func__,
verbosity);
1314 static int clover_alloc = 0;
1318 qudamilc_called<true>(__func__);
1322 qudamilc_called<false>(__func__);
1329 qudamilc_called<true>(__func__);
1333 qudamilc_called<false>(__func__);
1340 qudamilc_called<true>(__func__);
1342 qudamilc_called<false>(__func__);
1348 int external_precision,
int quda_precision,
double kappa,
double reliable_delta);
1353 qudamilc_called<true>(__func__);
1377 qudamilc_called<false>(__func__);
1383 int external_precision,
int quda_precision) {
1392 default: device_precision_sloppy = device_precision;
1404 bool trivial_phase =
true;
1405 for(
int dir=0; dir<3; ++dir){
1431 int external_precision,
int quda_precision,
double kappa,
double reliable_delta) {
1439 default: device_precision_sloppy = device_precision;
1455 invertParam.
cpu_prec = host_precision;
1456 invertParam.
cuda_prec = device_precision;
1476 const void* milc_link) {
1477 qudamilc_called<true>(__func__);
1479 setGaugeParams(
gaugeParam, localDim, inv_args, external_precision, quda_precision);
1482 qudamilc_called<false>(__func__);
1487 qudamilc_called<true>(__func__);
1489 qudamilc_called<false>(__func__);
1497 void* milc_clover_inv,
1503 qudamilc_called<true>(__func__);
1505 setInvertParam(invertParam, inv_args, external_precision, quda_precision, 0.0, 0.0);
1506 invertParam.solution_type = solution_type;
1509 invertParam.compute_clover_trlog = compute_trlog;
1513 if (clover_alloc == 0) {
1517 errorQuda(
"Clover term already allocated");
1521 if (compute_trlog) {
1522 trlog[0] = invertParam.trlogA[0];
1523 trlog[1] = invertParam.trlogA[1];
1525 qudamilc_called<false>(__func__);
1531 qudamilc_called<true>(__func__);
1532 if (clover_alloc==1) {
1536 errorQuda(
"Trying to free non-allocated clover term");
1538 qudamilc_called<false>(__func__);
1547 double target_residual,
1548 double target_fermilab_residual,
1551 void* cloverInverse,
1554 double*
const final_residual,
1555 double*
const final_fermilab_residual,
1558 qudamilc_called<true>(__func__);
1559 if(target_fermilab_residual == 0 && target_residual == 0){
1560 errorQuda(
"qudaCloverInvert: requesting zero residual\n");
1566 if (
clover || cloverInverse) {
1571 double reliable_delta = 1
e-1;
1574 setInvertParam(invertParam, inv_args, external_precision, quda_precision,
kappa, reliable_delta);
1579 invertParam.
tol = target_residual;
1580 invertParam.
tol_hq = target_fermilab_residual;
1592 *num_iters = invertParam.
iter;
1593 *final_residual = invertParam.
true_res;
1594 *final_fermilab_residual = invertParam.
true_res_hq;
1598 qudamilc_called<false>(__func__);
1608 double target_residual,
1609 double target_fermilab_residual,
1612 void* cloverInverse,
1617 const int last_rhs_flag,
1618 double*
const final_residual,
1619 double*
const final_fermilab_residual,
1622 qudamilc_called<true>(__func__);
1623 if(target_fermilab_residual == 0 && target_residual == 0){
1624 errorQuda(
"qudaCloverInvert: requesting zero residual\n");
1628 if (link && (rhs_idx == 0))
qudaLoadGaugeField(external_precision, quda_precision, inv_args, link);
1630 if ( (
clover || cloverInverse) && (rhs_idx == 0)) {
1635 double reliable_delta = 1
e-1;
1638 setInvertParam(invertParam, inv_args, external_precision, quda_precision,
kappa, reliable_delta);
1643 invertParam.
tol = target_residual;
1644 invertParam.
tol_hq = target_fermilab_residual;
1657 invertParam.
nev = eig_args.
nev;
1666 invertParam.
rhs_idx = rhs_idx;
1683 *num_iters = invertParam.
iter;
1684 *final_residual = invertParam.
true_res;
1685 *final_fermilab_residual = invertParam.
true_res_hq;
1689 qudamilc_called<false>(__func__);
1701 const double* target_residual_offset,
1702 const void* milc_link,
1704 void* milc_clover_inv,
1706 void** solutionArray,
1707 double*
const final_residual,
1712 qudamilc_called<true>(__func__,
verbosity);
1714 for(
int i=0;
i<num_offsets; ++
i){
1715 if(target_residual_offset[
i] == 0){
1716 errorQuda(
"qudaCloverMultishiftInvert: target residual cannot be zero\n");
1722 const bool use_mixed_precision = (((quda_precision==2) && inv_args.
mixed_precision) ||
1723 ((quda_precision==1) && (inv_args.
mixed_precision==2)) ) ? true :
false;
1724 double reliable_delta = (use_mixed_precision) ? 1
e-2 : 0.0;
1726 setInvertParam(invertParam, inv_args, external_precision, quda_precision,
kappa, reliable_delta);
1729 for(
int i=0;
i<num_offsets; ++
i){
1733 invertParam.
tol = target_residual_offset[0];
1748 if (num_offsets==1 &&
offset[0] == 0) {
1750 char *quda_solver =
getenv(
"QUDA_MILC_CLOVER_SOLVER");
1753 if (!quda_solver ||
strcmp(quda_solver,
"CHRONO_CG_SOLVER")==0) {
1758 }
else if (
strcmp(quda_solver,
"BICGSTAB_SOLVER")==0){
1762 }
else if (
strcmp(quda_solver,
"CG_SOLVER")==0){
1768 invertQuda(solutionArray[0], source, &invertParam);
1769 *final_residual = invertParam.
true_res;
1776 *num_iters = invertParam.
iter;
1778 qudamilc_called<false>(__func__,
verbosity);
1784 unsigned int gauge_dir,
1786 int verbose_interval,
1789 unsigned int reunit_interval,
1790 unsigned int stopWtheta,
1804 computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \
1808 printfQuda(
"Time to Compute: %lf\n", timeinfo[1]);
1810 printfQuda(
"Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
1816 unsigned int gauge_dir,
1818 int verbose_interval,
1820 unsigned int autotune,
1822 unsigned int stopWtheta,
1840 printfQuda(
"Time to Compute: %lf\n", timeinfo[1]);
1842 printfQuda(
"Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
1847 #endif // BUILD_MILC_INTERFACE void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck, int nvector, double multiplicity, void *gauge, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param)
static QudaGaugeParam qudaGaugeParam
QudaDiracFieldOrder dirac_order
QudaMassNormalization mass_normalization
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaReconstructType reconstruct_sloppy
void freeCloverQuda(void)
QudaGaugeParam gaugeParam
int use_resident_solution
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
#define pool_pinned_free(ptr)
void qudaHisqParamsInit(QudaHisqParams_t hisq_params)
QudaVerbosity verbosity_precondition
enum QudaPrecision_s QudaPrecision
void qudaUnitarizeSU3(int prec, double tol, QudaMILCSiteArg_t *arg)
void destroyDeflationQuda(void *df_instance)
void qudaGaugeFixingFFT(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double alpha, unsigned int autotune, double tolerance, unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
QudaExtLibType deflation_ext_lib
void setUnitarizeForceConstants(double unitarize_eps, double hisq_force_filter, double max_det_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
Set the constant parameters for the force unitarization.
QudaInverterType inv_type_precondition
QudaVerbosity getVerbosity()
QudaPrecision cuda_prec_ritz
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
QudaDslashType dslash_type
QudaReconstructType reconstruct_precondition
QudaInverterType inv_type
enum QudaSolveType_s QudaSolveType
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaExtLibType deflation_ext_lib
void qudaInit(QudaInitArgs_t input)
double reunit_svd_rel_error
QudaMemoryType mem_type_ritz
static bool reunit_svd_only
void setDeflationParam(QudaEigParam &df_param)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
double momActionQuda(void *momentum, QudaGaugeParam *param)
QudaStaggeredPhase staggered_phase_type
char * strcpy(char *__dst, const char *__src)
void qudaLoadGaugeField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *milc_link)
void qudaEigCGInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, const double tadpole, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
int make_resident_solution
void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, const double *target_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void **solutionArray, double *const final_residual, int *num_iters)
void qudaEigCGCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
QudaGaugeFieldOrder gauge_order
void qudaInvertMsrc(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, const double tadpole, void **sourceArray, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters, int num_src)
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
void qudaGaugeFixingOVR(const int precision, const unsigned int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with overrelaxation with support for single and multi GPU.
void qudaSaveGaugeField(void *gauge, void *inGauge)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaPrecision & cuda_prec_precondition
int make_resident_solution
double qudaMomAction(int precision, void *momentum)
void qudaSetLayout(QudaLayout_t layout)
void exit(int) __attribute__((noreturn))
QudaPrecision clover_cuda_prec_sloppy
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata)
int staggered_phase_applied
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaUseInitGuess use_init_guess
int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with overrelaxation with support for single and multi GPU.
char * index(const char *, int)
void setInvertParam(QudaInvertParam &inv_param)
QudaSolutionType solution_type
void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param)
QudaMemoryType mem_type_ritz
int strcmp(const char *__s1, const char *__s2)
QudaPrecision clover_cuda_prec
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
QudaInvertParam * invert_param
void initQuda(int device)
void qudaFreePinned(void *ptr)
void qudaUpdateU(int precision, double eps, QudaMILCSiteArg_t *arg)
QudaFieldLocation output_location
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
QudaPrecision clover_cuda_prec_precondition
int printf(const char *,...) __attribute__((__format__(__printf__
bool canReuseResidentGauge(QudaInvertParam *inv_param)
QudaPrecision & cuda_prec_sloppy
VOLATILE spinorFloat kappa
void qudaFreeCloverField()
void * newDeflationQuda(QudaEigParam *param)
QudaPrecision cuda_prec_sloppy
static bool initialized
Profiler for initQuda.
void qudaMultishiftInvert(int external_precision, int precision, int num_offsets, double *const offset, QudaInvertArgs_t inv_args, const double *target_residual, const double *target_fermilab_residual, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
double tol_offset[QUDA_MAX_MULTI_SHIFT]
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
QudaInvertParam newQudaInvertParam(void)
void * qudaCreateGaugeField(void *gauge, int geometry, int precision)
QudaPrecision cuda_prec_precondition
QudaCloverFieldOrder clover_order
static __inline__ size_t p
void saveGaugeFieldQuda(void *outGauge, void *inGauge, QudaGaugeParam *param)
QudaInverterType solver_type
void qudaRephase(int prec, void *gauge, int flag, double i_mu)
void qudaInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void *solution, double *const final_resid, double *const final_rel_resid, int *num_iters)
enum QudaSolutionType_s QudaSolutionType
void qudaComputeOprod(int precision, int num_terms, int num_naik_terms, double **coeff, double scale, void **quark_field, void *oprod[3])
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
#define PUSH_RANGE(name, cid)
static double unitarize_eps
double reunit_svd_abs_error
double offset[QUDA_MAX_MULTI_SHIFT]
void qudaFreeGaugeField()
int use_sloppy_partial_accumulator
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
enum QudaLinkType_s QudaLinkType
void qudaDslash(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *const milc_fatlink, const void *const milc_longlink, const double tadpole, void *source, void *solution, int *num_iters)
void qudaLoadCloverField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, void *milc_clover, void *milc_clover_inv, QudaSolutionType solution_type, QudaSolveType solve_type, double clover_coeff, int compute_trlog, double *trlog)
QudaBoolean import_vectors
QudaFieldLocation location
QudaFieldLocation location_ritz
#define safe_malloc(size)
void qudaCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param)
static bool reunit_allow_svd
#define pool_pinned_malloc(size)
int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
void qudaLoadUnitarizedLink(int precision, QudaFatLinkArgs_t fatlink_args, const double path_coeff[6], void *inlink, void *fatlink, void *ulink)
void destroyGaugeFieldQuda(void *gauge)
enum QudaFieldLocation_s QudaFieldLocation
QudaPrecision cuda_prec_precondition
void updateGaugeFieldQuda(void *gauge, void *momentum, double dt, int conj_mom, int exact, QudaGaugeParam *param)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param)
void qudaCloverForce(void *mom, double dt, void **x, void **p, double *coeff, double kappa, double ck, int nvec, double multiplicity, void *gauge, int precision, QudaInvertArgs_t inv_args)
void * qudaAllocatePinned(size_t bytes)
QudaMemoryType mem_type_ritz
cudaGaugeField * cudaGauge
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void * createGaugeFieldQuda(void *gauge, int geometry, QudaGaugeParam *param)
QudaReconstructType reconstruct
void qudaHisqForce(int precision, int num_terms, int num_naik_terms, double **coeff, void **quark_field, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void *const milc_momentum)
QudaFieldLocation location_ritz
QudaResidualType residual_type
enum QudaVerbosity_s QudaVerbosity
int use_resident_solution
void computeHISQForceQuda(void *momentum, long long *flops, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void **quark, int num, int num_naik, double **coeff, QudaGaugeParam *param)
void qudaLoadKSLink(int precision, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void *inlink, void *fatlink, void *longlink)
QudaPrecision Precision() const
QudaPrecision clover_cpu_prec
QudaPrecision cuda_prec_ritz
void qudaGaugeForce(int precision, int num_loop_types, double milc_loop_coeff[3], double eb3, QudaMILCSiteArg_t *arg)
char * getenv(const char *)
QudaEigParam newQudaEigParam(void)
enum QudaInverterType_s QudaInverterType
enum QudaMemoryType_s QudaMemoryType
void qudaAsqtadForce(int precision, const double act_path_coeff[6], const void *const one_link_src[4], const void *const naik_src[4], const void *const link, void *const milc_momentum)
static void createGaugeForcePaths(int **paths, int dir, int num_loop_types)
enum QudaExtLibType_s QudaExtLibType
void qudaDestroyGaugeField(void *gauge)
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
enum cudaDeviceAttr attr int device