13 #define MAX(a,b) ((a)>(b)?(a):(b)) 15 #ifdef BUILD_MILC_INTERFACE 22 #if QUDA_NVTX_VERSION == 3 23 #include "nvtx3/nvToolsExt.h" 25 #include "nvToolsExt.h" 28 static const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
29 static const int num_colors =
sizeof(colors)/
sizeof(uint32_t);
31 #define PUSH_RANGE(name,cid) { \ 33 color_id = color_id%num_colors;\ 34 nvtxEventAttributes_t eventAttrib = {0}; \ 35 eventAttrib.version = NVTX_VERSION; \ 36 eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \ 37 eventAttrib.colorType = NVTX_COLOR_ARGB; \ 38 eventAttrib.color = colors[color_id]; \ 39 eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; \ 40 eventAttrib.message.ascii = name; \ 41 nvtxRangePushEx(&eventAttrib); \ 43 #define POP_RANGE nvtxRangePop(); 45 #define PUSH_RANGE(name,cid) 51 static int gridDim[4];
52 static int localDim[4];
54 static bool invalidate_quda_gauge =
true;
55 static bool create_quda_gauge =
false;
57 static bool invalidate_quda_mom =
true;
59 static void *df_preconditioner =
nullptr;
68 #define QUDAMILC_VERBOSE 1 70 template <
bool start>
void inline qudamilc_called(
const char *func,
QudaVerbosity verb)
79 #ifdef QUDAMILC_VERBOSE 82 printfQuda(
"QUDA_MILC_INTERFACE: %s (called) \n", func);
84 printfQuda(
"QUDA_MILC_INTERFACE: %s (return) \n", func);
90 template <
bool start>
void inline qudamilc_called(
const char *func) { qudamilc_called<start>(func,
getVerbosity()); }
98 qudamilc_called<true>(__func__);
101 qudamilc_called<false>(__func__);
106 qudamilc_called<true>(__func__);
108 qudamilc_called<false>(__func__);
110 #if defined(MULTI_GPU) && !defined(QMP_COMMS) 115 static int rankFromCoords(
const int *coords,
void *fdata)
117 int *
dims =
static_cast<int *
>(fdata);
119 int rank = coords[3];
120 for (
int i = 2; i >= 0; i--) {
121 rank = dims[i] * rank + coords[i];
130 for(
int dir=0; dir<4; ++dir){ local_dim[dir] = input.
latsize[dir]; }
132 for(
int dir=0; dir<4; ++dir){ local_dim[dir] /= input.
machsize[dir]; }
134 for(
int dir=0; dir<4; ++dir){
135 if(local_dim[dir]%2 != 0){
136 printf(
"Error: Odd lattice dimensions are not supported\n");
141 for(
int dir=0; dir<4; ++dir) localDim[dir] = local_dim[dir];
144 for(
int dir=0; dir<4; ++dir) gridDim[dir] = input.
machsize[dir];
152 for(
int dir=0; dir<4; ++dir) gridDim[dir] = 1;
153 static int device = input.
device;
172 if(initialized)
return;
173 qudamilc_called<true>(__func__);
175 #if defined(GPU_HISQ_FORCE) || defined(GPU_UNITARIZE) 179 const double max_error = 1e-10;
182 #ifdef GPU_HISQ_FORCE 199 #endif // UNITARIZE_GPU 202 qudamilc_called<false>(__func__);
211 for(
int dir=0; dir<4; ++dir) gParam.
X[dir] = dim[dir];
213 gParam.
type = link_type;
230 static void invalidateGaugeQuda() {
231 qudamilc_called<true>(__func__);
233 invalidate_quda_gauge =
true;
234 qudamilc_called<false>(__func__);
238 const double act_path_coeff[6],
void* inlink,
void*
fatlink,
void*
longlink)
240 qudamilc_called<true>(__func__);
246 param.staggered_phase_applied = 1;
252 invalidateGaugeQuda();
255 create_quda_gauge =
true;
256 qudamilc_called<false>(__func__);
262 const double act_path_coeff[6],
void* inlink,
void*
fatlink,
void* ulink)
264 qudamilc_called<true>(__func__);
271 qudamilc_called<false>(__func__);
274 invalidateGaugeQuda();
277 create_quda_gauge =
true;
278 qudamilc_called<false>(__func__);
282 void qudaHisqForce(
int prec,
int num_terms,
int num_naik_terms,
double dt,
double** coeff,
void** quark_field,
283 const double level2_coeff[6],
const double fat7_coeff[6],
284 const void*
const w_link,
const void*
const v_link,
const void*
const u_link,
285 void*
const milc_momentum)
287 qudamilc_called<true>(__func__);
291 if (!invalidate_quda_mom) {
292 gParam.use_resident_mom =
true;
293 gParam.make_resident_mom =
true;
294 gParam.return_result_mom =
false;
296 gParam.use_resident_mom =
false;
297 gParam.make_resident_mom =
false;
298 gParam.return_result_mom =
true;
302 w_link, v_link, u_link,
303 quark_field, num_terms, num_naik_terms, coeff,
305 qudamilc_called<false>(__func__);
311 const void*
const one_link_src[4],
const void*
const naik_src[4],
312 const void*
const link,
void*
const milc_momentum)
314 errorQuda(
"This interface has been removed and is no longer supported");
319 void qudaComputeOprod(
int prec,
int num_terms,
int num_naik_terms,
double** coeff,
double scale,
320 void** quark_field,
void* oprod[3])
322 errorQuda(
"This interface has been removed and is no longer supported");
328 qudamilc_called<true>(__func__);
337 gaugeParam.site_size = arg->
size;
340 if (!invalidate_quda_mom) {
341 gaugeParam.use_resident_mom =
true;
342 gaugeParam.make_resident_mom =
true;
344 gaugeParam.use_resident_mom =
false;
345 gaugeParam.make_resident_mom =
false;
349 qudamilc_called<false>(__func__);
355 qudamilc_called<true>(__func__);
360 gaugeParam.staggered_phase_applied = 1-flag;
362 gaugeParam.i_mu = i_mu;
366 qudamilc_called<false>(__func__);
372 qudamilc_called<true>(__func__);
379 gaugeParam.site_size = arg->
size;
383 qudamilc_called<false>(__func__);
389 qudamilc_called<true>(__func__);
396 if (invalidate_quda_mom) {
399 momParam.use_resident_mom =
false;
400 momParam.make_resident_mom =
true;
401 invalidate_quda_mom =
false;
404 momParam.use_resident_mom =
true;
405 momParam.make_resident_mom =
false;
406 invalidate_quda_mom =
true;
409 momParam.use_resident_mom =
false;
410 momParam.make_resident_mom =
false;
411 invalidate_quda_mom =
true;
416 qudamilc_called<false>(__func__);
421 static inline int opp(
int dir){
430 if (num_loop_types >= 1)
431 for(
int i=0; i<4; ++i){
433 paths[
index][0] = i; paths[
index][1] =
opp(dir); paths[index++][2] =
opp(i);
434 paths[
index][0] =
opp(i); paths[
index][1] =
opp(dir); paths[index++][2] = i;
438 if (num_loop_types >= 2)
439 for(
int i=0; i<4; ++i){
455 if (num_loop_types >= 3) {
457 for(
int i=0; i<4; ++i){
458 for(
int j=0; j<4; ++j){
459 if(i==dir || j==dir || i==j)
continue;
477 double milc_loop_coeff[3],
481 qudamilc_called<true>(__func__);
484 switch (num_loop_types) {
495 errorQuda(
"Invalid num_loop_types = %d\n", num_loop_types);
509 double *loop_coeff =
static_cast<double*
>(
safe_malloc(numPaths*
sizeof(
double)));
512 if (num_loop_types >= 1)
for(
int i= 0; i< 6; ++i) {
513 loop_coeff[i] = milc_loop_coeff[0];
516 if (num_loop_types >= 2)
for(
int i= 6; i<24; ++i) {
517 loop_coeff[i] = milc_loop_coeff[1];
520 if (num_loop_types >= 3)
for(
int i=24; i<48; ++i) {
521 loop_coeff[i] = milc_loop_coeff[2];
525 int** input_path_buf[4];
526 for(
int dir=0; dir<4; ++dir){
527 input_path_buf[dir] =
static_cast<int**
>(
safe_malloc(numPaths*
sizeof(
int*)));
528 for(
int i=0; i<numPaths; ++i){
529 input_path_buf[dir][i] =
static_cast<int*
>(
safe_malloc(length[i]*
sizeof(
int)));
534 if (!invalidate_quda_mom) {
556 loop_coeff, numPaths, max_length, eb3, &qudaGaugeParam);
558 for(
int dir=0; dir<4; ++dir){
559 for(
int i=0; i<numPaths; ++i)
host_free(input_path_buf[dir][i]);
566 qudamilc_called<false>(__func__);
571 static int getLinkPadding(
const int dim[4])
573 int padding =
MAX(dim[1]*dim[2]*dim[3]/2, dim[0]*dim[2]*dim[3]/2);
574 padding =
MAX(padding, dim[0]*dim[1]*dim[3]/2);
575 padding =
MAX(padding, dim[0]*dim[1]*dim[2]/2);
587 invertParam->
tol = target_residual;
588 invertParam->
tol_hq = target_residual_hq;
608 invertParam->
maxiter = maxiter;
625 invertParam->
gflops = 0.0;
655 const double target_residual_offset[],
const double target_residual_hq_offset[],
659 const double null_mass = -1;
665 for (
int i = 0; i < num_offset; ++i) {
666 invertParam->
offset[i] = offset[i];
667 invertParam->
tol_offset[i] = target_residual_offset[i];
668 invertParam->
tol_hq_offset[i] = target_residual_hq_offset[i];
675 char *reconstruct_env = getenv(
"QUDA_MILC_HISQ_RECONSTRUCT");
676 if (!reconstruct_env || strcmp(reconstruct_env,
"18") == 0) {
678 }
else if (strcmp(reconstruct_env,
"13") == 0) {
680 }
else if (strcmp(reconstruct_env,
"9") == 0) {
683 errorQuda(
"QUDA_MILC_HISQ_RECONSTRUCT=%s not supported", reconstruct_env);
688 char *reconstruct_sloppy_env = getenv(
"QUDA_MILC_HISQ_RECONSTRUCT_SLOPPY");
689 if (!reconstruct_sloppy_env) {
690 reconstruct_sloppy = reconstruct;
691 }
else if (strcmp(reconstruct_sloppy_env,
"18") == 0) {
693 }
else if (strcmp(reconstruct_sloppy_env,
"13") == 0) {
695 }
else if (strcmp(reconstruct_sloppy_env,
"9") == 0) {
698 errorQuda(
"QUDA_MILC_HISQ_RECONSTRUCT_SLOPPY=%s not supported", reconstruct_sloppy_env);
707 for (
int dir = 0; dir < 4; ++dir) fat_param.
X[dir] = dim[dir];
720 fat_param.
ga_pad = getLinkPadding(dim);
727 long_param = fat_param;
748 for (
int dir = 0; dir < 4; ++dir) param->
x[dir] = dim[dir];
781 static size_t getColorVectorOffset(
QudaParity local_parity,
bool even_odd_exchange,
const int dim[4])
784 int volume = dim[0]*dim[1]*dim[2]*dim[3];
787 offset = even_odd_exchange ? volume*6/2 : 0;
789 offset = even_odd_exchange ? 0 : volume*6/2;
794 void qudaMultishiftInvert(
int external_precision,
int quda_precision,
int num_offsets,
double *
const offset,
796 const double target_fermilab_residual[],
const void *
const fatlink,
797 const void *
const longlink,
void *source,
void **solutionArray,
double *
const final_residual,
798 double *
const final_fermilab_residual,
int *num_iters)
801 qudamilc_called<true>(__func__,
verbosity);
803 if (target_residual[0] == 0)
errorQuda(
"qudaMultishiftInvert: zeroth target residual cannot be zero\n");
807 const bool use_mixed_precision = (((quda_precision==2) && inv_args.
mixed_precision) ||
808 ((quda_precision==1) && (inv_args.
mixed_precision==2)) ) ? true :
false;
813 default: device_precision_sloppy = device_precision;
818 setGaugeParams(fat_param, long_param,
fatlink,
longlink, localDim, host_precision, device_precision,
825 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, num_offsets, offset,
826 target_residual, target_fermilab_residual, inv_args.
max_iter, reliable_delta, local_parity, verbosity,
838 setColorSpinorParams(localDim, host_precision, &csParam);
841 if (*num_iters == -1) invalidateGaugeQuda();
844 if (invalidate_quda_gauge || !create_quda_gauge) {
847 invalidate_quda_gauge =
false;
852 void** sln_pointer = (
void**)malloc(num_offsets*
sizeof(
void*));
853 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
854 void* src_pointer =
static_cast<char*
>(source) + quark_offset;
856 for (
int i = 0; i < num_offsets; ++i) sln_pointer[i] = static_cast<char *>(solutionArray[i]) + quark_offset;
862 *num_iters = invertParam.
iter;
863 for (
int i = 0; i < num_offsets; ++i) {
868 if (!create_quda_gauge) invalidateGaugeQuda();
870 qudamilc_called<false>(__func__,
verbosity);
874 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
875 const void *
const longlink,
void *source,
void *solution,
double *
const final_residual,
876 double *
const final_fermilab_residual,
int *num_iters)
879 qudamilc_called<true>(__func__,
verbosity);
881 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
891 default: device_precision_sloppy = device_precision;
896 setGaugeParams(fat_param, long_param,
fatlink,
longlink, localDim, host_precision, device_precision,
904 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_residual,
905 target_fermilab_residual, inv_args.
max_iter, reliable_delta, local_parity, verbosity,
909 setColorSpinorParams(localDim, host_precision, &csParam);
914 if (invalidate_quda_gauge || !create_quda_gauge) {
917 invalidate_quda_gauge =
false;
922 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
924 invertQuda(static_cast<char *>(solution) + quark_offset, static_cast<char *>(source) + quark_offset, &invertParam);
927 *num_iters = invertParam.
iter;
928 *final_residual = invertParam.
true_res;
929 *final_fermilab_residual = invertParam.
true_res_hq;
931 if (!create_quda_gauge) invalidateGaugeQuda();
933 qudamilc_called<false>(__func__,
verbosity);
938 const void *
const longlink,
void* src,
void* dst,
int* num_iters)
941 qudamilc_called<true>(__func__,
verbosity);
950 setGaugeParams(fat_param, long_param,
fatlink,
longlink, localDim, host_precision, device_precision,
958 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy, 0.0, 0, 0, 0, 0.0, local_parity,
962 setColorSpinorParams(localDim, host_precision, &csParam);
967 if (invalidate_quda_gauge || !create_quda_gauge) {
970 invalidate_quda_gauge =
false;
975 int src_offset = getColorVectorOffset(other_parity,
false, localDim);
976 int dst_offset = getColorVectorOffset(local_parity,
false, localDim);
978 dslashQuda(static_cast<char*>(dst) + dst_offset*host_precision,
979 static_cast<char*>(src) + src_offset*host_precision,
980 &invertParam, local_parity);
982 if (!create_quda_gauge) invalidateGaugeQuda();
984 qudamilc_called<false>(__func__,
verbosity);
988 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
989 const void *
const longlink,
void **sourceArray,
void **solutionArray,
double *
const final_residual,
990 double *
const final_fermilab_residual,
int *num_iters,
int num_src)
993 qudamilc_called<true>(__func__,
verbosity);
995 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
1005 default: device_precision_sloppy = device_precision;
1010 setGaugeParams(fat_param, long_param,
fatlink,
longlink, localDim, host_precision, device_precision,
1018 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_residual,
1019 target_fermilab_residual, inv_args.
max_iter, reliable_delta, local_parity, verbosity,
1021 invertParam.
num_src = num_src;
1024 setColorSpinorParams(localDim, host_precision, &csParam);
1029 if (invalidate_quda_gauge || !create_quda_gauge) {
1032 invalidate_quda_gauge =
false;
1037 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
1038 void** sln_pointer = (
void**)malloc(num_src*
sizeof(
void*));
1039 void** src_pointer = (
void**)malloc(num_src*
sizeof(
void*));
1041 for (
int i = 0; i < num_src; ++i) sln_pointer[i] = static_cast<char *>(solutionArray[i]) + quark_offset;
1042 for (
int i = 0; i < num_src; ++i) src_pointer[i] = static_cast<char *>(sourceArray[i]) + quark_offset;
1050 *num_iters = invertParam.
iter;
1051 *final_residual = invertParam.
true_res;
1052 *final_fermilab_residual = invertParam.
true_res_hq;
1054 if (!create_quda_gauge) invalidateGaugeQuda();
1056 qudamilc_called<false>(__func__,
verbosity);
1060 double target_residual,
double target_fermilab_residual,
const void *
const fatlink,
1066 const int last_rhs_flag,
1067 double *
const final_residual,
double *
const final_fermilab_residual,
int *num_iters)
1070 qudamilc_called<true>(__func__,
verbosity);
1072 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaInvert: requesting zero residual\n");
1081 default: device_precision_sloppy = device_precision;
1086 setGaugeParams(fat_param, long_param,
fatlink,
longlink, localDim, host_precision, device_precision,
1092 double& target_res = target_residual;
1093 double& target_res_hq = target_fermilab_residual;
1096 setInvertParams(localDim, host_precision, device_precision, device_precision_sloppy,
mass, target_res, target_res_hq,
1102 invertParam.
nev = eig_args.
nev;
1111 invertParam.
rhs_idx = rhs_idx;
1114 errorQuda(
"Incorrect inverter type.\n");
1122 setColorSpinorParams(localDim, host_precision, &csParam);
1127 if ((invalidate_quda_gauge || !create_quda_gauge) && (rhs_idx == 0)) {
1130 invalidate_quda_gauge =
false;
1135 int quark_offset = getColorVectorOffset(local_parity,
false, localDim) * host_precision;
1141 invertQuda(static_cast<char *>(solution) + quark_offset, static_cast<char *>(source) + quark_offset, &invertParam);
1146 *num_iters = invertParam.
iter;
1147 *final_residual = invertParam.
true_res;
1148 *final_fermilab_residual = invertParam.
true_res_hq;
1150 if (!create_quda_gauge && last_rhs_flag) invalidateGaugeQuda();
1152 qudamilc_called<false>(__func__,
verbosity);
1156 static int clover_alloc = 0;
1160 qudamilc_called<true>(__func__);
1164 qudamilc_called<false>(__func__);
1171 qudamilc_called<true>(__func__);
1175 qudamilc_called<false>(__func__);
1181 qudamilc_called<true>(__func__);
1183 qudamilc_called<false>(__func__);
1190 void qudaCloverForce(
void *mom,
double dt,
void **x,
void **p,
double *coeff,
double kappa,
double ck,
1193 qudamilc_called<true>(__func__);
1202 for (
int i=0; i<
nvec; ++i) invertParam.
offset[i] = 0.0;
1216 gauge, &gaugeParam, &invertParam);
1217 qudamilc_called<false>(__func__);
1222 int external_precision,
int quda_precision) {
1231 default: device_precision_sloppy = device_precision;
1234 for(
int dir=0; dir<4; ++dir) gaugeParam.
X[dir] = dim[dir];
1243 bool trivial_phase =
true;
1244 for(
int dir=0; dir<3; ++dir){
1259 gaugeParam.
cpu_prec = host_precision;
1260 gaugeParam.
cuda_prec = device_precision;
1264 gaugeParam.
ga_pad = getLinkPadding(dim);
1278 default: device_precision_sloppy = device_precision;
1294 invertParam.
cpu_prec = host_precision;
1295 invertParam.
cuda_prec = device_precision;
1315 const void* milc_link) {
1316 qudamilc_called<true>(__func__);
1318 setGaugeParams(gaugeParam, localDim, inv_args, external_precision, quda_precision);
1321 qudamilc_called<false>(__func__);
1326 qudamilc_called<true>(__func__);
1328 qudamilc_called<false>(__func__);
1335 qudamilc_called<true>(__func__);
1337 setInvertParam(invertParam, inv_args, external_precision, quda_precision, 0.0, 0.0);
1340 invertParam.inv_type = inverter;
1342 invertParam.compute_clover_trlog = compute_trlog;
1350 invertParam.tol = 0.;
1351 invertParam.tol_hq = 0.;
1355 if (clover_alloc == 0) {
1359 errorQuda(
"Clover term already allocated");
1363 if (compute_trlog) {
1364 trlog[0] = invertParam.trlogA[0];
1365 trlog[1] = invertParam.trlogA[1];
1367 qudamilc_called<false>(__func__);
1371 qudamilc_called<true>(__func__);
1372 if (clover_alloc==1) {
1376 errorQuda(
"Trying to free non-allocated clover term");
1378 qudamilc_called<false>(__func__);
1387 double target_residual,
1388 double target_fermilab_residual,
1391 void* cloverInverse,
1394 double*
const final_residual,
1395 double*
const final_fermilab_residual,
1398 qudamilc_called<true>(__func__);
1399 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaCloverInvert: requesting zero residual\n");
1403 if (clover || cloverInverse) {
1411 setInvertParam(invertParam, inv_args, external_precision, quda_precision,
kappa, reliable_delta);
1416 invertParam.
tol = target_residual;
1417 invertParam.
tol_hq = target_fermilab_residual;
1429 *num_iters = invertParam.
iter;
1430 *final_residual = invertParam.
true_res;
1431 *final_fermilab_residual = invertParam.
true_res_hq;
1435 qudamilc_called<false>(__func__);
1439 QudaInvertArgs_t inv_args,
double target_residual,
double target_fermilab_residual,
1442 void *cloverInverse,
1447 const int last_rhs_flag,
1448 double *
const final_residual,
double *
const final_fermilab_residual,
int *num_iters)
1450 qudamilc_called<true>(__func__);
1451 if (target_fermilab_residual == 0 && target_residual == 0)
errorQuda(
"qudaCloverInvert: requesting zero residual\n");
1453 if (link && (rhs_idx == 0))
qudaLoadGaugeField(external_precision, quda_precision, inv_args, link);
1455 if ( (clover || cloverInverse) && (rhs_idx == 0)) {
1463 setInvertParam(invertParam, inv_args, external_precision, quda_precision,
kappa, reliable_delta);
1468 invertParam.
tol = target_residual;
1469 invertParam.
tol_hq = target_fermilab_residual;
1482 invertParam.
nev = eig_args.
nev;
1491 invertParam.
rhs_idx = rhs_idx;
1508 *num_iters = invertParam.
iter;
1509 *final_residual = invertParam.
true_res;
1510 *final_fermilab_residual = invertParam.
true_res_hq;
1514 qudamilc_called<false>(__func__);
1521 double*
const offset,
1525 const double* target_residual_offset,
1526 const void* milc_link,
1528 void* milc_clover_inv,
1530 void** solutionArray,
1531 double*
const final_residual,
1535 qudamilc_called<true>(__func__,
verbosity);
1537 for (
int i = 0; i < num_offsets; ++i) {
1538 if (target_residual_offset[i] == 0)
errorQuda(
"qudaCloverMultishiftInvert: target residual cannot be zero\n");
1542 const bool use_mixed_precision = (((quda_precision==2) && inv_args.
mixed_precision) ||
1543 ((quda_precision==1) && (inv_args.
mixed_precision==2)) ) ? true :
false;
1546 setInvertParam(invertParam, inv_args, external_precision, quda_precision,
kappa, reliable_delta);
1549 for(
int i=0; i<num_offsets; ++i){
1550 invertParam.
offset[i] = offset[i];
1551 invertParam.
tol_offset[i] = target_residual_offset[i];
1553 invertParam.
tol = target_residual_offset[0];
1568 if (num_offsets==1 && offset[0] == 0) {
1570 char *quda_solver = getenv(
"QUDA_MILC_CLOVER_SOLVER");
1573 if (!quda_solver || strcmp(quda_solver,
"CHRONO_CG_SOLVER")==0) {
1578 }
else if (strcmp(quda_solver,
"BICGSTAB_SOLVER")==0){
1582 }
else if (strcmp(quda_solver,
"CG_SOLVER")==0){
1588 invertQuda(solutionArray[0], source, &invertParam);
1589 *final_residual = invertParam.
true_res;
1592 for (
int i=0; i<num_offsets; ++i) final_residual[i] = invertParam.
true_res_offset[i];
1596 *num_iters = invertParam.
iter;
1598 qudamilc_called<false>(__func__,
verbosity);
1601 void qudaGaugeFixingOVR(
int precision,
unsigned int gauge_dir,
int Nsteps,
int verbose_interval,
double relax_boost,
1602 double tolerance,
unsigned int reunit_interval,
unsigned int stopWtheta,
void *milc_sitelink)
1611 computeGaugeFixingOVRQuda(milc_sitelink, gauge_dir, Nsteps, verbose_interval, relax_boost, tolerance, reunit_interval, stopWtheta, \
1612 &qudaGaugeParam, timeinfo);
1615 printfQuda(
"Time to Compute: %lf\n", timeinfo[1]);
1617 printfQuda(
"Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
1621 unsigned int gauge_dir,
1623 int verbose_interval,
1625 unsigned int autotune,
1627 unsigned int stopWtheta,
1640 &qudaGaugeParam, timeinfo);
1643 printfQuda(
"Time to Compute: %lf\n", timeinfo[1]);
1645 printfQuda(
"Time all: %lf\n", timeinfo[0]+timeinfo[1]+timeinfo[2]);
1648 #endif // BUILD_MILC_INTERFACE void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck, int nvector, double multiplicity, void *gauge, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param)
static QudaGaugeParam qudaGaugeParam
static bool reunit_allow_svd
QudaDiracFieldOrder dirac_order
QudaMassNormalization mass_normalization
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaReconstructType reconstruct_sloppy
void freeCloverQuda(void)
QudaGaugeParam gaugeParam
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
int use_resident_solution
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile)
#define pool_pinned_free(ptr)
void qudaHisqParamsInit(QudaHisqParams_t hisq_params)
QudaVerbosity verbosity_precondition
enum QudaPrecision_s QudaPrecision
void qudaUnitarizeSU3(int prec, double tol, QudaMILCSiteArg_t *arg)
void destroyDeflationQuda(void *df_instance)
void qudaGaugeFixingFFT(int precision, unsigned int gauge_dir, int Nsteps, int verbose_interval, double alpha, unsigned int autotune, double tolerance, unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
void setMPICommHandleQuda(void *mycomm)
QudaExtLibType deflation_ext_lib
void qudaDslash(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *const milc_fatlink, const void *const milc_longlink, void *source, void *solution, int *num_iters)
void setUnitarizeForceConstants(double unitarize_eps, double hisq_force_filter, double max_det_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
Set the constant parameters for the force unitarization.
QudaInverterType inv_type_precondition
QudaVerbosity getVerbosity()
QudaPrecision cuda_prec_ritz
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
void setUnitarizeLinksConstants(double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error)
QudaDslashType dslash_type
QudaReconstructType reconstruct_precondition
QudaInverterType inv_type
enum QudaSolveType_s QudaSolveType
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
QudaExtLibType deflation_ext_lib
void qudaInit(QudaInitArgs_t input)
double reunit_svd_rel_error
QudaMemoryType mem_type_ritz
QudaPrecision & cuda_prec
void setDeflationParam(QudaEigParam &df_param)
double momActionQuda(void *momentum, QudaGaugeParam *param)
QudaStaggeredPhase staggered_phase_type
void qudaLoadGaugeField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, const void *milc_link)
void setInvertParam(QudaInvertParam &inv_param)
void qudaInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const milc_fatlink, const void *const milc_longlink, void *source, void *solution, double *const final_resid, double *const final_rel_resid, int *num_iters)
void qudaEigCGInvert(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
int make_resident_solution
QudaPrecision cuda_prec_refinement_sloppy
void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int num_offsets, double *const offset, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, const double *target_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void **solutionArray, double *const final_residual, int *num_iters)
void qudaEigCGCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, QudaEigArgs_t eig_args, const int rhs_idx, const int last_rhs_flag, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
QudaGaugeFieldOrder gauge_order
void computeKSLinkQuda(void *fatlink, void *longlink, void *ulink, void *inlink, double *path_coeff, QudaGaugeParam *param)
void qudaGaugeFixingOVR(const int precision, const unsigned int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, void *milc_sitelink)
Gauge fixing with overrelaxation with support for single and multi GPU.
void qudaSaveGaugeField(void *gauge, void *inGauge)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
int make_resident_solution
double qudaMomAction(int precision, void *momentum)
QudaSiteSubset siteSubset
void qudaSetLayout(QudaLayout_t layout)
QudaPrecision clover_cuda_prec_sloppy
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
QudaFieldLocation input_location
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata)
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaUseInitGuess use_init_guess
void qudaHisqForce(int precision, int num_terms, int num_naik_terms, double dt, double **coeff, void **quark_field, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void *const milc_momentum)
int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with overrelaxation with support for single and multi GPU.
QudaSolutionType solution_type
void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param)
QudaMemoryType mem_type_ritz
QudaPrecision clover_cuda_prec
QudaPrecision & cuda_prec_sloppy
int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam)
QudaSolutionType solution_type
QudaInvertParam * invert_param
void initQuda(int device)
void qudaFreePinned(void *ptr)
void qudaUpdateU(int precision, double eps, QudaMILCSiteArg_t *arg)
QudaFieldLocation output_location
QudaPrecision clover_cuda_prec_precondition
bool canReuseResidentGauge(QudaInvertParam *inv_param)
void qudaFreeCloverField()
void * newDeflationQuda(QudaEigParam *param)
QudaFieldOrder fieldOrder
QudaPrecision cuda_prec_sloppy
void qudaInvertMsrc(int external_precision, int quda_precision, double mass, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *const fatlink, const void *const longlink, void **sourceArray, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters, int num_src)
static bool initialized
Profiler for initQuda.
static bool reunit_svd_only
double tol_offset[QUDA_MAX_MULTI_SHIFT]
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
int nvec[QUDA_MAX_MG_LEVEL]
QudaInvertParam newQudaInvertParam(void)
void * qudaCreateGaugeField(void *gauge, int geometry, int precision)
QudaPrecision cuda_prec_precondition
QudaCloverFieldOrder clover_order
void computeHISQForceQuda(void *momentum, double dt, const double level2_coeff[6], const double fat7_coeff[6], const void *const w_link, const void *const v_link, const void *const u_link, void **quark, int num, int num_naik, double **coeff, QudaGaugeParam *param)
void saveGaugeFieldQuda(void *outGauge, void *inGauge, QudaGaugeParam *param)
QudaGammaBasis gammaBasis
QudaInverterType solver_type
void qudaRephase(int prec, void *gauge, int flag, double i_mu)
enum QudaSolutionType_s QudaSolutionType
void qudaComputeOprod(int precision, int num_terms, int num_naik_terms, double **coeff, double scale, void **quark_field, void *oprod[3])
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
double reunit_svd_abs_error
double offset[QUDA_MAX_MULTI_SHIFT]
void qudaFreeGaugeField()
int use_sloppy_partial_accumulator
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
enum QudaLinkType_s QudaLinkType
void qudaLoadCloverField(int external_precision, int quda_precision, QudaInvertArgs_t inv_args, void *milc_clover, void *milc_clover_inv, QudaSolutionType solution_type, QudaSolveType solve_type, double clover_coeff, int compute_trlog, double *trlog)
QudaBoolean import_vectors
QudaFieldLocation location
QudaFieldLocation location_ritz
#define safe_malloc(size)
void qudaCloverInvert(int external_precision, int quda_precision, double kappa, double clover_coeff, QudaInvertArgs_t inv_args, double target_residual, double target_fermilab_residual, const void *milc_link, void *milc_clover, void *milc_clover_inv, void *source, void *solution, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
int max_hq_res_restart_total
QudaPrecision cuda_prec_refinement_sloppy
void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param)
static int index(int ndim, const int *dims, const int *x)
#define pool_pinned_malloc(size)
int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param, double *timeinfo)
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
void qudaLoadUnitarizedLink(int precision, QudaFatLinkArgs_t fatlink_args, const double path_coeff[6], void *inlink, void *fatlink, void *ulink)
void destroyGaugeFieldQuda(void *gauge)
enum QudaFieldLocation_s QudaFieldLocation
QudaPrecision cuda_prec_precondition
void qudaSetMPICommHandle(void *mycomm)
void updateGaugeFieldQuda(void *gauge, void *momentum, double dt, int conj_mom, int exact, QudaGaugeParam *param)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param)
static double unitarize_eps
void qudaCloverForce(void *mom, double dt, void **x, void **p, double *coeff, double kappa, double ck, int nvec, double multiplicity, void *gauge, int precision, QudaInvertArgs_t inv_args)
void * qudaAllocatePinned(size_t bytes)
QudaMemoryType mem_type_ritz
cudaGaugeField * cudaGauge
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void * createGaugeFieldQuda(void *gauge, int geometry, QudaGaugeParam *param)
QudaFieldLocation location_ritz
QudaResidualType residual_type
enum QudaVerbosity_s QudaVerbosity
#define PUSH_RANGE(name, cid)
int use_resident_solution
void qudaLoadKSLink(int precision, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void *inlink, void *fatlink, void *longlink)
QudaPrecision clover_cpu_prec
QudaPrecision cuda_prec_ritz
void qudaGaugeForce(int precision, int num_loop_types, double milc_loop_coeff[3], double eb3, QudaMILCSiteArg_t *arg)
QudaEigParam newQudaEigParam(void)
enum QudaInverterType_s QudaInverterType
void qudaMultishiftInvert(int external_precision, int precision, int num_offsets, double *const offset, QudaInvertArgs_t inv_args, const double *target_residual, const double *target_fermilab_residual, const void *const milc_fatlink, const void *const milc_longlink, void *source, void **solutionArray, double *const final_residual, double *const final_fermilab_residual, int *num_iters)
enum QudaMemoryType_s QudaMemoryType
void qudaAsqtadForce(int precision, const double act_path_coeff[6], const void *const one_link_src[4], const void *const naik_src[4], const void *const link, void *const milc_momentum)
static void createGaugeForcePaths(int **paths, int dir, int num_loop_types)
QudaReconstructType reconstruct_refinement_sloppy
enum QudaExtLibType_s QudaExtLibType
void qudaDestroyGaugeField(void *gauge)
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source
double reliable_delta_refinement