17 #if defined(QMP_COMMS)
19 #elif defined(MPI_COMMS)
27 #define MAX(a,b) ((a)>(b)?(a):(b))
28 #define mySpinorSiteSize 6
30 extern void usage(
char** argv);
38 void** ghost_fatlink, **ghost_longlink;
58 static double tol = 1e-7;
75 template<
typename Float>
77 for(
int i = 0; i <
Vh; i++) {
78 for (
int s = 0;
s < 1;
s++) {
79 for (
int m = 0; m < 3; m++) {
80 res[i*(1*3*2) +
s*(3*2) + m*(2) + 0] = rand() / (
Float)RAND_MAX;
81 res[i*(1*3*2) +
s*(3*2) + m*(2) + 1] = rand() / (
Float)RAND_MAX;
93 double mass,
double tol,
int maxiter,
double reliable_delta,
97 gaugeParam->
X[0] =
X1;
98 gaugeParam->
X[1] =
X2;
99 gaugeParam->
X[2] =
X3;
100 gaugeParam->
X[3] =
X4;
110 gaugeParam->
scale = -1.0/(24.0*tadpole_coeff*tadpole_coeff);
114 gaugeParam->
ga_pad = X1*X2*X3/2;
121 inv_param->
tol = tol;
130 #if __COMPUTE_CAPABILITY__ >= 200
173 inv_param->
sp_pad = X1*X2*X3/2;
187 set_params(&gaugeParam, &inv_param,
189 cpu_prec, prec, prec_sloppy,
190 link_recon, link_recon_sloppy, mass, tol, 500, 1e-3,
200 for (
int dir = 0; dir < 4; dir++) {
210 const double cos_pi_3 = 0.5;
211 const double sin_pi_3 =
sqrt(0.75);
213 for(
int dir=0; dir<4; ++dir){
214 for(
int i=0; i<
V; ++i){
217 ((
double*)
qdp_fatlink[dir])[i*gaugeSiteSize + j] = 0.5*rand()/RAND_MAX;
220 const double real = ((
double*)
qdp_longlink[dir])[i*gaugeSiteSize + j];
221 const double imag = ((
double*)
qdp_longlink[dir])[i*gaugeSiteSize + j + 1];
222 ((
double*)
qdp_longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
223 ((
double*)
qdp_longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
226 ((
double*)
fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((
double*)
qdp_fatlink[dir])[i*gaugeSiteSize + j];
227 ((
double*)
longlink)[(i*4 + dir)*gaugeSiteSize + j] = ((
double*)
qdp_longlink[dir])[i*gaugeSiteSize + j];
229 ((
float*)
qdp_fatlink[dir])[i] = 0.5*rand()/RAND_MAX;
232 const float real = ((
float*)
qdp_longlink[dir])[i*gaugeSiteSize + j];
233 const float imag = ((
float*)
qdp_longlink[dir])[i*gaugeSiteSize + j + 1];
234 ((
float*)
qdp_longlink[dir])[i*gaugeSiteSize + j] = real*cos_pi_3 - imag*sin_pi_3;
235 ((
float*)
qdp_longlink[dir])[i*gaugeSiteSize + j + 1] = real*sin_pi_3 + imag*cos_pi_3;
238 ((
double*)
fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((
double*)
qdp_fatlink[dir])[i*gaugeSiteSize + j];
239 ((
float*)
fatlink)[(i*4 + dir)*gaugeSiteSize + j] = ((
float*)
qdp_fatlink[dir])[i*gaugeSiteSize + j];
240 ((
float*)
longlink)[(i*4 + dir)*gaugeSiteSize + j] = ((
float*)
qdp_longlink[dir])[i*gaugeSiteSize + j];
251 for(
int d = 0; d < 4; d++) {
252 csParam.
x[d] = gaugeParam.
X[d];
279 int fat_pad = tmp_value;
280 int link_pad = 3*tmp_value;
288 ghost_fatlink = (
void**)cpuFat->
Ghost();
293 ghost_longlink = (
void**)cpuLong->
Ghost();
297 gaugeParam.
ga_pad = fat_pad;
304 gaugeParam.
ga_pad = link_pad;
323 double time0 = -((double)clock());
344 time0 /= CLOCKS_PER_SEC;
372 time0 /= CLOCKS_PER_SEC;
388 errorQuda(
"full spinor not supported\n");
394 #define NUM_OFFSETS 12
397 double masses[
NUM_OFFSETS] ={0.002, 0.0021, 0.0064, 0.070, 0.077, 0.081, 0.1, 0.11, 0.12, 0.13, 0.14, 0.205};
408 spinorOutArray[0] =
out;
414 outArray[i] = spinorOutArray[i]->
V();
415 inv_param.
offset[i] = 4*masses[i]*masses[i];
428 cudaDeviceSynchronize();
430 time0 /= CLOCKS_PER_SEC;
432 printfQuda(
"done: total time = %g secs, compute time = %g, %i iter / %g secs = %g gflops\n",
441 errorQuda(
"full parity not supported\n");
447 errorQuda(
"ERROR: invalid spinor parity \n");
451 printfQuda(
"%dth solution: mass=%f, ", i, masses[i]);
454 spinorOutArray[i], masses[i], 0, inv_param.
cpu_prec,
463 double l2r =
sqrt(nrm2/src2);
465 printfQuda(
"Shift %d residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
475 for(
int i=1; i < inv_param.
num_offset;i++)
delete spinorOutArray[i];
487 double l2r =
sqrt(nrm2/src2);
489 printfQuda(
"Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g, host = %g\n",
492 printfQuda(
"done: total time = %g secs, compute time = %g secs, %i iter / %g secs = %g gflops, \n",
506 for(
int i=0;i < 4;i++){
519 if (cpuFat)
delete cpuFat;
531 printfQuda(
"prec sloppy_prec link_recon sloppy_link_recon test_type S_dimension T_dimension\n");
552 printfQuda(
" --tol <resid_tol> # Set residual tolerance\n");
554 printfQuda(
" 0: Even even spinor CG inverter\n");
555 printfQuda(
" 1: Odd odd spinor CG inverter\n");
556 printfQuda(
" 3: Even even spinor multishift CG inverter\n");
557 printfQuda(
" 4: Odd odd spinor multishift CG inverter\n");
558 printfQuda(
" --cpu_prec <double/single/half> # Set CPU precision\n");
562 int main(
int argc,
char** argv)
564 for (
int i = 1; i < argc; i++) {
570 if( strcmp(argv[i],
"--tol") == 0){
575 sscanf(argv[i+1],
"%f", &tmpf);
577 printf(
"ERROR: invalid tol(%f)\n", tmpf);
585 if( strcmp(argv[i],
"--cpu_prec") == 0){
594 printf(
"ERROR: Invalid option:%s\n", argv[i]);
int dimPartitioned(int dim)
QudaDiracFieldOrder dirac_order
QudaMassNormalization mass_normalization
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaReconstructType reconstruct_sloppy
void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param)
const void ** Ghost() const
QudaVerbosity verbosity_precondition
enum QudaPrecision_s QudaPrecision
void matdagmat_mg4dir(cpuColorSpinorField *out, void **fatlink, void **longlink, void **ghost_fatlink, void **ghost_longlink, cpuColorSpinorField *in, double mass, int dagger_bit, QudaPrecision sPrecision, QudaPrecision gPrecision, cpuColorSpinorField *tmp, QudaParity parity)
QudaPrecision prec_sloppy
enum QudaResidualType_s QudaResidualType
QudaInverterType inv_type_precondition
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
void usage_extra(char **argv)
QudaDslashType dslash_type
QudaInverterType inv_type
QudaGaugeParam gaugeParam
QudaReconstructType link_recon_sloppy
__host__ __device__ ValueType sqrt(ValueType x)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
int process_command_line_option(int argc, char **argv, int *idx)
void constructSpinorField(Float *res)
QudaReconstructType link_recon
QudaGaugeFieldOrder gauge_order
const char * get_prec_str(QudaPrecision prec)
QudaInverterType inv_type
QudaSiteSubset siteSubset
const char * get_test_type(int t)
QudaFieldLocation input_location
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT]
QudaUseInitGuess use_init_guess
QudaSolutionType solution_type
void initQuda(int device)
QudaFieldLocation output_location
QudaFieldOrder fieldOrder
QudaPrecision cuda_prec_sloppy
FloatingPoint< float > Float
void setSpinorSiteSize(int n)
double tol_offset[QUDA_MAX_MULTI_SHIFT]
double true_res_offset[QUDA_MAX_MULTI_SHIFT]
QudaInvertParam newQudaInvertParam(void)
const char * get_recon_str(QudaReconstructType recon)
cpuColorSpinorField * tmp
QudaPrecision cuda_prec_precondition
QudaGammaBasis gammaBasis
QudaGammaBasis gamma_basis
QudaPrecision cuda_prec_sloppy
void mxpy(void *x, void *y, int len, QudaPrecision precision)
double offset[QUDA_MAX_MULTI_SHIFT]
int use_sloppy_partial_accumulator
enum QudaParity_s QudaParity
QudaReconstructType reconstruct
double norm_2(void *v, int len, QudaPrecision precision)
int main(int argc, char **argv)
void construct_fat_long_gauge_field(void **fatlink, void **longlink, int type, QudaPrecision precision, QudaGaugeParam *param, QudaDslashType dslash_type)
QudaInvertParam inv_param
QudaDslashType dslash_type
cpuColorSpinorField * out
QudaPrecision cuda_prec_precondition
cpuColorSpinorField * ref
QudaPrecision get_prec(QIO_Reader *infile)
enum QudaReconstructType_s QudaReconstructType
Main header file for the QUDA library.
enum QudaDslashType_s QudaDslashType
void matdagmat(void *out, void **gauge, void *in, double kappa, QudaPrecision sPrecision, QudaPrecision gPrecision, double mferm)
QudaResidualType residual_type
int gridsize_from_cmdline[]
double3 HeavyQuarkResidualNormCpu(cpuColorSpinorField &x, cpuColorSpinorField &r)
void initComms(int argc, char **argv, const int *commDims)
enum QudaInverterType_s QudaInverterType
QudaGaugeParam newQudaGaugeParam(void)
QudaPreserveSource preserve_source