QUDA  v1.1.0
A library for QCD on GPUs
deflated_invert_test.cpp
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <time.h>
4 #include <math.h>
5 #include <string.h>
6 #include <algorithm>
7 
8 #include <util_quda.h>
9 #include <host_utils.h>
10 #include <command_line_params.h>
11 
12 #include <dslash_reference.h>
13 //#include <blas_reference.h>
16 #include "misc.h"
17 
18 #if defined(QMP_COMMS)
19 #include <qmp.h>
20 #elif defined(MPI_COMMS)
21 #include <mpi.h>
22 #endif
23 
24 #include <qio_field.h>
25 
26 // In a typical application, quda.h is the only QUDA header required.
27 #include <quda.h>
28 
30 {
31  printfQuda("running the following test:\n");
32 
33  printfQuda("prec prec_sloppy matpc_type recon recon_sloppy S_dimension T_dimension Ls_dimension dslash_type "
34  " normalization\n");
35  printfQuda("%6s %6s %12s %2s %2s %3d/%3d/%3d %3d %2d %14s %8s\n",
39 
40  printfQuda("Grid partition info: X Y Z T\n");
41  printfQuda(" %d %d %d %d\n", dimPartitioned(0), dimPartitioned(1), dimPartitioned(2),
42  dimPartitioned(3));
43 
44  printfQuda("Deflation space info: location mem_type\n");
47 }
48 
49 int main(int argc, char **argv)
50 {
51  // command line options
52  auto app = make_app();
53  // add_eigen_option_group(app);
55  // add_multigrid_option_group(app);
56  try {
57  app->parse(argc, argv);
58  } catch (const CLI::ParseError &e) {
59  return app->exit(e);
60  }
61 
68 
69  // initialize QMP/MPI, QUDA comms grid and RNG (host_utils.cpp)
70  initComms(argc, argv, gridsize_from_cmdline);
71 
72  // call srand() with a rank-dependent seed
73  initRand();
74 
76 
77  // initialize the QUDA library
79 
80  // *** QUDA parameters begin here.
81 
86  printfQuda("dslash_type %d not supported\n", dslash_type);
87  exit(0);
88  }
89 
92 
95 
102  inv_param.m5 = -1.8;
103  kappa5 = 0.5 / (5 + inv_param.m5);
104  inv_param.Ls = Lsdim;
105  for (int k = 0; k < Lsdim; k++) // for mobius only
106  {
107  // b5[k], c[k] values are chosen for arbitrary values,
108  // but the difference of them are same as 1.0
109  inv_param.b_5[k] = 1.452;
110  inv_param.c_5[k] = 0.452;
111  }
112  }
113 
114  QudaEigParam df_param = newQudaEigParam();
115  df_param.invert_param = &inv_param;
116  setDeflationParam(df_param);
117 
118  // *** Everything between here and the call to initQuda() is
119  // *** application-specific.
120 
121  // set parameters for the reference Dslash, and prepare fields to be loaded
125  } else {
127  }
128 
129  // Allocate host side memory for the gauge field.
130  //----------------------------------------------------------------------------
131  void *gauge[4];
132  // Allocate space on the host (always best to allocate and free in the same scope)
133  for (int dir = 0; dir < 4; dir++) gauge[dir] = malloc(V * gauge_site_size * host_gauge_data_type_size);
134  constructHostGaugeField(gauge, gauge_param, argc, argv);
135  // Load the gauge field to the device
136  loadGaugeQuda((void *)gauge, &gauge_param);
137 
138  // Allocate host side memory for clover terms if needed.
139  //----------------------------------------------------------------------------
140  void *clover = nullptr;
141  void *clover_inv = nullptr;
142  // Allocate space on the host (always best to allocate and free in the same scope)
144  clover = malloc(V * clover_site_size * host_clover_data_type_size);
145  clover_inv = malloc(V * clover_site_size * host_spinor_data_type_size);
146  constructHostCloverField(clover, clover_inv, inv_param);
147  // Load the clover terms to the device
148  loadCloverQuda(clover, clover_inv, &inv_param);
149  }
150 
151  void *spinorIn = malloc(V * spinor_site_size * host_spinor_data_type_size * inv_param.Ls);
152  void *spinorCheck = malloc(V * spinor_site_size * host_spinor_data_type_size * inv_param.Ls);
153 
154  void *spinorOut = NULL;
156 
157  // start the timer
158  double time0 = -((double)clock());
159 
160  // this line ensure that if we need to construct the clover inverse (in either the smoother or the solver) we do so
162 
163  void *df_preconditioner = newDeflationQuda(&df_param);
164  inv_param.deflation_op = df_preconditioner;
165 
166  for (int i=0; i<Nsrc; i++) {
167  // create a point source at 0 (in each subvolume... FIXME)
171 
173  //((float*)spinorIn)[i] = 1.0;
174  for (int i = 0; i < inv_param.Ls * V * spinor_site_size; i++) ((float *)spinorIn)[i] = rand() / (float)RAND_MAX;
175  } else {
176  //((double*)spinorIn)[i] = 1.0;
177  for (int i = 0; i < inv_param.Ls * V * spinor_site_size; i++) ((double *)spinorIn)[i] = rand() / (double)RAND_MAX;
178  }
179 
180  invertQuda(spinorOut, spinorIn, &inv_param);
181  printfQuda("\nDone for %d rhs.\n", inv_param.rhs_idx);
182  }
183 
184  destroyDeflationQuda(df_preconditioner);
185 
186  // stop the timer
187  time0 += clock();
188  time0 /= CLOCKS_PER_SEC;
189 
190  // printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n",
191  // inv_param.iter, inv_param.secs, inv_param.gflops/inv_param.secs, time0);
192  printfQuda("\nDone: %i iter / %g secs = %g Gflops, total time = %g secs\n", inv_param.iter, inv_param.secs,
193  inv_param.gflops / inv_param.secs, 0.0);
194 
196 
198  wil_mat(spinorCheck, gauge, spinorOut, inv_param.kappa, 0, inv_param.cpu_prec, gauge_param);
199 
200  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
202  } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
204  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
205  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
206  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
207  for (int xs = 0; xs < Lsdim; xs++) {
208  kappa_b[xs] = 1.0 / (2 * (inv_param.b_5[xs] * (4.0 + inv_param.m5) + 1.0));
209  kappa_c[xs] = 1.0 / (2 * (inv_param.c_5[xs] * (4.0 + inv_param.m5) - 1.0));
210  }
211  mdw_mat(spinorCheck, gauge, spinorOut, kappa_b, kappa_c, inv_param.dagger, inv_param.cpu_prec, gauge_param,
213  free(kappa_b);
214  free(kappa_c);
215  } else {
219  } else {
220  printfQuda("Unsupported dslash_type\n");
221  exit(-1);
222  }
223  }
224  }
225 
229  ax(0.5 / kappa5, spinorCheck, V * spinor_site_size * inv_param.Ls, inv_param.cpu_prec);
231  ax(0.5 / inv_param.kappa, spinorCheck, 2 * V * spinor_site_size, inv_param.cpu_prec);
232  } else {
233  ax(0.5 / inv_param.kappa, spinorCheck, V * spinor_site_size, inv_param.cpu_prec);
234  }
235  }
236 
238 
241  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
243  inv_param.mass);
244  } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
246  inv_param.mass);
247  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
248  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
249  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
250  for (int xs = 0; xs < Lsdim; xs++) {
251  kappa_b[xs] = 1.0 / (2 * (inv_param.b_5[xs] * (4.0 + inv_param.m5) + 1.0));
252  kappa_c[xs] = 1.0 / (2 * (inv_param.c_5[xs] * (4.0 + inv_param.m5) - 1.0));
253  }
254  mdw_matpc(spinorCheck, gauge, spinorOut, kappa_b, kappa_c, inv_param.matpc_type, 0, inv_param.cpu_prec,
256  free(kappa_b);
257  free(kappa_c);
258  } else {
263  } else {
264  printfQuda("Unsupported dslash_type\n");
265  exit(-1);
266  }
267  }
268  }
269 
273  ax(0.25 / (kappa5 * kappa5), spinorCheck, V * spinor_site_size * inv_param.Ls, inv_param.cpu_prec);
274  } else {
275  ax(0.25 / (inv_param.kappa * inv_param.kappa), spinorCheck, Vh * spinor_site_size, inv_param.cpu_prec);
276  }
277  }
278  }
279 
280  int vol = inv_param.solution_type == QUDA_MAT_SOLUTION ? V : Vh;
281  mxpy(spinorIn, spinorCheck, vol * spinor_site_size * inv_param.Ls, inv_param.cpu_prec);
282  double nrm2 = norm_2(spinorCheck, vol * spinor_site_size * inv_param.Ls, inv_param.cpu_prec);
283  double src2 = norm_2(spinorIn, vol * spinor_site_size * inv_param.Ls, inv_param.cpu_prec);
284  double l2r = sqrt(nrm2 / src2);
285 
286  printfQuda("Residuals: (L2 relative) tol %g, QUDA = %g, host = %g; (heavy-quark) tol %g, QUDA = %g\n",
288 
289 
290  freeGaugeQuda();
292 
293  // finalize the QUDA library
294  endQuda();
295 
296  // finalize the communications layer
297  finalizeComms();
298 
300  if (clover) free(clover);
301  if (clover_inv) free(clover_inv);
302  }
303 
304  for (int dir = 0; dir<4; dir++) free(gauge[dir]);
305 
306  return 0;
307 }
QudaPrecision prec_refinement_sloppy
QudaReconstructType link_recon_sloppy
std::shared_ptr< QUDAApp > make_app(std::string app_description, std::string app_name)
QudaReconstructType link_recon
int device_ordinal
QudaReconstructType link_recon_precondition
int & ydim
QudaTwistFlavorType twist_flavor
QudaPrecision prec_ritz
double epsilon
QudaMemoryType mem_type_ritz
int & zdim
QudaDslashType dslash_type
QudaMatPCType matpc_type
QudaFieldLocation location_ritz
int Nsrc
void add_deflation_option_group(std::shared_ptr< QUDAApp > quda_app)
QudaPrecision prec_precondition
QudaPrecision prec
int Lsdim
int & tdim
int & xdim
std::array< int, 4 > gridsize_from_cmdline
QudaMassNormalization normalization
QudaPrecision prec_sloppy
int Vh
Definition: host_utils.cpp:38
int V
Definition: host_utils.cpp:37
void * memset(void *s, int c, size_t n)
void setDims(int *)
Definition: host_utils.cpp:315
cpuColorSpinorField * spinorOut
Definition: covdev_test.cpp:31
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
QudaInvertParam inv_param
Definition: covdev_test.cpp:27
int main(int argc, char **argv)
void display_test_info()
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void mdw_matpc(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void dw_4d_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void mdw_mat(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
@ QUDA_WILSON_DSLASH
Definition: enum_quda.h:90
@ QUDA_TWISTED_CLOVER_DSLASH
Definition: enum_quda.h:100
@ QUDA_MOBIUS_DWF_DSLASH
Definition: enum_quda.h:95
@ QUDA_CLOVER_WILSON_DSLASH
Definition: enum_quda.h:91
@ QUDA_TWISTED_MASS_DSLASH
Definition: enum_quda.h:99
@ QUDA_DOMAIN_WALL_DSLASH
Definition: enum_quda.h:93
@ QUDA_DOMAIN_WALL_4D_DSLASH
Definition: enum_quda.h:94
@ QUDA_MASS_NORMALIZATION
Definition: enum_quda.h:227
@ QUDA_RECONSTRUCT_INVALID
Definition: enum_quda.h:76
@ QUDA_MATPC_SOLUTION
Definition: enum_quda.h:159
@ QUDA_MAT_SOLUTION
Definition: enum_quda.h:157
@ QUDA_SINGLE_PRECISION
Definition: enum_quda.h:64
@ QUDA_INVALID_PRECISION
Definition: enum_quda.h:66
@ QUDA_TWIST_SINGLET
Definition: enum_quda.h:400
@ QUDA_TWIST_NONDEG_DOUBLET
Definition: enum_quda.h:401
#define gauge_site_size
Definition: face_gauge.cpp:34
double norm_2(void *v, int len, QudaPrecision precision)
Definition: host_blas.cpp:48
void constructHostCloverField(void *clover, void *clover_inv, QudaInvertParam &inv_param)
Definition: host_utils.cpp:186
size_t host_gauge_data_type_size
Definition: host_utils.cpp:65
int dimPartitioned(int dim)
Definition: host_utils.cpp:376
void initComms(int argc, char **argv, std::array< int, 4 > &commDims)
Definition: host_utils.cpp:255
size_t host_spinor_data_type_size
Definition: host_utils.cpp:66
void finalizeComms()
Definition: host_utils.cpp:292
size_t host_clover_data_type_size
Definition: host_utils.cpp:67
void dw_setDims(int *X, const int L5)
Definition: host_utils.cpp:353
void constructHostGaugeField(void **gauge, QudaGaugeParam &gauge_param, int argc, char **argv)
Definition: host_utils.cpp:166
double kappa5
Definition: host_utils.cpp:51
void initRand()
Definition: host_utils.cpp:302
#define clover_site_size
Definition: host_utils.h:11
void setWilsonGaugeParam(QudaGaugeParam &gauge_param)
Definition: set_params.cpp:37
#define spinor_site_size
Definition: host_utils.h:9
void setDeflatedInvertParam(QudaInvertParam &inv_param)
void setDeflationParam(QudaPrecision ritz_prec, QudaFieldLocation location_ritz, QudaMemoryType mem_type_ritz, QudaExtLibType deflation_ext_lib, char vec_infile[], char vec_outfile[], QudaEigParam *df_param)
const char * get_ritz_location_str(QudaFieldLocation type)
Definition: misc.cpp:312
const char * get_matpc_str(QudaMatPCType type)
Definition: misc.cpp:200
const char * get_prec_str(QudaPrecision prec)
Definition: misc.cpp:26
const char * get_dslash_str(QudaDslashType type)
Definition: misc.cpp:118
const char * get_memory_type_str(QudaMemoryType type)
Definition: misc.cpp:325
const char * get_mass_normalization_str(QudaMassNormalization type)
Definition: misc.cpp:186
const char * get_recon_str(QudaReconstructType recon)
Definition: misc.cpp:68
void ax(double a, ColorSpinorField &x)
void mxpy(ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:42
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:120
Main header file for the QUDA library.
void destroyDeflationQuda(void *df_instance)
QudaGaugeParam newQudaGaugeParam(void)
void freeGaugeQuda(void)
void initQuda(int device)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
void freeCloverQuda(void)
QudaInvertParam newQudaInvertParam(void)
QudaEigParam newQudaEigParam(void)
void endQuda(void)
void invertQuda(void *h_x, void *h_b, QudaInvertParam *param)
void * newDeflationQuda(QudaEigParam *param)
QudaInvertParam * invert_param
Definition: quda.h:413
int X[4]
Definition: quda.h:35
double gflops
Definition: quda.h:277
QudaSolutionType solution_type
Definition: quda.h:228
double tol_hq
Definition: quda.h:140
QudaMassNormalization mass_normalization
Definition: quda.h:232
double true_res
Definition: quda.h:143
double mass
Definition: quda.h:109
double m5
Definition: quda.h:112
QudaTwistFlavorType twist_flavor
Definition: quda.h:134
double mu
Definition: quda.h:131
double_complex b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:115
double true_res_hq
Definition: quda.h:144
double secs
Definition: quda.h:278
QudaDagType dagger
Definition: quda.h:231
double epsilon
Definition: quda.h:132
QudaPrecision cpu_prec
Definition: quda.h:237
QudaMatPCType matpc_type
Definition: quda.h:230
double_complex c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:116
double tol
Definition: quda.h:138
void * deflation_op
Definition: quda.h:303
double kappa
Definition: quda.h:110
#define printfQuda(...)
Definition: util_quda.h:114
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)