QUDA  v1.1.0
A library for QCD on GPUs
dslash_test_utils.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <iostream>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <algorithm>
8 
9 #include <quda.h>
10 #include <quda_internal.h>
11 #include <dirac_quda.h>
12 #include <dslash_quda.h>
13 #include <invert_quda.h>
14 #include <util_quda.h>
15 #include <blas_quda.h>
16 
17 #include <host_utils.h>
18 #include <command_line_params.h>
19 #include <dslash_reference.h>
22 #include "misc.h"
23 #include "dslash_test_helpers.h"
24 
25 // google test frame work
26 #include <gtest/gtest.h>
27 
28 #include <color_spinor_field.h>
29 
30 using namespace quda;
31 
32 CLI::TransformPairs<dslash_test_type> dtest_type_map {{"Dslash", dslash_test_type::Dslash},
33  {"MatPC", dslash_test_type::MatPC},
34  {"Mat", dslash_test_type::Mat},
35  {"MatPCDagMatPC", dslash_test_type::MatPCDagMatPC},
36  {"MatPCDagMatPCLocal", dslash_test_type::MatPCDagMatPCLocal},
37  {"MatDagMat", dslash_test_type::MatDagMat},
38  {"M5", dslash_test_type::M5},
39  {"M5inv", dslash_test_type::M5inv},
40  {"Dslash4pre", dslash_test_type::Dslash4pre}};
41 struct DslashTime {
42  double event_time;
43  double cpu_time;
44  double cpu_min;
45  double cpu_max;
46 
47  DslashTime() : event_time(0.0), cpu_time(0.0), cpu_min(DBL_MAX), cpu_max(0.0) {}
48 };
49 
51 
52  // CPU color spinor fields
56  quda::cpuColorSpinorField *spinorTmp = nullptr;
57  // For split grid
58  std::vector<quda::cpuColorSpinorField *> vp_spinor;
59  std::vector<quda::cpuColorSpinorField *> vp_spinorOut;
60  std::vector<quda::cpuColorSpinorField *> vp_spinorRef;
61 
62  // CUDA color spinor fields
65  quda::cudaColorSpinorField *tmp1 = nullptr;
66  quda::cudaColorSpinorField *tmp2 = nullptr;
67 
68  // Dirac pointers
69  quda::Dirac *dirac = nullptr;
70  quda::DiracMobiusPC *dirac_mdwf = nullptr;
71  quda::DiracDomainWall4DPC *dirac_4dpc = nullptr;
72 
73  // Raw pointers
74  void *hostGauge[4] = {nullptr};
75  void *hostClover = nullptr;
76  void *hostCloverInv = nullptr;
77 
78  // Parameters
81 
82  // Test options
88  int num_src;
89 
90  const bool transfer = false;
91 
92  void init_ctest(int argc, char **argv, int precision, QudaReconstructType link_recon)
93  {
94  cuda_prec = getPrecision(precision);
95 
100 
105 
110 
112 
113  init(argc, argv);
114  }
115 
116  void init_test(int argc, char **argv)
117  {
122 
123  init(argc, argv);
124  }
125 
126  void init(int argc, char **argv)
127  {
128  num_src = grid_partition[0] * grid_partition[1] * grid_partition[2] * grid_partition[3];
129  test_split_grid = num_src > 1;
130  if (test_split_grid) { dtest_type = dslash_test_type::Dslash; }
131 
133  errorQuda("Asqtad not supported. Please try staggered_dslash_test instead");
136  // for these we always use kernel packing
138  } else {
140  Ls = 1;
141  }
142 
144  not_dagger = dagger ? QUDA_DAG_NO : QUDA_DAG_YES;
145 
149 
151  switch (dtest_type) {
159  default: errorQuda("Test type %d not defined QUDA_DOMAIN_WALL_4D_DSLASH\n", static_cast<int>(dtest_type));
160  }
162  switch (dtest_type) {
172  default: errorQuda("Test type %d not defined on QUDA_MOBIUS_DWF_(EOFA_)DSLASH\n", static_cast<int>(dtest_type));
173  }
174  } else {
175  switch (dtest_type) {
181  default: errorQuda("Test type %d not defined\n", static_cast<int>(dtest_type));
182  }
183  }
184 
185  if (inv_param.cpu_prec != gauge_param.cpu_prec) errorQuda("Gauge and spinor CPU precisions must match");
186 
187  // construct input fields
188  for (int dir = 0; dir < 4; dir++) hostGauge[dir] = malloc((size_t)V * gauge_site_size * gauge_param.cpu_prec);
189 
192  hostClover = malloc((size_t)V * clover_site_size * inv_param.clover_cpu_prec);
193  hostCloverInv = malloc((size_t)V * clover_site_size * inv_param.clover_cpu_prec);
194  }
195 
197  csParam.nColor = 3;
198  csParam.nSpin = 4;
199  csParam.nDim = 4;
200  for (int d = 0; d < 4; d++) csParam.x[d] = gauge_param.X[d];
203  csParam.nDim = 5;
204  csParam.x[4] = Ls;
205  }
206 
209  } else {
211  }
212 
213  // ndeg_tm
217  csParam.x[4] = inv_param.Ls;
218  }
219 
221  csParam.pad = 0;
222 
225  } else {
227  csParam.x[0] /= 2;
228  }
229 
234 
238  spinorTmp = new cpuColorSpinorField(csParam);
239 
241 
246 
247  if (test_split_grid) {
248  inv_param.num_src = num_src;
250  for (int n = 0; n < num_src; n++) {
251  vp_spinor.push_back(new cpuColorSpinorField(csParam));
252  vp_spinorOut.push_back(new cpuColorSpinorField(csParam));
253  vp_spinorRef.push_back(new cpuColorSpinorField(csParam));
254  }
255  }
256 
257  if (test_split_grid) {
258  for (int n = 0; n < num_src; n++) { *vp_spinor[n] = *spinor; }
259  }
260 
261  csParam.x[0] = gauge_param.X[0];
262 
263  // set verbosity prior to loadGaugeQuda
266 
267  printfQuda("Randomizing fields... ");
268  constructHostGaugeField(hostGauge, gauge_param, argc, argv);
269 
270  printfQuda("Sending gauge field to GPU\n");
271  loadGaugeQuda(hostGauge, &gauge_param);
272 
275  if (compute_clover)
276  printfQuda("Computing clover field on GPU\n");
277  else {
278  printfQuda("Sending clover field to GPU\n");
279  constructHostCloverField(hostClover, hostCloverInv, inv_param);
280  }
285 
286  loadCloverQuda(hostClover, hostCloverInv, &inv_param);
287  }
288 
289  if (!transfer) {
293 
296  } else {
298  csParam.x[0] /= 2;
299  }
300 
301  printfQuda("Creating cudaSpinor with nParity = %d\n", csParam.siteSubset);
303  printfQuda("Creating cudaSpinorOut with nParity = %d\n", csParam.siteSubset);
305 
306  tmp1 = new cudaColorSpinorField(csParam);
307 
309  csParam.x[0] /= 2;
310  }
311 
313  tmp2 = new cudaColorSpinorField(csParam);
314 
315  printfQuda("Sending spinor field to GPU\n");
316  *cudaSpinor = *spinor;
317 
318  double cpu_norm = blas::norm2(*spinor);
319  double cuda_norm = blas::norm2(*cudaSpinor);
320  printfQuda("Source: CPU = %e, CUDA = %e\n", cpu_norm, cuda_norm);
321 
323 
324  DiracParam diracParam;
325  setDiracParam(diracParam, &inv_param, pc);
326 
327  diracParam.tmp1 = tmp1;
328  diracParam.tmp2 = tmp2;
329 
331  dirac_4dpc = new DiracDomainWall4DPC(diracParam);
332  dirac = (Dirac *)dirac_4dpc;
333  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
334  dirac_mdwf = new DiracMobiusPC(diracParam);
335  dirac = (Dirac *)dirac_mdwf;
336  } else {
337  dirac = Dirac::create(diracParam);
338  }
339 
340  } else {
341  double cpu_norm = blas::norm2(*spinor);
342  printfQuda("Source: CPU = %e\n", cpu_norm);
343  }
344  }
345 
346  void end()
347  {
348  if (!transfer) {
349  if (dirac != nullptr) {
350  delete dirac;
351  dirac = nullptr;
352  }
353  delete cudaSpinor;
354  delete cudaSpinorOut;
355  delete tmp1;
356  delete tmp2;
357  }
358 
359  // release memory
360  delete spinor;
361  delete spinorOut;
362  delete spinorRef;
363  delete spinorTmp;
364 
365  for (auto p : vp_spinor) { delete p; }
366  for (auto p : vp_spinorOut) { delete p; }
367  for (auto p : vp_spinorRef) { delete p; }
368 
369  vp_spinor.clear();
370  vp_spinorOut.clear();
371  vp_spinorRef.clear();
372 
373  for (int dir = 0; dir < 4; dir++) free(hostGauge[dir]);
376  free(hostClover);
377  free(hostCloverInv);
378  }
379  }
380 
381  void dslashRef()
382  {
383 
384  // compare to dslash reference implementation
385  printfQuda("Calculating reference implementation...");
386 
388  switch (dtest_type) {
391  break;
395  break;
398  break;
400  wil_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.matpc_type, dagger,
402  wil_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.matpc_type, not_dagger,
404  break;
406  wil_mat(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec, gauge_param);
407  wil_mat(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, not_dagger, inv_param.cpu_prec, gauge_param);
408  break;
409  default: printfQuda("Test type not defined\n"); exit(-1);
410  }
411  } else if (dslash_type == QUDA_CLOVER_WILSON_DSLASH) {
412  switch (dtest_type) {
414  clover_dslash(spinorRef->V(), hostGauge, hostCloverInv, spinor->V(), parity, dagger, inv_param.cpu_prec,
415  gauge_param);
416  break;
418  clover_matpc(spinorRef->V(), hostGauge, hostClover, hostCloverInv, spinor->V(), inv_param.kappa,
420  break;
422  clover_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec,
423  gauge_param);
424  break;
426  clover_matpc(spinorTmp->V(), hostGauge, hostClover, hostCloverInv, spinor->V(), inv_param.kappa,
428  clover_matpc(spinorRef->V(), hostGauge, hostClover, hostCloverInv, spinorTmp->V(), inv_param.kappa,
430  break;
432  clover_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, dagger, inv_param.cpu_prec,
433  gauge_param);
434  clover_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, not_dagger,
436  break;
437  default: printfQuda("Test type not defined\n"); exit(-1);
438  }
440  printfQuda("HASENBUCH_TWIST Test: kappa=%lf mu=%lf\n", inv_param.kappa, inv_param.mu);
441  switch (dtest_type) {
443  // My dslash should be the same as the clover dslash
444  clover_dslash(spinorRef->V(), hostGauge, hostCloverInv, spinor->V(), parity, dagger, inv_param.cpu_prec,
445  gauge_param);
446  break;
448  // my matpc op
449  cloverHasenbuschTwist_matpc(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa,
451 
452  break;
454  // my mat
455  cloverHasenbuchTwist_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu,
457  break;
459  // matpc^\dagger matpc
460  // my matpc op
461  cloverHasenbuschTwist_matpc(spinorTmp->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa,
463 
464  cloverHasenbuschTwist_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), hostClover, hostCloverInv, inv_param.kappa,
466 
467  break;
469  // my mat
470  cloverHasenbuchTwist_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu,
472  cloverHasenbuchTwist_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, inv_param.mu,
474 
475  break;
476  default: printfQuda("Test type not defined\n"); exit(-1);
477  }
478  } else if (dslash_type == QUDA_TWISTED_MASS_DSLASH) {
479  switch (dtest_type) {
484  else {
485  int tm_offset = 12 * spinorRef->Volume();
486 
487  void *ref1 = spinorRef->V();
488  void *ref2 = (char *)ref1 + tm_offset * cpu_prec;
489 
490  void *flv1 = spinor->V();
491  void *flv2 = (char *)flv1 + tm_offset * cpu_prec;
492 
493  tm_ndeg_dslash(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon, parity,
495  }
496  break;
501  else {
502  int tm_offset = 12 * spinorRef->Volume();
503 
504  void *ref1 = spinorRef->V();
505  void *ref2 = (char *)ref1 + tm_offset * cpu_prec;
506 
507  void *flv1 = spinor->V();
508  void *flv2 = (char *)flv1 + tm_offset * cpu_prec;
509 
510  tm_ndeg_matpc(ref1, ref2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
512  }
513  break;
518  else {
519  int tm_offset = 12 * spinorRef->Volume();
520 
521  void *evenOut = spinorRef->V();
522  void *oddOut = (char *)evenOut + tm_offset * cpu_prec;
523 
524  void *evenIn = spinor->V();
525  void *oddIn = (char *)evenIn + tm_offset * cpu_prec;
526 
527  tm_ndeg_mat(evenOut, oddOut, hostGauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon,
529  }
530  break;
533  tm_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
535  tm_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
537  } else {
538  int tm_offset = 12 * spinorRef->Volume();
539 
540  void *ref1 = spinorRef->V();
541  void *ref2 = (char *)ref1 + tm_offset * cpu_prec;
542 
543  void *flv1 = spinor->V();
544  void *flv2 = (char *)flv1 + tm_offset * cpu_prec;
545 
546  void *tmp1 = spinorTmp->V();
547  void *tmp2 = (char *)tmp1 + tm_offset * cpu_prec;
548 
549  tm_ndeg_matpc(tmp1, tmp2, hostGauge, flv1, flv2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
551  tm_ndeg_matpc(ref1, ref2, hostGauge, tmp1, tmp2, inv_param.kappa, inv_param.mu, inv_param.epsilon,
553  }
554  break;
557  tm_mat(spinorTmp->V(), hostGauge, spinor->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor, dagger,
559  tm_mat(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.kappa, inv_param.mu, inv_param.twist_flavor,
560  not_dagger, inv_param.cpu_prec, gauge_param);
561  } else {
562  int tm_offset = 12 * spinorRef->Volume();
563 
564  void *evenOut = spinorRef->V();
565  void *oddOut = (char *)evenOut + tm_offset * cpu_prec;
566 
567  void *evenIn = spinor->V();
568  void *oddIn = (char *)evenIn + tm_offset * cpu_prec;
569 
570  void *evenTmp = spinorTmp->V();
571  void *oddTmp = (char *)evenTmp + tm_offset * cpu_prec;
572 
573  tm_ndeg_mat(evenTmp, oddTmp, hostGauge, evenIn, oddIn, inv_param.kappa, inv_param.mu, inv_param.epsilon,
575  tm_ndeg_mat(evenOut, oddOut, hostGauge, evenTmp, oddTmp, inv_param.kappa, inv_param.mu, inv_param.epsilon,
576  not_dagger, inv_param.cpu_prec, gauge_param);
577  }
578  break;
579  default: printfQuda("Test type not defined\n"); exit(-1);
580  }
581  } else if (dslash_type == QUDA_TWISTED_CLOVER_DSLASH) {
582  switch (dtest_type) {
585  tmc_dslash(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
587  else
588  errorQuda("Not supported\n");
589  break;
592  tmc_matpc(spinorRef->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
594  else
595  errorQuda("Not supported\n");
596  break;
599  tmc_mat(spinorRef->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu,
601  else
602  errorQuda("Not supported\n");
603  break;
606  tmc_matpc(spinorTmp->V(), hostGauge, spinor->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
608  tmc_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), hostClover, hostCloverInv, inv_param.kappa, inv_param.mu,
610  } else
611  errorQuda("Not supported\n");
612  break;
615  tmc_mat(spinorTmp->V(), hostGauge, hostClover, spinor->V(), inv_param.kappa, inv_param.mu,
617  tmc_mat(spinorRef->V(), hostGauge, hostClover, spinorTmp->V(), inv_param.kappa, inv_param.mu,
619  } else
620  errorQuda("Not supported\n");
621  break;
622  default: printfQuda("Test type not defined\n"); exit(-1);
623  }
624  } else if (dslash_type == QUDA_DOMAIN_WALL_DSLASH) {
625  switch (dtest_type) {
628  inv_param.mass);
629  break;
633  break;
636  break;
638  dw_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec,
640  dw_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, inv_param.matpc_type, not_dagger,
642  break;
645  inv_param.mass);
646  break;
647  default: printf("Test type not supported for domain wall\n"); exit(-1);
648  }
649  } else if (dslash_type == QUDA_DOMAIN_WALL_4D_DSLASH) {
650  double *kappa_5 = (double *)malloc(Ls * sizeof(double));
651  for (int xs = 0; xs < Ls; xs++) kappa_5[xs] = kappa5;
652  switch (dtest_type) {
655  inv_param.mass);
656  break;
659  inv_param.mass, true);
660  break;
663  inv_param.mass, kappa_5);
664  break;
668  break;
671  inv_param.mass);
672  break;
674  dw_4d_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa5, inv_param.matpc_type, dagger, gauge_param.cpu_prec,
676  dw_4d_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, inv_param.matpc_type, not_dagger,
678  break;
680  dw_4d_mat(spinorTmp->V(), hostGauge, spinor->V(), kappa5, dagger, gauge_param.cpu_prec, gauge_param,
681  inv_param.mass);
682  dw_4d_mat(spinorRef->V(), hostGauge, spinorTmp->V(), kappa5, not_dagger, gauge_param.cpu_prec, gauge_param,
683  inv_param.mass);
684  break;
685  default: printf("Test type not supported for domain wall\n"); exit(-1);
686  }
687  free(kappa_5);
688  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
689  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
690  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
691  double _Complex *kappa_5 = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
692  double _Complex *kappa_mdwf = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
693  for (int xs = 0; xs < Lsdim; xs++) {
694  kappa_b[xs] = 1.0 / (2 * (inv_param.b_5[xs] * (4.0 + inv_param.m5) + 1.0));
695  kappa_c[xs] = 1.0 / (2 * (inv_param.c_5[xs] * (4.0 + inv_param.m5) - 1.0));
696  kappa_5[xs] = 0.5 * kappa_b[xs] / kappa_c[xs];
697  kappa_mdwf[xs] = -kappa_5[xs];
698  }
699  switch (dtest_type) {
702  inv_param.mass);
703  break;
706  inv_param.mass, kappa_5, true);
707  break;
711  break;
714  inv_param.mass, kappa_mdwf);
715  break;
717  mdw_matpc(spinorRef->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type, dagger,
719  break;
721  mdw_mat(spinorRef->V(), hostGauge, spinor->V(), kappa_b, kappa_c, dagger, gauge_param.cpu_prec, gauge_param,
723  break;
725  mdw_matpc(spinorTmp->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type, dagger,
727  mdw_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), kappa_b, kappa_c, inv_param.matpc_type, not_dagger,
729  break;
731  mdw_mat(spinorTmp->V(), hostGauge, spinor->V(), kappa_b, kappa_c, dagger, gauge_param.cpu_prec, gauge_param,
733  mdw_mat(spinorRef->V(), hostGauge, spinorTmp->V(), kappa_b, kappa_c, not_dagger, gauge_param.cpu_prec,
735  break;
737  // reference for MdagM local operator
738  mdw_mdagm_local(spinorRef->V(), hostGauge, spinor->V(), kappa_b, kappa_c, inv_param.matpc_type,
740  break;
741  default: printf("Test type not supported for Mobius domain wall\n"); exit(-1);
742  }
743  free(kappa_b);
744  free(kappa_c);
745  free(kappa_5);
746  free(kappa_mdwf);
748  double _Complex *kappa_b = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
749  double _Complex *kappa_c = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
750  double _Complex *kappa_5 = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
751  double _Complex *kappa_mdwf = (double _Complex *)malloc(Lsdim * sizeof(double _Complex));
752  for (int xs = 0; xs < Lsdim; xs++) {
753  kappa_b[xs] = 1.0 / (2 * (inv_param.b_5[xs] * (4.0 + inv_param.m5) + 1.0));
754  kappa_c[xs] = 1.0 / (2 * (inv_param.c_5[xs] * (4.0 + inv_param.m5) - 1.0));
755  kappa_5[xs] = 0.5 * kappa_b[xs] / kappa_c[xs];
756  kappa_mdwf[xs] = -kappa_5[xs];
757  }
758  switch (dtest_type) {
761  inv_param.mass);
762  break;
765  (__real__ inv_param.b_5[0]), (__real__ inv_param.c_5[0]), inv_param.mq1, inv_param.mq2,
767  break;
771  break;
774  (__real__ inv_param.b_5[0]), (__real__ inv_param.c_5[0]), inv_param.mq1, inv_param.mq2,
776  break;
779  inv_param.m5, (__real__ inv_param.b_5[0]), (__real__ inv_param.c_5[0]), inv_param.mq1,
781  break;
783  mdw_eofa_mat(spinorTmp->V(), hostGauge, spinor->V(), dagger, gauge_param.cpu_prec, gauge_param, inv_param.mass,
784  inv_param.m5, (__real__ inv_param.b_5[0]), (__real__ inv_param.c_5[0]), inv_param.mq1,
786  mdw_eofa_mat(spinorRef->V(), hostGauge, spinorTmp->V(), not_dagger, gauge_param.cpu_prec, gauge_param,
787  inv_param.mass, inv_param.m5, (__real__ inv_param.b_5[0]), (__real__ inv_param.c_5[0]),
789  break;
795  break;
797  mdw_eofa_matpc(spinorTmp->V(), hostGauge, spinor->V(), inv_param.matpc_type, dagger, gauge_param.cpu_prec,
801  mdw_eofa_matpc(spinorRef->V(), hostGauge, spinorTmp->V(), inv_param.matpc_type, not_dagger,
805  break;
806  default: printf("Test type not supported for Mobius domain wall EOFA\n"); exit(-1);
807  }
808  free(kappa_b);
809  free(kappa_c);
810  free(kappa_5);
811  free(kappa_mdwf);
812  } else {
813  printfQuda("Unsupported dslash_type\n");
814  exit(-1);
815  }
816 
817  printfQuda("done.\n");
818  }
819 
820  // execute kernel
822  {
823 
824  DslashTime dslash_time;
825  timeval tstart, tstop;
826 
827  cudaEvent_t start, end;
828  cudaEventCreate(&start);
829  cudaEventCreate(&end);
830 
831  comm_barrier();
832  cudaEventRecord(start, 0);
833 
834  if (test_split_grid) {
835 
836  std::vector<void *> _hp_x(inv_param.num_src);
837  std::vector<void *> _hp_b(inv_param.num_src);
838  for (int i = 0; i < inv_param.num_src; i++) {
839  _hp_x[i] = vp_spinorOut[i]->V();
840  _hp_b[i] = vp_spinor[i]->V();
841  }
842 
845  dslashMultiSrcCloverQuda(_hp_x.data(), _hp_b.data(), &inv_param, parity, hostGauge, &gauge_param, hostClover,
846  hostCloverInv);
847  } else {
848  dslashMultiSrcQuda(_hp_x.data(), _hp_b.data(), &inv_param, parity, hostGauge, &gauge_param);
849  }
850 
851  } else {
852 
853  for (int i = 0; i < niter; i++) {
854 
855  gettimeofday(&tstart, NULL);
856 
858  switch (dtest_type) {
860  if (transfer) {
862  } else {
863  static_cast<quda::DiracDomainWall4DPC *>(dirac)->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
864  }
865  break;
867  if (transfer) {
869  } else {
870  static_cast<quda::DiracDomainWall4DPC *>(dirac)->Dslash5(*cudaSpinorOut, *cudaSpinor, parity);
871  }
872  break;
874  if (transfer) {
876  } else {
877  static_cast<quda::DiracDomainWall4DPC *>(dirac)->Dslash5inv(*cudaSpinorOut, *cudaSpinor, parity, kappa5);
878  }
879  break;
882  if (transfer) {
883  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
884  } else {
886  }
887  break;
890  if (transfer) {
892  } else {
894  }
895  break;
896  default:
897  errorQuda("Test type %s not support for current Dslash", get_string(dtest_type_map, dtest_type).c_str());
898  }
899  } else if (dslash_type == QUDA_MOBIUS_DWF_DSLASH) {
900  switch (dtest_type) {
902  if (transfer) {
904  } else {
905  static_cast<quda::DiracMobiusPC *>(dirac)->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
906  }
907  break;
909  if (transfer) {
911  } else {
912  static_cast<quda::DiracMobiusPC *>(dirac)->Dslash5(*cudaSpinorOut, *cudaSpinor, parity);
913  }
914  break;
916  if (transfer) {
918  } else {
920  }
921  break;
923  if (transfer) {
925  } else {
926  static_cast<quda::DiracMobiusPC *>(dirac)->Dslash5inv(*cudaSpinorOut, *cudaSpinor, parity);
927  }
928  break;
931  if (transfer) {
932  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
933  } else {
935  }
936  break;
939  if (transfer) {
941  } else {
943  }
944  break;
946  if (transfer) {
947  errorQuda("(transfer == true) version NOT yet available!\n");
948  } else {
950  }
951  break;
952  default:
953  errorQuda("Test type %s not support for current Dslash", get_string(dtest_type_map, dtest_type).c_str());
954  }
956  switch (dtest_type) {
958  if (transfer) {
959  errorQuda("(transfer == true) version NOT yet available!\n");
960  } else {
961  static_cast<quda::DiracMobiusEofaPC *>(dirac)->Dslash4(*cudaSpinorOut, *cudaSpinor, parity);
962  }
963  break;
965  if (transfer) {
966  errorQuda("(transfer == true) version NOT yet available!\n");
967  } else {
968  static_cast<quda::DiracMobiusEofaPC *>(dirac)->m5_eofa(*cudaSpinorOut, *cudaSpinor);
969  }
970  break;
972  if (transfer) {
973  errorQuda("(transfer == true) version NOT yet available!\n");
974  } else {
976  }
977  break;
979  if (transfer) {
980  errorQuda("(transfer == true) version NOT yet available!\n");
981  } else {
982  static_cast<quda::DiracMobiusEofaPC *>(dirac)->m5inv_eofa(*cudaSpinorOut, *cudaSpinor);
983  }
984  break;
987  if (transfer) {
988  errorQuda("(transfer == true) version NOT yet available!\n");
989  // MatQuda(spinorOut->V(), spinor->V(), &inv_param);
990  } else {
992  }
993  break;
996  if (transfer) {
997  errorQuda("(transfer == true) version NOT yet available!\n");
998  // MatDagMatQuda(spinorOut->V(), spinor->V(), &inv_param);
999  } else {
1001  }
1002  break;
1004  if (transfer) {
1005  errorQuda("(transfer == true) version NOT yet available!\n");
1006  } else {
1008  }
1009  break;
1010  default: errorQuda("Undefined test type(=%d)\n", static_cast<int>(dtest_type));
1011  }
1012  } else {
1013  switch (dtest_type) {
1016  if (transfer) {
1018  } else {
1020  }
1021  } else {
1022  if (transfer) {
1024  } else {
1026  }
1027  }
1028  break;
1030  case dslash_test_type::Mat:
1031  if (transfer) {
1032  MatQuda(spinorOut->V(), spinor->V(), &inv_param);
1033  } else {
1035  }
1036  break;
1039  if (transfer) {
1041  } else {
1043  }
1044  break;
1045  default:
1046  errorQuda("Test type %s not support for current Dslash", get_string(dtest_type_map, dtest_type).c_str());
1047  }
1048  }
1049 
1050  gettimeofday(&tstop, NULL);
1051  long ds = tstop.tv_sec - tstart.tv_sec;
1052  long dus = tstop.tv_usec - tstart.tv_usec;
1053  double elapsed = ds + 0.000001 * dus;
1054 
1055  dslash_time.cpu_time += elapsed;
1056  // skip first and last iterations since they may skew these metrics if comms are not synchronous
1057  if (i > 0 && i < niter) {
1058  if (elapsed < dslash_time.cpu_min) dslash_time.cpu_min = elapsed;
1059  if (elapsed > dslash_time.cpu_max) dslash_time.cpu_max = elapsed;
1060  }
1061  }
1062  }
1063 
1064  cudaEventRecord(end, 0);
1065  cudaEventSynchronize(end);
1066  float runTime;
1067  cudaEventElapsedTime(&runTime, start, end);
1068  cudaEventDestroy(start);
1069  cudaEventDestroy(end);
1070 
1071  dslash_time.event_time = runTime / 1000;
1072 
1073  return dslash_time;
1074  }
1075 
1076  void run_test(int niter, bool print_metrics = false)
1077  {
1078  {
1079  printfQuda("Tuning...\n");
1080  dslashCUDA(1); // warm-up run
1081  }
1082  printfQuda("Executing %d kernel loops...\n", niter);
1083  if (!transfer) dirac->Flops();
1084  DslashTime dslash_time = dslashCUDA(niter);
1085  printfQuda("done.\n\n");
1086 
1087  if (!test_split_grid) {
1088  if (!transfer) *spinorOut = *cudaSpinorOut;
1089 
1090  // print timing information
1091  printfQuda("%fus per kernel call\n", 1e6 * dslash_time.event_time / niter);
1092  // FIXME No flops count for twisted-clover yet
1093  unsigned long long flops = 0;
1094  if (!transfer) flops = dirac->Flops();
1095  printfQuda("%llu flops per kernel call, %llu flops per site\n", flops / niter,
1096  (flops / niter) / cudaSpinor->Volume());
1097  printfQuda("GFLOPS = %f\n", 1.0e-9 * flops / dslash_time.event_time);
1098 
1099  size_t ghost_bytes = cudaSpinor->GhostBytes();
1100 
1101  printfQuda("Effective halo bi-directional bandwidth (GB/s) GPU = %f ( CPU = %f, min = %f , max = %f ) for "
1102  "aggregate message size %lu bytes\n",
1103  1.0e-9 * 2 * ghost_bytes * niter / dslash_time.event_time,
1104  1.0e-9 * 2 * ghost_bytes * niter / dslash_time.cpu_time, 1.0e-9 * 2 * ghost_bytes / dslash_time.cpu_max,
1105  1.0e-9 * 2 * ghost_bytes / dslash_time.cpu_min, 2 * ghost_bytes);
1106 
1107  ::testing::Test::RecordProperty("Gflops", std::to_string(1.0e-9 * flops / dslash_time.event_time));
1108  ::testing::Test::RecordProperty("Halo_bidirectitonal_BW_GPU",
1109  1.0e-9 * 2 * ghost_bytes * niter / dslash_time.event_time);
1110  ::testing::Test::RecordProperty("Halo_bidirectitonal_BW_CPU",
1111  1.0e-9 * 2 * ghost_bytes * niter / dslash_time.cpu_time);
1112  ::testing::Test::RecordProperty("Halo_bidirectitonal_BW_CPU_min", 1.0e-9 * 2 * ghost_bytes / dslash_time.cpu_max);
1113  ::testing::Test::RecordProperty("Halo_bidirectitonal_BW_CPU_max", 1.0e-9 * 2 * ghost_bytes / dslash_time.cpu_min);
1114  ::testing::Test::RecordProperty("Halo_message_size_bytes", 2 * ghost_bytes);
1115  }
1116  }
1117 
1118  double verify()
1119  {
1120  double deviation;
1121  if (test_split_grid) {
1122  for (int n = 0; n < num_src; n++) {
1123  double norm2_cpu = blas::norm2(*spinorRef);
1124  double norm2_cpu_cuda = blas::norm2(*vp_spinorOut[n]);
1125  printfQuda("Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1126  deviation = std::max(deviation, pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *vp_spinorOut[n]))));
1127  }
1128  } else {
1129  double norm2_cpu = blas::norm2(*spinorRef);
1130  double norm2_cpu_cuda = blas::norm2(*spinorOut);
1131  if (!transfer) {
1132  double norm2_cuda = blas::norm2(*cudaSpinorOut);
1133  printfQuda("Results: CPU = %f, CUDA=%f, CPU-CUDA = %f\n", norm2_cpu, norm2_cuda, norm2_cpu_cuda);
1134  } else {
1135  printfQuda("Result: CPU = %f, CPU-QUDA = %f\n", norm2_cpu, norm2_cpu_cuda);
1136  }
1137  deviation = pow(10, -(double)(cpuColorSpinorField::Compare(*spinorRef, *spinorOut)));
1138  }
1139  return deviation;
1140  }
1141 };
void setPrecision(QudaPrecision precision, QudaPrecision ghost_precision=QUDA_INVALID_PRECISION, bool force_native=false)
QudaTwistFlavorType twistFlavor
virtual void MdagMLocal(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the local MdagM operator: equivalent to applying zero Dirichlet boundary condition to MdagM on ...
Definition: dirac_quda.h:232
unsigned long long Flops() const
returns and then zeroes flopcount
Definition: dirac_quda.h:313
static Dirac * create(const DiracParam &param)
Creates a subclass from parameters.
Definition: dirac.cpp:151
ColorSpinorField * tmp1
Definition: dirac_quda.h:52
ColorSpinorField * tmp2
Definition: dirac_quda.h:53
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
virtual void Dslash(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
apply 'dslash' operator for the DiracOp. This may be e.g. AD
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
static int Compare(const cpuColorSpinorField &a, const cpuColorSpinorField &b, const int resolution=1)
Perform a component by component comparison of two color-spinor fields. In doing we normalize with re...
static void RecordProperty(const std::string &key, const std::string &value)
void clover_mat(void *out, void **gauge, void *clover, void *in, double kappa, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void clover_dslash(void *out, void **gauge, void *clover, void *in, int parity, int dagger, QudaPrecision precision, QudaGaugeParam &param)
void tmc_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void clover_matpc(void *out, void **gauge, void *clover, void *clover_inv, void *in, double kappa, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tmc_dslash(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, int parity, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &param)
void cloverHasenbuschTwist_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void cloverHasenbuchTwist_mat(void *out, void **gauge, void *clover, void *in, double kappa, double mu, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, QudaMatPCType matpc_type)
void tmc_matpc(void *out, void **gauge, void *in, void *clover, void *cInv, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param)
void comm_barrier(void)
QudaReconstructType link_recon
int niter
QudaVerbosity verbosity
std::array< int, 4 > grid_partition
bool compute_clover
QudaDslashType dslash_type
int Lsdim
bool dagger
std::string get_string(CLI::TransformPairs< T > &map, T val)
int V
Definition: host_utils.cpp:37
void setDims(int *)
Definition: host_utils.cpp:315
void end(void)
GaugeCovDev * dirac
Definition: covdev_test.cpp:42
double dslashCUDA(int niter, int mu)
cudaColorSpinorField * cudaSpinor
Definition: covdev_test.cpp:32
QudaParity parity
Definition: covdev_test.cpp:40
cudaColorSpinorField * cudaSpinorOut
Definition: covdev_test.cpp:32
cpuColorSpinorField * spinor
Definition: covdev_test.cpp:31
cpuColorSpinorField * spinorOut
Definition: covdev_test.cpp:31
cpuColorSpinorField * spinorRef
Definition: covdev_test.cpp:31
QudaGaugeParam gauge_param
Definition: covdev_test.cpp:26
QudaInvertParam inv_param
Definition: covdev_test.cpp:27
void dw_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_4d_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void mdw_matpc(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void dw_4d_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dslash_4_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void mdw_mat(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void mdw_dslash_4_pre(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5, bool zero_initialize)
void dw_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void dw_dslash_5_4d(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, bool zero_initialize)
void mdw_dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *kappa)
void dw_matdagmat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void mdw_eofa_m5(void *res, void *spinorField, int oddBit, int daggerBit, double mferm, double m5, double b, double c, double mq1, double mq2, double mq3, int eofa_pm, double eofa_shift, QudaPrecision precision)
void dslash_5_inv(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double *kappa)
void mdw_mdagm_local(void *out, void **gauge, void *in, double _Complex *kappa_b, double _Complex *kappa_c, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *b5, double _Complex *c5)
void mdw_eofa_matpc(void *out, void **gauge, void *in, QudaMatPCType matpc_type, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double m5, double b, double c, double mq1, double mq2, double mq3, int eofa_pm, double eofa_shift)
void mdw_eofa_m5inv(void *res, void *spinorField, int oddBit, int daggerBit, double mferm, double m5, double b, double c, double mq1, double mq2, double mq3, int eofa_pm, double eofa_shift, QudaPrecision precision)
void dw_matpc(void *out, void **gauge, void *in, double kappa, QudaMatPCType matpc_type, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm)
void mdw_dslash_5(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double _Complex *kappa, bool zero_initialize)
void mdw_eofa_mat(void *out, void **gauge, void *in, int dagger, QudaPrecision precision, QudaGaugeParam &gauge_param, double mferm, double m5, double b, double c, double mq1, double mq2, double mq3, int eofa_pm, double eofa_shift)
void dslashQuda_mdwf(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, dslash_test_type test_type)
void dslashQuda_4dpc(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, dslash_test_type test_type)
dslash_test_type
CLI::TransformPairs< dslash_test_type > dtest_type_map
@ QUDA_RANDOM_SOURCE
Definition: enum_quda.h:376
@ QUDA_WILSON_DSLASH
Definition: enum_quda.h:90
@ QUDA_TWISTED_CLOVER_DSLASH
Definition: enum_quda.h:100
@ QUDA_STAGGERED_DSLASH
Definition: enum_quda.h:97
@ QUDA_MOBIUS_DWF_DSLASH
Definition: enum_quda.h:95
@ QUDA_CLOVER_WILSON_DSLASH
Definition: enum_quda.h:91
@ QUDA_TWISTED_MASS_DSLASH
Definition: enum_quda.h:99
@ QUDA_DOMAIN_WALL_DSLASH
Definition: enum_quda.h:93
@ QUDA_ASQTAD_DSLASH
Definition: enum_quda.h:98
@ QUDA_MOBIUS_DWF_EOFA_DSLASH
Definition: enum_quda.h:96
@ QUDA_LAPLACE_DSLASH
Definition: enum_quda.h:101
@ QUDA_CLOVER_HASENBUSCH_TWIST_DSLASH
Definition: enum_quda.h:92
@ QUDA_DOMAIN_WALL_4D_DSLASH
Definition: enum_quda.h:94
@ QUDA_DAG_NO
Definition: enum_quda.h:223
@ QUDA_DAG_YES
Definition: enum_quda.h:223
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333
@ QUDA_PARITY_SITE_SUBSET
Definition: enum_quda.h:332
enum QudaDagType_s QudaDagType
@ QUDA_UKQCD_GAMMA_BASIS
Definition: enum_quda.h:369
@ QUDA_EVEN_ODD_SITE_ORDER
Definition: enum_quda.h:340
enum QudaReconstructType_s QudaReconstructType
@ QUDA_MATPC_SOLUTION
Definition: enum_quda.h:159
@ QUDA_MATDAG_MAT_SOLUTION
Definition: enum_quda.h:158
@ QUDA_MATPCDAG_MATPC_SOLUTION
Definition: enum_quda.h:161
@ QUDA_MAT_SOLUTION
Definition: enum_quda.h:157
@ QUDA_5D_PC
Definition: enum_quda.h:397
@ QUDA_4D_PC
Definition: enum_quda.h:397
@ QUDA_SPACE_SPIN_COLOR_FIELD_ORDER
Definition: enum_quda.h:351
@ QUDA_ZERO_FIELD_CREATE
Definition: enum_quda.h:361
@ QUDA_TWIST_SINGLET
Definition: enum_quda.h:400
@ QUDA_DIRECT_SOLVE
Definition: enum_quda.h:167
@ QUDA_DIRECT_PC_SOLVE
Definition: enum_quda.h:169
enum QudaParity_s QudaParity
#define gauge_site_size
Definition: face_gauge.cpp:34
void constructHostCloverField(void *clover, void *clover_inv, QudaInvertParam &inv_param)
Definition: host_utils.cpp:186
QudaPrecision & cuda_prec
Definition: host_utils.cpp:58
void dw_setDims(int *X, const int L5)
Definition: host_utils.cpp:353
QudaPrecision & cpu_prec
Definition: host_utils.cpp:57
void constructHostGaugeField(void **gauge, QudaGaugeParam &gauge_param, int argc, char **argv)
Definition: host_utils.cpp:166
int Ls
Definition: host_utils.cpp:48
double kappa5
Definition: host_utils.cpp:51
#define clover_site_size
Definition: host_utils.h:11
void setWilsonGaugeParam(QudaGaugeParam &gauge_param)
Definition: set_params.cpp:37
QudaPrecision getPrecision(int i)
Definition: host_utils.h:222
void setInvertParam(QudaInvertParam &invertParam, QudaInvertArgs_t &inv_args, int external_precision, int quda_precision, double kappa, double reliable_delta)
void init()
Create the BLAS context.
unsigned long long flops
double norm2(const ColorSpinorField &a)
void start()
Start profiling.
Definition: device.cpp:226
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111
void setDiracParam(DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
ColorSpinorParam csParam
Definition: pack_test.cpp:25
Main header file for the QUDA library.
QudaGaugeParam newQudaGaugeParam(void)
void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity)
void dslashMultiSrcCloverQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, QudaParity parity, void *h_gauge, QudaGaugeParam *gauge_param, void *h_clover, void *h_clovinv)
Really the same with @dslashMultiSrcQuda but for clover-style fermions, by accepting pointers to dire...
void dslashMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, QudaParity parity, void *h_gauge, QudaGaugeParam *gauge_param)
Perform the solve like @dslashQuda but for multiple rhs by spliting the comm grid into sub-partitions...
void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param)
void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param)
QudaInvertParam newQudaInvertParam(void)
void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param)
dslash_test_type dtest_type
void init_test(int argc, char **argv)
void init(int argc, char **argv)
void init_ctest(int argc, char **argv, int precision, QudaReconstructType link_recon)
std::vector< quda::cpuColorSpinorField * > vp_spinorOut
void run_test(int niter, bool print_metrics=false)
DslashTime dslashCUDA(int niter)
QudaDagType not_dagger
std::vector< quda::cpuColorSpinorField * > vp_spinor
QudaGaugeParam gauge_param
QudaInvertParam inv_param
std::vector< quda::cpuColorSpinorField * > vp_spinorRef
QudaReconstructType reconstruct
Definition: quda.h:49
QudaPrecision cuda_prec_precondition
Definition: quda.h:57
QudaPrecision cuda_prec_refinement_sloppy
Definition: quda.h:54
QudaPrecision cuda_prec_sloppy
Definition: quda.h:51
QudaReconstructType reconstruct_sloppy
Definition: quda.h:52
QudaPrecision cuda_prec
Definition: quda.h:48
int X[4]
Definition: quda.h:35
QudaPrecision cpu_prec
Definition: quda.h:46
int compute_clover
Definition: quda.h:266
QudaSolveType solve_type
Definition: quda.h:229
QudaSolutionType solution_type
Definition: quda.h:228
double mq2
Definition: quda.h:128
double mass
Definition: quda.h:109
double m5
Definition: quda.h:112
int return_clover
Definition: quda.h:268
int split_grid[QUDA_MAX_DIM]
Definition: quda.h:195
int compute_clover_inverse
Definition: quda.h:267
QudaTwistFlavorType twist_flavor
Definition: quda.h:134
QudaPrecision clover_cpu_prec
Definition: quda.h:249
int return_clover_inverse
Definition: quda.h:269
double mq1
Definition: quda.h:127
QudaPrecision cuda_prec
Definition: quda.h:238
double mu
Definition: quda.h:131
QudaVerbosity verbosity
Definition: quda.h:271
double_complex b_5[QUDA_MAX_DWF_LS]
Definition: quda.h:115
double eofa_shift
Definition: quda.h:125
int num_src_per_sub_partition
Definition: quda.h:190
QudaDagType dagger
Definition: quda.h:231
double epsilon
Definition: quda.h:132
QudaPrecision cpu_prec
Definition: quda.h:237
QudaMatPCType matpc_type
Definition: quda.h:230
double_complex c_5[QUDA_MAX_DWF_LS]
Definition: quda.h:116
double mq3
Definition: quda.h:129
QudaGammaBasis gamma_basis
Definition: quda.h:246
double kappa
Definition: quda.h:110
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:68
QudaSiteSubset siteSubset
Definition: lattice_field.h:72
#define printfQuda(...)
Definition: util_quda.h:114
void setVerbosity(QudaVerbosity verbosity)
Definition: util_quda.cpp:25
#define errorQuda(...)
Definition: util_quda.h:120
void wil_mat(void *out, void **gauge, void *in, double kappa, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_dslash(void *res, void **gaugeFull, void *spinorField, double kappa, double mu, QudaTwistFlavorType flavor, int oddBit, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_mat(void *out, void **gauge, void *in, double kappa, double mu, QudaTwistFlavorType flavor, int dagger_bit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_mat(void *evenOut, void *oddOut, void **gauge, void *evenIn, void *oddIn, double kappa, double mu, double epsilon, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void wil_matpc(void *outEven, void **gauge, void *inEven, double kappa, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void wil_dslash(void *out, void **gauge, void *in, int oddBit, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_matpc(void *outEven1, void *outEven2, void **gauge, void *inEven1, void *inEven2, double kappa, double mu, double epsilon, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_matpc(void *outEven, void **gauge, void *inEven, double kappa, double mu, QudaTwistFlavorType flavor, QudaMatPCType matpc_type, int daggerBit, QudaPrecision precision, QudaGaugeParam &gauge_param)
void tm_ndeg_dslash(void *res1, void *res2, void **gauge, void *spinorField1, void *spinorField2, double kappa, double mu, double epsilon, int oddBit, int daggerBit, QudaMatPCType matpc_type, QudaPrecision precision, QudaGaugeParam &gauge_param)