QUDA  v0.7.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
blas_test.cu
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 
4 #include <quda_internal.h>
5 #include <color_spinor_field.h>
6 #include <blas_quda.h>
7 
8 #include <test_util.h>
9 #include <face_quda.h>
10 
11 // include because of nasty globals used in the tests
12 #include <dslash_util.h>
13 
14 // google test
15 #include <gtest.h>
16 
17 // Wilson, clover-improved Wilson, and twisted mass are supported.
19 extern bool tune;
20 extern int device;
21 extern int xdim;
22 extern int ydim;
23 extern int zdim;
24 extern int tdim;
25 extern int gridsize_from_cmdline[];
26 extern int niter;
27 
28 extern bool tune;
29 extern bool verify_results;
30 
31 extern void usage(char** );
32 
33 #if (__COMPUTE_CAPABILITY__ >= 200)
34 const int Nkernels = 32;
35 #else // exclude Heavy Quark Norm if on Tesla architecture
36 const int Nkernels = 31;
37 #endif
38 
39 using namespace quda;
40 
43 int Nspin;
44 
46 {
47  param.precision = precision;
48  if (Nspin == 1 || precision == QUDA_DOUBLE_PRECISION) {
50  } else {
52  }
53 }
54 
55 void
57 {
58  printfQuda("running the following test:\n");
59 
60  printfQuda("S_dimension T_dimension Nspin\n");
61  printfQuda("%d/%d/%d %d %d\n", xdim, ydim, zdim, tdim, Nspin);
62 
63  printfQuda("Grid partition info: X Y Z T\n");
64  printfQuda(" %d %d %d %d\n",
65  dimPartitioned(0),
66  dimPartitioned(1),
67  dimPartitioned(2),
68  dimPartitioned(3));
69 
70  return;
71 }
72 
73 void initFields(int prec)
74 {
75  // precisions used for the source field in the copyCuda() benchmark
76  QudaPrecision high_aux_prec;
77  QudaPrecision low_aux_prec;
78 
80  param.nColor = 3;
81  // set spin according to the type of dslash
84  param.nSpin = Nspin;
85  param.nDim = 4; // number of spacetime dimensions
86 
87  param.pad = 0; // padding must be zero for cpu fields
89  if (param.siteSubset == QUDA_PARITY_SITE_SUBSET) param.x[0] = xdim/2;
90  else param.x[0] = xdim;
91  param.x[1] = ydim;
92  param.x[2] = zdim;
93  param.x[3] = tdim;
94 
99 
101 
102  vH = new cpuColorSpinorField(param);
103  wH = new cpuColorSpinorField(param);
104  xH = new cpuColorSpinorField(param);
105  yH = new cpuColorSpinorField(param);
106  zH = new cpuColorSpinorField(param);
107  hH = new cpuColorSpinorField(param);
108  lH = new cpuColorSpinorField(param);
109 
110  vH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
111  wH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
112  xH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
113  yH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
114  zH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
115  hH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
116  lH->Source(QUDA_RANDOM_SOURCE, 0, 0, 0);
117 
118  // Now set the parameters for the cuda fields
119  //param.pad = xdim*ydim*zdim/2;
120 
121  if (param.nSpin == 4) param.gammaBasis = QUDA_UKQCD_GAMMA_BASIS;
123 
124  switch(prec) {
125  case 0:
127  high_aux_prec = QUDA_DOUBLE_PRECISION;
128  low_aux_prec = QUDA_SINGLE_PRECISION;
129  break;
130  case 1:
132  high_aux_prec = QUDA_DOUBLE_PRECISION;
133  low_aux_prec = QUDA_HALF_PRECISION;
134  break;
135  case 2:
137  high_aux_prec = QUDA_SINGLE_PRECISION;
138  low_aux_prec = QUDA_HALF_PRECISION;
139  break;
140  }
141 
142  checkCudaError();
143 
144  vD = new cudaColorSpinorField(param);
145  wD = new cudaColorSpinorField(param);
146  xD = new cudaColorSpinorField(param);
147  yD = new cudaColorSpinorField(param);
148  zD = new cudaColorSpinorField(param);
149 
150  setPrec(param, high_aux_prec);
151  hD = new cudaColorSpinorField(param);
152 
153  setPrec(param, low_aux_prec);
154  lD = new cudaColorSpinorField(param);
155 
156  // check for successful allocation
157  checkCudaError();
158 
159  *vD = *vH;
160  *wD = *wH;
161  *xD = *xH;
162  *yD = *yH;
163  *zD = *zH;
164  *hD = *hH;
165  *lD = *lH;
166 }
167 
168 
170 {
171 
172  // release memory
173  delete vD;
174  delete wD;
175  delete xD;
176  delete yD;
177  delete zD;
178  delete hD;
179  delete lD;
180 
181  // release memory
182  delete vH;
183  delete wH;
184  delete xH;
185  delete yH;
186  delete zH;
187  delete hH;
188  delete lH;
189 }
190 
191 
192 double benchmark(int kernel, const int niter) {
193 
194  double a, b, c;
195  quda::Complex a2, b2, c2;
196 
197  cudaEvent_t start, end;
198  cudaEventCreate(&start);
199  cudaEventCreate(&end);
200  cudaEventRecord(start, 0);
201 
202  {
203  switch (kernel) {
204 
205  case 0:
206  for (int i=0; i < niter; ++i) copyCuda(*yD, *hD);
207  break;
208 
209  case 1:
210  for (int i=0; i < niter; ++i) copyCuda(*yD, *lD);
211  break;
212 
213  case 2:
214  for (int i=0; i < niter; ++i) axpbyCuda(a, *xD, b, *yD);
215  break;
216 
217  case 3:
218  for (int i=0; i < niter; ++i) xpyCuda(*xD, *yD);
219  break;
220 
221  case 4:
222  for (int i=0; i < niter; ++i) axpyCuda(a, *xD, *yD);
223  break;
224 
225  case 5:
226  for (int i=0; i < niter; ++i) xpayCuda(*xD, a, *yD);
227  break;
228 
229  case 6:
230  for (int i=0; i < niter; ++i) mxpyCuda(*xD, *yD);
231  break;
232 
233  case 7:
234  for (int i=0; i < niter; ++i) axCuda(a, *xD);
235  break;
236 
237  case 8:
238  for (int i=0; i < niter; ++i) caxpyCuda(a2, *xD, *yD);
239  break;
240 
241  case 9:
242  for (int i=0; i < niter; ++i) caxpbyCuda(a2, *xD, b2, *yD);
243  break;
244 
245  case 10:
246  for (int i=0; i < niter; ++i) cxpaypbzCuda(*xD, a2, *yD, b2, *zD);
247  break;
248 
249  case 11:
250  for (int i=0; i < niter; ++i) axpyBzpcxCuda(a, *xD, *yD, b, *zD, c);
251  break;
252 
253  case 12:
254  for (int i=0; i < niter; ++i) axpyZpbxCuda(a, *xD, *yD, *zD, b);
255  break;
256 
257  case 13:
258  for (int i=0; i < niter; ++i) caxpbypzYmbwCuda(a2, *xD, b2, *yD, *zD, *wD);
259  break;
260 
261  case 14:
262  for (int i=0; i < niter; ++i) cabxpyAxCuda(a, b2, *xD, *yD);
263  break;
264 
265  case 15:
266  for (int i=0; i < niter; ++i) caxpbypzCuda(a2, *xD, b2, *yD, *zD);
267  break;
268 
269  case 16:
270  for (int i=0; i < niter; ++i) caxpbypczpwCuda(a2, *xD, b2, *yD, c2, *zD, *wD);
271  break;
272 
273  case 17:
274  for (int i=0; i < niter; ++i) caxpyXmazCuda(a2, *xD, *yD, *zD);
275  break;
276 
277  // double
278  case 18:
279  for (int i=0; i < niter; ++i) normCuda(*xD);
280  break;
281 
282  case 19:
283  for (int i=0; i < niter; ++i) reDotProductCuda(*xD, *yD);
284  break;
285 
286  case 20:
287  for (int i=0; i < niter; ++i) axpyNormCuda(a, *xD, *yD);
288  break;
289 
290  case 21:
291  for (int i=0; i < niter; ++i) xmyNormCuda(*xD, *yD);
292  break;
293 
294  case 22:
295  for (int i=0; i < niter; ++i) caxpyNormCuda(a2, *xD, *yD);
296  break;
297 
298  case 23:
299  for (int i=0; i < niter; ++i) caxpyXmazNormXCuda(a2, *xD, *yD, *zD);
300  break;
301 
302  case 24:
303  for (int i=0; i < niter; ++i) cabxpyAxNormCuda(a, b2, *xD, *yD);
304  break;
305 
306  // double2
307  case 25:
308  for (int i=0; i < niter; ++i) cDotProductCuda(*xD, *yD);
309  break;
310 
311  case 26:
312  for (int i=0; i < niter; ++i) xpaycDotzyCuda(*xD, a, *yD, *zD);
313  break;
314 
315  case 27:
316  for (int i=0; i < niter; ++i) caxpyDotzyCuda(a2, *xD, *yD, *zD);
317  break;
318 
319  // double3
320  case 28:
321  for (int i=0; i < niter; ++i) cDotProductNormACuda(*xD, *yD);
322  break;
323 
324  case 29:
325  for (int i=0; i < niter; ++i) cDotProductNormBCuda(*xD, *yD);
326  break;
327 
328  case 30:
329  for (int i=0; i < niter; ++i) caxpbypzYmbwcDotProductUYNormYCuda(a2, *xD, b2, *yD, *zD, *wD, *vD);
330  break;
331 
332  case 31:
333  for (int i=0; i < niter; ++i) HeavyQuarkResidualNormCuda(*xD, *yD);
334  break;
335 
336  default:
337  errorQuda("Undefined blas kernel %d\n", kernel);
338  }
339  }
340 
341  cudaEventRecord(end, 0);
342  cudaEventSynchronize(end);
343  float runTime;
344  cudaEventElapsedTime(&runTime, start, end);
345  cudaEventDestroy(start);
346  cudaEventDestroy(end);
347 
348  double secs = runTime / 1000;
349  return secs;
350 }
351 
352 #define ERROR(a) fabs(norm2(*a##D) - norm2(*a##H)) / norm2(*a##H)
353 
354 double test(int kernel) {
355 
356  double a = M_PI, b = M_PI*exp(1.0), c = sqrt(M_PI);
357  quda::Complex a2(a, b), b2(b, -c), c2(a+b, c*a);
358  double error = 0;
359 
360  switch (kernel) {
361 
362  case 0:
363  *hD = *hH;
364  copyCuda(*yD, *hD);
365  yH->copy(*hH);
366  error = ERROR(y);
367  break;
368 
369  case 1:
370  *lD = *lH;
371  copyCuda(*yD, *lD);
372  yH->copy(*lH);
373  error = ERROR(y);
374  break;
375 
376  case 2:
377  *xD = *xH;
378  *yD = *yH;
379  axpbyCuda(a, *xD, b, *yD);
380  axpbyCpu(a, *xH, b, *yH);
381  error = ERROR(y);
382  break;
383 
384  case 3:
385  *xD = *xH;
386  *yD = *yH;
387  xpyCuda(*xD, *yD);
388  xpyCpu(*xH, *yH);
389  error = ERROR(y);
390  break;
391 
392  case 4:
393  *xD = *xH;
394  *yD = *yH;
395  axpyCuda(a, *xD, *yD);
396  axpyCpu(a, *xH, *yH);
397  error = ERROR(y);
398  break;
399 
400  case 5:
401  *xD = *xH;
402  *yD = *yH;
403  xpayCuda(*xD, a, *yD);
404  xpayCpu(*xH, a, *yH);
405  error = ERROR(y);
406  break;
407 
408  case 6:
409  *xD = *xH;
410  *yD = *yH;
411  mxpyCuda(*xD, *yD);
412  mxpyCpu(*xH, *yH);
413  error = ERROR(y);
414  break;
415 
416  case 7:
417  *xD = *xH;
418  axCuda(a, *xD);
419  axCpu(a, *xH);
420  error = ERROR(x);
421  break;
422 
423  case 8:
424  *xD = *xH;
425  *yD = *yH;
426  caxpyCuda(a2, *xD, *yD);
427  caxpyCpu(a2, *xH, *yH);
428  error = ERROR(y);
429  break;
430 
431  case 9:
432  *xD = *xH;
433  *yD = *yH;
434  caxpbyCuda(a2, *xD, b2, *yD);
435  caxpbyCpu(a2, *xH, b2, *yH);
436  error = ERROR(y);
437  break;
438 
439  case 10:
440  *xD = *xH;
441  *yD = *yH;
442  *zD = *zH;
443  cxpaypbzCuda(*xD, a2, *yD, b2, *zD);
444  cxpaypbzCpu(*xH, a2, *yH, b2, *zH);
445  error = ERROR(z);
446  break;
447 
448  case 11:
449  *xD = *xH;
450  *yD = *yH;
451  *zD = *zH;
452  axpyBzpcxCuda(a, *xD, *yD, b, *zD, c);
453  axpyBzpcxCpu(a, *xH, *yH, b, *zH, c);
454  error = ERROR(x) + ERROR(y);
455  break;
456 
457  case 12:
458  *xD = *xH;
459  *yD = *yH;
460  *zD = *zH;
461  axpyZpbxCuda(a, *xD, *yD, *zD, b);
462  axpyZpbxCpu(a, *xH, *yH, *zH, b);
463  error = ERROR(x) + ERROR(y);
464  break;
465 
466  case 13:
467  *xD = *xH;
468  *yD = *yH;
469  *zD = *zH;
470  *wD = *wH;
471  caxpbypzYmbwCuda(a2, *xD, b2, *yD, *zD, *wD);
472  caxpbypzYmbwCpu(a2, *xH, b2, *yH, *zH, *wH);
473  error = ERROR(z) + ERROR(y);
474  break;
475 
476  case 14:
477  *xD = *xH;
478  *yD = *yH;
479  cabxpyAxCuda(a, b2, *xD, *yD);
480  cabxpyAxCpu(a, b2, *xH, *yH);
481  error = ERROR(y) + ERROR(x);
482  break;
483 
484  case 15:
485  *xD = *xH;
486  *yD = *yH;
487  *zD = *zH;
488  {caxpbypzCuda(a2, *xD, b2, *yD, *zD);
489  caxpbypzCpu(a2, *xH, b2, *yH, *zH);
490  error = ERROR(z); }
491  break;
492 
493  case 16:
494  *xD = *xH;
495  *yD = *yH;
496  *zD = *zH;
497  *wD = *wH;
498  {caxpbypczpwCuda(a2, *xD, b2, *yD, c2, *zD, *wD);
499  caxpbypczpwCpu(a2, *xH, b2, *yH, c2, *zH, *wH);
500  error = ERROR(w); }
501  break;
502 
503  case 17:
504  *xD = *xH;
505  *yD = *yH;
506  *zD = *zH;
507  {caxpyXmazCuda(a, *xD, *yD, *zD);
508  caxpyXmazCpu(a, *xH, *yH, *zH);
509  error = ERROR(y) + ERROR(x);}
510  break;
511 
512  // double
513  case 18:
514  *xD = *xH;
515  error = fabs(normCuda(*xD) - normCpu(*xH)) / normCpu(*xH);
516  break;
517 
518  case 19:
519  *xD = *xH;
520  *yD = *yH;
521  error = fabs(reDotProductCuda(*xD, *yD) - reDotProductCpu(*xH, *yH)) / fabs(reDotProductCpu(*xH, *yH));
522  break;
523 
524  case 20:
525  *xD = *xH;
526  *yD = *yH;
527  {double d = axpyNormCuda(a, *xD, *yD);
528  double h = axpyNormCpu(a, *xH, *yH);
529  error = ERROR(y) + fabs(d-h)/fabs(h);}
530  break;
531 
532  case 21:
533  *xD = *xH;
534  *yD = *yH;
535  {double d = xmyNormCuda(*xD, *yD);
536  double h = xmyNormCpu(*xH, *yH);
537  error = ERROR(y) + fabs(d-h)/fabs(h);}
538  break;
539 
540  case 22:
541  *xD = *xH;
542  *yD = *yH;
543  {double d = caxpyNormCuda(a, *xD, *yD);
544  double h = caxpyNormCpu(a, *xH, *yH);
545  error = ERROR(y) + fabs(d-h)/fabs(h);}
546  break;
547 
548  case 23:
549  *xD = *xH;
550  *yD = *yH;
551  *zD = *zH;
552  {double d = caxpyXmazNormXCuda(a, *xD, *yD, *zD);
553  double h = caxpyXmazNormXCpu(a, *xH, *yH, *zH);
554  error = ERROR(y) + ERROR(x) + fabs(d-h)/fabs(h);}
555  break;
556 
557  case 24:
558  *xD = *xH;
559  *yD = *yH;
560  {double d = cabxpyAxNormCuda(a, b2, *xD, *yD);
561  double h = cabxpyAxNormCpu(a, b2, *xH, *yH);
562  error = ERROR(x) + ERROR(y) + fabs(d-h)/fabs(h);}
563  break;
564 
565  // double2
566  case 25:
567  *xD = *xH;
568  *yD = *yH;
569  error = abs(cDotProductCuda(*xD, *yD) - cDotProductCpu(*xH, *yH)) / abs(cDotProductCpu(*xH, *yH));
570  break;
571 
572  case 26:
573  *xD = *xH;
574  *yD = *yH;
575  *zD = *zH;
576  { quda::Complex d = xpaycDotzyCuda(*xD, a, *yD, *zD);
577  quda::Complex h = xpaycDotzyCpu(*xH, a, *yH, *zH);
578  error = fabs(norm2(*yD) - norm2(*yH)) / norm2(*yH) + abs(d-h)/abs(h);
579  }
580  break;
581 
582  case 27:
583  *xD = *xH;
584  *yD = *yH;
585  *zD = *zH;
586  {quda::Complex d = caxpyDotzyCuda(a, *xD, *yD, *zD);
587  quda::Complex h = caxpyDotzyCpu(a, *xH, *yH, *zH);
588  error = ERROR(y) + abs(d-h)/abs(h);}
589  break;
590 
591  // double3
592  case 28:
593  *xD = *xH;
594  *yD = *yH;
595  { double3 d = cDotProductNormACuda(*xD, *yD);
596  double3 h = cDotProductNormACpu(*xH, *yH);
597  error = fabs(d.x - h.x) / fabs(h.x) + fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
598  break;
599 
600  case 29:
601  *xD = *xH;
602  *yD = *yH;
603  { double3 d = cDotProductNormBCuda(*xD, *yD);
604  double3 h = cDotProductNormBCpu(*xH, *yH);
605  error = fabs(d.x - h.x) / fabs(h.x) + fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
606  break;
607 
608  case 30:
609  *xD = *xH;
610  *yD = *yH;
611  *zD = *zH;
612  *wD = *wH;
613  *vD = *vH;
614  { double3 d = caxpbypzYmbwcDotProductUYNormYCuda(a2, *xD, b2, *yD, *zD, *wD, *vD);
615  double3 h = caxpbypzYmbwcDotProductUYNormYCpu(a2, *xH, b2, *yH, *zH, *wH, *vH);
616  error = ERROR(z) + ERROR(y) + fabs(d.x - h.x) / fabs(h.x) +
617  fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
618  break;
619 
620  case 31:
621  *xD = *xH;
622  *yD = *yH;
623  { double3 d = HeavyQuarkResidualNormCuda(*xD, *yD);
624  double3 h = HeavyQuarkResidualNormCpu(*xH, *yH);
625  error = fabs(d.x - h.x) / fabs(h.x) +
626  fabs(d.y - h.y) / fabs(h.y) + fabs(d.z - h.z) / fabs(h.z); }
627  break;
628 
629  default:
630  errorQuda("Undefined blas kernel %d\n", kernel);
631  }
632 
633  return error;
634 }
635 
636 // Only benchmark double precision if supported
637 #if (__COMPUTE_CAPABILITY__ >= 130)
638 int Nprec = 3;
639 #else
640 int Nprec = 2;
641 #endif
642 
643 const char *prec_str[] = {"half", "single", "double"};
644 
645 const char *names[] = {
646  "copyHS",
647  "copyLS",
648  "axpby",
649  "xpy",
650  "axpy",
651  "xpay",
652  "mxpy",
653  "ax",
654  "caxpy",
655  "caxpby",
656  "cxpaypbz",
657  "axpyBzpcx",
658  "axpyZpbx",
659  "caxpbypzYmbw",
660  "cabxpyAx",
661  "caxpbypz",
662  "caxpbypczpw",
663  "caxpyXmaz",
664  "norm",
665  "reDotProduct",
666  "axpyNorm",
667  "xmyNorm",
668  "caxpyNorm",
669  "caxpyXmazNormX",
670  "cabxpyAxNorm",
671  "cDotProduct",
672  "xpaycDotzy",
673  "caxpyDotzy",
674  "cDotProductNormA",
675  "cDotProductNormB",
676  "caxpbypzYmbwcDotProductWYNormY",
677  "HeavyQuarkResidualNorm"
678 };
679 
680 int main(int argc, char** argv)
681 {
682  for (int i = 1; i < argc; i++){
683  if(process_command_line_option(argc, argv, &i) == 0){
684  continue;
685  }
686  printfQuda("ERROR: Invalid option:%s\n", argv[i]);
687  usage(argv);
688  }
689 
690  setSpinorSiteSize(24);
691  initComms(argc, argv, gridsize_from_cmdline);
693  initQuda(device);
694 
695  // enable the tuning
698 
699  for (int prec = 0; prec < Nprec; prec++) {
700 
701  printfQuda("\nBenchmarking %s precision with %d iterations...\n\n", prec_str[prec], niter);
702  initFields(prec);
703 
704  for (int kernel = 0; kernel < Nkernels; kernel++) {
705  // only benchmark "high precision" copyCuda() if double is supported
706  if ((Nprec < 3) && (kernel == 0)) continue;
707 
708  // do the initial tune
709  benchmark(kernel, 1);
710 
711  // now rerun with more iterations to get accurate speed measurements
712  quda::blas_flops = 0;
713  quda::blas_bytes = 0;
714 
715  double secs = benchmark(kernel, niter);
716 
717  double gflops = (quda::blas_flops*1e-9)/(secs);
718  double gbytes = quda::blas_bytes/(secs*1e9);
719 
720  printfQuda("%-31s: Gflop/s = %6.1f, GB/s = %6.1f\n", names[kernel], gflops, gbytes);
721  }
722  freeFields();
723  }
724 
725  // clear the error state
726  cudaGetLastError();
727 
728  // lastly check for correctness
729  if (verify_results) {
730  ::testing::InitGoogleTest(&argc, argv);
731  if (RUN_ALL_TESTS() != 0) warningQuda("Tests failed");
732  }
733 
734  endQuda();
735 
736  finalizeComms();
737 }
738 
739 // The following tests each kernel at each precision using the google testing framework
740 
741 class BlasTest : public ::testing::TestWithParam<int2> {
742 protected:
743  int2 param;
744 
745 public:
746  virtual ~BlasTest() { }
747  virtual void SetUp() {
748  param = GetParam();
749  initFields(param.x);
750  }
751  virtual void TearDown() { freeFields(); }
752 
753  virtual void NormalExit() { printf("monkey\n"); }
754 
755 };
756 
757 TEST_P(BlasTest, verify) {
758  int prec = param.x;
759  int kernel = param.y;
760  double deviation = test(kernel);
761  printfQuda("%-35s error = %e\n", names[kernel], deviation);
762  double tol = (prec == 2 ? 1e-12 : (prec == 1 ? 1e-5 : 1e-3));
763  tol = (kernel < 2) ? 1e-4 : tol; // use different tolerance for copy
764  EXPECT_LE(deviation, tol) << "CPU and CUDA implementations do not agree";
765 }
766 
767 // half precision
768 INSTANTIATE_TEST_CASE_P(copyHS_half, BlasTest, ::testing::Values( make_int2(0,0) ));
769 INSTANTIATE_TEST_CASE_P(copyLS_half, BlasTest, ::testing::Values( make_int2(0,1) ));
770 INSTANTIATE_TEST_CASE_P(axpby_half, BlasTest, ::testing::Values( make_int2(0,2) ));
771 INSTANTIATE_TEST_CASE_P(xpy_half, BlasTest, ::testing::Values( make_int2(0,3) ));
772 INSTANTIATE_TEST_CASE_P(axpy_half, BlasTest, ::testing::Values( make_int2(0,4) ));
773 INSTANTIATE_TEST_CASE_P(xpay_half, BlasTest, ::testing::Values( make_int2(0,5) ));
774 INSTANTIATE_TEST_CASE_P(mxpy_half, BlasTest, ::testing::Values( make_int2(0,6) ));
775 INSTANTIATE_TEST_CASE_P(ax_half, BlasTest, ::testing::Values( make_int2(0,7) ));
776 INSTANTIATE_TEST_CASE_P(caxpy_half, BlasTest, ::testing::Values( make_int2(0,8) ));
777 INSTANTIATE_TEST_CASE_P(caxpby_half, BlasTest, ::testing::Values( make_int2(0,9) ));
778 INSTANTIATE_TEST_CASE_P(cxpaypbz_half, BlasTest, ::testing::Values( make_int2(0,10) ));
779 INSTANTIATE_TEST_CASE_P(axpyBzpcx_half, BlasTest, ::testing::Values( make_int2(0,11) ));
780 INSTANTIATE_TEST_CASE_P(axpyZpbx_half, BlasTest, ::testing::Values( make_int2(0,12) ));
781 INSTANTIATE_TEST_CASE_P(caxpbypzYmbw_half, BlasTest, ::testing::Values( make_int2(0,13) ));
782 INSTANTIATE_TEST_CASE_P(cabxpyAx_half, BlasTest, ::testing::Values( make_int2(0,14) ));
783 INSTANTIATE_TEST_CASE_P(caxpbypz_half, BlasTest, ::testing::Values( make_int2(0,15) ));
784 INSTANTIATE_TEST_CASE_P(caxpbypczpw_half, BlasTest, ::testing::Values( make_int2(0,16) ));
785 INSTANTIATE_TEST_CASE_P(caxpyXmaz_half, BlasTest, ::testing::Values( make_int2(0,17) ));
786 INSTANTIATE_TEST_CASE_P(norm2_half, BlasTest, ::testing::Values( make_int2(0,18) ));
787 INSTANTIATE_TEST_CASE_P(reDotProduct_half, BlasTest, ::testing::Values( make_int2(0,19) ));
788 INSTANTIATE_TEST_CASE_P(axpyNorm_half, BlasTest, ::testing::Values( make_int2(0,20) ));
789 INSTANTIATE_TEST_CASE_P(xmyNorm_half, BlasTest, ::testing::Values( make_int2(0,21) ));
790 INSTANTIATE_TEST_CASE_P(caxpyNorm_half, BlasTest, ::testing::Values( make_int2(0,22) ));
791 INSTANTIATE_TEST_CASE_P(caxpyXmazNormX_half, BlasTest, ::testing::Values( make_int2(0,23) ));
792 INSTANTIATE_TEST_CASE_P(cabxpyAxNorm_half, BlasTest, ::testing::Values( make_int2(0,24) ));
793 INSTANTIATE_TEST_CASE_P(cDotProduct_half, BlasTest, ::testing::Values( make_int2(0,25) ));
794 INSTANTIATE_TEST_CASE_P(xpaycDotzy_half, BlasTest, ::testing::Values( make_int2(0,26) ));
795 INSTANTIATE_TEST_CASE_P(caxpyDotzy_half, BlasTest, ::testing::Values( make_int2(0,27) ));
796 INSTANTIATE_TEST_CASE_P(cDotProductNormA_half, BlasTest, ::testing::Values( make_int2(0,28) ));
797 INSTANTIATE_TEST_CASE_P(cDotProductNormB_half, BlasTest, ::testing::Values( make_int2(0,29) ));
798 INSTANTIATE_TEST_CASE_P(caxpbypzYmbwcDotProductWYNormY_half, BlasTest, ::testing::Values( make_int2(0,30) ));
799 INSTANTIATE_TEST_CASE_P(HeavyQuarkResidualNorm_half, BlasTest, ::testing::Values( make_int2(0,31) ));
800 
801 // single precision
802 INSTANTIATE_TEST_CASE_P(copyHS_single, BlasTest, ::testing::Values( make_int2(1,0) ));
803 INSTANTIATE_TEST_CASE_P(copyLS_single, BlasTest, ::testing::Values( make_int2(1,1) ));
804 INSTANTIATE_TEST_CASE_P(axpby_single, BlasTest, ::testing::Values( make_int2(1,2) ));
805 INSTANTIATE_TEST_CASE_P(xpy_single, BlasTest, ::testing::Values( make_int2(1,3) ));
806 INSTANTIATE_TEST_CASE_P(axpy_single, BlasTest, ::testing::Values( make_int2(1,4) ));
807 INSTANTIATE_TEST_CASE_P(xpay_single, BlasTest, ::testing::Values( make_int2(1,5) ));
808 INSTANTIATE_TEST_CASE_P(mxpy_single, BlasTest, ::testing::Values( make_int2(1,6) ));
809 INSTANTIATE_TEST_CASE_P(ax_single, BlasTest, ::testing::Values( make_int2(1,7) ));
810 INSTANTIATE_TEST_CASE_P(caxpy_single, BlasTest, ::testing::Values( make_int2(1,8) ));
811 INSTANTIATE_TEST_CASE_P(caxpby_single, BlasTest, ::testing::Values( make_int2(1,9) ));
812 INSTANTIATE_TEST_CASE_P(cxpaypbz_single, BlasTest, ::testing::Values( make_int2(1,10) ));
813 INSTANTIATE_TEST_CASE_P(axpyBzpcx_single, BlasTest, ::testing::Values( make_int2(1,11) ));
814 INSTANTIATE_TEST_CASE_P(axpyZpbx_single, BlasTest, ::testing::Values( make_int2(1,12) ));
815 INSTANTIATE_TEST_CASE_P(caxpbypzYmbw_single, BlasTest, ::testing::Values( make_int2(1,13) ));
816 INSTANTIATE_TEST_CASE_P(cabxpyAx_single, BlasTest, ::testing::Values( make_int2(1,14) ));
817 INSTANTIATE_TEST_CASE_P(caxpbypz_single, BlasTest, ::testing::Values( make_int2(1,15) ));
818 INSTANTIATE_TEST_CASE_P(caxpbypczpw_single, BlasTest, ::testing::Values( make_int2(1,16) ));
819 INSTANTIATE_TEST_CASE_P(caxpyXmaz_single, BlasTest, ::testing::Values( make_int2(1,17) ));
820 INSTANTIATE_TEST_CASE_P(norm2_single, BlasTest, ::testing::Values( make_int2(1,18) ));
821 INSTANTIATE_TEST_CASE_P(reDotProduct_single, BlasTest, ::testing::Values( make_int2(1,19) ));
822 INSTANTIATE_TEST_CASE_P(axpyNorm_single, BlasTest, ::testing::Values( make_int2(1,20) ));
823 INSTANTIATE_TEST_CASE_P(xmyNorm_single, BlasTest, ::testing::Values( make_int2(1,21) ));
824 INSTANTIATE_TEST_CASE_P(caxpyNorm_single, BlasTest, ::testing::Values( make_int2(1,22) ));
825 INSTANTIATE_TEST_CASE_P(caxpyXmazNormX_single, BlasTest, ::testing::Values( make_int2(1,23) ));
826 INSTANTIATE_TEST_CASE_P(cabxpyAxNorm_single, BlasTest, ::testing::Values( make_int2(1,24) ));
827 INSTANTIATE_TEST_CASE_P(cDotProduct_single, BlasTest, ::testing::Values( make_int2(1,25) ));
828 INSTANTIATE_TEST_CASE_P(xpaycDotzy_single, BlasTest, ::testing::Values( make_int2(1,26) ));
829 INSTANTIATE_TEST_CASE_P(caxpyDotzy_single, BlasTest, ::testing::Values( make_int2(1,27) ));
830 INSTANTIATE_TEST_CASE_P(cDotProductNormA_single, BlasTest, ::testing::Values( make_int2(1,28) ));
831 INSTANTIATE_TEST_CASE_P(cDotProductNormB_single, BlasTest, ::testing::Values( make_int2(1,29) ));
832 INSTANTIATE_TEST_CASE_P(caxpbypzYmbwcDotProductWYNormY_single, BlasTest, ::testing::Values( make_int2(1,30) ));
833 INSTANTIATE_TEST_CASE_P(HeavyQuarkResidualNorm_single, BlasTest, ::testing::Values( make_int2(1,31) ));
834 
835 // double precision
836 INSTANTIATE_TEST_CASE_P(copyHS_double, BlasTest, ::testing::Values( make_int2(2,0) ));
837 INSTANTIATE_TEST_CASE_P(copyLS_double, BlasTest, ::testing::Values( make_int2(2,1) ));
838 INSTANTIATE_TEST_CASE_P(axpby_double, BlasTest, ::testing::Values( make_int2(2,2) ));
839 INSTANTIATE_TEST_CASE_P(xpy_double, BlasTest, ::testing::Values( make_int2(2,3) ));
840 INSTANTIATE_TEST_CASE_P(axpy_double, BlasTest, ::testing::Values( make_int2(2,4) ));
841 INSTANTIATE_TEST_CASE_P(xpay_double, BlasTest, ::testing::Values( make_int2(2,5) ));
842 INSTANTIATE_TEST_CASE_P(mxpy_double, BlasTest, ::testing::Values( make_int2(2,6) ));
843 INSTANTIATE_TEST_CASE_P(ax_double, BlasTest, ::testing::Values( make_int2(2,7) ));
844 INSTANTIATE_TEST_CASE_P(caxpy_double, BlasTest, ::testing::Values( make_int2(2,8) ));
845 INSTANTIATE_TEST_CASE_P(caxpby_double, BlasTest, ::testing::Values( make_int2(2,9) ));
846 INSTANTIATE_TEST_CASE_P(cxpaypbz_double, BlasTest, ::testing::Values( make_int2(2,10) ));
847 INSTANTIATE_TEST_CASE_P(axpyBzpcx_double, BlasTest, ::testing::Values( make_int2(2,11) ));
848 INSTANTIATE_TEST_CASE_P(axpyZpbx_double, BlasTest, ::testing::Values( make_int2(2,12) ));
849 INSTANTIATE_TEST_CASE_P(caxpbypzYmbw_double, BlasTest, ::testing::Values( make_int2(2,13) ));
850 INSTANTIATE_TEST_CASE_P(cabxpyAx_double, BlasTest, ::testing::Values( make_int2(2,14) ));
851 INSTANTIATE_TEST_CASE_P(caxpbypz_double, BlasTest, ::testing::Values( make_int2(2,15) ));
852 INSTANTIATE_TEST_CASE_P(caxpbypczpw_double, BlasTest, ::testing::Values( make_int2(2,16) ));
853 INSTANTIATE_TEST_CASE_P(caxpyXmaz_double, BlasTest, ::testing::Values( make_int2(2,17) ));
854 INSTANTIATE_TEST_CASE_P(norm2_double, BlasTest, ::testing::Values( make_int2(2,18) ));
855 INSTANTIATE_TEST_CASE_P(reDotProduct_double, BlasTest, ::testing::Values( make_int2(2,19) ));
856 INSTANTIATE_TEST_CASE_P(axpyNorm_double, BlasTest, ::testing::Values( make_int2(2,20) ));
857 INSTANTIATE_TEST_CASE_P(xmyNorm_double, BlasTest, ::testing::Values( make_int2(2,21) ));
858 INSTANTIATE_TEST_CASE_P(caxpyNorm_double, BlasTest, ::testing::Values( make_int2(2,22) ));
859 INSTANTIATE_TEST_CASE_P(caxpyXmazNormX_double, BlasTest, ::testing::Values( make_int2(2,23) ));
860 INSTANTIATE_TEST_CASE_P(cabxpyAxNorm_double, BlasTest, ::testing::Values( make_int2(2,24) ));
861 INSTANTIATE_TEST_CASE_P(cDotProduct_double, BlasTest, ::testing::Values( make_int2(2,25) ));
862 INSTANTIATE_TEST_CASE_P(xpaycDotzy_double, BlasTest, ::testing::Values( make_int2(2,26) ));
863 INSTANTIATE_TEST_CASE_P(caxpyDotzy_double, BlasTest, ::testing::Values( make_int2(2,27) ));
864 INSTANTIATE_TEST_CASE_P(cDotProductNormA_double, BlasTest, ::testing::Values( make_int2(2,28) ));
865 INSTANTIATE_TEST_CASE_P(cDotProductNormB_double, BlasTest, ::testing::Values( make_int2(2,29) ));
866 INSTANTIATE_TEST_CASE_P(caxpbypzYmbwcDotProductWYNormY_double, BlasTest, ::testing::Values( make_int2(2,30) ));
867 INSTANTIATE_TEST_CASE_P(HeavyQuarkResidualNorm_double, BlasTest, ::testing::Values( make_int2(2,31) ));
868 
QudaDslashType dslash_type
Definition: test_util.cpp:1560
int dimPartitioned(int dim)
Definition: test_util.cpp:1577
int2 param
Definition: blas_test.cu:743
void caxpyCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:207
void copy(const cpuColorSpinorField &)
void caxpbypzYmbwCpu(const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &)
Definition: blas_cpu.cpp:143
void endQuda(void)
void mxpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:51
enum QudaPrecision_s QudaPrecision
int y[4]
Complex xpaycDotzyCpu(const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)
Definition: blas_cpu.cpp:231
double test(int kernel)
Definition: blas_test.cu:354
void cabxpyAxCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:259
cudaColorSpinorField * hD
Definition: blas_test.cu:42
void display_test_info()
Definition: blas_test.cu:56
__host__ __device__ ValueType exp(ValueType x)
Definition: complex_quda.h:85
#define EXPECT_LE(val1, val2)
Definition: gtest.h:19753
#define errorQuda(...)
Definition: util_quda.h:73
int xdim
Definition: test_util.cpp:1553
__host__ __device__ ValueType sqrt(ValueType x)
Definition: complex_quda.h:105
double axpyNormCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: reduce_quda.cu:321
unsigned long long blas_bytes
Definition: blas_quda.cu:38
void xpayCpu(const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:41
std::complex< double > Complex
Definition: eig_variables.h:13
void axpbyCuda(const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y)
Definition: blas_quda.cu:82
int process_command_line_option(int argc, char **argv, int *idx)
Definition: test_util.cpp:1635
void Source(const QudaSourceType sourceType, const int st=0, const int s=0, const int c=0)
virtual ~BlasTest()
Definition: blas_test.cu:746
void axpyZpbxCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)
Definition: blas_quda.cu:338
int Nprec
Definition: blas_test.cu:640
int zdim
Definition: test_util.cpp:1555
QudaPrecision precision
Definition: lattice_field.h:41
cudaColorSpinorField * xD
Definition: blas_test.cu:42
void finalizeComms()
Definition: test_util.cpp:65
void xpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:22
double cabxpyAxNormCuda(const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: reduce_quda.cu:440
double3 cDotProductNormBCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:620
int Nspin
Definition: blas_test.cu:43
QudaSiteSubset siteSubset
Definition: lattice_field.h:42
cpuColorSpinorField * wH
Definition: blas_test.cu:41
void axpyZpbxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)
Definition: blas_cpu.cpp:136
int tdim
QudaGaugeParam param
Definition: pack_test.cpp:17
cpuColorSpinorField * yH
Definition: blas_test.cu:41
void cabxpyAxCuda(const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:386
void usage(char **)
Definition: test_util.cpp:1584
int x[QUDA_MAX_DIM]
Definition: lattice_field.h:38
const char * names[]
Definition: blas_test.cu:645
void setTuning(QudaTune tune)
Definition: util_quda.cpp:33
void initQuda(int device)
void axpyBzpcxCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c)
Definition: blas_quda.cu:311
void caxpyXmazCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
Definition: blas_quda.cu:452
double3 caxpbypzYmbwcDotProductUYNormYCuda(const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u)
Definition: reduce_quda.cu:643
double benchmark(int kernel, const int niter)
Definition: blas_test.cu:192
void initFields(int prec)
Definition: blas_test.cu:73
void caxpbyCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:92
Complex cDotProductCuda(cudaColorSpinorField &, cudaColorSpinorField &)
Definition: reduce_quda.cu:468
void mxpyCuda(cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:154
virtual void SetUp()
Definition: blas_test.cu:747
virtual void TearDown()
Definition: blas_test.cu:751
double3 caxpbypzYmbwcDotProductUYNormYCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)
Definition: blas_cpu.cpp:250
double caxpyXmazNormXCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
Definition: reduce_quda.cu:413
Complex cDotProductCpu(const cpuColorSpinorField &, const cpuColorSpinorField &)
Definition: blas_cpu.cpp:217
void setSpinorSiteSize(int n)
Definition: test_util.cpp:150
GTEST_API_ void InitGoogleTest(int *argc, char **argv)
cudaColorSpinorField * wD
Definition: blas_test.cu:42
void cxpaypbzCpu(const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:115
void caxpbypzCpu(const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
Definition: blas_cpu.cpp:289
#define warningQuda(...)
Definition: util_quda.h:84
const char * prec_str[]
Definition: blas_test.cu:643
int niter
Definition: test_util.cpp:1563
void copyCuda(cudaColorSpinorField &dst, const cudaColorSpinorField &src)
Definition: copy_quda.cu:235
double normCuda(const cudaColorSpinorField &b)
Definition: reduce_quda.cu:145
void caxpyCpu(const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:79
cpuColorSpinorField * hH
Definition: blas_test.cu:41
void axpyCuda(const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:115
Complex caxpyDotzyCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
Definition: reduce_quda.cu:559
const int Nkernels
Definition: blas_test.cu:36
void axpbyCpu(const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:12
void axpyBzpcxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)
Definition: blas_cpu.cpp:129
void caxpbypczpwCuda(const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)
Definition: blas_quda.cu:429
Complex xpaycDotzyCuda(cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z)
Definition: reduce_quda.cu:534
int x[4]
void axCpu(const double &a, cpuColorSpinorField &x)
Definition: blas_cpu.cpp:60
unsigned long long blas_flops
Definition: blas_quda.cu:37
cudaColorSpinorField * vD
Definition: blas_test.cu:42
double cabxpyAxNormCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:283
void caxpyXmazCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:277
cudaColorSpinorField * zD
Definition: blas_test.cu:42
void caxpbypczpwCpu(const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
Definition: blas_cpu.cpp:295
bool tune
Definition: blas_test.cu:28
cpuColorSpinorField * lH
Definition: blas_test.cu:41
void xpyCuda(cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: blas_quda.cu:98
double reDotProductCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:170
double caxpyNormCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:264
int ydim
Definition: test_util.cpp:1554
void cxpaypbzCuda(cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z)
Definition: blas_quda.cu:290
virtual void NormalExit()
Definition: blas_test.cu:753
void setPrec(ColorSpinorParam &param, const QudaPrecision precision)
Definition: blas_test.cu:45
#define printfQuda(...)
Definition: util_quda.h:67
cudaColorSpinorField * lD
Definition: blas_test.cu:42
void caxpbypzCuda(const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)
Definition: blas_quda.cu:407
double normCpu(const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:166
double axpyNormCpu(const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:178
double3 cDotProductNormACuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:591
int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_
Definition: gtest.h:20057
enum QudaDslashType_s QudaDslashType
int device
Definition: test_util.cpp:1546
double reDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:191
double3 cDotProductNormACpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:237
double caxpyXmazNormXCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:270
__host__ __device__ ValueType abs(ValueType x)
Definition: complex_quda.h:110
void caxpbyCuda(const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y)
Definition: blas_quda.cu:247
#define checkCudaError()
Definition: util_quda.h:110
TEST_P(BlasTest, verify)
Definition: blas_test.cu:757
bool verify_results
Definition: test_util.cpp:1568
double3 HeavyQuarkResidualNormCpu(cpuColorSpinorField &x, cpuColorSpinorField &r)
Definition: blas_cpu.cpp:331
cudaColorSpinorField * yD
Definition: blas_test.cu:42
double caxpyNormCuda(const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
Definition: reduce_quda.cu:388
double xmyNormCpu(const cpuColorSpinorField &a, cpuColorSpinorField &b)
Definition: blas_cpu.cpp:205
void xpayCuda(cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)
Definition: blas_quda.cu:138
double3 HeavyQuarkResidualNormCuda(cudaColorSpinorField &x, cudaColorSpinorField &r)
Definition: reduce_quda.cu:777
INSTANTIATE_TEST_CASE_P(copyHS_half, BlasTest,::testing::Values(make_int2(0, 0)))
Complex caxpyDotzyCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
Definition: blas_cpu.cpp:303
QudaPrecision prec
Definition: test_util.cpp:1551
double3 cDotProductNormBCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
Definition: blas_cpu.cpp:243
void axCuda(const double &a, cudaColorSpinorField &x)
Definition: blas_quda.cu:171
double norm2(const ColorSpinorField &)
void axpyCpu(const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
Definition: blas_cpu.cpp:31
cpuColorSpinorField * xH
Definition: blas_test.cu:41
double xmyNormCuda(cudaColorSpinorField &a, cudaColorSpinorField &b)
Definition: reduce_quda.cu:343
void initComms(int argc, char **argv, const int *commDims)
Definition: test_util.cpp:48
cpuColorSpinorField * vH
Definition: blas_test.cu:41
void freeFields()
Definition: blas_test.cu:169
int gridsize_from_cmdline[]
Definition: test_util.cpp:1559
void setVerbosity(const QudaVerbosity verbosity)
Definition: util_quda.cpp:24
void caxpbypzYmbwCuda(const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &)
Definition: blas_quda.cu:366
int main(int argc, char **argv)
Definition: blas_test.cu:680
void end()
#define ERROR(a)
Definition: blas_test.cu:352
cpuColorSpinorField * zH
Definition: blas_test.cu:41
internal::ValueArray1< T1 > Values(T1 v1)
Definition: gtest.h:15914