4 template <
template <
typename Float,
typename FloatN>
class Functor,
5 int writeX,
int writeY,
int writeZ,
int writeW>
6 void blasCuda(
const double2 &
a,
const double2 &
b,
const double2 &
c,
7 ColorSpinorField &
x, ColorSpinorField &
y,
8 ColorSpinorField &
z, ColorSpinorField &
w) {
13 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_STAGGERED_DIRAC) 15 blasCuda<double2,double2,double2,M,Functor,writeX,writeY,writeZ,writeW>(
a,
b,
c,
x,
y,
z,
w,
x.Length()/(2*M));
17 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
21 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 23 blasCuda<float4,float4,float4,M,Functor,writeX,writeY,writeZ,writeW>(
a,
b,
c,
x,
y,
z,
w,
x.Length()/(4*M));
25 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
27 }
else if (
x.Nspin()==2 ||
x.Nspin()==1) {
28 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_STAGGERED_DIRAC) 30 blasCuda<float2,float2,float2,M,Functor,writeX,writeY,writeZ,writeW>(
a,
b,
c,
x,
y,
z,
w,
x.Length()/(2*M));
32 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
34 }
else {
errorQuda(
"nSpin=%d is not supported\n",
x.Nspin()); }
36 if (
x.Ncolor() != 3) {
errorQuda(
"nColor = %d is not supported",
x.Ncolor()); }
38 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 40 blasCuda<float4,short4,short4,M,Functor,writeX,writeY,writeZ,writeW>(
a,
b,
c,
x,
y,
z,
w,
x.Volume());
42 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
44 }
else if (
x.Nspin() == 1) {
45 #ifdef GPU_STAGGERED_DIRAC 47 blasCuda<float2,short2,short2,M,Functor,writeX,writeY,writeZ,writeW>(
a,
b,
c,
x,
y,
z,
w,
x.Volume());
49 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
52 errorQuda(
"nSpin=%d is not supported\n",
x.Nspin());
58 Functor<double2, double2>
f(
a,
b,
c);
59 genericBlas<double, double, writeX, writeY, writeZ, writeW>(
x,
y,
z,
w,
f);
61 Functor<float2, float2>
f(make_float2(
a.x,
a.y), make_float2(
b.x,
b.y), make_float2(
c.x,
c.y) );
62 genericBlas<float, float, writeX, writeY, writeZ, writeW>(
x,
y,
z,
w,
f);
#define checkLocation(...)
int int int enum cudaChannelFormatKind f
void blasCuda(const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w)