18 template <
typename doubleN,
typename ReduceType,
19 template <
typename ReducerType,
typename Float,
typename FloatN>
class Reducer,
20 int writeX,
int writeY,
int writeZ,
int writeW,
int writeV,
bool siteUnroll>
21 doubleN
reduceCuda(
const double2 &
a,
const double2 &
b, ColorSpinorField &
x,
22 ColorSpinorField &
y, ColorSpinorField &
z, ColorSpinorField &
w,
23 ColorSpinorField &v) {
29 if (siteUnroll &&
x.Ncolor()!=3)
errorQuda(
"Not supported");
31 int reduce_length = siteUnroll ?
x.RealLength() :
x.Length();
34 if (
x.Nspin() == 4 ||
x.Nspin() == 2) {
35 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_MULTIGRID) 36 const int M = siteUnroll ? 12 : 1;
37 if (
x.Nspin() == 2 && siteUnroll)
errorQuda(
"siteUnroll not supported for nSpin==2");
38 value =
reduceCuda<doubleN,ReduceType,double2,double2,double2,M,Reducer,
39 writeX,writeY,writeZ,writeW,writeV>
40 (
a,
b,
x,
y,
z,
w, v, reduce_length/(2*M));
42 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
44 }
else if (
x.Nspin() == 1) {
45 #ifdef GPU_STAGGERED_DIRAC 46 const int M = siteUnroll ? 3 : 1;
47 value =
reduceCuda<doubleN,ReduceType,double2,double2,double2,M,Reducer,
48 writeX,writeY,writeZ,writeW,writeV>
49 (
a,
b,
x,
y,
z,
w, v, reduce_length/(2*M));
51 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
53 }
else {
errorQuda(
"ERROR: nSpin=%d is not supported\n",
x.Nspin()); }
56 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 57 const int M = siteUnroll ? 6 : 1;
59 writeX,writeY,writeZ,writeW,writeV>
60 (
a,
b,
x,
y,
z,
w, v, reduce_length/(4*M));
62 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
64 }
else if (
x.Nspin() == 1 ||
x.Nspin() == 2) {
65 #if defined(GPU_STAGGERED_DIRAC) || defined(GPU_MULTIGRID) 66 const int M = siteUnroll ? 3 : 1;
67 if (
x.Nspin() == 2 && siteUnroll)
errorQuda(
"siteUnroll not supported for nSpin==2");
69 writeX,writeY,writeZ,writeW,writeV>
70 (
a,
b,
x,
y,
z,
w, v, reduce_length/(2*M));
72 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
74 }
else {
errorQuda(
"ERROR: nSpin=%d is not supported\n",
x.Nspin()); }
77 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 80 writeX,writeY,writeZ,writeW,writeV>
81 (
a,
b,
x,
y,
z,
w, v,
y.Volume());
83 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
85 }
else if (
x.Nspin() == 1) {
86 #ifdef GPU_STAGGERED_DIRAC 89 writeX,writeY,writeZ,writeW,writeV>
90 (
a,
b,
x,
y,
z,
w, v,
y.Volume());
92 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
94 }
else {
errorQuda(
"nSpin=%d is not supported\n",
x.Nspin()); }
96 errorQuda(
"precision=%d is not supported\n",
x.Precision());
101 Reducer<doubleN, double2, double2> r(
a,
b);
102 value = genericReduce<doubleN,doubleN,double,double,writeX,writeY,writeZ,writeW,writeV,Reducer<doubleN,double2,double2> >(
x,
y,
z,
w,v,r);
104 Reducer<doubleN, float2, float2> r(make_float2(
a.x,
a.y), make_float2(
b.x,
b.y));
105 value = genericReduce<doubleN,doubleN,float,float,writeX,writeY,writeZ,writeW,writeV,Reducer<doubleN,float2,float2> >(
x,
y,
z,
w,v,r);
107 errorQuda(
"Precision %d not implemented",
x.Precision());
111 const int Nreduce =
sizeof(doubleN) /
sizeof(
double);
void reduceDoubleArray(double *, const int len)
#define checkLocation(...)
doubleN reduceCuda(const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)