20 template <
typename doubleN,
typename ReduceType,
21 template <
typename ReducerType,
typename Float,
typename FloatN>
class Reducer,
22 int writeX,
int writeY,
int writeZ,
int writeW,
int writeV,
bool siteUnroll>
23 doubleN
reduceCuda(
const double2 &
a,
const double2 &
b, ColorSpinorField &
x,
24 ColorSpinorField &
y, ColorSpinorField &
z, ColorSpinorField &
w,
25 ColorSpinorField &v) {
35 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 38 writeX,writeY,writeZ,writeW,writeV>
39 (
a,
b,
x,
y,
z,
w, v,
x.Volume());
41 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
43 }
else if (
x.Nspin() == 1) {
44 #ifdef GPU_STAGGERED_DIRAC 45 const int M = siteUnroll ? 3 : 1;
46 const int reduce_length = siteUnroll ?
x.RealLength() :
x.Length();
48 writeX,writeY,writeZ,writeW,writeV>
49 (
a,
b,
x,
y,
z,
w, v, reduce_length/(2*M));
51 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
53 }
else {
errorQuda(
"ERROR: nSpin=%d is not supported\n",
x.Nspin()); }
56 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 59 writeX,writeY,writeZ,writeW,writeV>
60 (
a,
b,
x,
y,
z,
w, v,
x.Volume());
62 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
64 }
else if (
x.Nspin() == 1) {
65 #ifdef GPU_STAGGERED_DIRAC 68 writeX,writeY,writeZ,writeW,writeV>
69 (
a,
b,
x,
y,
z,
w, v,
x.Volume());
71 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
73 }
else {
errorQuda(
"ERROR: nSpin=%d is not supported\n",
x.Nspin()); }
76 #if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) 79 writeX,writeY,writeZ,writeW,writeV>
80 (
a,
b,
x,
y,
z,
w, v,
x.Volume());
82 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
84 }
else if (
x.Nspin() == 1) {
85 #ifdef GPU_STAGGERED_DIRAC 88 writeX,writeY,writeZ,writeW,writeV>
89 (
a,
b,
x,
y,
z,
w, v,
x.Volume());
91 errorQuda(
"blas has not been built for Nspin=%d fields",
x.Nspin());
93 }
else {
errorQuda(
"ERROR: nSpin=%d is not supported\n",
x.Nspin()); }
99 Reducer<doubleN, double2, double2> r(
a,
b);
100 value = genericReduce<doubleN,doubleN,float,double,writeX,writeY,writeZ,writeW,writeV,Reducer<doubleN,double2,double2> >(
x,
y,
z,
w,v,r);
102 errorQuda(
"Precision %d not implemented",
x.Precision());
106 const int Nreduce =
sizeof(doubleN) /
sizeof(
double);
void reduceDoubleArray(double *, const int len)
#define checkLocation(...)
doubleN reduceCuda(const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)