8 template <
typename Float>
9 void axpby(
const Float &
a,
const Float *
x,
const Float &
b, Float *
y,
const int N) {
16 axpby(
a, (
double*)
x.V(),
b, (
double*)
y.V(),
x.Length());
20 errorQuda(
"Precision type %d not implemented",
x.Precision());
25 axpby(1.0, (
double*)
x.V(), 1.0, (
double*)
y.V(),
x.Length());
27 axpby(1.0
f, (
float*)
x.V(), 1.0f, (
float*)
y.V(),
x.Length());
29 errorQuda(
"Precision type %d not implemented",
x.Precision());
35 axpby(
a, (
double*)
x.V(), 1.0, (
double*)
y.V(),
x.Length());
37 axpby((
float)
a, (
float*)
x.V(), 1.0f, (
float*)
y.V(),
x.Length());
39 errorQuda(
"Precision type %d not implemented",
x.Precision());
45 axpby(1.0, (
double*)
x.V(),
a, (
double*)
y.V(),
x.Length());
49 errorQuda(
"Precision type %d not implemented",
x.Precision());
54 axpby(-1.0, (
double*)
x.V(), 1.0, (
double*)
y.V(),
x.Length());
56 axpby(-1.0
f, (
float*)
x.V(), 1.0f, (
float*)
y.V(),
x.Length());
58 errorQuda(
"Precision type %d not implemented",
x.Precision());
63 axpby(0.0, (
double*)
x.V(),
a, (
double*)
x.V(),
x.Length());
67 errorQuda(
"Precision type %d not implemented",
x.Precision());
70 template <
typename Float>
71 void caxpby(
const std::complex<Float> &
a,
const std::complex<Float> *
x,
72 const std::complex<Float> &
b, std::complex<Float> *
y,
int N) {
74 for (
int i=0;
i<N;
i++) {
87 caxpby((std::complex<float>)
a, (std::complex<float>*)
x.V(), std::complex<float>(1.0),
88 (std::complex<float>*)
y.V(),
x.Length()/2);
90 errorQuda(
"Precision type %d not implemented",
x.Precision());
99 caxpby((std::complex<float>)
a, (std::complex<float>*)
x.V(), (std::complex<float>)
b,
100 (std::complex<float>*)
y.V(),
x.Length()/2);
102 errorQuda(
"Precision type %d not implemented",
x.Precision());
105 template <
typename Float>
106 void caxpbypcz(
const std::complex<Float> &
a,
const std::complex<Float> *
x,
107 const std::complex<Float> &
b,
const std::complex<Float> *
y,
108 const std::complex<Float> &
c, std::complex<Float> *
z,
int N) {
110 for (
int i=0;
i<N;
i++) {
124 caxpbypcz(std::complex<float>(1, 0), (std::complex<float>*)
x.V(), (std::complex<float>)
a, (std::complex<float>*)
y.V(),
125 (std::complex<float>)
b, (std::complex<float>*)
z.V(),
x.Length()/2);
127 errorQuda(
"Precision type %d not implemented",
x.Precision());
151 caxpbypcz((std::complex<float>)
a, (std::complex<float>*)
x.V(),
152 (std::complex<float>)
b, (std::complex<float>*)
y.V(),
153 (std::complex<float>)(1.0
f), (std::complex<float>*)
z.V(),
x.Length()/2);
155 errorQuda(
"Precision type %d not implemented",
x.Precision());
160 template <
typename Float>
161 double norm(
const Float *
a,
const int N) {
174 errorQuda(
"Precision type %d not implemented",
a.Precision());
185 template <
typename Float>
199 errorQuda(
"Precision type %d not implemented",
a.Precision());
211 template <
typename Float>
223 dot =
cDotProduct((std::complex<float>*)
a.V(), (std::complex<float>*)
b.V(),
a.Length()/2);
225 errorQuda(
"Precision type %d not implemented",
a.Precision());
241 return make_double3(real(
dot), imag(
dot),
norm);
247 return make_double3(real(
dot), imag(
dot),
norm);
310 template <
typename Float>
313 double3
sum = make_double3(0.0, 0.0, 0.0);
314 for (
int i = 0;
i<volume;
i++) {
318 for (
int j=0; j<Nint; j++) {
326 sum.z += (x2 > 0.0) ? (r2 / x2) : 1.0;
335 rtn = HeavyQuarkResidualNorm<double>((
const double*)(
x.V()), (
const double*)(r.
V()),
336 x.Volume(), 2*
x.Ncolor()*
x.Nspin());
338 rtn = HeavyQuarkResidualNorm<float>((
const float*)(
x.V()), (
const float*)(r.
V()),
339 x.Volume(), 2*
x.Ncolor()*
x.Nspin());
341 errorQuda(
"Precision type %d not implemented",
x.Precision());
double xmyNormCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
void mxpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
Complex xpaycDotzyCpu(const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)
void xpayCpu(const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)
void caxpbypzCpu(const Complex &a, cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z)
Complex cDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
double axpyNormCpu(const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
double norm2(const ColorSpinorField &a)
void xpyCpu(const cpuColorSpinorField &x, cpuColorSpinorField &y)
Complex cDotProduct(ColorSpinorField &, ColorSpinorField &)
std::complex< double > Complex
cudaColorSpinorField * tmp
void axpyBzpcxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)
void reduceDoubleArray(double *, const int len)
double reDotProduct(ColorSpinorField &x, ColorSpinorField &y)
double caxpyXmazNormXCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
double cabxpyAxNormCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
void axpbyCpu(const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)
void axCpu(const double &a, cpuColorSpinorField &x)
__host__ __device__ void sum(double &a, double &b)
void caxpyCpu(const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
double3 cDotProductNormACpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
int int int enum cudaChannelFormatKind f
void axpyZpbxCpu(const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)
double3 HeavyQuarkResidualNorm(ColorSpinorField &x, ColorSpinorField &r)
void caxpbypcz(const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)
double norm(const Float *a, const int N)
void caxpbyCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)
double3 HeavyQuarkResidualNormCpu(cpuColorSpinorField &x, cpuColorSpinorField &r)
void caxpyXmazCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
void axpby(const double &a, ColorSpinorField &x, const double &b, ColorSpinorField &y)
Complex caxpyDotzyCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
double reDotProductCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
void cxpaypbzCpu(const cpuColorSpinorField &x, const Complex &a, const cpuColorSpinorField &y, const Complex &b, cpuColorSpinorField &z)
double normCpu(const cpuColorSpinorField &a)
void caxpbypczpwCpu(const Complex &a, cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z, cpuColorSpinorField &w)
void caxpbypzYmbwCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w)
void caxpby(const Complex &a, ColorSpinorField &x, const Complex &b, ColorSpinorField &y)
double caxpyNormCpu(const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)
void cabxpyAxCpu(const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
void axpyCpu(const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
void reduceDouble(double &)
__host__ __device__ ValueType conj(ValueType x)
double3 caxpbypzYmbwcDotProductUYNormYCpu(const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)
double3 cDotProductNormBCpu(const cpuColorSpinorField &a, const cpuColorSpinorField &b)
static void dot(sFloat *res, gFloat *a, sFloat *b)