12 double b =
b_5[0].real();
13 double c =
c_5[0].real();
20 for (
int i = 0; i <
Ls; i++) {
21 if (
b_5[i].imag() != 0.0 ||
c_5[i].imag() != 0.0 || (i <
Ls - 1 && (
b_5[i] !=
b_5[i + 1] ||
c_5[i] !=
c_5[i + 1]))) {
28 printfQuda(
"%s: Detected variable or complex cofficients: using zMobius\n", __func__);
30 printfQuda(
"%s: Detected fixed real cofficients: using regular Mobius\n", __func__);
44 ApplyDomainWall4D(out, in, *
gauge, 0.0, 0.0,
nullptr,
nullptr, in,
parity,
dagger,
commDim,
profile);
57 long long Ls = in.
X(4);
58 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
59 long long wall = 2 * in.
Volume() /
Ls;
60 flops += 72LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
72 long long Ls = in.
X(4);
73 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
74 long long wall = 2 * in.
Volume() /
Ls;
75 flops += 48LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
86 ApplyDomainWall4D(out, in, *
gauge, k,
m5,
b_5,
c_5, x,
parity,
dagger,
commDim,
profile);
100 long long Ls = in.
X(4);
101 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
102 long long wall = 2 * in.
Volume() /
Ls;
104 flops += 72LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
117 long long Ls = in.
X(4);
118 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
119 long long wall = 2 * in.
Volume() /
Ls;
120 flops += 96LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
138 ApplyDomainWall4D(*
tmp, out, *
gauge, 0.0,
m5,
b_5,
c_5, in,
QUDA_INVALID_PARITY,
dagger,
commDim,
profile);
142 ApplyDomainWall4D(out, in, *
gauge, 0.0,
m5,
b_5,
c_5, in,
QUDA_INVALID_PARITY,
dagger,
commDim,
profile);
148 long long Ls = in.
X(4);
149 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
150 long long wall = 2 * in.
Volume() /
Ls;
151 flops += 72LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
153 flops += 48LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
174 errorQuda(
"Preconditioned solution requires a preconditioned solve_type");
203 if (&
dirac !=
this) {
221 using namespace blas;
228 long long Ls = in.
X(4);
242 long long Ls = in.
X(4);
258 if (symmetric && !
dagger) {
265 }
else if (symmetric &&
dagger) {
272 }
else if (!symmetric && !
dagger) {
279 }
else if (!symmetric &&
dagger) {
377 if (
zMobius) {
errorQuda(
"DiracMobiusPC::MdagMLocal doesn't currently support zMobius.\n"); }
379 int shift0[4] = {0, 0, 0, 0};
383 for (
int d = 0; d < 4; d++) {
401 for (
int d = 1; d < 4; ++d) {
csParam.x[d] += shift2[d] * 2; }
426 const long long Ls = in.
X(4);
427 const long long mat = 2ll * 4ll *
Ls - 1ll;
428 const long long hop = 7ll * 8ll;
433 vol = (2 * in.
X(0)) * in.
X(1) * in.
X(2) * in.
X(3) *
Ls / 2ll;
436 vol = (2 * in.
X(0) + 2 * 1) * (in.
X(1) + 2 * 1) * (in.
X(2) + 2 * 1) * (in.
X(3) + 2 * 1) *
Ls / 2ll;
437 halo_vol = (2 * in.
X(0)) * in.
X(1) * in.
X(2) * in.
X(3) *
Ls / 2ll;
438 flops += halo_vol * 24ll * hop + vol * 24ll *
mat;
440 vol = (2 * in.
X(0) + 2 * 2) * (in.
X(1) + 2 * 2) * (in.
X(2) + 2 * 2) * (in.
X(3) + 2 * 2) *
Ls / 2ll;
441 halo_vol = (2 * in.
X(0) + 2 * 1) * (in.
X(1) + 2 * 1) * (in.
X(2) + 2 * 1) * (in.
X(3) + 2 * 1) *
Ls / 2ll;
442 flops += halo_vol * 24ll * hop + vol * 24ll *
mat * 2ll;
444 vol = (2 * in.
X(0) + 2 * 1) * (in.
X(1) + 2 * 1) * (in.
X(2) + 2 * 1) * (in.
X(3) + 2 * 1) *
Ls / 2ll;
447 vol = (2 * in.
X(0)) * in.
X(1) * in.
X(2) * in.
X(3) *
Ls / 2ll;
450 delete extended_tmp2;
451 delete extended_tmp1;
453 delete unextended_tmp1;
454 delete unextended_tmp2;
457 errorQuda(
"DiracMobiusPC::MdagMLocal(...) only supports half and quarter precision");
472 if (
zMobius) {
errorQuda(
"DiracMobiusEofa doesn't currently support zMobius.\n"); }
474 double b =
b_5[0].real();
475 double c =
c_5[0].real();
477 double alpha = b + c;
489 for (
int s = 0; s <
Ls; s++) {
499 for (
int s =
Ls - 1; s > 0; s--) {
506 eofa_y[
Ls - 1] = 1. / (1. + factor);
513 for (
int s = 0; s <
Ls - 1; s++) {
520 eofa_y[0] = 1. / (1. + factor);
530 if (in.
Ndim() != 5 || out.
Ndim() != 5)
errorQuda(
"Wrong number of dimensions\n");
535 mobius_eofa::apply_dslash5(out, in, in,
mass,
m5,
b_5,
c_5, 0.,
eofa_pm,
m5inv_fac,
mobius_kappa,
eofa_u,
eofa_x,
538 long long Ls = in.
X(4);
539 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
540 long long wall = 2 * in.
Volume() /
Ls;
543 flops += 96LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
549 if (in.
Ndim() != 5 || out.
Ndim() != 5)
errorQuda(
"Wrong number of dimensions\n");
556 mobius_eofa::apply_dslash5(out, in, x,
mass,
m5,
b_5,
c_5, a,
eofa_pm,
m5inv_fac,
mobius_kappa,
eofa_u,
eofa_x,
559 long long Ls = in.
X(4);
560 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
561 long long wall = 2 * in.
Volume() /
Ls;
564 flops += 144LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
582 ApplyDomainWall4D(*
tmp, out, *
gauge, 0.0,
m5,
b_5,
c_5, in,
QUDA_INVALID_PARITY,
dagger,
commDim,
profile);
583 mobius_eofa::apply_dslash5(out, in, in,
mass,
m5,
b_5,
c_5, 0.,
eofa_pm,
m5inv_fac,
mobius_kappa,
eofa_u,
eofa_x,
586 ApplyDomainWall4D(out, in, *
gauge, 0.0,
m5,
b_5,
c_5, in,
QUDA_INVALID_PARITY,
dagger,
commDim,
profile);
588 mobius_eofa::apply_dslash5(out, in, in,
mass,
m5,
b_5,
c_5, 0.,
eofa_pm,
m5inv_fac,
mobius_kappa,
eofa_u,
eofa_x,
593 long long Ls = in.
X(4);
594 long long bulk = (
Ls - 2) * (in.
Volume() /
Ls);
595 long long wall = 2 * in.
Volume() /
Ls;
596 flops += 72LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
600 flops += 96LL * (
long long)in.
Volume() + 96LL * bulk + 120LL * wall;
621 errorQuda(
"Preconditioned solution requires a preconditioned solve_type");
637 if (in.
Ndim() != 5 || out.
Ndim() != 5)
errorQuda(
"Wrong number of dimensions\n");
643 mobius_eofa::apply_dslash5(out, in, in,
mass,
m5,
b_5,
c_5, 0.,
eofa_pm,
m5inv_fac,
mobius_kappa,
eofa_u,
eofa_x,
646 long long Ls = in.
X(4);
653 if (in.
Ndim() != 5 || out.
Ndim() != 5)
errorQuda(
"Wrong number of dimensions\n");
661 mobius_eofa::apply_dslash5(out, in, x,
mass,
m5,
b_5,
c_5, a,
eofa_pm,
m5inv_fac,
mobius_kappa,
eofa_u,
eofa_x,
664 long long Ls = in.
X(4);
665 flops += (192LL *
Ls + 48LL + 96LL) * (
long long)in.
Volume() + 3LL *
Ls * (
Ls - 1LL);
679 if (symmetric && !
dagger) {
686 }
else if (symmetric &&
dagger) {
693 }
else if (!symmetric && !
dagger) {
700 }
else if (!symmetric &&
dagger) {
798 checkFullSpinor(out, in);
799 bool reset1 = newTmp(&tmp1, in.Odd());
800 bool reset2 = newTmp(&tmp2, in.Odd());
803 m5_eofa(*tmp1, in.Even());
807 m5_eofa(*tmp1, in.Odd());
811 printfQuda(
"Quda EOFA full dslash dagger=yes\n");
813 m5_eofa(*tmp1, in.Even());
815 Dslash4preXpay(out.Even(), *tmp2,
QUDA_EVEN_PARITY, *tmp1, -1. / mobius_kappa_b);
817 m5_eofa(*tmp1, in.Odd());
819 Dslash4preXpay(out.Odd(), *tmp2,
QUDA_ODD_PARITY, *tmp1, -1. / mobius_kappa_b);
821 deleteTmp(&tmp1, reset1);
822 deleteTmp(&tmp2, reset2);
const ColorSpinorField & Odd() const
QudaSiteSubset SiteSubset() const
static ColorSpinorField * Create(const ColorSpinorParam ¶m)
const ColorSpinorField & Even() const
DiracDomainWall & operator=(const DiracDomainWall &dirac)
void checkDWF(const ColorSpinorField &out, const ColorSpinorField &in) const
bool newTmp(ColorSpinorField **, const ColorSpinorField &) const
void deleteTmp(ColorSpinorField **, const bool &reset) const
virtual void checkParitySpinor(const ColorSpinorField &, const ColorSpinorField &) const
Check parity spinors are usable (check geometry ?)
QudaMatPCType getMatPCType() const
returns preconditioning type
void checkSpinorAlias(const ColorSpinorField &, const ColorSpinorField &) const
check spinors do not alias
virtual void checkFullSpinor(const ColorSpinorField &, const ColorSpinorField &) const
check full spinors are compatible (check geometry ?)
int commDim[QUDA_MAX_DIM]
void Mdag(ColorSpinorField &out, const ColorSpinorField &in) const
Apply Mdag (daggered operator of M.
double eofa_y[QUDA_MAX_DWF_LS]
void m5_eofa_xpay(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double a=-1.) const
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
void m5_eofa(ColorSpinorField &out, const ColorSpinorField &in) const
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
double eofa_x[QUDA_MAX_DWF_LS]
DiracMobiusEofa(const DiracParam ¶m)
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
double sherman_morrison_fac
double eofa_u[QUDA_MAX_DWF_LS]
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
void full_dslash(ColorSpinorField &out, const ColorSpinorField &in) const
void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
void m5inv_eofa_xpay(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double a=-1.) const
void m5inv_eofa(ColorSpinorField &out, const ColorSpinorField &in) const
DiracMobiusEofaPC(const DiracParam ¶m)
Complex c_5[QUDA_MAX_DWF_LS]
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the local MdagM operator: equivalent to applying zero Dirichlet boundary condition to MdagM on ...
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void Dslash5Xpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
void Dslash4Xpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
void Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Complex b_5[QUDA_MAX_DWF_LS]
void Dslash4preXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
DiracMobius(const DiracParam ¶m)
cudaGaugeField * extended_gauge
void Dslash5invXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
DiracMobiusPC(const DiracParam ¶m)
void MdagMLocal(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the local MdagM operator: equivalent to applying zero Dirichlet boundary condition to MdagM on ...
DiracMobiusPC & operator=(const DiracMobiusPC &dirac)
QudaPrecision Precision() const
int comm_dim_partitioned(int dim)
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
cudaColorSpinorField * tmp
enum QudaSolutionType_s QudaSolutionType
@ QUDA_MATPC_ODD_ODD_ASYMMETRIC
@ QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
@ QUDA_MATPCDAG_MATPC_SOLUTION
enum QudaParity_s QudaParity
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
void apply_dslash5(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, int eofa_pm, double inv, double kappa, const double *eofa_u, const double *eofa_x, const double *eofa_y, double sherman_morrison, bool dagger, Dslash5Type type)
void apply_fused_dslash(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, ColorSpinorField &y, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, bool dagger, int parity, int shift[4], int halo_shift[4], MdwfFusedDslashType type)
double norm2(const CloverField &a, bool inverse=false)
@ D4DAG_D5PREDAG_D5INVDAG
std::complex< double > Complex
cudaGaugeField * createExtendedGauge(cudaGaugeField &in, const int *R, TimeProfile &profile, bool redundant_comms=false, QudaReconstructType recon=QUDA_RECONSTRUCT_INVALID)
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
void ApplyDomainWall4D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order...
void ApplyDslash5(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, bool dagger, Dslash5Type type)
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator....
QudaVerbosity getVerbosity()