quda-ref/v1.1.0/dirac__mobius_8cpp_source.html

 #include <iostream>

 #include <dirac_quda.h>

 #include <blas_quda.h>


 namespace quda {


   // Build the Mobius domain wall operator from param: copy the b_5 / c_5
   // coefficient arrays, derive the kappa factors from the real parts of the
   // first coefficient pair, and decide whether the zMobius variant is required.
   DiracMobius::DiracMobius(const DiracParam &param) : DiracDomainWall(param), zMobius(false)
   {
     const auto coeff_bytes = sizeof(Complex) * param.Ls;
     memcpy(b_5, param.b_5, coeff_bytes);
     memcpy(c_5, param.c_5, coeff_bytes);

     // only the s = 0 coefficients enter the kappa factors
     const double b0 = b_5[0].real();
     const double c0 = c_5[0].real();
     mobius_kappa_b = 0.5 / (b0 * (m5 + 4.) + 1.);
     mobius_kappa_c = 0.5 / (c0 * (m5 + 4.) - 1.);
     mobius_kappa = mobius_kappa_b / mobius_kappa_c;

     // zMobius is flagged as soon as a coefficient has an imaginary part or
     // differs from its successor along the fifth dimension
     for (int s = 0; s < Ls && !zMobius; s++) {
       const bool has_imag = b_5[s].imag() != 0.0 || c_5[s].imag() != 0.0;
       const bool varies = s < Ls - 1 && (b_5[s] != b_5[s + 1] || c_5[s] != c_5[s + 1]);
       if (has_imag || varies) { zMobius = true; }
     }

     if (getVerbosity() > QUDA_VERBOSE) {
       if (!zMobius) {
         printfQuda("%s: Detected fixed real cofficients: using regular Mobius\n", __func__);
       } else {
         printfQuda("%s: Detected variable or complex cofficients: using zMobius\n", __func__);
       }
     }

     // zMobius is detected but rejected here
     if (zMobius) { errorQuda("zMobius has NOT been fully tested in QUDA.\n"); }
   }


   // 4-d part of the 4-d preconditioned Mobius domain wall operator, acting on
   // one parity. ApplyDomainWall4D is called with zero scalar arguments and null
   // coefficient arrays; the input field doubles as the (unused here) x argument.
   void DiracMobius::Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
   {
     // validate the field pair before launching anything
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDomainWall4D(out, in, *gauge, 0.0, 0.0, nullptr, nullptr, in, parity, dagger, commDim, profile);

     // 1320 flops are charged per site of the input field
     const long long sites = (long long)in.Volume();
     flops += 1320LL * sites;
   }


   // Apply the DSLASH5_MOBIUS_PRE fifth-dimension kernel to a single-parity
   // field, with no xpay contribution (scale factor 0.0).
   void DiracMobius::Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDslash5(out, in, in, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS_PRE);

     // flop accounting: a per-site term plus separate costs for the Ls - 2
     // interior slices and the two boundary slices of the fifth dimension
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;
     flops += 72LL * volume + 96LL * bulk_sites + 120LL * wall_sites;
   }


   // Apply the DSLASH5_MOBIUS fifth-dimension kernel to a single-parity field.
   // Per the original note: unlike DWF-4d, the Mobius variant applies the full
   // M5 operator and not just D5.
   void DiracMobius::Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDslash5(out, in, in, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS);

     // flop accounting: per-site term plus interior / boundary slice terms
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;
     flops += 48LL * volume + 96LL * bulk_sites + 120LL * wall_sites;
   }


   // Xpay form of the 4-d part of the 4-d preconditioned Mobius operator: the
   // scale factor k, m5, the b_5 / c_5 coefficients and the accumulation field x
   // are all forwarded to ApplyDomainWall4D.
   void DiracMobius::Dslash4Xpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity,
                                 const ColorSpinorField &x, const double &k) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDomainWall4D(out, in, *gauge, k, m5, b_5, c_5, x, parity, dagger, commDim, profile);

     // same per-site charge as the plain Dslash4
     const long long sites = (long long)in.Volume();
     flops += 1320LL * sites;
   }


   // Xpay form of Dslash4pre: runs the DSLASH5_MOBIUS_PRE kernel with the
   // accumulation field x and scale factor k.
   void DiracMobius::Dslash4preXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity,
                                    const ColorSpinorField &x, const double &k) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDslash5(out, in, x, mass, m5, b_5, c_5, k, dagger, DSLASH5_MOBIUS_PRE);

     // flop accounting: per-site term plus interior / boundary slice terms
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;
     flops += 72LL * volume + 96LL * bulk_sites + 120LL * wall_sites;
   }


   // Xpay form of Dslash5: runs the DSLASH5_MOBIUS kernel with the accumulation
   // field x and scale factor k. Per the original note, the xpay operator bakes
   // in a factor of kappa_b^2.
   void DiracMobius::Dslash5Xpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity,
                                 const ColorSpinorField &x, const double &k) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDslash5(out, in, x, mass, m5, b_5, c_5, k, dagger, DSLASH5_MOBIUS);

     // flop accounting: per-site term plus interior / boundary slice terms
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;
     flops += 96LL * volume + 96LL * bulk_sites + 120LL * wall_sites;
   }


   // Full (unpreconditioned) Mobius operator on a full-site field.
   // out receives the DSLASH5_MOBIUS term applied to in, minus kappa_b times
   // the product of the 4-d operator and the DSLASH5_MOBIUS_PRE operator (the
   // order of that product is swapped when dagger is set).
   void DiracMobius::M(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     checkFullSpinor(out, in);

     // zMobius breaks the following code. Refer to the zMobius check in DiracMobius::DiracMobius(param)
     const double kappa_b = 0.5 / (b_5[0].real() * (4.0 + m5) + 1.0);

     // cannot use Xpay variants since it will scale incorrectly for this operator

     // reuse tmp2 as scratch space when it already is a full-site field
     ColorSpinorField *tmp = (tmp2 && tmp2->SiteSubset() == QUDA_FULL_SITE_SUBSET) ? tmp2 : nullptr;
     bool reset = newTmp(&tmp, in);

     if (dagger != QUDA_DAG_NO) {
       // the third term is added, not multiplied, so we only need to swap the first two in the dagger
       ApplyDomainWall4D(out, in, *gauge, 0.0, m5, b_5, c_5, in, QUDA_INVALID_PARITY, dagger, commDim, profile);
       ApplyDslash5(*tmp, out, in, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS_PRE);
     } else {
       ApplyDslash5(out, in, in, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS_PRE);
       ApplyDomainWall4D(*tmp, out, *gauge, 0.0, m5, b_5, c_5, in, QUDA_INVALID_PARITY, dagger, commDim, profile);
     }
     // the product now lives in *tmp; out is overwritten with the fifth-dimension term
     ApplyDslash5(out, in, in, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS);

     blas::axpy(-kappa_b, *tmp, out);

     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;
     flops += 72LL * volume + 96LL * bulk_sites + 120LL * wall_sites; // pre
     flops += 1320LL * volume;                                        // dslash4
     flops += 48LL * volume + 96LL * bulk_sites + 120LL * wall_sites; // dslash5

     deleteTmp(&tmp, reset);
   }


   // Normal operator: out = Mdag(M(in)), staged through the tmp1 scratch field.
   void DiracMobius::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     checkFullSpinor(out, in);

     const bool allocated = newTmp(&tmp1, in);

     M(*tmp1, in);      // tmp1 = M in
     Mdag(out, *tmp1);  // out  = Mdag tmp1

     deleteTmp(&tmp1, allocated);
   }


   // Solver set-up for the unpreconditioned operator: the source and solution
   // are simply b and x. Preconditioned solution types are rejected.
   void DiracMobius::prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b,
                             const QudaSolutionType solType) const
   {
     switch (solType) {
     case QUDA_MATPC_SOLUTION:
     case QUDA_MATPCDAG_MATPC_SOLUTION:
       errorQuda("Preconditioned solution requires a preconditioned solve_type");
       break;
     default: break;
     }

     sol = &x;
     src = &b;
   }


   // Nothing to reconstruct for the unpreconditioned operator: the body is
   // intentionally empty.
   void DiracMobius::reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType solType) const
   {
     // intentionally empty
   }


   // Construct the preconditioned operator from param. The extended gauge field
   // starts out null; MdagMLocal() creates it on first use.
   DiracMobiusPC::DiracMobiusPC(const DiracParam &param) : DiracMobius(param), extended_gauge(nullptr)
   {
     // all state is set up by the initializer list
   }


   // Copy constructor. The source object's extended gauge field is not shared:
   // the copy starts with a null pointer of its own.
   DiracMobiusPC::DiracMobiusPC(const DiracMobiusPC &dirac) : DiracMobius(dirac), extended_gauge(nullptr)
   {
     // all state is set up by the initializer list
   }


   // Release the extended gauge field, if one was ever created.
   DiracMobiusPC::~DiracMobiusPC()
   {
     // deleting a null pointer is a no-op, so no explicit check is needed
     delete extended_gauge;
   }


   // Copy assignment. The base-class state is copied from dirac; the extended
   // gauge field is never shared between objects (the copy constructor also
   // starts from nullptr), so the one held by this object is released and the
   // pointer reset. Previously the pointer was overwritten without being freed,
   // leaking any field MdagMLocal() had created.
   // Self-assignment is a no-op. Returns *this.
   DiracMobiusPC &DiracMobiusPC::operator=(const DiracMobiusPC &dirac)
   {
     if (&dirac != this) {
       DiracMobius::operator=(dirac);

       // drop the field built for the previous state; MdagMLocal() recreates it
       // on demand. delete on a null pointer is a no-op.
       delete extended_gauge;
       extended_gauge = nullptr;
     }

     return *this;
   }


   // Apply the inverse fifth-dimension operator to a single-parity field, using
   // the M5_INV_ZMOBIUS kernel when zMobius is set and M5_INV_MOBIUS otherwise.
   // No xpay contribution is requested (scale factor 0.0).
   //
   // The former `if (0) { ... exit(0); }` debugging block was unreachable and
   // has been removed.
   void DiracMobiusPC::Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDslash5(out, in, in, mass, m5, b_5, c_5, 0.0, dagger, zMobius ? M5_INV_ZMOBIUS : M5_INV_MOBIUS);

     // flop accounting: a term growing with Ls per site plus an Ls * (Ls - 1) term
     long long Ls = in.X(4);
     flops += 144LL * (long long)in.Volume() * Ls + 3LL * Ls * (Ls - 1LL);
   }


   // Xpay form of Dslash5inv: same kernel selection (M5_INV_ZMOBIUS when zMobius
   // is set, M5_INV_MOBIUS otherwise) with accumulation field x and scale
   // factor k. Per the original note, the xpay operator bakes in a factor of
   // kappa_b^2.
   void DiracMobiusPC::Dslash5invXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity,
                                      const ColorSpinorField &x, const double &k) const
   {
     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     ApplyDslash5(out, in, x, mass, m5, b_5, c_5, k, dagger, zMobius ? M5_INV_ZMOBIUS : M5_INV_MOBIUS);

     // flop accounting: the inverse cost plus 48 extra flops per site for the xpay
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     flops += (144LL * ls + 48LL) * volume + 3LL * ls * (ls - 1LL);
   }


   // Apply the even-odd preconditioned Mobius DWF operator.
   // Per the original note: Dslash5 returns the M5 operation, with
   // M5 = 1 + 0.5*kappa_b/kappa_c * D5.
   // The operator sequence depends on whether matpcType is symmetric and on the
   // dagger flag; every sequence ping-pongs between out and the tmp1 scratch
   // field and ends with an xpay step that combines the result with in.
   void DiracMobiusPC::M(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     bool reset1 = newTmp(&tmp1, in);

     const bool odd_odd = (matpcType == QUDA_MATPC_ODD_ODD || matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC);
     const bool symmetric = (matpcType == QUDA_MATPC_EVEN_EVEN || matpcType == QUDA_MATPC_ODD_ODD);
     const int odd_bit = odd_odd ? 1 : 0;

     // p1 carries the value odd_bit, p0 the opposite parity value
     const QudaParity p0 = static_cast<QudaParity>((1 + odd_bit) % 2);
     const QudaParity p1 = static_cast<QudaParity>((0 + odd_bit) % 2);

     // QUDA_MATPC_EVEN_EVEN_ASYMMETRIC : M5 - kappa_b^2 * D4_{eo}D4pre_{oe}D5inv_{ee}D4_{eo}D4pre_{oe}
     // QUDA_MATPC_ODD_ODD_ASYMMETRIC : M5 - kappa_b^2 * D4_{oe}D4pre_{eo}D5inv_{oo}D4_{oe}D4pre_{eo}
     if (symmetric) {
       if (!dagger) {
         Dslash4pre(*tmp1, in, p1);
         Dslash4(out, *tmp1, p0);
         Dslash5inv(*tmp1, out, p0);
         Dslash4pre(out, *tmp1, p0);
         Dslash4(*tmp1, out, p1);
         Dslash5invXpay(out, *tmp1, p1, in, -1.0);
       } else {
         Dslash5inv(*tmp1, in, p1);
         Dslash4(out, *tmp1, p0);
         Dslash4pre(*tmp1, out, p0);
         Dslash5inv(out, *tmp1, p0);
         Dslash4(*tmp1, out, p1);
         Dslash4preXpay(out, *tmp1, p1, in, -1.0);
       }
     } else {
       if (!dagger) {
         Dslash4pre(*tmp1, in, p1);
         Dslash4(out, *tmp1, p0);
         Dslash5inv(*tmp1, out, p0);
         Dslash4pre(out, *tmp1, p0);
         Dslash4(*tmp1, out, p1);
         Dslash5Xpay(out, in, p1, *tmp1, -1.0);
       } else {
         Dslash4(*tmp1, in, p0);
         Dslash4pre(out, *tmp1, p0);
         Dslash5inv(*tmp1, out, p0);
         Dslash4(out, *tmp1, p1);
         Dslash4pre(*tmp1, out, p1);
         Dslash5Xpay(out, in, p1, *tmp1, -1.0);
       }
     }

     deleteTmp(&tmp1, reset1);
   }


   // Normal operator of the preconditioned system: out = Mdag(M(in)), staged
   // through tmp2 (M itself uses tmp1 as scratch).
   void DiracMobiusPC::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     const bool allocated = newTmp(&tmp2, in);

     M(*tmp2, in);      // tmp2 = M in
     Mdag(out, *tmp2);  // out  = Mdag tmp2

     deleteTmp(&tmp2, allocated);
   }


   // Solver set-up for the preconditioned operator.
   // For a preconditioned solution type, src / sol are b / x unchanged.
   // Otherwise the preconditioned source is built in one parity half of x
   // (chosen by matpcType), and sol is pointed at the other half.
   void DiracMobiusPC::prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b,
                               const QudaSolutionType solType) const
   {
     // we desire solution to preconditioned system
     if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
       src = &b;
       sol = &x;
       return;
     }

     // we desire solution to full system
     // prepare function in MDWF is not tested yet.
     bool reset = newTmp(&tmp1, b.Even());

     switch (matpcType) {
     case QUDA_MATPC_EVEN_EVEN:
       // src = D5^-1 (b_e + k D4_eo * D4pre * D5^-1 b_o)
       src = &(x.Odd());
       Dslash5inv(*tmp1, b.Odd(), QUDA_ODD_PARITY);
       Dslash4pre(*src, *tmp1, QUDA_ODD_PARITY);
       Dslash4Xpay(*tmp1, *src, QUDA_EVEN_PARITY, b.Even(), 1.0);
       Dslash5inv(*src, *tmp1, QUDA_EVEN_PARITY);
       sol = &(x.Even());
       break;

     case QUDA_MATPC_ODD_ODD:
       // src = b_o + k D4_oe * D4pre * D5inv b_e
       src = &(x.Even());
       Dslash5inv(*tmp1, b.Even(), QUDA_EVEN_PARITY);
       Dslash4pre(*src, *tmp1, QUDA_EVEN_PARITY);
       Dslash4Xpay(*tmp1, *src, QUDA_ODD_PARITY, b.Odd(), 1.0);
       Dslash5inv(*src, *tmp1, QUDA_ODD_PARITY);
       sol = &(x.Odd());
       break;

     case QUDA_MATPC_EVEN_EVEN_ASYMMETRIC:
       // src = b_e + k D4_eo * D4pre * D5inv b_o
       src = &(x.Odd());
       Dslash5inv(*src, b.Odd(), QUDA_ODD_PARITY);
       Dslash4pre(*tmp1, *src, QUDA_ODD_PARITY);
       Dslash4Xpay(*src, *tmp1, QUDA_EVEN_PARITY, b.Even(), 1.0);
       sol = &(x.Even());
       break;

     case QUDA_MATPC_ODD_ODD_ASYMMETRIC:
       // src = b_o + k D4_oe * D4pre * D5inv b_e
       src = &(x.Even());
       Dslash5inv(*src, b.Even(), QUDA_EVEN_PARITY);
       Dslash4pre(*tmp1, *src, QUDA_EVEN_PARITY);
       Dslash4Xpay(*src, *tmp1, QUDA_ODD_PARITY, b.Odd(), 1.0);
       sol = &(x.Odd());
       break;

     default: errorQuda("MatPCType %d not valid for DiracMobiusPC", matpcType); break;
     }

     // here we use final solution to store parity solution and parity source
     // b is now up for grabs if we want

     deleteTmp(&tmp1, reset);
   }


   // Rebuild the full solution after a preconditioned solve: the parity half of
   // x that was not solved for is computed from b and the solved half.
   // Nothing is done when a preconditioned solution type was requested.
   void DiracMobiusPC::reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType solType) const
   {
     const bool pc_solution = (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION);
     if (pc_solution) { return; }

     bool reset1 = newTmp(&tmp1, x.Even());

     // create full solution
     checkFullSpinor(x, b);

     switch (matpcType) {
     case QUDA_MATPC_EVEN_EVEN:
     case QUDA_MATPC_EVEN_EVEN_ASYMMETRIC:
       // psi_o = M5^-1 (b_o + k_b D4_oe D4pre x_e)
       Dslash4pre(x.Odd(), x.Even(), QUDA_EVEN_PARITY);
       Dslash4Xpay(*tmp1, x.Odd(), QUDA_ODD_PARITY, b.Odd(), 1.0);
       Dslash5inv(x.Odd(), *tmp1, QUDA_ODD_PARITY);
       break;

     case QUDA_MATPC_ODD_ODD:
     case QUDA_MATPC_ODD_ODD_ASYMMETRIC:
       // psi_e = M5^-1 (b_e + k_b D4_eo D4pre x_o)
       Dslash4pre(x.Even(), x.Odd(), QUDA_ODD_PARITY);
       Dslash4Xpay(*tmp1, x.Even(), QUDA_EVEN_PARITY, b.Even(), 1.0);
       Dslash5inv(x.Even(), *tmp1, QUDA_EVEN_PARITY);
       break;

     default: errorQuda("MatPCType %d not valid for DiracMobiusPC", matpcType); break;
     }

     deleteTmp(&tmp1, reset1);
   }


   // Apply MdagM through a chain of five fused mobius_tensor_core kernels that
   // work on fields padded in the partitioned directions.
   // Only half and quarter precision output fields are supported, and zMobius is
   // rejected. The extended gauge field is created on the first call and kept in
   // extended_gauge for later calls.
   void DiracMobiusPC::MdagMLocal(ColorSpinorField &out, const ColorSpinorField &in) const

   {

     if (zMobius) { errorQuda("DiracMobiusPC::MdagMLocal doesn't currently support zMobius.\n"); }


     // per-direction padding widths: 0, 1 or 2 sites in each partitioned
     // direction, always 0 in directions that are not partitioned
     int shift0[4] = {0, 0, 0, 0};

     int shift1[4];

     int shift2[4];


     for (int d = 0; d < 4; d++) {

       shift1[d] = comm_dim_partitioned(d) ? 1 : 0;

       shift2[d] = comm_dim_partitioned(d) ? 2 : 0;

     }


     // lazily build the gauge field extended by the widest padding (shift2)
     if (extended_gauge == nullptr) { extended_gauge = createExtendedGauge(*gauge, shift2, profile, true); }


     checkDWF(in, out);

     // checkParitySpinor(in, out);

     checkSpinorAlias(in, out);


     // scratch fields with the same layout as out, left uninitialised
     ColorSpinorParam csParam(out);

     csParam.create = QUDA_NULL_FIELD_CREATE;


     ColorSpinorField *unextended_tmp1 = ColorSpinorField::Create(csParam);

     ColorSpinorField *unextended_tmp2 = ColorSpinorField::Create(csParam);


     // scratch fields enlarged by shift2 on both sides of every direction
     csParam.x[0] += shift2[0]; // x direction is checkerboarded

     for (int d = 1; d < 4; ++d) { csParam.x[d] += shift2[d] * 2; }

     ColorSpinorField *extended_tmp1 = ColorSpinorField::Create(csParam);

     ColorSpinorField *extended_tmp2 = ColorSpinorField::Create(csParam);


     // NOTE(review): only QUDA_MATPC_ODD_ODD sets the odd bit here, whereas M()
     // also treats QUDA_MATPC_ODD_ODD_ASYMMETRIC as odd - confirm this is intended
     int odd_bit = (getMatPCType() == QUDA_MATPC_ODD_ODD) ? 1 : 0;

     QudaParity parity[2] = {static_cast<QudaParity>((1 + odd_bit) % 2), static_cast<QudaParity>((0 + odd_bit) % 2)};

     if (out.Precision() == QUDA_HALF_PRECISION || out.Precision() == QUDA_QUARTER_PRECISION) {

       // stage 1: D5PRE on the unpadded input
       mobius_tensor_core::apply_fused_dslash(*unextended_tmp2, in, *extended_gauge, *unextended_tmp2, in, mass, m5, b_5,

                                              c_5, dagger, parity[1], shift0, shift0, MdwfFusedDslashType::D5PRE);


       // stage 2: D4_D5INV_D5PRE into a padded field
       mobius_tensor_core::apply_fused_dslash(*extended_tmp2, *unextended_tmp2, *extended_gauge, *extended_tmp2,

                                              *unextended_tmp2, mass, m5, b_5, c_5, dagger, parity[0], shift1, shift2,

                                              MdwfFusedDslashType::D4_D5INV_D5PRE);


       // stage 3: D4_D5INV_D5INVDAG; also takes unextended_tmp1 and in
       mobius_tensor_core::apply_fused_dslash(*extended_tmp1, *extended_tmp2, *extended_gauge, *unextended_tmp1, in,

                                              mass, m5, b_5, c_5, dagger, parity[1], shift0, shift1,

                                              MdwfFusedDslashType::D4_D5INV_D5INVDAG);


       // stage 4: D4DAG_D5PREDAG_D5INVDAG
       mobius_tensor_core::apply_fused_dslash(*extended_tmp2, *extended_tmp1, *extended_gauge, *extended_tmp2,

                                              *extended_tmp1, mass, m5, b_5, c_5, dagger, parity[0], shift1, shift1,

                                              MdwfFusedDslashType::D4DAG_D5PREDAG_D5INVDAG);


       // stage 5: D4DAG_D5PREDAG, writing the final result into out
       mobius_tensor_core::apply_fused_dslash(out, *extended_tmp2, *extended_gauge, out, *unextended_tmp1, mass, m5, b_5,

                                              c_5, dagger, parity[1], shift2, shift2, MdwfFusedDslashType::D4DAG_D5PREDAG);


       // flop accounting, one contribution per fused stage; the volumes below
       // use fixed paddings of 0, 1 or 2 sites per side in every direction
       const long long Ls = in.X(4);

       const long long mat = 2ll * 4ll * Ls - 1ll; // (multiplication-add) * (spin) * Ls - 1

       const long long hop = 7ll * 8ll;            // 8 for eight directions


       long long vol;

       long long halo_vol;


       // stage 1: fifth-dimension matrix on the unpadded volume
       vol = (2 * in.X(0)) * in.X(1) * in.X(2) * in.X(3) * Ls / 2ll;

       flops += vol * 24ll * mat;


       // stage 2: hopping on the unpadded volume, matrix on the 1-padded volume
       vol = (2 * in.X(0) + 2 * 1) * (in.X(1) + 2 * 1) * (in.X(2) + 2 * 1) * (in.X(3) + 2 * 1) * Ls / 2ll;

       halo_vol = (2 * in.X(0)) * in.X(1) * in.X(2) * in.X(3) * Ls / 2ll;

       flops += halo_vol * 24ll * hop + vol * 24ll * mat;


       // stage 3: hopping on the 1-padded volume, two matrices on the 2-padded volume
       vol = (2 * in.X(0) + 2 * 2) * (in.X(1) + 2 * 2) * (in.X(2) + 2 * 2) * (in.X(3) + 2 * 2) * Ls / 2ll;

       halo_vol = (2 * in.X(0) + 2 * 1) * (in.X(1) + 2 * 1) * (in.X(2) + 2 * 1) * (in.X(3) + 2 * 1) * Ls / 2ll;

       flops += halo_vol * 24ll * hop + vol * 24ll * mat * 2ll;


       // stage 4: hopping plus matrix on the 1-padded volume
       vol = (2 * in.X(0) + 2 * 1) * (in.X(1) + 2 * 1) * (in.X(2) + 2 * 1) * (in.X(3) + 2 * 1) * Ls / 2ll;

       flops += vol * 24ll * (hop + mat);


       // stage 5: hopping plus matrix on the unpadded volume
       vol = (2 * in.X(0)) * in.X(1) * in.X(2) * in.X(3) * Ls / 2ll;

       flops += vol * 24ll * (hop + mat);


       // release the scratch fields created above
       delete extended_tmp2;

       delete extended_tmp1;


       delete unextended_tmp1;

       delete unextended_tmp2;


     } else {

       // the scratch fields are not freed on this path
       errorQuda("DiracMobiusPC::MdagMLocal(...) only supports half and quarter precision");

     }

   }


   // Copy the EOFA specific parameters from param and precompute the
   // per-slice coefficient arrays eofa_u, eofa_x and eofa_y together with the
   // scalar factors m5inv_fac and sherman_morrison_fac, which are later handed to
   // mobius_eofa::apply_dslash5. zMobius coefficients are rejected.

   DiracMobiusEofa::DiracMobiusEofa(const DiracParam &param) :

     DiracMobius(param),

     eofa_shift(param.eofa_shift),

     eofa_pm(param.eofa_pm),

     mq1(param.mq1),

     mq2(param.mq2),

     mq3(param.mq3)

   {

     // Initialize the EOFA parameters here: u, x, y


     if (zMobius) { errorQuda("DiracMobiusEofa doesn't currently support zMobius.\n"); }


     // only the real parts of the first coefficient pair are used
     double b = b_5[0].real();

     double c = c_5[0].real();


     double alpha = b + c;


     // normalisation built from alpha, Ls and the masses mq2 / mq3
     double eofa_norm = alpha * (mq3 - mq2) * std::pow(alpha + 1., 2. * Ls)

       / (std::pow(alpha + 1., Ls) + mq2 * std::pow(alpha - 1., Ls))

       / (std::pow(alpha + 1., Ls) + mq3 * std::pow(alpha - 1., Ls));


     // overall prefactor of u; its sign follows eofa_pm
     double N = (eofa_pm ? +1. : -1.) * (2. * this->eofa_shift * eofa_norm)

       * (std::pow(alpha + 1., Ls) + this->mq1 * std::pow(alpha - 1., Ls)) / (b * (m5 + 4.) + 1.);


     // Here the signs are somewhat mixed:

     // There is one -1 from N for eofa_pm = minus, thus the u_- here is actually -u_- in the document

     // It turns out this actually simplifies things.

     // u is filled in reversed slice order when eofa_pm is false
     for (int s = 0; s < Ls; s++) {

       eofa_u[eofa_pm ? s : Ls - 1 - s]

         = N * std::pow(-1., s) * std::pow(alpha - 1., s) / std::pow(alpha + 1., Ls + s + 1);

     }


     // factor starts at -kappa * mass and is multiplied by -kappa once per loop
     // iteration below, i.e. Ls - 1 times in either branch
     double factor = -mobius_kappa * mass;

     if (eofa_pm) {

       // eofa_pm = plus

       // Computing x

       // x[0] is accumulated from u walking down from the last slice, then the
       // remaining entries follow from a forward recurrence
       eofa_x[0] = eofa_u[0];

       for (int s = Ls - 1; s > 0; s--) {

         eofa_x[0] -= factor * eofa_u[s];

         factor *= -mobius_kappa;

       }

       eofa_x[0] /= 1. + factor;

       for (int s = 1; s < Ls; s++) { eofa_x[s] = eofa_x[s - 1] * (-mobius_kappa) + eofa_u[s]; }

       // Computing y

       // y is seeded at the last slice and propagated backwards
       eofa_y[Ls - 1] = 1. / (1. + factor);

       sherman_morrison_fac = eofa_x[Ls - 1];

       for (int s = Ls - 1; s > 0; s--) { eofa_y[s - 1] = eofa_y[s] * (-mobius_kappa); }

     } else {

       // eofa_pm = minus

       // Computing x

       // mirror image of the plus case: x[Ls - 1] is accumulated walking up
       // from the first slice, then a backward recurrence fills the rest
       eofa_x[Ls - 1] = eofa_u[Ls - 1];

       for (int s = 0; s < Ls - 1; s++) {

         eofa_x[Ls - 1] -= factor * eofa_u[s];

         factor *= -mobius_kappa;

       }

       eofa_x[Ls - 1] /= 1. + factor;

       for (int s = Ls - 1; s > 0; s--) { eofa_x[s - 1] = eofa_x[s] * (-mobius_kappa) + eofa_u[s - 1]; }

       // Computing y

       // y is seeded at the first slice and propagated forwards
       eofa_y[0] = 1. / (1. + factor);

       sherman_morrison_fac = eofa_x[0];

       for (int s = 1; s < Ls; s++) { eofa_y[s] = eofa_y[s - 1] * (-mobius_kappa); }

     }

     m5inv_fac = 0.5 / (1. + factor);                           // 0.5 for the spin project factor

     // sherman_morrison_fac held the boundary x value up to here; convert it
     // into the final factor
     sherman_morrison_fac = -0.5 / (1. + sherman_morrison_fac); // 0.5 for the spin project factor

   }


   // Apply the M5_EOFA fifth-dimension kernel to in, with no xpay contribution
   // (scale factor 0.). Both fields must be five-dimensional.
   void DiracMobiusEofa::m5_eofa(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     if (!(in.Ndim() == 5 && out.Ndim() == 5)) { errorQuda("Wrong number of dimensions\n"); }

     checkDWF(in, out);
     checkSpinorAlias(in, out);

     mobius_eofa::apply_dslash5(out, in, in, mass, m5, b_5, c_5, 0., eofa_pm, m5inv_fac, mobius_kappa, eofa_u, eofa_x,
                                eofa_y, sherman_morrison_fac, dagger, M5_EOFA);

     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;

     // 96 = 48 + 48, the second 48 from EOFA
     flops += 96LL * volume + 96LL * bulk_sites + 120LL * wall_sites;
   }


   // Xpay form of m5_eofa: the scale factor a is multiplied by kappa_b^2 before
   // being handed to the M5_EOFA kernel together with the accumulation field x.
   // Both in and out must be five-dimensional.
   void DiracMobiusEofa::m5_eofa_xpay(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x,
                                      double a) const
   {
     if (!(in.Ndim() == 5 && out.Ndim() == 5)) { errorQuda("Wrong number of dimensions\n"); }

     checkDWF(in, out);
     checkSpinorAlias(in, out);

     // a = a * kappa_b^2 (the square is formed first, then applied to a)
     const double kappa_b_sq = mobius_kappa_b * mobius_kappa_b;
     a *= kappa_b_sq;

     // The kernel will actually do (m5 * in - kappa_b^2 * x)
     mobius_eofa::apply_dslash5(out, in, x, mass, m5, b_5, c_5, a, eofa_pm, m5inv_fac, mobius_kappa, eofa_u, eofa_x,
                                eofa_y, sherman_morrison_fac, dagger, M5_EOFA);

     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;

     // 144 = 96 + 48, the 48 from EOFA
     flops += 144LL * volume + 96LL * bulk_sites + 120LL * wall_sites;
   }


   // Full (unpreconditioned) Mobius EOFA operator on a full-site field.
   // out receives the M5_EOFA term applied to in, minus kappa_b times the
   // product of the 4-d operator and the DSLASH5_MOBIUS_PRE operator (the order
   // of that product is swapped when dagger is set).
   void DiracMobiusEofa::M(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     checkFullSpinor(out, in);

     // FIXME broken for variable coefficients
     const double kappa_b = 0.5 / (b_5[0].real() * (4.0 + m5) + 1.0);

     // cannot use Xpay variants since it will scale incorrectly for this operator

     // reuse tmp2 as scratch space when it already is a full-site field
     ColorSpinorField *tmp = (tmp2 && tmp2->SiteSubset() == QUDA_FULL_SITE_SUBSET) ? tmp2 : nullptr;
     bool reset = newTmp(&tmp, in);

     if (dagger != QUDA_DAG_NO) {
       ApplyDomainWall4D(out, in, *gauge, 0.0, m5, b_5, c_5, in, QUDA_INVALID_PARITY, dagger, commDim, profile);
       ApplyDslash5(*tmp, out, out, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS_PRE);
     } else {
       ApplyDslash5(out, in, in, mass, m5, b_5, c_5, 0.0, dagger, DSLASH5_MOBIUS_PRE);
       ApplyDomainWall4D(*tmp, out, *gauge, 0.0, m5, b_5, c_5, in, QUDA_INVALID_PARITY, dagger, commDim, profile);
     }
     // the product now lives in *tmp; out is overwritten with the EOFA fifth-dimension term
     mobius_eofa::apply_dslash5(out, in, in, mass, m5, b_5, c_5, 0., eofa_pm, m5inv_fac, mobius_kappa, eofa_u, eofa_x,
                                eofa_y, sherman_morrison_fac, dagger, M5_EOFA);

     blas::axpy(-kappa_b, *tmp, out);

     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     const long long bulk_sites = (ls - 2) * (volume / ls);
     const long long wall_sites = 2 * volume / ls;
     flops += 72LL * volume + 96LL * bulk_sites + 120LL * wall_sites; // pre
     flops += 1320LL * volume;                                        // dslash4

     // 96 = 48 + 48, the second 48 from EOFA
     flops += 96LL * volume + 96LL * bulk_sites + 120LL * wall_sites; // dslash5

     deleteTmp(&tmp, reset);
   }


   // Normal operator: out = Mdag(M(in)), staged through the tmp1 scratch field.
   void DiracMobiusEofa::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     checkFullSpinor(out, in);

     const bool allocated = newTmp(&tmp1, in);

     M(*tmp1, in);      // tmp1 = M in
     Mdag(out, *tmp1);  // out  = Mdag tmp1

     deleteTmp(&tmp1, allocated);
   }


   // Solver set-up for the unpreconditioned EOFA operator: the source and
   // solution are simply b and x. Preconditioned solution types are rejected.
   void DiracMobiusEofa::prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x,
                                 ColorSpinorField &b, const QudaSolutionType solType) const
   {
     switch (solType) {
     case QUDA_MATPC_SOLUTION:
     case QUDA_MATPCDAG_MATPC_SOLUTION:
       errorQuda("Preconditioned solution requires a preconditioned solve_type");
       break;
     default: break;
     }

     sol = &x;
     src = &b;
   }


   // Nothing to reconstruct for the unpreconditioned EOFA operator: the body is
   // intentionally empty.
   void DiracMobiusEofa::reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType solType) const
   {
     // intentionally empty
   }


   // Construct the preconditioned EOFA operator; everything is delegated to the
   // DiracMobiusEofa constructor.
   DiracMobiusEofaPC::DiracMobiusEofaPC(const DiracParam &param) : DiracMobiusEofa(param)
   {
     // no additional state to initialise
   }


   // Apply the M5INV_EOFA fifth-dimension kernel to in, with no xpay
   // contribution (scale factor 0.). Both fields must be five-dimensional.
   void DiracMobiusEofaPC::m5inv_eofa(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     if (!(in.Ndim() == 5 && out.Ndim() == 5)) { errorQuda("Wrong number of dimensions\n"); }

     checkDWF(in, out);
     // checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     mobius_eofa::apply_dslash5(out, in, in, mass, m5, b_5, c_5, 0., eofa_pm, m5inv_fac, mobius_kappa, eofa_u, eofa_x,
                                eofa_y, sherman_morrison_fac, dagger, M5INV_EOFA);

     // flop accounting: a term growing with Ls per site plus an Ls * (Ls - 1) term
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     flops += (192LL * ls + 96LL) * volume + 3LL * ls * (ls - 1LL);
   }


   // Xpay form of m5inv_eofa: the scale factor a is multiplied by kappa_b^2
   // before being handed to the M5INV_EOFA kernel together with the accumulation
   // field x. Both in and out must be five-dimensional parity fields.
   void DiracMobiusEofaPC::m5inv_eofa_xpay(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x,
                                           double a) const
   {
     if (!(in.Ndim() == 5 && out.Ndim() == 5)) { errorQuda("Wrong number of dimensions\n"); }

     checkDWF(in, out);
     checkParitySpinor(in, out);
     checkSpinorAlias(in, out);

     // a = a * kappa_b^2 (the square is formed first, then applied to a)
     const double kappa_b_sq = mobius_kappa_b * mobius_kappa_b;
     a *= kappa_b_sq;

     // The kernel will actually do (x - kappa_b^2 * m5inv * in)
     mobius_eofa::apply_dslash5(out, in, x, mass, m5, b_5, c_5, a, eofa_pm, m5inv_fac, mobius_kappa, eofa_u, eofa_x,
                                eofa_y, sherman_morrison_fac, dagger, M5INV_EOFA);

     // flop accounting: the inverse cost plus 48 extra flops per site for the xpay
     const long long ls = in.X(4);
     const long long volume = (long long)in.Volume();
     flops += (192LL * ls + 48LL + 96LL) * volume + 3LL * ls * (ls - 1LL);
   }


   // Apply the even-odd preconditioned mobius DWF EOFA operator.
   // The operator sequence depends on whether matpcType is symmetric and on the
   // dagger flag; every sequence ping-pongs between out and the tmp1 scratch
   // field and ends with an xpay step that combines the result with in.
   void DiracMobiusEofaPC::M(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     bool reset1 = newTmp(&tmp1, in);

     const bool odd_odd = (matpcType == QUDA_MATPC_ODD_ODD || matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC);
     const bool symmetric = (matpcType == QUDA_MATPC_EVEN_EVEN || matpcType == QUDA_MATPC_ODD_ODD);
     const int odd_bit = odd_odd ? 1 : 0;

     // p1 carries the value odd_bit, p0 the opposite parity value
     const QudaParity p0 = static_cast<QudaParity>((1 + odd_bit) % 2);
     const QudaParity p1 = static_cast<QudaParity>((0 + odd_bit) % 2);

     // QUDA_MATPC_EVEN_EVEN_ASYMMETRIC : M5 - kappa_b^2 * D4_{eo}D4pre_{oe}D5inv_{ee}D4_{eo}D4pre_{oe}
     // QUDA_MATPC_ODD_ODD_ASYMMETRIC : M5 - kappa_b^2 * D4_{oe}D4pre_{eo}D5inv_{oo}D4_{oe}D4pre_{eo}
     if (symmetric) {
       if (!dagger) {
         Dslash4pre(*tmp1, in, p1);
         Dslash4(out, *tmp1, p0);
         m5inv_eofa(*tmp1, out);
         Dslash4pre(out, *tmp1, p0);
         Dslash4(*tmp1, out, p1);
         m5inv_eofa_xpay(out, *tmp1, in, -1.);
       } else {
         m5inv_eofa(*tmp1, in);
         Dslash4(out, *tmp1, p0);
         Dslash4pre(*tmp1, out, p0);
         m5inv_eofa(out, *tmp1);
         Dslash4(*tmp1, out, p1);
         Dslash4preXpay(out, *tmp1, p1, in, -1.);
       }
     } else {
       if (!dagger) {
         Dslash4pre(*tmp1, in, p1);
         Dslash4(out, *tmp1, p0);
         m5inv_eofa(*tmp1, out);
         Dslash4pre(out, *tmp1, p0);
         Dslash4(*tmp1, out, p1);
         m5_eofa_xpay(out, in, *tmp1, -1.);
       } else {
         Dslash4(*tmp1, in, p0);
         Dslash4pre(out, *tmp1, p0);
         m5inv_eofa(*tmp1, out);
         Dslash4(out, *tmp1, p1);
         Dslash4pre(*tmp1, out, p1);
         m5_eofa_xpay(out, in, *tmp1, -1.);
       }
     }

     deleteTmp(&tmp1, reset1);
   }


   // Solver set-up for the preconditioned EOFA operator.
   // For a preconditioned solution type, src / sol are b / x unchanged.
   // Otherwise the preconditioned source is built in one parity half of x
   // (chosen by matpcType), and sol is pointed at the other half.
   void DiracMobiusEofaPC::prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x,
                                   ColorSpinorField &b, const QudaSolutionType solType) const
   {
     // we desire solution to preconditioned system
     if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) {
       src = &b;
       sol = &x;
       return;
     }

     // we desire solution to full system
     bool reset = newTmp(&tmp1, b.Even());

     switch (matpcType) {
     case QUDA_MATPC_EVEN_EVEN:
       // src = D5^-1 (b_e + k D4_eo * D4pre * D5^-1 b_o)
       src = &(x.Odd());
       m5inv_eofa(*tmp1, b.Odd());
       Dslash4pre(*src, *tmp1, QUDA_ODD_PARITY);
       Dslash4Xpay(*tmp1, *src, QUDA_EVEN_PARITY, b.Even(), 1.0);
       m5inv_eofa(*src, *tmp1);
       sol = &(x.Even());
       break;

     case QUDA_MATPC_ODD_ODD:
       // src = b_o + k D4_oe * D4pre * D5inv b_e
       src = &(x.Even());
       m5inv_eofa(*tmp1, b.Even());
       Dslash4pre(*src, *tmp1, QUDA_EVEN_PARITY);
       Dslash4Xpay(*tmp1, *src, QUDA_ODD_PARITY, b.Odd(), 1.0);
       m5inv_eofa(*src, *tmp1);
       sol = &(x.Odd());
       break;

     case QUDA_MATPC_EVEN_EVEN_ASYMMETRIC:
       // src = b_e + k D4_eo * D4pre * D5inv b_o
       src = &(x.Odd());
       m5inv_eofa(*src, b.Odd());
       Dslash4pre(*tmp1, *src, QUDA_ODD_PARITY);
       Dslash4Xpay(*src, *tmp1, QUDA_EVEN_PARITY, b.Even(), 1.0);
       sol = &(x.Even());
       break;

     case QUDA_MATPC_ODD_ODD_ASYMMETRIC:
       // src = b_o + k D4_oe * D4pre * D5inv b_e
       src = &(x.Even());
       m5inv_eofa(*src, b.Even());
       Dslash4pre(*tmp1, *src, QUDA_EVEN_PARITY);
       Dslash4Xpay(*src, *tmp1, QUDA_ODD_PARITY, b.Odd(), 1.0);
       sol = &(x.Odd());
       break;

     default: errorQuda("MatPCType %d not valid for DiracMobiusEofaPC", matpcType); break;
     }

     // here we use final solution to store parity solution and parity source
     // b is now up for grabs if we want
     deleteTmp(&tmp1, reset);
   }


   // Rebuild the full solution after a preconditioned EOFA solve: the parity
   // half of x that was not solved for is computed from b and the solved half.
   // Nothing is done when a preconditioned solution type was requested.
   //
   // The invalid-matpcType error now names this class (DiracMobiusEofaPC); it
   // used to report DiracMobiusPC, a copy-paste slip that pointed at the wrong
   // operator.
   void DiracMobiusEofaPC::reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType solType) const
   {
     if (solType == QUDA_MATPC_SOLUTION || solType == QUDA_MATPCDAG_MATPC_SOLUTION) { return; }

     bool reset1 = newTmp(&tmp1, x.Even());

     // create full solution
     checkFullSpinor(x, b);
     if (matpcType == QUDA_MATPC_EVEN_EVEN || matpcType == QUDA_MATPC_EVEN_EVEN_ASYMMETRIC) {
       // psi_o = M5^-1 (b_o + k_b D4_oe D4pre x_e)
       Dslash4pre(x.Odd(), x.Even(), QUDA_EVEN_PARITY);
       Dslash4Xpay(*tmp1, x.Odd(), QUDA_ODD_PARITY, b.Odd(), 1.0);
       m5inv_eofa(x.Odd(), *tmp1);
     } else if (matpcType == QUDA_MATPC_ODD_ODD || matpcType == QUDA_MATPC_ODD_ODD_ASYMMETRIC) {
       // psi_e = M5^-1 (b_e + k_b D4_eo D4pre x_o)
       Dslash4pre(x.Even(), x.Odd(), QUDA_ODD_PARITY);
       Dslash4Xpay(*tmp1, x.Even(), QUDA_EVEN_PARITY, b.Even(), 1.0);
       m5inv_eofa(x.Even(), *tmp1);
     } else {
       errorQuda("MatPCType %d not valid for DiracMobiusEofaPC", matpcType);
     }

     deleteTmp(&tmp1, reset1);
   }


   // Normal operator of the preconditioned EOFA system: out = Mdag(M(in)),
   // staged through tmp2 (M itself uses tmp1 as scratch).
   void DiracMobiusEofaPC::MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     const bool allocated = newTmp(&tmp2, in);

     M(*tmp2, in);      // tmp2 = M in
     Mdag(out, *tmp2);  // out  = Mdag tmp2

     deleteTmp(&tmp2, allocated);
   }


   // Apply the operator to a full-site field one parity half at a time:
   // ye = Mee * xe + Meo * xo, yo = Moo * xo + Moe * xe.
   // The diagonal term goes through m5_eofa into tmp1, the off-diagonal term is
   // staged in tmp2, and an xpay call combines both into the output half.
   // The dagger path prints a message on every call and uses the scale factor
   // -1 / kappa_b in place of -1.
   void DiracMobiusEofaPC::full_dslash(ColorSpinorField &out, const ColorSpinorField &in) const
   {
     checkFullSpinor(out, in);

     bool reset1 = newTmp(&tmp1, in.Odd());
     bool reset2 = newTmp(&tmp2, in.Odd());

     if (dagger) {
       printfQuda("Quda EOFA full dslash dagger=yes\n");

       // Even
       m5_eofa(*tmp1, in.Even());
       Dslash4(*tmp2, in.Odd(), QUDA_EVEN_PARITY);
       Dslash4preXpay(out.Even(), *tmp2, QUDA_EVEN_PARITY, *tmp1, -1. / mobius_kappa_b);

       // Odd
       m5_eofa(*tmp1, in.Odd());
       Dslash4(*tmp2, in.Even(), QUDA_ODD_PARITY);
       Dslash4preXpay(out.Odd(), *tmp2, QUDA_ODD_PARITY, *tmp1, -1. / mobius_kappa_b);
     } else {
       // Even
       m5_eofa(*tmp1, in.Even());
       Dslash4pre(*tmp2, in.Odd(), QUDA_ODD_PARITY);
       Dslash4Xpay(out.Even(), *tmp2, QUDA_EVEN_PARITY, *tmp1, -1.);

       // Odd
       m5_eofa(*tmp1, in.Odd());
       Dslash4pre(*tmp2, in.Even(), QUDA_EVEN_PARITY);
       Dslash4Xpay(out.Odd(), *tmp2, QUDA_ODD_PARITY, *tmp1, -1.);
     }

     deleteTmp(&tmp1, reset1);
     deleteTmp(&tmp2, reset2);
   }

 } // namespace quda

 #include <iostream>

 #include <dirac_quda.h>

 #include <blas_quda.h>

blas_quda.h

quda::ColorSpinorField
Definition: color_spinor_field.h:379

quda::ColorSpinorField::Ndim
int Ndim() const
Definition: color_spinor_field.h:483

quda::ColorSpinorField::Odd
const ColorSpinorField & Odd() const
Definition: color_spinor_field.cpp:578

quda::ColorSpinorField::SiteSubset
QudaSiteSubset SiteSubset() const
Definition: color_spinor_field.h:566

quda::ColorSpinorField::Create
static ColorSpinorField * Create(const ColorSpinorParam &param)
Definition: color_spinor_field.cpp:714

quda::ColorSpinorField::Even
const ColorSpinorField & Even() const
Definition: color_spinor_field.cpp:570

quda::ColorSpinorField::X
const int * X() const
Definition: color_spinor_field.h:484

quda::ColorSpinorField::Volume
size_t Volume() const
Definition: color_spinor_field.h:489

quda::ColorSpinorParam
Definition: color_spinor_field.h:131

quda::DiracDomainWall
Definition: dirac_quda.h:704

quda::DiracDomainWall::operator=
DiracDomainWall & operator=(const DiracDomainWall &dirac)
Definition: dirac_domain_wall.cpp:25

quda::DiracDomainWall::Ls
int Ls
Definition: dirac_quda.h:709

quda::DiracDomainWall::m5
double m5
Definition: dirac_quda.h:707

quda::DiracDomainWall::checkDWF
void checkDWF(const ColorSpinorField &out, const ColorSpinorField &in) const
Definition: dirac_domain_wall.cpp:35

quda::Dirac::flops
unsigned long long flops
Definition: dirac_quda.h:150

quda::Dirac::newTmp
bool newTmp(ColorSpinorField **, const ColorSpinorField &) const
Definition: dirac.cpp:72

quda::Dirac::matpcType
QudaMatPCType matpcType
Definition: dirac_quda.h:148

quda::Dirac::gauge
cudaGaugeField * gauge
Definition: dirac_quda.h:144

quda::Dirac::mass
double mass
Definition: dirac_quda.h:146

quda::Dirac::deleteTmp
void deleteTmp(ColorSpinorField **, const bool &reset) const
Definition: dirac.cpp:83

quda::Dirac::checkParitySpinor
virtual void checkParitySpinor(const ColorSpinorField &, const ColorSpinorField &) const
Check parity spinors are usable (check geometry ?)
Definition: dirac.cpp:108

quda::Dirac::tmp1
ColorSpinorField * tmp1
Definition: dirac_quda.h:151

quda::Dirac::getMatPCType
QudaMatPCType getMatPCType() const
returns preconditioning type
Definition: dirac_quda.h:323

quda::Dirac::profile
TimeProfile profile
Definition: dirac_quda.h:161

quda::Dirac::dagger
QudaDagType dagger
Definition: dirac_quda.h:149

quda::Dirac::checkSpinorAlias
void checkSpinorAlias(const ColorSpinorField &, const ColorSpinorField &) const
check spinors do not alias
Definition: dirac.cpp:146

quda::Dirac::checkFullSpinor
virtual void checkFullSpinor(const ColorSpinorField &, const ColorSpinorField &) const
check full spinors are compatible (check geometry ?)
Definition: dirac.cpp:138

quda::Dirac::tmp2
ColorSpinorField * tmp2
Definition: dirac_quda.h:152

quda::Dirac::commDim
int commDim[QUDA_MAX_DIM]
Definition: dirac_quda.h:159

quda::Dirac::Mdag
void Mdag(ColorSpinorField &out, const ColorSpinorField &in) const
Apply Mdag (the daggered operator of M).
Definition: dirac.cpp:92

quda::DiracMobiusEofa
Definition: dirac_quda.h:890

quda::DiracMobiusEofa::mq3
double mq3
Definition: dirac_quda.h:900

quda::DiracMobiusEofa::eofa_y
double eofa_y[QUDA_MAX_DWF_LS]
Definition: dirac_quda.h:903

quda::DiracMobiusEofa::m5_eofa_xpay
void m5_eofa_xpay(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double a=-1.) const
Definition: dirac_mobius.cpp:546

quda::DiracMobiusEofa::prepare
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:617

quda::DiracMobiusEofa::m5_eofa
void m5_eofa(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: dirac_mobius.cpp:528

quda::DiracMobiusEofa::MdagM
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
Definition: dirac_mobius.cpp:605

quda::DiracMobiusEofa::m5inv_fac
double m5inv_fac
Definition: dirac_quda.h:894

quda::DiracMobiusEofa::eofa_x
double eofa_x[QUDA_MAX_DWF_LS]
Definition: dirac_quda.h:902

quda::DiracMobiusEofa::DiracMobiusEofa
DiracMobiusEofa(const DiracParam &param)
Definition: dirac_mobius.cpp:462

quda::DiracMobiusEofa::eofa_pm
int eofa_pm
Definition: dirac_quda.h:897

quda::DiracMobiusEofa::eofa_shift
double eofa_shift
Definition: dirac_quda.h:896

quda::DiracMobiusEofa::mq1
double mq1
Definition: dirac_quda.h:898

quda::DiracMobiusEofa::M
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
Definition: dirac_mobius.cpp:567

quda::DiracMobiusEofa::mq2
double mq2
Definition: dirac_quda.h:899

quda::DiracMobiusEofa::sherman_morrison_fac
double sherman_morrison_fac
Definition: dirac_quda.h:895

quda::DiracMobiusEofa::eofa_u
double eofa_u[QUDA_MAX_DWF_LS]
Definition: dirac_quda.h:901

quda::DiracMobiusEofa::reconstruct
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:628

quda::DiracMobiusEofaPC::full_dslash
void full_dslash(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: dirac_mobius.cpp:795

quda::DiracMobiusEofaPC::M
void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
Definition: dirac_mobius.cpp:669

quda::DiracMobiusEofaPC::prepare
void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:712

quda::DiracMobiusEofaPC::reconstruct
void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:761

quda::DiracMobiusEofaPC::MdagM
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
Definition: dirac_mobius.cpp:786

quda::DiracMobiusEofaPC::m5inv_eofa_xpay
void m5inv_eofa_xpay(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double a=-1.) const
Definition: dirac_mobius.cpp:650

quda::DiracMobiusEofaPC::m5inv_eofa
void m5inv_eofa(ColorSpinorField &out, const ColorSpinorField &in) const
Definition: dirac_mobius.cpp:635

quda::DiracMobiusEofaPC::DiracMobiusEofaPC
DiracMobiusEofaPC(const DiracParam &param)
Definition: dirac_mobius.cpp:633

quda::DiracMobius
Definition: dirac_quda.h:815

quda::DiracMobius::c_5
Complex c_5[QUDA_MAX_DWF_LS]
Definition: dirac_quda.h:820

quda::DiracMobius::Dslash4
void Dslash4(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Apply the local MdagM operator: equivalent to applying zero Dirichlet boundary condition to MdagM on ...
Definition: dirac_mobius.cpp:38

quda::DiracMobius::mobius_kappa_b
double mobius_kappa_b
Definition: dirac_quda.h:829

quda::DiracMobius::M
virtual void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
Definition: dirac_mobius.cpp:123

quda::DiracMobius::reconstruct
virtual void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:181

quda::DiracMobius::mobius_kappa
double mobius_kappa
Definition: dirac_quda.h:831

quda::DiracMobius::Dslash5
void Dslash5(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Definition: dirac_mobius.cpp:64

quda::DiracMobius::Dslash5Xpay
void Dslash5Xpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
Definition: dirac_mobius.cpp:108

quda::DiracMobius::Dslash4Xpay
void Dslash4Xpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
Definition: dirac_mobius.cpp:79

quda::DiracMobius::Dslash4pre
void Dslash4pre(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Definition: dirac_mobius.cpp:49

quda::DiracMobius::b_5
Complex b_5[QUDA_MAX_DWF_LS]
Definition: dirac_quda.h:819

quda::DiracMobius::Dslash4preXpay
void Dslash4preXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
Definition: dirac_mobius.cpp:91

quda::DiracMobius::zMobius
bool zMobius
Definition: dirac_quda.h:827

quda::DiracMobius::MdagM
virtual void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
Definition: dirac_mobius.cpp:158

quda::DiracMobius::prepare
virtual void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:170

quda::DiracMobius::DiracMobius
DiracMobius(const DiracParam &param)
Definition: dirac_mobius.cpp:7

quda::DiracMobius::mobius_kappa_c
double mobius_kappa_c
Definition: dirac_quda.h:830

quda::DiracMobiusPC
Definition: dirac_quda.h:861

quda::DiracMobiusPC::extended_gauge
cudaGaugeField * extended_gauge
Definition: dirac_quda.h:864

quda::DiracMobiusPC::Dslash5invXpay
void Dslash5invXpay(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity, const ColorSpinorField &x, const double &k) const
Definition: dirac_mobius.cpp:233

quda::DiracMobiusPC::MdagM
void MdagM(ColorSpinorField &out, const ColorSpinorField &in) const
Apply MdagM operator which may be optimized.
Definition: dirac_mobius.cpp:291

quda::DiracMobiusPC::Dslash5inv
void Dslash5inv(ColorSpinorField &out, const ColorSpinorField &in, const QudaParity parity) const
Definition: dirac_mobius.cpp:211

quda::DiracMobiusPC::~DiracMobiusPC
virtual ~DiracMobiusPC()
Definition: dirac_mobius.cpp:196

quda::DiracMobiusPC::prepare
void prepare(ColorSpinorField *&src, ColorSpinorField *&sol, ColorSpinorField &x, ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:299

quda::DiracMobiusPC::M
void M(ColorSpinorField &out, const ColorSpinorField &in) const
Apply M for the dirac op. E.g. the Schur Complement operator.
Definition: dirac_mobius.cpp:248

quda::DiracMobiusPC::reconstruct
void reconstruct(ColorSpinorField &x, const ColorSpinorField &b, const QudaSolutionType) const
Definition: dirac_mobius.cpp:350

quda::DiracMobiusPC::DiracMobiusPC
DiracMobiusPC(const DiracParam &param)
Definition: dirac_mobius.cpp:186

quda::DiracMobiusPC::MdagMLocal
void MdagMLocal(ColorSpinorField &out, const ColorSpinorField &in) const
Apply the local MdagM operator: equivalent to applying zero Dirichlet boundary condition to MdagM on ...
Definition: dirac_mobius.cpp:375

quda::DiracMobiusPC::operator=
DiracMobiusPC & operator=(const DiracMobiusPC &dirac)
Definition: dirac_mobius.cpp:201

quda::DiracParam
Definition: dirac_quda.h:21

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:567

quda::cudaColorSpinorField
Definition: color_spinor_field.h:682

comm_dim_partitioned
int comm_dim_partitioned(int dim)
Definition: communicator_stack.cpp:74

eofa_shift
double eofa_shift
Definition: command_line_params.cpp:230

eofa_pm
int eofa_pm
Definition: command_line_params.cpp:229

dagger
bool dagger
Definition: command_line_params.cpp:40

mat
void mat(void *out, void **link, void *in, int dagger_bit, int mu, QudaPrecision sPrecision, QudaPrecision gPrecision)
Definition: covdev_reference.cpp:109

dirac
GaugeCovDev * dirac
Definition: covdev_test.cpp:42

parity
QudaParity parity
Definition: covdev_test.cpp:40

tmp
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34

dirac_quda.h

dslash_test_type::Dslash4pre
@ Dslash4pre

QUDA_DAG_NO
@ QUDA_DAG_NO
Definition: enum_quda.h:223

QUDA_VERBOSE
@ QUDA_VERBOSE
Definition: enum_quda.h:267

QUDA_FULL_SITE_SUBSET
@ QUDA_FULL_SITE_SUBSET
Definition: enum_quda.h:333

QUDA_EVEN_PARITY
@ QUDA_EVEN_PARITY
Definition: enum_quda.h:284

QUDA_ODD_PARITY
@ QUDA_ODD_PARITY
Definition: enum_quda.h:284

QUDA_INVALID_PARITY
@ QUDA_INVALID_PARITY
Definition: enum_quda.h:284

QudaSolutionType
enum QudaSolutionType_s QudaSolutionType

QUDA_MATPC_ODD_ODD_ASYMMETRIC
@ QUDA_MATPC_ODD_ODD_ASYMMETRIC
Definition: enum_quda.h:219

QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
@ QUDA_MATPC_EVEN_EVEN_ASYMMETRIC
Definition: enum_quda.h:218

QUDA_MATPC_ODD_ODD
@ QUDA_MATPC_ODD_ODD
Definition: enum_quda.h:217

QUDA_MATPC_EVEN_EVEN
@ QUDA_MATPC_EVEN_EVEN
Definition: enum_quda.h:216

QUDA_MATPC_SOLUTION
@ QUDA_MATPC_SOLUTION
Definition: enum_quda.h:159

QUDA_MATPCDAG_MATPC_SOLUTION
@ QUDA_MATPCDAG_MATPC_SOLUTION
Definition: enum_quda.h:161

QUDA_QUARTER_PRECISION
@ QUDA_QUARTER_PRECISION
Definition: enum_quda.h:62

QUDA_HALF_PRECISION
@ QUDA_HALF_PRECISION
Definition: enum_quda.h:63

QUDA_NULL_FIELD_CREATE
@ QUDA_NULL_FIELD_CREATE
Definition: enum_quda.h:360

QudaParity
enum QudaParity_s QudaParity

quda::blas::axpy
void axpy(double a, ColorSpinorField &x, ColorSpinorField &y)
Definition: blas_quda.h:43

quda::mobius_eofa::apply_dslash5
void apply_dslash5(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, int eofa_pm, double inv, double kappa, const double *eofa_u, const double *eofa_x, const double *eofa_y, double sherman_morrison, bool dagger, Dslash5Type type)

quda::mobius_tensor_core::apply_fused_dslash
void apply_fused_dslash(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, ColorSpinorField &y, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, bool dagger, int parity, int shift[4], int halo_shift[4], MdwfFusedDslashType type)

quda
Definition: blas_lapack.h:24

quda::norm2
double norm2(const CloverField &a, bool inverse=false)
Definition: clover_field.cpp:485

quda::MdwfFusedDslashType::D4DAG_D5PREDAG_D5INVDAG
@ D4DAG_D5PREDAG_D5INVDAG

quda::MdwfFusedDslashType::D4_D5INV_D5PRE
@ D4_D5INV_D5PRE

quda::MdwfFusedDslashType::D4_D5INV_D5INVDAG
@ D4_D5INV_D5INVDAG

quda::MdwfFusedDslashType::D4DAG_D5PREDAG
@ D4DAG_D5PREDAG

quda::MdwfFusedDslashType::D5PRE
@ D5PRE

quda::Complex
std::complex< double > Complex
Definition: quda_internal.h:86

quda::createExtendedGauge
cudaGaugeField * createExtendedGauge(cudaGaugeField &in, const int *R, TimeProfile &profile, bool redundant_comms=false, QudaReconstructType recon=QUDA_RECONSTRUCT_INVALID)
Definition: gauge_field.cpp:364

quda::pow
__host__ __device__ ValueType pow(ValueType x, ExponentType e)
Definition: complex_quda.h:111

quda::ApplyDomainWall4D
void ApplyDomainWall4D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order...

quda::DSLASH5_MOBIUS_PRE
@ DSLASH5_MOBIUS_PRE
Definition: dslash_quda.h:559

quda::DSLASH5_MOBIUS
@ DSLASH5_MOBIUS
Definition: dslash_quda.h:560

quda::M5_INV_ZMOBIUS
@ M5_INV_ZMOBIUS
Definition: dslash_quda.h:563

quda::M5INV_EOFA
@ M5INV_EOFA
Definition: dslash_quda.h:565

quda::M5_EOFA
@ M5_EOFA
Definition: dslash_quda.h:564

quda::M5_INV_MOBIUS
@ M5_INV_MOBIUS
Definition: dslash_quda.h:562

quda::ApplyDslash5
void ApplyDslash5(ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, bool dagger, Dslash5Type type)
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator....

csParam
ColorSpinorParam csParam
Definition: pack_test.cpp:25

param
QudaGaugeParam param
Definition: pack_test.cpp:18

printfQuda
#define printfQuda(...)
Definition: util_quda.h:114

getVerbosity
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21

errorQuda
#define errorQuda(...)
Definition: util_quda.h:120