QUDA  v1.1.0
A library for QCD on GPUs
dslash_domain_wall_5d.cu
Go to the documentation of this file.
1 #include <gauge_field.h>
2 #include <color_spinor_field.h>
3 #include <dslash.h>
4 #include <worker.h>
5 
6 #include <dslash_policy.cuh>
7 #include <kernels/dslash_domain_wall_5d.cuh>
8 
9 /**
10  This is the gauged domain-wall 5-d preconditioned operator.
11 */
12 
13 namespace quda
14 {
15 
16  template <typename Arg> class DomainWall5D : public Dslash<domainWall5D, Arg>
17  {
18  using Dslash = Dslash<domainWall5D, Arg>;
19  using Dslash::arg;
20  using Dslash::in;
21 
22  public:
23  DomainWall5D(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in) : Dslash(arg, out, in)
24  {
25  TunableVectorYZ::resizeVector(in.X(4), arg.nParity);
26  }
27 
28  void apply(const qudaStream_t &stream)
29  {
30  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
31  Dslash::setParam(tp);
32  Dslash::template instantiate<packShmem>(tp, stream);
33  }
34 
35  long long flops() const
36  {
37  long long flops = Dslash::flops();
38  switch (arg.kernel_type) {
39  case INTERIOR_KERNEL:
40  case UBER_KERNEL:
41  case KERNEL_POLICY: {
42  int Ls = in.X(4);
43  long long bulk = (Ls - 2) * (in.Volume() / Ls);
44  long long wall = 2 * (in.Volume() / Ls);
45  flops += 96ll * bulk + 120ll * wall;
46  } break;
47  default: break; // 5-d flops are in the interior kernel
48  }
49  return flops;
50  }
51 
52  long long bytes() const
53  {
54  int spinor_bytes = 2 * in.Ncolor() * in.Nspin() * in.Precision() + (isFixed<typename Arg::Float>::value ? sizeof(float) : 0);
55  long long bytes = Dslash::bytes();
56  switch (arg.kernel_type) {
57  case INTERIOR_KERNEL:
58  case UBER_KERNEL:
59  case KERNEL_POLICY: bytes += 2 * spinor_bytes * in.VolumeCB(); break;
60  default: break;
61  }
62  return bytes;
63  }
64  };
65 
66  template <typename Float, int nColor, QudaReconstructType recon> struct DomainWall5DApply {
67 
68  inline DomainWall5DApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a,
69  double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
70  {
71  constexpr int nDim = 5;
72  DomainWall5DArg<Float, nColor, nDim, recon> arg(out, in, U, a, m_f, a != 0.0, x, parity, dagger, comm_override);
73  DomainWall5D<decltype(arg)> dwf(arg, out, in);
74 
75  dslash::DslashPolicyTune<decltype(dwf)> policy(
76  dwf, const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)),
77  in.getDslashConstant().volume_4d_cb, in.getDslashConstant().ghostFaceCB, profile);
78  policy.apply(0);
79  }
80  };
81 
82  // Apply the 4-d preconditioned domain-wall Dslash operator
83  // out(x) = M*in = in(x) + a*\sum_mu U_{-\mu}(x)in(x+mu) + U^\dagger_mu(x-mu)in(x-mu)
84  void ApplyDomainWall5D(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f,
85  const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile)
86  {
87 #ifdef GPU_DOMAIN_WALL_DIRAC
88  instantiate<DomainWall5DApply>(out, in, U, a, m_f, x, parity, dagger, comm_override, profile);
89 #else
90  errorQuda("Domain-wall dslash has not been built");
91 #endif // GPU_DOMAIN_WALL_DIRAC
92  }
93 
94 } // namespace quda