QUDA  v1.1.0
A library for QCD on GPUs
dslash_twisted_clover_preconditioned.cu
Go to the documentation of this file.
1 #include <gauge_field.h>
2 #include <color_spinor_field.h>
3 #include <clover_field.h>
4 #include <dslash.h>
5 #include <worker.h>
6 
7 #include <dslash_policy.cuh>
8 #include <kernels/dslash_twisted_clover_preconditioned.cuh>
9 
10 /**
11  This is the preconditioned gauged twisted-clover operator
12 */
13 
14 namespace quda
15 {
16 
17  template <typename Arg> class TwistedCloverPreconditioned : public Dslash<twistedCloverPreconditioned, Arg>
18  {
19  using Dslash = Dslash<twistedCloverPreconditioned, Arg>;
20  using Dslash::arg;
21  using Dslash::in;
22 
23  public:
24  TwistedCloverPreconditioned(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in) :
25  Dslash(arg, out, in)
26  {
27  }
28 
29  void apply(const qudaStream_t &stream)
30  {
31  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
32  Dslash::setParam(tp);
33  // specialize here to constrain the template instantiation
34  if (arg.nParity == 1) {
35  if (arg.xpay) {
36  if (arg.dagger) errorQuda("xpay operator only defined for not dagger");
37  Dslash::template instantiate<packShmem, 1, false, true>(tp, stream);
38  } else {
39  if (arg.dagger)
40  Dslash::template instantiate<packShmem, 1, true, false>(tp, stream);
41  else
42  Dslash::template instantiate<packShmem, 1, false, false>(tp, stream);
43  }
44  } else {
45  errorQuda("Preconditioned twisted-clover operator not defined nParity=%d", arg.nParity);
46  }
47  }
48 
49  long long flops() const
50  {
51  int clover_flops = 504 + 48;
52  long long flops = Dslash::flops();
53  switch (arg.kernel_type) {
54  case EXTERIOR_KERNEL_X:
55  case EXTERIOR_KERNEL_Y:
56  case EXTERIOR_KERNEL_Z:
57  case EXTERIOR_KERNEL_T: flops += clover_flops * 2 * in.GhostFace()[arg.kernel_type]; break;
58  case EXTERIOR_KERNEL_ALL:
59  flops += clover_flops * 2 * (in.GhostFace()[0] + in.GhostFace()[1] + in.GhostFace()[2] + in.GhostFace()[3]);
60  break;
61  case INTERIOR_KERNEL:
62  case UBER_KERNEL:
63  case KERNEL_POLICY:
64  flops += clover_flops * in.Volume();
65 
66  if (arg.kernel_type == KERNEL_POLICY) break;
67  // now correct for flops done by exterior kernel
68  long long ghost_sites = 0;
69  for (int d = 0; d < 4; d++)
70  if (arg.commDim[d]) ghost_sites += 2 * in.GhostFace()[d];
71  flops -= clover_flops * ghost_sites;
72 
73  break;
74  }
75  return flops;
76  }
77 
78  long long bytes() const
79  {
80  int clover_bytes = 72 * in.Precision() + (isFixed<typename Arg::Float>::value ? 2 * sizeof(float) : 0);
81  if (!arg.dynamic_clover) clover_bytes *= 2;
82 
83  long long bytes = Dslash::bytes();
84  switch (arg.kernel_type) {
85  case EXTERIOR_KERNEL_X:
86  case EXTERIOR_KERNEL_Y:
87  case EXTERIOR_KERNEL_Z:
88  case EXTERIOR_KERNEL_T: bytes += clover_bytes * 2 * in.GhostFace()[arg.kernel_type]; break;
89  case EXTERIOR_KERNEL_ALL:
90  bytes += clover_bytes * 2 * (in.GhostFace()[0] + in.GhostFace()[1] + in.GhostFace()[2] + in.GhostFace()[3]);
91  break;
92  case INTERIOR_KERNEL:
93  case UBER_KERNEL:
94  case KERNEL_POLICY:
95  bytes += clover_bytes * in.Volume();
96 
97  if (arg.kernel_type == KERNEL_POLICY) break;
98  // now correct for bytes done by exterior kernel
99  long long ghost_sites = 0;
100  for (int d = 0; d < 4; d++)
101  if (arg.commDim[d]) ghost_sites += 2 * in.GhostFace()[d];
102  bytes -= clover_bytes * ghost_sites;
103 
104  break;
105  }
106 
107  return bytes;
108  }
109  };
110 
111  template <typename Float, int nColor, QudaReconstructType recon> struct TwistedCloverPreconditionedApply {
112 
113  inline TwistedCloverPreconditionedApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U,
114  const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger,
115  const int *comm_override, TimeProfile &profile)
116  {
117  constexpr int nDim = 4;
118  TwistedCloverArg<Float, nColor, nDim, recon> arg(out, in, U, C, a, b, xpay, x, parity, dagger, comm_override);
119  TwistedCloverPreconditioned<decltype(arg)> twisted(arg, out, in);
120 
121  dslash::DslashPolicyTune<decltype(twisted)> policy(twisted,
122  const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)), in.VolumeCB(),
123  in.GhostFaceCB(), profile);
124  policy.apply(0);
125  }
126  };
127 
128  /*
129  Apply the preconditioned twisted-mass Dslash operator
130 
131  out = x + a*A^{-1} D * in = x + a*(C + i*b*gamma_5)^{-1}*\sum_mu U_{-\mu}(x)in(x+mu) + U^\dagger_mu(x-mu)in(x-mu)
132  */
133  void ApplyTwistedCloverPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U,
134  const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger,
135  const int *comm_override, TimeProfile &profile)
136  {
137 #ifdef GPU_TWISTED_CLOVER_DIRAC
138  instantiate<TwistedCloverPreconditionedApply>(out, in, U, C, a, b, xpay, x, parity, dagger, comm_override, profile);
139 #else
140  errorQuda("Twisted-clover dslash has not been built");
141 #endif // GPU_TWISTED_CLOVER_DIRAC
142  }
143 
144 } // namespace quda