QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_twisted_mass_preconditioned.cu
Go to the documentation of this file.
1 #include <gauge_field.h>
2 #include <color_spinor_field.h>
3 #include <dslash.h>
4 #include <worker.h>
5 
6 #include <dslash_policy.cuh>
8 
13 namespace quda
14 {
15 
20  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
22  static constexpr const char *kernel = "quda::twistedMassPreconditionedGPU"; // kernel name for jit compilation
23  template <typename Dslash>
24  inline static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
25  {
26  static_assert(nParity == 1, "preconditioned twisted-mass operator only defined for nParity=1");
27  dslash.launch(
28  twistedMassPreconditionedGPU<Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg>, tp, arg, stream);
29  }
30  };
31 
32  template <typename Float, int nDim, int nColor, typename Arg> class TwistedMassPreconditioned : public Dslash<Float>
33  {
34 
35 protected:
36  Arg &arg;
38 
39 public:
41  Dslash<Float>(arg, out, in, "kernels/dslash_twisted_mass_preconditioned.cuh"),
42  arg(arg),
43  in(in)
44  {
45  if (arg.asymmetric)
46  for (int i = 0; i < 8; i++)
47  if (i != 4) { strcat(Dslash<Float>::aux[i], ",asym"); }
48  }
49 
51 
52  void apply(const cudaStream_t &stream)
53  {
54  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
56  if (arg.asymmetric && !arg.dagger) errorQuda("asymmetric operator only defined for dagger");
57  if (arg.asymmetric && arg.xpay) errorQuda("asymmetric operator not defined for xpay");
58 
59  if (arg.nParity == 1) {
60  if (arg.xpay)
61  Dslash<Float>::template instantiate<TwistedMassPreconditionedLaunch, nDim, nColor, 1, true>(tp, arg, stream);
62  else
63  Dslash<Float>::template instantiate<TwistedMassPreconditionedLaunch, nDim, nColor, 1, false>(tp, arg, stream);
64  } else {
65  errorQuda("Preconditioned twisted-mass operator not defined nParity=%d", arg.nParity);
66  }
67  }
68 
69  long long flops() const
70  {
71  long long flops = Dslash<Float>::flops();
72  switch (arg.kernel_type) {
73  case EXTERIOR_KERNEL_X:
74  case EXTERIOR_KERNEL_Y:
75  case EXTERIOR_KERNEL_Z:
76  case EXTERIOR_KERNEL_T:
77  case EXTERIOR_KERNEL_ALL: break; // twisted-mass flops are in the interior kernel
78  case INTERIOR_KERNEL:
79  case KERNEL_POLICY:
80  flops += 2 * nColor * 4 * 2 * in.Volume(); // complex * Nc * Ns * fma * vol
81  break;
82  }
83  return flops;
84  }
85 
86  TuneKey tuneKey() const
87  {
88  return TuneKey(in.VolString(), typeid(*this).name(), Dslash<Float>::aux[arg.kernel_type]);
89  }
90  };
91 
92  template <typename Float, int nColor, QudaReconstructType recon> struct TwistedMassPreconditionedApply {
93 
95  double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric,
96  const int *comm_override, TimeProfile &profile)
97  {
98  constexpr int nDim = 4;
99  TwistedMassArg<Float, nColor, recon> arg(out, in, U, a, b, xpay, x, parity, dagger, asymmetric, comm_override);
101 
103  const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)), in.VolumeCB(),
104  in.GhostFaceCB(), profile);
105  policy.apply(0);
106 
107  checkCudaError();
108  }
109  };
110 
111  /*
112  Apply the preconditioned twisted-mass Dslash operator
113 
114  out = x + A^{-1} D * in = x + a*(1 + i*b*gamma_5)*\sum_mu U_{-\mu}(x)in(x+mu) + U^\dagger_mu(x-mu)in(x-mu)
115  */
117  double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric,
118  const int *comm_override, TimeProfile &profile)
119  {
120 #ifdef GPU_TWISTED_MASS_DIRAC
121  if (in.V() == out.V()) errorQuda("Aliasing pointers");
122  if (in.FieldOrder() != out.FieldOrder())
123  errorQuda("Field order mismatch in = %d, out = %d", in.FieldOrder(), out.FieldOrder());
124 
125  // check all precisions match
126  checkPrecision(out, in, U);
127 
128  // check all locations match
129  checkLocation(out, in, U);
130 
131  // with symmetric dagger operator we must use kernel packing
132  if (dagger && !asymmetric) pushKernelPackT(true);
133 
134  instantiate<TwistedMassPreconditionedApply>(
135  out, in, U, a, b, xpay, x, parity, dagger, asymmetric, comm_override, profile);
136 
137  if (dagger && !asymmetric) popKernelPackT();
138 #else
139  errorQuda("Twisted-mass dslash has not been built");
140 #endif // GPU_TWISTED_MASS_DIRAC
141  }
142 
143 } // namespace quda
TwistedMassPreconditionedApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
Definition: dslash.h:101
void setParam(Arg &arg)
Definition: dslash.h:66
void apply(const cudaStream_t &stream)
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define checkPrecision(...)
#define errorQuda(...)
Definition: util_quda.h:121
cudaStream_t * stream
const char * VolString() const
TwistedMassPreconditioned(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:37
void popKernelPackT()
Definition: dslash_quda.cu:42
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
const int nColor
Definition: covdev_test.cpp:75
cpuColorSpinorField * in
const int * GhostFaceCB() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
#define checkLocation(...)
cpuColorSpinorField * out
const int nParity
Definition: spinor_noise.cu:25
unsigned long long flops
Definition: blas_quda.cu:22
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void ApplyTwistedMassPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned twisted-mass stencil.
void pushKernelPackT(bool pack)
Definition: dslash_quda.cu:30
#define checkCudaError()
Definition: util_quda.h:161
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
virtual long long flops() const
Definition: dslash.h:316
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
QudaDagType dagger
Definition: test_util.cpp:1620
QudaParity parity
Definition: covdev_test.cpp:54
QudaFieldOrder FieldOrder() const