QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_ndeg_twisted_mass_preconditioned.cu
Go to the documentation of this file.
1 #include <gauge_field.h>
2 #include <color_spinor_field.h>
3 #include <dslash.h>
4 #include <worker.h>
5 
6 #include <dslash_policy.cuh>
8 
14 namespace quda
15 {
16 
21  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
23  static constexpr const char *kernel = "quda::ndegTwistedMassPreconditionedGPU"; // kernel name for jit compilation
24  template <typename Dslash>
25  inline static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
26  {
27  static_assert(nParity == 1, "Non-degenerate twisted-mass operator only defined for nParity=1");
28  dslash.launch(ndegTwistedMassPreconditionedGPU<Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg>, tp,
29  arg, stream);
30  }
31  };
32 
33  template <typename Float, int nDim, int nColor, typename Arg>
34  class NdegTwistedMassPreconditioned : public Dslash<Float>
35  {
36 
37 protected:
38  Arg &arg;
40  bool shared;
41  unsigned int sharedBytesPerThread() const
42  {
43  return shared ? 2 * nColor * 4 * sizeof(typename mapper<Float>::type) : 0;
44  }
45 
46 public:
48  Dslash<Float>(arg, out, in, "kernels/dslash_ndeg_twisted_mass_preconditioned.cuh"),
49  arg(arg),
50  in(in),
51  shared(arg.asymmetric || !arg.dagger)
52  {
54  if (arg.asymmetric)
55  for (int i = 0; i < 8; i++)
56  if (i != 4) { strcat(Dslash<Float>::aux[i], ",asym"); }
57  }
58 
60 
61  void apply(const cudaStream_t &stream)
62  {
63  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
65  if (arg.asymmetric && !arg.dagger) errorQuda("asymmetric operator only defined for dagger");
66  if (arg.asymmetric && arg.xpay) errorQuda("asymmetric operator not defined for xpay");
67 
68  if (arg.nParity == 1) {
69  if (arg.xpay)
70  Dslash<Float>::template instantiate<NdegTwistedMassPreconditionedLaunch, nDim, nColor, 1, true>(
71  tp, arg, stream);
72  else
73  Dslash<Float>::template instantiate<NdegTwistedMassPreconditionedLaunch, nDim, nColor, 1, false>(
74  tp, arg, stream);
75  } else {
76  errorQuda("Preconditioned non-degenerate twisted-mass operator not defined nParity=%d", arg.nParity);
77  }
78  }
79 
81  {
83  if (shared) {
84  param.block.y = 2; // flavor must be contained in the block
85  param.grid.y = 1;
86  param.shared_bytes = sharedBytesPerThread() * param.block.x * param.block.y * param.block.z;
87  }
88  }
89 
91  {
93  if (shared) {
94  param.block.y = 2; // flavor must be contained in the block
95  param.grid.y = 1;
96  param.shared_bytes = sharedBytesPerThread() * param.block.x * param.block.y * param.block.z;
97  }
98  }
99 
100  long long flops() const
101  {
102  long long flops = Dslash<Float>::flops();
103  switch (arg.kernel_type) {
104  case EXTERIOR_KERNEL_X:
105  case EXTERIOR_KERNEL_Y:
106  case EXTERIOR_KERNEL_Z:
107  case EXTERIOR_KERNEL_T:
108  case EXTERIOR_KERNEL_ALL: break; // twisted-mass flops are in the interior kernel
109  case INTERIOR_KERNEL:
110  case KERNEL_POLICY:
111  flops += 2 * nColor * 4 * 4 * in.Volume(); // complex * Nc * Ns * fma * vol
112  break;
113  }
114  return flops;
115  }
116 
117  TuneKey tuneKey() const
118  {
119  return TuneKey(in.VolString(), typeid(*this).name(), Dslash<Float>::aux[arg.kernel_type]);
120  }
121  };
122 
123  template <typename Float, int nColor, QudaReconstructType recon> struct NdegTwistedMassPreconditionedApply {
124 
126  double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric,
127  const int *comm_override, TimeProfile &profile)
128  {
129  constexpr int nDim = 4;
131  out, in, U, a, b, c, xpay, x, parity, dagger, asymmetric, comm_override);
133 
135  const_cast<cudaColorSpinorField *>(static_cast<const cudaColorSpinorField *>(&in)),
137  policy.apply(0);
138 
139  checkCudaError();
140  }
141  };
142 
143  // Apply the non-degenerate twisted-mass Dslash operator
144  // out(x) = M*in = a*(1 + i*b*gamma_5*tau_3 + c*tau_1)*D + x
145  // Uses the kappa normalization for the Wilson operator, with a = -kappa.
147  double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric,
148  const int *comm_override, TimeProfile &profile)
149  {
150 #ifdef GPU_NDEG_TWISTED_MASS_DIRAC
151  if (in.V() == out.V()) errorQuda("Aliasing pointers");
152  if (in.FieldOrder() != out.FieldOrder())
153  errorQuda("Field order mismatch in = %d, out = %d", in.FieldOrder(), out.FieldOrder());
154 
155  // check all precisions match
156  checkPrecision(out, in, x, U);
157 
158  // check all locations match
159  checkLocation(out, in, x, U);
160 
161  // with symmetric dagger operator we must use kernel packing
162  if (dagger && !asymmetric) pushKernelPackT(true);
163 
164  instantiate<NdegTwistedMassPreconditionedApply>(
165  out, in, U, a, b, c, xpay, x, parity, dagger, asymmetric, comm_override, profile);
166 
167  if (dagger && !asymmetric) popKernelPackT();
168 #else
169  errorQuda("Non-degenerate twisted-mass dslash has not been built");
170 #endif // GPU_NDEG_TWISTED_MASS_DIRAC
171  }
172 
173 } // namespace quda
void launch(T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream)
Definition: dslash.h:101
void setParam(Arg &arg)
Definition: dslash.h:66
void apply(const cudaStream_t &stream)
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define checkPrecision(...)
#define errorQuda(...)
Definition: util_quda.h:121
cudaStream_t * stream
const char * VolString() const
void xpay(ColorSpinorField &x, double a, ColorSpinorField &y)
Definition: blas_quda.h:37
QudaGaugeParam param
Definition: pack_test.cpp:17
void popKernelPackT()
Definition: dslash_quda.cu:42
This is a helper class that is used to instantiate the correct templated kernel for the dslash...
const int nColor
Definition: covdev_test.cpp:75
cpuColorSpinorField * in
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
#define checkLocation(...)
NdegTwistedMassPreconditioned(Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
int ghostFaceCB[QUDA_MAX_DIM+1]
void initTuneParam(TuneParam &param) const
Definition: tune_quda.h:523
const DslashConstant & getDslashConstant() const
Get the dslash_constant structure from this field.
cpuColorSpinorField * out
const int nParity
Definition: spinor_noise.cu:25
void resizeVector(int y, int z) const
Definition: tune_quda.h:538
static void launch(Dslash &dslash, TuneParam &tp, Arg &arg, const cudaStream_t &stream)
unsigned long long flops
Definition: blas_quda.cu:22
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
NdegTwistedMassPreconditionedApply(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
void pushKernelPackT(bool pack)
Definition: dslash_quda.cu:30
#define checkCudaError()
Definition: util_quda.h:161
virtual long long flops() const
Definition: dslash.h:316
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
QudaDagType dagger
Definition: test_util.cpp:1620
void ApplyNdegTwistedMassPreconditioned(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile)
Driver for applying the preconditioned non-degenerate twisted-mass stencil.
QudaParity parity
Definition: covdev_test.cpp:54
QudaFieldOrder FieldOrder() const
void defaultTuneParam(TuneParam &param) const
Definition: tune_quda.h:531