QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_ndeg_twisted_mass.cuh
Go to the documentation of this file.
1 #pragma once
2 
4 
5 namespace quda
6 {
7 
8  template <typename Float, int nColor, QudaReconstructType reconstruct_>
9  struct NdegTwistedMassArg : WilsonArg<Float, nColor, reconstruct_> {
10  typedef typename mapper<Float>::type real;
11  real a;
12  real b;
13  real c;
15  NdegTwistedMassArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b,
16  double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :
17  WilsonArg<Float, nColor, reconstruct_>(out, in, U, a, x, parity, dagger, comm_override),
18  a(a),
19  b(dagger ? -b : b), // if dagger flip the chiral twist
20  c(c)
21  {
22  }
23  };
24 
30  template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg>
31  __device__ __host__ inline void ndegTwistedMass(Arg &arg, int idx, int flavor, int parity)
32  {
33  typedef typename mapper<Float>::type real;
35  typedef ColorSpinor<real, nColor, 2> HalfVector;
36 
37  bool active
38  = kernel_type == EXTERIOR_KERNEL_ALL ? false : true; // is thread active (non-trival for fused kernel only)
39  int thread_dim; // which dimension is thread working on (fused kernel only)
40  int coord[nDim];
41  int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);
42 
43  const int my_spinor_parity = nParity == 2 ? parity : 0;
44  Vector out;
45 
46  // defined in dslash_wilson.cuh
47  applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
48  out, arg, coord, x_cb, flavor, parity, idx, thread_dim, active);
49 
50  int my_flavor_idx = x_cb + flavor * arg.dc.volume_4d_cb;
51 
53  // apply the chiral and flavor twists
54  // use consistent load order across s to ensure better cache locality
55  Vector x0 = arg.x(x_cb + 0 * arg.dc.volume_4d_cb, my_spinor_parity);
56  Vector x1 = arg.x(x_cb + 1 * arg.dc.volume_4d_cb, my_spinor_parity);
57 
58  if (flavor == 0) {
59  out = x0 + arg.a * out;
60  out += arg.b * x0.igamma(4);
61  out += arg.c * x1;
62  } else {
63  out = x1 + arg.a * out;
64  out += -arg.b * x1.igamma(4);
65  out += arg.c * x0;
66  }
67 
68  } else if (active) {
69  Vector x = arg.out(my_flavor_idx, my_spinor_parity);
70  out = x + arg.a * out;
71  }
72 
73  if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(my_flavor_idx, my_spinor_parity) = out;
74  }
75 
76  // CPU kernel for applying the non-degenerate twisted-mass operator to a vector
77  template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg>
79  {
80  for (int parity = 0; parity < nParity; parity++) {
81  // for full fields then set parity from loop else use arg setting
82  parity = nParity == 2 ? parity : arg.parity;
83 
84  for (int x_cb = 0; x_cb < arg.threads; x_cb++) { // 4-d volume
85  for (int flavor = 0; flavor < 2; flavor++) {
86  ndegTwistedMass<Float, nDim, nColor, nParity, dagger, kernel_type>(arg, x_cb, flavor, parity);
87  }
88  } // 4-d volumeCB
89  } // parity
90  }
91 
92  // GPU Kernel for applying the non-degenerate twisted-mass operator to a vector
93  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
94  __global__ void ndegTwistedMassGPU(Arg arg)
95  {
96  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
97  if (x_cb >= arg.threads) return;
98 
99  // for this operator flavor can be spread onto different blocks
100  int flavor = blockIdx.y * blockDim.y + threadIdx.y;
101 
102  // for full fields set parity from z thread index else use arg setting
103  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
104 
105  switch (parity) {
106  case 0: ndegTwistedMass<Float, nDim, nColor, nParity, dagger, kernel_type>(arg, x_cb, flavor, 0); break;
107  case 1: ndegTwistedMass<Float, nDim, nColor, nParity, dagger, kernel_type>(arg, x_cb, flavor, 1); break;
108  }
109  }
110 
111 } // namespace quda
__global__ void ndegTwistedMassGPU(Arg arg)
KernelType kernel_type
void ndegTwistedMassCPU(Arg arg)
NdegTwistedMassArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
__device__ __host__ void ndegTwistedMass(Arg &arg, int idx, int flavor, int parity)
Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x Note t...
Parameter structure for driving the Wilson operator.
const int nColor
Definition: covdev_test.cpp:75
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
VectorXcd Vector