QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_twisted_clover_preconditioned.cuh
Go to the documentation of this file.
1 #pragma once
2 
4 #include <clover_field_order.h>
5 #include <linalg.cuh>
6 
7 namespace quda
8 {
9 
10  template <typename Float, int nColor, QudaReconstructType reconstruct_, bool dynamic_clover_>
11  struct TwistedCloverArg : WilsonArg<Float, nColor, reconstruct_> {
13  static constexpr int length = (nSpin / (nSpin / 2)) * 2 * nColor * nColor * (nSpin / 2) * (nSpin / 2) / 2;
14  static constexpr bool dynamic_clover = dynamic_clover_;
15 
16  typedef typename mapper<Float>::type real;
18  const C A;
19  const C A2inv; // A^{-2}
20  real a; // this is the scaling factor
21  real b; // this is the twist factor
22 
24  double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :
25  WilsonArg<Float, nColor, reconstruct_>(out, in, U, xpay ? 1.0 : 0.0, x, parity, dagger, comm_override),
26  A(A, false),
27  A2inv(A, dynamic_clover ? false : true), // if dynamic clover we don't want the inverse field
28  a(a),
29  b(dagger ? -0.5 * b : 0.5 * b) // factor of 0.5 comes from basis transform
30  {
31  }
32  };
33 
39  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
40  __device__ __host__ inline void twistedClover(Arg &arg, int idx, int parity)
41  {
42  using namespace linalg; // for Cholesky
43  typedef typename mapper<Float>::type real;
45  typedef ColorSpinor<real, nColor, 2> HalfVector;
46  typedef HMatrix<real, nColor * Arg::nSpin / 2> Mat;
47 
48  bool active
49  = kernel_type == EXTERIOR_KERNEL_ALL ? false : true; // is thread active (non-trival for fused kernel only)
50  int thread_dim; // which dimension is thread working on (fused kernel only)
51  int coord[nDim];
52  int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);
53 
54  const int my_spinor_parity = nParity == 2 ? parity : 0;
55 
56  Vector out;
57 
58  // defined in dslash_wilson.cuh
59  applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
60  out, arg, coord, x_cb, 0, parity, idx, thread_dim, active);
61 
62  if (kernel_type != INTERIOR_KERNEL && active) {
63  // if we're not the interior kernel, then we must sum the partial
64  Vector x = arg.out(x_cb, my_spinor_parity);
65  out += x;
66  }
67 
68  if (isComplete<kernel_type>(arg, coord) && active) {
69  out.toRel(); // switch to chiral basis
70 
71  Vector tmp;
72 
73 #pragma unroll
74  for (int chirality = 0; chirality < 2; chirality++) {
75 
76  const complex<real> b(0.0, chirality == 0 ? static_cast<real>(arg.b) : -static_cast<real>(arg.b));
77  Mat A = arg.A(x_cb, parity, chirality);
78  HalfVector chi = out.chiral_project(chirality);
79  chi = A * chi + b * chi;
80 
81  if (arg.dynamic_clover) {
82  Mat A2 = A.square();
83  A2 += b.imag() * b.imag();
84  Cholesky<HMatrix, real, nColor * Arg::nSpin / 2> cholesky(A2);
85  chi = cholesky.backward(cholesky.forward(chi));
86  tmp += static_cast<real>(0.25) * chi.chiral_reconstruct(chirality);
87  } else {
88  Mat A2inv = arg.A2inv(x_cb, parity, chirality);
89  chi = A2inv * chi;
90  tmp += static_cast<real>(2.0) * chi.chiral_reconstruct(chirality);
91  }
92  }
93 
94  tmp.toNonRel(); // switch back to non-chiral basis
95 
96  if (xpay) {
97  Vector x = arg.x(x_cb, my_spinor_parity);
98  out = x + arg.a * tmp;
99  } else {
100  out = arg.a * tmp;
101  }
102  }
103 
104  if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(x_cb, my_spinor_parity) = out;
105  }
106 
107  // CPU kernel for applying the Wilson operator to a vector
108  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
110  {
111 
112  for (int parity = 0; parity < nParity; parity++) {
113  // for full fields then set parity from loop else use arg setting
114  parity = nParity == 2 ? parity : arg.parity;
115 
116  for (int x_cb = 0; x_cb < arg.threads; x_cb++) { // 4-d volume
117  twistedClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, parity);
118  } // 4-d volumeCB
119  } // parity
120  }
121 
122  // GPU Kernel for applying the Wilson operator to a vector
123  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
125  {
126  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
127  if (x_cb >= arg.threads) return;
128 
129  // for full fields set parity from z thread index else use arg setting
130  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
131 
132  switch (parity) {
133  case 0: twistedClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 0); break;
134  case 1: twistedClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 1); break;
135  }
136  }
137 
138 } // namespace quda
KernelType kernel_type
clover_mapper< Float, length >::type C
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
__global__ void twistedCloverPreconditionedGPU(Arg arg)
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
Main header file for host and device accessors to CloverFields.
__device__ __host__ void twistedClover(Arg &arg, int idx, int parity)
Apply the preconditioned twisted-clover dslash.
Parameter structure for driving the Wilson operator.
const int nColor
Definition: covdev_test.cpp:75
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
Definition: quda_matrix.h:61
void twistedCloverPreconditionedCPU(Arg arg)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
VectorXcd Vector
static constexpr int nSpin
TwistedCloverArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)