QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_wilson_clover_preconditioned.cuh
Go to the documentation of this file.
1 #pragma once
2 
4 #include <clover_field_order.h>
5 #include <linalg.cuh>
6 
7 namespace quda
8 {
9 
10  template <typename Float, int nColor, QudaReconstructType reconstruct_, bool dynamic_clover_>
11  struct WilsonCloverArg : WilsonArg<Float, nColor, reconstruct_> {
13  static constexpr int length = (nSpin / (nSpin / 2)) * 2 * nColor * nColor * (nSpin / 2) * (nSpin / 2) / 2;
14  static constexpr bool dynamic_clover = dynamic_clover_;
15 
17  typedef typename mapper<Float>::type real;
18 
19  const C A;
20  const real a;
23  double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :
24  WilsonArg<Float, nColor, reconstruct_>(out, in, U, a, x, parity, dagger, comm_override),
25  A(A, dynamic_clover ? false : true), // if dynamic clover we don't want the inverse field
26  a(a)
27  {
28  }
29  };
30 
36  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
37  __device__ __host__ inline void wilsonClover(Arg &arg, int idx, int parity)
38  {
39  using namespace linalg; // for Cholesky
40  typedef typename mapper<Float>::type real;
42  typedef ColorSpinor<real, nColor, 2> HalfVector;
43 
44  bool active
45  = kernel_type == EXTERIOR_KERNEL_ALL ? false : true; // is thread active (non-trival for fused kernel only)
46  int thread_dim; // which dimension is thread working on (fused kernel only)
47  int coord[nDim];
48  int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);
49 
50  const int my_spinor_parity = nParity == 2 ? parity : 0;
51 
52  Vector out;
53 
54  // defined in dslash_wilson.cuh
55  applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
56  out, arg, coord, x_cb, 0, parity, idx, thread_dim, active);
57 
58  if (kernel_type != INTERIOR_KERNEL && active) {
59  // if we're not the interior kernel, then we must sum the partial
60  Vector x = arg.out(x_cb, my_spinor_parity);
61  out += x;
62  }
63 
64  if (isComplete<kernel_type>(arg, coord) && active) {
65  out.toRel(); // switch to chiral basis
66 
67  Vector tmp;
68 
69 #pragma unroll
70  for (int chirality = 0; chirality < 2; chirality++) {
71 
72  HMatrix<real, nColor *Arg::nSpin / 2> A = arg.A(x_cb, parity, chirality);
73  HalfVector chi = out.chiral_project(chirality);
74 
75  if (arg.dynamic_clover) {
76  Cholesky<HMatrix, real, nColor * Arg::nSpin / 2> cholesky(A);
77  chi = static_cast<real>(0.25) * cholesky.backward(cholesky.forward(chi));
78  } else {
79  chi = A * chi;
80  }
81 
82  tmp += chi.chiral_reconstruct(chirality);
83  }
84 
85  tmp.toNonRel(); // switch back to non-chiral basis
86 
87  if (xpay) {
88  Vector x = arg.x(x_cb, my_spinor_parity);
89  out = x + arg.a * tmp;
90  } else {
91  out = tmp;
92  }
93  }
94 
95  if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(x_cb, my_spinor_parity) = out;
96  }
97 
98  // CPU kernel for applying the Wilson operator to a vector
99  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
101  {
102 
103  for (int parity = 0; parity < nParity; parity++) {
104  // for full fields then set parity from loop else use arg setting
105  parity = nParity == 2 ? parity : arg.parity;
106 
107  for (int x_cb = 0; x_cb < arg.threads; x_cb++) { // 4-d volume
108  wilsonClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, parity);
109  } // 4-d volumeCB
110  } // parity
111  }
112 
113  // GPU Kernel for applying the Wilson operator to a vector
114  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
116  {
117  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
118  if (x_cb >= arg.threads) return;
119 
120  // for full fields set parity from z thread index else use arg setting
121  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
122 
123  switch (parity) {
124  case 0: wilsonClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 0); break;
125  case 1: wilsonClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 1); break;
126  }
127  }
128 
129 } // namespace quda
KernelType kernel_type
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44
clover_mapper< Float, length >::type C
Main header file for host and device accessors to CloverFields.
Parameter structure for driving the Wilson operator.
const int nColor
Definition: covdev_test.cpp:75
__device__ __host__ void wilsonClover(Arg &arg, int idx, int parity)
Apply the Wilson-clover dslash out(x) = M*in = A(x)*x(x) + D * in(x-mu) Note this routine only exists...
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
Definition: quda_matrix.h:61
void wilsonCloverPreconditionedCPU(Arg arg)
WilsonCloverArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
static constexpr int length
__global__ void wilsonCloverPreconditionedGPU(Arg arg)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
VectorXcd Vector
static constexpr int nSpin