QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
clover_invert.cuh
Go to the documentation of this file.
1 #include <clover_field_order.h>
2 #include <complex_quda.h>
3 #include <quda_matrix.h>
4 #include <linalg.cuh>
5 #include <cub_helper.cuh>
6 
7 namespace quda
8 {
9 
10  template <typename Float> struct CloverInvertArg : public ReduceArg<double2> {
11  typedef typename clover_mapper<Float>::type C;
13  const C clover;
15  bool twist;
16  Float mu2;
17  CloverInvertArg(CloverField &field, bool computeTraceLog = 0) :
18  ReduceArg<double2>(),
19  inverse(field, true),
20  clover(field, false),
21  computeTraceLog(computeTraceLog),
22  twist(field.Twisted()),
23  mu2(field.Mu2())
24  {
25  if (!field.isNative()) errorQuda("Clover field %d order not supported", field.Order());
26  }
27  };
28 
32  template <typename Float, typename Arg, bool computeTrLog, bool twist>
33  __device__ __host__ inline double cloverInvertCompute(Arg &arg, int x_cb, int parity)
34  {
35 
36  constexpr int nColor = 3;
37  constexpr int nSpin = 4;
38  constexpr int N = nColor * nSpin / 2;
39  typedef HMatrix<Float, N> Mat;
40  double trlogA = 0.0;
41 
42  for (int ch = 0; ch < 2; ch++) {
43  Mat A = arg.clover(x_cb, parity, ch);
44  A *= static_cast<Float>(2.0); // factor of two is inherent to QUDA clover storage
45 
46  if (twist) { // Compute (T^2 + mu2) first, then invert
47  A = A.square();
48  A += arg.mu2;
49  }
50 
51  // compute the Cholesky decomposition
53 
54  // Accumulate trlogA
55  if (computeTrLog)
56  for (int j = 0; j < N; j++) trlogA += 2.0 * log(cholesky.D(j));
57 
58  Mat Ainv = static_cast<Float>(0.5) * cholesky.invert(); // return full inverse
59  arg.inverse(x_cb, parity, ch) = Ainv;
60  }
61 
62  return trlogA;
63  }
64 
65  template <typename Float, typename Arg, bool computeTrLog, bool twist> void cloverInvert(Arg &arg)
66  {
67  for (int parity = 0; parity < 2; parity++) {
68  for (int x = 0; x < arg.clover.volumeCB; x++) {
69  // should make this thread safe if we ever apply threads to cpu code
70  double trlogA = cloverInvertCompute<Float, Arg, computeTrLog, twist>(arg, x, parity);
71  if (computeTrLog) {
72  if (parity)
73  arg.result_h[0].y += trlogA;
74  else
75  arg.result_h[0].x += trlogA;
76  }
77  }
78  }
79  }
80 
81  template <int blockSize, typename Float, typename Arg, bool computeTrLog, bool twist>
82  __global__ void cloverInvertKernel(Arg arg)
83  {
84  int idx = blockIdx.x * blockDim.x + threadIdx.x;
85  int parity = threadIdx.y;
86  double2 trlogA = make_double2(0.0, 0.0);
87  double trlogA_parity = 0.0;
88  while (idx < arg.clover.volumeCB) {
89  trlogA_parity = cloverInvertCompute<Float, Arg, computeTrLog, twist>(arg, idx, parity);
90  trlogA = parity ? make_double2(0.0, trlogA.y + trlogA_parity) : make_double2(trlogA.x + trlogA_parity, 0.0);
91  idx += blockDim.x * gridDim.x;
92  }
93  if (computeTrLog) reduce2d<blockSize, 2>(arg, trlogA);
94  }
95 
96 } // namespace quda
__device__ __host__ const T D(int i) const
Return the diagonal element of the Cholesky decomposition L(i,i)
Definition: linalg.cuh:84
#define errorQuda(...)
Definition: util_quda.h:121
Compute Cholesky decomposition of A. By default, we use a modified Cholesky which avoids the division...
Definition: linalg.cuh:38
__device__ __host__ double cloverInvertCompute(Arg &arg, int x_cb, int parity)
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
QudaCloverFieldOrder Order() const
Definition: clover_field.h:93
Main header file for host and device accessors to CloverFields.
void cloverInvert(CloverField &clover, bool computeTraceLog)
This function compute the Cholesky decomposition of each clover matrix and stores the clover inverse ...
CloverInvertArg(CloverField &field, bool computeTraceLog=0)
const int nColor
Definition: covdev_test.cpp:75
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
Definition: quda_matrix.h:61
bool isNative() const
__global__ void cloverInvertKernel(Arg arg)
__host__ __device__ ValueType log(ValueType x)
Definition: complex_quda.h:101
__device__ __host__ Mat< T, N > invert()
Compute the inverse of A (the matrix used to construct the Cholesky decomposition).
Definition: linalg.cuh:146
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
const int volumeCB
Definition: spinor_noise.cu:26
QudaParity parity
Definition: covdev_test.cpp:54
clover_mapper< Float >::type C