quda-ref/v1.0.0/dslash__twisted__clover__preconditioned_8cuh_source.html

 #pragma once

 #include <kernels/dslash_wilson_clover_preconditioned.cuh>
 #include <clover_field_order.h>
 #include <linalg.cuh>

 namespace quda
 {

   /**
      @brief Parameter structure for the preconditioned twisted-clover
      dslash.  Extends WilsonArg with accessors to the clover field and
      the scale / twist coefficients used by twistedClover().

      @tparam Float Storage precision of the fields
      @tparam nColor Number of colors
      @tparam reconstruct_ Gauge-field reconstruction type, forwarded to WilsonArg
      @tparam dynamic_clover_ Whether the clover inverse is computed on the
      fly (true) rather than read from the stored inverse field (false)
   */
   template <typename Float, int nColor, QudaReconstructType reconstruct_, bool dynamic_clover_>
   struct TwistedCloverArg : WilsonArg<Float, nColor, reconstruct_> {
     using WilsonArg<Float, nColor, reconstruct_>::nSpin;

     // size parameter handed to clover_mapper (evaluates to 72 when nSpin = 4 and nColor = 3)
     static constexpr int length = (nSpin / (nSpin / 2)) * 2 * nColor * nColor * (nSpin / 2) * (nSpin / 2) / 2;

     // compile-time switch: true means the inverse is built on the fly in the kernel
     static constexpr bool dynamic_clover = dynamic_clover_;

     using real = typename mapper<Float>::type;
     using C = typename clover_mapper<Float, length>::type;

     const C A;     // accessor to the clover field
     const C A2inv; // A^{-2}
     real a;        // scaling factor applied to the result
     real b;        // twist factor (already sign-adjusted for dagger and halved)

     /**
        @param[out] out Output spinor field
        @param[in] in Input spinor field
        @param[in] U Gauge field
        @param[in] A Clover field (both accessors are constructed from it)
        @param[in] a Scaling factor
        @param[in] b Twist factor; stored as +/- 0.5 * b depending on dagger
        @param[in] xpay Whether the xpay variant is requested (WilsonArg receives 1.0 if so, else 0.0)
        @param[in] x Auxiliary spinor field forwarded to WilsonArg
        @param[in] parity Parity forwarded to WilsonArg
        @param[in] dagger Whether the daggered operator is applied
        @param[in] comm_override Forwarded unchanged to WilsonArg
     */
     TwistedCloverArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A,
         double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :
         WilsonArg<Float, nColor, reconstruct_>(out, in, U, xpay ? 1.0 : 0.0, x, parity, dagger, comm_override),
         A(A, false),
         A2inv(A, !dynamic_clover), // with dynamic clover we do not want the inverse field
         a(a),
         b((dagger ? -0.5 : 0.5) * b) // the factor of 0.5 comes from the basis transform
     {
     }
   };

   /**
      @brief Apply the preconditioned twisted-clover dslash at one site.

      The Wilson hopping term is accumulated first (applyWilson).  Once
      the site is complete, each chirality block is multiplied by
      (A + i b), with the sign of b flipped for the second chirality,
      and then by the inverse: either solved on the fly through a
      Cholesky factorisation of A^2 + b^2 (dynamic clover) or read from
      the A2inv accessor.  The result is scaled by arg.a and, for xpay,
      added to arg.x.

      @param[in,out] arg Kernel argument struct (fields, coefficients)
      @param[in] idx Thread / site index passed to getCoords
      @param[in] parity Parity of the site being updated
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   __device__ __host__ inline void twistedClover(Arg &arg, int idx, int parity)
   {
     using namespace linalg; // for Cholesky
     using real = typename mapper<Float>::type;
     constexpr int block_dim = nColor * Arg::nSpin / 2; // dimension of one chiral block
     using Vector = ColorSpinor<real, nColor, 4>;
     using HalfVector = ColorSpinor<real, nColor, 2>;
     using Mat = HMatrix<real, block_dim>;

     // is thread active (non-trivial for fused kernel only)
     bool active = (kernel_type != EXTERIOR_KERNEL_ALL);
     int thread_dim; // which dimension is thread working on (fused kernel only)
     int coord[nDim];
     const int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);

     // single-parity fields are always addressed with spinor parity 0
     const int my_spinor_parity = (nParity == 2) ? parity : 0;

     Vector out;

     // hopping term; applyWilson is defined in dslash_wilson.cuh
     applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
         out, arg, coord, x_cb, 0, parity, idx, thread_dim, active);

     if (active && kernel_type != INTERIOR_KERNEL) {
       // any kernel other than the interior one adds onto the partial result already stored
       Vector partial = arg.out(x_cb, my_spinor_parity);
       out += partial;
     }

     if (isComplete<kernel_type>(arg, coord) && active) {
       out.toRel(); // switch to chiral basis

       Vector tmp;

 #pragma unroll
       for (int chirality = 0; chirality < 2; chirality++) {

         // purely imaginary twist, opposite sign for the two chiralities
         const complex<real> twist(0.0, chirality == 0 ? static_cast<real>(arg.b) : -static_cast<real>(arg.b));
         Mat A = arg.A(x_cb, parity, chirality);
         HalfVector chi = out.chiral_project(chirality);
         chi = A * chi + twist * chi;

         if (arg.dynamic_clover) {
           // build A^2 + b^2 and apply its inverse via forward / backward substitution
           Mat A2 = A.square();
           A2 += twist.imag() * twist.imag();
           Cholesky<HMatrix, real, block_dim> cholesky(A2);
           chi = cholesky.backward(cholesky.forward(chi));
           // NOTE(review): the 0.25 here versus 2.0 below presumably reflects how the
           // stored fields are normalised -- confirm against the clover field code
           tmp += static_cast<real>(0.25) * chi.chiral_reconstruct(chirality);
         } else {
           // use the inverse loaded through the A2inv accessor
           Mat A2inv = arg.A2inv(x_cb, parity, chirality);
           chi = A2inv * chi;
           tmp += static_cast<real>(2.0) * chi.chiral_reconstruct(chirality);
         }
       }

       tmp.toNonRel(); // switch back to non-chiral basis

       if (xpay) {
         Vector x = arg.x(x_cb, my_spinor_parity);
         out = x + arg.a * tmp;
       } else {
         out = arg.a * tmp;
       }
     }

     // inactive threads of the fused exterior kernel must not write
     if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(x_cb, my_spinor_parity) = out;
   }

   /**
      @brief CPU kernel for applying the preconditioned twisted-clover
      operator to a vector.  Loops over parities and over the
      arg.threads sites, calling twistedClover() for each site.

      @param[in] arg Kernel argument struct; arg.threads is the number
      of sites visited per parity and arg.parity the parity used for
      single-parity fields
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   void twistedCloverPreconditionedCPU(Arg arg)
   {
     // The loop counter is kept separate from the site parity: overwriting the
     // counter with arg.parity would make loop termination depend on its value.
     for (int parity_it = 0; parity_it < nParity; parity_it++) {
       // for full fields take the parity from the loop, else use the arg setting
       const int parity = nParity == 2 ? parity_it : arg.parity;

       for (int x_cb = 0; x_cb < arg.threads; x_cb++) { // 4-d volume
         twistedClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, parity);
       } // 4-d volumeCB
     }   // parity
   }

   /**
      @brief GPU kernel for applying the preconditioned twisted-clover
      operator to a vector.  The x thread index selects the site; for
      full fields (nParity == 2) the z thread index selects the parity,
      otherwise arg.parity is used.

      @param[in] arg Kernel argument struct; threads whose site index is
      not below arg.threads do nothing
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   __global__ void twistedCloverPreconditionedGPU(Arg arg)
   {
     const int x_cb = blockIdx.x * blockDim.x + threadIdx.x;

     if (x_cb < arg.threads) {
       // for full fields set parity from z thread index else use arg setting
       const int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;

       // dispatch with a literal parity in each call; any other value is ignored
       if (parity == 0) {
         twistedClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 0);
       } else if (parity == 1) {
         twistedClover<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 1);
       }
     }
   }

 } // namespace quda
linalg.cuh

quda::TwistedCloverArg
Definition: dslash_twisted_clover_preconditioned.cuh:11

quda::DslashArg::kernel_type
KernelType kernel_type
Definition: dslash_helper.cuh:250

quda::EXTERIOR_KERNEL_ALL
Definition: index_helper.cuh:466

quda::ColorSpinorField
Definition: color_spinor_field.h:311

quda::TwistedCloverArg::C
clover_mapper< Float, length >::type C
Definition: dslash_twisted_clover_preconditioned.cuh:17

quda::TwistedCloverArg::real
mapper< Float >::type real
Definition: dslash_twisted_clover_preconditioned.cuh:16

quda::WilsonArg::x
const F x
Definition: dslash_wilson.cuh:31

quda::TwistedCloverArg::A
const C A
Definition: dslash_twisted_clover_preconditioned.cuh:18

quda::TwistedCloverArg::a
real a
Definition: dslash_twisted_clover_preconditioned.cuh:20

quda::ColorSpinor
Definition: color_spinor.h:24

quda::CloverField
Definition: clover_field.h:45

tmp
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:44

quda::twistedCloverPreconditionedGPU
__global__ void twistedCloverPreconditionedGPU(Arg arg)
Definition: dslash_twisted_clover_preconditioned.cuh:124

Mat
void Mat(sFloat *out, gFloat **link, sFloat *in, int daggerBit, int mu)
Definition: covdev_reference.cpp:99

clover_field_order.h
Main header file for host and device accessors to CloverFields.

quda
Definition: blas_cublas.h:5

quda::twistedClover
__device__ __host__ void twistedClover(Arg &arg, int idx, int parity)
Apply the preconditioned twisted-clover dslash.
Definition: dslash_twisted_clover_preconditioned.cuh:40

quda::WilsonArg::out
F out
Definition: dslash_wilson.cuh:29

quda::WilsonArg::U
const G U
Definition: dslash_wilson.cuh:32

dslash_wilson_clover_preconditioned.cuh

quda::WilsonArg
Parameter structure for driving the Wilson operator.
Definition: dslash_wilson.cuh:16

quda::INTERIOR_KERNEL
Definition: index_helper.cuh:465

nColor
const int nColor
Definition: covdev_test.cpp:75

quda::HMatrix
Specialized container for Hermitian matrices (e.g., used for wrapping clover matrices) ...
Definition: quda_matrix.h:61

quda::TwistedCloverArg::A2inv
const C A2inv
Definition: dslash_twisted_clover_preconditioned.cuh:19

quda::Arg
Definition: spinor_noise.cu:22

quda::WilsonArg::in
const F in
Definition: dslash_wilson.cuh:30

quda::DslashArg::dagger
const bool dagger
Definition: dslash_helper.cuh:244

quda::mapper
Definition: register_traits.h:43

quda::twistedCloverPreconditionedCPU
void twistedCloverPreconditionedCPU(Arg arg)
Definition: dslash_twisted_clover_preconditioned.cuh:109

quda::TwistedCloverArg::b
real b
Definition: dslash_twisted_clover_preconditioned.cuh:21

quda::TwistedCloverArg::dynamic_clover
static constexpr bool dynamic_clover
Definition: dslash_twisted_clover_preconditioned.cuh:14

quda::clover_mapper
Definition: clover_field_order.h:975

quda::arg
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:1076

quda::Vector
VectorXcd Vector
Definition: inv_eigcg_quda.cpp:38

quda::WilsonArg::nSpin
static constexpr int nSpin
Definition: dslash_wilson.cuh:17

quda::TwistedCloverArg::TwistedCloverArg
TwistedCloverArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
Definition: dslash_twisted_clover_preconditioned.cuh:23

quda::DslashArg::xpay
const bool xpay
Definition: dslash_helper.cuh:245

quda::DslashArg::parity
const int parity
Definition: dslash_helper.cuh:233

quda::TwistedCloverArg::length
static constexpr int length
Definition: dslash_twisted_clover_preconditioned.cuh:13

quda::DslashArg::nParity
const int nParity
Definition: dslash_helper.cuh:234

quda::GaugeField
Definition: gauge_field.h:164