quda-ref/v1.0.0/dslash__wilson_8cuh_source.html

 #pragma once

 #include <dslash_helper.cuh>
 #include <color_spinor_field_order.h>
 #include <gauge_field_order.h>
 #include <color_spinor.h>
 #include <dslash_helper.cuh>
 #include <index_helper.cuh>

 namespace quda
 {

   /**
      @brief Parameter structure for driving the Wilson operator.

      Bundles the field accessors and the scalar coefficient that the
      Wilson kernels in this file read through `arg`.

      @tparam Float Storage precision of the fields
      @tparam nColor Number of colors
      @tparam reconstruct_ Gauge-link reconstruction type handed to gauge_mapper
   */
   template <typename Float, int nColor, QudaReconstructType reconstruct_> struct WilsonArg : DslashArg<Float> {
     static constexpr int nSpin = 4;
     static constexpr bool spin_project = true;
     static constexpr bool spinor_direct_load = false; // false means texture load
     typedef typename colorspinor_mapper<Float, nSpin, nColor, spin_project, spinor_direct_load>::type F;

     static constexpr QudaReconstructType reconstruct = reconstruct_;
     static constexpr bool gauge_direct_load = false; // false means texture load
     static constexpr QudaGhostExchange ghost = QUDA_GHOST_EXCHANGE_PAD;
     typedef typename gauge_mapper<Float, reconstruct, 18, QUDA_STAGGERED_PHASE_NO, gauge_direct_load, ghost>::type G;

     typedef typename mapper<Float>::type real;

     F out;        // output spinor accessor (written, and re-read by the non-interior kernels)
     const F in;   // input spinor accessor
     const F x;    // spinor accessor read for the xpay update (out = x + a * out)
     const G U;    // gauge field accessor
     const real a; // scale factor applied to the dslash result when xpay is enabled

     /**
        @param[out] out Output spinor field
        @param[in] in Input spinor field
        @param[in] U Gauge field
        @param[in] a Scale factor; a non-zero value is forwarded to DslashArg as `true`
        (presumably the xpay flag - confirm against DslashArg)
        @param[in] x Spinor field added in the xpay update
        @param[in] parity Parity forwarded to DslashArg
        @param[in] dagger Dagger flag forwarded to DslashArg
        @param[in] comm_override Forwarded unchanged to DslashArg
     */
     WilsonArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a,
               const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :
       // NOTE(review): the literal 1 is presumably nFace - confirm against the DslashArg constructor
       DslashArg<Float>(in, U, parity, dagger, a != 0.0, 1, spin_project, comm_override),
       // initializers are listed in member declaration order, which is the order
       // in which C++ actually initializes them (avoids -Wreorder)
       out(out),
       in(in),
       x(x),
       U(U),
       a(a)
     {
       // the accessors above only support native field orders
       if (!out.isNative() || !x.isNative() || !in.isNative() || !U.isNative())
         errorQuda("Unsupported field order colorspinor=%d gauge=%d combination\n", in.FieldOrder(), U.FieldOrder());
     }
   };

   /**
      @brief Applies the off-diagonal part of the Wilson operator.

      For each of the four dimensions the forward and the backward
      neighbor contributions are accumulated into `out`.  Each
      contribution is taken either from the ghost (halo) buffers or
      from the local bulk, depending on `kernel_type` and on whether
      the neighbor lies within `arg.nFace` sites of the local boundary.

      @tparam nParity When 2 the neighbor spinor is read with parity
      `1 - parity`, otherwise with parity index 0
      @tparam dagger Flips the sign of the projection direction
      @tparam kernel_type Selects which of the halo / bulk branches are enabled

      @param[in,out] out Accumulator for the result at this site
      @param[in] arg Parameter struct (accessors, dimensions, index constants)
      @param[in] coord Site coordinates
      @param[in] x_cb Checkerboard site index
      @param[in] s Slice index used to offset spinor loads by
      `s * volume_4d_cb` (bulk) or `s * ghostFaceCB[d]` (ghost) -
      presumably the fifth-dimension index, confirm against callers
      @param[in] parity Parity of the site being updated
      @param[in] idx Thread index; used directly as the ghost face index
      unless this is the fused exterior kernel working on another dimension
      @param[in] thread_dim Dimension this thread is assigned to (fused exterior kernel)
      @param[in,out] active Passed by reference to isActive - presumably
      updated there for the fused exterior kernel, confirm
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg, typename Vector>
   __device__ __host__ inline void applyWilson(
       Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
   {
     typedef typename mapper<Float>::type real;
     typedef ColorSpinor<real, nColor, 2> HalfVector;
     typedef Matrix<complex<real>, nColor> Link;
     // parity index used when reading neighbor spinors
     const int their_spinor_parity = nParity == 2 ? 1 - parity : 0;

     // parity for gauge field - include residual parity from 5-d => 4-d checkerboarding
     const int gauge_parity = (nDim == 5 ? (x_cb / arg.dc.volume_4d_cb + parity) % 2 : parity);

 #pragma unroll 4
     for (int d = 0; d < 4; d++) { // loop over dimension
       {                           // Forward gather - compute fwd offset for vector fetch
         const int fwd_idx = getNeighborIndexCB<nDim>(coord, d, +1, arg.dc);
         // the forward link is read at this site (reduced to the 4-d index when nDim == 5)
         const int gauge_idx = (nDim == 5 ? x_cb % arg.dc.volume_4d_cb : x_cb);
         constexpr int proj_dir = dagger ? +1 : -1;

         // true when the forward neighbor lies beyond the local extent and isActive agrees
         const bool ghost
             = (coord[d] + arg.nFace >= arg.dim[d]) && isActive<kernel_type>(active, thread_dim, d, coord, arg);

         if (doHalo<kernel_type>(d) && ghost) {
           // we need to compute the face index if we are updating a face that isn't ours
           const int ghost_idx = (kernel_type == EXTERIOR_KERNEL_ALL && d != thread_dim) ?
               ghostFaceIndex<1, nDim>(coord, arg.dim, d, arg.nFace) :
               idx;

           Link U = arg.U(d, gauge_idx, gauge_parity);
           // ghost spinors are loaded as half (2-spin) vectors, so no project() call is made here
           HalfVector in = arg.in.Ghost(d, 1, ghost_idx + s * arg.dc.ghostFaceCB[d], their_spinor_parity);
           if (d == 3) in *= arg.t_proj_scale; // put this in the Ghost accessor and merge with any rescaling?

           out += (U * in).reconstruct(d, proj_dir);
         } else if (doBulk<kernel_type>() && !ghost) {

           Link U = arg.U(d, gauge_idx, gauge_parity);
           Vector in = arg.in(fwd_idx + s * arg.dc.volume_4d_cb, their_spinor_parity);

           // project the full spinor, multiply by the link, then reconstruct
           out += (U * in.project(d, proj_dir)).reconstruct(d, proj_dir);
         }
       }

       { // Backward gather - compute back offset for spinor and gauge fetch
         const int back_idx = getNeighborIndexCB<nDim>(coord, d, -1, arg.dc);
         // the backward link is read at the neighbor site, hence indexed by back_idx
         const int gauge_idx = (nDim == 5 ? back_idx % arg.dc.volume_4d_cb : back_idx);
         constexpr int proj_dir = dagger ? -1 : +1;

         // true when the backward neighbor lies below the local extent and isActive agrees
         const bool ghost = (coord[d] - arg.nFace < 0) && isActive<kernel_type>(active, thread_dim, d, coord, arg);

         if (doHalo<kernel_type>(d) && ghost) {
           // we need to compute the face index if we are updating a face that isn't ours
           const int ghost_idx = (kernel_type == EXTERIOR_KERNEL_ALL && d != thread_dim) ?
               ghostFaceIndex<0, nDim>(coord, arg.dim, d, arg.nFace) :
               idx;

           // the link is also taken from the ghost zone, with the opposite gauge parity
           const int gauge_ghost_idx = (nDim == 5 ? ghost_idx % arg.dc.ghostFaceCB[d] : ghost_idx);
           Link U = arg.U.Ghost(d, gauge_ghost_idx, 1 - gauge_parity);
           HalfVector in = arg.in.Ghost(d, 0, ghost_idx + s * arg.dc.ghostFaceCB[d], their_spinor_parity);
           if (d == 3) in *= arg.t_proj_scale;

           out += (conj(U) * in).reconstruct(d, proj_dir);
         } else if (doBulk<kernel_type>() && !ghost) {

           Link U = arg.U(d, gauge_idx, 1 - gauge_parity);
           Vector in = arg.in(back_idx + s * arg.dc.volume_4d_cb, their_spinor_parity);

           // backward hop applies conj(U) to the projected spinor
           out += (conj(U) * in.project(d, proj_dir)).reconstruct(d, proj_dir);
         }
       }
     } // loop over dimension
   }

   /**
      @brief Per-site driver for the Wilson operator.

      Resolves the site coordinates, accumulates the hopping terms via
      applyWilson and writes the result to arg.out:
        - interior kernel with xpay: out = x + a * dslash
        - non-interior kernels, when the thread is active: the value
          already stored in arg.out is added to the (optionally
          a-scaled) halo contribution
      The fused exterior kernel stores nothing for inactive threads.

      @param[in,out] arg Parameter struct holding the accessors and coefficient
      @param[in] idx Thread index handed to getCoords
      @param[in] s Slice index offsetting the site by s * volume_4d_cb
      @param[in] parity Parity of the site being updated
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   __device__ __host__ inline void wilson(Arg &arg, int idx, int s, int parity)
   {
     using real = typename mapper<Float>::type;
     using Vector = ColorSpinor<real, nColor, 4>;

     // every kernel except the fused exterior one starts out active
     bool active = (kernel_type != EXTERIOR_KERNEL_ALL);
     int thread_dim; // dimension this thread works on (fused kernel only), filled in by getCoords
     int coord[nDim];
     const int site_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);

     Vector out;
     applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
         out, arg, coord, site_cb, s, parity, idx, thread_dim, active);

     // single-parity fields always use parity index 0
     const int my_spinor_parity = (nParity == 2) ? parity : 0;
     const int site = site_cb + s * arg.dc.volume_4d_cb;

     if (kernel_type == INTERIOR_KERNEL) {
       if (xpay) {
         Vector x = arg.x(site, my_spinor_parity);
         out = x + arg.a * out;
       }
     } else if (active) {
       // add this halo contribution to the value already stored in the output field
       Vector previous = arg.out(site, my_spinor_parity);
       out = previous + (xpay ? arg.a * out : out);
     }

     const bool store = (kernel_type != EXTERIOR_KERNEL_ALL) || active;
     if (store) arg.out(site, my_spinor_parity) = out;
   }

   /**
      @brief CPU kernel for applying the Wilson operator to a vector.

      Calls wilson() with s = 0 for every index in [0, arg.threads),
      once per parity.

      @param[in] arg Parameter struct (taken by value)
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   void wilsonCPU(Arg arg)
   {
     // A dedicated counter drives the loop; the parity passed on is derived from
     // it rather than written back into it, so the trip count is always nParity
     // regardless of the value stored in arg.parity.
     for (int parity_it = 0; parity_it < nParity; parity_it++) {
       // for full fields then set parity from loop else use arg setting
       const int parity = nParity == 2 ? parity_it : arg.parity;

       for (int x_cb = 0; x_cb < arg.threads; x_cb++) { // 4-d volume
         wilson<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 0, parity);
       } // 4-d volumeCB
     }   // parity
   }

   /**
      @brief GPU kernel for applying the Wilson operator to a vector.

      The x thread/block index selects the site; threads with an index
      of arg.threads or above do nothing.  For full fields (nParity == 2)
      the parity comes from the z thread/block index, otherwise from
      arg.parity.  wilson() is called with s = 0.

      @param[in] arg Parameter struct (taken by value)
   */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   __global__ void wilsonGPU(Arg arg)
   {
     const int site = blockIdx.x * blockDim.x + threadIdx.x;

     if (site < arg.threads) {
       // for full fields set parity from z thread index else use arg setting
       const int parity = (nParity == 2) ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;

       // dispatch with a literal parity so each call sees a constant argument;
       // any other parity value results in no work
       if (parity == 0) {
         wilson<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, site, 0, 0);
       } else if (parity == 1) {
         wilson<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, site, 0, 1);
       }
     }
   }

 } // namespace quda
quda::DslashArg
Definition: dslash_helper.cuh:229

quda::DslashArg::kernel_type
KernelType kernel_type
Definition: dslash_helper.cuh:250

quda::EXTERIOR_KERNEL_ALL
Definition: index_helper.cuh:466

quda::ColorSpinorField
Definition: color_spinor_field.h:311

quda::GaugeField::FieldOrder
QudaGaugeFieldOrder FieldOrder() const
Definition: gauge_field.h:257

quda::WilsonArg::x
const F x
Definition: dslash_wilson.cuh:31

quda::WilsonArg::gauge_direct_load
static constexpr bool gauge_direct_load
Definition: dslash_wilson.cuh:23

errorQuda
#define errorQuda(...)
Definition: util_quda.h:121

quda::ColorSpinor
Definition: color_spinor.h:24

quda::WilsonArg::ghost
static constexpr QudaGhostExchange ghost
Definition: dslash_wilson.cuh:24

quda::WilsonArg::spinor_direct_load
static constexpr bool spinor_direct_load
Definition: dslash_wilson.cuh:19

quda
Definition: blas_cublas.h:5

quda::WilsonArg::out
F out
Definition: dslash_wilson.cuh:29

quda::WilsonArg::U
const G U
Definition: dslash_wilson.cuh:32

quda::WilsonArg::reconstruct
static constexpr QudaReconstructType reconstruct
Definition: dslash_wilson.cuh:22

quda::WilsonArg
Parameter structure for driving the Wilson operator.
Definition: dslash_wilson.cuh:16

quda::wilsonCPU
void wilsonCPU(Arg arg)
Definition: dslash_wilson.cuh:165

quda::wilson
__device__ __host__ void wilson(Arg &arg, int idx, int s, int parity)
Definition: dslash_wilson.cuh:135

quda::INTERIOR_KERNEL
Definition: index_helper.cuh:465

nColor
const int nColor
Definition: covdev_test.cpp:75

quda::WilsonArg::spin_project
static constexpr bool spin_project
Definition: dslash_wilson.cuh:18

QudaGhostExchange
enum QudaGhostExchange_s QudaGhostExchange

quda::WilsonArg::real
mapper< Float >::type real
Definition: dslash_wilson.cuh:27

gauge_field_order.h
Main header file for host and device accessors to GaugeFields.

quda::WilsonArg::G
gauge_mapper< Float, reconstruct, 18, QUDA_STAGGERED_PHASE_NO, gauge_direct_load, ghost >::type G
Definition: dslash_wilson.cuh:25

dslash_helper.cuh

quda::Arg
Definition: spinor_noise.cu:22

quda::WilsonArg::in
const F in
Definition: dslash_wilson.cuh:30

quda::ColorSpinorField::isNative
bool isNative() const
Definition: color_spinor_field.cpp:568

quda::DslashArg::dagger
const bool dagger
Definition: dslash_helper.cuh:244

index_helper.cuh

quda::WilsonArg::a
const real a
Definition: dslash_wilson.cuh:33

quda::mapper
Definition: register_traits.h:43

QudaReconstructType
enum QudaReconstructType_s QudaReconstructType

color_spinor_field_order.h

quda::s
__shared__ float s[]

quda::wilsonGPU
__global__ void wilsonGPU(Arg arg)
Definition: dslash_wilson.cuh:180

quda::colorspinor_mapper
Definition: color_spinor_field_order.h:1602

quda::gauge_mapper
Definition: gauge_field_order.h:3012

quda::arg
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:1076

quda::Vector
VectorXcd Vector
Definition: inv_eigcg_quda.cpp:38

quda::WilsonArg::nSpin
static constexpr int nSpin
Definition: dslash_wilson.cuh:17

quda::WilsonArg::F
colorspinor_mapper< Float, nSpin, nColor, spin_project, spinor_direct_load >::type F
Definition: dslash_wilson.cuh:20

quda::conj
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130

QUDA_GHOST_EXCHANGE_PAD
Definition: enum_quda.h:483

quda::GaugeField::isNative
bool isNative() const
Definition: gauge_field.cpp:167

quda::DslashArg::xpay
const bool xpay
Definition: dslash_helper.cuh:245

quda::ColorSpinorField::FieldOrder
QudaFieldOrder FieldOrder() const
Definition: color_spinor_field.h:483

quda::applyWilson
__device__ __host__ void applyWilson(Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active)
Applies the off-diagonal part of the Wilson operator.
Definition: dslash_wilson.cuh:62

quda::Matrix
Definition: quda_matrix.h:64

quda::WilsonArg::WilsonArg
WilsonArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
Definition: dslash_wilson.cuh:35

quda::DslashArg::parity
const int parity
Definition: dslash_helper.cuh:233

quda::DslashArg::nParity
const int nParity
Definition: dslash_helper.cuh:234

color_spinor.h

quda::GaugeField
Definition: gauge_field.h:164