QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
covDev.cuh
Go to the documentation of this file.
1 #pragma once
2 
3 #include <dslash_helper.cuh>
5 #include <gauge_field_order.h>
6 #include <color_spinor.h>
7 #include <dslash_helper.cuh>
8 #include <index_helper.cuh>
9 
10 namespace quda
11 {
12 
16  template <typename Float, int nColor, QudaReconstructType reconstruct_> struct CovDevArg : DslashArg<Float> {
17  static constexpr int nSpin = 4;
18  static constexpr bool spin_project = false;
19  static constexpr bool spinor_direct_load = false; // false means texture load
21 
22  static constexpr QudaReconstructType reconstruct = reconstruct_;
23  static constexpr bool gauge_direct_load = false; // false means texture load
26 
27  typedef typename mapper<Float>::type real;
28 
29  F out;
30  const F in;
31  const G U;
32  int mu;
34  CovDevArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger,
35  const int *comm_override) :
36 
37  DslashArg<Float>(in, U, parity, dagger, false, 1, spin_project, comm_override),
38  out(out),
39  in(in),
40  U(U),
41  mu(mu)
42  {
43  if (!out.isNative() || !in.isNative() || !U.isNative())
44  errorQuda("Unsupported field order colorspinor(in)=%d gauge=%d combination\n", in.FieldOrder(), U.FieldOrder());
45  }
46  };
47 
  /**
     @brief Gather the neighbor spinor in the direction encoded by mu and
     multiply by the connecting link, accumulating into out.  For mu in [0,3]
     the forward neighbor is fetched and out += U * in; for mu in [4,7] the
     backward neighbor is fetched and out += conj(U) * in, with the link read
     from the backward site.  Boundary sites route through the ghost-zone
     accessors when running a halo (exterior) kernel.
     @param[in,out] out Accumulated result vector
     @param[in,out] arg Parameter struct (field accessors, dims, ghost buffers)
     @param[in] coord Full lattice coordinates of this site
     @param[in] x_cb Checkerboarded site index
     @param[in] parity Parity of the destination site
     @param[in] idx Thread index (unused here; kept for interface uniformity)
     @param[in] thread_dim Dimension this thread owns (fused exterior kernel only)
     @param[in,out] active Whether this thread remains active (fused exterior kernel only)
  */
  template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int mu,
            typename Arg, typename Vector>
  __device__ __host__ inline void applyCovDev(Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx,
                                              int thread_dim, bool &active)
  {

    typedef typename mapper<Float>::type real;
    typedef Matrix<complex<real>, nColor> Link;
    // for a full (two-parity) field the neighbor spinor lives on the opposite parity
    const int their_spinor_parity = (arg.nParity == 2) ? 1 - parity : 0;

    const int d = mu % 4; // dimension of the hop (sign is carried by mu < 4 vs mu >= 4)

    if (mu < 4) { // Forward gather - compute fwd offset for vector fetch

      const int fwd_idx = getNeighborIndexCB<nDim>(coord, d, +1, arg.dc);
      // true when the forward neighbor falls off the local lattice in dimension d
      const bool ghost = (coord[d] + 1 >= arg.dim[d]) && isActive<kernel_type>(active, thread_dim, d, coord, arg);

      const Link U = arg.U(d, x_cb, parity);

      if (doHalo<kernel_type>(d) && ghost) {

        // neighbor is off-node: read the spinor from the forward ghost face
        const int ghost_idx = ghostFaceIndex<1>(coord, arg.dim, d, arg.nFace);
        const Vector in = arg.in.Ghost(d, 1, ghost_idx, their_spinor_parity);

        out += U * in;

      } else if (doBulk<kernel_type>() && !ghost) {

        const Vector in = arg.in(fwd_idx, their_spinor_parity);
        out += U * in;
      }

    } else { // Backward gather - compute back offset for spinor and gauge fetch

      const int back_idx = getNeighborIndexCB<nDim>(coord, d, -1, arg.dc);
      const int gauge_idx = back_idx; // the connecting link is stored on the backward site

      // true when the backward neighbor falls off the local lattice in dimension d
      const bool ghost = (coord[d] - 1 < 0) && isActive<kernel_type>(active, thread_dim, d, coord, arg);

      if (doHalo<kernel_type>(d) && ghost) {

        // neighbor is off-node: link and spinor both come from the backward ghost face
        const int ghost_idx = ghostFaceIndex<0>(coord, arg.dim, d, arg.nFace);
        const Link U = arg.U.Ghost(d, ghost_idx, 1 - parity);
        const Vector in = arg.in.Ghost(d, 0, ghost_idx, their_spinor_parity);

        out += conj(U) * in;
      } else if (doBulk<kernel_type>() && !ghost) {

        const Link U = arg.U(d, gauge_idx, 1 - parity);
        const Vector in = arg.in(back_idx, their_spinor_parity);

        out += conj(U) * in;
      }
    } // Forward/backward derivative
  }
116 
117  // out(x) = M*in
118  template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg>
119  __device__ __host__ inline void covDev(Arg &arg, int idx, int parity)
120  {
121 
122  using real = typename mapper<Float>::type;
124 
125  // is thread active (non-trival for fused kernel only)
126  bool active = kernel_type == EXTERIOR_KERNEL_ALL ? false : true;
127 
128  // which dimension is thread working on (fused kernel only)
129  int thread_dim;
130 
131  int coord[nDim];
132  int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type, Arg>(coord, arg, idx, parity, thread_dim);
133 
134  const int my_spinor_parity = nParity == 2 ? parity : 0;
135  Vector out;
136 
137  switch (arg.mu) { // ensure that mu is known to compiler for indexing in applyCovDev (avoid register spillage)
138  case 0:
139  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 0>(out, arg, coord, x_cb, parity, idx, thread_dim,
140  active);
141  break;
142  case 1:
143  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 1>(out, arg, coord, x_cb, parity, idx, thread_dim,
144  active);
145  break;
146  case 2:
147  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 2>(out, arg, coord, x_cb, parity, idx, thread_dim,
148  active);
149  break;
150  case 3:
151  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 3>(out, arg, coord, x_cb, parity, idx, thread_dim,
152  active);
153  break;
154  case 4:
155  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 4>(out, arg, coord, x_cb, parity, idx, thread_dim,
156  active);
157  break;
158  case 5:
159  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 5>(out, arg, coord, x_cb, parity, idx, thread_dim,
160  active);
161  break;
162  case 6:
163  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 6>(out, arg, coord, x_cb, parity, idx, thread_dim,
164  active);
165  break;
166  case 7:
167  applyCovDev<Float, nDim, nColor, nParity, dagger, kernel_type, 7>(out, arg, coord, x_cb, parity, idx, thread_dim,
168  active);
169  break;
170  }
171 
172  if (kernel_type != INTERIOR_KERNEL) {
173  Vector x = arg.out(x_cb, my_spinor_parity);
174  out += x;
175  }
176 
177  if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(x_cb, my_spinor_parity) = out;
178  }
179 
180  // GPU Kernel for applying the covariant derivative operator to a vector
181  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
182  __global__ void covDevGPU(Arg arg)
183  {
184  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
185  if (x_cb >= arg.threads) return;
186 
187  // for full fields set parity from z thread index else use arg setting
188  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
189 
190  switch (parity) {
191  case 0: covDev<Float, nDim, nColor, nParity, dagger, kernel_type>(arg, x_cb, 0); break;
192  case 1: covDev<Float, nDim, nColor, nParity, dagger, kernel_type>(arg, x_cb, 1); break;
193  }
194  }
195 } // namespace quda
KernelType kernel_type
QudaGaugeFieldOrder FieldOrder() const
Definition: gauge_field.h:257
#define errorQuda(...)
Definition: util_quda.h:121
__global__ void covDevGPU(Arg arg)
Definition: covDev.cuh:182
const F in
Definition: covDev.cuh:30
colorspinor_mapper< Float, nSpin, nColor, spin_project, spinor_direct_load >::type F
Definition: covDev.cuh:20
static constexpr bool spin_project
Definition: covDev.cuh:18
const G U
Definition: covDev.cuh:31
static constexpr bool spinor_direct_load
Definition: covDev.cuh:19
mapper< Float >::type real
Definition: covDev.cuh:27
__device__ __host__ void covDev(Arg &arg, int idx, int parity)
Definition: covDev.cuh:119
__device__ __host__ void applyCovDev(Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)
Definition: covDev.cuh:63
const int nColor
Definition: covdev_test.cpp:75
enum QudaGhostExchange_s QudaGhostExchange
Parameter structure for driving the covariant derivative operator.
Definition: covDev.cuh:16
Main header file for host and device accessors to GaugeFields.
const int nParity
Definition: spinor_noise.cu:25
CovDevArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override)
Definition: covDev.cuh:34
enum QudaReconstructType_s QudaReconstructType
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
VectorXcd Vector
gauge_mapper< Float, reconstruct, 18, QUDA_STAGGERED_PHASE_NO, gauge_direct_load, ghost >::type G
Definition: covDev.cuh:25
static constexpr int nSpin
Definition: covDev.cuh:17
static constexpr QudaReconstructType reconstruct
Definition: covDev.cuh:22
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
static constexpr QudaGhostExchange ghost
Definition: covDev.cuh:24
bool isNative() const
QudaFieldOrder FieldOrder() const
static constexpr bool gauge_direct_load
Definition: covDev.cuh:23