  DslashArg<Float>(in, U, parity, dagger, a == 0.0 ? false : true, improved_ ? 3 : 1, spin_project, comm_override),
  in(in, improved_ ? 3 : 1),
  tboundary(U.TBoundary()),
  is_first_time_slice(comm_coord(3) == 0 ? true : false),
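
/**
   @brief Applies the off-diagonal (hopping) part of the staggered / asqtad operator.
*/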
template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg, typename Vector>
  const int their_spinor_parity = (arg.nParity == 2) ? 1 - parity : 0;
  for (int d = 0; d < 4; d++) {
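    // standard one-hop term, forward direction: gather the neighbor spinor from
    // the forward ghost face when it lives on a remote rank, otherwise from the bulk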
    const bool ghost = (coord[d] + 1 >= arg.dim[d]) && isActive<kernel_type>(active, thread_dim, d, coord, arg);
    if (doHalo<kernel_type>(d) && ghost) {
      const int ghost_idx = ghostFaceIndexStaggered<1>(coord, arg.dim, d, 1);
      const Link U = arg.improved ? arg.U(d, x_cb, parity) : arg.U(d, x_cb, parity, StaggeredPhase(coord, d, +1, arg));
      Vector in = arg.in.Ghost(d, 1, ghost_idx, their_spinor_parity);

      if (x_cb == 0 && parity == 0 && d == 0) printLink(U);
    } else if (doBulk<kernel_type>() && !ghost) {
      const int fwd_idx = linkIndexP1(coord, arg.dim, d);
      const Link U = arg.improved ? arg.U(d, x_cb, parity) : arg.U(d, x_cb, parity, StaggeredPhase(coord, d, +1, arg));
      Vector in = arg.in(fwd_idx, their_spinor_parity);
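
    // improved (asqtad) three-hop term, forward direction: uses the long-link field L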
    const bool ghost = (coord[d] + 3 >= arg.dim[d]) && isActive<kernel_type>(active, thread_dim, d, coord, arg);
    if (doHalo<kernel_type>(d) && ghost) {
      const int ghost_idx = ghostFaceIndexStaggered<1>(coord, arg.dim, d, arg.nFace);
      const Link L = arg.L(d, x_cb, parity);
      const Vector in = arg.in.Ghost(d, 1, ghost_idx, their_spinor_parity);

    } else if (doBulk<kernel_type>() && !ghost) {
      const int fwd3_idx = linkIndexP3(coord, arg.dim, d);
      const Link L = arg.L(d, x_cb, parity);
      const Vector in = arg.in(fwd3_idx, their_spinor_parity);
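
    // standard one-hop term, backward direction: the link comes from the backward
    // neighbor (opposite parity), via the gauge ghost buffer when it is off-rank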
    const bool ghost = (coord[d] - 1 < 0) && isActive<kernel_type>(active, thread_dim, d, coord, arg);
    if (doHalo<kernel_type>(d) && ghost) {
      const int ghost_idx2 = ghostFaceIndexStaggered<0>(coord, arg.dim, d, 1);
      const int ghost_idx = arg.improved ? ghostFaceIndexStaggered<0>(coord, arg.dim, d, 3) : ghost_idx2;
      const int back_idx = linkIndexM1(coord, arg.dim, d);
      const Link U = arg.improved ? arg.U.Ghost(d, ghost_idx2, 1 - parity) :
        arg.U.Ghost(d, ghost_idx2, 1 - parity, StaggeredPhase(coord, d, -1, arg));
      Vector in = arg.in.Ghost(d, 0, ghost_idx, their_spinor_parity);

    } else if (doBulk<kernel_type>() && !ghost) {
      const int back_idx = linkIndexM1(coord, arg.dim, d);
      const int gauge_idx = back_idx;
      const Link U = arg.improved ? arg.U(d, gauge_idx, 1 - parity) :
        arg.U(d, gauge_idx, 1 - parity, StaggeredPhase(coord, d, -1, arg));
      Vector in = arg.in(back_idx, their_spinor_parity);
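
    // improved (asqtad) three-hop term, backward direction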
    const bool ghost = (coord[d] - 3 < 0) && isActive<kernel_type>(active, thread_dim, d, coord, arg);
    if (doHalo<kernel_type>(d) && ghost) {
      const int ghost_idx = ghostFaceIndexStaggered<0>(coord, arg.dim, d, 1);
      const Link L = arg.L.Ghost(d, ghost_idx, 1 - parity);
      const Vector in = arg.in.Ghost(d, 0, ghost_idx, their_spinor_parity);

    } else if (doBulk<kernel_type>() && !ghost) {
      const int back3_idx = linkIndexM3(coord, arg.dim, d);
      const int gauge_idx = back3_idx;
      const Link L = arg.L(d, gauge_idx, 1 - parity);
      const Vector in = arg.in(back3_idx, their_spinor_parity);
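
/**
   @brief Per-site driver: compute the lattice coordinates of this thread's site,
   apply the hopping terms via applyStaggered, then perform the optional xpay update.
*/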
template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
  int x_cb = arg.improved ? getCoords<nDim, QUDA_4D_PC, kernel_type, Arg, 3>(coord, arg, idx, parity, thread_dim) :
                            getCoords<nDim, QUDA_4D_PC, kernel_type, Arg, 1>(coord, arg, idx, parity, thread_dim);
  applyStaggered<Float, nDim, nColor, nParity, dagger, kernel_type>(out, arg, coord, x_cb, parity, idx, thread_dim, active);
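
  // xpay: at this point out holds the hopping-term result, so the update
  // below computes out = a * x - (hopping term)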
  Vector x = arg.x(x_cb, my_spinor_parity);
  out = arg.a * x - out;

  Vector x = arg.out(x_cb, my_spinor_parity);
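
// GPU kernel entry point: map the CUDA thread and block indices onto a
// checkerboard site index (x_cb) and a parity, then dispatch to staggered()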
template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
  if (x_cb >= arg.threads) return;

  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
  case 0: staggered<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 0); break;
  case 1: staggered<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 1); break;