QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
laplace.cuh
Go to the documentation of this file.
1 #pragma once
2 
3 #include <dslash_helper.cuh>
5 #include <gauge_field_order.h>
6 #include <color_spinor.h>
7 #include <dslash_helper.cuh>
8 #include <index_helper.cuh>
9 
10 namespace quda
11 {
12 
  /**
     @brief Parameter structure for driving the Laplace operator
     @tparam Float Floating-point precision of the fields
     @tparam nColor Number of colors
     @tparam reconstruct_ Gauge-link reconstruction type
  */
  template <typename Float, int nColor, QudaReconstructType reconstruct_> struct LaplaceArg : DslashArg<Float> {
    static constexpr int nSpin = 1;                   // the Laplace operator acts on spin-less (staggered-like) fields
    static constexpr bool spin_project = false;       // no spin projection possible for a one-spin field
    static constexpr bool spinor_direct_load = false; // false means texture load

    static constexpr QudaReconstructType reconstruct = reconstruct_;
    static constexpr bool gauge_direct_load = false; // false means texture load

    // NOTE(review): the accessor typedefs F (colorspinor_mapper type) and G (gauge_mapper type)
    // and the ghost-exchange constant were collapsed out of this extraction -- confirm against
    // the original header before editing.

    typedef typename mapper<Float>::type real; // register ("mapped") precision corresponding to Float

    F out;        // output vector field
    const F in;   // input vector field
    const F x;    // auxiliary input field accumulated in the xpay update
    const G U;    // the gauge field
    const real a; // xpay scale factor
    int dir;      // operator selector: 4 = full 4-d Laplace, 3 = spatial-only (t omitted)

    /**
       @brief Constructor for the Laplace operator arguments
       @param[out] out Output result field
       @param[in] in Input field the operator is applied to
       @param[in] U Gauge field
       @param[in] dir Operator direction selector (must be 3 or 4; see dir member)
       @param[in] a Scale factor for the xpay update (xpay enabled iff a != 0)
       @param[in] x Auxiliary field used when xpay is enabled
       @param[in] parity Parity being acted upon
       @param[in] dagger Whether this is the Hermitian-conjugate application
       @param[in] comm_override Override for which dimensions are communicated
    */
    LaplaceArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double a,
               const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :

      // base-class xpay flag is derived from a != 0; nFace is fixed at 1 for this stencil
      DslashArg<Float>(in, U, parity, dagger, a != 0.0 ? true : false, 1, false, comm_override),
      out(out),
      in(in),
      U(U),
      dir(dir),
      x(x),
      a(a)
    {
      // the accessor types used here only support native field orders
      if (!out.isNative() || !x.isNative() || !in.isNative() || !U.isNative())
        errorQuda("Unsupported field order colorspinor(in)=%d gauge=%d combination\n", in.FieldOrder(), U.FieldOrder());
      // dir = 3 selects the spatial (3-d) operator, dir = 4 the full 4-d operator
      if (dir < 3 || dir > 4) errorQuda("Unsupported laplace direction %d (must be 3 or 4)", dir);
    }
  };
52 
  /**
     @brief Accumulate the gauge-covariant Laplace stencil into out:

       out += sum_{d != dir} [ U_d(x) in(x+d) + U_d^dag(x-d) in(x-d) ]

     i.e. the forward and backward hopping terms in every dimension EXCEPT the one
     selected by the dir template parameter (dir = -1 excludes nothing, giving the
     full nDim-dimensional stencil; dir = 3 omits the t dimension).

     Neighbours that fall off the local lattice are fetched from the ghost buffers
     when running a halo (exterior) kernel, and skipped by the bulk (interior) kernel.

     @tparam dir Dimension excluded from the stencil (-1 to exclude none)
     @param[in,out] out Accumulated result vector
     @param[in] arg Parameter struct (LaplaceArg)
     @param[in] coord Full lattice coordinates of this site
     @param[in] x_cb Checkerboarded site index
     @param[in] parity Parity of the site being updated
     @param[in] idx Thread index (unused here; kept for interface uniformity with other stencils)
     @param[in] thread_dim Which dimension this thread handles (fused exterior kernel only)
     @param[in,out] active Whether this thread remains active (fused exterior kernel only)
  */
  template <typename Float, int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int dir,
            typename Arg, typename Vector>
  __device__ __host__ inline void applyLaplace(Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx,
                                               int thread_dim, bool &active)
  {

    typedef typename mapper<Float>::type real;
    typedef Matrix<complex<real>, nColor> Link;
    // neighbouring sites live on the opposite parity for a full (two-parity) field
    const int their_spinor_parity = (arg.nParity == 2) ? 1 - parity : 0;

#pragma unroll
    for (int d = 0; d < nDim; d++) { // loop over dimension
      if (d != dir) {                // skip the excluded dimension (none when dir == -1)
        {
          // Forward gather - compute fwd offset for vector fetch
          const bool ghost = (coord[d] + 1 >= arg.dim[d]) && isActive<kernel_type>(active, thread_dim, d, coord, arg);

          if (doHalo<kernel_type>(d) && ghost) {

            // forward neighbour is off-node: read it from the forward ghost buffer
            // const int ghost_idx = ghostFaceIndexStaggered<1>(coord, arg.dim, d, 1);
            const int ghost_idx = ghostFaceIndex<1>(coord, arg.dim, d, arg.nFace);
            const Link U = arg.U(d, x_cb, parity);
            const Vector in = arg.in.Ghost(d, 1, ghost_idx, their_spinor_parity);

            out += U * in;
          } else if (doBulk<kernel_type>() && !ghost) {

            // forward neighbour is local: hop with the link at this site
            const int fwd_idx = linkIndexP1(coord, arg.dim, d);
            const Link U = arg.U(d, x_cb, parity);
            const Vector in = arg.in(fwd_idx, their_spinor_parity);

            out += U * in;
          }
        }
        {
          // Backward gather - compute back offset for spinor and gauge fetch

          const int back_idx = linkIndexM1(coord, arg.dim, d);
          const int gauge_idx = back_idx; // the backward hop uses the link that lives on the neighbour site

          const bool ghost = (coord[d] - 1 < 0) && isActive<kernel_type>(active, thread_dim, d, coord, arg);

          if (doHalo<kernel_type>(d) && ghost) {

            // backward neighbour is off-node: link and spinor both come from the ghost buffers
            // const int ghost_idx = ghostFaceIndexStaggered<0>(coord, arg.dim, d, 1);
            const int ghost_idx = ghostFaceIndex<0>(coord, arg.dim, d, arg.nFace);

            const Link U = arg.U.Ghost(d, ghost_idx, 1 - parity);
            const Vector in = arg.in.Ghost(d, 0, ghost_idx, their_spinor_parity);

            out += conj(U) * in; // dagger of the link for the backward direction
          } else if (doBulk<kernel_type>() && !ghost) {

            const Link U = arg.U(d, gauge_idx, 1 - parity);
            const Vector in = arg.in(back_idx, their_spinor_parity);

            out += conj(U) * in;
          }
        }
      }
    }
  }
129 
130  // out(x) = M*in
  // out(x) = M*in
  /**
     @brief Driver for a single site of the Laplace operator: computes the stencil,
     optionally applies the xpay update out = x + a*stencil, and for exterior
     kernels accumulates onto the partial result already stored in arg.out.

     @param[in,out] arg Parameter struct (LaplaceArg)
     @param[in] idx Thread/site index being processed
     @param[in] parity Parity of the site being updated
  */
  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
  __device__ __host__ inline void laplace(Arg &arg, int idx, int parity)
  {

    using real = typename mapper<Float>::type;
    // NOTE(review): the Vector typedef (the ColorSpinor type used below) was collapsed
    // out of this extraction -- confirm against the original header.

    // is thread active (non-trival for fused kernel only)
    bool active = kernel_type == EXTERIOR_KERNEL_ALL ? false : true;

    // which dimension is thread working on (fused kernel only)
    int thread_dim;

    int coord[nDim];
    int x_cb = getCoords<nDim, QUDA_4D_PC, kernel_type, Arg>(coord, arg, idx, parity, thread_dim);

    const int my_spinor_parity = nParity == 2 ? parity : 0;
    Vector out;

    //We instantiate two kernel types:
    //case 4 is an operator in all x,y,z,t dimensions
    //case 3 is a spatial operator only, the t dimension is omitted.
    switch (arg.dir) {
    case 3:
      // template dir = 3 excludes the t dimension from the stencil
      applyLaplace<Float, nDim, nColor, nParity, dagger, kernel_type, 3>(out, arg, coord, x_cb, parity, idx, thread_dim,
                                                                         active);
      break;
    case 4:
    default:
      // template dir = -1 excludes no dimension, i.e. the full 4-d stencil
      applyLaplace<Float, nDim, nColor, nParity, dagger, kernel_type, -1>(out, arg, coord, x_cb, parity, idx,
                                                                          thread_dim, active);
      break;
    }

    if (xpay && kernel_type == INTERIOR_KERNEL) {
      // interior xpay: out = x + a * stencil
      Vector x = arg.x(x_cb, my_spinor_parity);
      out = x + arg.a * out;
    } else if (kernel_type != INTERIOR_KERNEL) {
      // exterior kernel: accumulate onto the partial result the interior kernel stored
      Vector x = arg.out(x_cb, my_spinor_parity);
      out = x + (xpay ? arg.a * out : out);
    }

    // exterior fused kernel only writes back from threads that touched a halo face
    if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(x_cb, my_spinor_parity) = out;
  }
175 
176  // GPU Kernel for applying the covariant derivative operator to a vector
177  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
178  __global__ void laplaceGPU(Arg arg)
179  {
180 
181  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
182  if (x_cb >= arg.threads) return;
183 
184  // for full fields set parity from z thread index else use arg setting
185  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
186 
187  switch (parity) {
188  case 0: laplace<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 0); break;
189  case 1: laplace<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, 1); break;
190  }
191  }
192 } // namespace quda
LaplaceArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
Definition: laplace.cuh:36
KernelType kernel_type
QudaGaugeFieldOrder FieldOrder() const
Definition: gauge_field.h:257
#define errorQuda(...)
Definition: util_quda.h:121
gauge_mapper< Float, reconstruct, 18, QUDA_STAGGERED_PHASE_NO, gauge_direct_load, ghost >::type G
Definition: laplace.cuh:25
mapper< Float >::type real
Definition: laplace.cuh:27
Parameter structure for driving the Laplace operator.
Definition: laplace.cuh:16
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
colorspinor_mapper< Float, nSpin, nColor, spin_project, spinor_direct_load >::type F
Definition: laplace.cuh:20
const int nColor
Definition: covdev_test.cpp:75
enum QudaGhostExchange_s QudaGhostExchange
Main header file for host and device accessors to GaugeFields.
static constexpr int nSpin
Definition: laplace.cuh:17
static constexpr QudaGhostExchange ghost
Definition: laplace.cuh:24
static constexpr bool spin_project
Definition: laplace.cuh:18
const int nParity
Definition: spinor_noise.cu:25
enum QudaReconstructType_s QudaReconstructType
__device__ __host__ void applyLaplace(Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active)
Definition: laplace.cuh:69
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
VectorXcd Vector
static constexpr bool spinor_direct_load
Definition: laplace.cuh:19
__device__ __host__ void laplace(Arg &arg, int idx, int parity)
Definition: laplace.cuh:132
__global__ void laplaceGPU(Arg arg)
Definition: laplace.cuh:178
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
bool isNative() const
QudaFieldOrder FieldOrder() const
const real a
Definition: laplace.cuh:33
static constexpr QudaReconstructType reconstruct
Definition: laplace.cuh:22
static constexpr bool gauge_direct_load
Definition: laplace.cuh:23