quda-ref/v1.0.0/dslash__domain__wall__4d_8cuh_source.html

 #pragma once

 #include <kernels/dslash_wilson.cuh>

 namespace quda
 {

   // Capacity, in bytes, of the constant-memory buffer declared on the next line.
   constexpr int size = 4096;
   // Raw byte buffer in constant memory. DomainWall4DArg::a5() reinterprets it as an array of complex<real> and
   // indexes it by the fifth-dimension coordinate when compiled for the device. Because it is declared static,
   // every translation unit that includes this header gets its own copy of the symbol.
   // NOTE(review): the code that fills this buffer is not in this file -- presumably the host copies the a_5
   // coefficients into it before launching the kernel; confirm against the launching code.
   static __constant__ char mobius_d[size]; // constant buffer used for Mobius coefficients for GPU kernel

   /**
      @brief Parameter structure for driving the 4-d domain-wall operator. It extends WilsonArg with the extent
      of the fifth dimension and one complex coefficient per fifth-dimension slice.
      @tparam Float Precision tag; arithmetic is done in mapper<Float>::type
      @tparam nColor Number of colors, forwarded to WilsonArg
      @tparam reconstruct_ Gauge reconstruction type, forwarded to WilsonArg
    */
   template <typename Float, int nColor, QudaReconstructType reconstruct_>
   struct DomainWall4DArg : WilsonArg<Float, nColor, reconstruct_> {
     typedef typename mapper<Float>::type real;

     // extent of the fifth dimension, taken from in.X(4)
     int Ls;
     // per-slice coefficients; this is the copy a5() returns when compiled for the host
     complex<real> a_5[QUDA_MAX_DWF_LS];

     // Device code reads the coefficients out of mobius_d instead of a_5. A full table of QUDA_MAX_DWF_LS
     // entries therefore has to fit inside that buffer, otherwise a5() could index past its end.
     static_assert(sizeof(complex<real>) * QUDA_MAX_DWF_LS <= sizeof(mobius_d),
                   "mobius_d is too small to hold QUDA_MAX_DWF_LS complex coefficients");

     /**
        @brief Helper function for grabbing the coefficient of slice s, whether we are on the GPU or CPU.
        @param[in] s Fifth-dimension slice index
        @return The coefficient read from mobius_d (device compilation) or from a_5 (host compilation)
      */
     inline __device__ __host__ complex<real> a5(int s)
     {
 #ifdef __CUDA_ARCH__
       return reinterpret_cast<const complex<real> *>(mobius_d)[s];
 #else
       return a_5[s];
 #endif
     }

     /**
        @brief Build the argument struct and fill the host-side coefficient table a_5.
        @param[out] out Output field, forwarded to WilsonArg
        @param[in] in Input field, forwarded to WilsonArg; in.X(4) sets Ls
        @param[in] U Gauge field, forwarded to WilsonArg
        @param[in] a Scale factor; WilsonArg receives a when xpay is true and 0.0 otherwise
        @param[in] m_5 Used only in the Mobius coefficient formula
        @param[in] b_5 Mobius coefficients, one per slice; nullptr selects the Shamir branch
        @param[in] c_5 Only tested against nullptr here; nullptr selects the Shamir branch
        @param[in] xpay Whether the xpay variant is being applied
        @param[in] x Field forwarded to WilsonArg
        @param[in] parity Parity forwarded to WilsonArg
        @param[in] dagger Dagger flag forwarded to WilsonArg
        @param[in] comm_override Forwarded to WilsonArg
      */
     DomainWall4DArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5,
         const Complex *b_5, const Complex *c_5, bool xpay, const ColorSpinorField &x, int parity, bool dagger,
         const int *comm_override) :
         WilsonArg<Float, nColor, reconstruct_>(out, in, U, xpay ? a : 0.0, x, parity, dagger, comm_override),
         Ls(in.X(4))
     {
       // NOTE(review): Ls is not compared against QUDA_MAX_DWF_LS in this constructor, so the loops below rely
       // on the caller providing a field whose fifth dimension fits in a_5 -- confirm this is enforced upstream.
       const bool shamir = (b_5 == nullptr || c_5 == nullptr);
       if (shamir) {
         for (int s = 0; s < Ls; s++) a_5[s] = a; // 4-d Shamir
       } else {
         for (int s = 0; s < Ls; s++) a_5[s] = 0.5 * a / (b_5[s] * (m_5 + 4.0) + 1.0); // 4-d Mobius
       }
     }
   };

   /**
      @brief Apply the 4-d Wilson stencil at one site of one fifth-dimension slice and store the result in arg.out.
      For the interior kernel with xpay the result is x + a5(s) * out; for every other kernel type an active
      thread adds its contribution onto the value already held in arg.out.
      @param[in,out] arg Kernel argument struct
      @param[in] idx Thread index handed to getCoords and applyWilson
      @param[in] s Fifth-dimension slice index
      @param[in] parity Site parity
    */
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   __device__ __host__ inline void domainWall4D(Arg &arg, int idx, int s, int parity)
   {
     typedef typename mapper<Float>::type real;
     typedef ColorSpinor<real, nColor, 4> Vector;

     // is thread active (non-trivial for fused kernel only); handed to applyWilson, which presumably updates it
     bool active = (kernel_type != EXTERIOR_KERNEL_ALL);
     // which dimension is thread working on (fused kernel only); set by getCoords
     int thread_dim;
     int coord[nDim];
     int site_cb = getCoords<nDim, QUDA_4D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);

     // single-parity fields are always addressed with parity 0
     const int my_spinor_parity = (nParity == 2) ? parity : 0;

     Vector out;
     applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
         out, arg, coord, site_cb, s, parity, idx, thread_dim, active);

     // 5-d checkerboard index: slice s is offset by s 4-d checkerboard volumes
     int site_5d = site_cb + s * arg.dc.volume_4d_cb;

     if (kernel_type == INTERIOR_KERNEL) {
       if (xpay) {
         // interior xpay: out = x + a5(s) * D in
         Vector src = arg.x(site_5d, my_spinor_parity);
         out = src + arg.a5(s) * out;
       }
     } else if (active) {
       // exterior kernels add their contribution onto what is already stored in arg.out
       Vector partial = arg.out(site_5d, my_spinor_parity);
       out = partial + (xpay ? arg.a5(s) * out : out);
     }

     // the fused exterior kernel only writes from threads that are active
     const bool store = (kernel_type != EXTERIOR_KERNEL_ALL || active);
     if (store) arg.out(site_5d, my_spinor_parity) = out;
   }

   // CPU Kernel for applying 4-d Wilson operator to a 5-d vector (replicated along fifth dimension).
   // Serially visits every parity, every thread index in [0, arg.threads) and every slice in [0, arg.Ls),
   // calling domainWall4D once for each combination.
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   void domainWall4DCPU(Arg &arg)
   {
     // The loop counter is kept separate from the parity that is processed, so the loop always runs exactly
     // nParity times regardless of the value stored in arg.parity.
     for (int p = 0; p < nParity; p++) {
       // for full fields then set parity from loop else use arg setting
       const int parity = (nParity == 2) ? p : arg.parity;

       for (int x_cb = 0; x_cb < arg.threads; x_cb++) { // 4-d volume
         for (int s = 0; s < arg.Ls; s++) {
           domainWall4D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x_cb, s, parity);
         }
       } // 4-d volumeCB
     }   // parity
   }

   // GPU Kernel for applying 4-d Wilson operator to a 5-d vector (replicated along fifth dimension).
   // Thread mapping: the x thread dimension is the index passed to domainWall4D as idx, the y dimension is the
   // fifth-dimension slice s, and for full fields (nParity == 2) the z dimension is the parity.
   // Threads whose x index is >= arg.threads or whose y index is >= arg.Ls do nothing.
   template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
   __global__ void domainWall4DGPU(Arg arg)
   {
     const int site = blockIdx.x * blockDim.x + threadIdx.x;

     // for this operator Ls is mapped to the y thread dimension
     const int slice = blockIdx.y * blockDim.y + threadIdx.y;

     // drop threads that fall outside the launch-padded ranges
     if (site >= arg.threads || slice >= arg.Ls) return;

     // for full fields set parity from z thread index else use arg setting
     const int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;

     // the parity is passed as a literal in each branch; any other value is ignored
     if (parity == 0) {
       domainWall4D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, site, slice, 0);
     } else if (parity == 1) {
       domainWall4D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, site, slice, 1);
     }
   }

 } // namespace quda
quda::mobius_d
static __constant__ char mobius_d[size]
Definition: dslash_domain_wall_4d.cuh:9

quda::DslashArg::kernel_type
KernelType kernel_type
Definition: dslash_helper.cuh:250

quda::EXTERIOR_KERNEL_ALL
Definition: index_helper.cuh:466

quda::ColorSpinorField
Definition: color_spinor_field.h:311

quda::WilsonArg::x
const F x
Definition: dslash_wilson.cuh:31

quda::ColorSpinor
Definition: color_spinor.h:24

dslash_wilson.cuh

quda::DomainWall4DArg::a5
__device__ __host__ complex< real > a5(int s)
Helper function for grabbing the constant struct, whether we are on the GPU or CPU.
Definition: dslash_domain_wall_4d.cuh:21

quda::DomainWall4DArg::DomainWall4DArg
DomainWall4DArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
Definition: dslash_domain_wall_4d.cuh:30

quda
Definition: blas_cublas.h:5

quda::WilsonArg::out
F out
Definition: dslash_wilson.cuh:29

quda::WilsonArg::U
const G U
Definition: dslash_wilson.cuh:32

quda::WilsonArg
Parameter structure for driving the Wilson operator.
Definition: dslash_wilson.cuh:16

quda::INTERIOR_KERNEL
Definition: index_helper.cuh:465

quda::domainWall4DGPU
__global__ void domainWall4DGPU(Arg arg)
Definition: dslash_domain_wall_4d.cuh:90

nColor
const int nColor
Definition: covdev_test.cpp:75

quda::size
constexpr int size
Definition: dslash_domain_wall_4d.cuh:8

quda::DomainWall4DArg::real
mapper< Float >::type real
Definition: dslash_domain_wall_4d.cuh:13

X
int X[4]
Definition: covdev_test.cpp:70

quda::Complex
std::complex< double > Complex
Definition: quda_internal.h:46

quda::domainWall4D
__device__ __host__ void domainWall4D(Arg &arg, int idx, int s, int parity)
Definition: dslash_domain_wall_4d.cuh:44

quda::Arg
Definition: spinor_noise.cu:22

quda::WilsonArg::in
const F in
Definition: dslash_wilson.cuh:30

quda::DslashArg::dagger
const bool dagger
Definition: dslash_helper.cuh:244

quda::DomainWall4DArg::a_5
complex< real > a_5[QUDA_MAX_DWF_LS]
Definition: dslash_domain_wall_4d.cuh:15

quda::WilsonArg::a
const real a
Definition: dslash_wilson.cuh:33

quda::mapper
Definition: register_traits.h:43

quda::s
__shared__ float s[]

QUDA_MAX_DWF_LS
#define QUDA_MAX_DWF_LS
Maximum length of the Ls dimension for domain-wall fermions.
Definition: quda_constants.h:49

quda::domainWall4DCPU
void domainWall4DCPU(Arg &arg)
Definition: dslash_domain_wall_4d.cuh:74

quda::arg
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:1076

quda::Vector
VectorXcd Vector
Definition: inv_eigcg_quda.cpp:38

quda::DomainWall4DArg::Ls
int Ls
Definition: dslash_domain_wall_4d.cuh:14

quda::DomainWall4DArg
Definition: dslash_domain_wall_4d.cuh:12

quda::DslashArg::xpay
const bool xpay
Definition: dslash_helper.cuh:245

quda::DslashArg::parity
const int parity
Definition: dslash_helper.cuh:233

quda::DslashArg::nParity
const int nParity
Definition: dslash_helper.cuh:234

quda::GaugeField
Definition: gauge_field.h:164