QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
dslash_domain_wall_5d.cuh
Go to the documentation of this file.
1 #pragma once
2 
4 
5 namespace quda
6 {
7 
8  // fixme: fused kernel (thread dim mappers set after construction?) and xpay
9 
10  template <typename Float, int nColor, QudaReconstructType reconstruct_>
11  struct DomainWall5DArg : WilsonArg<Float, nColor, reconstruct_> {
12  typedef typename mapper<Float>::type real;
13  int Ls;
14  real a;
15  real m_f;
17  DomainWall5DArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f,
18  bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override) :
19  WilsonArg<Float, nColor, reconstruct_>(out, in, U, xpay ? a : 0.0, x, parity, dagger, comm_override),
20  Ls(in.X(4)),
21  a(a),
22  m_f(m_f)
23  {
24  }
25  };
26 
27  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
28  __device__ __host__ inline void domainWall5D(Arg &arg, int idx, int parity)
29  {
30  typedef typename mapper<Float>::type real;
32 
33  bool active
34  = kernel_type == EXTERIOR_KERNEL_ALL ? false : true; // is thread active (non-trival for fused kernel only)
35  int thread_dim; // which dimension is thread working on (fused kernel only)
36  int coord[nDim];
37  int x_cb = getCoords<nDim, QUDA_5D_PC, kernel_type>(coord, arg, idx, parity, thread_dim);
38 
39  const int my_spinor_parity = nParity == 2 ? parity : 0;
40  Vector out;
41 
42  // we pass s=0, since x_cb is a 5-d index that includes s
43  applyWilson<Float, nDim, nColor, nParity, dagger, kernel_type>(
44  out, arg, coord, x_cb, 0, parity, idx, thread_dim, active);
45 
46  if (kernel_type == INTERIOR_KERNEL) { // 5th dimension derivative always local
47  constexpr int d = 4;
48  const int s = coord[4];
49  const int their_spinor_parity = nParity == 2 ? 1 - parity : 0;
50  {
51  const int fwd_idx = getNeighborIndexCB<nDim>(coord, d, +1, arg.dc);
52  constexpr int proj_dir = dagger ? +1 : -1;
53  Vector in = arg.in(fwd_idx, their_spinor_parity);
54  if (s == arg.Ls - 1) {
55  out += (-arg.m_f * in.project(d, proj_dir)).reconstruct(d, proj_dir);
56  } else {
57  out += in.project(d, proj_dir).reconstruct(d, proj_dir);
58  }
59  }
60 
61  {
62  const int back_idx = getNeighborIndexCB<nDim>(coord, d, -1, arg.dc);
63  constexpr int proj_dir = dagger ? -1 : +1;
64  Vector in = arg.in(back_idx, their_spinor_parity);
65  if (s == 0) {
66  out += (-arg.m_f * in.project(d, proj_dir)).reconstruct(d, proj_dir);
67  } else {
68  out += in.project(d, proj_dir).reconstruct(d, proj_dir);
69  }
70  }
71  }
72 
73  if (xpay && kernel_type == INTERIOR_KERNEL) {
74  Vector x = arg.x(x_cb, my_spinor_parity);
75  out = x + arg.a * out;
76  } else if (kernel_type != INTERIOR_KERNEL && active) {
77  Vector x = arg.out(x_cb, my_spinor_parity);
78  out = x + (xpay ? arg.a * out : out);
79  }
80 
81  if (kernel_type != EXTERIOR_KERNEL_ALL || active) arg.out(x_cb, my_spinor_parity) = out;
82  }
83 
84  // CPU Kernel for applying 5-d Wilson operator to a 5-d vector
85  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
87  {
88  for (int parity = 0; parity < nParity; parity++) {
89  // for full fields then set parity from loop else use arg setting
90  parity = nParity == 2 ? parity : arg.parity;
91 
92  for (int x5_cb = 0; x5_cb < arg.threads * arg.Ls; x5_cb++) { // 5-d volume
93  domainWall5D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x5_cb, parity);
94  } // 5-d volumeCB
95  } // parity
96  }
97 
98  // GPU Kernel for applying 5-d Wilson operator to a 5-d vector
99  template <typename Float, int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg>
100  __global__ void domainWall5DGPU(Arg arg)
101  {
102  int x_cb = blockIdx.x * blockDim.x + threadIdx.x;
103  if (x_cb >= arg.threads) return;
104 
105  // for this operator Ls is mapped to the y thread dimension
106  int s = blockIdx.y * blockDim.y + threadIdx.y;
107  if (s >= arg.Ls) return;
108 
109  int x5_cb = s * arg.threads + x_cb; // 5-d checkerboard index
110 
111  // for full fields set parity from z thread index else use arg setting
112  int parity = nParity == 2 ? blockDim.z * blockIdx.z + threadIdx.z : arg.parity;
113 
114  switch (parity) {
115  case 0: domainWall5D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x5_cb, 0); break;
116  case 1: domainWall5D<Float, nDim, nColor, nParity, dagger, xpay, kernel_type>(arg, x5_cb, 1); break;
117  }
118  }
119 
120 } // namespace quda
KernelType kernel_type
mapper< Float >::type real
__global__ void domainWall5DGPU(Arg arg)
void domainWall5DCPU(Arg &arg)
static constexpr QudaReconstructType reconstruct
Parameter structure for driving the Wilson operator.
const int nColor
Definition: covdev_test.cpp:75
DomainWall5DArg(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override)
int X[4]
Definition: covdev_test.cpp:70
__shared__ float s[]
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
VectorXcd Vector
__device__ __host__ void domainWall5D(Arg &arg, int idx, int parity)