19 template <
typename Float,
int nSpin,
int nColor, QudaReconstructType reconstruct>
21 typedef typename colorspinor_mapper<Float,nSpin,nColor>::type F;
22 typedef typename gauge_mapper<Float,reconstruct>::type G;
35 CovDevArg(ColorSpinorField &
out,
const ColorSpinorField &
in,
const GaugeField &U,
const int parity,
const int mu)
37 dim{ (3-nParity) *
in.X(0),
in.X(1),
in.X(2),
in.X(3), 1 },
39 volumeCB(
in.VolumeCB())
42 errorQuda(
"Unsupported field order colorspinor=%d gauge=%d combination\n",
in.FieldOrder(), U.FieldOrder());
56 template <
typename Float,
int nDim,
int nColor,
int mu,
typename Vector,
typename Arg>
57 __device__ __host__
inline void applyCovDev(
Vector &
out, Arg &
arg,
int x_cb,
int parity) {
59 const int their_spinor_parity = (
arg.nParity == 2) ? 1-
parity : 0;
72 const int ghost_idx = ghostFaceIndex<1>(
coord,
arg.dim,
d,
arg.nFace);
75 const Vector in =
arg.in.Ghost(
d, 1, ghost_idx, their_spinor_parity);
81 const Vector in =
arg.in(fwd_idx, their_spinor_parity);
88 const int gauge_idx = back_idx;
91 const int ghost_idx = ghostFaceIndex<0>(
coord,
arg.dim,
d,
arg.nFace);
93 const Link U =
arg.U.Ghost(
d, ghost_idx, 1-
parity);
94 const Vector in =
arg.in.Ghost(
d, 0, ghost_idx, their_spinor_parity);
100 const Vector in =
arg.in(back_idx, their_spinor_parity);
110 template <
typename Float,
int nDim,
int nSpin,
int nColor,
typename Arg>
113 typedef ColorSpinor<Float,nColor,nSpin>
Vector;
117 case 0: applyCovDev<Float,nDim,nColor,0>(
out,
arg, x_cb,
parity);
break;
118 case 1: applyCovDev<Float,nDim,nColor,1>(
out,
arg, x_cb,
parity);
break;
119 case 2: applyCovDev<Float,nDim,nColor,2>(
out,
arg, x_cb,
parity);
break;
120 case 3: applyCovDev<Float,nDim,nColor,3>(
out,
arg, x_cb,
parity);
break;
121 case 4: applyCovDev<Float,nDim,nColor,4>(
out,
arg, x_cb,
parity);
break;
122 case 5: applyCovDev<Float,nDim,nColor,5>(
out,
arg, x_cb,
parity);
break;
123 case 6: applyCovDev<Float,nDim,nColor,6>(
out,
arg, x_cb,
parity);
break;
124 case 7: applyCovDev<Float,nDim,nColor,7>(
out,
arg, x_cb,
parity);
break;
130 template <
typename Float,
int nDim,
int nSpin,
int nColor,
typename Arg>
131 void covDevCPU(Arg
arg)
138 for (
int x_cb = 0; x_cb <
arg.volumeCB; x_cb++) {
139 covDev<Float,nDim,nSpin,nColor>(
arg, x_cb,
parity);
146 template <
typename Float,
int nDim,
int nSpin,
int nColor,
typename Arg>
147 __global__
void covDevGPU(Arg
arg)
149 int x_cb = blockIdx.x*
blockDim.x + threadIdx.x;
154 if (x_cb >=
arg.volumeCB)
return;
157 covDev<Float,nDim,nSpin,nColor>(
arg, x_cb,
parity);
160 template <
typename Float,
int nDim,
int nSpin,
int nColor,
typename Arg>
161 class CovDev :
public TunableVectorY {
165 const ColorSpinorField &meta;
167 long long flops()
const 171 long long bytes()
const 173 return arg.out.Bytes() +
arg.in.Bytes() +
arg.nParity*
arg.U.Bytes()*meta.VolumeCB();
175 bool tuneGridDim()
const {
return false; }
176 unsigned int minThreads()
const {
return arg.volumeCB; }
179 CovDev(Arg &
arg,
const ColorSpinorField &meta) : TunableVectorY(
arg.nParity),
arg(
arg), meta(meta)
181 strcpy(aux, meta.AuxString());
184 comm[0] = (
arg.commDim[0] ?
'1' :
'0');
185 comm[1] = (
arg.commDim[1] ?
'1' :
'0');
186 comm[2] = (
arg.commDim[2] ?
'1' :
'0');
187 comm[3] = (
arg.commDim[3] ?
'1' :
'0');
193 virtual ~CovDev() { }
195 void apply(
const cudaStream_t &
stream) {
197 covDevCPU<Float,nDim,nSpin,nColor>(
arg);
200 covDevGPU<Float,nDim,nSpin,nColor> <<<tp.grid,tp.block,tp.shared_bytes,
stream>>>(
arg);
204 TuneKey tuneKey()
const {
return TuneKey(meta.VolString(),
typeid(*this).name(), aux); }
208 template <
typename Float,
int nColor, QudaReconstructType recon>
211 constexpr
int nDim = 4;
213 constexpr
int nSpin = 1;
215 CovDev<Float,nDim,nSpin,nColor,CovDevArg<Float,nSpin,nColor,recon> > myCovDev(
arg,
in);
218 constexpr
int nSpin = 4;
220 CovDev<Float,nDim,nSpin,nColor,CovDevArg<Float,nSpin,nColor,recon> > myCovDev(
arg,
in);
228 template <
typename Float,
int nColor>
232 ApplyCovDev<Float,nColor,QUDA_RECONSTRUCT_NO>(
out,
in, U,
parity,
mu);
234 ApplyCovDev<Float,nColor,QUDA_RECONSTRUCT_12>(
out,
in, U,
parity,
mu);
236 ApplyCovDev<Float,nColor,QUDA_RECONSTRUCT_8> (
out,
in, U,
parity,
mu);
238 errorQuda(
"Unsupported reconstruct type %d\n", U.Reconstruct());
243 template <
typename Float>
249 errorQuda(
"Unsupported number of colors %d\n", U.Ncolor());
259 #endif // GPU_CONTRACT 292 errorQuda(
"Contraction kernels have not been built");
virtual void apply(const cudaStream_t &stream)=0
QudaVerbosity getVerbosity()
#define checkPrecision(...)
static __inline__ dim3 dim3 void size_t cudaStream_t int dim
char * strcpy(char *__dst, const char *__src)
char * strcat(char *__s1, const char *__s2)
void exchangeGhost(QudaParity parity, int nFace, int dagger, const MemoryLocation *pack_destination=nullptr, const MemoryLocation *halo_location=nullptr, bool gdr_send=false, bool gdr_recv=false) const
This is a unified ghost exchange function for doing a complete halo exchange regardless of the type o...
static __device__ __host__ int linkIndexM1(const int x[], const I X[4], const int mu)
void ApplyCovDev(ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int parity, int mu)
Driver for applying the covariant derivative.
for(int s=0;s< param.dc.Ls;s++)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define checkLocation(...)
Main header file for host and device accessors to GaugeFields.
enum QudaParity_s QudaParity
cpuColorSpinorField * out
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
__host__ __device__ ValueType conj(ValueType x)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
static __inline__ size_t size_t d
QudaPrecision Precision() const
static __device__ __host__ int linkIndexP1(const int x[], const I X[4], const int mu)
QudaFieldOrder FieldOrder() const
int comm_dim_partitioned(int dim)
void covDev(cudaColorSpinorField *out, cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int mu, TimeProfile &profile)
static __device__ __host__ void getCoords(int x[], int cb_index, const I X[], int parity)