// NOTE(review): this chunk reads like a rendered source listing rather than the raw file: the bare
// integers in front of statements (5, 6, 13, 15, 18, ...) look like listing line numbers fused into
// the text, and lines such as the class's opening brace are not visible. Confirm against the real file.
// DOUBLE_TOL / SINGLE_TOL: presumably per-precision tolerances; nothing in this chunk uses them.
// GaugeAPE: class template over <Float, Arg> deriving from TunableVectorYZ. The member visible here
// is `meta`, a const reference to a GaugeField.
5 #define DOUBLE_TOL 1e-15 6 #define SINGLE_TOL 2e-6 13 #ifdef GPU_GAUGE_TOOLS 15 template <
typename Float,
typename Arg>
class GaugeAPE : TunableVectorYZ
18 const GaugeField &meta;
21 bool tuneGridDim()
const {
return false; }
22 unsigned int minThreads()
const {
return arg.threads; }
// Constructor: passes (2, 3) to the TunableVectorYZ base and initialises the `arg` and `meta`
// members from the parameters of the same name; the visible body statement calls
// create_jitify_program("kernels/gauge_ape.cuh").
// NOTE(review): the body braces are not visible, and listing lines 27-28 and 30-31 are absent —
// there may be a preprocessor guard around the jitify call. Confirm against the real file.
26 GaugeAPE(Arg &arg,
const GaugeField &meta) : TunableVectorYZ(2, 3),
arg(arg), meta(meta)
29 create_jitify_program(
"kernels/gauge_ape.cuh");
32 virtual ~GaugeAPE() {}
// Launches the computeAPEStep kernel on `arg` using the grid, block and shared-memory sizes in `tp`.
// Two launch forms are visible: a jitify one (program->kernel(...).instantiate(...).configure(...)),
// whose result is stored in jitify_error, and a direct <<<...>>> launch.
// NOTE(review): the definition of `tp`, the function braces, the end of the jitify call chain and
// whatever selects between the two forms are not visible in this chunk (listing lines 35-38, 43-44
// and 46 onward are absent) — confirm against the real file.
34 void apply(
const cudaStream_t &
stream)
39 using namespace jitify::reflection;
40 jitify_error = program->kernel(
"quda::computeAPEStep")
41 .instantiate(Type<Float>(), Type<Arg>())
42 .configure(tp.grid, tp.block, tp.shared_bytes, stream)
// NOTE(review): the jitify path above is configured with `stream`, but the launch below passes no
// stream argument, so it is issued to the default stream. This looks unintended; consider
// <<<tp.grid, tp.block, tp.shared_bytes, stream>>>.
45 computeAPEStep<Float><<<tp.grid, tp.block, tp.shared_bytes>>>(
arg);
53 TuneKey tuneKey()
const 55 std::stringstream aux;
56 aux <<
"threads=" << arg.threads <<
",prec=" <<
sizeof(Float);
57 return TuneKey(meta.VolString(),
typeid(*this).name(), aux.str().c_str());
60 void preTune() { arg.dest.save(); }
61 void postTune() { arg.dest.load(); }
63 long long flops()
const {
return 3 * (2 + 2 * 4) * 198ll * arg.threads; }
64 long long bytes()
const {
return 3 * ((1 + 2 * 6) * arg.origin.Bytes() + arg.dest.Bytes()) * arg.threads; }
// Helper templated on the element type and on the origin/destination accessor types: constructs a
// GaugeAPE<Float, GaugeAPEArg<Float, GaugeOr, GaugeDs>> named gaugeAPE from `arg` and dataOr.
// NOTE(review): the declaration of `arg`, the use of `alpha`, and everything after the construction
// (listing lines 69 and 71 onward) are not visible in this chunk — confirm against the real file.
67 template<
typename Float,
typename GaugeOr,
typename GaugeDs>
68 void APEStep(GaugeOr origin, GaugeDs dest,
const GaugeField& dataOr, Float alpha) {
70 GaugeAPE<Float, GaugeAPEArg<Float, GaugeOr, GaugeDs>> gaugeAPE(
arg, dataOr);
// Dispatch over gauge reconstruction types. For each destination mapping GDs
// (QUDA_RECONSTRUCT_NO / _12 / _8) an origin mapping GOr is chosen from the same three options,
// and the accessor-typed APEStep overload is called with GOr(dataOr) and GDs(dataDs).
// Unsupported types are reported through errorQuda with the offending Reconstruct() value.
// NOTE(review): the conditions selecting each branch and all braces are not visible in this chunk
// (presumably tests on dataDs.Reconstruct() / dataOr.Reconstruct(), judging by the error
// messages) — confirm against the real file.
75 template <
typename Float>
void APEStep(GaugeField &dataDs,
const GaugeField &dataOr, Float alpha)
// Destination accessor: QUDA_RECONSTRUCT_NO.
79 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GDs;
82 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
83 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
85 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
86 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
88 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
89 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
91 errorQuda(
"Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
// Destination accessor: QUDA_RECONSTRUCT_12.
94 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GDs;
96 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
97 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
99 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
100 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
102 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
103 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
105 errorQuda(
"Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
// Destination accessor: QUDA_RECONSTRUCT_8.
108 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GDs;
110 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
111 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
113 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
114 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
116 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
117 APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
119 errorQuda(
"Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
// Fallback when the destination reconstruction type matches none of the above.
122 errorQuda(
"Reconstruction type %d of destination gauge field not supported", dataDs.Reconstruct());
// Tail of a precision dispatch compiled under GPU_GAUGE_TOOLS: two errorQuda calls (precision
// mismatch between origin and destination; half precision), then a call to APEStep<float> with
// alpha cast to float and a call to APEStep<double> with alpha passed through unchanged.
// NOTE(review): the enclosing function signature, the conditions guarding each statement, and the
// matching #else/#endif are not visible in this chunk — confirm against the real file.
130 #ifdef GPU_GAUGE_TOOLS 133 errorQuda(
"Origin and destination fields must have the same precision\n");
137 errorQuda(
"Half precision not supported\n");
147 APEStep<float>(dataDs, dataOr, (float) alpha);
149 APEStep<double>(dataDs, dataOr, alpha);
QudaVerbosity getVerbosity()
Helper file when using jitify run-time compilation. This file should be included in source code...
void APEStep(GaugeField &dataDs, const GaugeField &dataOr, double alpha)
Apply APE smearing to the gauge field.
#define qudaDeviceSynchronize()
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
QudaGaugeFieldOrder Order() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const