5 #define DOUBLE_TOL 1e-15 6 #define SINGLE_TOL 2e-6 13 #ifdef GPU_GAUGE_TOOLS 15 template <
typename Float,
typename Arg>
class GaugeSTOUT : TunableVectorYZ
18 const GaugeField &meta;
// Always returns false.
// NOTE(review): presumably this tells the autotuner base class not to vary the
// grid dimension for this kernel — confirm against the Tunable interface.
21 bool tuneGridDim()
const {
return false; }
// Returns arg.threads as the minimum thread count.
// NOTE(review): presumably consumed by the autotuner to size the launch —
// confirm against the TunableVectorYZ base class.
22 unsigned int minThreads()
const {
return arg.threads; }
// Constructor: binds the kernel argument object and the gauge-field metadata
// reference, and initialises the TunableVectorYZ base with (2, 3).
// NOTE(review): the meaning of 2 and 3 is not visible in this excerpt —
// presumably the Y/Z extents of the vectorised launch; confirm against
// TunableVectorYZ.
26 GaugeSTOUT(Arg &arg,
const GaugeField &meta) : TunableVectorYZ(2, 3),
arg(arg), meta(meta)
// Hands the kernel header path to create_jitify_program.
// NOTE(review): any build guard around this call is not visible here.
29 create_jitify_program(
"kernels/gauge_stout.cuh");
// Virtual destructor with an empty body.
32 virtual ~GaugeSTOUT() {}
// Launches the STOUT step kernel with the launch parameters held in `tp`.
// Two launch paths are visible: the jitify run-time-compiled kernel
// "quda::computeSTOUTStep" and the statically compiled
// computeSTOUTStep<Float> kernel. Both are issued on the caller's `stream`.
// NOTE(review): the code that produces `tp` and selects between the two paths
// is not visible in this excerpt.
34 void apply(
const cudaStream_t &
stream)
39 using namespace jitify::reflection;
40 jitify_error = program->kernel(
"quda::computeSTOUTStep")
41 .instantiate(Type<Float>(), Type<Arg>())
42 .configure(tp.grid, tp.block, tp.shared_bytes, stream)
// Pass `stream` as the fourth launch parameter: without it the kernel is
// enqueued on the default stream and the caller's stream is ignored, unlike
// the jitify path above.
45 computeSTOUTStep<Float><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(
arg);
// Builds the TuneKey for this kernel from three parts: the volume string of
// the gauge field metadata, the run-time type name of this object, and an aux
// string of the form "threads=<arg.threads>,prec=<sizeof(Float)>".
53 TuneKey tuneKey()
const 55 std::stringstream aux;
56 aux <<
"threads=" << arg.threads <<
",prec=" <<
sizeof(Float);
57 return TuneKey(meta.VolString(),
typeid(*this).name(), aux.str().c_str());
// Calls save() on the destination accessor.
// NOTE(review): presumably backs up the destination field before trial
// launches overwrite it during tuning — confirm against the accessor.
60 void preTune() { arg.dest.save(); }
// Calls load() on the destination accessor.
// NOTE(review): presumably restores the destination field after tuning
// finishes — confirm against the accessor.
61 void postTune() { arg.dest.load(); }
// Flop estimate for one kernel launch: 3 * (2 + 2 * 4) * 198 per thread,
// multiplied by arg.threads. The `ll` suffix on 198 promotes the product to
// long long.
// NOTE(review): 198 is presumably the flop cost of one 3x3 complex matrix
// product; the origin of the other factors is not shown here — confirm.
63 long long flops()
const {
return 3 * (2 + 2 * 4) * 198ll * arg.threads; }
// Memory-traffic estimate for one kernel launch: per thread,
// 3 * ((1 + 2 * 6) * origin.Bytes() + dest.Bytes()), multiplied by arg.threads.
// NOTE(review): the factors presumably count link loads from the origin field
// and one store to the destination per direction — confirm against the kernel.
64 long long bytes()
const {
return 3 * ((1 + 2 * 6) * arg.origin.Bytes() + arg.dest.Bytes()) * arg.threads; }
// Accessor-typed STOUT step: instantiates the GaugeSTOUT launcher for the
// concrete origin (GaugeOr) and destination (GaugeDs) accessor types, using
// GaugeSTOUTArg<Float, GaugeOr, GaugeDs> as the kernel argument type.
// NOTE(review): the construction of `arg` and the call that runs the launcher
// are not visible in this excerpt.
67 template<
typename Float,
typename GaugeOr,
typename GaugeDs>
68 void STOUTStep(GaugeOr origin, GaugeDs dest,
const GaugeField& dataOr, Float rho) {
70 GaugeSTOUT<Float, GaugeSTOUTArg<Float, GaugeOr, GaugeDs>> gaugeSTOUT(
arg, dataOr);
// Precision-typed STOUT step: picks gauge_mapper accessor types for the
// destination (GDs) and origin (GOr) fields and forwards to the accessor-typed
// STOUTStep overload. Every combination of QUDA_RECONSTRUCT_NO / _12 / _8 for
// the two fields is instantiated; anything else is reported via errorQuda.
// NOTE(review): the conditions selecting each branch are not visible in this
// excerpt — presumably they test dataDs.Reconstruct() and dataOr.Reconstruct().
75 template<
typename Float>
76 void STOUTStep(GaugeField &dataDs,
const GaugeField& dataOr, Float rho) {
// Destination accessor without reconstruction.
79 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GDs;
82 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
83 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
85 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
86 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
88 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
89 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
91 errorQuda(
"Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
// Destination accessor with reconstruct-12.
94 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GDs;
96 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
97 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
99 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
100 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
102 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
103 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
105 errorQuda(
"Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
// Destination accessor with reconstruct-8.
108 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GDs;
110 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
111 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
113 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
114 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
116 typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
117 STOUTStep(GOr(dataOr), GDs(dataDs), dataOr, rho);
119 errorQuda(
"Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
// Reported when the destination field's reconstruct type matches none of the
// cases above.
122 errorQuda(
"Reconstruction type %d of destination gauge field not supported", dataDs.Reconstruct());
131 #ifdef GPU_GAUGE_TOOLS 134 errorQuda(
"Origin and destination fields must have the same precision\n");
138 errorQuda(
"Half precision not supported\n");
148 STOUTStep<float>(dataDs, dataOr, (float) rho);
150 STOUTStep<double>(dataDs, dataOr, rho);
179 using namespace jitify::reflection;
180 jitify_error = program->kernel(
"quda::computeOvrImpSTOUTStep")
181 .instantiate(Type<Float>(), Type<Arg>())
195 std::stringstream aux;
196 aux <<
"threads=" << arg.threads <<
",prec=" <<
sizeof(Float);
// Flop estimate for one launch of the over-improved kernel:
// 4 * (18 + 2 + 2 * 4) * 198 per thread, multiplied by arg.threads. The `ll`
// suffix on 198 promotes the product to long long.
// NOTE(review): 198 is presumably the flop cost of one 3x3 complex matrix
// product; the origin of the other factors is not shown here — confirm.
203 long long flops()
const {
return 4*(18+2+2*4)*198ll*arg.threads; }
// Memory-traffic estimate for one launch of the over-improved kernel: per
// thread, 4 * ((1 + 2 * 12) * origin.Bytes() + dest.Bytes()), multiplied by
// arg.threads.
// NOTE(review): the factors presumably count link loads from the origin field
// and one store to the destination per direction — confirm against the kernel.
204 long long bytes()
const {
return 4*((1+2*12)*arg.origin.Bytes()+arg.dest.Bytes())*arg.threads; }
207 template<
typename Float,
typename GaugeOr,
typename GaugeDs>
212 gaugeOvrImpSTOUT.apply(0);
216 template<
typename Float>
263 errorQuda(
"Reconstruction type %d of destination gauge field not supported", dataDs.
Reconstruct());
271 #ifdef GPU_GAUGE_TOOLS 274 errorQuda(
"Origin and destination fields must have the same precision\n");
278 errorQuda(
"Half precision not supported\n");
288 OvrImpSTOUTStep<float>(dataDs, dataOr, (float) rho, epsilon);
290 OvrImpSTOUTStep<double>(dataDs, dataOr, rho,
epsilon);
GaugeOvrImpSTOUT(Arg &arg, const GaugeField &meta)
QudaVerbosity getVerbosity()
Helper file when using jitify run-time compilation. This file should be included in source code...
virtual ~GaugeOvrImpSTOUT()
void STOUTStep(GaugeField &dataDs, const GaugeField &dataOr, double rho)
Apply STOUT smearing to the gauge field.
const char * VolString() const
#define qudaDeviceSynchronize()
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
void OvrImpSTOUTStep(GaugeField &dataDs, const GaugeField &dataOr, double rho, double epsilon)
Apply Over Improved STOUT smearing to the gauge field.
QudaFieldLocation Location() const
unsigned int minThreads() const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
QudaGaugeFieldOrder Order() const
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const
void apply(const cudaStream_t &stream)