// Launcher class for the field-strength (Fmunu) computation, templated on the
// floating-point type `Float` and on the kernel argument struct `Arg`.
// It derives from TunableVectorYZ (private inheritance, the default for
// `class`).
// NOTE(review): the leading integers on these lines look like line numbers
// carried over from a rendered listing, and some original lines (braces, the
// `arg` member that the constructor initialises) are not present in this view.
10 #ifdef GPU_GAUGE_TOOLS 12 template <
typename Float,
typename Arg>
class FmunuCompute : TunableVectorYZ
// Reference to the gauge field whose Stride() and VolString() are queried by
// the constructor and by tuneKey().
15 const GaugeField &meta;
// Returns arg.threads.
// Presumably overrides a tuning hook of the base class that bounds the
// thread count explored by the autotuner — confirm against Tunable.
18 unsigned int minThreads()
const {
return arg.threads; }
// Always returns false.
// Presumably tells the autotuner not to vary the grid dimension — confirm
// against the base-class contract.
19 bool tuneGridDim()
const {
return false; }
// Constructor.
//   arg  - kernel argument struct; bound to the `arg` member.
//   meta - gauge field; bound to the `meta` reference member.
// Forwards (2, 6) to the TunableVectorYZ base, writes an aux string made of
// arg.threads, meta.Stride() and sizeof(Float), and calls
// create_jitify_program() with the kernel header path.
// NOTE(review): the meaning of the (2, 6) base arguments is not visible here —
// presumably the Y and Z extents of the launch; confirm.
22 FmunuCompute(Arg &arg,
const GaugeField &meta) : TunableVectorYZ(2, 6), arg(arg), meta(meta)
// NOTE(review): "%lu" is paired with sizeof(Float), which is a size_t; this
// matches only where size_t is unsigned long ("%zu" is the portable form).
24 writeAuxString(
"threads=%d,stride=%d,prec=%lu", arg.threads, meta.Stride(),
sizeof(Float));
27 create_jitify_program(
"kernels/field_strength_tensor.cuh");
// Virtual destructor with an empty body; this class performs no cleanup of
// its own.
31 virtual ~FmunuCompute() {}
// Runs the field-strength computation.
//   stream - CUDA stream on which the GPU work is enqueued.
//
// Three call sites are visible: a jitify run-time-compiled launch of
// "quda::computeFmunuKernel", an ordinary launch of
// computeFmunuKernel<Float>, and a host call to computeFmunuCPU<Float>.
// NOTE(review): the lines that choose between these paths and that define
// `tp` are not present in this view (presumably preprocessor / field-location
// checks and a tuneLaunch() call) — confirm against the full file.
33 void apply(
const cudaStream_t &
stream)
38 using namespace jitify::reflection;
39 jitify_error = program->kernel(
"quda::computeFmunuKernel")
40 .instantiate(Type<Float>(), Type<Arg>())
41 .configure(tp.grid, tp.block, tp.shared_bytes, stream)
// `stream` is passed as the fourth launch-configuration argument so that this
// path honours the caller's stream in the same way as the jitify path above.
// Without it the launch ignores `stream` and always uses the default stream.
44 computeFmunuKernel<Float><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(
arg);
47 computeFmunuCPU<Float>(
arg);
// Builds the TuneKey for this launcher from meta.VolString(), the runtime
// type name of this object, and `aux`.
// `aux` is presumably the string filled in by writeAuxString() in the
// constructor — confirm.
51 TuneKey tuneKey()
const {
return TuneKey(meta.VolString(),
typeid(*this).name(), aux); }
// Returns the floating-point operation count used for performance reporting:
// (2430 + 36) * 6 * 2 * arg.threads, with arg.threads widened to long long
// before the final multiplication.
// NOTE(review): the origin of the constants is not shown here — presumably a
// per-site cost times 6 (mu,nu) pairs times 2 parities; confirm.
53 long long flops()
const {
return (2430 + 36) * 6 * 2 * (
long long)arg.threads; }
// Returns the memory-traffic estimate used for performance reporting:
// (16 * arg.gauge.Bytes() + arg.f.Bytes()) * 6 * 2 * arg.threads.
// NOTE(review): unlike flops(), there is no explicit widening cast here, so
// the arithmetic width depends on the return type of Bytes() — confirm it is
// wide enough (e.g. size_t) for large volumes.
54 long long bytes()
const 56 return ((16 * arg.gauge.Bytes() + arg.f.Bytes()) * 6 * 2 * arg.threads);
// Typed driver: packs the accessors and field metadata into an FmunuArg,
// wraps it in an FmunuCompute launcher and runs it.
//   f_munu  - accessor for the output field-strength tensor.
//   gauge   - accessor for the input gauge field.
//   meta    - passed to FmunuArg and to the FmunuCompute constructor.
//   meta_ex - passed to FmunuArg only.
61 template <
typename Float,
typename Fmunu,
typename Gauge>
62 void computeFmunu(Fmunu f_munu, Gauge gauge,
const GaugeField &meta,
const GaugeField &meta_ex)
64 FmunuArg<Float, Fmunu, Gauge>
arg(f_munu, gauge, meta, meta_ex);
65 FmunuCompute<Float, FmunuArg<Float, Fmunu, Gauge>> fmunuCompute(arg, meta);
// The literal 0 is the stream argument of apply(), i.e. the default stream.
66 fmunuCompute.apply(0);
// Precision-typed dispatcher: picks accessor types for the output and input
// fields and forwards to the accessor-level computeFmunu<Float>().
//   Fmunu - output field; also passed on as the `meta` argument.
//   gauge - input gauge field; also passed on as the `meta_ex` argument.
// Unsupported reconstruction types, gauge field orders and Fmunu field orders
// are reported through errorQuda().
// NOTE(review): only the gauge.isNative() test is visible; the conditions
// guarding the typedefs and the individual reconstruct branches are not
// present in this view.
71 template <
typename Float>
void computeFmunu(GaugeField &Fmunu,
const GaugeField &gauge)
74 if (gauge.isNative()) {
// Output accessor type: FloatNOrder parameterised as <Float, 18, 2, 18>.
75 typedef gauge::FloatNOrder<Float, 18, 2, 18> F;
// Input accessor for QUDA_RECONSTRUCT_NO.
78 typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_NO>::type G;
79 computeFmunu<Float>(F(Fmunu), G(gauge), Fmunu, gauge);
// Input accessor for QUDA_RECONSTRUCT_12.
81 typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_12>::type G;
82 computeFmunu<Float>(F(Fmunu), G(gauge), Fmunu, gauge);
// Input accessor for QUDA_RECONSTRUCT_8.
84 typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_8>::type G;
85 computeFmunu<Float>(F(Fmunu), G(gauge), Fmunu, gauge);
87 errorQuda(
"Reconstruction type %d not supported", gauge.Reconstruct());
90 errorQuda(
"Gauge field order %d not supported", gauge.Order());
93 errorQuda(
"Fmunu field order %d not supported", Fmunu.Order());
97 #endif // GPU_GAUGE_TOOLS 102 #ifdef GPU_GAUGE_TOOLS 108 computeFmunu<double>(Fmunu, gauge);
110 computeFmunu<float>(Fmunu, gauge);
117 #endif // GPU_GAUGE_TOOLS void computeFmunu(GaugeField &Fmunu, const GaugeField &gauge)
Compute the Fmunu tensor.
QudaVerbosity getVerbosity()
Helper file when using jitify run-time compilation. This file should be included in source code...
#define qudaDeviceSynchronize()
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const