1 #include <quda_internal.h>
3 #include <gauge_field.h>
4 #include <launch_kernel.cuh>
5 #include <jitify_helper.cuh>
6 #include <kernels/gauge_qcharge.cuh>
7 #include <instantiate.h>
12 template <typename Arg> class QChargeCompute : TunableLocalParityReduction
15 const GaugeField &meta;
18 QChargeCompute(Arg &arg, const GaugeField &meta) :
23 create_jitify_program("kernels/gauge_qcharge.cuh");
27 void apply(const qudaStream_t &stream)
29 if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
30 TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
32 using namespace jitify::reflection;
33 jitify_error = program->kernel("quda::qChargeComputeKernel")
34 .instantiate((int)tp.block.x, Type<Arg>())
35 .configure(tp.grid, tp.block, tp.shared_bytes, stream)
38 LAUNCH_KERNEL_LOCAL_PARITY(qChargeComputeKernel, (*this), tp, stream, arg, Arg);
40 } else { // run the CPU code
41 errorQuda("qChargeComputeKernel not supported on CPU");
45 TuneKey tuneKey() const
47 return TuneKey(meta.VolString(), typeid(*this).name(), meta.AuxString());
50 long long flops() const
52 auto mm_flops = 8 * Arg::nColor * Arg::nColor * (Arg::nColor - 2);
53 auto traceless_flops = (Arg::nColor * Arg::nColor + Arg::nColor + 1);
54 auto energy_flops = 6 * (mm_flops + traceless_flops + Arg::nColor);
55 auto q_flops = 3*mm_flops + 2*Arg::nColor + 2;
56 return 2ll * arg.threads * (energy_flops + q_flops);
59 long long bytes() const { return 2 * arg.threads * (6 * arg.f.Bytes() + Arg::density * sizeof(typename Arg::Float)); }
62 template <typename Float, int nColor, QudaReconstructType recon> struct QCharge {
63 QCharge(const GaugeField &Fmunu, double energy[3], double &qcharge, void *qdensity, bool density)
65 if (!Fmunu.isNative()) errorQuda("Topological charge computation only supported on native ordered fields");
67 std::vector<double> result(3);
69 QChargeArg<Float, nColor, recon, true> arg(Fmunu, (Float*)qdensity);
70 QChargeCompute<decltype(arg)> qChargeCompute(arg, Fmunu);
71 qChargeCompute.apply(0);
74 QChargeArg<Float, nColor, recon, false> arg(Fmunu, (Float*)qdensity);
75 QChargeCompute<decltype(arg)> qChargeCompute(arg, Fmunu);
76 qChargeCompute.apply(0);
80 comm_allreduce_array(result.data(), 3);
81 for (int i=0; i<2; i++) energy[i+1] = result[i] / (Fmunu.Volume() * comm_size());
82 energy[0] = energy[1] + energy[2];
87 void computeQCharge(double energy[3], double &qcharge, const GaugeField &Fmunu)
89 #ifdef GPU_GAUGE_TOOLS
90 instantiate<QCharge,ReconstructNone>(Fmunu, energy, qcharge, nullptr, false);
92 errorQuda("Gauge tools are not built");
93 #endif // GPU_GAUGE_TOOLS
96 void computeQChargeDensity(double energy[3], double &qcharge, void *qdensity, const GaugeField &Fmunu)
98 #ifdef GPU_GAUGE_TOOLS
99 instantiate<QCharge,ReconstructNone>(Fmunu, energy, qcharge, qdensity, true);
101 errorQuda("Gauge tools are not built");
102 #endif // GPU_GAUGE_TOOLS