12 #ifdef GPU_GAUGE_TOOLS 20 bool tuneGridDim()
const {
// Unconditionally returns true.
// NOTE(review): presumably this opts the kernel into grid-dimension
// autotuning - confirm against the Tunable base class, which is not visible here.
return true; }
21 unsigned int minThreads()
const {
return arg.threads; }
24 QChargeCompute(
Arg &arg,
const GaugeField &meta) : arg(arg), meta(meta)
27 create_jitify_program(
"kernels/gauge_qcharge.cuh");
30 virtual ~QChargeCompute() {}
32 void apply(
const cudaStream_t &
stream)
38 using namespace jitify::reflection;
39 jitify_error = program->kernel(
"quda::qChargeComputeKernel")
40 .instantiate((
int)tp.
block.x, Type<Float>(), Type<Arg>())
48 errorQuda(
"qChargeComputeKernel not supported on CPU");
54 std::stringstream aux;
55 aux <<
"threads=" << arg.threads <<
",prec=" <<
sizeof(Float);
59 long long flops()
const {
return 2 * arg.threads * (3 * 198 + 9); }
60 long long bytes()
const {
return 2 * arg.threads * ((6 * 18) + Arg::density) *
sizeof(Float); }
63 template <
typename Float,
typename Gauge,
bool density>
67 QChargeCompute<Float, decltype(arg)> qChargeCompute(arg, Fmunu);
68 qChargeCompute.apply(0);
71 qChg = arg.result_h[0];
78 if (!Fmunu.
isNative())
errorQuda(
"Topological charge computation only supported on native ordered fields");
82 computeQCharge<Float, Gauge, density>(Gauge(Fmunu), Fmunu, qDensity, qChg);
85 computeQCharge<Float, Gauge, density>(Gauge(Fmunu), Fmunu, qDensity, qChg);
88 computeQCharge<Float, Gauge, density>(Gauge(Fmunu), Fmunu, qDensity, qChg);
95 #endif // GPU_GAUGE_TOOLS 100 #ifdef GPU_GAUGE_TOOLS 104 qChg = computeQCharge<float, false>(Fmunu);
106 qChg = computeQCharge<double, false>(Fmunu);
112 #endif // GPU_GAUGE_TOOLS 119 #ifdef GPU_GAUGE_TOOLS 123 qChg = computeQCharge<float, true>(Fmunu, (
float *)qDensity);
125 qChg = computeQCharge<double, true>(Fmunu, (
double *)qDensity);
131 #endif // GPU_GAUGE_TOOLS
double computeQCharge(const GaugeField &Fmunu)
Compute the topological charge.
QudaVerbosity getVerbosity()
Helper file when using jitify run-time compilation. This file should be included in source code...
const char * VolString() const
double computeQChargeDensity(const GaugeField &Fmunu, void *result)
Compute the topological charge density per lattice site.
#define qudaDeviceSynchronize()
__global__ void qChargeComputeKernel(Arg arg)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define LAUNCH_KERNEL(kernel, tp, stream, arg,...)
QudaFieldLocation Location() const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
QudaGaugeFieldOrder Order() const
void comm_allreduce(double *data)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const