QUDA  v1.1.0
A library for QCD on GPUs
gauge_qcharge.cu
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <tune_quda.h>
3 #include <gauge_field.h>
4 #include <launch_kernel.cuh>
5 #include <jitify_helper.cuh>
6 #include <kernels/gauge_qcharge.cuh>
7 #include <instantiate.h>
8 
9 namespace quda
10 {
11 
12  template <typename Arg> class QChargeCompute : TunableLocalParityReduction
13  {
14  Arg &arg;
15  const GaugeField &meta;
16 
17  public:
18  QChargeCompute(Arg &arg, const GaugeField &meta) :
19  arg(arg),
20  meta(meta)
21  {
22 #ifdef JITIFY
23  create_jitify_program("kernels/gauge_qcharge.cuh");
24 #endif
25  }
26 
27  void apply(const qudaStream_t &stream)
28  {
29  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
30  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
31 #ifdef JITIFY
32  using namespace jitify::reflection;
33  jitify_error = program->kernel("quda::qChargeComputeKernel")
34  .instantiate((int)tp.block.x, Type<Arg>())
35  .configure(tp.grid, tp.block, tp.shared_bytes, stream)
36  .launch(arg);
37 #else
38  LAUNCH_KERNEL_LOCAL_PARITY(qChargeComputeKernel, (*this), tp, stream, arg, Arg);
39 #endif
40  } else { // run the CPU code
41  errorQuda("qChargeComputeKernel not supported on CPU");
42  }
43  }
44 
45  TuneKey tuneKey() const
46  {
47  return TuneKey(meta.VolString(), typeid(*this).name(), meta.AuxString());
48  }
49 
50  long long flops() const
51  {
52  auto mm_flops = 8 * Arg::nColor * Arg::nColor * (Arg::nColor - 2);
53  auto traceless_flops = (Arg::nColor * Arg::nColor + Arg::nColor + 1);
54  auto energy_flops = 6 * (mm_flops + traceless_flops + Arg::nColor);
55  auto q_flops = 3*mm_flops + 2*Arg::nColor + 2;
56  return 2ll * arg.threads * (energy_flops + q_flops);
57  }
58 
59  long long bytes() const { return 2 * arg.threads * (6 * arg.f.Bytes() + Arg::density * sizeof(typename Arg::Float)); }
60  }; // QChargeCompute
61 
62  template <typename Float, int nColor, QudaReconstructType recon> struct QCharge {
63  QCharge(const GaugeField &Fmunu, double energy[3], double &qcharge, void *qdensity, bool density)
64  {
65  if (!Fmunu.isNative()) errorQuda("Topological charge computation only supported on native ordered fields");
66 
67  std::vector<double> result(3);
68  if (density) {
69  QChargeArg<Float, nColor, recon, true> arg(Fmunu, (Float*)qdensity);
70  QChargeCompute<decltype(arg)> qChargeCompute(arg, Fmunu);
71  qChargeCompute.apply(0);
72  arg.complete(result);
73  } else {
74  QChargeArg<Float, nColor, recon, false> arg(Fmunu, (Float*)qdensity);
75  QChargeCompute<decltype(arg)> qChargeCompute(arg, Fmunu);
76  qChargeCompute.apply(0);
77  arg.complete(result);
78  }
79 
80  comm_allreduce_array(result.data(), 3);
81  for (int i=0; i<2; i++) energy[i+1] = result[i] / (Fmunu.Volume() * comm_size());
82  energy[0] = energy[1] + energy[2];
83  qcharge = result[2];
84  }
85  };
86 
87  void computeQCharge(double energy[3], double &qcharge, const GaugeField &Fmunu)
88  {
89 #ifdef GPU_GAUGE_TOOLS
90  instantiate<QCharge,ReconstructNone>(Fmunu, energy, qcharge, nullptr, false);
91 #else
92  errorQuda("Gauge tools are not built");
93 #endif // GPU_GAUGE_TOOLS
94  }
95 
96  void computeQChargeDensity(double energy[3], double &qcharge, void *qdensity, const GaugeField &Fmunu)
97  {
98 #ifdef GPU_GAUGE_TOOLS
99  instantiate<QCharge,ReconstructNone>(Fmunu, energy, qcharge, qdensity, true);
100 #else
101  errorQuda("Gauge tools are not built");
102 #endif // GPU_GAUGE_TOOLS
103  }
104 } // namespace quda