QUDA  v1.1.0
A library for QCD on GPUs
gauge_field_strength_tensor.cu
Go to the documentation of this file.
1 #include <tune_quda.h>
2 #include <gauge_field.h>
3 #include <jitify_helper.cuh>
4 #include <kernels/field_strength_tensor.cuh>
5 #include <instantiate.h>
6 
7 namespace quda
8 {
9 
10  template <typename Float, int nColor, QudaReconstructType recon> class Fmunu : TunableVectorYZ
11  {
12  FmunuArg<Float, nColor, recon> arg;
13  const GaugeField &meta;
14 
15  unsigned int minThreads() const { return arg.threads; }
16  bool tuneGridDim() const { return false; }
17 
18 public:
19  Fmunu(const GaugeField &u, GaugeField &f) :
20  TunableVectorYZ(2, 6),
21  arg(f, u),
22  meta(u)
23  {
24  strcpy(aux, meta.AuxString());
25  strcat(aux, comm_dim_partitioned_string());
26  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
27 #ifdef JITIFY
28  create_jitify_program("kernels/field_strength_tensor.cuh");
29 #endif
30  }
31  apply(0);
32  qudaDeviceSynchronize();
33  }
34 
35  void apply(const qudaStream_t &stream)
36  {
37  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
38 #ifdef JITIFY
39  using namespace jitify::reflection;
40  jitify_error = program->kernel("quda::computeFmunuKernel").instantiate(Type<decltype(arg)>())
41  .configure(tp.grid, tp.block, tp.shared_bytes, stream).launch(arg);
42 #else
43  qudaLaunchKernel(computeFmunuKernel<decltype(arg)>, tp, stream, arg);
44 #endif
45  }
46 
47  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }
48 
49  long long flops() const { return (2430 + 36) * 6 * 2 * (long long)arg.threads; }
50  long long bytes() const
51  {
52  return ((16 * arg.u.Bytes() + arg.f.Bytes()) * 6 * 2 * arg.threads);
53  } // Ignores link reconstruction
54 
55  }; // Fmunu
56 
57  void computeFmunu(GaugeField &f, const GaugeField &u)
58  {
59 #ifdef GPU_GAUGE_TOOLS
60  checkPrecision(f, u);
61  instantiate<Fmunu,ReconstructWilson>(u, f); // u must be first here for correct template instantiation
62 #else
63  errorQuda("Gauge tools are not built");
64 #endif // GPU_GAUGE_TOOLS
65  }
66 } // namespace quda