QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
gauge_field_strength_tensor.cu
Go to the documentation of this file.
1 #include <tune_quda.h>
2 #include <gauge_field.h>
3 
4 #include <jitify_helper.cuh>
6 
7 namespace quda
8 {
9 
10 #ifdef GPU_GAUGE_TOOLS
11 
12  template <typename Float, typename Arg> class FmunuCompute : TunableVectorYZ
13  {
14  Arg &arg;
15  const GaugeField &meta;
16 
17 private:
18  unsigned int minThreads() const { return arg.threads; }
19  bool tuneGridDim() const { return false; }
20 
21 public:
22  FmunuCompute(Arg &arg, const GaugeField &meta) : TunableVectorYZ(2, 6), arg(arg), meta(meta)
23  {
24  writeAuxString("threads=%d,stride=%d,prec=%lu", arg.threads, meta.Stride(), sizeof(Float));
25  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
26 #ifdef JITIFY
27  create_jitify_program("kernels/field_strength_tensor.cuh");
28 #endif
29  }
30  }
31  virtual ~FmunuCompute() {}
32 
33  void apply(const cudaStream_t &stream)
34  {
35  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
36  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
37 #ifdef JITIFY
38  using namespace jitify::reflection;
39  jitify_error = program->kernel("quda::computeFmunuKernel")
40  .instantiate(Type<Float>(), Type<Arg>())
41  .configure(tp.grid, tp.block, tp.shared_bytes, stream)
42  .launch(arg);
43 #else
44  computeFmunuKernel<Float><<<tp.grid, tp.block, tp.shared_bytes>>>(arg);
45 #endif
46  } else {
47  computeFmunuCPU<Float>(arg);
48  }
49  }
50 
51  TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }
52 
53  long long flops() const { return (2430 + 36) * 6 * 2 * (long long)arg.threads; }
54  long long bytes() const
55  {
56  return ((16 * arg.gauge.Bytes() + arg.f.Bytes()) * 6 * 2 * arg.threads);
57  } // Ignores link reconstruction
58 
59  }; // FmunuCompute
60 
61  template <typename Float, typename Fmunu, typename Gauge>
62  void computeFmunu(Fmunu f_munu, Gauge gauge, const GaugeField &meta, const GaugeField &meta_ex)
63  {
64  FmunuArg<Float, Fmunu, Gauge> arg(f_munu, gauge, meta, meta_ex);
65  FmunuCompute<Float, FmunuArg<Float, Fmunu, Gauge>> fmunuCompute(arg, meta);
66  fmunuCompute.apply(0);
69  }
70 
71  template <typename Float> void computeFmunu(GaugeField &Fmunu, const GaugeField &gauge)
72  {
73  if (Fmunu.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
74  if (gauge.isNative()) {
75  typedef gauge::FloatNOrder<Float, 18, 2, 18> F;
76 
77  if (gauge.Reconstruct() == QUDA_RECONSTRUCT_NO) {
78  typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_NO>::type G;
79  computeFmunu<Float>(F(Fmunu), G(gauge), Fmunu, gauge);
80  } else if (gauge.Reconstruct() == QUDA_RECONSTRUCT_12) {
81  typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_12>::type G;
82  computeFmunu<Float>(F(Fmunu), G(gauge), Fmunu, gauge);
83  } else if (gauge.Reconstruct() == QUDA_RECONSTRUCT_8) {
84  typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_8>::type G;
85  computeFmunu<Float>(F(Fmunu), G(gauge), Fmunu, gauge);
86  } else {
87  errorQuda("Reconstruction type %d not supported", gauge.Reconstruct());
88  }
89  } else {
90  errorQuda("Gauge field order %d not supported", gauge.Order());
91  }
92  } else {
93  errorQuda("Fmunu field order %d not supported", Fmunu.Order());
94  }
95  }
96 
97 #endif // GPU_GAUGE_TOOLS
98 
99  void computeFmunu(GaugeField &Fmunu, const GaugeField &gauge)
100  {
101 
102 #ifdef GPU_GAUGE_TOOLS
103  if (Fmunu.Precision() != gauge.Precision()) {
104  errorQuda("Fmunu precision %d must match gauge precision %d", Fmunu.Precision(), gauge.Precision());
105  }
106 
107  if (gauge.Precision() == QUDA_DOUBLE_PRECISION) {
108  computeFmunu<double>(Fmunu, gauge);
109  } else if (gauge.Precision() == QUDA_SINGLE_PRECISION) {
110  computeFmunu<float>(Fmunu, gauge);
111  } else {
112  errorQuda("Precision %d not supported", gauge.Precision());
113  }
114  return;
115 #else
116  errorQuda("Gauge tools are not built");
117 #endif // GPU_GAUGE_TOOLS
118  }
119 } // namespace quda
void computeFmunu(GaugeField &Fmunu, const GaugeField &gauge)
Compute the Fmunu tensor.
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
Helper file when using jitify run-time compilation. This file should be included in source code...
cudaStream_t * stream
#define qudaDeviceSynchronize()
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
unsigned long long flops
Definition: blas_quda.cu:22
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
#define checkCudaError()
Definition: util_quda.h:161
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
QudaPrecision Precision() const
unsigned long long bytes
Definition: blas_quda.cu:23