QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
gauge_ape.cu
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <tune_quda.h>
3 #include <gauge_field.h>
4 
5 #define DOUBLE_TOL 1e-15
6 #define SINGLE_TOL 2e-6
7 
8 #include <jitify_helper.cuh>
9 #include <kernels/gauge_ape.cuh>
10 
11 namespace quda {
12 
13 #ifdef GPU_GAUGE_TOOLS
14 
15  template <typename Float, typename Arg> class GaugeAPE : TunableVectorYZ
16  {
17  Arg &arg;
18  const GaugeField &meta;
19 
20 private:
21  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
22  unsigned int minThreads() const { return arg.threads; }
23 
24 public:
25  // (2,3): 2 for parity in the y thread dim, 3 corresponds to mapping direction to the z thread dim
26  GaugeAPE(Arg &arg, const GaugeField &meta) : TunableVectorYZ(2, 3), arg(arg), meta(meta)
27  {
28 #ifdef JITIFY
29  create_jitify_program("kernels/gauge_ape.cuh");
30 #endif
31  }
32  virtual ~GaugeAPE() {}
33 
34  void apply(const cudaStream_t &stream)
35  {
36  if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
37  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
38 #ifdef JITIFY
39  using namespace jitify::reflection;
40  jitify_error = program->kernel("quda::computeAPEStep")
41  .instantiate(Type<Float>(), Type<Arg>())
42  .configure(tp.grid, tp.block, tp.shared_bytes, stream)
43  .launch(arg);
44 #else
45  computeAPEStep<Float><<<tp.grid, tp.block, tp.shared_bytes>>>(arg);
46 #endif
47  } else {
48  errorQuda("CPU not supported yet\n");
49  // computeAPEStepCPU(arg);
50  }
51  }
52 
53  TuneKey tuneKey() const
54  {
55  std::stringstream aux;
56  aux << "threads=" << arg.threads << ",prec=" << sizeof(Float);
57  return TuneKey(meta.VolString(), typeid(*this).name(), aux.str().c_str());
58  }
59 
60  void preTune() { arg.dest.save(); } // defensive measure in case they alias
61  void postTune() { arg.dest.load(); }
62 
63  long long flops() const { return 3 * (2 + 2 * 4) * 198ll * arg.threads; } // just counts matrix multiplication
64  long long bytes() const { return 3 * ((1 + 2 * 6) * arg.origin.Bytes() + arg.dest.Bytes()) * arg.threads; }
65  }; // GaugeAPE
66 
67  template<typename Float,typename GaugeOr, typename GaugeDs>
68  void APEStep(GaugeOr origin, GaugeDs dest, const GaugeField& dataOr, Float alpha) {
69  GaugeAPEArg<Float,GaugeOr,GaugeDs> arg(origin, dest, dataOr, alpha, dataOr.Precision() == QUDA_DOUBLE_PRECISION ? DOUBLE_TOL : SINGLE_TOL);
70  GaugeAPE<Float, GaugeAPEArg<Float, GaugeOr, GaugeDs>> gaugeAPE(arg, dataOr);
71  gaugeAPE.apply(0);
73  }
74 
75  template <typename Float> void APEStep(GaugeField &dataDs, const GaugeField &dataOr, Float alpha)
76  {
77 
78  if(dataDs.Reconstruct() == QUDA_RECONSTRUCT_NO) {
79  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GDs;
80 
81  if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_NO) {
82  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
83  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
84  }else if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_12){
85  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
86  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
87  }else if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_8){
88  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
89  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
90  }else{
91  errorQuda("Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
92  }
93  } else if(dataDs.Reconstruct() == QUDA_RECONSTRUCT_12){
94  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GDs;
95  if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_NO){
96  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
97  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
98  }else if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_12){
99  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
100  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
101  }else if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_8){
102  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
103  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
104  }else{
105  errorQuda("Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
106  }
107  } else if(dataDs.Reconstruct() == QUDA_RECONSTRUCT_8){
108  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GDs;
109  if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_NO){
110  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type GOr;
111  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
112  }else if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_12){
113  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type GOr;
114  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
115  }else if(dataOr.Reconstruct() == QUDA_RECONSTRUCT_8){
116  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type GOr;
117  APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
118  }else{
119  errorQuda("Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
120  }
121  } else {
122  errorQuda("Reconstruction type %d of destination gauge field not supported", dataDs.Reconstruct());
123  }
124  }
125 
126 #endif
127 
128  void APEStep(GaugeField &dataDs, const GaugeField& dataOr, double alpha) {
129 
130 #ifdef GPU_GAUGE_TOOLS
131 
132  if(dataOr.Precision() != dataDs.Precision()) {
133  errorQuda("Origin and destination fields must have the same precision\n");
134  }
135 
136  if(dataDs.Precision() == QUDA_HALF_PRECISION){
137  errorQuda("Half precision not supported\n");
138  }
139 
140  if (!dataOr.isNative())
141  errorQuda("Order %d with %d reconstruct not supported", dataOr.Order(), dataOr.Reconstruct());
142 
143  if (!dataDs.isNative())
144  errorQuda("Order %d with %d reconstruct not supported", dataDs.Order(), dataDs.Reconstruct());
145 
146  if (dataDs.Precision() == QUDA_SINGLE_PRECISION){
147  APEStep<float>(dataDs, dataOr, (float) alpha);
148  } else if(dataDs.Precision() == QUDA_DOUBLE_PRECISION) {
149  APEStep<double>(dataDs, dataOr, alpha);
150  } else {
151  errorQuda("Precision %d not supported", dataDs.Precision());
152  }
153  return;
154 #else
155  errorQuda("Gauge tools are not built");
156 #endif
157  }
158 }
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21
#define errorQuda(...)
Definition: util_quda.h:121
Helper file when using jitify run-time compilation. This file should be included in source code...
cudaStream_t * stream
void APEStep(GaugeField &dataDs, const GaugeField &dataOr, double alpha)
Apply APE smearing to the gauge field.
Definition: gauge_ape.cu:128
#define qudaDeviceSynchronize()
#define DOUBLE_TOL
Definition: gauge_ape.cu:5
#define SINGLE_TOL
Definition: gauge_ape.cu:6
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643
unsigned long long flops
Definition: blas_quda.cu:22
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
Definition: gauge_field.h:250
QudaGaugeFieldOrder Order() const
Definition: gauge_field.h:251
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52
QudaPrecision Precision() const
bool isNative() const
unsigned long long bytes
Definition: blas_quda.cu:23