quda-ref/v1.0.0/gauge__ape_8cu_source.html

 #include <quda_internal.h>
 #include <tune_quda.h>
 #include <gauge_field.h>

 #define  DOUBLE_TOL 1e-15
 #define  SINGLE_TOL 2e-6

 #include <jitify_helper.cuh>
 #include <kernels/gauge_ape.cuh>

 namespace quda {

 #ifdef GPU_GAUGE_TOOLS

   /**
      @brief Tunable launcher for the computeAPEStep kernel.

      Holds a reference to the kernel argument struct and to the gauge
      field used as meta data (location and volume string), and exposes
      the hooks the autotuner needs (tuneKey, preTune/postTune, flops,
      bytes).

      @tparam Float Floating-point type the kernel is instantiated with
      @tparam Arg Type of the kernel argument struct
   */
   template <typename Float, typename Arg> class GaugeAPE : TunableVectorYZ
   {
     Arg &arg;               // kernel arguments (held by reference, not copied)
     const GaugeField &meta; // field queried for location and volume string

 private:
     bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
     unsigned int minThreads() const { return arg.threads; }

 public:
     /**
        @param[in,out] arg Kernel argument struct
        @param[in] meta Gauge field providing the meta data
     */
     // (2,3): 2 for parity in the y thread dim, 3 corresponds to mapping direction to the z thread dim
     GaugeAPE(Arg &arg, const GaugeField &meta) : TunableVectorYZ(2, 3), arg(arg), meta(meta)
     {
 #ifdef JITIFY
       create_jitify_program("kernels/gauge_ape.cuh");
 #endif
     }
     virtual ~GaugeAPE() {}

     /**
        @brief Obtain the launch parameters from the autotuner and launch the kernel.
        Raises an error if the meta field is not located on the device.
        @param[in] stream Stream the kernel is launched on
     */
     void apply(const cudaStream_t &stream)
     {
       if (meta.Location() == QUDA_CUDA_FIELD_LOCATION) {
         TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
 #ifdef JITIFY
         using namespace jitify::reflection;
         jitify_error = program->kernel("quda::computeAPEStep")
                          .instantiate(Type<Float>(), Type<Arg>())
                          .configure(tp.grid, tp.block, tp.shared_bytes, stream)
                          .launch(arg);
 #else
         // launch on the caller-supplied stream, as the JITIFY path above does
         computeAPEStep<Float><<<tp.grid, tp.block, tp.shared_bytes, stream>>>(arg);
 #endif
       } else {
         errorQuda("CPU not supported yet\n");
         // computeAPEStepCPU(arg);
       }
     }

     /**
        @brief Build the autotuner key from the field volume string, this
        class's type name, and an aux string holding the thread count and
        sizeof(Float).
     */
     TuneKey tuneKey() const
     {
       std::stringstream aux;
       aux << "threads=" << arg.threads << ",prec=" << sizeof(Float);
       return TuneKey(meta.VolString(), typeid(*this).name(), aux.str().c_str());
     }

     // The destination is saved before tuning and reloaded afterwards.
     void preTune() { arg.dest.save(); } // defensive measure in case they alias
     void postTune() { arg.dest.load(); }

     long long flops() const { return 3 * (2 + 2 * 4) * 198ll * arg.threads; } // just counts matrix multiplication
     long long bytes() const { return 3 * ((1 + 2 * 6) * arg.origin.Bytes() + arg.dest.Bytes()) * arg.threads; }
   }; // GaugeAPE

   /**
      @brief Assemble the kernel arguments for the given accessors, launch
      one APE step through GaugeAPE, and synchronize the device afterwards.

      @param[in] origin Accessor for the origin gauge field
      @param[in] dest Accessor for the destination gauge field
      @param[in] dataOr Origin gauge field (supplies precision and meta data)
      @param[in] alpha Smearing parameter forwarded to the argument struct
   */
   template<typename Float,typename GaugeOr, typename GaugeDs>
   void APEStep(GaugeOr origin, GaugeDs dest, const GaugeField& dataOr, Float alpha) {
     typedef GaugeAPEArg<Float, GaugeOr, GaugeDs> Arg;

     // the tolerance handed to the argument struct follows the precision of the origin field
     const auto tolerance = (dataOr.Precision() == QUDA_DOUBLE_PRECISION) ? DOUBLE_TOL : SINGLE_TOL;

     Arg apeArg(origin, dest, dataOr, alpha, tolerance);
     GaugeAPE<Float, Arg> launcher(apeArg, dataOr);

     launcher.apply(0);
     qudaDeviceSynchronize();
   }

   /**
      @brief Helper: with the destination accessor type already fixed,
      select the origin accessor from the origin field's reconstruct type
      and run the APE step.

      @tparam Float Floating-point type of the fields
      @tparam GDs Accessor type used for the destination field
      @param[out] dataDs Destination gauge field
      @param[in] dataOr Origin gauge field
      @param[in] alpha Smearing parameter
   */
   template <typename Float, typename GDs>
   void APEStepDispatchOrigin(GaugeField &dataDs, const GaugeField &dataOr, Float alpha)
   {
     switch (dataOr.Reconstruct()) {
     case QUDA_RECONSTRUCT_NO: {
       typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_NO>::type GOr;
       APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
       break;
     }
     case QUDA_RECONSTRUCT_12: {
       typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_12>::type GOr;
       APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
       break;
     }
     case QUDA_RECONSTRUCT_8: {
       typedef typename gauge_mapper<Float, QUDA_RECONSTRUCT_8>::type GOr;
       APEStep(GOr(dataOr), GDs(dataDs), dataOr, alpha);
       break;
     }
     default:
       errorQuda("Reconstruction type %d of origin gauge field not supported", dataOr.Reconstruct());
     }
   }

   /**
      @brief Select the destination accessor from the destination field's
      reconstruct type, then hand over to the origin dispatch.

      Reconstruct types NO, 12 and 8 are accepted for both fields; any
      other value raises an error.

      @param[out] dataDs Destination gauge field
      @param[in] dataOr Origin gauge field
      @param[in] alpha Smearing parameter
   */
   template <typename Float> void APEStep(GaugeField &dataDs, const GaugeField &dataOr, Float alpha)
   {
     switch (dataDs.Reconstruct()) {
     case QUDA_RECONSTRUCT_NO:
       APEStepDispatchOrigin<Float, typename gauge_mapper<Float, QUDA_RECONSTRUCT_NO>::type>(dataDs, dataOr, alpha);
       break;
     case QUDA_RECONSTRUCT_12:
       APEStepDispatchOrigin<Float, typename gauge_mapper<Float, QUDA_RECONSTRUCT_12>::type>(dataDs, dataOr, alpha);
       break;
     case QUDA_RECONSTRUCT_8:
       APEStepDispatchOrigin<Float, typename gauge_mapper<Float, QUDA_RECONSTRUCT_8>::type>(dataDs, dataOr, alpha);
       break;
     default:
       errorQuda("Reconstruction type %d of destination gauge field not supported", dataDs.Reconstruct());
     }
   }

 #endif

   /**
      @brief Apply APE smearing to the gauge field.

      Validates the two fields (equal precision, not half precision, both
      in native order) and then dispatches to the single- or
      double-precision implementation. When built without GPU_GAUGE_TOOLS
      the call only raises an error.

      @param[out] dataDs Destination gauge field
      @param[in] dataOr Origin gauge field
      @param[in] alpha Smearing parameter (cast to float for single precision)
   */
   void APEStep(GaugeField &dataDs, const GaugeField& dataOr, double alpha) {
 #ifdef GPU_GAUGE_TOOLS
     const auto precDs = dataDs.Precision();

     // both fields have to agree on precision before anything else is checked
     if (dataOr.Precision() != precDs) errorQuda("Origin and destination fields must have the same precision\n");

     if (precDs == QUDA_HALF_PRECISION) errorQuda("Half precision not supported\n");

     // only native field orders are handled, for the origin as well as the destination
     if (!dataOr.isNative()) {
       errorQuda("Order %d with %d reconstruct not supported", dataOr.Order(), dataOr.Reconstruct());
     }
     if (!dataDs.isNative()) {
       errorQuda("Order %d with %d reconstruct not supported", dataDs.Order(), dataDs.Reconstruct());
     }

     switch (precDs) {
     case QUDA_SINGLE_PRECISION: APEStep<float>(dataDs, dataOr, (float)alpha); break;
     case QUDA_DOUBLE_PRECISION: APEStep<double>(dataDs, dataOr, alpha); break;
     default: errorQuda("Precision %d not supported", precDs);
     }
 #else
     errorQuda("Gauge tools are not built");
 #endif
   }
 }
QUDA_RECONSTRUCT_NO
Definition: enum_quda.h:67

getVerbosity
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21

errorQuda
#define errorQuda(...)
Definition: util_quda.h:121

QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326

jitify_helper.cuh
Helper file when using jitify run-time compilation. This file should be included in source code...

QUDA_HALF_PRECISION
Definition: enum_quda.h:60

quda::stream
cudaStream_t * stream
Definition: cuda_color_spinor_field.cpp:897

quda
Definition: blas_cublas.h:5

quda::APEStep
void APEStep(GaugeField &dataDs, const GaugeField &dataOr, double alpha)
Apply APE smearing to the gauge field.
Definition: gauge_ape.cu:128

qudaDeviceSynchronize
#define qudaDeviceSynchronize()
Definition: quda_cuda_api.h:145

QUDA_RECONSTRUCT_12
Definition: enum_quda.h:68

DOUBLE_TOL
#define DOUBLE_TOL
Definition: gauge_ape.cu:5

SINGLE_TOL
#define SINGLE_TOL
Definition: gauge_ape.cu:6

quda::tuneLaunch
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643

gauge_ape.cuh

tune_quda.h

QUDA_RECONSTRUCT_8
Definition: enum_quda.h:69

QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:62

QUDA_SINGLE_PRECISION
Definition: enum_quda.h:61

quda::blas::flops
unsigned long long flops
Definition: blas_quda.cu:22

quda::arg
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:1076

quda::GaugeField::Reconstruct
QudaReconstructType Reconstruct() const
Definition: gauge_field.h:250

quda::GaugeField::Order
QudaGaugeFieldOrder Order() const
Definition: gauge_field.h:251

getTuning
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:546

quda::GaugeField::isNative
bool isNative() const
Definition: gauge_field.cpp:167

gauge_field.h

quda::blas::bytes
unsigned long long bytes
Definition: blas_quda.cu:23

quda_internal.h

quda::GaugeField
Definition: gauge_field.h:164