quda-ref/v1.0.0/gauge__update__quda_8cu_source.html

 #include <cstdio>
 #include <cstdlib>
 #include <cuda.h>
 #include <quda_internal.h>
 #include <tune_quda.h>
 #include <gauge_field.h>
 #include <gauge_field_order.h>
 #include <quda_matrix.h>
 #include <float_vector.h>
 #include <complex_quda.h>

 namespace quda {

 #ifdef GPU_GAUGE_TOOLS

   /**
      @brief Parameter bundle handed (by value) to the gauge-update
      kernel and to its host-side counterpart.
      @tparam Float Floating-point type of the step size
      @tparam Gauge Accessor type used for both the input and output links
      @tparam Mom Accessor type used for the momentum
   */
   template <typename Float, typename Gauge, typename Mom>
   struct UpdateGaugeArg {
     Gauge out;     // accessor the updated links are written through
     Gauge in;      // accessor the current links are read through
     Mom momentum;  // accessor the momentum matrices are read through
     Float dt;      // step size that multiplies the momentum
     int nDim;      // number of directions visited per site

     // Plain member-wise copy of every argument; no validation is done here.
     UpdateGaugeArg(const Gauge &out_, const Gauge &in_,
        const Mom &momentum_, Float dt_, int nDim_)
       : out(out_), in(in_), momentum(momentum_), dt(dt_), nDim(nDim_)
     {
     }
   };

   /**
      @brief Update every link attached to one checkerboard site.

      For each direction below arg.nDim the link and momentum matrices
      are loaded, trace/3 is subtracted from each diagonal entry of the
      momentum, and the link is left-multiplied by
        - an order-N series in dt*mom, evaluated as a Horner recurrence
          (exact == false), or
        - the matrix left in place by expsu3 after it is given dt*mom
          (exact == true).
      With conj_mom set, conj() is applied to the matrix before it
      multiplies the link.

      @tparam N Order of the series used when exact is false
      @tparam conj_mom Whether conj() is applied to the momentum-derived matrix
      @tparam exact Whether expsu3 is used instead of the series
      @param arg Accessors and step size
      @param x Checkerboard site index
      @param parity Parity of the site
   */
   template<typename Float, typename Gauge, typename Mom, int N,
      bool conj_mom, bool exact>
   __device__ __host__  void updateGaugeFieldCompute
   (UpdateGaugeArg<Float,Gauge,Mom> &arg, int x, int parity) {
     typedef complex<Float> Complex;

     Matrix<Complex,3> U, Unew, P;
     for (int mu = 0; mu < arg.nDim; ++mu) {
       U = arg.in(mu, x, parity);
       P = arg.momentum(mu, x, parity);

       // remove trace/3 from each diagonal entry of the momentum
       const Complex shift = getTrace(P) / static_cast<Float>(3.0);
       for (int c = 0; c < 3; ++c) P(c,c) -= shift;

       if (exact) {
         // scale by the step size; expsu3 is then expected to overwrite
         // P in place, since its result is read back from P below
         P = arg.dt * P;
         expsu3<Float>(P);

         if (conj_mom) {
           Unew = conj(P) * U;
         } else {
           Unew = P * U;
         }
       } else {
         // Horner evaluation of the order-N series, highest term first
         Unew = U;
         for (int r = N; r > 0; r--) {
           if (conj_mom) {
             Unew = (arg.dt/r)*conj(P)*Unew + U;
           } else {
             Unew = (arg.dt/r)*P*Unew + U;
           }
         }
       }

       arg.out(mu, x, parity) = Unew;
     } // mu

   }

   /**
      @brief Host-side driver: applies updateGaugeFieldCompute to every
      checkerboard site of both parities, one after the other.
      @param arg Accessors and step size (taken by value)
   */
   template<typename Float, typename Gauge, typename Mom, int N,
      bool conj_mom, bool exact>
   void updateGaugeField(UpdateGaugeArg<Float,Gauge,Mom> arg) {

     // parity is the outer loop, the checkerboard site index the inner one
     unsigned int parity = 0;
     while (parity < 2) {
       for (int site = 0; site < arg.out.volumeCB; ++site) {
         updateGaugeFieldCompute<Float,Gauge,Mom,N,conj_mom,exact>
           (arg, site, parity);
       }
       ++parity;
     }
   }

   /**
      @brief Device entry point: one thread handles one (site, parity) pair.

      The flat thread index, built from blockIdx.x, blockDim.x and
      threadIdx.x, is interpreted as follows: indices in [0, volumeCB)
      are parity 0, indices in [volumeCB, 2*volumeCB) are parity 1, and
      any thread at or beyond 2*volumeCB returns without doing work.

      @param arg Accessors and step size (taken by value)
   */
   template<typename Float, typename Gauge, typename Mom, int N,
      bool conj_mom, bool exact>
   __global__ void updateGaugeFieldKernel(UpdateGaugeArg<Float,Gauge,Mom> arg) {
     int site = blockIdx.x*blockDim.x + threadIdx.x;
     if (site >= 2*arg.out.volumeCB) return;

     // fold the upper half of the index range onto parity 1
     int parity = 0;
     if (site >= arg.out.volumeCB) {
       parity = 1;
       site -= arg.out.volumeCB;
     }

     updateGaugeFieldCompute<Float,Gauge,Mom,N,conj_mom,exact>(arg, site, parity);
   }

   /**
      @brief Tunable wrapper that runs the gauge update either as a
      kernel launch or through the host loop, depending on location.

      @tparam N Order of the series used when exact is false
      @tparam conj_mom Forwarded to the compute routine
      @tparam exact Forwarded to the compute routine
   */
   template <typename Float, typename Gauge, typename Mom, int N,
       bool conj_mom, bool exact>
   class UpdateGaugeField : public Tunable {
   private:
     UpdateGaugeArg<Float,Gauge,Mom> arg;
     const GaugeField &meta; // meta data
     const QudaFieldLocation location; // location of the lattice fields

     // the kernel uses no shared memory
     unsigned int sharedBytesPerThread() const { return 0; }
     unsigned int sharedBytesPerBlock(const TuneParam &) const { return 0; }

     // one thread per (site, parity) pair
     unsigned int minThreads() const { return 2*arg.in.volumeCB; }
     bool tuneGridDim() const { return false; }

   public:
     /**
        @param arg Accessors and step size, copied into this object
        @param meta Field whose VolString() forms part of the tune key
        @param location Selects the kernel launch or the host loop in apply()
     */
     UpdateGaugeField(const UpdateGaugeArg<Float,Gauge,Mom> &arg,
          const GaugeField &meta, QudaFieldLocation location)
       : arg(arg), meta(meta), location(location) {
       writeAuxString("threads=%d,prec=%lu,stride=%d",
          2*arg.in.volumeCB, sizeof(Float), arg.in.stride);
     }
     virtual ~UpdateGaugeField() { }

     /**
        @brief Run the update.
        @param stream Stream the kernel is launched on; not used when
        the fields live on the host
     */
     void apply(const cudaStream_t &stream){
       if (location == QUDA_CUDA_FIELD_LOCATION) {
         TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
         // launch on the caller's stream rather than implicitly on the
         // default stream
         updateGaugeFieldKernel<Float,Gauge,Mom,N,conj_mom,exact>
           <<<tp.grid,tp.block,tp.shared_bytes,stream>>>(arg);
       } else { // run the CPU code
         updateGaugeField<Float,Gauge,Mom,N,conj_mom,exact>(arg);
       }
     } // apply

     /**
        @brief Flop estimate: N series steps per link, each counted as
        a scalar-matrix multiply, a matrix-matrix multiply and a
        matrix-matrix addition.
        NOTE(review): the same count is reported when exact is true;
        the cost of expsu3 is not modelled separately - confirm whether
        that is intended.
     */
     long long flops() const {
       const int Nc = 3;
       const long long perStep = Nc*Nc*2 +         // scalar-matrix multiply
         (8*Nc*Nc*Nc - 2*Nc*Nc) +                  // matrix-matrix multiply
         Nc*Nc*2;                                  // matrix-matrix addition
       // widen before multiplying: the product exceeds the int range
       // for checkerboard volumes above roughly 1.4e5 sites
       return static_cast<long long>(arg.nDim)*2*arg.in.volumeCB*N*perStep;
     }

     /**
        @brief Memory-traffic estimate: one read of in, one read of
        momentum and one write of out per link.
     */
     long long bytes() const {
       return static_cast<long long>(arg.nDim)*2*arg.in.volumeCB*
         (arg.in.Bytes() + arg.out.Bytes() + arg.momentum.Bytes());
     }

     TuneKey tuneKey() const { return TuneKey(meta.VolString(), typeid(*this).name(), aux); }
   };

   template <typename Float, typename Gauge, typename Mom>
   void updateGaugeField(Gauge &out, const Gauge &in, const Mom &mom,
       double dt, const GaugeField &meta, bool conj_mom, bool exact,
       QudaFieldLocation location) {
     // degree of exponential expansion
     const int N = 8;

     if (conj_mom) {
       if (exact) {
   UpdateGaugeArg<Float, Gauge, Mom> arg(out, in, mom, dt, 4);
   UpdateGaugeField<Float,Gauge,Mom,N,true,true> updateGauge(arg, meta, location);
   updateGauge.apply(0);
       } else {
   UpdateGaugeArg<Float, Gauge, Mom> arg(out, in, mom, dt, 4);
   UpdateGaugeField<Float,Gauge,Mom,N,true,false> updateGauge(arg, meta, location);
   updateGauge.apply(0);
       }
     } else {
       if (exact) {
   UpdateGaugeArg<Float, Gauge, Mom> arg(out, in, mom, dt, 4);
   UpdateGaugeField<Float,Gauge,Mom,N,false,true> updateGauge(arg, meta, location);
   updateGauge.apply(0);
       } else {
   UpdateGaugeArg<Float, Gauge, Mom> arg(out, in, mom, dt, 4);
   UpdateGaugeField<Float,Gauge,Mom,N,false,false> updateGauge(arg, meta, location);
   updateGauge.apply(0);
       }
     }

     if (location == QUDA_CUDA_FIELD_LOCATION) checkCudaError();

   }

   template <typename Float, typename Gauge>
     void updateGaugeField(Gauge out, const Gauge &in, const GaugeField &mom,
         double dt, bool conj_mom, bool exact,
         QudaFieldLocation location) {
     if (mom.Order() == QUDA_FLOAT2_GAUGE_ORDER) {
       if (mom.Reconstruct() == QUDA_RECONSTRUCT_10) {
   // FIX ME - 11 is a misnomer to avoid confusion in template instantiation
   updateGaugeField<Float>(out, in, gauge::FloatNOrder<Float,18,2,11>(mom), dt, mom, conj_mom, exact, location);
       } else {
   errorQuda("Reconstruction type not supported");
       }
     } else if (mom.Order() == QUDA_MILC_GAUGE_ORDER) {
       updateGaugeField<Float>(out, in, gauge::MILCOrder<Float,10>(mom), dt, mom, conj_mom, exact, location);
     } else {
       errorQuda("Gauge Field order %d not supported", mom.Order());
     }

   }

   /**
      @brief Choose the link accessor from the output field's order and
      reconstruct type, then forward to the momentum-dispatch overload.

      Requires three colors and identical order and reconstruct type on
      the input and output fields.  Native fields are accepted with
      QUDA_RECONSTRUCT_NO or QUDA_RECONSTRUCT_12; non-native fields
      only in QUDA_MILC_GAUGE_ORDER.  Violations are reported through
      errorQuda.

      @param out Field receiving the updated links
      @param in Field holding the current links
      @param mom Momentum field
      @param dt Step size
      @param conj_mom Forwarded unchanged
      @param exact Forwarded unchanged
      @param location Where the fields live
   */
   template <typename Float>
   void updateGaugeField(GaugeField &out, const GaugeField &in, const GaugeField &mom,
       double dt, bool conj_mom, bool exact,
       QudaFieldLocation location) {

     const int Nc = 3;
     if (out.Ncolor() != Nc) {
       errorQuda("Ncolor=%d not supported at this time", out.Ncolor());
     }

     const bool sameLayout = (out.Order() == in.Order()) && (out.Reconstruct() == in.Reconstruct());
     if (!sameLayout) {
       errorQuda("Input and output gauge field ordering and reconstruction must match");
     }

     if (out.isNative()) {
       switch (out.Reconstruct()) {
       case QUDA_RECONSTRUCT_NO: {
         typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type G;
         updateGaugeField<Float>(G(out),G(in), mom, dt, conj_mom, exact, location);
         break;
       }
       case QUDA_RECONSTRUCT_12: {
         typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type G;
         updateGaugeField<Float>(G(out), G(in), mom, dt, conj_mom, exact, location);
         break;
       }
       default:
         errorQuda("Reconstruction type not supported");
       }
     } else if (out.Order() == QUDA_MILC_GAUGE_ORDER) {
       typedef gauge::MILCOrder<Float, Nc*Nc*2> G;
       updateGaugeField<Float>(G(out), G(in), mom, dt, conj_mom, exact, location);
     } else {
       errorQuda("Gauge Field order %d not supported", out.Order());
     }

   }
 #endif

   /**
      @brief Public entry point for the gauge update.

      Checks that out, in and mom share precision and location, then
      dispatches on the precision of out (double or single).  When the
      file is compiled without GPU_GAUGE_TOOLS the call only reports an
      error.

      @param out Field receiving the updated links
      @param dt Step size
      @param in Field holding the current links
      @param mom Momentum field
      @param conj_mom Forwarded unchanged
      @param exact Forwarded unchanged
   */
   void updateGaugeField(GaugeField &out, double dt, const GaugeField& in,
       const GaugeField& mom, bool conj_mom, bool exact)
   {
 #ifdef GPU_GAUGE_TOOLS
     const bool samePrecision = (out.Precision() == in.Precision()) && (out.Precision() == mom.Precision());
     if (!samePrecision) {
       errorQuda("Gauge and momentum fields must have matching precision");
     }

     const bool sameLocation = (out.Location() == in.Location()) && (out.Location() == mom.Location());
     if (!sameLocation) {
       errorQuda("Gauge and momentum fields must have matching location");
     }

     switch (out.Precision()) {
     case QUDA_DOUBLE_PRECISION:
       updateGaugeField<double>(out, in, mom, dt, conj_mom, exact, out.Location());
       break;
     case QUDA_SINGLE_PRECISION:
       updateGaugeField<float>(out, in, mom, dt, conj_mom, exact, out.Location());
       break;
     default:
       errorQuda("Precision %d not supported", out.Precision());
     }
 #else
   errorQuda("Gauge tools are not build");
 #endif

   }

 } // namespace quda
QUDA_RECONSTRUCT_NO
Definition: enum_quda.h:67

QUDA_RECONSTRUCT_10
Definition: enum_quda.h:72

float_vector.h

getVerbosity
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:21

errorQuda
#define errorQuda(...)
Definition: util_quda.h:121

QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326

QUDA_FLOAT2_GAUGE_ORDER
Definition: enum_quda.h:39

quda::stream
cudaStream_t * stream
Definition: cuda_color_spinor_field.cpp:897

quda
Definition: blas_cublas.h:5

QUDA_RECONSTRUCT_12
Definition: enum_quda.h:68

QUDA_MILC_GAUGE_ORDER
Definition: enum_quda.h:44

in
cpuColorSpinorField * in
Definition: staggered_invert_test.cpp:98

quda::tuneLaunch
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:643

gauge_field_order.h
Main header file for host and device accessors to GaugeFields.

quda::Complex
std::complex< double > Complex
Definition: quda_internal.h:46

tune_quda.h

quda::LatticeField::Location
QudaFieldLocation Location() const
Definition: lattice_field.cpp:660

Matrix
Definition: hisq_force_reference2.cpp:131

quda_matrix.h

QUDA_DOUBLE_PRECISION
Definition: enum_quda.h:62

quda::getTrace
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
Definition: quda_matrix.h:415

QudaFieldLocation
enum QudaFieldLocation_s QudaFieldLocation

out
cpuColorSpinorField * out
Definition: staggered_invert_test.cpp:99

QUDA_SINGLE_PRECISION
Definition: enum_quda.h:61

quda::blas::flops
unsigned long long flops
Definition: blas_quda.cu:22

quda::arg
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:1076

complex_quda.h

quda::updateGaugeField
void updateGaugeField(GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact)
Definition: gauge_update_quda.cu:227

checkCudaError
#define checkCudaError()
Definition: util_quda.h:161

quda::conj
__host__ __device__ ValueType conj(ValueType x)
Definition: complex_quda.h:130

getTuning
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:52

quda::LatticeField::Precision
QudaPrecision Precision() const
Definition: lattice_field.h:546

parity
QudaParity parity
Definition: covdev_test.cpp:54

gauge_field.h

quda::blas::bytes
unsigned long long bytes
Definition: blas_quda.cu:23

quda_internal.h

quda::GaugeField
Definition: gauge_field.h:164