QUDA  0.9.0
qcharge_quda.cu
Go to the documentation of this file.
1 #include <quda_internal.h>
2 #include <quda_matrix.h>
3 #include <tune_quda.h>
4 #include <gauge_field.h>
5 #include <gauge_field_order.h>
6 
7 #include <cub/cub.cuh>
8 #include <launch_kernel.cuh>
9 #include <atomic.cuh>
10 #include <cub_helper.cuh>
11 #include <index_helper.cuh>
12 
13 #ifndef Pi2
14 #define Pi2 6.2831853071795864769252867665590
15 #endif
16 
17 namespace quda {
18 
19 #ifdef GPU_GAUGE_TOOLS
20  template<typename Float, typename Gauge>
21  struct QChargeArg : public ReduceArg<double> {
22  int threads; // number of active threads required
23  Gauge data;
24  QChargeArg(const Gauge &data, GaugeField& Fmunu)
25  : ReduceArg<double>(), data(data), threads(Fmunu.Volume()) {}
26  };
27 
28  // Core routine for computing the topological charge from the field strength
29  template<int blockSize, typename Float, typename Gauge>
30  __global__
31  void qChargeComputeKernel(QChargeArg<Float,Gauge> arg) {
32  int idx = threadIdx.x + blockIdx.x*blockDim.x;
33 
34  double tmpQ1 = 0.;
35 
36  if(idx < arg.threads) {
37  int parity = 0;
38  if(idx >= arg.threads/2) {
39  parity = 1;
40  idx -= arg.threads/2;
41  }
42 
43  // Load the field-strength tensor from global memory
44  Matrix<complex<Float>,3> F[6], temp1, temp2, temp3;
45  double tmpQ2, tmpQ3;
46  for(int i=0; i<6; ++i){
47  arg.data.load((Float*)(F[i].data), idx, i, parity);
48  }
49 
50  temp1 = F[0]*F[5];
51  temp2 = F[1]*F[4];
52  temp3 = F[3]*F[2];
53 
54  tmpQ1 = (getTrace(temp1)).x;
55  tmpQ2 = (getTrace(temp2)).x;
56  tmpQ3 = (getTrace(temp3)).x;
57  tmpQ1 += (tmpQ3 - tmpQ2);
58  tmpQ1 /= (Pi2*Pi2);
59  }
60 
61  double Q = tmpQ1;
62  reduce<blockSize>(arg, Q);
63  }
64 
65  template<typename Float, typename Gauge>
66  class QChargeCompute : Tunable {
67  QChargeArg<Float,Gauge> arg;
68  const QudaFieldLocation location;
69  GaugeField *vol;
70 
71  private:
72  unsigned int sharedBytesPerThread() const { return 0; };
73  unsigned int sharedBytesPerBlock(const TuneParam &param) const { return 0; }
74 
75 // bool tuneSharedBytes() const { return false; } // Don't tune the shared memory.
76  bool tuneGridDim() const { return false; } // Don't tune the grid dimensions.
77  unsigned int minThreads() const { return arg.threads; }
78 
79  public:
80  QChargeCompute(QChargeArg<Float,Gauge> &arg, GaugeField *vol, QudaFieldLocation location)
81  : arg(arg), vol(vol), location(location) {
82  writeAuxString("threads=%d,prec=%lu",arg.threads,sizeof(Float));
83  }
84 
85  virtual ~QChargeCompute() { }
86 
87  void apply(const cudaStream_t &stream) {
88  if(location == QUDA_CUDA_FIELD_LOCATION){
89  arg.result_h[0] = 0.;
90  TuneParam tp = tuneLaunch(*this, getTuning(), getVerbosity());
91  LAUNCH_KERNEL(qChargeComputeKernel, tp, stream, arg, Float);
93  }else{ // run the CPU code
94  errorQuda("qChargeComputeKernel not supported on CPU");
95 // qChargeComputeCPU(arg);
96  }
97  }
98 
99  TuneKey tuneKey() const {
100  return TuneKey(vol->VolString(), typeid(*this).name(), aux);
101  }
102 
103  long long flops() const { return arg.threads*(3*198+9); }
104  long long bytes() const { return arg.threads*(6*18)*sizeof(Float); }
105  };
106 
107 
108 
109  template<typename Float, typename Gauge>
110  void computeQCharge(const Gauge data, GaugeField& Fmunu, QudaFieldLocation location, Float &qChg){
111  QChargeArg<Float,Gauge> arg(data,Fmunu);
112  QChargeCompute<Float,Gauge> qChargeCompute(arg, &Fmunu, location);
113  qChargeCompute.apply(0);
114  checkCudaError();
115  comm_allreduce((double*) arg.result_h);
116  qChg = arg.result_h[0];
117  }
118 
119  template<typename Float>
120  Float computeQCharge(GaugeField &Fmunu, QudaFieldLocation location){
121  Float res = 0.;
122 
123  if (!Fmunu.isNative()) errorQuda("Topological charge computation only supported on native ordered fields");
124 
125  if (Fmunu.Reconstruct() == QUDA_RECONSTRUCT_NO) {
126  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_NO>::type Gauge;
127  computeQCharge<Float>(Gauge(Fmunu), Fmunu, location, res);
128  } else if(Fmunu.Reconstruct() == QUDA_RECONSTRUCT_12){
129  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_12>::type Gauge;
130  computeQCharge<Float>(Gauge(Fmunu), Fmunu, location, res);
131  } else if(Fmunu.Reconstruct() == QUDA_RECONSTRUCT_8){
132  typedef typename gauge_mapper<Float,QUDA_RECONSTRUCT_8>::type Gauge;
133  computeQCharge<Float>(Gauge(Fmunu), Fmunu, location, res);
134  } else {
135  errorQuda("Reconstruction type %d of gauge field not supported", Fmunu.Reconstruct());
136  }
137 // computeQCharge(Fmunu, location, res);
138 
139  return res;
140  }
141 #endif
142 
143  double computeQCharge(GaugeField& Fmunu, QudaFieldLocation location){
144 
145  double charge = 0;
146 #ifdef GPU_GAUGE_TOOLS
147  if (Fmunu.Precision() == QUDA_SINGLE_PRECISION){
148  charge = computeQCharge<float>(Fmunu, location);
149  } else if(Fmunu.Precision() == QUDA_DOUBLE_PRECISION) {
150  charge = computeQCharge<double>(Fmunu, location);
151  } else {
152  errorQuda("Precision %d not supported", Fmunu.Precision());
153  }
154 #else
155  errorQuda("Gauge tools are not build");
156 #endif
157  return charge;
158 
159  }
160 
161 } // namespace quda
162 
dim3 dim3 blockDim
QudaVerbosity getVerbosity()
Definition: util_quda.cpp:20
#define errorQuda(...)
Definition: util_quda.h:90
cudaStream_t * stream
const char * VolString() const
#define Pi2
Definition: qcharge_quda.cu:14
QudaGaugeParam param
Definition: pack_test.cpp:17
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Definition: tune.cpp:603
int Volume() const
Main header file for host and device accessors to GaugeFields.
cudaError_t qudaDeviceSynchronize()
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
#define LAUNCH_KERNEL(kernel, tp, stream, arg,...)
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
Definition: quda_matrix.h:305
enum QudaFieldLocation_s QudaFieldLocation
double computeQCharge(GaugeField &Fmunu, QudaFieldLocation location)
unsigned long long flops
Definition: blas_quda.cu:42
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
Definition: complex_quda.h:880
QudaReconstructType Reconstruct() const
Definition: gauge_field.h:203
#define checkCudaError()
Definition: util_quda.h:129
void comm_allreduce(double *data)
Definition: comm_mpi.cpp:281
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
Definition: util_quda.cpp:51
QudaPrecision Precision() const
bool isNative() const
QudaParity parity
Definition: covdev_test.cpp:53
unsigned long long bytes
Definition: blas_quda.cu:43