// NOTE(review): this chunk is an extraction-garbled view of the file — original
// source line numbers (e.g. "14", "19", "20") are fused into the code text and
// interior lines are missing. All code bytes are preserved exactly as found.
// Pi2 = 2*pi; presumably used to normalize the topological charge — TODO confirm
// against the missing kernel body.
14 #define Pi2 6.2831853071795864769252867665590 19 #ifdef GPU_GAUGE_TOOLS 20 template<
typename Float,
typename Gauge>
// Kernel argument struct. It derives from ReduceArg<double>, so the associated
// kernel reduces a double-precision scalar (the charge); the struct body and
// constructor body are not visible in this chunk.
21 struct QChargeArg :
public ReduceArg<double> {
// Constructor takes a gauge-field accessor plus the field-strength tensor
// Fmunu; member initialization is in the missing lines (22-28 of the original).
24 QChargeArg(
const Gauge &data,
GaugeField& Fmunu)
// Device kernel computing the local topological-charge density and reducing it.
// NOTE(review): the __global__ qualifier and most of the body (original lines
// 32-61) are missing from this chunk; comments below are hedged accordingly.
29 template<
int blockSize,
typename Float,
typename Gauge>
31 void qChargeComputeKernel(QChargeArg<Float,Gauge>
arg) {
// Loop over the 6 independent (mu,nu) planes of Fmunu — presumably loading one
// field-strength component per iteration; loop body largely not visible here.
46 for(
int i=0;
i<6; ++
i){
// Accumulate the difference of two trace terms into the running charge-density
// accumulator (tmpQ2/tmpQ3 defined in the missing lines — TODO confirm roles).
57 tmpQ1 += (tmpQ3 - tmpQ2);
// Block-level reduction of the per-thread contribution Q into arg's reduction
// buffer; blockSize is the tuned thread-block size.
62 reduce<blockSize>(
arg, Q);
// Autotuned launcher wrapping qChargeComputeKernel. Inherits QUDA's Tunable
// interface; several members (vol, location, the tuneKey/ctor header at the
// original lines 68-81) are missing from this chunk.
65 template<
typename Float,
typename Gauge>
66 class QChargeCompute :
Tunable {
67 QChargeArg<Float,Gauge>
arg;
// Tunable hooks: the kernel needs no statically-declared shared memory per
// thread or per block beyond what ReduceArg/LAUNCH_KERNEL manage internally.
72 unsigned int sharedBytesPerThread()
const {
return 0; };
73 unsigned int sharedBytesPerBlock(
const TuneParam &
param)
const {
return 0; }
// Grid dimension is derived from minThreads() rather than tuned.
76 bool tuneGridDim()
const {
return false; }
77 unsigned int minThreads()
const {
return arg.threads; }
// Constructor tail (header on the missing original lines 78-80): copies the
// argument struct and records volume string / field location for the tune key.
81 :
arg(
arg), vol(vol), location(location) {
82 writeAuxString(
"threads=%d,prec=%lu",
arg.threads,
sizeof(Float));
85 virtual ~QChargeCompute() { }
// Launches the tuned kernel on the given stream; the GPU branch (original
// lines 88-93, presumably tuneLaunch + LAUNCH_KERNEL) is missing here — only
// the CPU fallback, which is unsupported, is visible.
87 void apply(
const cudaStream_t &
stream) {
94 errorQuda(
"qChargeComputeKernel not supported on CPU");
// Performance model used by the autotuner: flop/byte counts per lattice site.
103 long long flops()
const {
return arg.threads*(3*198+9); }
104 long long bytes()
const {
return arg.threads*(6*18)*
sizeof(Float); }
// Instantiation helper (signature on the missing original line 110): builds the
// kernel argument, runs the tuned launcher, and reads back the reduced charge.
109 template<
typename Float,
typename Gauge>
111 QChargeArg<Float,Gauge>
arg(data,Fmunu);
112 QChargeCompute<Float,Gauge> qChargeCompute(
arg, &Fmunu, location);
// Launch on the default stream (0).
113 qChargeCompute.apply(0);
// Read the host-mapped reduction result. NOTE(review): a device synchronization
// and comm_allreduce presumably occur on the missing lines 114-115 before this
// host read is safe — TODO confirm against the full file.
116 qChg =
arg.result_h[0];
// Precision-level dispatcher (signature on the missing original lines 120-122):
// validates the field order, then instantiates the computation for the gauge
// accessor type.
119 template<
typename Float>
123 if (!Fmunu.
isNative())
errorQuda(
"Topological charge computation only supported on native ordered fields");
// Three textually identical calls — presumably each sits under a different
// Reconstruct() branch (the if/else chain on the missing lines 124-132), with
// Gauge resolving to a different accessor type in each branch. TODO confirm.
127 computeQCharge<Float>(Gauge(Fmunu), Fmunu, location, res);
130 computeQCharge<Float>(Gauge(Fmunu), Fmunu, location, res);
133 computeQCharge<Float>(Gauge(Fmunu), Fmunu, location, res);
// Public entry point fragment: dispatches on Fmunu.Precision() (the switch on
// the missing lines 147/149) to the single- or double-precision implementation.
// Guarded by GPU_GAUGE_TOOLS, matching the #ifdef at the top of the file.
146 #ifdef GPU_GAUGE_TOOLS 148 charge = computeQCharge<float>(Fmunu, location);
150 charge = computeQCharge<double>(Fmunu, location);
QudaVerbosity getVerbosity()
const char * VolString() const
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
Main header file for host and device accessors to GaugeFields.
cudaError_t qudaDeviceSynchronize()
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
#define LAUNCH_KERNEL(kernel, tp, stream, arg,...)
__device__ __host__ T getTrace(const Matrix< T, 3 > &a)
enum QudaFieldLocation_s QudaFieldLocation
double computeQCharge(GaugeField &Fmunu, QudaFieldLocation location)
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaReconstructType Reconstruct() const
void comm_allreduce(double *data)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const