10 using namespace clover;
12 #ifdef GPU_CLOVER_DIRAC 14 template <
typename Float,
typename Arg>
// Always returns true.
// NOTE(review): presumably tells the autotuner that the grid dimension should be
// tuned as well -- confirm against the base class this overrides.
20 bool tuneGridDim()
const {
return true; }
// Constructor: initializes the arg and meta members from the parameters, then
// writes an aux string composed of arg.clover.stride, sizeof(Float), and the
// arg.computeTraceLog / arg.twist flags (each printed as "true" or "false").
// It also calls create_jitify_program for "kernels/clover_invert.cuh".
// NOTE(review): this listing elides some source lines (the closing brace is not
// visible, and the jitify call is presumably wrapped in a preprocessor guard) --
// confirm against the full file.
23 CloverInvert(
Arg &arg,
const CloverField &meta) : arg(arg), meta(meta) {
24 writeAuxString(
"stride=%d,prec=%lu,trlog=%s,twist=%s", arg.clover.stride,
sizeof(Float),
25 arg.computeTraceLog ?
"true" :
"false", arg.twist ?
"true" :
"false");
28 create_jitify_program(
"kernels/clover_invert.cuh");
// Virtual destructor; the body is a lone empty statement, so nothing is released here.
33 virtual ~CloverInvert() { ; }
// Launch entry point; takes a CUDA stream by const reference.
// Visible steps in this listing:
//   - resets arg.result_h[0] to (0, 0);
//   - instantiates the jitify kernel "quda::cloverInvertKernel" with
//     (int)tp.block.x, Float, Arg, arg.computeTraceLog and arg.twist;
//   - calls one of the four cloverInvert<Float, Arg, bool, bool> instantiations,
//     which enumerate every combination of the two boolean template arguments.
// NOTE(review): many source lines are missing from this listing (where tp comes
// from, the launch calls, the else branches, the conditions that select between
// the four instantiations, and the closing braces). Presumably the jitify and
// non-jitify paths are alternatives under a preprocessor guard -- confirm against
// the full file.
35 void apply(
const cudaStream_t &
stream) {
37 arg.result_h[0] = make_double2(0.,0.);
40 using namespace jitify::reflection;
41 jitify_error = program->kernel(
"quda::cloverInvertKernel")
42 .instantiate((
int)tp.
block.x, Type<Float>(), Type<Arg>(), arg.computeTraceLog, arg.twist)
46 if (arg.computeTraceLog) {
61 if (arg.computeTraceLog) {
63 cloverInvert<Float, Arg, true, true>(
arg);
65 cloverInvert<Float, Arg, true, false>(
arg);
69 cloverInvert<Float, Arg, false, true>(
arg);
71 cloverInvert<Float, Arg, false, false>(
arg);
// Reported floating-point operation count: always 0.
81 long long flops()
const {
return 0; }
// Returns 2 * arg.clover.volumeCB * (arg.inverse.Bytes() + arg.clover.Bytes()).
// NOTE(review): presumably the memory-traffic estimate reported to the tuner, with
// the factor 2 covering both parities -- confirm.
82 long long bytes()
const {
return 2*arg.clover.
volumeCB*(arg.inverse.Bytes() + arg.clover.Bytes()); }
// Before tuning: if arg.clover.clover equals arg.inverse.clover, calls arg.inverse.save().
// NOTE(review): presumably this is the in-place case, where tuning runs would
// otherwise overwrite the input -- confirm.
84 void preTune() {
if (arg.clover.clover == arg.inverse.clover) arg.inverse.save(); }
// After tuning: if arg.clover.clover equals arg.inverse.clover, calls arg.inverse.load().
// NOTE(review): presumably restores what preTune() saved -- confirm.
85 void postTune() {
if (arg.clover.clover == arg.inverse.clover) arg.inverse.load(); }
// Precision-templated helper (its function signature is elided from this listing).
// Visible steps: constructs a CloverInvert<Float, CloverInvertArg<Float>> from arg
// and clover; when arg.computeTraceLog is set, copies arg.result_h[0].x and
// arg.result_h[0].y into clover.TrLog()[0] and clover.TrLog()[1].
// NOTE(review): the lines that build arg, run the kernel and reduce the result are
// not visible here -- confirm against the full file.
89 template <
typename Float>
92 CloverInvert<Float,CloverInvertArg<Float>> invert(arg, clover);
95 if (arg.computeTraceLog) {
98 clover.
TrLog()[0] = arg.result_h[0].x;
99 clover.
TrLog()[1] = arg.result_h[0].y;
// Precision dispatch fragment (the enclosing function signature and the branch
// conditions are elided from this listing).
// Visible steps: raises errorQuda("Half precision not supported for order %d")
// with clover.Order(), and calls cloverInvert<double> or cloverInvert<float>
// with (clover, computeTraceLog).
// NOTE(review): presumably the selection is made on the field's precision -- confirm.
108 #ifdef GPU_CLOVER_DIRAC 110 errorQuda(
"Half precision not supported for order %d", clover.
Order());
113 cloverInvert<double>(clover, computeTraceLog);
115 cloverInvert<float>(clover, computeTraceLog);
#define LAUNCH_KERNEL_LOCAL_PARITY(kernel, tp, stream, arg,...)
QudaVerbosity getVerbosity()
Helper file when using jitify run-time compilation. This file should be included in source code...
void comm_allreduce_array(double *data, size_t size)
const char * VolString() const
QudaCloverFieldOrder Order() const
void cloverInvert(CloverField &clover, bool computeTraceLog)
This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse ...
#define qudaDeviceSynchronize()
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
__global__ void cloverInvertKernel(Arg arg)
QudaFieldLocation Location() const
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
QudaPrecision Precision() const