10 #ifdef GPU_WILSON_DIRAC 17 #endif // GPU_WILSON_DIRAC 31 #undef GPU_STAGGERED_DIRAC 40 #ifdef GPU_TWISTED_MASS_DIRAC 44 #ifndef DSLASH_SHARED_FLOATS_PER_THREAD 45 #define DSLASH_SHARED_FLOATS_PER_THREAD 0 57 #ifdef GPU_TWISTED_MASS_DIRAC 58 template <
typename sFloat,
typename gFloat>
66 unsigned int sharedBytesPerThread()
const 69 int reg_size = (
typeid(sFloat)==
typeid(double2) ?
sizeof(
double) :
sizeof(
float));
77 TwistedDslashCuda(cudaColorSpinorField *
out,
const GaugeField &gauge,
78 const cudaColorSpinorField *
in,
const cudaColorSpinorField *
x,
80 const double epsilon,
const double k,
const int parity,
const int dagger,
81 const int *commOverride)
92 dslashParam.a =
kappa;
93 dslashParam.a_f =
kappa;
96 dslashParam.fl_stride =
in->VolumeCB();
98 virtual ~TwistedDslashCuda() { unbindSpinorTex<sFloat>(
in,
out,
x); }
100 TuneKey tuneKey()
const 105 strcat(key.aux,
",TwistInvDslash");
111 strcat(key.aux,
",DslashTwist");
114 errorQuda(
"Unsupported twisted-dslash type %d", dslashType);
119 void apply(
const cudaStream_t &
stream)
121 #ifdef SHARED_WILSON_DSLASH 124 #ifndef USE_TEXTURE_OBJECTS 126 #endif // USE_TEXTURE_OBJECTS 129 dslashParam.block[0] = tp.aux.x; dslashParam.block[1] = tp.aux.y; dslashParam.block[2] = tp.aux.z; dslashParam.block[3] = tp.aux.w;
130 for (
int i=0;
i<4;
i++) dslashParam.grid[
i] = ( (
i==0 ? 2 : 1) *
in->X(
i)) / dslashParam.block[
i];
134 DSLASH(twistedMassTwistInvDslash, tp.grid, tp.block, tp.shared_bytes,
stream, dslashParam);
137 DSLASH(twistedMassDslash, tp.grid, tp.block, tp.shared_bytes,
stream, dslashParam);
140 DSLASH(twistedMassDslashTwist, tp.grid, tp.block, tp.shared_bytes,
stream, dslashParam);
142 default:
errorQuda(
"Invalid twisted mass dslash type");
146 long long flops()
const {
147 int twisted_flops = 48;
149 switch(dslashParam.kernel_type) {
159 flops += twisted_flops *
in->VolumeCB();
165 #endif // GPU_TWISTED_MASS_DIRAC 172 const double &
kappa,
const double &
mu,
const double &epsilon,
173 const double &k,
const int *commOverride,
TimeProfile &profile)
175 #ifdef GPU_TWISTED_MASS_DIRAC 182 dslash =
new TwistedDslashCuda<double2,double2>(
out, gauge,
in,
x, type,
kappa,
mu, epsilon, k,
parity,
dagger, commOverride);
184 dslash =
new TwistedDslashCuda<float4,float4>(
out, gauge,
in,
x, type,
kappa,
mu, epsilon, k,
parity,
dagger, commOverride);
186 dslash =
new TwistedDslashCuda<short4,short4>(
out, gauge,
in,
x, type,
kappa,
mu, epsilon, k,
parity,
dagger, commOverride);
189 DslashPolicyTune dslash_policy(*
dslash, const_cast<cudaColorSpinorField*>(
in),
in->Volume(),
in->GhostFace(), profile);
190 dslash_policy.apply(0);
196 errorQuda(
"Twisted mass dslash has not been built");
void setParam(int kernel, int prec, int threads, int blocks)
QudaVerbosity getVerbosity()
char * strcat(char *__s1, const char *__s2)
VOLATILE spinorFloat kappa
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
enum QudaTwistDslashType_s QudaTwistDslashType
#define DSLASH_SHARED_FLOATS_PER_THREAD
#define DSLASH(FUNC, gridDim, blockDim, shared, stream, param)
cpuColorSpinorField * out
void twistedMassDslashCuda(cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile)
virtual TuneKey tuneKey() const
void setKernelPackT(bool pack)
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...
virtual long long flops() const
static __inline__ size_t size_t d