10 #ifdef GPU_WILSON_DIRAC
17 #endif // GPU_WILSON_DIRAC
32 #undef GPU_STAGGERED_DIRAC
35 #include <dslash_index.cuh>
41 #ifdef GPU_TWISTED_MASS_DIRAC
45 #ifndef DSLASH_SHARED_FLOATS_PER_THREAD
46 #define DSLASH_SHARED_FLOATS_PER_THREAD 0
49 #include <dslash_quda.cuh>
54 #include <dslash_events.cuh>
56 using namespace twisted;
58 #ifdef GPU_TWISTED_MASS_DIRAC
59 template <
typename sFloat,
typename gFloat>
60 class TwistedDslashCuda :
public SharedDslashCuda {
63 const gFloat *gauge0, *gauge1;
68 unsigned int sharedBytesPerThread()
const
70 #if (__COMPUTE_CAPABILITY__ >= 200)
72 int reg_size = (
typeid(sFloat)==
typeid(double2) ?
sizeof(double) :
sizeof(
float));
78 int reg_size = (
typeid(sFloat)==
typeid(double2) ?
sizeof(double) :
sizeof(
float));
84 TwistedDslashCuda(cudaColorSpinorField *
out,
const gFloat *gauge0,
const gFloat *gauge1,
87 const double epsilon,
const double k,
const int dagger)
88 : SharedDslashCuda(out, in, x, reconstruct, dagger), gauge0(gauge0), gauge1(gauge1), dslashType(dslashType)
90 bindSpinorTex<sFloat>(
in,
out,
x);
96 dslashParam.fl_stride = in->VolumeCB();
98 virtual ~TwistedDslashCuda() { unbindSpinorTex<sFloat>(
in,
out,
x); }
100 TuneKey tuneKey()
const
102 TuneKey key = DslashCuda::tuneKey();
105 strcat(key.aux,
",TwistInvDslash");
111 strcat(key.aux,
",DslashTwist");
117 void apply(
const cudaStream_t &
stream)
119 #ifdef SHARED_WILSON_DSLASH
121 errorQuda(
"Shared dslash does not yet support X-dimension partitioning");
127 DSLASH(twistedMassTwistInvDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam,
128 (sFloat*)out->V(), (
float*)out->Norm(), gauge0, gauge1,
129 (sFloat*)in->V(), (
float*)in->Norm(), a, b, (sFloat*)(x ? x->V() : 0), (
float*)(x ? x->Norm() : 0));
132 DSLASH(twistedMassDslash, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam,
133 (sFloat*)out->V(), (
float*)out->Norm(), gauge0, gauge1,
134 (sFloat*)in->V(), (
float*)in->Norm(), a, b, (sFloat*)(x ? x->V() : 0), (
float*)(x ? x->Norm() : 0));
137 DSLASH(twistedMassDslashTwist, tp.grid, tp.block, tp.shared_bytes, stream, dslashParam,
138 (sFloat*)out->V(), (
float*)out->Norm(), gauge0, gauge1,
139 (sFloat*)in->V(), (
float*)in->Norm(), a, b, (sFloat*)x->V(), (
float*)x->Norm());
141 default:
errorQuda(
"Invalid twisted mass dslash type");
145 long long flops()
const {
return (x ? 1416ll : 1392ll) * in->VolumeCB(); }
147 #endif // GPU_TWISTED_MASS_DIRAC
149 #include <dslash_policy.cuh>
154 const double &kappa,
const double &mu,
const double &epsilon,
155 const double &k,
const int *commOverride,
TimeProfile &profile,
159 #ifdef GPU_TWISTED_MASS_DIRAC
162 int ghost_threads[4] = {0};
163 int bulk_threads = in->
Volume();
165 for(
int i=0;i<4;i++){
181 void *gauge0, *gauge1;
185 errorQuda(
"Mixing gauge and spinor precision not supported");
187 DslashCuda *dslash = 0;
188 size_t regSize =
sizeof(float);
191 #if (__COMPUTE_CAPABILITY__ >= 130)
192 dslash =
new TwistedDslashCuda<double2,double2>(
out, (double2*)gauge0,(double2*)gauge1, gauge.
Reconstruct(),
in,
x, type,
kappa,
mu, epsilon, k,
dagger);
193 regSize =
sizeof(double);
195 errorQuda(
"Double precision not supported on this GPU");
198 dslash =
new TwistedDslashCuda<float4,float4>(
out, (float4*)gauge0,(float4*)gauge1, gauge.
Reconstruct(),
in,
x, type,
kappa,
mu, epsilon, k,
dagger);
201 dslash =
new TwistedDslashCuda<short4,short4>(
out, (short4*)gauge0,(short4*)gauge1, gauge.
Reconstruct(),
in,
x, type,
kappa,
mu, epsilon, k,
dagger);
206 DslashPolicyImp* dslashImp = DslashFactory::create(dslashPolicy);
211 (*dslashImp)(*dslash,
const_cast<cudaColorSpinorField*
>(
in), regSize, parity, dagger, bulk_threads, ghost_threads, profile);
227 errorQuda(
"Twisted mass dslash has not been built");
void unbindGaugeTex(const cudaGaugeField &gauge)
int commDimPartitioned(int dir)
QudaVerbosity getVerbosity()
__global__ void const RealA *const const RealA *const const RealA *const const RealB *const const RealB *const int int mu
int GhostNormOffset(const int i) const
QudaPrecision Precision() const
VOLATILE spinorFloat kappa
enum QudaDslashPolicy_s QudaDslashPolicy
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
QudaReconstructType Reconstruct() const
void setTwistPack(bool pack)
QudaFieldOrder FieldOrder() const
enum QudaTwistDslashType_s QudaTwistDslashType
#define DSLASH_SHARED_FLOATS_PER_THREAD
cpuColorSpinorField * out
enum QudaReconstructType_s QudaReconstructType
QudaPrecision Precision() const
void bindGaugeTex(const cudaGaugeField &gauge, const int oddBit, void **gauge0, void **gauge1)
int GhostOffset(const int i) const
void twistedMassDslashCuda(cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2)
const int * GhostFace() const