10 #ifdef GPU_WILSON_DIRAC 17 #endif // GPU_WILSON_DIRAC 39 #ifdef GPU_WILSON_DIRAC 45 #ifndef DSLASH_SHARED_FLOATS_PER_THREAD 46 #define DSLASH_SHARED_FLOATS_PER_THREAD 0 56 using namespace wilson;
58 #ifdef GPU_WILSON_DIRAC 59 template <
typename sFloat,
typename gFloat>
63 unsigned int sharedBytesPerThread()
const 66 int reg_size = (
typeid(sFloat)==
typeid(double2) ?
sizeof(
double) :
sizeof(
float));
74 WilsonDslashCuda(cudaColorSpinorField *
out,
const GaugeField &gauge,
const cudaColorSpinorField *
in,
75 const cudaColorSpinorField *
x,
const double a,
const int parity,
const int dagger,
76 const int *commOverride)
83 virtual ~WilsonDslashCuda() {
84 unbindSpinorTex<sFloat>(
in,
out,
x);
87 void apply(
const cudaStream_t &
stream)
89 #ifdef SHARED_WILSON_DSLASH 92 #ifndef USE_TEXTURE_OBJECTS 94 #endif // USE_TEXTURE_OBJECTS 97 dslashParam.block[0] = tp.aux.x; dslashParam.block[1] = tp.aux.y; dslashParam.block[2] = tp.aux.z; dslashParam.block[3] = tp.aux.w;
98 for (
int i=0;
i<4;
i++) dslashParam.grid[
i] = ( (
i==0 ? 2 : 1) *
in->X(
i)) / dslashParam.block[
i];
103 #endif // GPU_WILSON_DIRAC 112 #ifdef GPU_WILSON_DIRAC 124 DslashPolicyTune dslash_policy(*
dslash, const_cast<cudaColorSpinorField*>(
in),
in->Volume(),
in->GhostFace(), profile);
125 dslash_policy.apply(0);
129 errorQuda(
"Wilson dslash has not been built");
130 #endif // GPU_WILSON_DIRAC
#define DSLASH_SHARED_FLOATS_PER_THREAD
void setParam(int kernel, int prec, int threads, int blocks)
QudaVerbosity getVerbosity()
void wilsonDslashCuda(cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile)
TuneParam & tuneLaunch(Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
#define DSLASH(FUNC, gridDim, blockDim, shared, stream, param)
cpuColorSpinorField * out
QudaTune getTuning()
Query whether autotuning is enabled or not. Default is enabled but can be overridden by setting QUDA_...