QUDA
1.0.0
|
Public Member Functions | |
Dslash5 (Arg &arg, const ColorSpinorField &meta) | |
virtual | ~Dslash5 () |
template<typename T > | |
void | launch (T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
void | apply (const cudaStream_t &stream) |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
TuneKey | tuneKey () const |
![]() | |
TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z) | |
bool | advanceBlockDim (TuneParam &param) const |
void | resizeVector (int y, int z) const |
void | resizeStep (int y, int z) const |
![]() | |
TunableVectorY (unsigned int vector_length_y) | |
void | resizeVector (int y) const |
void | resizeStep (int y) const |
![]() | |
Tunable () | |
virtual | ~Tunable () |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual bool | advanceTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Protected Member Functions | |
long long | flops () const |
long long | bytes () const |
bool | tuneGridDim () const |
unsigned int | minThreads () const |
int | blockStep () const |
int | blockMin () const |
unsigned int | sharedBytesPerThread () const |
unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overloaded to return maxDynamicSharedBytesPerBlock. More... | |
![]() | |
virtual unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
![]() | |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual void | resetBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Protected Attributes | |
Arg & | arg |
const ColorSpinorField & | meta |
![]() | |
unsigned int | vector_length_y |
unsigned int | step_y |
bool | tune_block_x |
![]() | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Static Protected Attributes | |
static constexpr bool | shared = true |
static constexpr bool | var_inverse = true |
Definition at line 20 of file dslash5_domain_wall.cu.
|
inline |
Definition at line 103 of file dslash5_domain_wall.cu.
References quda::Tunable::aux, quda::LatticeField::AuxString(), quda::DSLASH5_DWF, quda::DSLASH5_MOBIUS, quda::DSLASH5_MOBIUS_PRE, errorQuda, quda::M5_INV_DWF, quda::M5_INV_MOBIUS, and quda::M5_INV_ZMOBIUS.
|
inlinevirtual |
Definition at line 118 of file dslash5_domain_wall.cu.
|
inlinevirtual |
Implements quda::Tunable.
Definition at line 130 of file dslash5_domain_wall.cu.
References quda::DSLASH5_DWF, quda::DSLASH5_MOBIUS, quda::DSLASH5_MOBIUS_PRE, errorQuda, getTuning(), getVerbosity(), quda::Dslash5< Float, nColor, Arg >::launch(), quda::LatticeField::Location(), quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, QUDA_CPU_FIELD_LOCATION, and quda::tuneLaunch().
Referenced by quda::ApplyDslash5().
|
inlineprotectedvirtual |
Reimplemented from quda::Tunable.
Definition at line 80 of file dslash5_domain_wall.cu.
|
inlineprotectedvirtual |
Reimplemented from quda::Tunable.
Definition at line 79 of file dslash5_domain_wall.cu.
|
inlineprotectedvirtual |
Reimplemented from quda::Tunable.
Definition at line 62 of file dslash5_domain_wall.cu.
References quda::DSLASH5_DWF, quda::DSLASH5_MOBIUS, quda::DSLASH5_MOBIUS_PRE, errorQuda, Ls, quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, and quda::ColorSpinorField::X().
|
inlinevirtual |
Sets default values for when tuning is disabled.
Reimplemented from quda::TunableVectorYZ.
Definition at line 202 of file dslash5_domain_wall.cu.
References quda::TuneParam::block, quda::TunableVectorYZ::defaultTuneParam(), quda::TuneParam::grid, quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, quda::TuneParam::shared_bytes, and quda::Dslash5< Float, nColor, Arg >::sharedBytesPerThread().
|
inlineprotectedvirtual |
Implements quda::Tunable.
Definition at line 31 of file dslash5_domain_wall.cu.
References quda::DSLASH5_DWF, quda::DSLASH5_MOBIUS, quda::DSLASH5_MOBIUS_PRE, errorQuda, Ls, quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::ColorSpinorField::Volume(), and quda::ColorSpinorField::X().
|
inlinevirtual |
Reimplemented from quda::TunableVectorYZ.
Definition at line 192 of file dslash5_domain_wall.cu.
References quda::TuneParam::block, quda::TuneParam::grid, quda::TunableVectorYZ::initTuneParam(), quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, quda::TuneParam::shared_bytes, and quda::Dslash5< Float, nColor, Arg >::sharedBytesPerThread().
|
inline |
Definition at line 120 of file dslash5_domain_wall.cu.
References quda::TuneParam::block, quda::TuneParam::grid, quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, quda::qudaLaunchKernel(), quda::Tunable::setMaxDynamicSharedBytesPerBlock(), and quda::TuneParam::shared_bytes.
Referenced by quda::Dslash5< Float, nColor, Arg >::apply().
|
inlineprotectedvirtual |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overloaded to return maxDynamicSharedBytesPerBlock.
Reimplemented from quda::Tunable.
Definition at line 93 of file dslash5_domain_wall.cu.
References quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, quda::Tunable::maxDynamicSharedBytesPerBlock(), and quda::Tunable::maxSharedBytesPerBlock().
|
inlineprotectedvirtual |
Reimplemented from quda::Tunable.
Definition at line 78 of file dslash5_domain_wall.cu.
|
inlineprotectedvirtual |
Reimplemented from quda::TunableVectorY.
Definition at line 81 of file dslash5_domain_wall.cu.
References quda::M5_INV_DWF, quda::M5_INV_MOBIUS, quda::M5_INV_ZMOBIUS, nColor, and quda::ColorSpinorField::Nspin().
Referenced by quda::Dslash5< Float, nColor, Arg >::defaultTuneParam(), and quda::Dslash5< Float, nColor, Arg >::initTuneParam().
|
inlineprotectedvirtual |
Reimplemented from quda::Tunable.
Definition at line 77 of file dslash5_domain_wall.cu.
|
inlinevirtual |
Implements quda::Tunable.
Definition at line 212 of file dslash5_domain_wall.cu.
References quda::Tunable::aux, and quda::LatticeField::VolString().
|
protected |
Definition at line 24 of file dslash5_domain_wall.cu.
Referenced by quda::ApplyDslash5().
|
protected |
Definition at line 25 of file dslash5_domain_wall.cu.
|
staticprotected |
Definition at line 26 of file dslash5_domain_wall.cu.
|
staticprotected |
Whether to use the variable or fixed coefficient algorithm. Must be true if using ZMOBIUS.
Definition at line 29 of file dslash5_domain_wall.cu.