QUDA
1.0.0
|
Public Member Functions | |
DslashPolicyTune (Dslash &dslash, cudaColorSpinorField *in, const int volume, const int *ghostFace, TimeProfile &profile) | |
virtual | ~DslashPolicyTune () |
void | apply (const cudaStream_t &stream) |
int | tuningIter () const |
bool | advanceAux (TuneParam &param) const |
bool | advanceTuneParam (TuneParam &param) const |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
TuneKey | tuneKey () const |
long long | flops () const |
long long | bytes () const |
void | preTune () |
void | postTune () |
![]() | |
Tunable () | |
virtual | ~Tunable () |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Private Member Functions | |
bool | tuneGridDim () const |
bool | tuneAuxDim () const |
unsigned int | sharedBytesPerThread () const |
unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
Private Attributes | |
Dslash & | dslash |
decltype(dslash.dslashParam) & | dslashParam |
cudaColorSpinorField * | in |
const int | volume |
const int * | ghostFace |
TimeProfile & | profile |
Additional Inherited Members | |
![]() | |
virtual unsigned int | minThreads () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
virtual bool | advanceBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability) More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt in then this function can be overridden to return maxDynamicSharedBytesPerBlock. More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
![]() | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 1770 of file dslash_policy.cuh.
|
inline |
Definition at line 1786 of file dslash_policy.cuh.
References comm_enable_peer2peer(), comm_gdr_enabled(), comm_gpuid(), comm_peer2peer_enabled_global(), quda::dslash::DslashFactory< Dslash >::create(), device, quda::disableProfileCount(), quda::dslash::enable_policy(), quda::enableProfileCount(), quda::blas::end(), errorQuda, quda::dslash::first_active_policy, quda::getKernelPackT(), quda::getTuneCache(), getTuning(), getVerbosity(), in, quda::popKernelPackT(), quda::pushKernelPackT(), quda::dslash::QUDA_DSLASH, quda::dslash::QUDA_DSLASH_ASYNC, quda::dslash::QUDA_DSLASH_POLICY_DISABLED, quda::dslash::QUDA_FUSED_DSLASH, quda::dslash::QUDA_FUSED_DSLASH_ASYNC, quda::dslash::QUDA_FUSED_GDR_DSLASH, quda::dslash::QUDA_FUSED_GDR_RECV_DSLASH, quda::dslash::QUDA_FUSED_ZERO_COPY_DSLASH, quda::dslash::QUDA_FUSED_ZERO_COPY_PACK_DSLASH, quda::dslash::QUDA_FUSED_ZERO_COPY_PACK_GDR_RECV_DSLASH, quda::dslash::QUDA_GDR_DSLASH, quda::dslash::QUDA_GDR_RECV_DSLASH, quda::dslash::QUDA_P2P_COPY_ENGINE, quda::dslash::QUDA_P2P_DEFAULT, quda::dslash::QUDA_P2P_POLICY_DISABLED, quda::dslash::QUDA_P2P_REMOTE_WRITE, QUDA_SILENT, quda::dslash::QUDA_ZERO_COPY_DSLASH, quda::dslash::QUDA_ZERO_COPY_PACK_DSLASH, quda::dslash::QUDA_ZERO_COPY_PACK_GDR_RECV_DSLASH, quda::setKernelPackT(), quda::setPolicyTuning(), quda::cudaColorSpinorField::streamInit(), streams, and warningQuda.
|
inlinevirtual |
Definition at line 2009 of file dslash_policy.cuh.
References quda::setPolicyTuning().
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2051 of file dslash_policy.cuh.
References quda::TuneParam::aux, quda::dslash::first_active_p2p_policy, quda::dslash::first_active_policy, quda::dslash::QUDA_DSLASH_POLICY_DISABLED, and quda::dslash::QUDA_P2P_POLICY_DISABLED.
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2068 of file dslash_policy.cuh.
|
inlinevirtual |
Implements quda::Tunable.
Definition at line 2011 of file dslash_policy.cuh.
References quda::TuneParam::aux, comm_enable_peer2peer(), comm_peer2peer_enabled_global(), quda::dslash::DslashFactory< Dslash >::create(), errorQuda, quda::getKernelPackT(), getTuning(), getVerbosity(), in, quda::popKernelPackT(), quda::pushKernelPackT(), quda::dslash::QUDA_DSLASH_POLICY_DISABLED, quda::dslash::QUDA_FUSED_GDR_DSLASH, quda::dslash::QUDA_FUSED_ZERO_COPY_DSLASH, quda::dslash::QUDA_FUSED_ZERO_COPY_PACK_DSLASH, quda::dslash::QUDA_FUSED_ZERO_COPY_PACK_GDR_RECV_DSLASH, quda::dslash::QUDA_GDR_DSLASH, quda::dslash::QUDA_P2P_DEFAULT, quda::dslash::QUDA_P2P_REMOTE_WRITE, quda::dslash::QUDA_ZERO_COPY_DSLASH, quda::dslash::QUDA_ZERO_COPY_PACK_DSLASH, quda::dslash::QUDA_ZERO_COPY_PACK_GDR_RECV_DSLASH, quda::setKernelPackT(), and quda::tuneLaunch().
Referenced by quda::DomainWall4DApply< Float, nColor, recon >::DomainWall4DApply(), quda::DomainWall5DApply< Float, nColor, recon >::DomainWall5DApply(), quda::ImprovedStaggeredApply< Float, nColor, recon_l >::ImprovedStaggeredApply(), quda::LaplaceApply< Float, nColor, recon >::LaplaceApply(), quda::NdegTwistedMassApply< Float, nColor, recon >::NdegTwistedMassApply(), quda::NdegTwistedMassPreconditionedApply< Float, nColor, recon >::NdegTwistedMassPreconditionedApply(), quda::StaggeredApply< Float, nColor, recon_u >::StaggeredApply(), quda::TwistedCloverApply< Float, nColor, recon >::TwistedCloverApply(), quda::TwistedCloverPreconditionedApply< Float, nColor, recon >::TwistedCloverPreconditionedApply(), quda::TwistedMassApply< Float, nColor, recon >::TwistedMassApply(), quda::TwistedMassPreconditionedApply< Float, nColor, recon >::TwistedMassPreconditionedApply(), quda::WilsonApply< Float, nColor, recon >::WilsonApply(), quda::WilsonCloverApply< Float, nColor, recon >::WilsonCloverApply(), and quda::WilsonCloverPreconditionedApply< Float, nColor, recon >::WilsonCloverPreconditionedApply().
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2103 of file dslash_policy.cuh.
References quda::Dslash< Float >::bytes(), and quda::KERNEL_POLICY.
|
inlinevirtual |
sets default values for when tuning is disabled
Reimplemented from quda::Tunable.
Definition at line 2077 of file dslash_policy.cuh.
References quda::TuneParam::aux, quda::Tunable::defaultTuneParam(), quda::dslash::first_active_p2p_policy, and quda::dslash::first_active_policy.
|
inlinevirtual |
Implements quda::Tunable.
Definition at line 2095 of file dslash_policy.cuh.
References quda::Dslash< Float >::flops(), and quda::KERNEL_POLICY.
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2070 of file dslash_policy.cuh.
References quda::TuneParam::aux, quda::dslash::first_active_p2p_policy, quda::dslash::first_active_policy, and quda::Tunable::initTuneParam().
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2113 of file dslash_policy.cuh.
References quda::Dslash< Float >::postTune().
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2111 of file dslash_policy.cuh.
References quda::Dslash< Float >::preTune().
|
inlineprivatevirtual |
Implements quda::Tunable.
Definition at line 1783 of file dslash_policy.cuh.
|
inlineprivatevirtual |
Implements quda::Tunable.
Definition at line 1782 of file dslash_policy.cuh.
|
inlineprivatevirtual |
Reimplemented from quda::Tunable.
Definition at line 1781 of file dslash_policy.cuh.
|
inlineprivatevirtual |
Reimplemented from quda::Tunable.
Definition at line 1780 of file dslash_policy.cuh.
|
inlinevirtual |
Implements quda::Tunable.
Definition at line 2084 of file dslash_policy.cuh.
References quda::TuneKey::aux, comm_config_string(), comm_dim_topology_string(), quda::KERNEL_POLICY, and quda::Tunable::tuneKey().
|
inlinevirtual |
Reimplemented from quda::Tunable.
Definition at line 2048 of file dslash_policy.cuh.
|
private |
Definition at line 1773 of file dslash_policy.cuh.
|
private |
Definition at line 1774 of file dslash_policy.cuh.
|
private |
Definition at line 1777 of file dslash_policy.cuh.
|
private |
Definition at line 1775 of file dslash_policy.cuh.
|
private |
Definition at line 1778 of file dslash_policy.cuh.
|
private |
Definition at line 1776 of file dslash_policy.cuh.