QUDA
1.0.0
|
#include <tune_quda.h>
Public Member Functions | |
TunableVectorY (unsigned int vector_length_y) | |
bool | advanceBlockDim (TuneParam &param) const |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
void | resizeVector (int y) const |
void | resizeStep (int y) const |
Public Member Functions inherited from quda::Tunable | |
Tunable () | |
virtual | ~Tunable () |
virtual TuneKey | tuneKey () const =0 |
virtual void | apply (const cudaStream_t &stream)=0 |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual bool | advanceTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Protected Member Functions | |
virtual unsigned int | sharedBytesPerThread () const |
virtual unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
Protected Member Functions inherited from quda::Tunable | |
virtual long long | flops () const =0 |
virtual long long | bytes () const |
virtual unsigned int | minThreads () const |
virtual bool | tuneGridDim () const |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability) More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock() since that may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock() for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock(). More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Protected Attributes | |
unsigned int | vector_length_y |
unsigned int | step_y |
bool | tune_block_x |
Protected Attributes inherited from quda::Tunable | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
This derived class is for algorithms that deploy a vector of computations across the y dimension of both the thread block and the grid. For example, this could be parity in the y dimension and checkerboarded volume in x.
Definition at line 426 of file tune_quda.h.
|
inline |
Definition at line 437 of file tune_quda.h.
|
inline virtual |
Reimplemented from quda::Tunable.
Reimplemented in quda::TunableVectorYZ, and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >.
Definition at line 440 of file tune_quda.h.
References quda::Tunable::advanceBlockDim(), quda::TuneParam::block, deviceProp, and quda::TuneParam::grid.
Referenced by quda::TunableVectorYZ::advanceBlockDim().
|
inline virtual |
sets default values for when tuning is disabled
Reimplemented from quda::Tunable.
Reimplemented in quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::TunableVectorYZ, quda::Pack< Float, nColor, spin_project >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::Dslash5< Float, nColor, Arg >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, and quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >.
Definition at line 474 of file tune_quda.h.
References quda::TuneParam::block, quda::Tunable::defaultTuneParam(), and quda::TuneParam::grid.
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::defaultTuneParam(), and quda::TunableVectorYZ::defaultTuneParam().
|
inline virtual |
Reimplemented from quda::Tunable.
Reimplemented in quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::TunableVectorYZ, quda::Pack< Float, nColor, spin_project >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::Dslash5< Float, nColor, Arg >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, and quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >.
Definition at line 466 of file tune_quda.h.
References quda::TuneParam::block, quda::TuneParam::grid, and quda::Tunable::initTuneParam().
Referenced by quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::initTuneParam(), and quda::TunableVectorYZ::initTuneParam().
|
inline |
Definition at line 482 of file tune_quda.h.
Referenced by quda::TunableVectorYZ::resizeStep().
|
inline |
Definition at line 481 of file tune_quda.h.
Referenced by quda::TunableVectorYZ::resizeVector().
|
inline protected virtual |
Implements quda::Tunable.
Reimplemented in quda::ProjectSU3< Float, G >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhost< nDim, Arg >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, and quda::CopyGauge< FloatOut, FloatIn, length, Arg >.
Definition at line 430 of file tune_quda.h.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::sharedBytesPerBlock().
|
inline protected virtual |
Implements quda::Tunable.
Reimplemented in quda::ProjectSU3< Float, G >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhost< nDim, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::Dslash5< Float, nColor, Arg >, quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >, and quda::CopyGauge< FloatOut, FloatIn, length, Arg >.
Definition at line 429 of file tune_quda.h.
|
mutable protected |
Definition at line 433 of file tune_quda.h.
|
protected |
Definition at line 434 of file tune_quda.h.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setComputeType().
|
mutable protected |
Definition at line 432 of file tune_quda.h.