QUDA  v1.1.0
A library for QCD on GPUs
Public Member Functions | Protected Member Functions | Protected Attributes | List of all members
quda::Tunable Class Reference abstract

#include <tune_quda.h>

+ Inheritance diagram for quda::Tunable:

Public Member Functions

 Tunable ()
 
virtual ~Tunable ()
 
virtual TuneKey tuneKey () const =0
 
virtual void apply (const qudaStream_t &stream)=0
 
virtual void preTune ()
 
virtual void postTune ()
 
virtual int tuningIter () const
 
virtual std::string paramString (const TuneParam &param) const
 
virtual std::string perfString (float time) const
 
virtual void initTuneParam (TuneParam &param) const
 
virtual void defaultTuneParam (TuneParam &param) const
 
virtual bool advanceTuneParam (TuneParam &param) const
 
void checkLaunchParam (TuneParam &param)
 
CUresult jitifyError () const
 
CUresult & jitifyError ()
 

Protected Member Functions

virtual long long flops () const =0
 
virtual long long bytes () const
 
virtual unsigned int sharedBytesPerThread () const =0
 
virtual unsigned int sharedBytesPerBlock (const TuneParam &param) const =0
 
virtual unsigned int minThreads () const
 
virtual bool tuneGridDim () const
 
virtual bool tuneAuxDim () const
 
virtual bool tuneSharedBytes () const
 
virtual bool advanceGridDim (TuneParam &param) const
 
virtual unsigned int maxBlockSize (const TuneParam &param) const
 
virtual unsigned int maxGridSize () const
 
virtual unsigned int minGridSize () const
 
virtual int gridStep () const
 gridStep sets the step size when iterating the grid size in advanceGridDim. More...
 
virtual int blockStep () const
 
virtual int blockMin () const
 
virtual void resetBlockDim (TuneParam &param) const
 
virtual bool advanceBlockDim (TuneParam &param) const
 
unsigned int maxBlocksPerSM () const
 Returns the maximum number of simultaneously resident blocks per SM. We can directly query this as of CUDA 11, but previously this needed to be hand-coded. More...
 
unsigned int maxDynamicSharedBytesPerBlock () const
 Returns the maximum dynamic shared memory per block. More...
 
virtual unsigned int maxSharedBytesPerBlock () const
 The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock, since using that limit may require an explicit opt-in (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock. More...
 
virtual bool advanceSharedBytes (TuneParam &param) const
 
virtual bool advanceAux (TuneParam &param) const
 
int writeAuxString (const char *format,...)
 
bool tuned ()
 Whether the present instance has already been tuned or not. More...
 

Protected Attributes

char aux [TuneKey::aux_n]
 
CUresult jitify_error
 

Detailed Description

Definition at line 95 of file tune_quda.h.

Constructor & Destructor Documentation

◆ Tunable()

quda::Tunable::Tunable ( )
inline

Definition at line 305 of file tune_quda.h.

◆ ~Tunable()

virtual quda::Tunable::~Tunable ( )
inlinevirtual

Definition at line 306 of file tune_quda.h.

Member Function Documentation

◆ advanceAux()

virtual bool quda::Tunable::advanceAux ( TuneParam & param) const
inlineprotectedvirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 267 of file tune_quda.h.

◆ advanceBlockDim()

virtual bool quda::Tunable::advanceBlockDim ( TuneParam & param) const
inlineprotectedvirtual

◆ advanceGridDim()

virtual bool quda::Tunable::advanceGridDim ( TuneParam & param) const
inlineprotectedvirtual

Definition at line 113 of file tune_quda.h.

◆ advanceSharedBytes()

virtual bool quda::Tunable::advanceSharedBytes ( TuneParam & param) const
inlineprotectedvirtual

The goal here is to throttle the number of thread blocks per SM by over-allocating shared memory (in order to improve L2 utilization, etc.). We thus request the smallest amount of dynamic shared memory that guarantees throttling to a given number of blocks, in order to allow some extra leeway.

Definition at line 242 of file tune_quda.h.

◆ advanceTuneParam()

virtual bool quda::Tunable::advanceTuneParam ( TuneParam & param) const
inlinevirtual

Reimplemented in quda::QudaMem, and quda::Dslash< D, Arg >.

Definition at line 363 of file tune_quda.h.

◆ apply()

virtual void quda::Tunable::apply ( const qudaStream_t & stream)
pure virtual

◆ blockMin()

virtual int quda::Tunable::blockMin ( ) const
inlineprotectedvirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 141 of file tune_quda.h.

◆ blockStep()

virtual int quda::Tunable::blockStep ( ) const
inlineprotectedvirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 140 of file tune_quda.h.

◆ bytes()

virtual long long quda::Tunable::bytes ( ) const
inlineprotectedvirtual

◆ checkLaunchParam()

void quda::Tunable::checkLaunchParam ( TuneParam & param)
inline

Check the launch parameters of the kernel to ensure that they are valid for the current device.

Definition at line 372 of file tune_quda.h.

◆ defaultTuneParam()

virtual void quda::Tunable::defaultTuneParam ( TuneParam & param) const
inlinevirtual

Sets default values for when tuning is disabled.

Reimplemented in quda::TunableVectorYZ, quda::TunableVectorY, quda::TunableLocalParityReduction, and quda::Dslash< D, Arg >.

Definition at line 357 of file tune_quda.h.

◆ flops()

virtual long long quda::Tunable::flops ( ) const
protectedpure virtual

◆ gridStep()

virtual int quda::Tunable::gridStep ( ) const
inlineprotectedvirtual

gridStep sets the step size when iterating the grid size in advanceGridDim.

Returns
Grid step size

Reimplemented in quda::TunableLocalParityReduction, and quda::Dslash< D, Arg >.

Definition at line 138 of file tune_quda.h.

◆ initTuneParam()

virtual void quda::Tunable::initTuneParam ( TuneParam & param) const
inlinevirtual

◆ jitifyError() [1/2]

CUresult& quda::Tunable::jitifyError ( )
inline

Definition at line 404 of file tune_quda.h.

◆ jitifyError() [2/2]

CUresult quda::Tunable::jitifyError ( ) const
inline

Definition at line 403 of file tune_quda.h.

◆ maxBlockSize()

virtual unsigned int quda::Tunable::maxBlockSize ( const TuneParam & param) const
inlineprotectedvirtual

Reimplemented in quda::TunableLocalParityReduction.

Definition at line 129 of file tune_quda.h.

◆ maxBlocksPerSM()

unsigned int quda::Tunable::maxBlocksPerSM ( ) const
inlineprotected

Returns the maximum number of simultaneously resident blocks per SM. We can directly query this as of CUDA 11, but previously this needed to be hand-coded.

Returns
The maximum number of simultaneously resident blocks per SM

Definition at line 186 of file tune_quda.h.

◆ maxDynamicSharedBytesPerBlock()

unsigned int quda::Tunable::maxDynamicSharedBytesPerBlock ( ) const
inlineprotected

Returns the maximum dynamic shared memory per block.

Returns
The maximum dynamic shared memory available to a CUDA thread block

Definition at line 220 of file tune_quda.h.

◆ maxGridSize()

virtual unsigned int quda::Tunable::maxGridSize ( ) const
inlineprotectedvirtual

Definition at line 130 of file tune_quda.h.

◆ maxSharedBytesPerBlock()

virtual unsigned int quda::Tunable::maxSharedBytesPerBlock ( ) const
inlineprotectedvirtual

The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock, since using that limit may require an explicit opt-in (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock.

Returns
The maximum shared bytes limit per block that the autotuner will utilize.

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 233 of file tune_quda.h.

◆ minGridSize()

virtual unsigned int quda::Tunable::minGridSize ( ) const
inlineprotectedvirtual

Reimplemented in quda::TunableLocalParityReduction, and quda::Dslash< D, Arg >.

Definition at line 131 of file tune_quda.h.

◆ minThreads()

virtual unsigned int quda::Tunable::minThreads ( ) const
inlineprotectedvirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 108 of file tune_quda.h.

◆ paramString()

virtual std::string quda::Tunable::paramString ( const TuneParam & param) const
inlinevirtual

Definition at line 314 of file tune_quda.h.

◆ perfString()

virtual std::string quda::Tunable::perfString ( float  time) const
inlinevirtual

Definition at line 321 of file tune_quda.h.

◆ postTune()

virtual void quda::Tunable::postTune ( )
inlinevirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 310 of file tune_quda.h.

◆ preTune()

virtual void quda::Tunable::preTune ( )
inlinevirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 309 of file tune_quda.h.

◆ resetBlockDim()

virtual void quda::Tunable::resetBlockDim ( TuneParam & param) const
inlineprotectedvirtual

Definition at line 143 of file tune_quda.h.

◆ sharedBytesPerBlock()

virtual unsigned int quda::Tunable::sharedBytesPerBlock ( const TuneParam & param) const
protectedpure virtual

◆ sharedBytesPerThread()

virtual unsigned int quda::Tunable::sharedBytesPerThread ( ) const
protectedpure virtual

◆ tuneAuxDim()

virtual bool quda::Tunable::tuneAuxDim ( ) const
inlineprotectedvirtual

Definition at line 110 of file tune_quda.h.

◆ tuned()

bool quda::Tunable::tuned ( )
inlineprotected

Whether the present instance has already been tuned or not.

Returns
True if tuned, false if not

Definition at line 289 of file tune_quda.h.

◆ tuneGridDim()

virtual bool quda::Tunable::tuneGridDim ( ) const
inlineprotectedvirtual

Reimplemented in quda::TunableLocalParityReduction, and quda::Dslash< D, Arg >.

Definition at line 109 of file tune_quda.h.

◆ tuneKey()

virtual TuneKey quda::Tunable::tuneKey ( ) const
pure virtual

◆ tuneSharedBytes()

virtual bool quda::Tunable::tuneSharedBytes ( ) const
inlineprotectedvirtual

Definition at line 111 of file tune_quda.h.

◆ tuningIter()

virtual int quda::Tunable::tuningIter ( ) const
inlinevirtual

Reimplemented in quda::Dslash< D, Arg >.

Definition at line 311 of file tune_quda.h.

◆ writeAuxString()

int quda::Tunable::writeAuxString ( const char *  format,
  ... 
)
inlineprotected

Definition at line 271 of file tune_quda.h.

Member Data Documentation

◆ aux

char quda::Tunable::aux[TuneKey::aux_n]
protected

Definition at line 269 of file tune_quda.h.

◆ jitify_error

CUresult quda::Tunable::jitify_error
protected

This is the return result from kernels launched using jitify

Definition at line 283 of file tune_quda.h.


The documentation for this class was generated from the following file: