#include <tune_quda.h>
Definition at line 95 of file tune_quda.h.
◆ Tunable()
quda::Tunable::Tunable |
( |
| ) |
|
|
inline |
◆ ~Tunable()
virtual quda::Tunable::~Tunable |
( |
| ) |
|
|
inlinevirtual |
◆ advanceAux()
virtual bool quda::Tunable::advanceAux |
( |
TuneParam & |
param | ) |
const |
|
inlineprotectedvirtual |
◆ advanceBlockDim()
virtual bool quda::Tunable::advanceBlockDim |
( |
TuneParam & |
param | ) |
const |
|
inlineprotectedvirtual |
◆ advanceGridDim()
virtual bool quda::Tunable::advanceGridDim |
( |
TuneParam & |
param | ) |
const |
|
inlineprotectedvirtual |
◆ advanceSharedBytes()
virtual bool quda::Tunable::advanceSharedBytes |
( |
TuneParam & |
param | ) |
const |
|
inlineprotectedvirtual |
The goal here is to throttle the number of thread blocks per SM by over-allocating shared memory (in order to improve L2 utilization, etc.). We thus request the smallest amount of dynamic shared memory that guarantees throttling to a given number of blocks, in order to allow some extra leeway.
Definition at line 242 of file tune_quda.h.
◆ advanceTuneParam()
virtual bool quda::Tunable::advanceTuneParam |
( |
TuneParam & |
param | ) |
const |
|
inlinevirtual |
◆ apply()
virtual void quda::Tunable::apply |
( |
const qudaStream_t & |
stream | ) |
|
|
pure virtual |
◆ blockMin()
virtual int quda::Tunable::blockMin |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ blockStep()
virtual int quda::Tunable::blockStep |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ bytes()
virtual long long quda::Tunable::bytes |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ checkLaunchParam()
void quda::Tunable::checkLaunchParam |
( |
TuneParam & |
param | ) |
|
|
inline |
Check the launch parameters of the kernel to ensure that they are valid for the current device.
Definition at line 372 of file tune_quda.h.
◆ defaultTuneParam()
virtual void quda::Tunable::defaultTuneParam |
( |
TuneParam & |
param | ) |
const |
|
inlinevirtual |
◆ flops()
virtual long long quda::Tunable::flops |
( |
| ) |
const |
|
protectedpure virtual |
◆ gridStep()
virtual int quda::Tunable::gridStep |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ initTuneParam()
virtual void quda::Tunable::initTuneParam |
( |
TuneParam & |
param | ) |
const |
|
inlinevirtual |
◆ jitifyError() [1/2]
CUresult& quda::Tunable::jitifyError |
( |
| ) |
|
|
inline |
◆ jitifyError() [2/2]
CUresult quda::Tunable::jitifyError |
( |
| ) |
const |
|
inline |
◆ maxBlockSize()
virtual unsigned int quda::Tunable::maxBlockSize |
( |
const TuneParam & |
param | ) |
const |
|
inlineprotectedvirtual |
◆ maxBlocksPerSM()
unsigned int quda::Tunable::maxBlocksPerSM |
( |
| ) |
const |
|
inlineprotected |
Returns the maximum number of simultaneously resident blocks per SM. As of CUDA 11 this can be queried directly, but previously it had to be hand-coded.
- Returns
- The maximum number of simultaneously resident blocks per SM
Definition at line 186 of file tune_quda.h.
◆ maxDynamicSharedBytesPerBlock()
unsigned int quda::Tunable::maxDynamicSharedBytesPerBlock |
( |
| ) |
const |
|
inlineprotected |
Returns the maximum dynamic shared memory per block.
- Returns
- The maximum dynamic shared memory available to a CUDA thread block
Definition at line 220 of file tune_quda.h.
◆ maxGridSize()
virtual unsigned int quda::Tunable::maxGridSize |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ maxSharedBytesPerBlock()
virtual unsigned int quda::Tunable::maxSharedBytesPerBlock |
( |
| ) |
const |
|
inlineprotectedvirtual |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock, since that may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock.
- Returns
- The maximum shared bytes limit per block that the autotuner will utilize.
Reimplemented in quda::Dslash< D, Arg >.
Definition at line 233 of file tune_quda.h.
◆ minGridSize()
virtual unsigned int quda::Tunable::minGridSize |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ minThreads()
virtual unsigned int quda::Tunable::minThreads |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ paramString()
virtual std::string quda::Tunable::paramString |
( |
const TuneParam & |
param | ) |
const |
|
inlinevirtual |
◆ perfString()
virtual std::string quda::Tunable::perfString |
( |
float |
time | ) |
const |
|
inlinevirtual |
◆ postTune()
virtual void quda::Tunable::postTune |
( |
| ) |
|
|
inlinevirtual |
◆ preTune()
virtual void quda::Tunable::preTune |
( |
| ) |
|
|
inlinevirtual |
◆ resetBlockDim()
virtual void quda::Tunable::resetBlockDim |
( |
TuneParam & |
param | ) |
const |
|
inlineprotectedvirtual |
◆ sharedBytesPerBlock()
virtual unsigned int quda::Tunable::sharedBytesPerBlock |
( |
const TuneParam & |
param | ) |
const |
|
protectedpure virtual |
◆ sharedBytesPerThread()
virtual unsigned int quda::Tunable::sharedBytesPerThread |
( |
| ) |
const |
|
protectedpure virtual |
◆ tuneAuxDim()
virtual bool quda::Tunable::tuneAuxDim |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ tuned()
bool quda::Tunable::tuned |
( |
| ) |
|
|
inlineprotected |
Whether the present instance has already been tuned or not.
- Returns
- True if tuned, false if not
Definition at line 289 of file tune_quda.h.
◆ tuneGridDim()
virtual bool quda::Tunable::tuneGridDim |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ tuneKey()
virtual TuneKey quda::Tunable::tuneKey |
( |
| ) |
const |
|
pure virtual |
◆ tuneSharedBytes()
virtual bool quda::Tunable::tuneSharedBytes |
( |
| ) |
const |
|
inlineprotectedvirtual |
◆ tuningIter()
virtual int quda::Tunable::tuningIter |
( |
| ) |
const |
|
inlinevirtual |
◆ writeAuxString()
int quda::Tunable::writeAuxString |
( |
const char * |
format, |
|
|
|
... |
|
) |
| |
|
inlineprotected |
◆ aux
◆ jitify_error
CUresult quda::Tunable::jitify_error |
|
protected |
This is the return result from kernels launched using Jitify.
Definition at line 283 of file tune_quda.h.
The documentation for this class was generated from the following file: