QUDA
1.0.0
|
#include <tune_quda.h>
Public Member Functions | |
Tunable () | |
virtual | ~Tunable () |
virtual TuneKey | tuneKey () const =0 |
virtual void | apply (const cudaStream_t &stream)=0 |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual void | initTuneParam (TuneParam &param) const |
virtual void | defaultTuneParam (TuneParam &param) const |
virtual bool | advanceTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Protected Member Functions | |
virtual long long | flops () const =0 |
virtual long long | bytes () const |
virtual unsigned int | sharedBytesPerThread () const =0 |
virtual unsigned int | sharedBytesPerBlock (const TuneParam &param) const =0 |
virtual unsigned int | minThreads () const |
virtual bool | tuneGridDim () const |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
virtual bool | advanceBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overridden to return maxDynamicSharedBytesPerBlock. More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Protected Attributes | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 59 of file tune_quda.h.
|
inline |
Definition at line 279 of file tune_quda.h.
|
inlinevirtual |
Definition at line 280 of file tune_quda.h.
References quda::stream.
|
inlineprotectedvirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >.
Definition at line 263 of file tune_quda.h.
|
inlineprotectedvirtual |
Reimplemented in quda::TunableVectorYZ, quda::TunableVectorY, quda::TunableLocalParity, quda::ShiftColorSpinorField< Output, Input >, and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >.
Definition at line 124 of file tune_quda.h.
References quda::TuneParam::block, and quda::TuneParam::grid.
Referenced by quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::advanceBlockDim(), quda::TunableLocalParity::advanceBlockDim(), quda::TunableVectorY::advanceBlockDim(), quda::TunableVectorYZ::advanceBlockDim(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::advanceSharedBytes(), and quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >::advanceSharedBytes().
|
inlineprotectedvirtual |
Reimplemented in quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, and quda::ShiftColorSpinorField< Output, Input >.
Definition at line 77 of file tune_quda.h.
References quda::TuneParam::grid.
Referenced by quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::advanceGridDim().
|
inlineprotectedvirtual |
The goal here is to throttle the number of thread blocks per SM by over-allocating shared memory (in order to improve L2 utilization, etc.). We thus request the smallest amount of dynamic shared memory that guarantees throttling to a given number of blocks, in order to allow some extra leeway.
Reimplemented in quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::CopyColorSpinor< 4, Arg >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::CopyColorSpinor< Ns, Arg >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, and quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >.
Definition at line 238 of file tune_quda.h.
References quda::TuneParam::block, deviceProp, and quda::TuneParam::shared_bytes.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::advanceSharedBytes().
|
inlinevirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::QudaMemCopy, quda::SpinorNoise< real, Ns, Nc, type, Arg >, and quda::CopyGauge< FloatOut, FloatIn, length, Arg >.
Definition at line 335 of file tune_quda.h.
Referenced by quda::CopyGauge< FloatOut, FloatIn, length, Arg >::advanceTuneParam(), quda::SpinorNoise< real, Ns, Nc, type, Arg >::advanceTuneParam(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::advanceTuneParam(), and quda::tuneLaunch().
|
pure virtual |
Implemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::DslashCoarsePolicyTune, quda::Clover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::TwistGamma< Float, nColor, Arg >, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Gamma< ValueType, basis, dir >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::ExtractGhost< nDim, Arg >, quda::Pack< Float, nColor, spin_project >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::CopyColorSpinor< Ns, Arg >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::WuppertalSmearing< Float, Ns, Nc, Arg >, quda::ShiftColorSpinorField< Output, Input >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::Dslash5< Float, nColor, Arg >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::GaugeGauss< Float, Arg >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::CopyGauge< FloatOut, FloatIn, length, Arg >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::QudaMemCopy, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >, quda::Laplace< Float, nDim, nColor, Arg >, quda::TwistedMassPreconditioned< Float, 
nDim, nColor, Arg >, quda::Wilson< Float, nDim, nColor, Arg >, quda::DomainWall4D< Float, nDim, nColor, Arg >, quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >, quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >, quda::NdegTwistedMass< Float, nDim, nColor, Arg >, quda::TwistedClover< Float, nDim, nColor, Arg >, quda::WilsonClover< Float, nDim, nColor, Arg >, quda::DomainWall5D< Float, nDim, nColor, Arg >, quda::Staggered< Float, nDim, nColor, Arg >, quda::Staggered< Float, nDim, nColor, Arg >, quda::TwistedMass< Float, nDim, nColor, Arg >, and quda::GaugePlaq< Float, Gauge >.
Referenced by quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDR< Dslash >::operator()(), quda::dslash::DslashFusedGDR< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopy< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopy< Dslash >::operator()(), quda::dslash::DslashNC< Dslash >::operator()(), and quda::tuneLaunch().
|
inlineprotectedvirtual |
Reimplemented in quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::Dslash< Float >, and quda::Dslash5< Float, nColor, Arg >.
Definition at line 106 of file tune_quda.h.
References deviceProp.
Referenced by quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::blockMin().
|
inlineprotectedvirtual |
Reimplemented in quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::Dslash< Float >, and quda::Dslash5< Float, nColor, Arg >.
Definition at line 105 of file tune_quda.h.
References deviceProp.
Referenced by quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::blockStep().
|
inlineprotectedvirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::Clover< Float, nSpin, nColor, Arg >, quda::TwistGamma< Float, nColor, Arg >, quda::Dslash< Float >, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Pack< Float, nColor, spin_project >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::CopyColorSpinor< 4, Arg >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::Gamma< ValueType, basis, dir >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::ExtractGhost< nDim, Arg >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::ShiftColorSpinorField< Output, Input >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::WuppertalSmearing< Float, Ns, Nc, Arg >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::GaugeGauss< Float, Arg >, quda::QudaMemCopy, quda::CopyGauge< FloatOut, FloatIn, length, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::Laplace< Float, nDim, nColor, Arg >, quda::Staggered< Float, nDim, nColor, Arg >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >, quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, 
quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::DomainWall5D< Float, nDim, nColor, Arg >, quda::TwistedClover< Float, nDim, nColor, Arg >, quda::WilsonClover< Float, nDim, nColor, Arg >, quda::Dslash5< Float, nColor, Arg >, and quda::GaugePlaq< Float, Gauge >.
Definition at line 63 of file tune_quda.h.
References param.
|
inline |
Check the launch parameters of the kernel to ensure that they are valid for the current device.
Definition at line 344 of file tune_quda.h.
References quda::TuneParam::block, deviceProp, errorQuda, and quda::TuneParam::grid.
Referenced by quda::tuneLaunch().
|
inlinevirtual |
Sets default values for when tuning is disabled.
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::TunableVectorYZ, quda::TunableVectorY, quda::TunableLocalParity, quda::Pack< Float, nColor, spin_project >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::Dslash5< Float, nColor, Arg >, quda::ShiftColorSpinorField< Output, Input >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >, and quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >.
Definition at line 329 of file tune_quda.h.
References quda::TuneParam::grid.
Referenced by quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >::defaultTuneParam(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::defaultTuneParam(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::defaultTuneParam(), quda::TunableLocalParity::defaultTuneParam(), quda::TunableVectorY::defaultTuneParam(), quda::DslashCoarsePolicyTune::defaultTuneParam(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::defaultTuneParam(), quda::dslash::DslashPolicyTune< Dslash >::defaultTuneParam(), and quda::tuneLaunch().
|
protectedpure virtual |
Implemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::Clover< Float, nSpin, nColor, Arg >, quda::TwistGamma< Float, nColor, Arg >, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Pack< Float, nColor, spin_project >, quda::Dslash< Float >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::CopyColorSpinor< 4, Arg >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::Gamma< ValueType, basis, dir >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::ExtractGhost< nDim, Arg >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::ShiftColorSpinorField< Output, Input >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::WuppertalSmearing< Float, Ns, Nc, Arg >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::GaugeGauss< Float, Arg >, quda::QudaMemCopy, quda::CopyGauge< FloatOut, FloatIn, length, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::Staggered< Float, nDim, nColor, Arg >, quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >, quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >, 
quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >, quda::Laplace< Float, nDim, nColor, Arg >, quda::NdegTwistedMass< Float, nDim, nColor, Arg >, quda::TwistedClover< Float, nDim, nColor, Arg >, quda::WilsonClover< Float, nDim, nColor, Arg >, quda::TwistedMass< Float, nDim, nColor, Arg >, quda::DomainWall5D< Float, nDim, nColor, Arg >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::GaugePlaq< Float, Gauge >, and quda::Dslash5< Float, nColor, Arg >.
|
inlineprotectedvirtual |
gridStep sets the step size when iterating the grid size in advanceGridDim.
Reimplemented in quda::Pack< Float, nColor, spin_project >.
Definition at line 103 of file tune_quda.h.
Referenced by quda::Pack< Float, nColor, spin_project >::gridStep().
|
inlinevirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::TunableVectorYZ, quda::TunableVectorY, quda::TunableLocalParity, quda::Pack< Float, nColor, spin_project >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::Dslash5< Float, nColor, Arg >, quda::ShiftColorSpinorField< Output, Input >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, and quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >.
Definition at line 304 of file tune_quda.h.
References quda::TuneParam::block, deviceProp, errorQuda, quda::TuneParam::grid, and quda::TuneParam::shared_bytes.
Referenced by quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::defaultTuneParam(), quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >::initTuneParam(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::initTuneParam(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::initTuneParam(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::initTuneParam(), quda::TunableLocalParity::initTuneParam(), quda::TunableVectorY::initTuneParam(), quda::DslashCoarsePolicyTune::initTuneParam(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::initTuneParam(), quda::dslash::DslashPolicyTune< Dslash >::initTuneParam(), and quda::tuneLaunch().
|
inline |
Definition at line 375 of file tune_quda.h.
Referenced by quda::blas::multiReduceLaunch(), quda::blas::reduceLaunch(), and quda::tuneLaunch().
|
inline |
Definition at line 376 of file tune_quda.h.
|
inlineprotectedvirtual |
Reimplemented in quda::TunableLocalParity, and quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >.
Definition at line 94 of file tune_quda.h.
References quda::TuneParam::block, and deviceProp.
|
inlineprotected |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability).
Definition at line 153 of file tune_quda.h.
References deviceProp, and warningQuda.
|
inlineprotected |
This can't be correctly queried in CUDA for all architectures so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability).
Definition at line 198 of file tune_quda.h.
References deviceProp, and warningQuda.
Referenced by quda::Pack< Float, nColor, spin_project >::defaultTuneParam(), quda::Pack< Float, nColor, spin_project >::initTuneParam(), quda::Dslash5< Float, nColor, Arg >::maxSharedBytesPerBlock(), quda::Dslash< Float >::maxSharedBytesPerBlock(), and quda::Pack< Float, nColor, spin_project >::tuneSharedBytes().
|
inlineprotectedvirtual |
Reimplemented in quda::Pack< Float, nColor, spin_project >.
Definition at line 95 of file tune_quda.h.
References deviceProp.
Referenced by quda::Pack< Float, nColor, spin_project >::maxGridSize().
|
inlineprotectedvirtual |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overridden to return maxDynamicSharedBytesPerBlock.
Reimplemented in quda::Dslash< Float >, and quda::Dslash5< Float, nColor, Arg >.
Definition at line 229 of file tune_quda.h.
References deviceProp.
Referenced by quda::Dslash5< Float, nColor, Arg >::maxSharedBytesPerBlock(), and quda::Pack< Float, nColor, spin_project >::tuneSharedBytes().
|
inlineprotectedvirtual |
Reimplemented in quda::Pack< Float, nColor, spin_project >.
Definition at line 96 of file tune_quda.h.
Referenced by quda::Pack< Float, nColor, spin_project >::minGridSize().
|
inlineprotectedvirtual |
Reimplemented in quda::TwistClover< Float, nSpin, nColor, Arg >, quda::Clover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::TwistGamma< Float, nColor, Arg >, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Gamma< ValueType, basis, dir >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::ExtractGhost< nDim, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::WuppertalSmearing< Float, Ns, Nc, Arg >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::Pack< Float, nColor, spin_project >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::GaugeGauss< Float, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::Dslash5< Float, nColor, Arg >, quda::Dslash< Float >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, and quda::CopyGauge< FloatOut, FloatIn, length, Arg >.
Definition at line 72 of file tune_quda.h.
|
inlinevirtual |
Definition at line 287 of file tune_quda.h.
References param.
Referenced by quda::tuneLaunch().
|
inlinevirtual |
Definition at line 294 of file tune_quda.h.
References quda::blas::bytes, quda::blas::flops, and quda::TuneParam::time.
Referenced by quda::tuneLaunch().
|
inlinevirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::Clover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::TwistGamma< Float, nColor, Arg >, quda::Dslash< Float >, quda::Gamma< ValueType, basis, dir >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::GaugeGauss< Float, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, and quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >.
Definition at line 284 of file tune_quda.h.
Referenced by quda::tuneLaunch().
|
inlinevirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::Clover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::TwistGamma< Float, nColor, Arg >, quda::Dslash< Float >, quda::Gamma< ValueType, basis, dir >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::GaugeGauss< Float, Arg >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, and quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >.
Definition at line 283 of file tune_quda.h.
Referenced by quda::tuneLaunch().
|
inlineprotectedvirtual |
Definition at line 108 of file tune_quda.h.
References quda::TuneParam::block, deviceProp, and errorQuda.
|
inlineprotected |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()).
[in] | func | Function pointer to the kernel we want to enable max shared memory per block for |
Definition at line 181 of file tune_quda.h.
Referenced by quda::Dslash< Float >::launch(), quda::Dslash5< Float, nColor, Arg >::launch(), and quda::Pack< Float, nColor, spin_project >::launch().
|
protectedpure virtual |
Implemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::ProjectSU3< Float, G >, quda::TunableVectorY, quda::TunableLocalParity, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::ExtractGhost< nDim, Arg >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::QudaMemCopy, and quda::CopyGauge< FloatOut, FloatIn, length, Arg >.
|
protectedpure virtual |
Implemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::DslashCoarsePolicyTune, quda::ProjectSU3< Float, G >, quda::TunableVectorY, quda::TunableLocalParity, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::ExtractGhost< nDim, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::ShiftColorSpinorField< Output, Input >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::Dslash5< Float, nColor, Arg >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >, quda::QudaMemCopy, and quda::CopyGauge< FloatOut, FloatIn, length, Arg >.
|
inlineprotectedvirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::DslashCoarsePolicyTune, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::Pack< Float, nColor, spin_project >, and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >.
Definition at line 74 of file tune_quda.h.
|
inlineprotectedvirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::DslashCoarsePolicyTune, quda::Clover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::TunableLocalParity, quda::TwistGamma< Float, nColor, Arg >, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Gamma< ValueType, basis, dir >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::CopyColorSpinor< 4, Arg >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::ExtractGhost< nDim, Arg >, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::CopyColorSpinor< Ns, Arg >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::WuppertalSmearing< Float, Ns, Nc, Arg >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::GaugeGauss< Float, Arg >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::Dslash5< Float, nColor, Arg >, quda::Dslash< Float >, quda::Pack< Float, nColor, spin_project >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::CopyGauge< FloatOut, FloatIn, length, Arg >, and quda::GaugePlaq< Float, Gauge >.
Definition at line 73 of file tune_quda.h.
|
pure virtual |
Implemented in quda::dslash::DslashPolicyTune< Dslash >, quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >, quda::TwistClover< Float, nSpin, nColor, Arg >, quda::DslashCoarsePolicyTune, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >, quda::Clover< Float, nSpin, nColor, Arg >, quda::ProjectSU3< Float, G >, quda::TwistGamma< Float, nColor, Arg >, quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Pack< Float, nColor, spin_project >, quda::Gamma< ValueType, basis, dir >, quda::CopyColorSpinor< 4, Arg >, quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::ExtractGhostEx< Float, length, nDim, dim, Order >, quda::ExtractGhost< nDim, Arg >, quda::Dslash5< Float, nColor, Arg >, quda::GaugeOvrImpSTOUT< Float, Arg >, quda::ShiftColorSpinorField< Output, Input >, quda::CopyColorSpinor< Ns, Arg >, quda::WuppertalSmearing< Float, Ns, Nc, Arg >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::Staggered< Float, nDim, nColor, Arg >, quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >, quda::Laplace< Float, nDim, nColor, Arg >, quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, quda::GaugeGauss< Float, Arg >, quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >, quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >, quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >, quda::QudaMemCopy, quda::SpinorNoise< real, Ns, Nc, type, Arg >, quda::CopyGauge< FloatOut, FloatIn, length, Arg >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::TwistedClover< Float, nDim, nColor, Arg >, quda::WilsonClover< Float, nDim, 
nColor, Arg >, quda::DomainWall5D< Float, nDim, nColor, Arg >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >, quda::NdegTwistedMass< Float, nDim, nColor, Arg >, quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >, quda::TwistedMass< Float, nDim, nColor, Arg >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, quda::DomainWall4D< Float, nDim, nColor, Arg >, quda::Staggered< Float, nDim, nColor, Arg >, quda::Wilson< Float, nDim, nColor, Arg >, and quda::GaugePlaq< Float, Gauge >.
Referenced by quda::dslash::DslashPolicyTune< Dslash >::tuneKey(), and quda::tuneLaunch().
|
inlineprotectedvirtual |
Reimplemented in quda::KSLongLinkForce< Float, Result, Oprod, Gauge >, quda::Pack< Float, nColor, spin_project >, quda::KSForceComplete< Float, Oprod, Gauge, Mom >, and quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >.
Definition at line 75 of file tune_quda.h.
|
inlinevirtual |
Reimplemented in quda::dslash::DslashPolicyTune< Dslash >, quda::DslashCoarsePolicyTune, quda::Pack< Float, nColor, spin_project >, quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >, quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >, quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >, quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >, quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >, and quda::Dslash< Float >.
Definition at line 285 of file tune_quda.h.
Referenced by quda::tuneLaunch().
|
inlineprotected |
Definition at line 267 of file tune_quda.h.
References quda::TuneKey::aux_n, and errorQuda.
|
protected |
Definition at line 265 of file tune_quda.h.
Referenced by quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::BlasCuda(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::CalculateY(), quda::Dslash5< Float, nColor, Arg >::Dslash5(), quda::Pack< Float, nColor, spin_project >::fillAux(), quda::GaugePlaq< Float, Gauge >::GaugePlaq(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::GenericPackGhostLauncher(), quda::QudaMemCopy::QudaMemCopy(), quda::GaugePlaq< Float, Gauge >::tuneKey(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::tuneKey(), quda::QudaMemCopy::tuneKey(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::tuneKey(), quda::Dslash5< Float, nColor, Arg >::tuneKey(), quda::Pack< Float, nColor, spin_project >::tuneKey(), and quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::tuneKey().
|
protected |
This is the return result from kernels launched using jitify.
Definition at line 276 of file tune_quda.h.
Referenced by quda::GaugePlaq< Float, Gauge >::apply(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::apply(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::apply(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), and quda::Dslash< Float >::instantiate().