QUDA
1.0.0
|
Public Member Functions | |
CalculateY (Arg &arg, const ColorSpinorField &meta, GaugeField &Y, GaugeField &X, GaugeField &Y_atomic, GaugeField &X_atomic) | |
virtual | ~CalculateY () |
void | apply (const cudaStream_t &stream) |
void | setDimension (int dim_) |
void | setDirection (QudaDirection dir_) |
void | setComputeType (ComputeType type_) |
bool | advanceAux (TuneParam &param) const |
bool | advanceSharedBytes (TuneParam &param) const |
bool | advanceTuneParam (TuneParam &param) const |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
TuneKey | tuneKey () const |
void | preTune () |
void | postTune () |
Public Member Functions inherited from quda::TunableVectorYZ | |
TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z) | |
bool | advanceBlockDim (TuneParam &param) const |
void | resizeVector (int y, int z) const |
void | resizeStep (int y, int z) const |
Public Member Functions inherited from quda::TunableVectorY | |
TunableVectorY (unsigned int vector_length_y) | |
void | resizeVector (int y) const |
void | resizeStep (int y) const |
Public Member Functions inherited from quda::Tunable | |
Tunable () | |
virtual | ~Tunable () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Protected Member Functions | |
long long | flops () const |
long long | bytes () const |
unsigned int | minThreads () const |
bool | tuneGridDim () const |
bool | tuneAuxDim () const |
unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
Protected Member Functions inherited from quda::TunableVectorY | |
virtual unsigned int | sharedBytesPerThread () const |
Protected Member Functions inherited from quda::Tunable | |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam ¶m) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock() since that may need explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock() for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overridden to return maxDynamicSharedBytesPerBlock(). More... | |
int | writeAuxString (const char *format,...) |
Protected Attributes | |
Arg & | arg |
const ColorSpinorField & | meta |
GaugeField & | Y |
GaugeField & | X |
GaugeField & | Y_atomic |
GaugeField & | X_atomic |
int | dim |
QudaDirection | dir |
ComputeType | type |
Protected Attributes inherited from quda::TunableVectorY | |
unsigned int | vector_length_y |
unsigned int | step_y |
bool | tune_block_x |
Protected Attributes inherited from quda::Tunable | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 32 of file coarse_op.cuh.
|
inline |
Definition at line 180 of file coarse_op.cuh.
References quda::Tunable::aux, quda::LatticeField::AuxString(), comm_dim_partitioned_string(), quda::compile_type_str(), getOmpThreadStr(), quda::LatticeField::Location(), QUDA_CPU_FIELD_LOCATION, and QUDA_CUDA_FIELD_LOCATION.
|
inline virtual |
Definition at line 194 of file coarse_op.cuh.
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 643 of file coarse_op.cuh.
References quda::TuneParam::aux, quda::COMPUTE_VUV, deviceProp, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::initTuneParam(), max_color_per_block, quda::TunableVectorYZ::resizeStep(), and quda::TunableVectorYZ::resizeVector().
|
inline virtual |
The goal here is to throttle the number of thread blocks per SM by over-allocating shared memory (in order to improve L2 utilization, etc.). We thus request the smallest amount of dynamic shared memory that guarantees throttling to a given number of blocks, in order to allow some extra leeway.
Reimplemented from quda::Tunable.
Definition at line 691 of file coarse_op.cuh.
References quda::Tunable::advanceSharedBytes(), quda::COMPUTE_COARSE_CLOVER, and quda::COMPUTE_VUV.
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 695 of file coarse_op.cuh.
References quda::Tunable::advanceTuneParam(), quda::LatticeField::Location(), quda::LatticeField::MemType(), QUDA_CUDA_FIELD_LOCATION, and QUDA_MEMORY_DEVICE.
|
inline virtual |
Implements quda::Tunable.
Definition at line 196 of file coarse_op.cuh.
References quda::activeTuning(), quda::alloc, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::arg, quda::TuneParam::aux, quda::TuneParam::block, quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, errorQuda, getTuning(), getVerbosity(), quda::TuneParam::grid, quda::Tunable::jitify_error, quda::LatticeField::Location(), pool_device_free, pool_device_malloc, QUDA_BACKWARDS, QUDA_CPU_FIELD_LOCATION, QUDA_FORWARDS, quda::reduce(), quda::TuneParam::shared_bytes, quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::sharedBytesPerBlock(), swap(), and quda::tuneLaunch().
Referenced by quda::calculateY().
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 91 of file coarse_op.cuh.
References quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::Bytes(), quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, and errorQuda.
|
inline virtual |
sets default values for when tuning is disabled
Reimplemented from quda::TunableVectorYZ.
Definition at line 716 of file coarse_op.cuh.
References quda::TuneParam::aux, quda::TuneParam::block, quda::COMPUTE_VUV, quda::TunableVectorYZ::defaultTuneParam(), and quda::TuneParam::grid.
|
inline protected virtual |
Implements quda::Tunable.
Definition at line 47 of file coarse_op.cuh.
References quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, and errorQuda.
|
inline virtual |
Reimplemented from quda::TunableVectorYZ.
Definition at line 701 of file coarse_op.cuh.
References quda::TuneParam::aux, quda::TuneParam::block, quda::COMPUTE_VUV, quda::TuneParam::grid, and quda::TunableVectorYZ::initTuneParam().
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::advanceAux().
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 145 of file coarse_op.cuh.
References quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, and errorQuda.
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 813 of file coarse_op.cuh.
References quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, errorQuda, quda::GaugeField::Gauge_p(), and quda::LatticeField::restore().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 785 of file coarse_op.cuh.
References quda::LatticeField::backup(), quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, errorQuda, and quda::GaugeField::Gauge_p().
|
inline |
Set which computation we are doing
Definition at line 610 of file coarse_op.cuh.
References quda::COMPUTE_AV, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_UV, quda::COMPUTE_VUV, max_color_per_block, quda::TunableVectorYZ::resizeStep(), quda::TunableVectorYZ::resizeVector(), and quda::TunableVectorY::tune_block_x.
Referenced by quda::calculateY().
|
inline |
Set which dimension we are working on (where applicable)
Definition at line 600 of file coarse_op.cuh.
Referenced by quda::calculateY().
|
inline |
Set which direction we are working on (where applicable)
Definition at line 605 of file coarse_op.cuh.
Referenced by quda::calculateY().
|
inline protected virtual |
Reimplemented from quda::TunableVectorY.
Definition at line 174 of file coarse_op.cuh.
References quda::COMPUTE_VUV, max_color_per_block, and quda::TunableVectorY::sharedBytesPerBlock().
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply().
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 172 of file coarse_op.cuh.
References quda::COMPUTE_VUV.
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 171 of file coarse_op.cuh.
|
inline virtual |
Implements quda::Tunable.
Definition at line 730 of file coarse_op.cuh.
References quda::Tunable::aux, quda::TuneKey::aux_n, quda::COMPUTE_AV, quda::COMPUTE_CLOVER_INV_MAX, quda::COMPUTE_COARSE_CLOVER, quda::COMPUTE_CONVERT, quda::COMPUTE_DIAGONAL, quda::COMPUTE_RESCALE, quda::COMPUTE_REVERSE_Y, quda::COMPUTE_TMAV, quda::COMPUTE_TMCAV, quda::COMPUTE_TMDIAGONAL, quda::COMPUTE_TWISTED_CLOVER_INV_MAX, quda::COMPUTE_UV, quda::COMPUTE_VUV, errorQuda, quda::LatticeField::Location(), quda::LatticeField::MemType(), QUDA_BACKWARDS, QUDA_CUDA_FIELD_LOCATION, QUDA_FORWARDS, QUDA_MEMORY_MAPPED, quda::blas::copy_ns::vol_str, and quda::LatticeField::VolString().
|
protected |
Definition at line 36 of file coarse_op.cuh.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), and quda::calculateY().
|
protected |
Definition at line 43 of file coarse_op.cuh.
|
protected |
Definition at line 44 of file coarse_op.cuh.
|
protected |
Definition at line 37 of file coarse_op.cuh.
|
protected |
Definition at line 45 of file coarse_op.cuh.
|
protected |
Definition at line 39 of file coarse_op.cuh.
|
protected |
Definition at line 41 of file coarse_op.cuh.
|
protected |
Definition at line 38 of file coarse_op.cuh.
|
protected |
Definition at line 40 of file coarse_op.cuh.