QUDA
1.0.0
|
Public Member Functions | |
CopyGauge (Arg &arg, const GaugeField &out, const GaugeField &in, QudaFieldLocation location) | |
void | set_ghost (int is_ghost_) |
virtual | ~CopyGauge () |
void | apply (const cudaStream_t &stream) |
TuneKey | tuneKey () const |
long long | flops () const |
long long | bytes () const |
Private Member Functions | |
unsigned int | sharedBytesPerThread () const |
unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
bool | tuneGridDim () const |
unsigned int | minThreads () const |
bool | advanceTuneParam (TuneParam &param) const |
![]() | |
TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z) | |
bool | advanceBlockDim (TuneParam &param) const |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
void | resizeVector (int y, int z) const |
void | resizeStep (int y, int z) const |
![]() | |
TunableVectorY (unsigned int vector_length_y) | |
void | resizeVector (int y) const |
void | resizeStep (int y) const |
![]() | |
Tunable () | |
virtual | ~Tunable () |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock, since that may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock. More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Private Attributes | |
Arg | arg |
int | size |
const GaugeField & | meta |
QudaFieldLocation | location |
bool | is_ghost |
![]() | |
unsigned int | vector_length_y |
unsigned int | step_y |
bool | tune_block_x |
![]() | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 11 of file copy_gauge_helper.cuh.
|
inline |
Definition at line 31 of file copy_gauge_helper.cuh.
References quda::LatticeField::AuxString(), and quda::compile_type_str().
|
inline virtual |
Definition at line 80 of file copy_gauge_helper.cuh.
|
inline private virtual |
Reimplemented from quda::Tunable.
Definition at line 25 of file copy_gauge_helper.cuh.
References quda::Tunable::advanceTuneParam(), and QUDA_CUDA_FIELD_LOCATION.
|
inline virtual |
Implements quda::Tunable.
Definition at line 82 of file copy_gauge_helper.cuh.
References quda::arg(), quda::TuneParam::block, errorQuda, getTuning(), getVerbosity(), quda::TuneParam::grid, length, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, quda::TuneParam::shared_bytes, and quda::tuneLaunch().
Referenced by quda::copyGauge(), and quda::copyMom().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 118 of file copy_gauge_helper.cuh.
|
inline virtual |
Implements quda::Tunable.
Definition at line 117 of file copy_gauge_helper.cuh.
|
inline private virtual |
Reimplemented from quda::Tunable.
Definition at line 23 of file copy_gauge_helper.cuh.
References quda::size.
|
inline |
Definition at line 61 of file copy_gauge_helper.cuh.
References errorQuda, quda::GaugeField::Geometry(), length, quda::gauge::Ncolor(), and quda::LatticeField::Ndim().
Referenced by quda::copyGauge().
|
inline private virtual |
Reimplemented from quda::TunableVectorY.
Definition at line 20 of file copy_gauge_helper.cuh.
|
inline private virtual |
Reimplemented from quda::TunableVectorY.
Definition at line 19 of file copy_gauge_helper.cuh.
|
inline private virtual |
Reimplemented from quda::Tunable.
Definition at line 22 of file copy_gauge_helper.cuh.
|
inline virtual |
Implements quda::Tunable.
Definition at line 110 of file copy_gauge_helper.cuh.
References quda::TuneKey::aux_n, and quda::LatticeField::VolString().
|
private |
Definition at line 12 of file copy_gauge_helper.cuh.
|
private |
Definition at line 16 of file copy_gauge_helper.cuh.
|
private |
Definition at line 15 of file copy_gauge_helper.cuh.
|
private |
Definition at line 14 of file copy_gauge_helper.cuh.
|
private |
Definition at line 13 of file copy_gauge_helper.cuh.