QUDA
1.0.0
|
Public Member Functions | |
ExtractGhostEx (ExtractGhostExArg< Order, nDim, dim > &arg, bool extract, const GaugeField &meta, QudaFieldLocation location) | |
virtual | ~ExtractGhostEx () |
void | apply (const cudaStream_t &stream) |
TuneKey | tuneKey () const |
long long | flops () const |
long long | bytes () const |
Private Member Functions | |
unsigned int | sharedBytesPerThread () const |
unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
bool | tuneGridDim () const |
unsigned int | minThreads () const |
![]() | |
Tunable () | |
virtual | ~Tunable () |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual void | initTuneParam (TuneParam &param) const |
virtual void | defaultTuneParam (TuneParam &param) const |
virtual bool | advanceTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
virtual bool | advanceBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overridden to return maxDynamicSharedBytesPerBlock. More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Private Attributes | |
ExtractGhostExArg< Order, nDim, dim > | arg |
int | size |
bool | extract |
const GaugeField & | meta |
QudaFieldLocation | location |
![]() | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 192 of file extract_gauge_ghost_extended.cu.
|
inline |
Definition at line 207 of file extract_gauge_ghost_extended.cu.
References quda::ExtractGhostExArg< Order, nDim, dim >::A0, quda::ExtractGhostExArg< Order, nDim, dim >::A1, quda::ExtractGhostExArg< Order, nDim, dim >::B0, quda::ExtractGhostExArg< Order, nDim, dim >::B1, quda::ExtractGhostExArg< Order, nDim, dim >::C0, quda::ExtractGhostExArg< Order, nDim, dim >::C1, quda::ExtractGhostExArg< Order, nDim, dim >::order, and quda::ExtractGhostExArg< Order, nDim, dim >::R.
|
inline virtual |
Definition at line 217 of file extract_gauge_ghost_extended.cu.
|
inline virtual |
Implements quda::Tunable.
Definition at line 219 of file extract_gauge_ghost_extended.cu.
References quda::arg(), quda::TuneParam::block, getTuning(), getVerbosity(), quda::TuneParam::grid, QUDA_CPU_FIELD_LOCATION, quda::TuneParam::shared_bytes, and quda::tuneLaunch().
Referenced by quda::extractGhostEx().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 246 of file extract_gauge_ghost_extended.cu.
References quda::ExtractGhostExArg< Order, nDim, dim >::order.
|
inline virtual |
Implements quda::Tunable.
Definition at line 245 of file extract_gauge_ghost_extended.cu.
|
inline private virtual |
Reimplemented from quda::Tunable.
Definition at line 204 of file extract_gauge_ghost_extended.cu.
References quda::size.
|
inline private virtual |
Implements quda::Tunable.
Definition at line 201 of file extract_gauge_ghost_extended.cu.
|
inline private virtual |
Implements quda::Tunable.
Definition at line 200 of file extract_gauge_ghost_extended.cu.
|
inline private virtual |
Reimplemented from quda::Tunable.
Definition at line 203 of file extract_gauge_ghost_extended.cu.
|
inline virtual |
Implements quda::Tunable.
Definition at line 243 of file extract_gauge_ghost_extended.cu.
References quda::LatticeField::VolString().
|
private |
Definition at line 193 of file extract_gauge_ghost_extended.cu.
|
private |
Definition at line 195 of file extract_gauge_ghost_extended.cu.
|
private |
Definition at line 197 of file extract_gauge_ghost_extended.cu.
|
private |
Definition at line 196 of file extract_gauge_ghost_extended.cu.
|
private |
Definition at line 194 of file extract_gauge_ghost_extended.cu.