QUDA
v1.1.0
A library for QCD on GPUs
|
Public Member Functions | |
QudaMem (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream, bool async, const char *func, const char *file, const char *line) | |
QudaMem (void *dst, int value, size_t count, const cudaStream_t &stream, bool async, const char *func, const char *file, const char *line) | |
void | apply (const qudaStream_t &stream) |
bool | advanceTuneParam (TuneParam &param) const |
TuneKey | tuneKey () const |
long long | flops () const |
long long | bytes () const |
Public Member Functions inherited from quda::Tunable | |
Tunable () | |
virtual | ~Tunable () |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual void | initTuneParam (TuneParam &param) const |
virtual void | defaultTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Additional Inherited Members | |
Protected Member Functions inherited from quda::Tunable | |
virtual unsigned int | minThreads () const |
virtual bool | tuneGridDim () const |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
virtual bool | advanceBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
Returns the maximum number of simultaneously resident blocks per SM. We can directly query this as of CUDA 11, but previously this needed to be hand coded. More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
Returns the maximum dynamic shared memory per block. More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt-in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt-in then this function can be overridden to return maxDynamicSharedBytesPerBlock. More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
bool | tuned () |
Whether the present instance has already been tuned or not. More... | |
Protected Attributes inherited from quda::Tunable | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 79 of file quda_api.cpp.
|
inline |
Definition at line 95 of file quda_api.cpp.
|
inline |
Definition at line 134 of file quda_api.cpp.
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 218 of file quda_api.cpp.
|
inline virtual |
Implements quda::Tunable.
Definition at line 155 of file quda_api.cpp.
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 229 of file quda_api.cpp.
|
inline virtual |
Implements quda::Tunable.
Definition at line 228 of file quda_api.cpp.
|
inline virtual |
Implements quda::Tunable.
Definition at line 220 of file quda_api.cpp.