QUDA
1.0.0
|
Public Member Functions | |
QudaMemCopy (void *dst, const void *src, size_t count, cudaMemcpyKind kind, bool async, const char *func, const char *file, const char *line) | |
virtual | ~QudaMemCopy () |
void | apply (const cudaStream_t &stream) |
bool | advanceTuneParam (TuneParam &param) const |
TuneKey | tuneKey () const |
long long | flops () const |
long long | bytes () const |
Public Member Functions inherited from quda::Tunable | |
Tunable () | |
virtual | ~Tunable () |
virtual void | preTune () |
virtual void | postTune () |
virtual int | tuningIter () const |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual void | initTuneParam (TuneParam &param) const |
virtual void | defaultTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Private Member Functions | |
unsigned int | sharedBytesPerThread () const |
unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
Private Attributes | |
void * | dst |
const void * | src |
const size_t | count |
const cudaMemcpyKind | kind |
const bool | async |
const char * | name |
Additional Inherited Members | |
Protected Member Functions inherited from quda::Tunable | |
virtual unsigned int | minThreads () const |
virtual bool | tuneGridDim () const |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
virtual bool | advanceBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set it explicitly, based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so here we set it explicitly, based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock(), since that limit may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock() for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock(). More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Protected Attributes inherited from quda::Tunable | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 29 of file quda_cuda_api.cpp.
|
inline |
Definition at line 42 of file quda_cuda_api.cpp.
References quda::Tunable::aux, and errorQuda.
|
inline virtual |
Definition at line 72 of file quda_cuda_api.cpp.
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 112 of file quda_cuda_api.cpp.
|
inline virtual |
Implements quda::Tunable.
Definition at line 74 of file quda_cuda_api.cpp.
References errorQuda, getTuning(), getVerbosity(), PROFILE, quda::QUDA_PROFILE_MEMCPY_D2D_ASYNC, quda::QUDA_PROFILE_MEMCPY_D2H_ASYNC, quda::QUDA_PROFILE_MEMCPY_H2D_ASYNC, and quda::tuneLaunch().
Referenced by quda::qudaMemcpy_(), and quda::qudaMemcpyAsync_().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 122 of file quda_cuda_api.cpp.
References count.
|
inline virtual |
Implements quda::Tunable.
Definition at line 121 of file quda_cuda_api.cpp.
|
inline private virtual |
Implements quda::Tunable.
Definition at line 39 of file quda_cuda_api.cpp.
|
inline private virtual |
Implements quda::Tunable.
Definition at line 38 of file quda_cuda_api.cpp.
|
inline virtual |
Implements quda::Tunable.
Definition at line 114 of file quda_cuda_api.cpp.
References quda::Tunable::aux, and quda::u64toa().
|
private |
Definition at line 35 of file quda_cuda_api.cpp.
|
private |
Definition at line 33 of file quda_cuda_api.cpp.
Referenced by bytes().
|
private |
Definition at line 31 of file quda_cuda_api.cpp.
Referenced by quda::qudaMemcpy2DAsync_().
|
private |
Definition at line 34 of file quda_cuda_api.cpp.
|
private |
Definition at line 36 of file quda_cuda_api.cpp.
|
private |
Definition at line 32 of file quda_cuda_api.cpp.