QUDA
1.0.0
|
Public Member Functions | |
Staggered (Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in) | |
virtual | ~Staggered () |
void | apply (const cudaStream_t &stream) |
long long | flops () const |
long long | bytes () const |
TuneKey | tuneKey () const |
Staggered (Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in) | |
virtual | ~Staggered () |
void | apply (const cudaStream_t &stream) |
TuneKey | tuneKey () const |
![]() | |
template<typename T , typename Arg > | |
void | launch (T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
template<template< typename, int, int, int, bool, bool, KernelType, typename > class Launch, int nDim, int nColor, int nParity, bool dagger, bool xpay, typename Arg > | |
void | instantiate (TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
This instantiate function is used to instantiate the KernelType template required for the multi-GPU dslash kernels. More... | |
template<template< typename, int, int, int, bool, bool, KernelType, typename > class Launch, int nDim, int nColor, int nParity, bool xpay, typename Arg > | |
void | instantiate (TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
This instantiate function is used to instantiate the dagger template. More... | |
template<template< typename, int, int, int, bool, bool, KernelType, typename > class Launch, int nDim, int nColor, bool xpay, typename Arg > | |
void | instantiate (TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
This instantiate function is used to instantiate the nParity template. More... | |
template<template< typename, int, int, int, bool, bool, KernelType, typename > class Launch, int nDim, int nColor, typename Arg > | |
void | instantiate (TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
This instantiate function is used to instantiate the xpay template. More... | |
Dslash (DslashArg< Float > &arg, const ColorSpinorField &out, const ColorSpinorField &in, const char *src) | |
int | Nface () const |
int | Dagger () const |
const char * | getAux (KernelType type) const |
void | setAux (KernelType type, const char *aux_) |
void | augmentAux (KernelType type, const char *extra) |
virtual void | preTune () |
Save the output field since the output field is both read from and written to in the exterior kernels. More... | |
virtual void | postTune () |
Restore the output field if doing exterior kernel. More... | |
![]() | |
TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z) | |
bool | advanceBlockDim (TuneParam &param) const |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
void | resizeVector (int y, int z) const |
void | resizeStep (int y, int z) const |
![]() | |
TunableVectorY (unsigned int vector_length_y) | |
void | resizeVector (int y) const |
void | resizeStep (int y) const |
![]() | |
Tunable () | |
virtual | ~Tunable () |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual bool | advanceTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
Protected Attributes | |
Arg & | arg |
const ColorSpinorField & | in |
![]() | |
DslashArg< Float > & | arg |
const ColorSpinorField & | out |
const ColorSpinorField & | in |
const int | nDimComms |
char | aux_base [TuneKey::aux_n - 32] |
char | aux [8][TuneKey::aux_n] |
![]() | |
unsigned int | vector_length_y |
unsigned int | step_y |
bool | tune_block_x |
![]() | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Additional Inherited Members | |
![]() | |
DslashArg< Float > & | dslashParam |
![]() | |
void | fillAuxBase () |
Set the base strings used by the different dslash kernel types for autotuning. More... | |
void | fillAux (KernelType kernel_type, const char *kernel_str) |
Specialize the auxiliary strings for each kernel type. More... | |
bool | tuneGridDim () const |
unsigned int | minThreads () const |
template<typename Arg > | |
void | setParam (Arg &arg) |
virtual int | tuningIter () const |
int | blockStep () const |
int | blockMin () const |
unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedMemoryPerBlock since that may need explicit opt in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt in then this function can be overloaded to return maxDynamicSharedBytesPerBlock. More... | |
![]() | |
virtual unsigned int | sharedBytesPerThread () const |
virtual unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
![]() | |
virtual bool | tuneAuxDim () const |
virtual bool | tuneSharedBytes () const |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual unsigned int | maxGridSize () const |
virtual unsigned int | minGridSize () const |
virtual int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
virtual void | resetBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability) More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures so here we set this. Based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
Definition at line 31 of file dslash_improved_staggered.cu.
|
inline |
Definition at line 39 of file dslash_improved_staggered.cu.
|
inline virtual |
Definition at line 46 of file dslash_improved_staggered.cu.
|
inline |
Definition at line 39 of file dslash_staggered.cu.
|
inline virtual |
Definition at line 46 of file dslash_staggered.cu.
|
inline virtual |
Implements quda::Tunable.
Definition at line 48 of file dslash_improved_staggered.cu.
References quda::arg(), errorQuda, getTuning(), getVerbosity(), quda::LatticeField::Location(), QUDA_CPU_FIELD_LOCATION, quda::Dslash< Float >::setParam(), quda::stream, and quda::tuneLaunch().
|
inline virtual |
Implements quda::Tunable.
Definition at line 48 of file dslash_staggered.cu.
References quda::arg(), errorQuda, getTuning(), getVerbosity(), quda::LatticeField::Location(), QUDA_CPU_FIELD_LOCATION, quda::Dslash< Float >::setParam(), quda::stream, and quda::tuneLaunch().
|
inline virtual |
Reimplemented from quda::Dslash< Float >.
Definition at line 110 of file dslash_improved_staggered.cu.
References quda::EXTERIOR_KERNEL_ALL, quda::EXTERIOR_KERNEL_T, quda::EXTERIOR_KERNEL_X, quda::EXTERIOR_KERNEL_Y, quda::EXTERIOR_KERNEL_Z, quda::ColorSpinorField::GhostFace(), quda::INTERIOR_KERNEL, quda::KERNEL_POLICY, quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_RECONSTRUCT_NO, and quda::ColorSpinorField::Volume().
|
inline virtual |
Reimplemented from quda::Dslash< Float >.
Definition at line 70 of file dslash_improved_staggered.cu.
References quda::EXTERIOR_KERNEL_ALL, quda::EXTERIOR_KERNEL_T, quda::EXTERIOR_KERNEL_X, quda::EXTERIOR_KERNEL_Y, quda::EXTERIOR_KERNEL_Z, quda::ColorSpinorField::GhostFace(), quda::INTERIOR_KERNEL, quda::KERNEL_POLICY, quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), and quda::ColorSpinorField::Volume().
|
inline virtual |
Implements quda::Tunable.
Definition at line 59 of file dslash_staggered.cu.
References quda::LatticeField::VolString().
|
inline virtual |
Implements quda::Tunable.
Definition at line 154 of file dslash_improved_staggered.cu.
References quda::LatticeField::VolString().
|
protected |
Definition at line 35 of file dslash_improved_staggered.cu.
|
protected |
Definition at line 36 of file dslash_improved_staggered.cu.