QUDA
1.0.0
|
Public Member Functions | |
Pack (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, double a, double b, double c) | |
virtual | ~Pack () |
template<typename T , typename Arg > | |
void | launch (T *f, const TuneParam &tp, Arg &arg, const cudaStream_t &stream) |
void | apply (const cudaStream_t &stream) |
bool | tuneSharedBytes () const |
void | initTuneParam (TuneParam &param) const |
void | defaultTuneParam (TuneParam &param) const |
TuneKey | tuneKey () const |
int | tuningIter () const |
long long | flops () const |
long long | bytes () const |
Protected Member Functions | |
bool | tuneGridDim () const |
unsigned int | maxGridSize () const |
unsigned int | minGridSize () const |
int | gridStep () const |
gridStep sets the step size when iterating the grid size in advanceGridDim. More... | |
bool | tuneAuxDim () const |
unsigned int | minThreads () const |
void | fillAux () |
Protected Attributes | |
void ** | ghost |
const ColorSpinorField & | in |
MemoryLocation | location |
const int | nFace |
const bool | dagger |
const int | parity |
const int | nParity |
int | threads |
const double | a |
const double | b |
const double | c |
int | twist |
Additional Inherited Members | |
![]() | |
TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z) | |
bool | advanceBlockDim (TuneParam &param) const |
void | resizeVector (int y, int z) const |
void | resizeStep (int y, int z) const |
![]() | |
TunableVectorY (unsigned int vector_length_y) | |
void | resizeVector (int y) const |
void | resizeStep (int y) const |
virtual unsigned int | sharedBytesPerThread () const |
virtual unsigned int | sharedBytesPerBlock (const TuneParam &param) const |
![]() | |
Tunable () | |
virtual | ~Tunable () |
virtual void | preTune () |
virtual void | postTune () |
virtual std::string | paramString (const TuneParam &param) const |
virtual std::string | perfString (float time) const |
virtual bool | advanceTuneParam (TuneParam &param) const |
void | checkLaunchParam (TuneParam &param) |
CUresult | jitifyError () const |
CUresult & | jitifyError () |
virtual bool | advanceGridDim (TuneParam &param) const |
virtual unsigned int | maxBlockSize (const TuneParam &param) const |
virtual int | blockStep () const |
virtual int | blockMin () const |
virtual void | resetBlockDim (TuneParam &param) const |
unsigned int | maxBlocksPerSM () const |
For some reason this can't be queried from the device properties, so we set it here, based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
template<typename F > | |
void | setMaxDynamicSharedBytesPerBlock (F *func) const |
Enable the maximum dynamic shared bytes for the kernel "func" (values given by maxDynamicSharedBytesPerBlock()). More... | |
unsigned int | maxDynamicSharedBytesPerBlock () const |
This can't be correctly queried in CUDA for all architectures, so we set it here, based on Table 14 of the CUDA Programming Guide 10.0 (Technical Specifications per Compute Capability). More... | |
virtual unsigned int | maxSharedBytesPerBlock () const |
The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock(), since that limit may need an explicit opt-in to enable (by calling setMaxDynamicSharedBytesPerBlock() for the kernel in question). If the CUDA kernel in question does this opt-in, then this function can be overridden to return maxDynamicSharedBytesPerBlock(). More... | |
virtual bool | advanceSharedBytes (TuneParam &param) const |
virtual bool | advanceAux (TuneParam &param) const |
int | writeAuxString (const char *format,...) |
![]() | |
unsigned int | vector_length_y |
unsigned int | step_y |
bool | tune_block_x |
![]() | |
char | aux [TuneKey::aux_n] |
CUresult | jitify_error |
Definition at line 45 of file dslash_pack2.cu.
|
inline |
Definition at line 164 of file dslash_pack2.cu.
References quda::commDim, quda::Pack< Float, nColor, spin_project >::fillAux(), quda::ColorSpinorField::getDslashConstant(), quda::getKernelPackT(), and quda::DslashConstant::ghostFaceCB.
|
inline virtual |
Definition at line 189 of file dslash_pack2.cu.
|
inline virtual |
Implements quda::Tunable.
Definition at line 202 of file dslash_pack2.cu.
References quda::arg(), quda::TuneParam::aux, errorQuda, getTuning(), getVerbosity(), quda::TuneParam::grid, quda::Host, quda::Pack< Float, nColor, spin_project >::launch(), quda::ColorSpinorField::Nspin(), quda::ColorSpinorField::PCType(), QUDA_4D_PC, QUDA_5D_PC, and quda::tuneLaunch().
Referenced by quda::PackGhost().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 331 of file dslash_pack2.cu.
References quda::ColorSpinorField::getDslashConstant(), quda::DslashConstant::Ls, nColor, quda::ColorSpinorField::Nspin(), QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and quda::Pack< Float, nColor, spin_project >::threads.
|
inline virtual |
sets default values for when tuning is disabled
Reimplemented from quda::TunableVectorYZ.
Definition at line 309 of file dslash_pack2.cu.
References quda::TunableVectorYZ::defaultTuneParam(), quda::TuneParam::grid, quda::Host, quda::Tunable::maxDynamicSharedBytesPerBlock(), quda::Pack< Float, nColor, spin_project >::minGridSize(), and quda::TuneParam::shared_bytes.
|
inline protected |
Definition at line 125 of file dslash_pack2.cu.
References quda::Tunable::aux, quda::LatticeField::AuxString(), comm_dim_topology_string(), comm_peer2peer_enabled_global(), quda::commDim, quda::Device, errorQuda, quda::getKernelPackT(), quda::Host, quda::ColorSpinorField::Nspin(), quda::ColorSpinorField::PCType(), QUDA_5D_PC, and quda::Remote.
Referenced by quda::Pack< Float, nColor, spin_project >::Pack().
|
inline virtual |
Implements quda::Tunable.
Definition at line 325 of file dslash_pack2.cu.
References quda::ColorSpinorField::getDslashConstant(), quda::DslashConstant::Ls, nColor, quda::ColorSpinorField::Nspin(), and quda::Pack< Float, nColor, spin_project >::threads.
|
inline protected virtual |
gridStep sets the step size when iterating the grid size in advanceGridDim.
Reimplemented from quda::Tunable.
Definition at line 104 of file dslash_pack2.cu.
References quda::commDim, quda::Tunable::gridStep(), quda::Host, and quda::ColorSpinorField::Ndim().
|
inline virtual |
Reimplemented from quda::TunableVectorYZ.
Definition at line 297 of file dslash_pack2.cu.
References quda::TuneParam::grid, quda::Host, quda::TunableVectorYZ::initTuneParam(), quda::Tunable::maxDynamicSharedBytesPerBlock(), quda::Pack< Float, nColor, spin_project >::minGridSize(), and quda::TuneParam::shared_bytes.
|
inline |
Definition at line 192 of file dslash_pack2.cu.
References quda::TuneParam::block, deviceProp, quda::TuneParam::grid, quda::qudaLaunchKernel(), quda::Tunable::setMaxDynamicSharedBytesPerBlock(), and quda::TuneParam::shared_bytes.
Referenced by quda::Pack< Float, nColor, spin_project >::apply().
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 64 of file dslash_pack2.cu.
References quda::commDim, quda::Host, quda::Tunable::maxGridSize(), and quda::ColorSpinorField::Ndim().
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 84 of file dslash_pack2.cu.
References quda::commDim, quda::Host, quda::Tunable::minGridSize(), and quda::ColorSpinorField::Ndim().
Referenced by quda::Pack< Float, nColor, spin_project >::defaultTuneParam(), and quda::Pack< Float, nColor, spin_project >::initTuneParam().
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 123 of file dslash_pack2.cu.
References quda::Pack< Float, nColor, spin_project >::threads.
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 122 of file dslash_pack2.cu.
|
inline protected virtual |
Reimplemented from quda::Tunable.
Definition at line 62 of file dslash_pack2.cu.
|
inline virtual |
Implements quda::Tunable.
Definition at line 321 of file dslash_pack2.cu.
References quda::Tunable::aux, and quda::LatticeField::VolString().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 289 of file dslash_pack2.cu.
References quda::Tunable::maxDynamicSharedBytesPerBlock(), and quda::Tunable::maxSharedBytesPerBlock().
|
inline virtual |
Reimplemented from quda::Tunable.
Definition at line 323 of file dslash_pack2.cu.
|
protected |
Definition at line 57 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 58 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 59 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 53 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 49 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 50 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 51 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 52 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 55 of file dslash_pack2.cu.
|
protected |
Definition at line 54 of file dslash_pack2.cu.
Referenced by quda::PackGhost().
|
protected |
Definition at line 56 of file dslash_pack2.cu.
Referenced by quda::Pack< Float, nColor, spin_project >::bytes(), quda::Pack< Float, nColor, spin_project >::flops(), and quda::Pack< Float, nColor, spin_project >::minThreads().
|
protected |
Definition at line 60 of file dslash_pack2.cu.