QUDA  v1.1.0
A library for QCD on GPUs
Public Member Functions | Public Attributes | Protected Member Functions | Protected Attributes | List of all members
quda::Dslash< D, Arg > Class Template Reference

This is the generic driver for launching Dslash kernels (the base kernel of which is defined in dslash_helper.cuh). This is templated on a template template parameter, which is the underlying operator wrapped in a class. More...

#include <dslash.h>

+ Inheritance diagram for quda::Dslash< D, Arg >:

Public Member Functions

template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay>
void instantiate (TuneParam &tp, const qudaStream_t &stream)
 This instantiate function is used to instantiate the KernelType template required for the multi-GPU dslash kernels. More...
 
template<template< bool, QudaPCType, typename > class P, int nParity, bool xpay>
void instantiate (TuneParam &tp, const qudaStream_t &stream)
 This instantiate function is used to instantiate the dagger template. More...
 
template<template< bool, QudaPCType, typename > class P, bool xpay>
void instantiate (TuneParam &tp, const qudaStream_t &stream)
 This instantiate function is used to instantiate the nParity template. More...
 
template<template< bool, QudaPCType, typename > class P>
void instantiate (TuneParam &tp, const qudaStream_t &stream)
 This instantiate function is used to instantiate the xpay template. More...
 
 Dslash (Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)
 
void setShmem (int shmem)
 
void setPack (bool pack, MemoryLocation location)
 
int Nface () const
 
int Dagger () const
 
const char * getAux (KernelType type) const
 
void setAux (KernelType type, const char *aux_)
 
void augmentAux (KernelType type, const char *extra)
 
virtual TuneKey tuneKey () const
 
virtual void preTune ()
 Save the output field since the output field is both read from and written to in the exterior kernels. More...
 
virtual void postTune ()
 Restore the output field if doing exterior kernel. More...
 
virtual long long flops () const
 
virtual long long bytes () const
 
- Public Member Functions inherited from quda::TunableVectorYZ
 TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z)
 
bool advanceBlockDim (TuneParam &param) const
 
void initTuneParam (TuneParam &param) const
 
void defaultTuneParam (TuneParam &param) const
 
void resizeVector (int y, int z) const
 
void resizeStep (int y, int z) const
 
- Public Member Functions inherited from quda::TunableVectorY
 TunableVectorY (unsigned int vector_length_y)
 
void resizeVector (int y) const
 
void resizeStep (int y) const
 
- Public Member Functions inherited from quda::Tunable
 Tunable ()
 
virtual ~Tunable ()
 
virtual void apply (const qudaStream_t &stream)=0
 
virtual std::string paramString (const TuneParam &param) const
 
virtual std::string perfString (float time) const
 
void checkLaunchParam (TuneParam &param)
 
CUresult jitifyError () const
 
CUresult & jitifyError ()
 

Public Attributes

Arg & dslashParam
 

Protected Member Functions

void fillAuxBase ()
 Set the base strings used by the different dslash kernel types for autotuning. More...
 
void fillAux (KernelType kernel_type, const char *kernel_str)
 Specialize the auxiliary strings for each kernel type. More...
 
virtual bool tuneGridDim () const
 
virtual unsigned int minThreads () const
 
virtual unsigned int minGridSize () const
 
virtual int gridStep () const
 gridStep sets the step size when iterating the grid size in advanceGridDim. More...
 
void setParam (TuneParam &tp)
 
virtual int tuningIter () const
 
virtual int blockStep () const
 
virtual int blockMin () const
 
unsigned int maxSharedBytesPerBlock () const
 The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt in then this function can be overridden to return maxDynamicSharedBytesPerBlock. More...
 
virtual bool advanceAux (TuneParam &param) const
 
virtual bool advanceTuneParam (TuneParam &param) const
 
virtual void initTuneParam (TuneParam &param) const
 
virtual void defaultTuneParam (TuneParam &param) const
 
template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay, KernelType kernel_type>
void launch (TuneParam &tp, const qudaStream_t &stream)
 This is a helper function that is used to instantiate the correct templated kernel for the dslash. This can be used for all dslash types, though in some cases we specialize to reduce compilation time. More...
 
- Protected Member Functions inherited from quda::TunableVectorY
virtual unsigned int sharedBytesPerThread () const
 
virtual unsigned int sharedBytesPerBlock (const TuneParam &param) const
 
- Protected Member Functions inherited from quda::Tunable
virtual bool tuneAuxDim () const
 
virtual bool tuneSharedBytes () const
 
virtual bool advanceGridDim (TuneParam &param) const
 
virtual unsigned int maxBlockSize (const TuneParam &param) const
 
virtual unsigned int maxGridSize () const
 
virtual void resetBlockDim (TuneParam &param) const
 
unsigned int maxBlocksPerSM () const
 Returns the maximum number of simultaneously resident blocks per SM. We can directly query this as of CUDA 11, but previously this needed to be hand coded. More...
 
unsigned int maxDynamicSharedBytesPerBlock () const
 Returns the maximum dynamic shared memory per block. More...
 
virtual bool advanceSharedBytes (TuneParam &param) const
 
int writeAuxString (const char *format,...)
 
bool tuned ()
 Whether the present instance has already been tuned or not. More...
 

Protected Attributes

Arg & arg
 
const ColorSpinorField & out
 
const ColorSpinorField & in
 
const int nDimComms
 
char aux_base [TuneKey::aux_n - 32]
 
char aux [8][TuneKey::aux_n]
 
char aux_pack [TuneKey::aux_n]
 
char aux_barrier [TuneKey::aux_n]
 
void * packBuffer [4 *QUDA_MAX_DIM]
 
std::string kernel_file
 
- Protected Attributes inherited from quda::TunableVectorYZ
unsigned vector_length_z
 
unsigned step_z
 
bool tune_block_y
 
- Protected Attributes inherited from quda::TunableVectorY
unsigned int vector_length_y
 
unsigned int step_y
 
bool tune_block_x
 
- Protected Attributes inherited from quda::Tunable
char aux [TuneKey::aux_n]
 
CUresult jitify_error
 

Detailed Description

template<template< int, bool, bool, KernelType, typename > class D, typename Arg>
class quda::Dslash< D, Arg >

This is the generic driver for launching Dslash kernels (the base kernel of which is defined in dslash_helper.cuh). This is templated on a template template parameter, which is the underlying operator wrapped in a class.

Template Parameters
D: A class that defines the linear operator we wish to apply. This class should define an operator() method that is used to apply the operator by the dslash kernel. See the wilson class in the file kernels/dslash_wilson.cuh as an example.
Arg: The argument struct that is used to parameterize the kernel. For the wilson class example above, the WilsonArg class defined in the same file is the corresponding argument class.

Definition at line 32 of file dslash.h.

Constructor & Destructor Documentation

◆ Dslash()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
quda::Dslash< D, Arg >::Dslash ( Arg &  arg,
const ColorSpinorField &  out,
const ColorSpinorField &  in 
)
inline

Definition at line 379 of file dslash.h.

Member Function Documentation

◆ advanceAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual bool quda::Dslash< D, Arg >::advanceAux ( TuneParam & param) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 166 of file dslash.h.

◆ advanceTuneParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual bool quda::Dslash< D, Arg >::advanceTuneParam ( TuneParam & param) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 206 of file dslash.h.

◆ augmentAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::augmentAux ( KernelType  type,
const char *  extra 
)
inline

Definition at line 490 of file dslash.h.

◆ blockMin()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual int quda::Dslash< D, Arg >::blockMin ( ) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 162 of file dslash.h.

◆ blockStep()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual int quda::Dslash< D, Arg >::blockStep ( ) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 161 of file dslash.h.

◆ bytes()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual long long quda::Dslash< D, Arg >::bytes ( ) const
inline virtual

Reimplemented from quda::Tunable.

Definition at line 586 of file dslash.h.

◆ Dagger()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
int quda::Dslash< D, Arg >::Dagger ( ) const
inline

Definition at line 484 of file dslash.h.

◆ defaultTuneParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual void quda::Dslash< D, Arg >::defaultTuneParam ( TuneParam & param) const
inline protected virtual

sets default values for when tuning is disabled

Reimplemented from quda::Tunable.

Definition at line 226 of file dslash.h.

◆ fillAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::fillAux ( KernelType  kernel_type,
const char *  kernel_str 
)
inline protected

Specialize the auxiliary strings for each kernel type.

Parameters
[in] kernel_type: The kernel_type we are generating the string for
[in] kernel_str: String corresponding to the kernel type

Definition at line 75 of file dslash.h.

◆ fillAuxBase()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::fillAuxBase ( )
inline protected

Set the base strings used by the different dslash kernel types for autotuning.

Definition at line 55 of file dslash.h.

◆ flops()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual long long quda::Dslash< D, Arg >::flops ( ) const
inline virtual

Implements quda::Tunable.

Definition at line 535 of file dslash.h.

◆ getAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
const char* quda::Dslash< D, Arg >::getAux ( KernelType  type) const
inline

Definition at line 486 of file dslash.h.

◆ gridStep()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual int quda::Dslash< D, Arg >::gridStep ( ) const
inline protected virtual

gridStep sets the step size when iterating the grid size in advanceGridDim.

Returns
Grid step size

Reimplemented from quda::Tunable.

Definition at line 100 of file dslash.h.

◆ initTuneParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual void quda::Dslash< D, Arg >::initTuneParam ( TuneParam & param) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 211 of file dslash.h.

◆ instantiate() [1/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay>
void quda::Dslash< D, Arg >::instantiate ( TuneParam &  tp,
const qudaStream_t &  stream 
)
inline

This instantiate function is used to instantiate the KernelType template required for the multi-GPU dslash kernels.

Parameters
[in] tp: The tuning parameters to use for this kernel
[in] stream: The qudaStream_t where the kernel will run

Definition at line 291 of file dslash.h.

◆ instantiate() [2/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
template<template< bool, QudaPCType, typename > class P, int nParity, bool xpay>
void quda::Dslash< D, Arg >::instantiate ( TuneParam &  tp,
const qudaStream_t &  stream 
)
inline

This instantiate function is used to instantiate the dagger template.

Parameters
[in] tp: The tuning parameters to use for this kernel
[in] stream: The qudaStream_t where the kernel will run

Definition at line 326 of file dslash.h.

◆ instantiate() [3/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
template<template< bool, QudaPCType, typename > class P, bool xpay>
void quda::Dslash< D, Arg >::instantiate ( TuneParam &  tp,
const qudaStream_t &  stream 
)
inline

This instantiate function is used to instantiate the nParity template.

Parameters
[in] tp: The tuning parameters to use for this kernel
[in] stream: The qudaStream_t where the kernel will run

Definition at line 345 of file dslash.h.

◆ instantiate() [4/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
template<template< bool, QudaPCType, typename > class P>
void quda::Dslash< D, Arg >::instantiate ( TuneParam &  tp,
const qudaStream_t &  stream 
)
inline

This instantiate function is used to instantiate the xpay template.

Parameters
[in] tp: The tuning parameters to use for this kernel
[in] stream: The qudaStream_t where the kernel will run

Definition at line 365 of file dslash.h.

◆ launch()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay, KernelType kernel_type>
void quda::Dslash< D, Arg >::launch ( TuneParam &  tp,
const qudaStream_t &  stream 
)
inline protected

This is a helper function that is used to instantiate the correct templated kernel for the dslash. This can be used for all dslash types, though in some cases we specialize to reduce compilation time.

Definition at line 248 of file dslash.h.

◆ maxSharedBytesPerBlock()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
unsigned int quda::Dslash< D, Arg >::maxSharedBytesPerBlock ( ) const
inline protected virtual

The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedBytesPerBlock since that may need explicit opt in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt in then this function can be overridden to return maxDynamicSharedBytesPerBlock.

Returns
The maximum shared bytes limit per block the autotuner will utilize.

Reimplemented from quda::Tunable.

Definition at line 164 of file dslash.h.

◆ minGridSize()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual unsigned int quda::Dslash< D, Arg >::minGridSize ( ) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 85 of file dslash.h.

◆ minThreads()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual unsigned int quda::Dslash< D, Arg >::minThreads ( ) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 83 of file dslash.h.

◆ Nface()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
int quda::Dslash< D, Arg >::Nface ( ) const
inline

Definition at line 480 of file dslash.h.

◆ postTune()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual void quda::Dslash< D, Arg >::postTune ( )
inline virtual

Restore the output field if doing exterior kernel.

Reimplemented from quda::Tunable.

Definition at line 513 of file dslash.h.

◆ preTune()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual void quda::Dslash< D, Arg >::preTune ( )
inline virtual

Save the output field since the output field is both read from and written to in the exterior kernels.

Reimplemented from quda::Tunable.

Definition at line 504 of file dslash.h.

◆ setAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::setAux ( KernelType  type,
const char *  aux_ 
)
inline

Definition at line 488 of file dslash.h.

◆ setPack()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::setPack ( bool  pack,
MemoryLocation  location 
)
inline

Definition at line 430 of file dslash.h.

◆ setParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::setParam ( TuneParam & tp)
inline protected

Definition at line 112 of file dslash.h.

◆ setShmem()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void quda::Dslash< D, Arg >::setShmem ( int  shmem)
inline

Definition at line 422 of file dslash.h.

◆ tuneGridDim()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual bool quda::Dslash< D, Arg >::tuneGridDim ( ) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 82 of file dslash.h.

◆ tuneKey()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual TuneKey quda::Dslash< D, Arg >::tuneKey ( ) const
inline virtual

Implements quda::Tunable.

Definition at line 492 of file dslash.h.

◆ tuningIter()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
virtual int quda::Dslash< D, Arg >::tuningIter ( ) const
inline protected virtual

Reimplemented from quda::Tunable.

Definition at line 159 of file dslash.h.

Member Data Documentation

◆ arg

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
Arg& quda::Dslash< D, Arg >::arg
protected

Definition at line 36 of file dslash.h.

◆ aux

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
char quda::Dslash< D, Arg >::aux[8][TuneKey::aux_n]
protected

Definition at line 43 of file dslash.h.

◆ aux_barrier

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
char quda::Dslash< D, Arg >::aux_barrier[TuneKey::aux_n]
protected

Definition at line 45 of file dslash.h.

◆ aux_base

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
char quda::Dslash< D, Arg >::aux_base[TuneKey::aux_n - 32]
protected

Definition at line 42 of file dslash.h.

◆ aux_pack

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
char quda::Dslash< D, Arg >::aux_pack[TuneKey::aux_n]
protected

Definition at line 44 of file dslash.h.

◆ dslashParam

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
Arg& quda::Dslash< D, Arg >::dslashParam

Definition at line 377 of file dslash.h.

◆ in

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
const ColorSpinorField& quda::Dslash< D, Arg >::in
protected

Definition at line 38 of file dslash.h.

◆ kernel_file

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
std::string quda::Dslash< D, Arg >::kernel_file
protected

Definition at line 50 of file dslash.h.

◆ nDimComms

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
const int quda::Dslash< D, Arg >::nDimComms
protected

Definition at line 40 of file dslash.h.

◆ out

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
const ColorSpinorField& quda::Dslash< D, Arg >::out
protected

Definition at line 37 of file dslash.h.

◆ packBuffer

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >
void* quda::Dslash< D, Arg >::packBuffer[4 *QUDA_MAX_DIM]
protected

Definition at line 48 of file dslash.h.


The documentation for this class was generated from the following file: