This is the generic driver for launching Dslash kernels (the base kernel of which is defined in dslash_helper.cuh). This is templated on a template template parameter, which is the underlying operator wrapped in a class. More...

#include <dslash.h>

Inheritance diagram for quda::Dslash< D, Arg >:

Public Member Functions
template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay>
void	instantiate (TuneParam &tp, const qudaStream_t &stream)
	This instantiate function is used to instantiate the KernelType template required for the multi-GPU dslash kernels. More...

template<template< bool, QudaPCType, typename > class P, int nParity, bool xpay>
void	instantiate (TuneParam &tp, const qudaStream_t &stream)
	This instantiate function is used to instantiate the dagger template. More...

template<template< bool, QudaPCType, typename > class P, bool xpay>
void	instantiate (TuneParam &tp, const qudaStream_t &stream)
	This instantiate function is used to instantiate the nParity template. More...

template<template< bool, QudaPCType, typename > class P>
void	instantiate (TuneParam &tp, const qudaStream_t &stream)
	This instantiate function is used to instantiate the xpay template. More...

	Dslash (Arg &arg, const ColorSpinorField &out, const ColorSpinorField &in)

void	setShmem (int shmem)

void	setPack (bool pack, MemoryLocation location)

int	Nface () const

int	Dagger () const

const char *	getAux (KernelType type) const

void	setAux (KernelType type, const char *aux_)

void	augmentAux (KernelType type, const char *extra)

virtual TuneKey	tuneKey () const

virtual void	preTune ()
	Save the output field since the output field is both read from and written to in the exterior kernels. More...

virtual void	postTune ()
	Restore the output field if doing exterior kernel. More...

virtual long long	flops () const

virtual long long	bytes () const

Public Member Functions inherited from quda::TunableVectorYZ
	TunableVectorYZ (unsigned int vector_length_y, unsigned int vector_length_z)

bool	advanceBlockDim (TuneParam &param) const

void	initTuneParam (TuneParam &param) const

void	defaultTuneParam (TuneParam &param) const

void	resizeVector (int y, int z) const

void	resizeStep (int y, int z) const

Public Member Functions inherited from quda::TunableVectorY
	TunableVectorY (unsigned int vector_length_y)

void	resizeVector (int y) const

void	resizeStep (int y) const

Public Member Functions inherited from quda::Tunable
	Tunable ()

virtual	~Tunable ()

virtual void	apply (const qudaStream_t &stream)=0

virtual std::string	paramString (const TuneParam &param) const

virtual std::string	perfString (float time) const

void	checkLaunchParam (TuneParam &param)

CUresult	jitifyError () const

CUresult &	jitifyError ()

Public Attributes
Arg &	dslashParam

Protected Member Functions
void	fillAuxBase ()
	Set the base strings used by the different dslash kernel types for autotuning. More...

void	fillAux (KernelType kernel_type, const char *kernel_str)
	Specialize the auxiliary strings for each kernel type. More...

virtual bool	tuneGridDim () const

virtual unsigned int	minThreads () const

virtual unsigned int	minGridSize () const

virtual int	gridStep () const
	gridStep sets the step size when iterating the grid size in advanceGridDim. More...

void	setParam (TuneParam &tp)

virtual int	tuningIter () const

virtual int	blockStep () const

virtual int	blockMin () const

unsigned int	maxSharedBytesPerBlock () const
	The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedMemoryPerBlock since that may need explicit opt in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt in then this function can be overloaded to return maxDynamicSharedBytesPerBlock. More...

virtual bool	advanceAux (TuneParam &param) const

virtual bool	advanceTuneParam (TuneParam &param) const

virtual void	initTuneParam (TuneParam &param) const

virtual void	defaultTuneParam (TuneParam &param) const

template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay, KernelType kernel_type>
void	launch (TuneParam &tp, const qudaStream_t &stream)
	This is a helper function that is used to instantiate the correct templated kernel for the dslash. This can be used for all dslash types, though in some cases we specialize to reduce compilation time. More...

Protected Member Functions inherited from quda::TunableVectorY
virtual unsigned int	sharedBytesPerThread () const

virtual unsigned int	sharedBytesPerBlock (const TuneParam &param) const

Protected Member Functions inherited from quda::Tunable
virtual bool	tuneAuxDim () const

virtual bool	tuneSharedBytes () const

virtual bool	advanceGridDim (TuneParam &param) const

virtual unsigned int	maxBlockSize (const TuneParam &param) const

virtual unsigned int	maxGridSize () const

virtual void	resetBlockDim (TuneParam &param) const

unsigned int	maxBlocksPerSM () const
	Returns the maximum number of simultaneously resident blocks per SM. We can directly query this as of CUDA 11, but previously this needed to be hand coded. More...

unsigned int	maxDynamicSharedBytesPerBlock () const
	Returns the maximum dynamic shared memory per block. More...

virtual bool	advanceSharedBytes (TuneParam &param) const

int	writeAuxString (const char *format,...)

bool	tuned ()
	Whether the present instance has already been tuned or not. More...

Protected Attributes
Arg &	arg

const ColorSpinorField &	out

const ColorSpinorField &	in

const int	nDimComms

char	aux_base [TuneKey::aux_n - 32]

char	aux [8][TuneKey::aux_n]

char	aux_pack [TuneKey::aux_n]

char	aux_barrier [TuneKey::aux_n]

void *	packBuffer [4 *QUDA_MAX_DIM]

std::string	kernel_file

Protected Attributes inherited from quda::TunableVectorYZ
unsigned	vector_length_z

unsigned	step_z

bool	tune_block_y

Protected Attributes inherited from quda::TunableVectorY
unsigned int	vector_length_y

unsigned int	step_y

bool	tune_block_x

Protected Attributes inherited from quda::Tunable
char	aux [TuneKey::aux_n]

CUresult	jitify_error

Detailed Description

template<template< int, bool, bool, KernelType, typename > class D, typename Arg>
class quda::Dslash< D, Arg >

This is the generic driver for launching Dslash kernels (the base kernel of which is defined in dslash_helper.cuh). This is templated on a template template parameter, which is the underlying operator wrapped in a class.

Template Parameters

D	A class that defines the linear operator we wish to apply. This class should define an operator() method that is used to apply the operator by the dslash kernel. See the wilson class in the file kernels/dslash_wilson.cuh as an example.
Arg	The argument struct that is used to parameterize the kernel. For the wilson class example above, the WilsonArg class defined in the same file is the corresponding argument class.

Definition at line 32 of file dslash.h.

Constructor & Destructor Documentation

◆ Dslash()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

quda::Dslash< D, Arg >::Dslash	(	Arg &	arg,
		const ColorSpinorField &	out,
		const ColorSpinorField &	in
	)

inline

Definition at line 379 of file dslash.h.

Member Function Documentation

◆ advanceAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual bool quda::Dslash< D, Arg >::advanceAux ( TuneParam & param ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 166 of file dslash.h.

◆ advanceTuneParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual bool quda::Dslash< D, Arg >::advanceTuneParam ( TuneParam & param ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 206 of file dslash.h.

◆ augmentAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::augmentAux	(	KernelType	type,
		const char *	extra
	)

inline

Definition at line 490 of file dslash.h.

◆ blockMin()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual int quda::Dslash< D, Arg >::blockMin ( ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 162 of file dslash.h.

◆ blockStep()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual int quda::Dslash< D, Arg >::blockStep ( ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 161 of file dslash.h.

◆ bytes()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual long long quda::Dslash< D, Arg >::bytes ( ) const

inlinevirtual

Reimplemented from quda::Tunable.

Definition at line 586 of file dslash.h.

◆ Dagger()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

int quda::Dslash< D, Arg >::Dagger ( ) const

inline

Definition at line 484 of file dslash.h.

◆ defaultTuneParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual void quda::Dslash< D, Arg >::defaultTuneParam ( TuneParam & param ) const

inlineprotectedvirtual

sets default values for when tuning is disabled

Reimplemented from quda::Tunable.

Definition at line 226 of file dslash.h.

◆ fillAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::fillAux	(	KernelType	kernel_type,
		const char *	kernel_str
	)

inlineprotected

Specialize the auxiliary strings for each kernel type.

Parameters

[in]	kernel_type	The kernel_type we are generating the string for
[in]	kernel_str	String corresponding to the kernel type

Definition at line 75 of file dslash.h.

◆ fillAuxBase()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::fillAuxBase ( )

inlineprotected

Set the base strings used by the different dslash kernel types for autotuning.

Definition at line 55 of file dslash.h.

◆ flops()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual long long quda::Dslash< D, Arg >::flops ( ) const

inlinevirtual

Implements quda::Tunable.

Definition at line 535 of file dslash.h.

◆ getAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

const char* quda::Dslash< D, Arg >::getAux ( KernelType type ) const

inline

Definition at line 486 of file dslash.h.

◆ gridStep()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual int quda::Dslash< D, Arg >::gridStep ( ) const

inlineprotectedvirtual

gridStep sets the step size when iterating the grid size in advanceGridDim.

Returns: Grid step size

Reimplemented from quda::Tunable.

Definition at line 100 of file dslash.h.

◆ initTuneParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual void quda::Dslash< D, Arg >::initTuneParam ( TuneParam & param ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 211 of file dslash.h.

◆ instantiate() [1/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay>

void quda::Dslash< D, Arg >::instantiate	(	TuneParam &	tp,
		const qudaStream_t &	stream
	)

inline

This instantiate function is used to instantiate the KernelType template required for the multi-GPU dslash kernels.

Parameters

[in]	tp	The tuning parameters to use for this kernel
[in]	stream	The qudaStream_t where the kernel will run

Definition at line 291 of file dslash.h.

◆ instantiate() [2/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

template<template< bool, QudaPCType, typename > class P, int nParity, bool xpay>

void quda::Dslash< D, Arg >::instantiate	(	TuneParam &	tp,
		const qudaStream_t &	stream
	)

inline

This instantiate function is used to instantiate the dagger template.

Parameters

[in]	tp	The tuning parameters to use for this kernel
[in]	stream	The qudaStream_t where the kernel will run

Definition at line 326 of file dslash.h.

◆ instantiate() [3/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

template<template< bool, QudaPCType, typename > class P, bool xpay>

void quda::Dslash< D, Arg >::instantiate	(	TuneParam &	tp,
		const qudaStream_t &	stream
	)

inline

This instantiate function is used to instantiate the nParity template.

Parameters

[in]	tp	The tuning parameters to use for this kernel
[in]	stream	The qudaStream_t where the kernel will run

Definition at line 345 of file dslash.h.

◆ instantiate() [4/4]

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

template<template< bool, QudaPCType, typename > class P>

void quda::Dslash< D, Arg >::instantiate	(	TuneParam &	tp,
		const qudaStream_t &	stream
	)

inline

This instantiate function is used to instantiate the xpay template.

Parameters

[in]	tp	The tuning parameters to use for this kernel
[in]	stream	The qudaStream_t where the kernel will run

Definition at line 365 of file dslash.h.

◆ launch()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

template<template< bool, QudaPCType, typename > class P, int nParity, bool dagger, bool xpay, KernelType kernel_type>

void quda::Dslash< D, Arg >::launch	(	TuneParam &	tp,
		const qudaStream_t &	stream
	)

inlineprotected

This is a helper function that is used to instantiate the correct templated kernel for the dslash. This can be used for all dslash types, though in some cases we specialize to reduce compilation time.

Definition at line 248 of file dslash.h.

◆ maxSharedBytesPerBlock()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

unsigned int quda::Dslash< D, Arg >::maxSharedBytesPerBlock ( ) const

inlineprotectedvirtual

The maximum shared memory that a CUDA thread block can use in the autotuner. This isn't necessarily the same as maxDynamicSharedMemoryPerBlock since that may need explicit opt in to enable (by calling setMaxDynamicSharedBytes for the kernel in question). If the CUDA kernel in question does this opt in then this function can be overloaded to return maxDynamicSharedBytesPerBlock.

Returns: The maximum shared bytes limit per block the autotuner will utilize.

Reimplemented from quda::Tunable.

Definition at line 164 of file dslash.h.

◆ minGridSize()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual unsigned int quda::Dslash< D, Arg >::minGridSize ( ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 85 of file dslash.h.

◆ minThreads()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual unsigned int quda::Dslash< D, Arg >::minThreads ( ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 83 of file dslash.h.

◆ Nface()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

int quda::Dslash< D, Arg >::Nface ( ) const

inline

Definition at line 480 of file dslash.h.

◆ postTune()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual void quda::Dslash< D, Arg >::postTune ( )

inlinevirtual

Restore the output field if doing exterior kernel.

Reimplemented from quda::Tunable.

Definition at line 513 of file dslash.h.

◆ preTune()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual void quda::Dslash< D, Arg >::preTune ( )

inlinevirtual

Save the output field since the output field is both read from and written to in the exterior kernels.

Reimplemented from quda::Tunable.

Definition at line 504 of file dslash.h.

◆ setAux()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::setAux	(	KernelType	type,
		const char *	aux_
	)

inline

Definition at line 488 of file dslash.h.

◆ setPack()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::setPack	(	bool	pack,
		MemoryLocation	location
	)

inline

Definition at line 430 of file dslash.h.

◆ setParam()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::setParam ( TuneParam & tp )

inlineprotected

Definition at line 112 of file dslash.h.

◆ setShmem()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void quda::Dslash< D, Arg >::setShmem ( int shmem )

inline

Definition at line 422 of file dslash.h.

◆ tuneGridDim()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual bool quda::Dslash< D, Arg >::tuneGridDim ( ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 82 of file dslash.h.

◆ tuneKey()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual TuneKey quda::Dslash< D, Arg >::tuneKey ( ) const

inlinevirtual

Implements quda::Tunable.

Definition at line 492 of file dslash.h.

◆ tuningIter()

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

virtual int quda::Dslash< D, Arg >::tuningIter ( ) const

inlineprotectedvirtual

Reimplemented from quda::Tunable.

Definition at line 159 of file dslash.h.

Member Data Documentation

◆ arg

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

Arg& quda::Dslash< D, Arg >::arg

protected

Definition at line 36 of file dslash.h.

◆ aux

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

char quda::Dslash< D, Arg >::aux[8][TuneKey::aux_n]

protected

Definition at line 43 of file dslash.h.

◆ aux_barrier

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

char quda::Dslash< D, Arg >::aux_barrier[TuneKey::aux_n]

protected

Definition at line 45 of file dslash.h.

◆ aux_base

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

char quda::Dslash< D, Arg >::aux_base[TuneKey::aux_n - 32]

protected

Definition at line 42 of file dslash.h.

◆ aux_pack

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

char quda::Dslash< D, Arg >::aux_pack[TuneKey::aux_n]

protected

Definition at line 44 of file dslash.h.

◆ dslashParam

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

Arg& quda::Dslash< D, Arg >::dslashParam

Definition at line 377 of file dslash.h.

◆ in

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

const ColorSpinorField& quda::Dslash< D, Arg >::in

protected

Definition at line 38 of file dslash.h.

◆ kernel_file

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

std::string quda::Dslash< D, Arg >::kernel_file

protected

Definition at line 50 of file dslash.h.

◆ nDimComms

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

const int quda::Dslash< D, Arg >::nDimComms

protected

Definition at line 40 of file dslash.h.

◆ out

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

const ColorSpinorField& quda::Dslash< D, Arg >::out

protected

Definition at line 37 of file dslash.h.

◆ packBuffer

template<template< int, bool, bool, KernelType, typename > class D, typename Arg >

void* quda::Dslash< D, Arg >::packBuffer[4 *QUDA_MAX_DIM]

protected

Definition at line 48 of file dslash.h.

The documentation for this class was generated from the following file:

quda/include/dslash.h

Public Member Functions

Public Attributes

Protected Member Functions

Protected Attributes

Detailed Description

template<template< int, bool, bool, KernelType, typename > class D, typename Arg> class quda::Dslash< D, Arg >

Constructor & Destructor Documentation

◆ Dslash()

Member Function Documentation

◆ advanceAux()

◆ advanceTuneParam()

◆ augmentAux()

◆ blockMin()

◆ blockStep()

◆ bytes()

◆ Dagger()

◆ defaultTuneParam()

◆ fillAux()

◆ fillAuxBase()

◆ flops()

◆ getAux()

◆ gridStep()

◆ initTuneParam()

◆ instantiate() [1/4]

◆ instantiate() [2/4]

◆ instantiate() [3/4]

◆ instantiate() [4/4]

◆ launch()

◆ maxSharedBytesPerBlock()

◆ minGridSize()

◆ minThreads()

◆ Nface()

◆ postTune()

◆ preTune()

◆ setAux()

◆ setPack()

◆ setParam()

◆ setShmem()

◆ tuneGridDim()

◆ tuneKey()

◆ tuningIter()

Member Data Documentation

◆ arg

◆ aux

◆ aux_barrier

◆ aux_base

◆ aux_pack

◆ dslashParam

◆ in

◆ kernel_file

◆ nDimComms

◆ out

◆ packBuffer

template<template< int, bool, bool, KernelType, typename > class D, typename Arg>
class quda::Dslash< D, Arg >