#include <blas_quda.h>
#include <tune_quda.h>
#include <float_vector.h>
#include <color_spinor_field_order.h>
#include <uint_to_char.h>
#include <launch_kernel.cuh>
#include <jitify_helper.cuh>
#include <kernels/multi_reduce_core.cuh>

Include dependency graph for multi_reduce_quda.cu:

Classes
struct	quda::blas::write< writeX, writeY, writeZ, writeW >

struct	quda::blas::detail::to_chars< digits >

struct	quda::blas::detail::explode< rem, digits >

struct	quda::blas::detail::explode< 0, digits... >

struct	quda::blas::num_to_string< num >

class	quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >

class	quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >

Namespaces
	quda

	quda::blas

	quda::blas::detail

Macros
#define	MAX_MULTI_BLAS_N 2

Functions
cudaStream_t *	quda::blas::getStream ()

cudaEvent_t *	quda::blas::getReduceEvent ()

bool	quda::blas::getFastReduce ()

void	quda::blas::initFastReduce (int words)

void	quda::blas::completeFastReduce (int32_t words)

template<typename doubleN , typename ReduceType , typename FloatN , int M, int NXZ, typename Arg >
void	quda::blas::multiReduceLaunch (doubleN result[], Arg &arg, const TuneParam &tp, const cudaStream_t &stream, Tunable &tunable)

template<typename doubleN , typename ReduceType , typename RegType , typename StoreType , typename yType , int M, int NXZ, template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , typename T >
void	quda::blas::multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int length)

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void	quda::blas::multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void	quda::blas::mixedMultiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal , bool siteUnroll, typename T >
void	quda::blas::multiReduce (doubleN result[], const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w, int i, int j)

void	quda::blas::reDotProduct (double *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)

template<template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerDiagonal, typename writeDiagonal , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class ReducerOffDiagonal, typename writeOffDiagonal >
void	quda::blas::multiReduce_recurse (Complex *result, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &y, std::vector< ColorSpinorField *> &z, std::vector< ColorSpinorField *> &w, int i_idx, int j_idx, bool hermitian, unsigned int tile_size)

void	quda::blas::cDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
	Computes the matrix of inner products between the vector set a and the vector set b. More...

void	quda::blas::hDotProduct (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
	Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. Requires a.size()==b.size(). More...

void	quda::blas::hDotProduct_Anorm (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b)
	Computes the matrix of inner products between the vector set a and the vector set b. This routine is specifically for the case where the result matrix is guaranteed to be Hermitian. It is intended for cases like (p, Ap), where the output is Hermitian but the diagonal is an A-norm rather than an L2 norm. Requires a.size()==b.size(). More...

void	quda::blas::cDotProductCopy (Complex *result, std::vector< ColorSpinorField *> &a, std::vector< ColorSpinorField *> &b, std::vector< ColorSpinorField *> &c)
	Computes the matrix of inner products between the vector set a and the vector set b, and copies b into c. More...

Macro Definition Documentation

◆ MAX_MULTI_BLAS_N

#define MAX_MULTI_BLAS_N 2

Definition at line 14 of file multi_reduce_quda.cu.

Referenced by quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::advanceAux(), quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::blas::axpyBzpcx(), quda::blas::caxpy_recurse(), quda::blas::caxpyBxpz(), quda::blas::caxpyz_recurse(), quda::blas::initReduce(), quda::blas::multiBlas(), quda::blas::multiReduce(), quda::blas::multicaxpy_< NXZ, Float2, FloatN >::operator()(), quda::blas::multicaxpyz_< NXZ, Float2, FloatN >::operator()(), quda::blas::multi_caxpyBxpz_< NXZ, Float2, FloatN >::operator()(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().

Classes

Namespaces

Macros

Functions

Macro Definition Documentation

◆ MAX_MULTI_BLAS_N