QUDA  0.9.0
Functions
mixed Namespace Reference

Functions

template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW>
void blasCuda (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w)
 
template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void multiblasCuda (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)
 
template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void multiReduceCuda (doubleN result[], const reduce::coeff_array< T > &a, const reduce::coeff_array< T > &b, const reduce::coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w)
 
template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN reduceCuda (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v)
 

Function Documentation

◆ blasCuda()

template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW>
void mixed::blasCuda ( const double2 &  a,
const double2 &  b,
const double2 &  c,
ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w 
)

Driver for generic blas routine with four loads and two stores.

Definition at line 8 of file blas_mixed_core.h.

References a, b, c, checkLocation, errorQuda, f, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.

◆ multiblasCuda()

template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T >
void mixed::multiblasCuda ( const coeff_array< T > &  a,
const coeff_array< T > &  b,
const coeff_array< T > &  c,
CompositeColorSpinorField &  x,
CompositeColorSpinorField &  y,
CompositeColorSpinorField &  z,
CompositeColorSpinorField &  w 
)

Driver for generic blas routine with four loads and two stores.

Definition at line 8 of file multi_blas_mixed_core.h.

References a, b, c, checkLocation, errorQuda, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.

◆ multiReduceCuda()

template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T >
void mixed::multiReduceCuda ( doubleN  result[],
const reduce::coeff_array< T > &  a,
const reduce::coeff_array< T > &  b,
const reduce::coeff_array< T > &  c,
CompositeColorSpinorField &  x,
CompositeColorSpinorField &  y,
CompositeColorSpinorField &  z,
CompositeColorSpinorField &  w 
)

Driver for multi-reduce with up to five vectors.

Definition at line 8 of file multi_reduce_mixed_core.h.

References a, b, c, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.

◆ reduceCuda()

template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll>
doubleN mixed::reduceCuda ( const double2 &  a,
const double2 &  b,
ColorSpinorField &  x,
ColorSpinorField &  y,
ColorSpinorField &  z,
ColorSpinorField &  w,
ColorSpinorField &  v 
)

Driver for generic reduction routine with two loads.

Parameters
ReduceType
siteUnroll - if this is true, then one site corresponds to exactly one thread

Definition at line 23 of file reduce_mixed_core.h.

References a, b, quda::blas::bytes, checkLocation, errorQuda, float, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, reduceDoubleArray(), streams, value, w, x, y, and z.

Here is the call graph for this function: