|
QUDA
0.9.0
|
Functions | |
| template<template< typename Float, typename FloatN > class Functor, int writeX, int writeY, int writeZ, int writeW> | |
| void | blasCuda (const double2 &a, const double2 &b, const double2 &c, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w) |
| template<int NXZ, template< int MXZ, typename Float, typename FloatN > class Functor, typename write , typename T > | |
| void | multiblasCuda (const coeff_array< T > &a, const coeff_array< T > &b, const coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
| template<int NXZ, typename doubleN , typename ReduceType , template< int MXZ, typename ReducerType, typename Float, typename FloatN > class Reducer, typename write , bool siteUnroll, typename T > | |
| void | multiReduceCuda (doubleN result[], const reduce::coeff_array< T > &a, const reduce::coeff_array< T > &b, const reduce::coeff_array< T > &c, CompositeColorSpinorField &x, CompositeColorSpinorField &y, CompositeColorSpinorField &z, CompositeColorSpinorField &w) |
| template<typename doubleN , typename ReduceType , template< typename ReducerType, typename Float, typename FloatN > class Reducer, int writeX, int writeY, int writeZ, int writeW, int writeV, bool siteUnroll> | |
| doubleN | reduceCuda (const double2 &a, const double2 &b, ColorSpinorField &x, ColorSpinorField &y, ColorSpinorField &z, ColorSpinorField &w, ColorSpinorField &v) |
| void mixed::blasCuda | ( | const double2 & | a, |
| const double2 & | b, | ||
| const double2 & | c, | ||
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w | ||
| ) |
Driver for generic blas routine with four loads and two stores.
Definition at line 8 of file blas_mixed_core.h.
References a, b, c, checkLocation, errorQuda, f, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.
| void mixed::multiblasCuda | ( | const coeff_array< T > & | a, |
| const coeff_array< T > & | b, | ||
| const coeff_array< T > & | c, | ||
| CompositeColorSpinorField & | x, | ||
| CompositeColorSpinorField & | y, | ||
| CompositeColorSpinorField & | z, | ||
| CompositeColorSpinorField & | w | ||
| ) |
Driver for generic blas routine with four loads and two stores.
Definition at line 8 of file multi_blas_mixed_core.h.
References a, b, c, checkLocation, errorQuda, Nspin, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.
| void mixed::multiReduceCuda | ( | doubleN | result[], |
| const reduce::coeff_array< T > & | a, | ||
| const reduce::coeff_array< T > & | b, | ||
| const reduce::coeff_array< T > & | c, | ||
| CompositeColorSpinorField & | x, | ||
| CompositeColorSpinorField & | y, | ||
| CompositeColorSpinorField & | z, | ||
| CompositeColorSpinorField & | w | ||
| ) |
Driver for multi-reduce with up to five vectors.
Definition at line 8 of file multi_reduce_mixed_core.h.
References a, b, c, errorQuda, Nspin, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, w, x, y, and z.
| doubleN mixed::reduceCuda | ( | const double2 & | a, |
| const double2 & | b, | ||
| ColorSpinorField & | x, | ||
| ColorSpinorField & | y, | ||
| ColorSpinorField & | z, | ||
| ColorSpinorField & | w, | ||
| ColorSpinorField & | v | ||
| ) |
Driver for generic reduction routine with two loads.
| ReduceType | |
| siteUnroll | If this is true, then one site corresponds to exactly one thread. |
Definition at line 23 of file reduce_mixed_core.h.
References a, b, quda::blas::bytes, checkLocation, errorQuda, float, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, reduceDoubleArray(), streams, value, w, x, y, and z.

1.8.14