QUDA
v0.7.0
A library for QCD on GPUs
|
Typedefs | |
typedef std::complex< double > | Complex |
typedef struct quda::kernel_param_s | kernel_param_t |
typedef struct quda::llfat_kernel_param_s | llfat_kernel_param_t |
typedef std::map< TuneKey, TuneParam > | map |
Functions | |
void | initBlas () |
void | endBlas (void) |
void | setBlasParam (int kernel, int prec, int threads, int blocks) |
double | norm2 (const ColorSpinorField &) |
void | zeroCuda (cudaColorSpinorField &a) |
void | copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src) |
double | axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
double | normCuda (const cudaColorSpinorField &b) |
double | reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b) |
void | reDotProductCuda (double *result, std::vector< cudaColorSpinorField * > &a, std::vector< cudaColorSpinorField * > &b) |
double | xmyNormCuda (cudaColorSpinorField &a, cudaColorSpinorField &b) |
double2 | reDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b) |
void | axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y) |
void | axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
void | axCuda (const double &a, cudaColorSpinorField &x) |
void | xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
void | xpayCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y) |
void | mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y) |
void | axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b) |
void | axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c) |
void | caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y) |
void | caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
void | cxpaypbzCuda (cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z) |
void | caxpbypzYmbwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &) |
Complex | cDotProductCuda (cudaColorSpinorField &, cudaColorSpinorField &) |
void | cDotProductCuda (Complex *result, std::vector< cudaColorSpinorField * > &a, std::vector< cudaColorSpinorField * > &b) |
Complex | xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z) |
double3 | cDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b) |
double3 | cDotProductNormBCuda (cudaColorSpinorField &a, cudaColorSpinorField &b) |
double3 | caxpbypzYmbwcDotProductUYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u) |
void | cabxpyAxCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y) |
double | caxpyNormCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
void | caxpyXmazCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z) |
double | caxpyXmazNormXCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z) |
double | cabxpyAxNormCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y) |
void | caxpbypzCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &) |
void | caxpbypczpwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &) |
Complex | caxpyDotzyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z) |
Complex | axpyCGNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y) |
double3 | HeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &r) |
double3 | xpyHeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &r) |
void | tripleCGUpdateCuda (const double &alpha, const double &beta, cudaColorSpinorField &q, cudaColorSpinorField &r, cudaColorSpinorField &x, cudaColorSpinorField &p) |
double3 | tripleCGReductionCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z) |
double | axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y) |
double | normCpu (const cpuColorSpinorField &b) |
double | reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
double | xmyNormCpu (const cpuColorSpinorField &a, cpuColorSpinorField &b) |
void | axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y) |
void | axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y) |
void | axCpu (const double &a, cpuColorSpinorField &x) |
void | xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y) |
void | xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y) |
void | mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y) |
void | axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b) |
void | axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c) |
void | caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y) |
void | caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y) |
void | cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z) |
void | caxpbypzYmbwCpu (const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &) |
Complex | cDotProductCpu (const cpuColorSpinorField &, const cpuColorSpinorField &) |
Complex | xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z) |
double3 | cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
double3 | cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b) |
double3 | caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u) |
void | cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) |
double | caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y) |
void | caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z) |
double | caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z) |
double | cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y) |
void | caxpbypzCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &) |
void | caxpbypczpwCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &) |
Complex | caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z) |
double3 | HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r) |
double3 | xpyHeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r) |
std::ostream & | operator<< (std::ostream &output, const CloverFieldParam &param) |
void | computeClover (CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location) |
void | computeCloverSigmaTrace (GaugeField &gauge, const CloverField &clover, int dir1, int dir2, QudaFieldLocation location) |
void | copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0) |
void | cloverDerivative (cudaGaugeField &out, cudaGaugeField &gauge, cudaGaugeField &oprod, int mu, int nu, double coeff, QudaParity parity, int conjugate) |
void | cloverInvert (CloverField &clover, bool computeTraceLog, QudaFieldLocation location) |
void | copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0) |
void | genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) |
int | genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) |
void | genericPrintVector (cpuColorSpinorField &a, unsigned int x) |
void | exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p) |
void | copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm) |
template<typename Float , int Ns, int Nc> | |
__device__ void | load_shared (typename mapper< Float >::type v[Ns *Nc *2], Float *field, int x, int volume) |
template<typename Float , int Ns, int Nc> | |
__device__ void | save_shared (Float *field, const typename mapper< Float >::type v[Ns *Nc *2], int x, int volumeCB) |
template<typename ValueType > | |
__host__ __device__ ValueType | cos (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | sin (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | tan (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | acos (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | asin (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | atan (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | atan2 (ValueType x, ValueType y) |
template<typename ValueType > | |
__host__ __device__ ValueType | cosh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | sinh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | tanh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | exp (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | log (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | log10 (ValueType x) |
template<typename ValueType , typename ExponentType > | |
__host__ __device__ ValueType | pow (ValueType x, ExponentType e) |
template<typename ValueType > | |
__host__ __device__ ValueType | sqrt (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | abs (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | conj (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | abs (const complex< ValueType > &z) |
Returns the magnitude of z. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | arg (const complex< ValueType > &z) |
Returns the phase angle of z. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | norm (const complex< ValueType > &z) |
Returns the magnitude of z squared. More... | |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | conj (const complex< ValueType > &z) |
Returns the complex conjugate of z. More... | |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | polar (const ValueType &m, const ValueType &theta=0) |
Returns the complex with magnitude m and angle theta in radians. More... | |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator* (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator* (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<> | |
__host__ __device__ complex < float > | operator/ (const complex< float > &lhs, const complex< float > &rhs) |
template<> | |
__host__ __device__ complex < double > | operator/ (const complex< double > &lhs, const complex< double > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator+ (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator+ (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator- (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator- (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator+ (const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator- (const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | cos (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | cosh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | exp (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | log (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | log10 (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | pow (const complex< ValueType > &z, const int &n) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | pow (const complex< ValueType > &z, const ValueType &x) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | pow (const complex< ValueType > &z, const complex< ValueType > &z2) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | pow (const ValueType &x, const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | sin (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | sinh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | sqrt (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | tan (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | tanh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | acos (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | asin (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | atan (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | acosh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | asinh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | atanh (const complex< ValueType > &z) |
template<typename ValueType , class charT , class traits > | |
std::basic_ostream< charT, traits > & | operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z) |
template<typename ValueType , typename charT , class traits > | |
std::basic_istream< charT, traits > & | operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator/ (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex < ValueType > | operator/ (const ValueType &lhs, const complex< ValueType > &rhs) |
template<> | |
__host__ __device__ complex < float > | operator/ (const float &lhs, const complex< float > &rhs) |
template<> | |
__host__ __device__ complex < double > | operator/ (const double &lhs, const complex< double > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const complex< ValueType > &lhs, const ValueType &rhs) |
template<> | |
__host__ __device__ float | abs (const complex< float > &z) |
template<> | |
__host__ __device__ double | abs (const complex< double > &z) |
template<> | |
__host__ __device__ float | arg (const complex< float > &z) |
template<> | |
__host__ __device__ double | arg (const complex< double > &z) |
template<> | |
__host__ __device__ complex < float > | polar (const float &magnitude, const float &angle) |
template<> | |
__host__ __device__ complex < double > | polar (const double &magnitude, const double &angle) |
template<> | |
__host__ __device__ complex < float > | cos (const complex< float > &z) |
template<> | |
__host__ __device__ complex < float > | cosh (const complex< float > &z) |
template<> | |
__host__ __device__ complex < float > | exp (const complex< float > &z) |
template<> | |
__host__ __device__ complex < float > | log (const complex< float > &z) |
template<> | |
__host__ __device__ complex < float > | pow (const float &x, const complex< float > &exponent) |
template<> | |
__host__ __device__ complex < float > | sin (const complex< float > &z) |
template<> | |
__host__ __device__ complex < float > | sinh (const complex< float > &z) |
template<typename ValueType > | |
__host__ __device__ complex < float > | sqrt (const complex< float > &z) |
template<typename ValueType > | |
__host__ __device__ complex < float > | atanh (const complex< float > &z) |
void | contractCuda (const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const QudaParity parity) |
void | contractCuda (const cudaColorSpinorField &x, const cudaColorSpinorField &y, void *result, const QudaContractType contract_type, const int tSlice, const QudaParity parity) |
void | gamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in) |
void | covDev (cudaColorSpinorField *out, cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int mu, TimeProfile &profile) |
void | setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc) |
void | setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc) |
void | setKernelPackT (bool pack) |
bool | getKernelPackT () |
void | setTwistPack (bool pack) |
bool | getTwistPack () |
void | setPackComms (const int *commDim) |
bool | getDslashLaunch () |
void | createDslashEvents () |
void | destroyDslashEvents () |
void | wilsonDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | cloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | asymCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | cloverCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover clover, const cudaColorSpinorField *in, const int oddBit) |
void | domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH) |
void | domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, const int DS_type, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | MDWFDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim, const int DS_type, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | staggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | improvedStaggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &fatGauge, const cudaGaugeField &longGauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | twistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | ndegTwistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH) |
void | twistedCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover *clover, const FullClover *cloverInv, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const QudaTwistCloverDslashType type, const double &kappa, const double &mu, const double &epsilon, const double &k, const int *commDim, TimeProfile &profile, const QudaDslashPolicy &dslashPolicy=QUDA_DSLASH2) |
void | twistGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type twist) |
ndeg tm: More... | |
void | twistCloverGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type twist, const FullClover *clov, const FullClover *clovInv, const int parity) |
void | packFace (void *ghost_buf, cudaColorSpinorField &in, const int nFace, const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const double a=0.0, const double b=0.0) |
void | packFaceExtended (void *ghost_buf, cudaColorSpinorField &field, const int nFace, const int R[], const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const bool unpack=false) |
void | packFace (void *ghost_buf, cudaColorSpinorField &in, FullClover &clov, FullClover &clovInv, const int nFace, const int dagger, const int parity, const int dim, const int face_num, const cudaStream_t &stream, const double a=0.0) |
void | loadLinkToGPU (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param) |
void | loadLinkToGPU_ex (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge) |
void | loadLinkToGPU_gf (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param) |
void | storeLinkToCPU (cpuGaugeField *cpuGauge, cudaGaugeField *cudaGauge, QudaGaugeParam *param) |
void | packGhostStaple (int *X, void *even, void *odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void **fwd_nbr_buf_gpu, void **back_nbr_buf_gpu, void **fwd_nbr_buf, void **back_nbr_buf, cudaStream_t *stream) |
void | unpackGhostStaple (int *X, void *_even, void *_odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void **fwd_nbr_buf, void **back_nbr_buf, cudaStream_t *stream) |
void | pack_ghost_all_staples_cpu (void *staple, void **cpuGhostStapleBack, void **cpuGhostStapleFwd, int nFace, QudaPrecision precision, int *X) |
void | pack_ghost_all_links (void **cpuLink, void **cpuGhostBack, void **cpuGhostFwd, int dir, int nFace, QudaPrecision precision, int *X) |
void | pack_gauge_diag (void *buf, int *X, void **sitelink, int nu, int mu, int dir1, int dir2, QudaPrecision prec) |
void | fermion_force_init_cuda (QudaGaugeParam *param) |
void | fermion_force_cuda (double eps, double weight1, double weight2, void *act_path_coeff, FullHw cudaHw, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaMom, QudaGaugeParam *param) |
__host__ __device__ double2 | operator+ (const double2 &x, const double2 &y) |
__host__ __device__ double2 | operator- (const double2 &x, const double2 &y) |
__host__ __device__ float2 | operator- (const float2 &x, const float2 &y) |
__host__ __device__ float4 | operator- (const float4 &x, const float4 &y) |
__host__ __device__ double3 | operator+ (const double3 &x, const double3 &y) |
__host__ __device__ float4 | operator* (const float a, const float4 x) |
__host__ __device__ float2 | operator* (const float a, const float2 x) |
__host__ __device__ double2 | operator* (const double a, const double2 x) |
__host__ __device__ double4 | operator* (const double a, const double4 x) |
__host__ __device__ float2 | operator+ (const float2 x, const float2 y) |
__host__ __device__ float4 | operator+ (const float4 x, const float4 y) |
__host__ __device__ float4 | operator+= (float4 &x, const float4 y) |
__host__ __device__ float2 | operator+= (float2 &x, const float2 y) |
__host__ __device__ double2 | operator+= (double2 &x, const double2 y) |
__host__ __device__ double3 | operator+= (double3 &x, const double3 y) |
__host__ __device__ float4 | operator-= (float4 &x, const float4 y) |
__host__ __device__ float2 | operator-= (float2 &x, const float2 y) |
__host__ __device__ double2 | operator-= (double2 &x, const double2 y) |
__host__ __device__ float2 | operator*= (float2 &x, const float a) |
__host__ __device__ double2 | operator*= (double2 &x, const float a) |
__host__ __device__ float4 | operator*= (float4 &a, const float &b) |
__host__ __device__ double2 | operator*= (double2 &a, const double &b) |
__host__ __device__ double4 | operator*= (double4 &a, const double &b) |
__host__ __device__ float2 | operator- (const float2 &x) |
__host__ __device__ double2 | operator- (const double2 &x) |
__forceinline__ __host__ __device__ float | max_fabs (const float4 &c) |
__forceinline__ __host__ __device__ float | max_fabs (const float2 &b) |
__forceinline__ __host__ __device__ double | max_fabs (const double4 &c) |
__forceinline__ __host__ __device__ double | max_fabs (const double2 &b) |
__forceinline__ __host__ __device__ float2 | make_FloatN (const double2 &a) |
__forceinline__ __host__ __device__ float4 | make_FloatN (const double4 &a) |
__forceinline__ __host__ __device__ double2 | make_FloatN (const float2 &a) |
__forceinline__ __host__ __device__ double4 | make_FloatN (const float4 &a) |
__forceinline__ __host__ __device__ short4 | make_shortN (const float4 &a) |
__forceinline__ __host__ __device__ short2 | make_shortN (const float2 &a) |
__forceinline__ __host__ __device__ short4 | make_shortN (const double4 &a) |
__forceinline__ __host__ __device__ short2 | make_shortN (const double2 &a) |
std::ostream & | operator<< (std::ostream &output, const GaugeFieldParam &param) |
double | norm2 (const cudaGaugeField &u) |
void | copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0) |
void | copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0) |
void | extractGaugeGhost (const GaugeField &u, void **ghost) |
void | extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract) |
double | maxGauge (const GaugeField &u) |
void | applyGaugePhase (GaugeField &u) |
template<typename Float > | |
__device__ __host__ void | accumulateComplexProduct (Float *a, const Float *b, const Float *c, Float sign) |
template<typename Float > | |
__device__ __host__ void | complexProduct (Float *a, const Float *b, const Float *c) |
template<typename Float > | |
__device__ __host__ void | complexDotProduct (Float *a, const Float *b, const Float *c) |
template<typename Float > | |
__device__ __host__ void | complexQuotient (Float *a, const Float *b, const Float *c) |
template<typename Float > | |
__device__ __host__ void | accumulateConjugateProduct (Float *a, const Float *b, const Float *c, int sign) |
template<typename Float > | |
__device__ __host__ void | complexConjugateProduct (Float *a, const Float *b, const Float *c) |
template<typename Float > | |
__device__ __host__ Float | timeBoundary (int idx, const int X[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice) |
template<typename Float > | |
__device__ __host__ Float | timeBoundary (int idx, const int X[QUDA_MAX_DIM], const int R[QUDA_MAX_DIM], QudaTboundary tBoundary, bool isFirstTimeSlice, bool isLastTimeSlice, QudaGhostExchange ghostExchange) |
void | gauge_force_init_cuda (QudaGaugeParam *param, int max_length) |
void | gauge_force_cuda (cudaGaugeField &cudaMom, double eb3, cudaGaugeField &cudaSiteLink, QudaGaugeParam *param, int ***input_path, int *length, double *path_coeff, int num_paths, int max_length) |
double | plaquette (const GaugeField &data, QudaFieldLocation location) |
void | APEStep (GaugeField &dataDs, const GaugeField &dataOr, double alpha, QudaFieldLocation location) |
void | updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact) |
void | setUnitarizeLinksPadding (int input_padding, int output_padding) |
void | setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error, bool check_unitarization=true) |
void | unitarizeLinksCuda (const QudaGaugeParam &param, cudaGaugeField &infield, cudaGaugeField *outfield, int *num_failures) |
void | unitarizeLinksCPU (const QudaGaugeParam &param, cpuGaugeField &infield, cpuGaugeField *outfield) |
bool | isUnitary (const QudaGaugeParam &param, cpuGaugeField &field, double max_error) |
void | completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL) |
std::ostream & | operator<< (std::ostream &output, const LatticeFieldParam &param) |
void | llfat_cuda (cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff) |
void | llfat_cuda_ex (cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff) |
void | llfat_init_cuda (QudaGaugeParam *param) |
void | llfat_init_cuda_ex (QudaGaugeParam *param_ex) |
void | computeLongLinkCuda (void *outEven, void *outOdd, const void *const inEven, const void *const inOdd, double coeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam) |
void | computeGenStapleFieldParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream) |
void | computeGenStapleFieldParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam) |
void | siteComputeGenStapleParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream) |
void | siteComputeGenStapleParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam) |
void | llfatOneLinkKernel (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff) |
void | llfatOneLinkKernel_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff, llfat_kernel_param_t kparam) |
void | computeFatLinkCore (cudaGaugeField *cudaSiteLink, double *act_path_coeff, QudaGaugeParam *qudaGaugeParam, QudaComputeFatMethod method, cudaGaugeField *cudaFatLink, cudaGaugeField *cudaLongLink, TimeProfile &profile) |
void | printPeakMemUsage () |
void | assertAllMemFree () |
void * | device_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | safe_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | pinned_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | mapped_malloc_ (const char *func, const char *file, int line, size_t size) |
void | device_free_ (const char *func, const char *file, int line, void *ptr) |
void | host_free_ (const char *func, const char *file, int line, void *ptr) |
void | link_format_cpu_to_gpu (void *dst, void *src, int reconstruct, int Vh, int pad, int ghostV, QudaPrecision prec, QudaGaugeFieldOrder cpu_order, cudaStream_t stream) |
void | link_format_gpu_to_cpu (void *dst, void *src, int Vh, int stride, QudaPrecision prec, cudaStream_t stream) |
void | collectGhostStaple (int *X, void *even, void *odd, int volumeCB, int stride, QudaPrecision precision, void *ghost_staple_gpu, int dir, int whichway, cudaStream_t *stream) |
template<typename T1 , typename T2 > | |
__host__ __device__ void | copy (T1 &a, const T2 &b) |
template<> | |
__host__ __device__ void | copy (float &a, const short &b) |
template<> | |
__host__ __device__ void | copy (short &a, const float &b) |
void | createStaggeredOprodEvents () |
void | destroyStaggeredOprodEvents () |
void | computeStaggeredOprod (cudaGaugeField &out, cudaColorSpinorField &in, FaceBuffer &facebuffer, const unsigned int parity, const double coeff, const unsigned int displacement) |
void | computeStaggeredOprod (cudaGaugeField &outA, cudaGaugeField &outB, cudaColorSpinorField &inEven, cudaColorSpinorField &inOdd, FaceBuffer &faceBuffer, const unsigned int parity, const double coeff[2]) |
void | loadTuneCache (QudaVerbosity verbosity) |
void | saveTuneCache (QudaVerbosity verbosity) |
TuneParam & | tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity) |
template<typename Float > | |
void | axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N) |
template<typename Float > | |
void | caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N) |
template<typename Float > | |
void | caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N) |
template<typename Float > | |
double | norm (const Float *a, const int N) |
template<typename Float > | |
double | reDotProduct (const Float *a, const Float *b, const int N) |
template<typename Float > | |
Complex | cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N) |
template<typename Float > | |
double3 | HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint) |
double3 | HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r) |
void | initReduce () |
void | endReduce () |
cudaStream_t * | getBlasStream () |
__device__ void | caxpy_ (const float2 &a, const float4 &x, float4 &y) |
__device__ void | caxpy_ (const float2 &a, const float2 &x, float2 &y) |
__device__ void | caxpy_ (const double2 &a, const double2 &x, double2 &y) |
__device__ void | caxpby_ (const float2 &a, const float4 &x, const float2 &b, float4 &y) |
__device__ void | caxpby_ (const float2 &a, const float2 &x, const float2 &b, float2 &y) |
__device__ void | caxpby_ (const double2 &a, const double2 &x, const double2 &b, double2 &y) |
__device__ void | cxpaypbz_ (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z) |
__device__ void | cxpaypbz_ (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z) |
__device__ void | cxpaypbz_ (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z) |
void | setGhostSpinor (bool value) |
std::ostream & | operator<< (std::ostream &out, const ColorSpinorField &a) |
template<typename Float > | |
ColorSpinorFieldOrder< Float > * | createOrder (const cpuColorSpinorField &a) |
template<class T > | |
void | random (T &t) |
template<class T > | |
void | point (T &t, int x, int s, int c) |
template<class U , class V > | |
int | compareSpinor (const U &u, const V &v, const int tol) |
template<class Order > | |
void | print_vector (const Order &o, unsigned int x) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis > | |
void | packSpinor (OutOrder &outOrder, const InOrder &inOrder, Basis basis, int volume) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis > | |
__global__ void | packSpinorKernel (OutOrder outOrder, const InOrder inOrder, Basis basis, int volume) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder > | |
void | genericCopyColorSpinor (OutOrder &outOrder, const InOrder &inOrder, QudaGammaBasis dstBasis, QudaGammaBasis srcBasis, const ColorSpinorField &out, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder > | |
void | genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, QudaFieldLocation location, FloatOut *Out, float *outNorm) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc> | |
void | genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm) |
template<int Ns, typename dstFloat , typename srcFloat > | |
void | copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm) |
template<typename dstFloat , typename srcFloat > | |
void | CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0) |
void | copyGenericGaugeDoubleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeHalfOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeSingleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | checkMomOrder (const GaugeField &u) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
__device__ __host__ void | copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
__global__ void | copyGaugeExKernel (CopyGaugeExArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGaugeEx (OutOrder outOrder, const InOrder inOrder, const int *E, const int *X, const int *faceVolumeCB, const GaugeField &meta, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int length, typename InOrder > | |
void | copyGaugeEx (const InOrder &inOrder, const int *X, GaugeField &out, QudaFieldLocation location, FloatOut *Out) |
template<typename FloatOut , typename FloatIn , int length> | |
void | copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In) |
template<typename FloatOut , typename FloatIn > | |
void | copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGauge (CopyGaugeArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
__global__ void | copyGaugeKernel (CopyGaugeArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGhost (CopyGaugeArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
__global__ void | copyGhostKernel (CopyGaugeArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGauge (OutOrder outOrder, const InOrder inOrder, int volume, const int *faceVolumeCB, int nDim, int geometry, const GaugeField &out, QudaFieldLocation location, int type) |
template<typename FloatOut , typename FloatIn , int length, typename InOrder > | |
void | copyGauge (const InOrder &inOrder, GaugeField &out, QudaFieldLocation location, FloatOut *Out, FloatOut **outGhost, int type) |
template<typename FloatOut , typename FloatIn , int length> | |
void | copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) |
template<typename FloatOut , typename FloatIn > | |
void | copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) |
std::ostream & | operator<< (std::ostream &out, const cudaColorSpinorField &a) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend> | |
__device__ __host__ void | copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg, int X) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend> | |
__global__ void | copyInteriorKernel (CopySpinorExArg< OutOrder, InOrder, Basis > arg) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend> | |
void | copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis > | |
void | copySpinorEx (OutOrder outOrder, const InOrder inOrder, const Basis basis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder > | |
void | copySpinorEx (OutOrder outOrder, InOrder inOrder, const QudaGammaBasis outBasis, const QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder > | |
void | extendedCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, QudaFieldLocation location, FloatOut *Out, float *outNorm) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc> | |
void | extendedCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, const int parity, const QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm) |
template<int Ns, typename dstFloat , typename srcFloat > | |
void | copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm) |
template<typename dstFloat , typename srcFloat > | |
void | CopyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0) |
template<typename Float , int length, int nDim, typename Order > | |
void | extractGhost (ExtractGhostArg< Order, nDim > arg) |
template<typename Float , int length, int nDim, typename Order > | |
__global__ void | extractGhostKernel (ExtractGhostArg< Order, nDim > arg) |
template<typename Float , int length, typename Order > | |
void | extractGhost (Order order, const GaugeField &u, QudaFieldLocation location) |
template<typename Float > | |
void | extractGhost (const GaugeField &u, Float **Ghost) |
template<typename Float , int length, typename Arg > | |
__device__ __host__ void | extractor (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity) |
template<typename Float , int length, typename Arg > | |
__device__ __host__ void | injector (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity) |
template<typename Float , int length, int nDim, typename Order , bool extract> | |
void | extractGhostEx (ExtractGhostExArg< Order, nDim > arg) |
template<typename Float , int length, int nDim, typename Order , bool extract> | |
__global__ void | extractGhostExKernel (ExtractGhostExArg< Order, nDim > arg) |
template<typename Float , int length, typename Order > | |
void | extractGhostEx (Order order, const int dim, const int *surfaceCB, const int *E, const int *R, bool extract, const GaugeField &u, QudaFieldLocation location) |
template<typename Float > | |
void | extractGhostEx (const GaugeField &u, int dim, const int *R, Float **Ghost, bool extract) |
template<int oddBit, typename Float , typename Float2 , typename FloatN > | |
__global__ void | GAUGE_FORCE_KERN_NAME (Float2 *momEven, Float2 *momOdd, const int dir, const double eb3, const FloatN *linkEven, const FloatN *linkOdd, const int *input_path, const int *length, const double *path_coeff, const int num_paths, const kernel_param_t kparam) |
void | gauge_force_cuda_dir (cudaGaugeField &cudaMom, const int dir, const double eb3, const cudaGaugeField &cudaSiteLink, const QudaGaugeParam *param, int **input_path, const int *length, const double *path_coeff, const int num_paths, const int max_length) |
void | printLaunchTimer () |
void | setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc) |
void | createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve) |
void | massRescale (cudaColorSpinorField &b, QudaInvertParam &param) |
void | fillInnerSolveParam (SolverParam &inner, const SolverParam &outer) |
double | resNorm (const DiracMatrix &mat, cudaColorSpinorField &b, cudaColorSpinorField &x) |
int | reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta) |
void | fillInitCGSolveParam (SolverParam &initCGparam) |
double | timeInterval (struct timeval start, struct timeval end) |
void | orthoDir (Complex **beta, cudaColorSpinorField *Ap[], int k) |
void | backSubs (const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n) |
void | updateSolution (cudaColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, cudaColorSpinorField *p[]) |
void | print (const double d[], int n) |
void | updateAlphaZeta (double *alpha, double *zeta, double *zeta_old, const double *r2, const double *beta, const double pAp, const double *offset, const int nShift, const int j_low) |
__device__ __host__ int | linkIndex (int x[], int dx[], const int X[4]) |
__device__ __host__ void | getCoords (int x[4], int cb_index, const int X[4], int parity) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
__host__ __device__ void | completeKSForceCore (KSForceArg< Oprod, Gauge, Mom > &arg, int idx) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
__global__ void | completeKSForceKernel (KSForceArg< Oprod, Gauge, Mom > arg) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
void | completeKSForceCPU (KSForceArg< Oprod, Gauge, Mom > &arg) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
void | completeKSForce (Oprod oprod, Gauge gauge, Mom mom, int dim[4], const GaugeField &meta, QudaFieldLocation location, long long *flops) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
__host__ __device__ void | computeKSLongLinkForceCore (KSLongLinkArg< Result, Oprod, Gauge > &arg, int idx) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
__global__ void | computeKSLongLinkForceKernel (KSLongLinkArg< Result, Oprod, Gauge > arg) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
void | computeKSLongLinkForceCPU (KSLongLinkArg< Result, Oprod, Gauge > &arg) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
void | computeKSLongLinkForce (Result res, Oprod oprod, Gauge gauge, int dim[4], const GaugeField &meta, QudaFieldLocation location) |
template<typename Float > | |
void | computeKSLongLinkForce (GaugeField &result, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location) |
template<typename Float , int Nc, typename Order > | |
double | maxGauge (const Order order, int volume, int nDim) |
template<int N, typename FloatN , typename Float2 > | |
__global__ void | do_link_format_cpu_to_gpu (FloatN *dst, Float2 *src, int reconstruct, int Vh, int pad, int ghostV, size_t threads) |
template<int N, typename FloatN , typename Float2 > | |
__global__ void | do_link_format_cpu_to_gpu_milc (FloatN *dst, Float2 *src, int reconstruct, int Vh, int pad, int ghostV, size_t threads) |
template<typename FloatN > | |
__global__ void | do_link_format_gpu_to_cpu (FloatN *dst, FloatN *src, int Vh, int stride) |
template<int dir, int whichway, typename Float2 > | |
__global__ void | collectGhostStapleKernel (Float2 *out, Float2 *in, int parity, GhostStapleParam param) |
template<class Cmplx > | |
__device__ __host__ Cmplx | makeComplex (const typename RealTypeId< Cmplx >::Type &a, const typename RealTypeId< Cmplx >::Type &b) |
__device__ __host__ double2 | makeComplex (const double &a, const double &b) |
__device__ __host__ float2 | makeComplex (const float &a, const float &b) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator- (const Cmplx &a) |
template<class Cmplx > | |
__device__ __host__ Cmplx & | operator+= (Cmplx &a, const Cmplx &b) |
template<class Cmplx > | |
__device__ __host__ Cmplx & | operator-= (Cmplx &a, const Cmplx &b) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator+ (const Cmplx &a, const Cmplx &b) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator- (const Cmplx &a, const Cmplx &b) |
__device__ __host__ double2 | operator* (const double2 &a, const double &scalar) |
__device__ __host__ float2 | operator* (const float2 &a, const float &scalar) |
template<class Cmplx , class Float > | |
__device__ __host__ Cmplx | operator+ (const Cmplx &a, const Float &scalar) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator/ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator+ (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator- (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator- (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a) |
template<class Cmplx > | |
__device__ __host__ Cmplx | operator* (const Cmplx &a, const Cmplx &b) |
template<class Cmplx > | |
__device__ __host__ Cmplx | conj (const Cmplx &a) |
__device__ __host__ double | conj (const double &a) |
__device__ __host__ float | conj (const float &a) |
template<typename Cmplx > | |
__device__ __host__ Cmplx | Conj (const Cmplx &a) |
template<class Cmplx > | |
__device__ __host__ Cmplx | getPreciseInverse (const Cmplx &z) |
std::ostream & | operator<< (std::ostream &os, const float2 &z) |
std::ostream & | operator<< (std::ostream &os, const double2 &z) |
template<int N> | |
__device__ __host__ int | index (int i, int j) |
template<class T > | |
__device__ __host__ T | getTrace (const Matrix< T, 3 > &a) |
template<class T > | |
__device__ __host__ T | getDeterminant (const Matrix< T, 3 > &a) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator+ (const Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator+= (Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator-= (Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator- (const Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , int N, class S > | |
__device__ __host__ Matrix< T, N > | operator* (const S &scalar, const Matrix< T, N > &a) |
template<class T , int N, class S > | |
__device__ __host__ Matrix< T, N > | operator* (const Matrix< T, N > &a, const S &scalar) |
template<class T , int N, class S > | |
__device__ __host__ Matrix< T, N > | operator*= (Matrix< T, N > &a, const S &scalar) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator- (const Matrix< T, N > &a) |
template<class T > | |
__device__ __host__ Matrix< T, 3 > | operator* (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator*= (Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , class U > | |
__device__ __host__ Matrix < typename PromoteTypeId< T, U > ::Type, 3 > | operator* (const Matrix< T, 3 > &a, const Matrix< U, 3 > &b) |
template<class T > | |
__device__ __host__ Matrix< T, 2 > | operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | conj (const Matrix< T, N > &other) |
template<class T > | |
__device__ __host__ void | computeMatrixInverse (const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv) |
template<class T , int N> | |
__device__ __host__ void | setIdentity (Matrix< T, N > *m) |
template<int N> | |
__device__ __host__ void | setIdentity (Matrix< float2, N > *m) |
template<int N> | |
__device__ __host__ void | setIdentity (Matrix< double2, N > *m) |
template<class T , int N> | |
__device__ __host__ void | setZero (Matrix< T, N > *m) |
template<int N> | |
__device__ __host__ void | setZero (Matrix< float2, N > *m) |
template<int N> | |
__device__ __host__ void | setZero (Matrix< double2, N > *m) |
template<class T , int N> | |
__device__ __host__ void | copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a) |
template<class T , int N> | |
__device__ __host__ void | outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m) |
template<class T , int N> | |
__device__ __host__ void | outerProd (const T(&a)[N], const T(&b)[N], Matrix< T, N > *m) |
template<class T , int N> | |
std::ostream & | operator<< (std::ostream &os, const Matrix< T, N > &m) |
template<class T , int N> | |
std::ostream & | operator<< (std::ostream &os, const Array< T, N > &a) |
template<class T > | |
__device__ void | loadLinkVariableFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *link) |
template<class T , int N> | |
__device__ void | loadMatrixFromArray (const T *const array, const int idx, const int stride, Matrix< T, N > *mat) |
__device__ void | loadLinkVariableFromArray (const float2 *const array, const int dir, const int idx, const int stride, Matrix< double2, 3 > *link) |
template<class T , int N> | |
__device__ void | writeMatrixToArray (const Matrix< T, N > &mat, const int idx, const int stride, T *const array) |
__device__ void | appendMatrixToArray (const Matrix< double2, 3 > &mat, const int idx, const int stride, double2 *const array) |
__device__ void | appendMatrixToArray (const Matrix< float2, 3 > &mat, const int idx, const int stride, float2 *const array) |
template<class T > | |
__device__ void | writeLinkVariableToArray (const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, T *const array) |
__device__ void | writeLinkVariableToArray (const Matrix< double2, 3 > &link, const int dir, const int idx, const int stride, float2 *const array) |
template<class T > | |
__device__ void | loadMomentumFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom) |
template<class T , class U > | |
__device__ void | writeMomentumToArray (const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array) |
template<class Cmplx > | |
__device__ __host__ void | computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u) |
void | copyArrayToLink (Matrix< float2, 3 > *link, float *array) |
template<class Cmplx , class Real > | |
void | copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array) |
void | copyLinkToArray (float *array, const Matrix< float2, 3 > &link) |
template<class Cmplx , class Real > | |
void | copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link) |
template<class Cmplx > | |
__host__ __device__ void | printLink (const Matrix< Cmplx, 3 > &link) |
__device__ double | norm2_ (const double2 &a) |
__device__ float | norm2_ (const float2 &a) |
__device__ float | norm2_ (const float4 &a) |
__device__ double | dot_ (const double2 &a, const double2 &b) |
__device__ float | dot_ (const float2 &a, const float2 &b) |
__device__ float | dot_ (const float4 &a, const float4 &b) |
__device__ double2 | dotNormA_ (const double2 &a, const double2 &b) |
__device__ double2 | dotNormA_ (const float2 &a, const float2 &b) |
__device__ double2 | dotNormA_ (const float4 &a, const float4 &b) |
__device__ void | Caxpy_ (const float2 &a, const float4 &x, float4 &y) |
__device__ void | Caxpy_ (const float2 &a, const float2 &x, float2 &y) |
__device__ void | Caxpy_ (const double2 &a, const double2 &x, double2 &y) |
__device__ double2 | cdot_ (const double2 &a, const double2 &b) |
__device__ double2 | cdot_ (const float2 &a, const float2 &b) |
__device__ double2 | cdot_ (const float4 &a, const float4 &b) |
__device__ double3 | cdotNormA_ (const double2 &a, const double2 &b) |
__device__ double3 | cdotNormA_ (const float2 &a, const float2 &b) |
__device__ double3 | cdotNormA_ (const float4 &a, const float4 &b) |
__device__ double3 | cdotNormB_ (const double2 &a, const double2 &b) |
__device__ double3 | cdotNormB_ (const float2 &a, const float2 &b) |
__device__ double3 | cdotNormB_ (const float4 &a, const float4 &b) |
template<IndexType idxType, typename Int > | |
__device__ __forceinline__ int | neighborIndex (const unsigned int &cb_idx, const int(&shift)[4], const bool(&partitioned)[4], const unsigned int &parity) |
template<typename FloatN , int N, typename Output , typename Input > | |
__global__ void | shiftColorSpinorFieldKernel (ShiftQuarkArg< Output, Input > arg) |
template<typename FloatN , int N, typename Output , typename Input > | |
__global__ void | shiftColorSpinorFieldExternalKernel (ShiftQuarkArg< Output, Input > arg) |
void | shiftColorSpinorField (cudaColorSpinorField &dst, const cudaColorSpinorField &src, const unsigned int parity, const unsigned int dim, const int shift) |
Variables | |
unsigned long long | blas_flops |
unsigned long long | blas_bytes |
const int | maxNface = 3 |
const int | Nstream = 1 |
cudaStream_t * | stream |
This code has not been checked. In particular, I suspect it is erroneous in multi-GPU runs, since it looks like the halo (ghost) region isn't being treated here.
Generic Multi Shift Solver
For staggered, the mass is folded into the Dirac operator; otherwise the matrix mass is 'unmodified'.
The lowest offset is in offsets[0]
typedef std::complex< double > quda::Complex |
Definition at line 13 of file eig_variables.h.
typedef struct quda::kernel_param_s quda::kernel_param_t |
typedef struct quda::llfat_kernel_param_s quda::llfat_kernel_param_t |
enum quda::AllocType |
Enumerator | |
---|---|
DEVICE | |
HOST | |
PINNED | |
MAPPED | |
N_ALLOC_TYPE |
Definition at line 14 of file malloc.cpp.
Definition at line 143 of file quda_internal.h.
|
inline |
Definition at line 110 of file complex_quda.h.
|
inline |
Returns the magnitude of z.
Definition at line 827 of file complex_quda.h.
|
inline |
Definition at line 832 of file complex_quda.h.
|
inline |
Definition at line 837 of file complex_quda.h.
|
inline |
Definition at line 9 of file gauge_field_order.h.
|
inline |
Definition at line 40 of file gauge_field_order.h.
|
inline |
Definition at line 50 of file complex_quda.h.
|
inline |
Definition at line 1041 of file complex_quda.h.
|
inline |
Definition at line 1062 of file complex_quda.h.
void quda::APEStep | ( | GaugeField & | dataDs, |
const GaugeField & | dataOr, | ||
double | alpha, | ||
QudaFieldLocation | location | ||
) |
Definition at line 497 of file gauge_ape.cu.
|
inline |
Definition at line 810 of file quda_matrix.h.
|
inline |
Definition at line 819 of file quda_matrix.h.
void quda::applyGaugePhase | ( | GaugeField & | u | ) |
Apply the staggered phase factor to the gauge field.
u | The gauge field to which we apply the staggered phase factors |
Definition at line 261 of file gauge_phase.cu.
|
inline |
Returns the phase angle of z.
Definition at line 843 of file complex_quda.h.
|
inline |
Definition at line 848 of file complex_quda.h.
|
inline |
Definition at line 853 of file complex_quda.h.
|
inline |
Definition at line 55 of file complex_quda.h.
|
inline |
Definition at line 1048 of file complex_quda.h.
|
inline |
Definition at line 1087 of file complex_quda.h.
void quda::assertAllMemFree | ( | ) |
Definition at line 294 of file malloc.cpp.
void quda::asymCloverDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const FullClover | cloverInv, | ||
const cudaColorSpinorField * | in, | ||
const int | oddBit, | ||
const int | daggerBit, | ||
const cudaColorSpinorField * | x, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 118 of file dslash_clover_asym.cu.
|
inline |
Definition at line 60 of file complex_quda.h.
|
inline |
Definition at line 1055 of file complex_quda.h.
|
inline |
Definition at line 65 of file complex_quda.h.
|
inline |
Definition at line 1093 of file complex_quda.h.
|
inline |
Definition at line 1111 of file complex_quda.h.
void quda::axCpu | ( | const double & | a, |
cpuColorSpinorField & | x | ||
) |
Definition at line 60 of file blas_cpu.cpp.
void quda::axCuda | ( | const double & | a, |
cudaColorSpinorField & | x | ||
) |
Definition at line 171 of file blas_quda.cu.
void quda::axpby | ( | const Float & | a, |
const Float * | x, | ||
const Float & | b, | ||
Float * | y, | ||
const int | N | ||
) |
Definition at line 8 of file blas_cpu.cpp.
void quda::axpbyCpu | ( | const double & | a, |
const cpuColorSpinorField & | x, | ||
const double & | b, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 12 of file blas_cpu.cpp.
void quda::axpbyCuda | ( | const double & | a, |
cudaColorSpinorField & | x, | ||
const double & | b, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 82 of file blas_quda.cu.
void quda::axpyBzpcxCpu | ( | const double & | a, |
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y, | ||
const double & | b, | ||
const cpuColorSpinorField & | z, | ||
const double & | c | ||
) |
Definition at line 129 of file blas_cpu.cpp.
void quda::axpyBzpcxCuda | ( | const double & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
const double & | b, | ||
cudaColorSpinorField & | z, | ||
const double & | c | ||
) |
Definition at line 311 of file blas_quda.cu.
Complex quda::axpyCGNormCuda | ( | const double & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 682 of file reduce_quda.cu.
void quda::axpyCpu | ( | const double & | a, |
const cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 31 of file blas_cpu.cpp.
void quda::axpyCuda | ( | const double & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 115 of file blas_quda.cu.
double quda::axpyNormCpu | ( | const double & | a, |
const cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 178 of file blas_cpu.cpp.
double quda::axpyNormCuda | ( | const double & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 321 of file reduce_quda.cu.
void quda::axpyZpbxCpu | ( | const double & | a, |
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y, | ||
const cpuColorSpinorField & | z, | ||
const double & | b | ||
) |
Definition at line 136 of file blas_cpu.cpp.
void quda::axpyZpbxCuda | ( | const double & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z, | ||
const double & | b | ||
) |
Definition at line 338 of file blas_quda.cu.
void quda::backSubs | ( | const Complex * | alpha, |
Complex **const | beta, | ||
const double * | gamma, | ||
Complex * | delta, | ||
int | n | ||
) |
Definition at line 101 of file inv_gcr_quda.cpp.
void quda::cabxpyAxCpu | ( | const double & | a, |
const Complex & | b, | ||
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 259 of file blas_cpu.cpp.
void quda::cabxpyAxCuda | ( | const double & | a, |
const Complex & | b, | ||
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 386 of file blas_quda.cu.
double quda::cabxpyAxNormCpu | ( | const double & | a, |
const Complex & | b, | ||
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 283 of file blas_cpu.cpp.
double quda::cabxpyAxNormCuda | ( | const double & | a, |
const Complex & | b, | ||
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 440 of file reduce_quda.cu.
void quda::caxpby | ( | const std::complex< Float > & | a, |
const std::complex< Float > * | x, | ||
const std::complex< Float > & | b, | ||
std::complex< Float > * | y, | ||
int | N | ||
) |
Definition at line 70 of file blas_cpu.cpp.
__device__ void quda::caxpby_ | ( | const float2 & | a, |
const float4 & | x, | ||
const float2 & | b, | ||
float4 & | y | ||
) |
Functor to perform the operation y = a*x + b*y (complex-valued)
Definition at line 217 of file blas_quda.cu.
__device__ void quda::caxpby_ | ( | const float2 & | a, |
const float2 & | x, | ||
const float2 & | b, | ||
float2 & | y | ||
) |
Definition at line 225 of file blas_quda.cu.
__device__ void quda::caxpby_ | ( | const double2 & | a, |
const double2 & | x, | ||
const double2 & | b, | ||
double2 & | y | ||
) |
Definition at line 231 of file blas_quda.cu.
void quda::caxpbyCpu | ( | const Complex & | a, |
const cpuColorSpinorField & | x, | ||
const Complex & | b, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 92 of file blas_cpu.cpp.
void quda::caxpbyCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
const Complex & | b, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 247 of file blas_quda.cu.
void quda::caxpbypcz | ( | const std::complex< Float > & | a, |
const std::complex< Float > * | x, | ||
const std::complex< Float > & | b, | ||
const std::complex< Float > * | y, | ||
const std::complex< Float > & | c, | ||
std::complex< Float > * | z, | ||
int | N | ||
) |
Definition at line 105 of file blas_cpu.cpp.
void quda::caxpbypczpwCpu | ( | const Complex & | a, |
cpuColorSpinorField & | x, | ||
const Complex & | b, | ||
cpuColorSpinorField & | y, | ||
const Complex & | c, | ||
cpuColorSpinorField & | z, | ||
cpuColorSpinorField & | w | ||
) |
Definition at line 295 of file blas_cpu.cpp.
void quda::caxpbypczpwCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
const Complex & | b, | ||
cudaColorSpinorField & | y, | ||
const Complex & | c, | ||
cudaColorSpinorField & | z, | ||
cudaColorSpinorField & | w | ||
) |
Definition at line 429 of file blas_quda.cu.
void quda::caxpbypzCpu | ( | const Complex & | a, |
cpuColorSpinorField & | x, | ||
const Complex & | b, | ||
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | z | ||
) |
Definition at line 289 of file blas_cpu.cpp.
void quda::caxpbypzCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
const Complex & | b, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 407 of file blas_quda.cu.
double3 quda::caxpbypzYmbwcDotProductUYNormYCpu | ( | const Complex & | a, |
const cpuColorSpinorField & | x, | ||
const Complex & | b, | ||
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | z, | ||
const cpuColorSpinorField & | w, | ||
const cpuColorSpinorField & | u | ||
) |
Definition at line 250 of file blas_cpu.cpp.
double3 quda::caxpbypzYmbwcDotProductUYNormYCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
const Complex & | b, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z, | ||
cudaColorSpinorField & | w, | ||
cudaColorSpinorField & | u | ||
) |
Definition at line 643 of file reduce_quda.cu.
void quda::caxpbypzYmbwCpu | ( | const Complex & | a, |
const cpuColorSpinorField & | x, | ||
const Complex & | b, | ||
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | z, | ||
const cpuColorSpinorField & | w | ||
) |
Definition at line 143 of file blas_cpu.cpp.
void quda::caxpbypzYmbwCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
const Complex & | b, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z, | ||
cudaColorSpinorField & | w | ||
) |
Definition at line 366 of file blas_quda.cu.
__device__ void quda::caxpy_ | ( | const float2 & | a, |
const float4 & | x, | ||
float4 & | y | ||
) |
Functor to perform the operation y += a * x (complex-valued)
Definition at line 180 of file blas_quda.cu.
__device__ void quda::caxpy_ | ( | const float2 & | a, |
const float2 & | x, | ||
float2 & | y | ||
) |
Definition at line 187 of file blas_quda.cu.
__device__ void quda::caxpy_ | ( | const double2 & | a, |
const double2 & | x, | ||
double2 & | y | ||
) |
Definition at line 192 of file blas_quda.cu.
__device__ void quda::Caxpy_ | ( | const float2 & | a, |
const float4 & | x, | ||
float4 & | y | ||
) |
Functor to perform the operation y += a * x (complex-valued)
Definition at line 353 of file reduce_quda.cu.
__device__ void quda::Caxpy_ | ( | const float2 & | a, |
const float2 & | x, | ||
float2 & | y | ||
) |
Definition at line 360 of file reduce_quda.cu.
__device__ void quda::Caxpy_ | ( | const double2 & | a, |
const double2 & | x, | ||
double2 & | y | ||
) |
Definition at line 365 of file reduce_quda.cu.
void quda::caxpyCpu | ( | const Complex & | a, |
const cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 79 of file blas_cpu.cpp.
void quda::caxpyCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 207 of file blas_quda.cu.
Complex quda::caxpyDotzyCpu | ( | const Complex & | a, |
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | z | ||
) |
Definition at line 303 of file blas_cpu.cpp.
Complex quda::caxpyDotzyCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 559 of file reduce_quda.cu.
double quda::caxpyNormCpu | ( | const Complex & | a, |
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 264 of file blas_cpu.cpp.
double quda::caxpyNormCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 388 of file reduce_quda.cu.
void quda::caxpyXmazCpu | ( | const Complex & | a, |
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | z | ||
) |
Definition at line 277 of file blas_cpu.cpp.
void quda::caxpyXmazCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 452 of file blas_quda.cu.
double quda::caxpyXmazNormXCpu | ( | const Complex & | a, |
cpuColorSpinorField & | x, | ||
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | z | ||
) |
Definition at line 270 of file blas_cpu.cpp.
double quda::caxpyXmazNormXCuda | ( | const Complex & | a, |
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 413 of file reduce_quda.cu.
__device__ double2 quda::cdot_ | ( | const double2 & | a, |
const double2 & | b | ||
) |
Returns complex-valued dot product of x and y
Definition at line 449 of file reduce_quda.cu.
__device__ double2 quda::cdot_ | ( | const float2 & | a, |
const float2 & | b | ||
) |
Definition at line 451 of file reduce_quda.cu.
__device__ double2 quda::cdot_ | ( | const float4 & | a, |
const float4 & | b | ||
) |
Definition at line 453 of file reduce_quda.cu.
__device__ double3 quda::cdotNormA_ | ( | const double2 & | a, |
const double2 & | b | ||
) |
First returns the dot product (x,y), then returns the norm of x.
Definition at line 570 of file reduce_quda.cu.
__device__ double3 quda::cdotNormA_ | ( | const float2 & | a, |
const float2 & | b | ||
) |
Definition at line 572 of file reduce_quda.cu.
__device__ double3 quda::cdotNormA_ | ( | const float4 & | a, |
const float4 & | b | ||
) |
Definition at line 574 of file reduce_quda.cu.
__device__ double3 quda::cdotNormB_ | ( | const double2 & | a, |
const double2 & | b | ||
) |
First returns the dot product (x,y), then returns the norm of y.
Definition at line 600 of file reduce_quda.cu.
__device__ double3 quda::cdotNormB_ | ( | const float2 & | a, |
const float2 & | b | ||
) |
Definition at line 602 of file reduce_quda.cu.
__device__ double3 quda::cdotNormB_ | ( | const float4 & | a, |
const float4 & | b | ||
) |
Definition at line 604 of file reduce_quda.cu.
Complex quda::cDotProduct | ( | const std::complex< Float > * | a, |
const std::complex< Float > * | b, | ||
const int | N | ||
) |
Definition at line 211 of file blas_cpu.cpp.
Complex quda::cDotProductCpu | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b | ||
) |
Definition at line 217 of file blas_cpu.cpp.
Complex quda::cDotProductCuda | ( | cudaColorSpinorField & | x, |
cudaColorSpinorField & | y | ||
) |
Definition at line 468 of file reduce_quda.cu.
void quda::cDotProductCuda | ( | Complex * | result, |
std::vector< cudaColorSpinorField * > & | a, | ||
std::vector< cudaColorSpinorField * > & | b | ||
) |
Definition at line 474 of file reduce_quda.cu.
double3 quda::cDotProductNormACpu | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b | ||
) |
Definition at line 237 of file blas_cpu.cpp.
double3 quda::cDotProductNormACuda | ( | cudaColorSpinorField & | a, |
cudaColorSpinorField & | b | ||
) |
Definition at line 591 of file reduce_quda.cu.
double3 quda::cDotProductNormBCpu | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b | ||
) |
Definition at line 243 of file blas_cpu.cpp.
double3 quda::cDotProductNormBCuda | ( | cudaColorSpinorField & | a, |
cudaColorSpinorField & | b | ||
) |
Definition at line 620 of file reduce_quda.cu.
void quda::checkMomOrder | ( | const GaugeField & | u | ) |
Definition at line 14 of file copy_gauge.cu.
void quda::cloverCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const FullClover | clover, | ||
const cudaColorSpinorField * | in, | ||
const int | oddBit | ||
) |
Definition at line 229 of file dslash_quda.cu.
void quda::cloverDerivative | ( | cudaGaugeField & | out, |
cudaGaugeField & | gauge, | ||
cudaGaugeField & | oprod, | ||
int | mu, | ||
int | nu, | ||
double | coeff, | ||
QudaParity | parity, | ||
int | conjugate | ||
) |
Definition at line 369 of file clover_deriv_quda.cu.
void quda::cloverDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const FullClover | cloverInv, | ||
const cudaColorSpinorField * | in, | ||
const int | oddBit, | ||
const int | daggerBit, | ||
const cudaColorSpinorField * | x, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 117 of file dslash_clover.cu.
void quda::cloverInvert | ( | CloverField & | clover, |
bool | computeTraceLog, | ||
QudaFieldLocation | location | ||
) |
This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.
clover | The clover field (contains both the field itself and its inverse) |
computeTraceLog | Whether to compute the trace logarithm of the clover term |
location | The location of the field |
Definition at line 298 of file clover_invert.cu.
void quda::collectGhostStaple | ( | int * | X, |
void * | even, | ||
void * | odd, | ||
int | volumeCB, | ||
int | stride, | ||
QudaPrecision | precision, | ||
void * | ghost_staple_gpu, | ||
int | dir, | ||
int | whichway, | ||
cudaStream_t * | stream | ||
) |
Definition at line 481 of file misc_helpers.cu.
__global__ void quda::collectGhostStapleKernel | ( | Float2 * | out, |
Float2 * | in, | ||
int | parity, | ||
GhostStapleParam | param | ||
) |
Definition at line 403 of file misc_helpers.cu.
int quda::compareSpinor | ( | const U & | u, |
const V & | v, | ||
const int | tol | ||
) |
Definition at line 60 of file color_spinor_util.cu.
void quda::completeKSForce | ( | GaugeField & | mom, |
const GaugeField & | oprod, | ||
const GaugeField & | gauge, | ||
QudaFieldLocation | location, | ||
long long * | flops = NULL |
||
) |
Definition at line 206 of file ks_force_quda.cu.
void quda::completeKSForce | ( | Oprod | oprod, |
Gauge | gauge, | ||
Mom | mom, | ||
int | dim[4], | ||
const GaugeField & | meta, | ||
QudaFieldLocation | location, | ||
long long * | flops | ||
) |
Definition at line 195 of file ks_force_quda.cu.
__host__ __device__ void quda::completeKSForceCore | ( | KSForceArg< Oprod, Gauge, Mom > & | arg, |
int | idx | ||
) |
Definition at line 59 of file ks_force_quda.cu.
void quda::completeKSForceCPU | ( | KSForceArg< Oprod, Gauge, Mom > & | arg | ) |
Definition at line 133 of file ks_force_quda.cu.
__global__ void quda::completeKSForceKernel | ( | KSForceArg< Oprod, Gauge, Mom > | arg | ) |
Definition at line 121 of file ks_force_quda.cu.
|
inline |
Definition at line 47 of file gauge_field_order.h.
|
inline |
Definition at line 23 of file gauge_field_order.h.
|
inline |
Definition at line 16 of file gauge_field_order.h.
|
inline |
Definition at line 31 of file gauge_field_order.h.
void quda::computeClover | ( | CloverField & | clover, |
const GaugeField & | gauge, | ||
double | coeff, | ||
QudaFieldLocation | location | ||
) |
Definition at line 602 of file clover_quda.cu.
void quda::computeCloverSigmaTrace | ( | GaugeField & | gauge, |
const CloverField & | clover, | ||
int | dir1, | ||
int | dir2, | ||
QudaFieldLocation | location | ||
) |
Definition at line 310 of file clover_trace_quda.cu.
void quda::computeFatLinkCore | ( | cudaGaugeField * | cudaSiteLink, |
double * | act_path_coeff, | ||
QudaGaugeParam * | qudaGaugeParam, | ||
QudaComputeFatMethod | method, | ||
cudaGaugeField * | cudaFatLink, | ||
cudaGaugeField * | cudaLongLink, | ||
TimeProfile & | profile | ||
) |
void quda::computeGenStapleFieldParityKernel | ( | void * | staple_even, |
void * | staple_odd, | ||
const void * | sitelink_even, | ||
const void * | sitelink_odd, | ||
void * | fatlink_even, | ||
void * | fatlink_odd, | ||
const void * | mulink_even, | ||
const void * | mulink_odd, | ||
int | mu, | ||
int | nu, | ||
int | save_staple, | ||
double | mycoeff, | ||
QudaReconstructType | recon, | ||
QudaPrecision | prec, | ||
dim3 | halfGridDim, | ||
llfat_kernel_param_t | kparam, | ||
cudaStream_t * | stream | ||
) |
void quda::computeGenStapleFieldParityKernel_ex | ( | void * | staple_even, |
void * | staple_odd, | ||
const void * | sitelink_even, | ||
const void * | sitelink_odd, | ||
void * | fatlink_even, | ||
void * | fatlink_odd, | ||
const void * | mulink_even, | ||
const void * | mulink_odd, | ||
int | mu, | ||
int | nu, | ||
int | save_staple, | ||
double | mycoeff, | ||
QudaReconstructType | recon, | ||
QudaPrecision | prec, | ||
llfat_kernel_param_t | kparam | ||
) |
void quda::computeKSLongLinkForce | ( | Result | res, |
Oprod | oprod, | ||
Gauge | gauge, | ||
int | dim[4], | ||
const GaugeField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 421 of file ks_force_quda.cu.
void quda::computeKSLongLinkForce | ( | GaugeField & | result, |
const GaugeField & | oprod, | ||
const GaugeField & | gauge, | ||
QudaFieldLocation | location | ||
) |
Definition at line 430 of file ks_force_quda.cu.
__host__ __device__ void quda::computeKSLongLinkForceCore | ( | KSLongLinkArg< Result, Oprod, Gauge > & | arg, |
int | idx | ||
) |
Definition at line 276 of file ks_force_quda.cu.
void quda::computeKSLongLinkForceCPU | ( | KSLongLinkArg< Result, Oprod, Gauge > & | arg | ) |
Definition at line 352 of file ks_force_quda.cu.
__global__ void quda::computeKSLongLinkForceKernel | ( | KSLongLinkArg< Result, Oprod, Gauge > | arg | ) |
Definition at line 340 of file ks_force_quda.cu.
|
inline |
Definition at line 924 of file quda_matrix.h.
void quda::computeLongLinkCuda | ( | void * | outEven, |
void * | outOdd, | ||
const void *const | inEven, | ||
const void *const | inOdd, | ||
double | coeff, | ||
QudaReconstructType | recon, | ||
QudaPrecision | prec, | ||
dim3 | halfGridDim, | ||
llfat_kernel_param_t | kparam | ||
) |
|
inline |
Definition at line 555 of file quda_matrix.h.
void quda::computeStaggeredOprod | ( | cudaGaugeField & | out, |
cudaColorSpinorField & | in, | ||
FaceBuffer & | facebuffer, | ||
const unsigned int | parity, | ||
const double | coeff, | ||
const unsigned int | displacement | ||
) |
void quda::computeStaggeredOprod | ( | cudaGaugeField & | outA, |
cudaGaugeField & | outB, | ||
cudaColorSpinorField & | inEven, | ||
cudaColorSpinorField & | inOdd, | ||
FaceBuffer & | faceBuffer, | ||
const unsigned int | parity, | ||
const double | coeff[2] | ||
) |
Definition at line 635 of file staggered_oprod.cu.
|
inline |
Definition at line 115 of file complex_quda.h.
|
inline |
Returns the complex conjugate of z.
Definition at line 821 of file complex_quda.h.
|
inline |
Definition at line 251 of file quda_matrix.h.
|
inline |
Definition at line 256 of file quda_matrix.h.
|
inline |
Definition at line 261 of file quda_matrix.h.
|
inline |
Definition at line 267 of file quda_matrix.h.
|
inline |
Definition at line 540 of file quda_matrix.h.
void quda::contractCuda | ( | const cudaColorSpinorField & | x, |
const cudaColorSpinorField & | y, | ||
void * | result, | ||
const QudaContractType | contract_type, | ||
const QudaParity | parity | ||
) |
Contracts the x and y spinors (x is daggered) and stores the result in the array result. One must specify the contract type (time-sliced or volumed contract, and whether we should include a gamma5 in the middle), as well as the time-slice (see overloaded version of the same function) in case we don't want a volume contraction. The function works only with parity spinors, and the parity must be specified.
Definition at line 290 of file contract.cu.
void quda::contractCuda | ( | const cudaColorSpinorField & | x, |
const cudaColorSpinorField & | y, | ||
void * | result, | ||
const QudaContractType | contract_type, | ||
const int | nTSlice, | ||
const QudaParity | parity | ||
) |
Contracts the x and y spinors (x is daggered) and stores the result in the array result. One must specify the contract type (time-sliced or volumed contract, and whether we should include a gamma5 in the middle), as well as the time-slice in case we don't want a volume contraction. The function works only with parity spinors, and the parity must be specified.
Definition at line 325 of file contract.cu.
|
inline |
Definition at line 33 of file register_traits.h.
|
inline |
Definition at line 34 of file register_traits.h.
|
inline |
Definition at line 35 of file register_traits.h.
|
inline |
Definition at line 962 of file quda_matrix.h.
|
inline |
Definition at line 973 of file quda_matrix.h.
|
inline |
Definition at line 709 of file quda_matrix.h.
void quda::copyCuda | ( | cudaColorSpinorField & | dst, |
const cudaColorSpinorField & | src | ||
) |
Definition at line 235 of file copy_quda.cu.
void quda::copyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
const int | parity, | ||
const QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm, | ||
float * | srcNorm | ||
) |
Definition at line 413 of file extended_color_spinor_utilities.cu.
void quda::CopyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
const int | parity, | ||
const QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm = 0 , |
||
float * | srcNorm = 0 |
||
) |
Definition at line 481 of file extended_color_spinor_utilities.cu.
void quda::copyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
const int | parity, | ||
void * | Dst, | ||
void * | Src, | ||
void * | dstNorm, | ||
void * | srcNorm | ||
) |
Definition at line 507 of file extended_color_spinor_utilities.cu.
void quda::copyExtendedGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 |
||
) |
This function is used for copying the gauge field into an extended gauge field. Defined in copy_gauge_extended.cu.
out | The extended output field to which we are copying |
in | The input field from which we are copying |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
Definition at line 337 of file copy_gauge_extended.cu.
void quda::copyGauge | ( | CopyGaugeArg< OutOrder, InOrder > | arg | ) |
Generic CPU gauge reordering and packing
Definition at line 27 of file copy_gauge_inc.cu.
void quda::copyGauge | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
int | volume, | ||
const int * | faceVolumeCB, | ||
int | nDim, | ||
int | geometry, | ||
const GaugeField & | out, | ||
QudaFieldLocation | location, | ||
int | type | ||
) |
Definition at line 185 of file copy_gauge_inc.cu.
void quda::copyGauge | ( | const InOrder & | inOrder, |
GaugeField & | out, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatOut ** | outGhost, | ||
int | type | ||
) |
Definition at line 224 of file copy_gauge_inc.cu.
void quda::copyGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
FloatOut ** | outGhost, | ||
FloatIn ** | inGhost, | ||
int | type | ||
) |
Definition at line 349 of file copy_gauge_inc.cu.
void quda::copyGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
FloatOut ** | outGhost, | ||
FloatIn ** | inGhost, | ||
int | type | ||
) |
Definition at line 460 of file copy_gauge_inc.cu.
__device__ __host__ void quda::copyGaugeEx | ( | CopyGaugeExArg< OutOrder, InOrder > & | arg, |
int | X, | ||
int | parity | ||
) |
Copy a regular gauge field into an extended gauge field
Definition at line 35 of file copy_gauge_extended.cu.
void quda::copyGaugeEx | ( | CopyGaugeExArg< OutOrder, InOrder > | arg | ) |
Definition at line 64 of file copy_gauge_extended.cu.
void quda::copyGaugeEx | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
const int * | E, | ||
const int * | X, | ||
const int * | faceVolumeCB, | ||
const GaugeField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 141 of file copy_gauge_extended.cu.
void quda::copyGaugeEx | ( | const InOrder & | inOrder, |
const int * | X, | ||
GaugeField & | out, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out | ||
) |
Definition at line 152 of file copy_gauge_extended.cu.
void quda::copyGaugeEx | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In | ||
) |
Definition at line 236 of file copy_gauge_extended.cu.
void quda::copyGaugeEx | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In | ||
) |
Definition at line 318 of file copy_gauge_extended.cu.
__global__ void quda::copyGaugeExKernel | ( | CopyGaugeExArg< OutOrder, InOrder > | arg | ) |
Definition at line 73 of file copy_gauge_extended.cu.
__global__ void quda::copyGaugeKernel | ( | CopyGaugeArg< OutOrder, InOrder > | arg | ) |
Generic CUDA gauge reordering and packing. Adopts a similar form to the CPU version, using the same inlined functions.
Definition at line 51 of file copy_gauge_inc.cu.
void quda::copyGenericClover | ( | CloverField & | out, |
const CloverField & | in, | ||
bool | inverse, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 , |
||
void * | outNorm = 0 , |
||
void * | inNorm = 0 |
||
) |
This generic function is used for copying the clover field, where the input and output can be in any order and location.
out | The output field to which we are copying |
in | The input field from which we are copying |
inverse | Whether we are copying the inverse term or not |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
outNorm | The output norm buffer (optional) |
inNorm | The input norm buffer (optional) |
Definition at line 182 of file copy_clover.cu.
void quda::copyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm, | ||
float * | srcNorm | ||
) |
Definition at line 337 of file copy_color_spinor.cu.
void quda::CopyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm = 0 , |
||
float * | srcNorm = 0 |
||
) |
Definition at line 405 of file copy_color_spinor.cu.
void quda::copyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst = 0 , |
||
void * | Src = 0 , |
||
void * | dstNorm = 0 , |
||
void * | srcNorm = 0 |
||
) |
Definition at line 422 of file copy_color_spinor.cu.
void quda::copyGenericGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 , |
||
void ** | ghostOut = 0 , |
||
void ** | ghostIn = 0 , |
||
int | type = 0 |
||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in copy_gauge.cu.
out | The output field to which we are copying |
in | The input field from which we are copying |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
ghostOut | The output ghost buffer (optional) |
ghostIn | The input ghost buffer (optional) |
type | The type of copy we are doing (0: body and ghost; otherwise: ghost only) |
Definition at line 30 of file copy_gauge.cu.
void quda::copyGenericGaugeDoubleOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 5 of file copy_gauge_double.cu.
void quda::copyGenericGaugeHalfOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 5 of file copy_gauge_half.cu.
void quda::copyGenericGaugeSingleOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 5 of file copy_gauge_single.cu.
void quda::copyGhost | ( | CopyGaugeArg< OutOrder, InOrder > | arg | ) |
Generic CPU gauge ghost reordering and packing
Definition at line 74 of file copy_gauge_inc.cu.
__global__ void quda::copyGhostKernel | ( | CopyGaugeArg< OutOrder, InOrder > | arg | ) |
Generic CUDA kernel for copying the ghost zone. Adopts a similar form as the CPU version, using the same inlined functions.
Definition at line 98 of file copy_gauge_inc.cu.
__device__ __host__ void quda::copyInterior | ( | CopySpinorExArg< OutOrder, InOrder, Basis > & | arg, |
int | X | ||
) |
Definition at line 170 of file extended_color_spinor_utilities.cu.
void quda::copyInterior | ( | CopySpinorExArg< OutOrder, InOrder, Basis > & | arg | ) |
Definition at line 220 of file extended_color_spinor_utilities.cu.
__global__ void quda::copyInteriorKernel | ( | CopySpinorExArg< OutOrder, InOrder, Basis > | arg | ) |
Definition at line 206 of file extended_color_spinor_utilities.cu.
|
inline |
Definition at line 985 of file quda_matrix.h.
|
inline |
Definition at line 997 of file quda_matrix.h.
void quda::copySpinorEx | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
const Basis | basis, | ||
const int * | E, | ||
const int * | X, | ||
const int | parity, | ||
const bool | extend, | ||
const ColorSpinorField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 281 of file extended_color_spinor_utilities.cu.
void quda::copySpinorEx | ( | OutOrder | outOrder, |
InOrder | inOrder, | ||
const QudaGammaBasis | outBasis, | ||
const QudaGammaBasis | inBasis, | ||
const int * | E, | ||
const int * | X, | ||
const int | parity, | ||
const bool | extend, | ||
const ColorSpinorField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 296 of file extended_color_spinor_utilities.cu.
|
inline |
Definition at line 35 of file complex_quda.h.
|
inline |
Definition at line 884 of file complex_quda.h.
Definition at line 892 of file complex_quda.h.
|
inline |
Definition at line 70 of file complex_quda.h.
|
inline |
Definition at line 900 of file complex_quda.h.
Definition at line 908 of file complex_quda.h.
void quda::covDev | ( | cudaColorSpinorField * | out, |
cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | mu, | ||
TimeProfile & | profile | ||
) |
void quda::createDirac | ( | Dirac *& | d, |
Dirac *& | dSloppy, | ||
Dirac *& | dPre, | ||
QudaInvertParam & | param, | ||
const bool | pc_solve | ||
) |
Definition at line 1228 of file interface_quda.cpp.
void quda::createDslashEvents | ( | ) |
Definition at line 108 of file dslash_quda.cu.
ColorSpinorFieldOrder<Float>* quda::createOrder | ( | const cpuColorSpinorField & | a | ) |
Definition at line 7 of file color_spinor_util.cu.
void quda::createStaggeredOprodEvents | ( | ) |
__device__ void quda::cxpaypbz_ | ( | const float4 & | x, |
const float2 & | a, | ||
const float4 & | y, | ||
const float2 & | b, | ||
float4 & | z | ||
) |
Functor that performs the operation z[i] = x[i] + a*y[i] + b*z[i]
Definition at line 256 of file blas_quda.cu.
__device__ void quda::cxpaypbz_ | ( | const float2 & | x, |
const float2 & | a, | ||
const float2 & | y, | ||
const float2 & | b, | ||
float2 & | z | ||
) |
Definition at line 265 of file blas_quda.cu.
__device__ void quda::cxpaypbz_ | ( | const double2 & | x, |
const double2 & | a, | ||
const double2 & | y, | ||
const double2 & | b, | ||
double2 & | z | ||
) |
Definition at line 272 of file blas_quda.cu.
void quda::cxpaypbzCpu | ( | const cpuColorSpinorField & | x, |
const Complex & | b, | ||
const cpuColorSpinorField & | y, | ||
const Complex & | c, | ||
cpuColorSpinorField & | z | ||
) |
Definition at line 115 of file blas_cpu.cpp.
void quda::cxpaypbzCuda | ( | cudaColorSpinorField & | x, |
const Complex & | b, | ||
cudaColorSpinorField & | y, | ||
const Complex & | c, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 290 of file blas_quda.cu.
void quda::destroyDslashEvents | ( | ) |
Definition at line 129 of file dslash_quda.cu.
void quda::destroyStaggeredOprodEvents | ( | ) |
void quda::device_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h
Definition at line 232 of file malloc.cpp.
void * quda::device_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h
Definition at line 146 of file malloc.cpp.
__global__ void quda::do_link_format_cpu_to_gpu | ( | FloatN * | dst, |
Float2 * | src, | ||
int | reconstruct, | ||
int | Vh, | ||
int | pad, | ||
int | ghostV, | ||
size_t | threads | ||
) |
Definition at line 43 of file misc_helpers.cu.
__global__ void quda::do_link_format_cpu_to_gpu_milc | ( | FloatN * | dst, |
Float2 * | src, | ||
int | reconstruct, | ||
int | Vh, | ||
int | pad, | ||
int | ghostV, | ||
size_t | threads | ||
) |
Definition at line 103 of file misc_helpers.cu.
__global__ void quda::do_link_format_gpu_to_cpu | ( | FloatN * | dst, |
FloatN * | src, | ||
int | Vh, | ||
int | stride | ||
) |
Definition at line 322 of file misc_helpers.cu.
void quda::domainWallDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const double & | m_f, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH |
||
) |
Definition at line 172 of file dslash_domain_wall.cu.
void quda::domainWallDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const double & | m_f, | ||
const double & | k, | ||
const int * | commDim, | ||
const int | DS_type, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 234 of file dslash_domain_wall_4d.cu.
__device__ double quda::dot_ | ( | const double2 & | a, |
const double2 & | b | ||
) |
Return the real dot product of a and b
Definition at line 154 of file reduce_quda.cu.
__device__ float quda::dot_ | ( | const float2 & | a, |
const float2 & | b | ||
) |
Definition at line 155 of file reduce_quda.cu.
__device__ float quda::dot_ | ( | const float4 & | a, |
const float4 & | b | ||
) |
Definition at line 156 of file reduce_quda.cu.
__device__ double2 quda::dotNormA_ | ( | const double2 & | a, |
const double2 & | b | ||
) |
Definition at line 273 of file reduce_quda.cu.
__device__ double2 quda::dotNormA_ | ( | const float2 & | a, |
const float2 & | b | ||
) |
Definition at line 276 of file reduce_quda.cu.
__device__ double2 quda::dotNormA_ | ( | const float4 & | a, |
const float4 & | b | ||
) |
Definition at line 280 of file reduce_quda.cu.
void quda::endBlas | ( | void | ) |
Definition at line 59 of file blas_quda.cu.
void quda::endReduce | ( | void | ) |
Definition at line 85 of file reduce_quda.cu.
void quda::exchangeExtendedGhost | ( | cudaColorSpinorField * | spinor, |
int | R[], | ||
int | parity, | ||
cudaStream_t * | stream_p | ||
) |
Definition at line 24 of file extended_color_spinor_utilities.cu.
|
inline |
Definition at line 85 of file complex_quda.h.
|
inline |
Definition at line 917 of file complex_quda.h.
Definition at line 923 of file complex_quda.h.
void quda::extendedCopyColorSpinor | ( | InOrder & | inOrder, |
ColorSpinorField & | out, | ||
QudaGammaBasis | inBasis, | ||
const int * | E, | ||
const int * | X, | ||
const int | parity, | ||
const bool | extend, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
float * | outNorm | ||
) |
Definition at line 323 of file extended_color_spinor_utilities.cu.
void quda::extendedCopyColorSpinor | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const int | parity, | ||
const QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
float * | outNorm, | ||
float * | inNorm | ||
) |
Definition at line 359 of file extended_color_spinor_utilities.cu.
void quda::extractExtendedGaugeGhost | ( | const GaugeField & | u, |
int | dim, | ||
const int * | R, | ||
void ** | ghost, | ||
bool | extract | ||
) |
This function is used for extracting the extended gauge ghost zone from a gauge field array, or injecting it back into the field. Defined in extract_gauge_ghost_extended.cu.
u | The gauge field from which we want to extract/pack the ghost zone |
dim | The dimension in which we are packing/unpacking |
ghost | The array where we want to pack/unpack the ghost zone into/from |
extract | Whether we are extracting into ghost or injecting from ghost |
Definition at line 440 of file extract_gauge_ghost_extended.cu.
void quda::extractGaugeGhost | ( | const GaugeField & | u, |
void ** | ghost | ||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.
u | The gauge field from which we want to extract the ghost zone |
ghost | The array where we want to pack the ghost zone into |
Definition at line 307 of file extract_gauge_ghost.cu.
void quda::extractGhost | ( | ExtractGhostArg< Order, nDim > | arg | ) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 33 of file extract_gauge_ghost.cu.
void quda::extractGhost | ( | Order | order, |
const GaugeField & | u, | ||
QudaFieldLocation | location | ||
) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 172 of file extract_gauge_ghost.cu.
void quda::extractGhost | ( | const GaugeField & | u, |
Float ** | Ghost | ||
) |
This is the template driver for extractGhost
Definition at line 214 of file extract_gauge_ghost.cu.
void quda::extractGhostEx | ( | ExtractGhostExArg< Order, nDim > | arg | ) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 93 of file extract_gauge_ghost_extended.cu.
void quda::extractGhostEx | ( | Order | order, |
const int | dim, | ||
const int * | surfaceCB, | ||
const int * | E, | ||
const int * | R, | ||
bool | extract, | ||
const GaugeField & | u, | ||
QudaFieldLocation | location | ||
) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
E | the extended gauge dimensions |
R | array holding the radius of the extended region |
extract | Whether we are extracting or injecting the ghost zone |
Definition at line 274 of file extract_gauge_ghost_extended.cu.
void quda::extractGhostEx | ( | const GaugeField & | u, |
int | dim, | ||
const int * | R, | ||
Float ** | Ghost, | ||
bool | extract | ||
) |
This is the template driver for extractGhost
Definition at line 329 of file extract_gauge_ghost_extended.cu.
__global__ void quda::extractGhostExKernel | ( | ExtractGhostExArg< Order, nDim > | arg | ) |
Generic GPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence. Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 140 of file extract_gauge_ghost_extended.cu.
__global__ void quda::extractGhostKernel | ( | ExtractGhostArg< Order, nDim > | arg | ) |
Generic GPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.
Definition at line 78 of file extract_gauge_ghost.cu.
__device__ __host__ void quda::extractor | ( | Arg & | arg, |
int | dir, | ||
int | a, | ||
int | b, | ||
int | c, | ||
int | d, | ||
int | g, | ||
int | parity | ||
) |
Definition at line 49 of file extract_gauge_ghost_extended.cu.
void quda::fermion_force_cuda | ( | double | eps, |
double | weight1, | ||
double | weight2, | ||
void * | act_path_coeff, | ||
FullHw | cudaHw, | ||
cudaGaugeField & | cudaSiteLink, | ||
cudaGaugeField & | cudaMom, | ||
QudaGaugeParam * | param | ||
) |
void quda::fermion_force_init_cuda | ( | QudaGaugeParam * | param | ) |
void quda::fillInitCGSolveParam | ( | SolverParam & | initCGparam | ) |
Definition at line 394 of file inv_eigcg_quda.cpp.
void quda::fillInnerSolveParam | ( | SolverParam & | inner, |
const SolverParam & | outer | ||
) |
Definition at line 28 of file inv_gcr_quda.cpp.
void quda::gamma5Cuda | ( | cudaColorSpinorField * | out, |
const cudaColorSpinorField * | in | ||
) |
Applies a gamma5 matrix to a spinor. This is the function to be called in interfaces; it requires only pointers to the output spinor (out) and the input spinor (in), in that order.
Definition at line 85 of file contract.cu.
void quda::gauge_force_cuda | ( | cudaGaugeField & | cudaMom, |
double | eb3, | ||
cudaGaugeField & | cudaSiteLink, | ||
QudaGaugeParam * | param, | ||
int *** | input_path, | ||
int * | length, | ||
double * | path_coeff, | ||
int | num_paths, | ||
int | max_length | ||
) |
Definition at line 328 of file gauge_force_quda.cu.
void quda::gauge_force_cuda_dir | ( | cudaGaugeField & | cudaMom, |
const int | dir, | ||
const double | eb3, | ||
const cudaGaugeField & | cudaSiteLink, | ||
const QudaGaugeParam * | param, | ||
int ** | input_path, | ||
const int * | length, | ||
const double * | path_coeff, | ||
const int | num_paths, | ||
const int | max_length | ||
) |
Definition at line 274 of file gauge_force_quda.cu.
void quda::gauge_force_init_cuda | ( | QudaGaugeParam * | param, |
int | max_length | ||
) |
Definition at line 112 of file gauge_force_quda.cu.
__global__ void quda::GAUGE_FORCE_KERN_NAME | ( | Float2 * | momEven, |
Float2 * | momOdd, | ||
const int | dir, | ||
const double | eb3, | ||
const FloatN * | linkEven, | ||
const FloatN * | linkOdd, | ||
const int * | input_path, | ||
const int * | length, | ||
const double * | path_coeff, | ||
const int | num_paths, | ||
const kernel_param_t | kparam | ||
) |
Definition at line 477 of file gauge_force_quda.cu.
int quda::genericCompare | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b, | ||
int | tol | ||
) |
Definition at line 118 of file color_spinor_util.cu.
void quda::genericCopyColorSpinor | ( | OutOrder & | outOrder, |
const InOrder & | inOrder, | ||
QudaGammaBasis | dstBasis, | ||
QudaGammaBasis | srcBasis, | ||
const ColorSpinorField & | out, | ||
QudaFieldLocation | location | ||
) |
Decide whether we are changing basis or not
Definition at line 209 of file copy_color_spinor.cu.
void quda::genericCopyColorSpinor | ( | InOrder & | inOrder, |
ColorSpinorField & | out, | ||
QudaGammaBasis | inBasis, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
float * | outNorm | ||
) |
Decide on the output order
Definition at line 268 of file copy_color_spinor.cu.
void quda::genericCopyColorSpinor | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
float * | outNorm, | ||
float * | inNorm | ||
) |
Decide on the input order
Definition at line 305 of file copy_color_spinor.cu.
void quda::genericPrintVector | ( | cpuColorSpinorField & | a, |
unsigned int | x | ||
) |
Definition at line 165 of file color_spinor_util.cu.
void quda::genericSource | ( | cpuColorSpinorField & | a, |
QudaSourceType | sourceType, | ||
int | x, | ||
int | s, | ||
int | c | ||
) |
Definition at line 38 of file color_spinor_util.cu.
cudaStream_t * quda::getBlasStream | ( | ) |
Definition at line 64 of file blas_quda.cu.
|
inline |
Definition at line 48 of file ks_force_quda.cu.
|
inline |
Definition at line 385 of file quda_matrix.h.
bool quda::getDslashLaunch | ( | ) |
bool quda::getKernelPackT | ( | ) |
Definition at line 84 of file dslash_quda.cu.
|
inline |
Definition at line 276 of file quda_matrix.h.
|
inline |
Definition at line 378 of file quda_matrix.h.
bool quda::getTwistPack | ( | ) |
Definition at line 91 of file dslash_quda.cu.
double3 quda::HeavyQuarkResidualNorm | ( | const Float * | x, |
const Float * | r, | ||
const int | volume, | ||
const int | Nint | ||
) |
Definition at line 310 of file blas_cpu.cpp.
double3 quda::HeavyQuarkResidualNormCpu | ( | cpuColorSpinorField & | x, |
cpuColorSpinorField & | r | ||
) |
Definition at line 331 of file blas_cpu.cpp.
double3 quda::HeavyQuarkResidualNormCpu | ( | cpuColorSpinorField & | x, |
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | r | ||
) |
Definition at line 352 of file blas_cpu.cpp.
double3 quda::HeavyQuarkResidualNormCuda | ( | cudaColorSpinorField & | x, |
cudaColorSpinorField & | r | ||
) |
Definition at line 777 of file reduce_quda.cu.
void quda::host_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h
Definition at line 256 of file malloc.cpp.
void quda::improvedStaggeredDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | fatGauge, | ||
const cudaGaugeField & | longGauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 135 of file dslash_improved_staggered.cu.
|
inline |
Definition at line 342 of file quda_matrix.h.
void quda::initBlas | ( | ) |
Definition at line 53 of file blas_quda.cu.
void quda::initReduce | ( | ) |
Definition at line 52 of file reduce_quda.cu.
__device__ __host__ void quda::injector | ( | Arg & | arg, |
int | dir, | ||
int | a, | ||
int | b, | ||
int | c, | ||
int | d, | ||
int | g, | ||
int | parity | ||
) |
Definition at line 70 of file extract_gauge_ghost_extended.cu.
bool quda::isUnitary | ( | const QudaGaugeParam & | param, |
cpuGaugeField & | field, | ||
double | max_error | ||
) |
void quda::link_format_cpu_to_gpu | ( | void * | dst, |
void * | src, | ||
int | reconstruct, | ||
int | Vh, | ||
int | pad, | ||
int | ghostV, | ||
QudaPrecision | prec, | ||
QudaGaugeFieldOrder | cpu_order, | ||
cudaStream_t | stream | ||
) |
Definition at line 144 of file misc_helpers.cu.
void quda::link_format_gpu_to_cpu | ( | void * | dst, |
void * | src, | ||
int | Vh, | ||
int | stride, | ||
QudaPrecision | prec, | ||
cudaStream_t | stream | ||
) |
Definition at line 347 of file misc_helpers.cu.
|
inline |
Definition at line 40 of file ks_force_quda.cu.
void quda::llfat_cuda | ( | cudaGaugeField * | cudaFatLink, |
cudaGaugeField * | cudaLongLink, | ||
cudaGaugeField & | cudaSiteLink, | ||
cudaGaugeField & | cudaStaple, | ||
cudaGaugeField & | cudaStaple1, | ||
QudaGaugeParam * | param, | ||
double * | act_path_coeff | ||
) |
Definition at line 23 of file llfat_quda_itf.cpp.
void quda::llfat_cuda_ex | ( | cudaGaugeField * | cudaFatLink, |
cudaGaugeField * | cudaLongLink, | ||
cudaGaugeField & | cudaSiteLink, | ||
cudaGaugeField & | cudaStaple, | ||
cudaGaugeField & | cudaStaple1, | ||
QudaGaugeParam * | param, | ||
double * | act_path_coeff | ||
) |
Definition at line 276 of file llfat_quda_itf.cpp.
void quda::llfat_init_cuda | ( | QudaGaugeParam * | param | ) |
void quda::llfat_init_cuda_ex | ( | QudaGaugeParam * | param_ex | ) |
void quda::llfatOneLinkKernel | ( | cudaGaugeField & | cudaFatLink, |
cudaGaugeField & | cudaSiteLink, | ||
cudaGaugeField & | cudaStaple, | ||
cudaGaugeField & | cudaStaple1, | ||
QudaGaugeParam * | param, | ||
double * | act_path_coeff | ||
) |
Definition at line 1187 of file llfat_quda.cu.
void quda::llfatOneLinkKernel_ex | ( | cudaGaugeField & | cudaFatLink, |
cudaGaugeField & | cudaSiteLink, | ||
cudaGaugeField & | cudaStaple, | ||
cudaGaugeField & | cudaStaple1, | ||
QudaGaugeParam * | param, | ||
double * | act_path_coeff, | ||
llfat_kernel_param_t | kparam | ||
) |
Definition at line 1232 of file llfat_quda.cu.
|
inline |
Definition at line 236 of file color_spinor_field_order.h.
void quda::loadLinkToGPU | ( | cudaGaugeField * | cudaGauge, |
cpuGaugeField * | cpuGauge, | ||
QudaGaugeParam * | param | ||
) |
void quda::loadLinkToGPU_ex | ( | cudaGaugeField * | cudaGauge, |
cpuGaugeField * | cpuGauge | ||
) |
void quda::loadLinkToGPU_gf | ( | cudaGaugeField * | cudaGauge, |
cpuGaugeField * | cpuGauge, | ||
QudaGaugeParam * | param | ||
) |
|
inline |
Definition at line 767 of file quda_matrix.h.
|
inline |
Definition at line 787 of file quda_matrix.h.
|
inline |
Definition at line 778 of file quda_matrix.h.
|
inline |
Definition at line 857 of file quda_matrix.h.
void quda::loadTuneCache | ( | QudaVerbosity | verbosity | ) |
|
inline |
Definition at line 90 of file complex_quda.h.
|
inline |
Definition at line 929 of file complex_quda.h.
Definition at line 935 of file complex_quda.h.
|
inline |
Definition at line 95 of file complex_quda.h.
|
inline |
Definition at line 942 of file complex_quda.h.
__forceinline__ __host__ __device__ float2 quda::make_FloatN | ( | const double2 & | a | ) |
Definition at line 201 of file float_vector.h.
__forceinline__ __host__ __device__ float4 quda::make_FloatN | ( | const double4 & | a | ) |
Definition at line 205 of file float_vector.h.
__forceinline__ __host__ __device__ double2 quda::make_FloatN | ( | const float2 & | a | ) |
Definition at line 209 of file float_vector.h.
__forceinline__ __host__ __device__ double4 quda::make_FloatN | ( | const float4 & | a | ) |
Definition at line 213 of file float_vector.h.
__forceinline__ __host__ __device__ short4 quda::make_shortN | ( | const float4 & | a | ) |
Definition at line 217 of file float_vector.h.
__forceinline__ __host__ __device__ short2 quda::make_shortN | ( | const float2 & | a | ) |
Definition at line 221 of file float_vector.h.
__forceinline__ __host__ __device__ short4 quda::make_shortN | ( | const double4 & | a | ) |
Definition at line 225 of file float_vector.h.
__forceinline__ __host__ __device__ short2 quda::make_shortN | ( | const double2 & | a | ) |
Definition at line 229 of file float_vector.h.
|
inline |
Definition at line 125 of file quda_matrix.h.
|
inline |
Definition at line 134 of file quda_matrix.h.
|
inline |
Definition at line 139 of file quda_matrix.h.
void * quda::mapped_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h
Definition at line 212 of file malloc.cpp.
void quda::massRescale | ( | cudaColorSpinorField & | b, |
QudaInvertParam & | param | ||
) |
Definition at line 1245 of file interface_quda.cpp.
__forceinline__ __host__ __device__ float quda::max_fabs | ( | const float4 & | c | ) |
Definition at line 177 of file float_vector.h.
__forceinline__ __host__ __device__ float quda::max_fabs | ( | const float2 & | b | ) |
Definition at line 183 of file float_vector.h.
__forceinline__ __host__ __device__ double quda::max_fabs | ( | const double4 & | c | ) |
Definition at line 187 of file float_vector.h.
__forceinline__ __host__ __device__ double quda::max_fabs | ( | const double2 & | b | ) |
Definition at line 193 of file float_vector.h.
double quda::maxGauge | ( | const Order | order, |
int | volume, | ||
int | nDim | ||
) |
Generic CPU function to find the gauge maximum
Definition at line 9 of file max_gauge.cu.
double quda::maxGauge | ( | const GaugeField & | u | ) |
This function is used to calculate the maximum absolute value of a gauge field array. Defined in max_gauge.cu.
u | The gauge field from which we want to compute the max |
Definition at line 29 of file max_gauge.cu.
void quda::MDWFDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const double & | m_f, | ||
const double & | k, | ||
const int * | commDim, | ||
const int | DS_type, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 247 of file dslash_mobius.cu.
void quda::mxpyCpu | ( | const cpuColorSpinorField & | x, |
cpuColorSpinorField & | y | ||
) |
Definition at line 51 of file blas_cpu.cpp.
void quda::mxpyCuda | ( | cudaColorSpinorField & | x, |
cudaColorSpinorField & | y | ||
) |
Definition at line 154 of file blas_quda.cu.
void quda::ndegTwistedMassDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const QudaTwistDslashType | type, | ||
const double & | kappa, | ||
const double & | mu, | ||
const double & | epsilon, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH |
||
) |
Definition at line 127 of file dslash_ndeg_twisted_mass.cu.
__device__ __forceinline__ int quda::neighborIndex | ( | const unsigned int & | cb_idx, |
const int(&) | shift[4], | ||
const bool(&) | partitioned[4], | ||
const unsigned int & | parity | ||
) |
Definition at line 41 of file shift_quark_field.cu.
|
inline |
Returns the magnitude of z squared.
Definition at line 859 of file complex_quda.h.
double quda::norm | ( | const Float * | a, |
const int | N | ||
) |
Definition at line 160 of file blas_cpu.cpp.
double quda::norm2 | ( | const ColorSpinorField & | a | ) |
Definition at line 486 of file color_spinor_field.cpp.
double quda::norm2 | ( | const cudaGaugeField & | u | ) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.
u | The gauge field that we want the norm of |
Definition at line 494 of file cuda_gauge_field.cu.
__device__ double quda::norm2_ | ( | const double2 & | a | ) |
Return the L2 norm of x
Definition at line 129 of file reduce_quda.cu.
__device__ float quda::norm2_ | ( | const float2 & | a | ) |
Definition at line 130 of file reduce_quda.cu.
__device__ float quda::norm2_ | ( | const float4 & | a | ) |
Definition at line 131 of file reduce_quda.cu.
double quda::normCpu | ( | const cpuColorSpinorField & | b | ) |
Definition at line 166 of file blas_cpu.cpp.
double quda::normCuda | ( | const cudaColorSpinorField & | b | ) |
Definition at line 145 of file reduce_quda.cu.
|
inline |
Definition at line 802 of file complex_quda.h.
|
inline |
Definition at line 808 of file complex_quda.h.
|
inline |
Definition at line 814 of file complex_quda.h.
|
inline |
Definition at line 35 of file float_vector.h.
|
inline |
Definition at line 44 of file float_vector.h.
|
inline |
Definition at line 51 of file float_vector.h.
|
inline |
Definition at line 58 of file float_vector.h.
|
inline |
Definition at line 692 of file complex_quda.h.
|
inline |
Definition at line 701 of file complex_quda.h.
|
inline |
Definition at line 708 of file complex_quda.h.
|
inline |
Definition at line 193 of file quda_matrix.h.
|
inline |
Definition at line 198 of file quda_matrix.h.
|
inline |
Definition at line 245 of file quda_matrix.h.
|
inline |
Definition at line 439 of file quda_matrix.h.
|
inline |
Definition at line 449 of file quda_matrix.h.
|
inline |
Definition at line 472 of file quda_matrix.h.
|
inline |
Definition at line 508 of file quda_matrix.h.
|
inline |
Definition at line 527 of file quda_matrix.h.
|
inline |
Definition at line 130 of file float_vector.h.
|
inline |
Definition at line 136 of file float_vector.h.
|
inline |
Definition at line 142 of file float_vector.h.
|
inline |
Definition at line 150 of file float_vector.h.
|
inline |
Definition at line 156 of file float_vector.h.
|
inline |
Definition at line 454 of file quda_matrix.h.
|
inline |
Definition at line 491 of file quda_matrix.h.
|
inline |
Definition at line 13 of file float_vector.h.
|
inline |
Definition at line 29 of file float_vector.h.
|
inline |
Definition at line 67 of file float_vector.h.
|
inline |
Definition at line 74 of file float_vector.h.
|
inline |
Definition at line 164 of file quda_matrix.h.
|
inline |
Definition at line 644 of file complex_quda.h.
|
inline |
Definition at line 660 of file complex_quda.h.
|
inline |
Definition at line 666 of file complex_quda.h.
|
inline |
Definition at line 765 of file complex_quda.h.
|
inline |
Definition at line 204 of file quda_matrix.h.
|
inline |
Definition at line 227 of file quda_matrix.h.
|
inline |
Definition at line 396 of file quda_matrix.h.
|
inline |
Definition at line 652 of file complex_quda.h.
|
inline |
Definition at line 83 of file float_vector.h.
|
inline |
Definition at line 91 of file float_vector.h.
|
inline |
Definition at line 97 of file float_vector.h.
|
inline |
Definition at line 103 of file float_vector.h.
|
inline |
Definition at line 149 of file quda_matrix.h.
|
inline |
Definition at line 407 of file quda_matrix.h.
|
inline |
Definition at line 17 of file float_vector.h.
|
inline |
Definition at line 21 of file float_vector.h.
|
inline |
Definition at line 25 of file float_vector.h.
|
inline |
Definition at line 144 of file quda_matrix.h.
|
inline |
Definition at line 164 of file float_vector.h.
|
inline |
Definition at line 168 of file float_vector.h.
|
inline |
Definition at line 169 of file quda_matrix.h.
|
inline |
Definition at line 673 of file complex_quda.h.
|
inline |
Definition at line 679 of file complex_quda.h.
|
inline |
Definition at line 685 of file complex_quda.h.
|
inline |
Definition at line 770 of file complex_quda.h.
|
inline |
Definition at line 233 of file quda_matrix.h.
|
inline |
Definition at line 239 of file quda_matrix.h.
|
inline |
Definition at line 427 of file quda_matrix.h.
|
inline |
Definition at line 460 of file quda_matrix.h.
|
inline |
Definition at line 110 of file float_vector.h.
|
inline |
Definition at line 118 of file float_vector.h.
|
inline |
Definition at line 124 of file float_vector.h.
|
inline |
Definition at line 156 of file quda_matrix.h.
|
inline |
Definition at line 417 of file quda_matrix.h.
|
inline |
Definition at line 716 of file complex_quda.h.
|
inline |
Definition at line 725 of file complex_quda.h.
|
inline |
Definition at line 731 of file complex_quda.h.
|
inline |
Definition at line 221 of file quda_matrix.h.
|
inline |
Definition at line 737 of file complex_quda.h.
|
inline |
Definition at line 744 of file complex_quda.h.
|
inline |
Definition at line 752 of file complex_quda.h.
|
inline |
Definition at line 757 of file complex_quda.h.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const CloverFieldParam & | param | ||
) |
Definition at line 273 of file clover_field.cpp.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const LatticeFieldParam & | param | ||
) |
Definition at line 145 of file lattice_field.cpp.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const GaugeFieldParam & | param | ||
) |
Definition at line 122 of file gauge_field.cpp.
std::basic_ostream< charT, traits > & quda::operator<< | ( | std::basic_ostream< charT, traits > & | os, |
const complex< ValueType > & | z | ||
) |
Definition at line 295 of file complex_quda.h.
|
inline |
Definition at line 285 of file quda_matrix.h.
|
inline |
Definition at line 290 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const ColorSpinorField & | a | ||
) |
Definition at line 500 of file color_spinor_field.cpp.
std::ostream& quda::operator<< | ( | std::ostream & | os, |
const Matrix< T, N > & | m | ||
) |
Definition at line 745 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | os, |
const Array< T, N > & | a | ||
) |
Definition at line 757 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const cudaColorSpinorField & | a | ||
) |
Definition at line 1368 of file cuda_color_spinor_field.cu.
|
inline |
Definition at line 777 of file complex_quda.h.
|
inline |
Definition at line 785 of file complex_quda.h.
|
inline |
Definition at line 793 of file complex_quda.h.
std::basic_istream< charT, traits > & quda::operator>> | ( | std::basic_istream< charT, traits > & | is, |
complex< ValueType > & | z | ||
) |
Definition at line 303 of file complex_quda.h.
void quda::orthoDir | ( | Complex ** | beta, |
cudaColorSpinorField * | Ap[], | ||
int | k | ||
) |
Definition at line 48 of file inv_gcr_quda.cpp.
|
inline |
Definition at line 720 of file quda_matrix.h.
|
inline |
Definition at line 732 of file quda_matrix.h.
void quda::pack_gauge_diag | ( | void * | buf, |
int * | X, | ||
void ** | sitelink, | ||
int | nu, | ||
int | mu, | ||
int | dir1, | ||
int | dir2, | ||
QudaPrecision | prec | ||
) |
void quda::pack_ghost_all_links | ( | void ** | cpuLink, |
void ** | cpuGhostBack, | ||
void ** | cpuGhostFwd, | ||
int | dir, | ||
int | nFace, | ||
QudaPrecision | precision, | ||
int * | X | ||
) |
void quda::pack_ghost_all_staples_cpu | ( | void * | staple, |
void ** | cpuGhostStapleBack, | ||
void ** | cpuGhostStapleFwd, | ||
int | nFace, | ||
QudaPrecision | precision, | ||
int * | X | ||
) |
void quda::packFace | ( | void * | ghost_buf, |
cudaColorSpinorField & | in, | ||
const int | nFace, | ||
const int | dagger, | ||
const int | parity, | ||
const int | dim, | ||
const int | face_num, | ||
const cudaStream_t & | stream, | ||
const double | a = 0.0 , |
||
const double | b = 0.0 |
||
) |
void quda::packFace | ( | void * | ghost_buf, |
cudaColorSpinorField & | in, | ||
FullClover & | clov, | ||
FullClover & | clovInv, | ||
const int | nFace, | ||
const int | dagger, | ||
const int | parity, | ||
const int | dim, | ||
const int | face_num, | ||
const cudaStream_t & | stream, | ||
const double | a = 0.0 |
||
) |
void quda::packFaceExtended | ( | void * | ghost_buf, |
cudaColorSpinorField & | field, | ||
const int | nFace, | ||
const int | R[], | ||
const int | dagger, | ||
const int | parity, | ||
const int | dim, | ||
const int | face_num, | ||
const cudaStream_t & | stream, | ||
const bool | unpack = false |
||
) |
void quda::packGhostStaple | ( | int * | X, |
void * | even, | ||
void * | odd, | ||
int | volume, | ||
QudaPrecision | prec, | ||
int | stride, | ||
int | dir, | ||
int | whichway, | ||
void ** | fwd_nbr_buf_gpu, | ||
void ** | back_nbr_buf_gpu, | ||
void ** | fwd_nbr_buf, | ||
void ** | back_nbr_buf, | ||
cudaStream_t * | stream | ||
) |
void quda::packSpinor | ( | OutOrder & | outOrder, |
const InOrder & | inOrder, | ||
Basis | basis, | ||
int | volume | ||
) |
CPU function to reorder spinor fields.
Definition at line 127 of file copy_color_spinor.cu.
__global__ void quda::packSpinorKernel | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
Basis | basis, | ||
int | volume | ||
) |
CUDA kernel to reorder spinor fields. Takes a form similar to the CPU version and uses the same inlined functions.
Definition at line 141 of file copy_color_spinor.cu.
void * quda::pinned_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h
Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.
Definition at line 192 of file malloc.cpp.
double quda::plaquette | ( | const GaugeField & | data, |
QudaFieldLocation | location | ||
) |
Definition at line 242 of file gauge_plaq.cu.
void quda::point | ( | T & | t, |
int | x, | ||
int | s, | ||
int | c | ||
) |
Definition at line 36 of file color_spinor_util.cu.
|
inline |
Returns the complex with magnitude m and angle theta in radians.
Definition at line 865 of file complex_quda.h.
|
inline |
Definition at line 871 of file complex_quda.h.
|
inline |
Definition at line 877 of file complex_quda.h.
|
inline |
Definition at line 100 of file complex_quda.h.
|
inline |
Definition at line 975 of file complex_quda.h.
|
inline |
Definition at line 951 of file complex_quda.h.
|
inline |
Definition at line 957 of file complex_quda.h.
|
inline |
Definition at line 963 of file complex_quda.h.
|
inline |
Definition at line 969 of file complex_quda.h.
void quda::print | ( | const double | d[], |
int | n | ||
) |
Definition at line 47 of file inv_mpcg_quda.cpp.
void quda::print_vector | ( | const Order & | o, |
unsigned int | x | ||
) |
Definition at line 150 of file color_spinor_util.cu.
|
inline |
Definition at line 1012 of file quda_matrix.h.
void quda::printPeakMemUsage | ( | ) |
Definition at line 286 of file malloc.cpp.
void quda::random | ( | T & | t | ) |
Definition at line 22 of file color_spinor_util.cu.
double quda::reDotProduct | ( | const Float * | a, |
const Float * | b, | ||
const int | N | ||
) |
Definition at line 185 of file blas_cpu.cpp.
double quda::reDotProductCpu | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b | ||
) |
Definition at line 191 of file blas_cpu.cpp.
double quda::reDotProductCuda | ( | cudaColorSpinorField & | a, |
cudaColorSpinorField & | b | ||
) |
Definition at line 170 of file reduce_quda.cu.
void quda::reDotProductCuda | ( | double * | result, |
std::vector< cudaColorSpinorField * > & | a, | ||
std::vector< cudaColorSpinorField * > & | b | ||
) |
Definition at line 176 of file reduce_quda.cu.
double2 quda::reDotProductNormACuda | ( | cudaColorSpinorField & | a, |
cudaColorSpinorField & | b | ||
) |
Definition at line 297 of file reduce_quda.cu.
int quda::reliable | ( | double & | rNorm, |
double & | maxrx, | ||
double & | maxrr, | ||
const double & | r2, | ||
const double & | delta | ||
) |
Definition at line 47 of file inv_bicgstab_quda.cpp.
double quda::resNorm | ( | const DiracMatrix & | mat, |
cudaColorSpinorField & | b, | ||
cudaColorSpinorField & | x | ||
) |
Definition at line 20 of file inv_bicgstab_quda.cpp.
void * quda::safe_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h
Definition at line 168 of file malloc.cpp.
|
inline |
Definition at line 271 of file color_spinor_field_order.h.
void quda::saveTuneCache | ( | QudaVerbosity | verbosity | ) |
void quda::setBlasParam | ( | int | kernel, |
int | prec, | ||
int | threads, | ||
int | blocks | ||
) |
void quda::setDiracParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
bool | pc | ||
) |
Definition at line 1102 of file interface_quda.cpp.
void quda::setDiracPreParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
const bool | pc | ||
) |
Definition at line 1199 of file interface_quda.cpp.
void quda::setDiracSloppyParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
bool | pc | ||
) |
Definition at line 1182 of file interface_quda.cpp.
void quda::setGhostSpinor | ( | bool | value | ) |
Definition at line 42 of file color_spinor_field.cpp.
|
inline |
Definition at line 597 of file quda_matrix.h.
|
inline |
Definition at line 611 of file quda_matrix.h.
|
inline |
Definition at line 625 of file quda_matrix.h.
void quda::setKernelPackT | ( | bool | pack | ) |
pack | Sets whether to use a kernel to pack the T dimension |
Definition at line 82 of file dslash_quda.cu.
void quda::setPackComms | ( | const int * | commDim | ) |
Sets commDim array used in dslash_pack.cu
Definition at line 39 of file dslash_pack.cu.
void quda::setTwistPack | ( | bool | pack | ) |
pack | Sets whether to use a kernel to pack twisted spinor |
Definition at line 90 of file dslash_quda.cu.
void quda::setUnitarizeLinksConstants | ( | double | unitarize_eps, |
double | max_error, | ||
bool | allow_svd, | ||
bool | svd_only, | ||
double | svd_rel_error, | ||
double | svd_abs_error, | ||
bool | check_unitarization = true |
||
) |
void quda::setUnitarizeLinksPadding | ( | int | input_padding, |
int | output_padding | ||
) |
|
inline |
Definition at line 640 of file quda_matrix.h.
|
inline |
Definition at line 653 of file quda_matrix.h.
|
inline |
Definition at line 666 of file quda_matrix.h.
void quda::shiftColorSpinorField | ( | cudaColorSpinorField & | dst, |
const cudaColorSpinorField & | src, | ||
const unsigned int | parity, | ||
const unsigned int | dim, | ||
const int | shift | ||
) |
Definition at line 210 of file shift_quark_field.cu.
__global__ void quda::shiftColorSpinorFieldExternalKernel | ( | ShiftQuarkArg< Output, Input > | arg | ) |
Definition at line 93 of file shift_quark_field.cu.
__global__ void quda::shiftColorSpinorFieldKernel | ( | ShiftQuarkArg< Output, Input > | arg | ) |
Definition at line 68 of file shift_quark_field.cu.
|
inline |
Definition at line 40 of file complex_quda.h.
|
inline |
Definition at line 981 of file complex_quda.h.
Definition at line 989 of file complex_quda.h.
|
inline |
Definition at line 75 of file complex_quda.h.
|
inline |
Definition at line 997 of file complex_quda.h.
Definition at line 1005 of file complex_quda.h.
void quda::siteComputeGenStapleParityKernel | ( | void * | staple_even, |
void * | staple_odd, | ||
const void * | sitelink_even, | ||
const void * | sitelink_odd, | ||
void * | fatlink_even, | ||
void * | fatlink_odd, | ||
int | mu, | ||
int | nu, | ||
double | mycoeff, | ||
QudaReconstructType | recon, | ||
QudaPrecision | prec, | ||
dim3 | halfGridDim, | ||
llfat_kernel_param_t | kparam, | ||
cudaStream_t * | stream | ||
) |
void quda::siteComputeGenStapleParityKernel_ex | ( | void * | staple_even, |
void * | staple_odd, | ||
const void * | sitelink_even, | ||
const void * | sitelink_odd, | ||
void * | fatlink_even, | ||
void * | fatlink_odd, | ||
int | mu, | ||
int | nu, | ||
double | mycoeff, | ||
QudaReconstructType | recon, | ||
QudaPrecision | prec, | ||
llfat_kernel_param_t | kparam | ||
) |
|
inline |
Definition at line 105 of file complex_quda.h.
|
inline |
Definition at line 1013 of file complex_quda.h.
|
inline |
Definition at line 1019 of file complex_quda.h.
void quda::staggeredDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 119 of file dslash_staggered.cu.
void quda::storeLinkToCPU | ( | cpuGaugeField * | cpuGauge, |
cudaGaugeField * | cudaGauge, | ||
QudaGaugeParam * | param | ||
) |
|
inline |
Definition at line 45 of file complex_quda.h.
|
inline |
Definition at line 1025 of file complex_quda.h.
|
inline |
Definition at line 80 of file complex_quda.h.
|
inline |
Definition at line 1031 of file complex_quda.h.
|
inline |
Definition at line 92 of file gauge_field_order.h.
|
inline |
timeBoundary variant for extended gauge field
idx | extended field linear index |
X | the gauge field dimensions |
R | the radii dimensions of the extended region |
tBoundary | the boundary condition |
isFirstTimeSlice | if we're on the first time slice of nodes |
isLastTimeSlice | if we're on the last time slice of nodes |
ghostExchange | if the field is extended or not (determines indexing type) |
Definition at line 107 of file gauge_field_order.h.
double quda::timeInterval | ( | struct timeval | start, |
struct timeval | end | ||
) |
Definition at line 21 of file inv_gcr_quda.cpp.
double3 quda::tripleCGReductionCuda | ( | cudaColorSpinorField & | x, |
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 811 of file reduce_quda.cu.
void quda::tripleCGUpdateCuda | ( | const double & | alpha, |
const double & | beta, | ||
cudaColorSpinorField & | q, | ||
cudaColorSpinorField & | r, | ||
cudaColorSpinorField & | x, | ||
cudaColorSpinorField & | p | ||
) |
Definition at line 480 of file blas_quda.cu.
TuneParam & quda::tuneLaunch | ( | Tunable & | tunable, |
QudaTune | enabled, | ||
QudaVerbosity | verbosity | ||
) |
void quda::twistCloverGamma5Cuda | ( | cudaColorSpinorField * | out, |
const cudaColorSpinorField * | in, | ||
const int | dagger, | ||
const double & | kappa, | ||
const double & | mu, | ||
const double & | epsilon, | ||
const QudaTwistGamma5Type | twist, | ||
const FullClover * | clov, | ||
const FullClover * | clovInv, | ||
const int | parity | ||
) |
Definition at line 495 of file dslash_quda.cu.
void quda::twistedCloverDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const FullClover * | clover, | ||
const FullClover * | cloverInv, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const QudaTwistCloverDslashType | type, | ||
const double & | kappa, | ||
const double & | mu, | ||
const double & | epsilon, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 155 of file dslash_twisted_clover.cu.
void quda::twistedMassDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | parity, | ||
const int | dagger, | ||
const cudaColorSpinorField * | x, | ||
const QudaTwistDslashType | type, | ||
const double & | kappa, | ||
const double & | mu, | ||
const double & | epsilon, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 151 of file dslash_twisted_mass.cu.
void quda::twistGamma5Cuda | ( | cudaColorSpinorField * | out, |
const cudaColorSpinorField * | in, | ||
const int | dagger, | ||
const double & | kappa, | ||
const double & | mu, | ||
const double & | epsilon, | ||
const QudaTwistGamma5Type | twist | ||
) |
Non-degenerate twisted mass (ndeg tm):
Definition at line 356 of file dslash_quda.cu.
void quda::unitarizeLinksCPU | ( | const QudaGaugeParam & | param, |
cpuGaugeField & | infield, | ||
cpuGaugeField * | outfield | ||
) |
void quda::unitarizeLinksCuda | ( | const QudaGaugeParam & | param, |
cudaGaugeField & | infield, | ||
cudaGaugeField * | outfield, | ||
int * | num_failures | ||
) |
void quda::unpackGhostStaple | ( | int * | X, |
void * | _even, | ||
void * | _odd, | ||
int | volume, | ||
QudaPrecision | prec, | ||
int | stride, | ||
int | dir, | ||
int | whichway, | ||
void ** | fwd_nbr_buf, | ||
void ** | back_nbr_buf, | ||
cudaStream_t * | stream | ||
) |
void quda::updateAlphaZeta | ( | double * | alpha, |
double * | zeta, | ||
double * | zeta_old, | ||
const double * | r2, | ||
const double * | beta, | ||
const double | pAp, | ||
const double * | offset, | ||
const int | nShift, | ||
const int | j_low | ||
) |
Compute the new values of alpha and zeta
Definition at line 38 of file inv_multi_cg_quda.cpp.
void quda::updateGaugeField | ( | GaugeField & | out, |
double | dt, | ||
const GaugeField & | in, | ||
const GaugeField & | mom, | ||
bool | conj_mom, | ||
bool | exact | ||
) |
Evolve the gauge field by step size dt using the momentum field
out | Updated gauge field |
dt | Step size |
in | Input gauge field |
mom | Momentum field |
conj_mom | Whether we conjugate the momentum in the exponential |
exact | Calculate exact exponential or use an expansion |
Definition at line 348 of file gauge_update_quda.cu.
void quda::updateSolution | ( | cudaColorSpinorField & | x, |
const Complex * | alpha, | ||
Complex **const | beta, | ||
double * | gamma, | ||
int | k, | ||
cudaColorSpinorField * | p[] | ||
) |
Definition at line 111 of file inv_gcr_quda.cpp.
void quda::wilsonDslashCuda | ( | cudaColorSpinorField * | out, |
const cudaGaugeField & | gauge, | ||
const cudaColorSpinorField * | in, | ||
const int | oddBit, | ||
const int | daggerBit, | ||
const cudaColorSpinorField * | x, | ||
const double & | k, | ||
const int * | commDim, | ||
TimeProfile & | profile, | ||
const QudaDslashPolicy & | dslashPolicy = QUDA_DSLASH2 |
||
) |
Definition at line 113 of file dslash_wilson.cu.
|
inline |
Definition at line 830 of file quda_matrix.h.
|
inline |
Definition at line 842 of file quda_matrix.h.
|
inline |
Definition at line 802 of file quda_matrix.h.
|
inline |
Definition at line 893 of file quda_matrix.h.
double quda::xmyNormCpu | ( | const cpuColorSpinorField & | a, |
cpuColorSpinorField & | b | ||
) |
Definition at line 205 of file blas_cpu.cpp.
double quda::xmyNormCuda | ( | cudaColorSpinorField & | a, |
cudaColorSpinorField & | b | ||
) |
Definition at line 343 of file reduce_quda.cu.
Complex quda::xpaycDotzyCpu | ( | const cpuColorSpinorField & | x, |
const double & | a, | ||
cpuColorSpinorField & | y, | ||
const cpuColorSpinorField & | z | ||
) |
Definition at line 231 of file blas_cpu.cpp.
Complex quda::xpaycDotzyCuda | ( | cudaColorSpinorField & | x, |
const double & | a, | ||
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | z | ||
) |
Definition at line 534 of file reduce_quda.cu.
void quda::xpayCpu | ( | const cpuColorSpinorField & | x, |
const double & | a, | ||
cpuColorSpinorField & | y | ||
) |
Definition at line 41 of file blas_cpu.cpp.
void quda::xpayCuda | ( | cudaColorSpinorField & | x, |
const double & | a, | ||
cudaColorSpinorField & | y | ||
) |
Definition at line 138 of file blas_quda.cu.
void quda::xpyCpu | ( | const cpuColorSpinorField & | x, |
cpuColorSpinorField & | y | ||
) |
Definition at line 22 of file blas_cpu.cpp.
void quda::xpyCuda | ( | cudaColorSpinorField & | x, |
cudaColorSpinorField & | y | ||
) |
Definition at line 98 of file blas_quda.cu.
double3 quda::xpyHeavyQuarkResidualNormCpu | ( | cpuColorSpinorField & | x, |
cpuColorSpinorField & | y, | ||
cpuColorSpinorField & | r | ||
) |
double3 quda::xpyHeavyQuarkResidualNormCuda | ( | cudaColorSpinorField & | x, |
cudaColorSpinorField & | y, | ||
cudaColorSpinorField & | r | ||
) |
Definition at line 782 of file reduce_quda.cu.
void quda::zeroCuda | ( | cudaColorSpinorField & | a | ) |
Definition at line 40 of file blas_quda.cu.
const char* quda::aux_str |
Definition at line 46 of file blas_quda.cu.
char quda::aux_tmp[TuneKey::aux_n] |
Definition at line 47 of file blas_quda.cu.
unsigned long long quda::blas_bytes |
Definition at line 38 of file blas_quda.cu.
unsigned long long quda::blas_flops |
Definition at line 37 of file blas_quda.cu.
const int quda::maxNface = 3 |
The maximum number of faces that can be exchanged
Definition at line 11 of file lattice_field.h.
const int quda::Nstream = 1 |
Definition at line 217 of file quda_internal.h.
cudaStream_t* quda::stream |
Definition at line 816 of file cuda_color_spinor_field.cu.
const char* quda::vol_str |
Definition at line 45 of file blas_quda.cu.