QUDA
v1.1.0
A library for QCD on GPUs
|
Namespaces | |
blas | |
blas_lapack | |
clover | |
colorspinor | |
device | |
dslash | |
fermion_force | |
gauge | |
mma | |
mobius_eofa | |
mobius_tensor_core | |
pool | |
reducer | |
Typedefs | |
typedef std::vector< ColorSpinorField * > | CompositeColorSpinorField |
using | ColorSpinorFieldSet = ColorSpinorField |
typedef std::complex< double > | Complex |
typedef struct curandStateMRG32k3a | cuRNGState |
using | DynamicStride = Stride< Dynamic, Dynamic > |
using | DenseMatrix = MatrixXcd |
using | VectorSet = MatrixXcd |
using | Vector = VectorXcd |
using | RealVector = VectorXd |
using | RowMajorDenseMatrix = Matrix< Complex, Dynamic, Dynamic, RowMajor > |
typedef std::map< TuneKey, TuneParam > | map |
template<typename T > | |
using | mgarray = std::array< T, QUDA_MAX_MG_LEVEL > |
Functions | |
std::ostream & | operator<< (std::ostream &output, const CloverFieldParam &param) |
double | norm1 (const CloverField &u, bool inverse=false) |
double | norm2 (const CloverField &a, bool inverse=false) |
void | computeClover (CloverField &clover, const GaugeField &fmunu, double coeff) |
Driver for computing the clover field from the field strength tensor. More... | |
void | copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0) |
This generic function is used for copying the clover field where the input and output can be in any order and location. More... | |
void | cloverInvert (CloverField &clover, bool computeTraceLog) |
This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field. More... | |
void | cloverRho (CloverField &clover, double rho) |
This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse) More... | |
void | computeCloverForce (GaugeField &force, const GaugeField &U, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &p, std::vector< double > &coeff) |
Compute the force contribution from the solver solution fields. More... | |
void | computeCloverSigmaOprod (GaugeField &oprod, std::vector< ColorSpinorField * > &x, std::vector< ColorSpinorField * > &p, std::vector< std::vector< double > > &coeff) |
Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field. More... | |
void | computeCloverSigmaTrace (GaugeField &output, const CloverField &clover, double coeff) |
Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu]. More... | |
void | cloverDerivative (cudaGaugeField &force, cudaGaugeField &gauge, cudaGaugeField &oprod, double coeff, QudaParity parity) |
Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field. More... | |
void | copyFieldOffset (CloverField &out, const CloverField &in, CommKey offset, QudaPCType pc_type) |
This function is used for copying from a source clover field to a destination clover field with an offset. More... | |
constexpr bool | dynamic_clover_inverse () |
Helper function that returns whether we have enabled dynamic clover inversion or not. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b) |
Compute the inner product over color and spin dot = \sum_s,c conj(a(s,c)) * b(s,c) More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | colorContract (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb) |
Compute the color contraction over color at spin s dot = \sum_s,c a(s,c) * b(s,c) More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int s) |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb) |
template<typename Float , int Nc, int Nsa, int Nsb> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Nsa > &a, const ColorSpinor< Float, Nc, Nsb > &b, int sa, int sb) |
Compute the inner product over color at spin sa and sb between color spinors a and b of different spin length dot = \sum_c conj(a(c)) * b(s,c) More... | |
template<typename Float , int Ns> | |
__device__ __host__ ColorSpinor< Float, 3, 1 > | crossProduct (const ColorSpinor< Float, 3, Ns > &a, const ColorSpinor< Float, 3, Ns > &b, int sa, int sb) |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ Matrix< complex< Float >, Nc > | outerProdSpinTrace (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b) |
template<typename Float , int Nc> | |
__device__ __host__ Matrix< complex< Float >, Nc > | outerProduct (const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, 1 > &b) |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator+ (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y) |
ColorSpinor addition operator. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator- (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y) |
ColorSpinor subtraction operator. More... | |
template<typename Float , int Nc, int Ns, typename S > | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator* (const S &a, const ColorSpinor< Float, Nc, Ns > &x) |
Compute the scalar-vector product y = a * x. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator* (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x) |
Compute the matrix-vector product y = A * x. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | mv_add (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y) |
Compute the matrix-vector product z = A * x + y. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator* (const HMatrix< Float, Nc *Ns > &A, const ColorSpinor< Float, Nc, Ns > &x) |
Compute the matrix-vector product y = A * x. More... | |
constexpr QudaParity | impliedParityFromMatPC (const QudaMatPCType &matpc_type) |
Helper function for getting the implied spinor parity from a matrix preconditioning type. More... | |
void | copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0) |
void | genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) |
int | genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) |
void | copyFieldOffset (ColorSpinorField &out, const ColorSpinorField &in, CommKey offset, QudaPCType pc_type) |
This function is used for copying from a source colorspinor field to a destination field with an offset. More... | |
void | genericPrintVector (const cpuColorSpinorField &a, unsigned int x) |
void | genericCudaPrintVector (const cudaColorSpinorField &a, unsigned x) |
void | exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, qudaStream_t *stream_p) |
void | copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm) |
void | genericPackGhost (void **ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr) |
Generic ghost packing routine. More... | |
void | spinorNoise (ColorSpinorField &src, RNG &randstates, QudaNoiseType type) |
Generate a random noise spinor. This variant allows the user to manage the RNG state. More... | |
void | spinorNoise (ColorSpinorField &src, unsigned long long seed, QudaNoiseType type) |
Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state. More... | |
QudaPCType | PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b) |
Helper function for determining if the preconditioning type of the fields is the same. More... | |
template<typename... Args> | |
QudaPCType | PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args) |
Helper function for determining if the preconditioning type of the fields is the same. More... | |
QudaFieldOrder | Order_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b) |
Helper function for determining if the order of the fields is the same. More... | |
template<typename... Args> | |
QudaFieldOrder | Order_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args) |
Helper function for determining if the order of the fields is the same. More... | |
int | Length_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b) |
Helper function for determining if the length of the fields is the same. More... | |
template<typename... Args> | |
int | Length_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args) |
Helper function for determining if the length of the fields is the same. More... | |
constexpr int | product (const CommKey &input) |
constexpr CommKey | operator+ (const CommKey &lhs, const CommKey &rhs) |
constexpr CommKey | operator* (const CommKey &lhs, const CommKey &rhs) |
constexpr CommKey | operator/ (const CommKey &lhs, const CommKey &rhs) |
constexpr CommKey | operator% (const CommKey &lhs, const CommKey &rhs) |
constexpr bool | operator< (const CommKey &lhs, const CommKey &rhs) |
constexpr bool | operator> (const CommKey &lhs, const CommKey &rhs) |
constexpr CommKey | coordinate_from_index (int index, CommKey dim) |
constexpr int | index_from_coordinate (CommKey coord, CommKey dim) |
template<typename ValueType > | |
__host__ __device__ ValueType | cos (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | sin (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | tan (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | acos (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | asin (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | atan (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | atan2 (ValueType x, ValueType y) |
template<typename ValueType > | |
__host__ __device__ ValueType | cosh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | sinh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | tanh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | exp (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | log (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | log10 (ValueType x) |
template<typename ValueType , typename ExponentType > | |
__host__ __device__ ValueType | pow (ValueType x, ExponentType e) |
template<typename ValueType > | |
__host__ __device__ ValueType | sqrt (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | abs (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | conj (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | abs (const complex< ValueType > &z) |
Returns the magnitude of z. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | arg (const complex< ValueType > &z) |
Returns the phase angle of z. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | norm (const complex< ValueType > &z) |
Returns the magnitude of z squared. More... | |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | conj (const complex< ValueType > &z) |
Returns the complex conjugate of z. More... | |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | polar (const ValueType &m, const ValueType &theta=0) |
Returns the complex with magnitude m and angle theta in radians. More... | |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator* (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator* (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<> | |
__host__ __device__ complex< float > | operator/ (const complex< float > &lhs, const complex< float > &rhs) |
template<> | |
__host__ __device__ complex< double > | operator/ (const complex< double > &lhs, const complex< double > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | cos (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | cosh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | exp (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | log (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | log10 (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const complex< ValueType > &z, const int &n) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const complex< ValueType > &z, const ValueType &x) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const complex< ValueType > &z, const complex< ValueType > &z2) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const ValueType &x, const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | sin (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | sinh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | sqrt (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | tan (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | tanh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | acos (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | asin (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | atan (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | acosh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | asinh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | atanh (const complex< ValueType > &z) |
template<typename ValueType , class charT , class traits > | |
std::basic_ostream< charT, traits > & | operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z) |
template<typename ValueType , typename charT , class traits > | |
std::basic_istream< charT, traits > & | operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator/ (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator/ (const ValueType &lhs, const complex< ValueType > &rhs) |
template<> | |
__host__ __device__ complex< float > | operator/ (const float &lhs, const complex< float > &rhs) |
template<> | |
__host__ __device__ complex< double > | operator/ (const double &lhs, const complex< double > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const complex< ValueType > &lhs, const ValueType &rhs) |
template<> | |
__host__ __device__ float | abs (const complex< float > &z) |
template<> | |
__host__ __device__ double | abs (const complex< double > &z) |
template<> | |
__host__ __device__ float | arg (const complex< float > &z) |
template<> | |
__host__ __device__ double | arg (const complex< double > &z) |
template<> | |
__host__ __device__ complex< float > | polar (const float &magnitude, const float &angle) |
template<> | |
__host__ __device__ complex< double > | polar (const double &magnitude, const double &angle) |
template<> | |
__host__ __device__ complex< float > | cos (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | cosh (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | exp (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | log (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | pow (const float &x, const complex< float > &exponent) |
template<> | |
__host__ __device__ complex< float > | sin (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | sinh (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | sqrt (const complex< float > &z) |
template<typename ValueType > | |
__host__ __device__ complex< float > | atanh (const complex< float > &z) |
template<typename real > | |
__host__ __device__ complex< real > | cmul (const complex< real > &x, const complex< real > &y) |
template<typename real > | |
__host__ __device__ complex< real > | cmac (const complex< real > &x, const complex< real > &y, const complex< real > &z) |
template<typename real > | |
__host__ __device__ complex< real > | i_ (const complex< real > &a) |
void | contractQuda (const ColorSpinorField &x, const ColorSpinorField &y, void *result, QudaContractType cType) |
template<typename T > | |
__host__ __device__ float | i2f (T a) |
__device__ __host__ int | f2i (float f) |
__device__ __host__ int | d2i (double d) |
template<typename T1 , typename T2 > | |
__host__ __device__ std::enable_if<!isFixed< T1 >::value &&!isFixed< T2 >::value, void >::type | copy (T1 &a, const T2 &b) |
Copy function which is trivial between floating point types. When converting to an integer type, the input float is assumed to be in the range [-1,1] and we rescale to saturate the integer range. When converting from an integer type, we scale the output to be on the same range. More... | |
template<typename T1 , typename T2 > | |
__host__ __device__ std::enable_if<!isFixed< T1 >::value &&isFixed< T2 >::value, void >::type | copy (T1 &a, const T2 &b) |
template<typename T1 , typename T2 > | |
__host__ __device__ std::enable_if< isFixed< T1 >::value &&!isFixed< T2 >::value, void >::type | copy (T1 &a, const T2 &b) |
template<typename T1 , typename T2 > | |
__host__ __device__ std::enable_if<!isFixed< T1 >::value, void >::type | copy_scaled (T1 &a, const T2 &b) |
Specialized variants of the copy function that assumes the scaling factor has already been done. More... | |
template<typename T1 , typename T2 > | |
__host__ __device__ std::enable_if< isFixed< T1 >::value, void >::type | copy_scaled (T1 &a, const T2 &b) |
template<typename T1 , typename T2 , typename T3 > | |
__host__ __device__ std::enable_if<!isFixed< T2 >::value, void >::type | copy_and_scale (T1 &a, const T2 &b, const T3 &c) |
Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector. More... | |
template<typename T1 , typename T2 , typename T3 > | |
__host__ __device__ std::enable_if< isFixed< T2 >::value, void >::type | copy_and_scale (T1 &a, const T2 &b, const T3 &c) |
void | setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc) |
void | setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc) |
void | createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve) |
void | createDiracWithRefine (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, Dirac *&dRef, QudaInvertParam &param, const bool pc_solve) |
void | createDiracWithEig (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, Dirac *&dRef, QudaInvertParam &param, const bool pc_solve) |
void | setKernelPackT (bool pack) |
bool | getKernelPackT () |
void | pushKernelPackT (bool pack) |
void | popKernelPackT () |
void | setPackComms (const int *dim_pack) |
Helper function that sets which dimensions the packing kernel should be packing for. More... | |
bool | getDslashLaunch () |
void | createDslashEvents () |
void | destroyDslashEvents () |
void | ApplyWilson (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Wilson stencil. More... | |
void | ApplyWilsonClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Wilson-clover stencil. More... | |
void | ApplyWilsonCloverHasenbuschTwist (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Wilson-clover stencil. More... | |
void | ApplyWilsonCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned Wilson-clover stencil. More... | |
void | ApplyWilsonCloverHasenbuschTwistPCClovInv (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the twisted-mass stencil. More... | |
void | ApplyWilsonCloverHasenbuschTwistPCNoClovInv (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, double mu, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Wilson-clover stencil with twist for Hasenbusch. More... | |
void | ApplyTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
void | ApplyTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned twisted-mass stencil. More... | |
void | ApplyNdegTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the non-degenerate twisted-mass stencil. More... | |
void | ApplyNdegTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned non-degenerate twisted-mass stencil. More... | |
void | ApplyTwistedClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the twisted-clover stencil. More... | |
void | ApplyTwistedCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned twisted-clover stencil. More... | |
void | ApplyDomainWall5D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order. More... | |
void | ApplyDomainWall4D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order. More... | |
void | ApplyDslash5 (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, bool dagger, Dslash5Type type) |
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned. More... | |
void | ApplyLaplace (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Laplace stencil. More... | |
void | ApplyCovDev (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the covariant derivative. More... | |
void | ApplyClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, bool inverse, int parity) |
Apply clover-matrix field to a color-spinor field. More... | |
void | ApplyStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Apply the staggered dslash operator to a color-spinor field. More... | |
void | ApplyImprovedStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const GaugeField &L, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Apply the improved staggered dslash operator to a color-spinor field. More... | |
void | ApplyStaggeredKahlerDiracInverse (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &Xinv, bool dagger) |
Apply the (improved) staggered Kahler-Dirac inverse block to a color-spinor field. More... | |
void | ApplyTwistGamma (ColorSpinorField &out, const ColorSpinorField &in, int d, double kappa, double mu, double epsilon, int dagger, QudaTwistGamma5Type type) |
Apply the twisted-mass gamma operator to a color-spinor field. More... | |
void | ApplyTwistClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, double kappa, double mu, double epsilon, int parity, int dagger, QudaTwistGamma5Type twist) |
Apply twisted clover-matrix field to a color-spinor field. More... | |
void | PackGhost (void *ghost[2 *QUDA_MAX_DIM], const ColorSpinorField &field, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, int shmem, const qudaStream_t &stream) |
Dslash face packing routine. More... | |
void | gamma5 (ColorSpinorField &out, const ColorSpinorField &in) |
Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma) More... | |
void | arpack_solve (std::vector< ColorSpinorField * > &h_evecs, std::vector< Complex > &h_evals, const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile) |
The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types. More... | |
__host__ __device__ double2 | operator+ (const double2 &x, const double2 &y) |
__host__ __device__ double2 | operator- (const double2 &x, const double2 &y) |
__host__ __device__ float2 | operator- (const float2 &x, const float2 &y) |
__host__ __device__ float4 | operator- (const float4 &x, const float4 &y) |
__host__ __device__ float8 | operator- (const float8 &x, const float8 &y) |
__host__ __device__ double3 | operator+ (const double3 &x, const double3 &y) |
__host__ __device__ double4 | operator+ (const double4 &x, const double4 &y) |
__host__ __device__ float4 | operator* (const float &a, const float4 &x) |
__host__ __device__ float2 | operator* (const float &a, const float2 &x) |
__host__ __device__ double2 | operator* (const double &a, const double2 &x) |
__host__ __device__ double4 | operator* (const double &a, const double4 &x) |
__host__ __device__ float8 | operator* (const float &a, const float8 &x) |
__host__ __device__ float2 | operator+ (const float2 &x, const float2 &y) |
__host__ __device__ float4 | operator+ (const float4 &x, const float4 &y) |
__host__ __device__ float8 | operator+ (const float8 &x, const float8 &y) |
__host__ __device__ float4 | operator+= (float4 &x, const float4 &y) |
__host__ __device__ float2 | operator+= (float2 &x, const float2 &y) |
__host__ __device__ float8 | operator+= (float8 &x, const float8 &y) |
__host__ __device__ double2 | operator+= (double2 &x, const double2 &y) |
__host__ __device__ double3 | operator+= (double3 &x, const double3 &y) |
__host__ __device__ double4 | operator+= (double4 &x, const double4 &y) |
__host__ __device__ float4 | operator-= (float4 &x, const float4 &y) |
__host__ __device__ float2 | operator-= (float2 &x, const float2 &y) |
__host__ __device__ float8 | operator-= (float8 &x, const float8 &y) |
__host__ __device__ double2 | operator-= (double2 &x, const double2 &y) |
__host__ __device__ float2 | operator*= (float2 &x, const float &a) |
__host__ __device__ double2 | operator*= (double2 &x, const float &a) |
__host__ __device__ float4 | operator*= (float4 &a, const float &b) |
__host__ __device__ float8 | operator*= (float8 &a, const float &b) |
__host__ __device__ double2 | operator*= (double2 &a, const double &b) |
__host__ __device__ double4 | operator*= (double4 &a, const double &b) |
__host__ __device__ float2 | operator- (const float2 &x) |
__host__ __device__ double2 | operator- (const double2 &x) |
std::ostream & | operator<< (std::ostream &output, const double2 &a) |
std::ostream & | operator<< (std::ostream &output, const double3 &a) |
std::ostream & | operator<< (std::ostream &output, const double4 &a) |
__device__ __host__ void | zero (double &a) |
__device__ __host__ void | zero (double2 &a) |
__device__ __host__ void | zero (double3 &a) |
__device__ __host__ void | zero (double4 &a) |
__device__ __host__ void | zero (float &a) |
__device__ __host__ void | zero (float2 &a) |
__device__ __host__ void | zero (float3 &a) |
__device__ __host__ void | zero (float4 &a) |
__device__ __host__ void | zero (short &a) |
__device__ __host__ void | zero (char &a) |
template<typename T , int n> | |
std::ostream & | operator<< (std::ostream &output, const vector_type< T, n > &a) |
template<typename scalar , int n> | |
__device__ __host__ void | zero (vector_type< scalar, n > &v) |
template<typename scalar , int n> | |
__device__ __host__ vector_type< scalar, n > | operator+ (const vector_type< scalar, n > &a, const vector_type< scalar, n > &b) |
std::ostream & | operator<< (std::ostream &output, const GaugeFieldParam &param) |
double | norm1 (const GaugeField &u) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. More... | |
double | norm2 (const GaugeField &u) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm. More... | |
void | ax (const double &a, GaugeField &u) |
Scale the gauge field by the scalar a. More... | |
void | copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0) |
void | copyFieldOffset (GaugeField &out, const GaugeField &in, CommKey offset, QudaPCType pc_type) |
This function is used for copying from a source gauge field to a destination gauge field with an offset. More... | |
void | copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0) |
cudaGaugeField * | createExtendedGauge (cudaGaugeField &in, const int *R, TimeProfile &profile, bool redundant_comms=false, QudaReconstructType recon=QUDA_RECONSTRUCT_INVALID) |
cpuGaugeField * | createExtendedGauge (void **gauge, QudaGaugeParam &gauge_param, const int *R) |
void | extractGaugeGhost (const GaugeField &u, void **ghost, bool extract=true, int offset=0) |
void | extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract) |
void | applyGaugePhase (GaugeField &u) |
uint64_t | Checksum (const GaugeField &u, bool mini=false) |
QudaReconstructType | Reconstruct_ (const char *func, const char *file, int line, const GaugeField &a, const GaugeField &b) |
Helper function for determining if the reconstruct of the fields is the same. More... | |
template<typename... Args> | |
QudaReconstructType | Reconstruct_ (const char *func, const char *file, int line, const GaugeField &a, const GaugeField &b, const Args &... args) |
Helper function for determining if the reconstruct of the fields is the same. More... | |
void | gaugeForce (GaugeField &mom, const GaugeField &u, double coeff, int ***input_path, int *length, double *path_coeff, int num_paths, int max_length) |
Compute the gauge-force contribution to the momentum. More... | |
void | gaugeObservables (GaugeField &u, QudaGaugeObservableParam &param, TimeProfile &profile) |
Calculates a variety of gauge-field observables. More... | |
void | projectSU3 (GaugeField &U, double tol, int *fails) |
Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken. More... | |
double3 | plaquette (const GaugeField &U) |
Compute the plaquette of the gauge field. More... | |
void | gaugeGauss (GaugeField &U, RNG &rngstate, double epsilon) |
Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More... | |
void | gaugeGauss (GaugeField &U, unsigned long long seed, double epsilon) |
Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More... | |
void | APEStep (GaugeField &dataDs, GaugeField &dataOr, double alpha) |
Apply APE smearing to the gauge field. More... | |
void | STOUTStep (GaugeField &dataDs, GaugeField &dataOr, double rho) |
Apply STOUT smearing to the gauge field. More... | |
void | OvrImpSTOUTStep (GaugeField &dataDs, GaugeField &dataOr, double rho, double epsilon) |
Apply Over Improved STOUT smearing to the gauge field. More... | |
void | WFlowStep (GaugeField &out, GaugeField &temp, GaugeField &in, double epsilon, QudaWFlowType wflow_type) |
Apply Wilson Flow steps W1, W2, Vt to the gauge field. This routine assumes that the input and output fields are extended, with the input field being exchanged prior to calling this function. On exit from this routine, the output field will have been exchanged. More... | |
void | gaugeFixingOVR (GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta) |
Gauge fixing with overrelaxation with support for single and multi GPU. More... | |
void | gaugeFixingFFT (GaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, const int autotune, const double tolerance, const int stopWtheta) |
Gauge fixing with Steepest descent method with FFTs with support for single GPU only. More... | |
void | computeFmunu (GaugeField &Fmunu, const GaugeField &gauge) |
Compute the Fmunu tensor. More... | |
void | computeQCharge (double energy[3], double &qcharge, const GaugeField &Fmunu) |
Compute the topological charge and field energy. More... | |
void | computeQChargeDensity (double energy[3], double &qcharge, void *qdensity, const GaugeField &Fmunu) |
Compute the topological charge, field energy and the topological charge density per lattice site. More... | |
void | updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact) |
__device__ void | load_streaming_double2 (double2 &a, const double2 *addr) |
__device__ void | load_streaming_float4 (float4 &a, const float4 *addr) |
__device__ void | load_cached_short4 (short4 &a, const short4 *addr) |
__device__ void | load_cached_short2 (short2 &a, const short2 *addr) |
__device__ void | load_global_short4 (short4 &a, const short4 *addr) |
__device__ void | load_global_short2 (short2 &a, const short2 *addr) |
__device__ void | load_global_float4 (float4 &a, const float4 *addr) |
__device__ void | store_streaming_float4 (float4 *addr, float x, float y, float z, float w) |
__device__ void | store_streaming_short4 (short4 *addr, short x, short y, short z, short w) |
__device__ void | store_streaming_double2 (double2 *addr, double x, double y) |
__device__ void | store_streaming_float2 (float2 *addr, float x, float y) |
__device__ void | store_streaming_short2 (short2 *addr, short x, short y) |
template<QudaReconstructType recon> | |
constexpr bool | is_enabled () |
template<> | |
constexpr bool | is_enabled< QUDA_RECONSTRUCT_NO > () |
template<> | |
constexpr bool | is_enabled< QUDA_RECONSTRUCT_13 > () |
template<> | |
constexpr bool | is_enabled< QUDA_RECONSTRUCT_12 > () |
template<> | |
constexpr bool | is_enabled< QUDA_RECONSTRUCT_9 > () |
template<> | |
constexpr bool | is_enabled< QUDA_RECONSTRUCT_8 > () |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename G , typename... Args> | |
constexpr void | instantiate (G &U, Args &&... args) |
This instantiate function is used to instantiate the colors. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon = ReconstructFull, typename G , typename... Args> | |
constexpr void | instantiate (G &U, Args &&... args) |
This instantiate function is used to instantiate the precisions. More... | |
template<template< typename > class Apply, typename C , typename... Args> | |
constexpr void | instantiate (C &c, Args &&... args) |
This instantiate function is used to instantiate the clover precision. More... | |
template<template< typename, int > class Apply, typename store_t , typename F , typename... Args> | |
constexpr void | instantiate (F &field, Args &&... args) |
This instantiate function is used to instantiate the colors. More... | |
template<template< typename, int > class Apply, typename F , typename... Args> | |
constexpr void | instantiate (F &field, Args &&... args) |
This instantiate function is used to instantiate the precision and number of colors. More... | |
template<template< typename > class Apply, typename F , typename... Args> | |
constexpr void | instantiatePrecision (F &field, Args &&... args) |
The instantiatePrecision function is used to instantiate the precision. Note unlike the "instantiate" functions above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support. More... | |
template<template< typename, typename > class Apply, typename T , typename F , typename... Args> | |
constexpr void | instantiatePrecision2 (F &field, Args &&... args) |
The instantiatePrecision2 function is used to instantiate the precision for a class that accepts 2 typename arguments, with the first typename corresponding to the precision being instantiated at hand. This is useful for copy routines, where we need to instantiate a second, e.g., destination, precision after already instantiating the first, e.g., source, precision. Similar to the "instantiatePrecision" function above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support. More... | |
template<template< typename > class Apply, typename F , typename... Args> | |
constexpr void | instantiatePrecisionMG (F &field, Args &&... args) |
The instantiatePrecisionMG function is used to instantiate the precision. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args> | |
void | instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiate function is used to instantiate the reconstruct types used. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args> | |
void | instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiate function is used to instantiate the colors. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args> | |
void | instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiate function is used to instantiate the precisions. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args> | |
void | instantiatePreconditioner (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiatePreconditioner function is used to instantiate the precisions for a preconditioner. This is the same as the instantiate helper above, except it only handles half and quarter precision. More... | |
void | completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL) |
std::ostream & | operator<< (std::ostream &output, const LatticeFieldParam &param) |
QudaFieldLocation | Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b) |
Helper function for determining if the location of the fields is the same. More... | |
template<typename... Args> | |
QudaFieldLocation | Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args) |
Helper function for determining if the location of the fields is the same. More... | |
QudaPrecision | Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b) |
Helper function for determining if the precision of the fields is the same. More... | |
template<typename... Args> | |
QudaPrecision | Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args) |
Helper function for determining if the precision of the fields is the same. More... | |
bool | Native_ (const char *func, const char *file, int line, const LatticeField &a) |
Helper function for determining if the field is in native order. More... | |
template<typename... Args> | |
bool | Native_ (const char *func, const char *file, int line, const LatticeField &a, const Args &... args) |
Helper function for determining if the fields are in native order. More... | |
QudaFieldLocation | reorder_location () |
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More... | |
void | reorder_location_set (QudaFieldLocation reorder_location_) |
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More... | |
const char * | compile_type_str (const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION) |
Helper function for setting auxiliary string. More... | |
void | fatKSLink (GaugeField *fat, const GaugeField &u, const double *coeff) |
Compute the fat links for improved staggered (Kogut-Susskind) fermions. More... | |
void | longKSLink (GaugeField *lng, const GaugeField &u, const double *coeff) |
Compute the long links for improved staggered (Kogut-Susskind) fermions. More... | |
void | printPeakMemUsage () |
void | assertAllMemFree () |
size_t | device_allocated () |
size_t | pinned_allocated () |
size_t | mapped_allocated () |
size_t | host_allocated () |
size_t | device_allocated_peak () |
size_t | pinned_allocated_peak () |
size_t | mapped_allocated_peak () |
size_t | host_allocated_peak () |
bool | use_managed_memory () |
bool | is_prefetch_enabled () |
void * | device_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | device_pinned_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | device_comms_pinned_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | safe_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | pinned_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | mapped_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | managed_malloc_ (const char *func, const char *file, int line, size_t size) |
void | device_free_ (const char *func, const char *file, int line, void *ptr) |
void | device_pinned_free_ (const char *func, const char *file, int line, void *ptr) |
void | device_comms_pinned_free_ (const char *func, const char *file, int line, void *ptr) |
void | managed_free_ (const char *func, const char *file, int line, void *ptr) |
void | host_free_ (const char *func, const char *file, int line, void *ptr) |
constexpr const char * | str_end (const char *str) |
constexpr bool | str_slant (const char *str) |
constexpr const char * | r_slant (const char *str) |
constexpr const char * | file_name (const char *str) |
QudaFieldLocation | get_pointer_location (const void *ptr) |
void * | get_mapped_device_pointer_ (const char *func, const char *file, int line, const void *ptr) |
bool | is_aligned (const void *ptr, size_t alignment) |
double | computeMomAction (const GaugeField &mom) |
Compute and return the global momentum action 1/2 mom^2. More... | |
void | updateMomentum (GaugeField &mom, double coeff, GaugeField &force, const char *fname) |
void | applyU (GaugeField &force, GaugeField &U) |
bool | forceMonitor () |
Whether we are monitoring the force or not. More... | |
void | flushForceMonitor () |
Flush any outstanding force monitoring information. More... | |
void | ApplyCoarse (ColorSpinorField &out, const ColorSpinorField &inA, const ColorSpinorField &inB, const GaugeField &Y, const GaugeField &X, double kappa, int parity=QUDA_INVALID_PARITY, bool dslash=true, bool clover=true, bool dagger=false, const int *commDim=0, QudaPrecision halo_precision=QUDA_INVALID_PRECISION) |
Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields. More... | |
void | CoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaCloverField *clover, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc) |
Coarse operator construction from a fine-grid operator (Wilson / Clover) More... | |
void | StaggeredCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaGaugeField *XinvKD, double mass, QudaDiracType dirac, QudaMatPCType matpc) |
Coarse operator construction from a fine-grid operator (Staggered) More... | |
void | CoarseCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge, const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mass, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, bool use_mma=false) |
Coarse operator construction from an intermediate-grid operator (Coarse) More... | |
void | calculateYhat (GaugeField &Yhat, GaugeField &Xinv, const GaugeField &Y, const GaugeField &X, bool use_mma=false) |
Calculate preconditioned coarse links and coarse clover inverse field. More... | |
void | Monte (GaugeField &data, RNG &rngstate, double Beta, int nhb, int nover) |
Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps. More... | |
void | InitGaugeField (GaugeField &data) |
Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data) More... | |
void | InitGaugeField (GaugeField &data, RNG &rngstate) |
Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchange border links in multi-GPU case. More... | |
void | PGaugeExchange (GaugeField &data, const int n_dim, const int parity) |
Exchange "borders" between nodes. Although the radius border is 2, it only updates the interior radius border, i.e., at 1 and X[d-2] where X[d] already includes the Radius border, and does not update the faces at 0 and X[d-1]. More... | |
void | PGaugeExchangeFree () |
Release all allocated memory used to exchange data between nodes. More... | |
double2 | getLinkDeterminant (GaugeField &data) |
Calculate the Determinant. More... | |
double2 | getLinkTrace (GaugeField &data) |
Calculate the Trace. More... | |
qudaError_t | qudaLaunchKernel (const void *func, const TuneParam &tp, void **args, qudaStream_t stream) |
Wrapper around cudaLaunchKernel. More... | |
template<typename T , typename... Arg> | |
qudaError_t | qudaLaunchKernel (T *func, const TuneParam &tp, qudaStream_t stream, const Arg &... arg) |
Templated wrapper around qudaLaunchKernel which can accept a templated kernel, and expects a kernel with a single Arg argument. More... | |
void | qudaMemcpy_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpy or driver API equivalent. More... | |
void | qudaMemcpyAsync_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpyAsync or driver API equivalent. More... | |
void | qudaMemcpy2D_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpy2D or driver API equivalent. More... | |
void | qudaMemcpy2DAsync_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpy2DAsync or driver API equivalent. More... | |
void | qudaMemset_ (void *ptr, int value, size_t count, const char *func, const char *file, const char *line) |
Wrapper around cudaMemset or driver API equivalent. More... | |
void | qudaMemset2D_ (void *ptr, size_t pitch, int value, size_t width, size_t height, const char *func, const char *file, const char *line) |
Wrapper around cudaMemset2D or driver API equivalent. More... | |
void | qudaMemsetAsync_ (void *ptr, int value, size_t count, const qudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemsetAsync or driver API equivalent. More... | |
void | qudaMemset2DAsync_ (void *ptr, size_t pitch, int value, size_t width, size_t height, const qudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemset2DAsync or driver API equivalent. More... | |
void | qudaMemPrefetchAsync_ (void *ptr, size_t count, QudaFieldLocation mem_space, const qudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemPrefetchAsync or driver API equivalent. More... | |
bool | qudaEventQuery_ (cudaEvent_t &event, const char *func, const char *file, const char *line) |
Wrapper around cudaEventQuery or cuEventQuery with built-in error checking. More... | |
void | qudaEventRecord_ (cudaEvent_t &event, qudaStream_t stream, const char *func, const char *file, const char *line) |
Wrapper around cudaEventRecord or cuEventRecord with built-in error checking. More... | |
void | qudaStreamWaitEvent_ (qudaStream_t stream, cudaEvent_t event, unsigned int flags, const char *func, const char *file, const char *line) |
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking. More... | |
void | qudaEventSynchronize_ (cudaEvent_t &event, const char *func, const char *file, const char *line) |
Wrapper around cudaEventSynchronize or cuEventSynchronize with built-in error checking. More... | |
void | qudaStreamSynchronize_ (qudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaStreamSynchronize or cuStreamSynchronize with built-in error checking. More... | |
void | qudaDeviceSynchronize_ (const char *func, const char *file, const char *line) |
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize with built-in error checking. More... | |
void | printAPIProfile () |
Print out the timer profile for CUDA API calls. More... | |
bool | canReuseResidentGauge (QudaInvertParam *inv_param) |
template<class T > | |
__device__ __host__ T | getTrace (const Matrix< T, 3 > &a) |
template<template< typename, int > class Mat, class T > | |
__device__ __host__ T | getDeterminant (const Mat< T, 3 > &a) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator+ (const Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator+= (Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator+= (Mat< T, N > &a, const T &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator-= (Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator- (const Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N, class S > | |
__device__ __host__ Mat< T, N > | operator* (const S &scalar, const Mat< T, N > &a) |
template<template< typename, int > class Mat, class T , int N, class S > | |
__device__ __host__ Mat< T, N > | operator* (const Mat< T, N > &a, const S &scalar) |
template<template< typename, int > class Mat, class T , int N, class S > | |
__device__ __host__ Mat< T, N > | operator*= (Mat< T, N > &a, const S &scalar) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator- (const Mat< T, N > &a) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator* (const Mat< T, N > &a, const Mat< T, N > &b) |
Generic implementation of matrix multiplication. More... | |
template<template< typename > class complex, typename T , int N> | |
__device__ __host__ Matrix< complex< T >, N > | operator* (const Matrix< complex< T >, N > &a, const Matrix< complex< T >, N > &b) |
Specialization of complex matrix multiplication that will issue optimal fma instructions. More... | |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator*= (Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , class U , int N> | |
__device__ __host__ Matrix< typename PromoteTypeId< T, U >::type, N > | operator* (const Matrix< T, N > &a, const Matrix< U, N > &b) |
template<class T > | |
__device__ __host__ Matrix< T, 2 > | operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | conj (const Matrix< T, N > &other) |
template<class T > | |
__device__ __host__ Matrix< T, 3 > | inverse (const Matrix< T, 3 > &u) |
template<class T , int N> | |
__device__ __host__ void | setIdentity (Matrix< T, N > *m) |
template<int N> | |
__device__ __host__ void | setIdentity (Matrix< float2, N > *m) |
template<int N> | |
__device__ __host__ void | setIdentity (Matrix< double2, N > *m) |
template<class T , int N> | |
__device__ __host__ void | setZero (Matrix< T, N > *m) |
template<int N> | |
__device__ __host__ void | setZero (Matrix< float2, N > *m) |
template<int N> | |
__device__ __host__ void | setZero (Matrix< double2, N > *m) |
template<typename Complex , int N> | |
__device__ __host__ void | makeAntiHerm (Matrix< Complex, N > &m) |
template<typename Complex , int N> | |
__device__ __host__ void | makeHerm (Matrix< Complex, N > &m) |
template<class T , int N> | |
__device__ __host__ void | copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a) |
template<class T , int N> | |
std::ostream & | operator<< (std::ostream &os, const Matrix< T, N > &m) |
template<class T , int N> | |
std::ostream & | operator<< (std::ostream &os, const Array< T, N > &a) |
template<class Cmplx > | |
__device__ __host__ void | computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u) |
void | copyArrayToLink (Matrix< float2, 3 > *link, float *array) |
template<class Cmplx , class Real > | |
void | copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array) |
void | copyLinkToArray (float *array, const Matrix< float2, 3 > &link) |
template<class Cmplx , class Real > | |
void | copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link) |
template<class T > | |
__device__ __host__ Matrix< T, 3 > | getSubTraceUnit (const Matrix< T, 3 > &a) |
template<class T > | |
__device__ __host__ void | SubTraceUnit (Matrix< T, 3 > &a) |
template<class T > | |
__device__ __host__ double | getRealTraceUVdagger (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b) |
template<class Cmplx > | |
__host__ __device__ void | printLink (const Matrix< Cmplx, 3 > &link) |
template<class Cmplx > | |
__device__ __host__ double | ErrorSU3 (const Matrix< Cmplx, 3 > &matrix) |
template<class T > | |
__device__ __host__ auto | exponentiate_iQ (const Matrix< T, 3 > &Q) |
template<typename Float > | |
__device__ __host__ void | expsu3 (Matrix< complex< Float >, 3 > &q) |
template<class Real > | |
__device__ Real | Random (cuRNGState &state, Real a, Real b) |
Return a random number between a and b. More... | |
template<> | |
__device__ float | Random< float > (cuRNGState &state, float a, float b) |
template<> | |
__device__ double | Random< double > (cuRNGState &state, double a, double b) |
template<class Real > | |
__device__ Real | Random (cuRNGState &state) |
Return a random number between 0 and 1. More... | |
template<> | |
__device__ float | Random< float > (cuRNGState &state) |
template<> | |
__device__ double | Random< double > (cuRNGState &state) |
constexpr int | max_n_reduce () |
template<typename T > | |
constexpr T | init_value () |
The initialization value we used to check for completion. More... | |
template<typename T > | |
constexpr T | terminate_value () |
The termination value we use to prevent a possible hang in case the computed reduction is equal to the initialization. More... | |
template<typename VectorType > | |
__device__ __host__ VectorType | vector_load (const void *ptr, int idx) |
template<> | |
__device__ __host__ short8 | vector_load (const void *ptr, int idx) |
template<typename VectorType > | |
__device__ __host__ void | vector_store (void *ptr, int idx, const VectorType &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const double2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const float4 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const float2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const short4 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const short2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const char4 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const char2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const short8 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const char8 &value) |
template<class Field > | |
void | split_field (Field &collect_field, std::vector< Field * > &v_base_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC) |
template<class Field > | |
void | join_field (std::vector< Field * > &v_base_field, const Field &collect_field, const CommKey &comm_key, QudaPCType pc_type=QUDA_4D_PC) |
void | BuildStaggeredKahlerDiracInverse (GaugeField &Xinv, const cudaGaugeField &gauge, const double mass) |
Build the Kahler-Dirac inverse block for KD operators. More... | |
cudaGaugeField * | AllocateAndBuildStaggeredKahlerDiracInverse (const cudaGaugeField &gauge, const double mass, const QudaPrecision override_prec) |
Allocate and build the Kahler-Dirac inverse block for KD operators. More... | |
void | computeStaggeredOprod (GaugeField *out[], ColorSpinorField &in, const double coeff[], int nFace) |
Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. E.g.,. More... | |
void | BlockOrthogonalize (ColorSpinorField &V, const std::vector< ColorSpinorField * > &B, const int *fine_to_coarse, const int *coarse_to_fine, const int *geo_bs, const int spin_bs, const int n_block_ortho) |
Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom. More... | |
void | Prolongate (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY) |
Apply the prolongation operator. More... | |
void | Restrict (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *coarse_to_fine, const int *const *spin_map, int parity=QUDA_INVALID_PARITY) |
Apply the restriction operator. More... | |
void | StaggeredProlongate (ColorSpinorField &out, const ColorSpinorField &in, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY) |
Apply the unitary "prolongation" operator for Kahler-Dirac preconditioning. More... | |
void | StaggeredRestrict (ColorSpinorField &out, const ColorSpinorField &in, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY) |
Apply the unitary "restriction" operator for Kahler-Dirac preconditioning. More... | |
template<typename Arg > | |
void | transform_reduce (Arg &arg) |
template<typename Arg > | |
__launch_bounds__ (Arg::block_size) __global__ void transform_reduce_kernel(Arg arg) | |
template<typename reduce_t , typename T , typename I , typename transformer , typename reducer > | |
void | transform_reduce (QudaFieldLocation location, std::vector< reduce_t > &result, const std::vector< T * > &v, I n_items, transformer h, reduce_t init, reducer r) |
QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation. More... | |
template<typename reduce_t , typename T , typename I , typename transformer , typename reducer > | |
reduce_t | transform_reduce (QudaFieldLocation location, const T *v, I n_items, transformer h, reduce_t init, reducer r) |
QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory. More... | |
template<typename reduce_t , typename T , typename I , typename transformer , typename reducer > | |
void | reduce (QudaFieldLocation location, std::vector< reduce_t > &result, const std::vector< T * > &v, I n_items, reduce_t init, reducer r) |
QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation. More... | |
template<typename reduce_t , typename T , typename I , typename reducer > | |
reduce_t | reduce (QudaFieldLocation location, const T *v, I n_items, reduce_t init, reducer r) |
QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory. More... | |
const std::map< TuneKey, TuneParam > & | getTuneCache () |
Returns a reference to the tunecache map. More... | |
bool | activeTuning () |
query if tuning is in progress More... | |
void | loadTuneCache () |
void | saveTuneCache (bool error=false) |
void | saveProfile (const std::string label="") |
Save profile to disk. More... | |
void | flushProfile () |
Flush profile contents, setting all counts to zero. More... | |
TuneParam | tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity) |
void | postTrace_ (const char *func, const char *file, int line) |
Post an event in the trace, recording where it was posted. More... | |
void | enableProfileCount () |
Enable the profile kernel counting. More... | |
void | disableProfileCount () |
Disable the profile kernel counting. More... | |
void | setPolicyTuning (bool) |
Enable / disable whether we are tuning a policy. More... | |
bool | policyTuning () |
Query whether we are currently tuning a policy. More... | |
void | setUberTuning (bool) |
Enable / disable whether we are tuning an uber kernel. More... | |
bool | uberTuning () |
Query whether we are tuning an uber kernel. More... | |
void | u32toa (char *buffer, uint32_t value) |
void | i32toa (char *buffer, int32_t value) |
void | u64toa (char *buffer, uint64_t value) |
void | i64toa (char *buffer, int64_t value) |
void | setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error) |
void | unitarizeLinksCPU (GaugeField &outfield, const GaugeField &infield) |
void | unitarizeLinks (GaugeField &outfield, const GaugeField &infield, int *fails) |
void | unitarizeLinks (GaugeField &outfield, int *fails) |
bool | isUnitary (const cpuGaugeField &field, double max_error) |
ColorSpinorParam | colorSpinorParam (const CloverField &a, bool inverse) |
std::ostream & | operator<< (std::ostream &out, const ColorSpinorField &a) |
void | copyGenericColorSpinorDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorDH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorDQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericGaugeDoubleIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeSingleIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeHalfIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeQuarterIn (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | checkMomOrder (const GaugeField &u) |
void * | create_gauge_buffer (size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
void ** | create_ghost_buffer (size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
void | free_gauge_buffer (void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
void | free_ghost_buffer (void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
std::ostream & | operator<< (std::ostream &out, const cudaColorSpinorField &a) |
ColorSpinorParam | colorSpinorParam (const GaugeField &a) |
void | printLaunchTimer () |
void | setDiracRefineParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc) |
void | setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms) |
void | setDiracEigParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms) |
void | massRescale (cudaColorSpinorField &b, QudaInvertParam &param, bool for_multishift) |
void | fillInnerSolveParam (SolverParam &inner, const SolverParam &outer) |
int | reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta) |
template<int N> | |
void | compute_alpha_N (Complex *Q_AQandg, Complex *alpha) |
template<int N> | |
void | compute_beta_N (Complex *Q_AQandg, Complex *Q_AS, Complex *beta) |
template<libtype which_lib> | |
void | ComputeRitz (EigCGArgs &args) |
template<> | |
void | ComputeRitz< libtype::eigen_lib > (EigCGArgs &args) |
template<> | |
void | ComputeRitz< libtype::magma_lib > (EigCGArgs &args) |
double | timeInterval (struct timeval start, struct timeval end) |
void | computeBeta (Complex **beta, std::vector< ColorSpinorField * > Ap, int i, int N, int k) |
void | updateAp (Complex **beta, std::vector< ColorSpinorField * > Ap, int begin, int size, int k) |
void | orthoDir (Complex **beta, std::vector< ColorSpinorField * > Ap, int k, int pipeline) |
void | backSubs (const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n) |
void | updateSolution (ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField * > p) |
template<libtype which_lib> | |
void | ComputeHarmonicRitz (GMResDRArgs &args) |
template<> | |
void | ComputeHarmonicRitz< libtype::magma_lib > (GMResDRArgs &args) |
template<> | |
void | ComputeHarmonicRitz< libtype::eigen_lib > (GMResDRArgs &args) |
template<libtype which_lib> | |
void | ComputeEta (GMResDRArgs &args) |
template<> | |
void | ComputeEta< libtype::magma_lib > (GMResDRArgs &args) |
template<> | |
void | ComputeEta< libtype::eigen_lib > (GMResDRArgs &args) |
void | fillFGMResDRInnerSolveParam (SolverParam &inner, const SolverParam &outer) |
void | print (const double d[], int n) |
void | updateAlphaZeta (double *alpha, double *zeta, double *zeta_old, const double *r2, const double *beta, const double pAp, const double *offset, const int nShift, const int j_low) |
size_t | managed_allocated () |
size_t | managed_allocated_peak () |
void | qudaFuncSetAttribute_ (const void *kernel, cudaFuncAttribute attr, int value, const char *func, const char *file, const char *line) |
Wrapper around cudaFuncSetAttribute with built-in error checking. More... | |
void | qudaFuncGetAttributes_ (cudaFuncAttributes &attr, const void *kernel, const char *func, const char *file, const char *line) |
Wrapper around cudaFuncGetAttributes with built-in error checking. More... | |
int | traceEnabled () |
void | setTransferGPU (bool) |
Variables | |
const int | Nstream = 9 |
qudaStream_t * | stream |
Here we detail how the MMA kernels for computeUV and computeVUV should be launched. Specifically:
Here we detail how the MMA kernels for computeYhat should be launched. Specifically:
This file contains definitions required when compiling with C++14. Without these, we can end up with undefined references at link time. We can remove this file when we jump to C++17 and declare these as inline variables in instantiate.h.
Generic Multi Shift Solver
For staggered, the mass is folded into the dirac operator. Otherwise the matrix mass is 'unmodified'.
The lowest offset is in offsets[0]
using quda::ColorSpinorFieldSet = typedef ColorSpinorField |
Definition at line 1352 of file invert_quda.h.
typedef std::complex<double> quda::Complex |
Definition at line 86 of file quda_internal.h.
typedef std::vector<ColorSpinorField*> quda::CompositeColorSpinorField |
Typedef for a set of spinors. Can be further divided into subsets, e.g., with different precisions (not implemented currently)
Definition at line 71 of file color_spinor_field.h.
typedef struct curandStateMRG32k3a quda::cuRNGState |
Definition at line 1 of file random_quda.h.
typedef MatrixXcd quda::DenseMatrix |
Definition at line 32 of file inv_eigcg_quda.cpp.
typedef Stride< Dynamic, Dynamic > quda::DynamicStride |
Definition at line 17 of file deflation.cpp.
using quda::mgarray = typedef std::array<T, QUDA_MAX_MG_LEVEL> |
Definition at line 12 of file command_line_params.h.
using quda::RealVector = typedef VectorXd |
Definition at line 35 of file inv_eigcg_quda.cpp.
typedef Matrix< Complex, Dynamic, Dynamic, RowMajor > quda::RowMajorDenseMatrix |
Definition at line 38 of file inv_eigcg_quda.cpp.
typedef VectorXcd quda::Vector |
Definition at line 34 of file inv_eigcg_quda.cpp.
typedef MatrixXcd quda::VectorSet |
Definition at line 33 of file inv_eigcg_quda.cpp.
enum quda::AllocType |
Enumerator | |
---|---|
DEVICE | |
DEVICE_PINNED | |
HOST | |
PINNED | |
MAPPED | |
MANAGED | |
SHMEM | |
N_ALLOC_TYPE | |
DEVICE | |
DEVICE_PINNED | |
HOST | |
PINNED | |
MAPPED | |
MANAGED | |
N_ALLOC_TYPE |
Definition at line 22 of file malloc.cpp.
enum quda::AllocType |
Enumerator | |
---|---|
DEVICE | |
DEVICE_PINNED | |
HOST | |
PINNED | |
MAPPED | |
MANAGED | |
SHMEM | |
N_ALLOC_TYPE | |
DEVICE | |
DEVICE_PINNED | |
HOST | |
PINNED | |
MAPPED | |
MANAGED | |
N_ALLOC_TYPE |
Definition at line 20 of file malloc.cpp.
The following code is based on Kate's worker class in Multi-CG.
This worker class is used to update most of the u and r vectors. On BiCG iteration j, r[0] through r[j] and u[0] through u[j] all get updated, but the subsequent mat-vec operation only gets applied to r[j] and u[j]. Thus, we can hide updating r[0] through r[j-1] and u[0] through u[j-1], respectively, in the comms for the matvec on r[j] and u[j]. This results in improved strong scaling for BiCGstab-L.
See paragraphs 2 and 3 in the comments on the Worker class in Multi-CG for more remarks.
Enumerator | |
---|---|
BICGSTABL_UPDATE_U | |
BICGSTABL_UPDATE_R |
Definition at line 168 of file inv_bicgstabl_quda.cpp.
enum quda::blockType |
Enumerator | |
---|---|
PENCIL | |
LOWER_TRI | |
UPPER_TRI |
Definition at line 12 of file eigensolve_quda.h.
|
strong |
Enumerator | |
---|---|
BOTH_CLOVER_PREFETCH_TYPE | |
CLOVER_CLOVER_PREFETCH_TYPE | |
INVERSE_CLOVER_PREFETCH_TYPE | |
INVALID_CLOVER_PREFETCH_TYPE |
Definition at line 28 of file clover_field.h.
enum quda::Dslash5Type |
Enumerator | |
---|---|
DSLASH5_DWF | |
DSLASH5_MOBIUS_PRE | |
DSLASH5_MOBIUS | |
M5_INV_DWF | |
M5_INV_MOBIUS | |
M5_INV_ZMOBIUS | |
M5_EOFA | |
M5INV_EOFA |
Definition at line 557 of file dslash_quda.h.
|
strong |
Enumerator | |
---|---|
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib | |
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib |
Definition at line 42 of file inv_eigcg_quda.cpp.
|
strong |
Enumerator | |
---|---|
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib | |
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib |
Definition at line 52 of file inv_gmresdr_quda.cpp.
|
strong |
Applying the following five kernels in the order of 4-0-1-2-3 is equivalent to applying the full even-odd preconditioned symmetric MdagM operator: op = (1 - M5inv * D4 * D5pre * M5inv * D4 * D5pre)^dag (1 - M5inv * D4 * D5pre * M5inv * D4 * D5pre)
Enumerator | |
---|---|
D4_D5INV_D5PRE | |
D4_D5INV_D5INVDAG | |
D4DAG_D5PREDAG_D5INVDAG | |
D4DAG_D5PREDAG | |
D5PRE |
Definition at line 574 of file dslash_quda.h.
enum quda::MemoryLocation |
Enumerator | |
---|---|
Device | |
Host | |
Remote | |
Shmem |
Definition at line 50 of file color_spinor_field.h.
|
strong |
Enumerator | |
---|---|
COLLECT | |
DISPERSE |
Definition at line 46 of file lattice_field.h.
quda::__launch_bounds__ | ( | Arg::block_size | ) |
Definition at line 74 of file transform_reduce.h.
|
inline |
Definition at line 1066 of file complex_quda.h.
|
inline |
Definition at line 1061 of file complex_quda.h.
|
inline |
Returns the magnitude of z.
Definition at line 1056 of file complex_quda.h.
|
inline |
Definition at line 125 of file complex_quda.h.
|
inline |
Definition at line 1270 of file complex_quda.h.
|
inline |
Definition at line 61 of file complex_quda.h.
|
inline |
Definition at line 1291 of file complex_quda.h.
bool quda::activeTuning | ( | ) |
cudaGaugeField* quda::AllocateAndBuildStaggeredKahlerDiracInverse | ( | const cudaGaugeField & | gauge, |
const double | mass, | ||
const QudaPrecision | override_prec | ||
) |
Allocate and build the Kahler-Dirac inverse block for KD operators.
[in] | gauge | original fine gauge field |
[in] | mass | the mass of the original staggered operator w/out factor of 2 convention |
[in] | override_prec | precision of Xinv field |
void quda::APEStep | ( | GaugeField & | dataDs, |
GaugeField & | dataOr, | ||
double | alpha | ||
) |
Apply APE smearing to the gauge field.
[out] | dataDs | Output smeared field |
[in] | dataOr | Input gauge field |
[in] | alpha | smearing parameter |
void quda::ApplyClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const CloverField & | clover, | ||
bool | inverse, | ||
int | parity | ||
) |
Apply clover-matrix field to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | clover | Clover-matrix field |
[in] | inverse | Whether we are applying the inverse or not |
[in] | parity | Field parity (if color-spinor field is single parity) |
void quda::ApplyCoarse | ( | ColorSpinorField & | out, |
const ColorSpinorField & | inA, | ||
const ColorSpinorField & | inB, | ||
const GaugeField & | Y, | ||
const GaugeField & | X, | ||
double | kappa, | ||
int | parity = QUDA_INVALID_PARITY , |
||
bool | dslash = true , |
||
bool | clover = true , |
||
bool | dagger = false , |
||
const int * | commDim = 0 , |
||
QudaPrecision | halo_precision = QUDA_INVALID_PRECISION |
||
) |
Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields.
[out] | out | The result vector |
[in] | inA | The first input vector |
[in] | inB | The second input vector |
[in] | Y | Coarse link field |
[in] | X | Coarse clover field |
[in] | kappa | Scaling parameter |
[in] | parity | Parity of the field (if single parity) |
[in] | dslash | Are we applying dslash? |
[in] | clover | Are we applying clover? |
[in] | dagger | Apply dagger operator? |
[in] | commDim | Which dimensions are partitioned? |
[in] | halo_precision | What precision to use for the halos (if QUDA_INVALID_PRECISION, use field precision) |
void quda::ApplyCovDev | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
int | mu, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the covariant derivative.
out = U * in
where U is the gauge field in a particular direction.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the covariant derivative |
[in] | mu | Direction of the derivative. For mu > 3 it goes backwards |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyDomainWall4D | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | m_5, | ||
const Complex * | b_5, | ||
const Complex * | c_5, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order.
out = D * in
where D is the gauged Wilson linear operator.
If a is non-zero, the operation is given by out = x + a * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied |
[in] | m_5 | Wilson mass shift |
[in] | b_5 | Mobius coefficient array (length Ls) |
[in] | c_5 | Mobius coefficient array (length Ls) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyDomainWall5D | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | m_f, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order.
out = D_5 * in
where D_5 is the 5-d wilson linear operator with fifth dimension boundary condition set by the fermion mass.
If a is non-zero, the operation is given by out = x + a * D_5 in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied (typically -kappa_5) |
[in] | m_f | Fermion mass parameter |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyDslash5 | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const ColorSpinorField & | x, | ||
double | m_f, | ||
double | m_5, | ||
const Complex * | b_5, | ||
const Complex * | c_5, | ||
double | a, | ||
bool | dagger, | ||
Dslash5Type | type | ||
) |
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | x | Auxiliary input color-spinor field |
[in] | m_f | Fermion mass parameter |
[in] | m_5 | Wilson mass shift |
[in] | b_5 | Mobius coefficient array (length Ls) |
[in] | c_5 | Mobius coefficient array (length Ls) |
[in] | a | Scale factor used in xpay operator |
[in] | dagger | Whether this is for the dagger operator |
[in] | type | Type of dslash we are applying |
void quda::applyGaugePhase | ( | GaugeField & | u | ) |
Apply the staggered phase factor to the gauge field.
[in] | u | The gauge field to which we apply the staggered phase factors |
void quda::ApplyImprovedStaggered | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const GaugeField & | L, | ||
double | a, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Apply the improved staggered dslash operator to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | U | Gauge-Link (1-link or fat-link) |
[in] | L | Long-Links for asqtad |
[in] | a | xpay parameter (set to 0.0 for non-xpay version) |
[in] | x | Vector field we accumulate onto |
[in] | parity | parity parameter |
[in] | dagger | Whether we are applying the dagger or not |
[in] | improved | whether to apply the standard-staggered (false) or asqtad (true) operator |
void quda::ApplyLaplace | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
int | dir, | ||
double | a, | ||
double | b, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Laplace stencil.
out = - kappa * A * in
where A is the gauge laplace linear operator.
If x is defined, the operation is given by out = x - kappa * A in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the gauge Laplace |
[in] | dir | Direction of the derivative 0,1,2,3 to omit (-1 is full 4D) |
[in] | a | Scale factor applied to derivative |
[in] | b | Scale factor applied to aux field |
[in] | x | Vector field we accumulate onto |
void quda::ApplyNdegTwistedMass | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
double | c, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the non-degenerate twisted-mass stencil.
out = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1) * x
where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.
This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa) |
[in] | b | Chiral twist factor applied (typically 2*mu*kappa) |
[in] | c | Flavor twist factor applied (typically -2*epsilon*kappa) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyNdegTwistedMassPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
double | c, | ||
bool | xpay, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
bool | asymmetric, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned non-degenerate twisted-mass stencil.
out = a * (1 + i*b*gamma_5*tau_3 + c*tau_1) * D * in + x
where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.
This operator can (at present) be applied to only single parity (checker-boarded) fields.
For the dagger operator, we generally apply the conjugate transpose operator
out = x + D^\dagger A^{-\dagger}
with the additional asymmetric special case, where we do not transpose the order of operations
out = A^{-\dagger} D^\dagger (no xpay term)
This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa^2/(1 + b*b -c*c) ) |
[in] | b | Chiral twist factor applied (typically -2*mu*kappa) |
[in] | c | Flavor twist factor applied (typically 2*epsilon*kappa) |
[in] | xpay | Whether to do xpay or not |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | asymmetric | Whether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in) |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyStaggered | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Apply the staggered dslash operator to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | U | Gauge-Link (1-link or fat-link) |
[in] | a | xpay parameter (set to 0.0 for non-xpay version) |
[in] | x | Vector field we accumulate onto |
[in] | parity | parity parameter |
[in] | dagger | Whether we are applying the dagger or not |
[in] | improved | whether to apply the standard-staggered (false) or asqtad (true) operator |
void quda::ApplyStaggeredKahlerDiracInverse | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | Xinv, | ||
bool | dagger | ||
) |
Apply the (improved) staggered Kahler-Dirac inverse block to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | Xinv | Kahler-Dirac inverse field |
[in] | dagger | Whether we are applying the dagger or not |
void quda::ApplyTwistClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const CloverField & | clover, | ||
double | kappa, | ||
double | mu, | ||
double | epsilon, | ||
int | parity, | ||
int | dagger, | ||
QudaTwistGamma5Type | twist | ||
) |
Apply twisted clover-matrix field to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | clover | Clover-matrix field |
[in] | kappa | kappa parameter |
[in] | mu | mu parameter |
[in] | epsilon | epsilon parameter |
[in] | parity | Field parity (if color-spinor field is single parity) |
[in] | dagger | Whether we are applying the dagger or not |
[in] | twist | The type of kernel we are doing if (twist == QUDA_TWIST_GAMMA5_DIRECT) apply (Clover + i*a*gamma_5) to the input spinor else if (twist == QUDA_TWIST_GAMMA5_INVERSE) apply (Clover + i*a*gamma_5)/(Clover^2 + a^2) to the input spinor |
void quda::ApplyTwistedClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | C, | ||
double | a, | ||
double | b, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the twisted-clover stencil.
out = a * D * in + (C + i*b*gamma_5) * x
where D is the gauged Wilson linear operator, and C is the clover field.
This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | C | The clover field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa) |
[in] | b | Chiral twist factor applied (typically 2*mu*kappa) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyTwistedCloverPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | C, | ||
double | a, | ||
double | b, | ||
bool | xpay, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned twisted-clover stencil.
out = a * (C + i*b*gamma_5)^{-1} * D * in + x = a * C^{-2} (C - i*b*gamma_5) * D * in + x = A^{-1} * D * in + x
where D is the gauged Wilson linear operator and C is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.
out = A^{-\dagger} D^\dagger (no xpay term)
Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply
M = (1 - kappa^2 D^{\dagger} A^{-\dagger} D^{\dagger} A^{-\dagger} )
and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | C | The clover field used for the operator |
[in] | a | Scale factor applied to Wilson term ( typically 1 / (1 + b*b) or kappa^2 / (1 + b*b) ) |
[in] | b | Twist factor applied (typically -2*kappa*mu) |
[in] | xpay | Whether to do xpay or not |
[in] | x | Vector field we accumulate onto when xpay is true |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyTwistedMass | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
void quda::ApplyTwistedMassPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
bool | xpay, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
bool | asymmetric, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned twisted-mass stencil.
out = a*(1 + i*b*gamma_5) * D * in + x
where D is the gauged Wilson linear operator. This operator can (at present) be applied to only single parity (checker-boarded) fields. For the dagger operator, we generally apply the conjugate transpose operator
out = x + D^\dagger A^{-\dagger}
with the additional asymmetric special case, where we do not transpose the order of operations
out = A^{-\dagger} D^\dagger (no xpay term)
This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term ( typically kappa^2 / (1 + b*b) ) |
[in] | b | Twist factor applied (typically -2*kappa*mu) |
[in] | xpay | Whether to do xpay or not |
[in] | x | Vector field we accumulate onto when xpay is true |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | asymmetric | Whether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in) |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyTwistGamma | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
int | d, | ||
double | kappa, | ||
double | mu, | ||
double | epsilon, | ||
int | dagger, | ||
QudaTwistGamma5Type | type | ||
) |
Apply the twisted-mass gamma operator to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | d | Which gamma matrix we are applying (C counting, so gamma_5 has d=4) |
[in] | kappa | kappa parameter |
[in] | mu | mu parameter |
[in] | epsilon | epsilon parameter |
[in] | dagger | Whether we are applying the dagger or not |
[in] | type | The type of kernel we are doing |
void quda::applyU | ( | GaugeField & | force, |
GaugeField & | U | ||
) |
Left multiply the force field by the gauge field
force = U * force
force | Force field |
U | Gauge field |
void quda::ApplyWilson | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Wilson stencil.
out = D * in
where D is the gauged Wilson linear operator.
If kappa is non-zero, the operation is given by out = x + kappa * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyWilsonClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Wilson-clover stencil.
out = A * x + kappa * D * in
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | Input field that D is applied to |
[in] | x | Input field that A is applied to |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyWilsonCloverHasenbuschTwist | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
double | mu, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Wilson-clover stencil.
out = A * x + kappa * D * in
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | Input field that D is applied to |
[in] | x | Input field that A is applied to |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | mu | Twist factor |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyWilsonCloverHasenbuschTwistPCClovInv | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
double | mu, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the twisted-mass stencil.
out = a * D * in + (1 + i*b*gamma_5) * x
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa) |
[in] | b | Twist factor applied (typically 2*mu*kappa) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Driver for applying the Wilson-clover with twist for Hasenbusch
out = (1 +/- ig5 b A) * x + kappa * A^{-1}D * in
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | Input field that D is applied to |
[in] | x | Input field that A is applied to |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | b | Twist factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyWilsonCloverHasenbuschTwistPCNoClovInv | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
double | mu, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Wilson-clover stencil with twist for Hasenbusch.
out = (1 +/- ig5 b A) * x + kappa * D * in
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | Input field that D is applied to |
[in] | x | Input field that A is applied to |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | b | Twist factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
void quda::ApplyWilsonCloverPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned Wilson-clover stencil.
out = A^{-1} * D * in + x
where D is the gauged Wilson linear operator and A is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.
out = A^{-\dagger} D^\dagger (no xpay term)
Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply
M = (1 - kappa^2 D^{\dagger} A^{-1} D^{\dagger} A^{-1} )
and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.
If kappa is non-zero, the operation is given by out = x + kappa * A^{-1} D in. This operator can (at present) be applied to only single parity (checker-boarded) fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
|
inline |
Definition at line 1082 of file complex_quda.h.
|
inline |
Definition at line 1077 of file complex_quda.h.
|
inline |
Returns the phase angle of z.
Definition at line 1072 of file complex_quda.h.
void quda::arpack_solve | ( | std::vector< ColorSpinorField * > & | h_evecs, |
std::vector< Complex > & | h_evals, | ||
const DiracMatrix & | mat, | ||
QudaEigParam * | eig_param, | ||
TimeProfile & | profile | ||
) |
The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types.
[out] | h_evecs | Host fields where the e-vectors will be copied to |
[out] | h_evals | Where the e-values will be copied to |
[in] | mat | An explicit construction of the problem matrix. |
[in] | param | Parameter container defining how the matrix is to be solved. |
[in] | eig_param | Parameter structure for all QUDA eigensolvers |
[in,out] | profile | TimeProfile instance used for profiling |
Definition at line 507 of file quda_arpack_interface.cpp.
|
inline |
Definition at line 1277 of file complex_quda.h.
|
inline |
Definition at line 66 of file complex_quda.h.
|
inline |
Definition at line 1316 of file complex_quda.h.
void quda::assertAllMemFree | ( | ) |
Definition at line 549 of file malloc.cpp.
|
inline |
Definition at line 1284 of file complex_quda.h.
|
inline |
Definition at line 71 of file complex_quda.h.
|
inline |
Definition at line 76 of file complex_quda.h.
|
inline |
Definition at line 1340 of file complex_quda.h.
|
inline |
Definition at line 1322 of file complex_quda.h.
void quda::ax | ( | const double & | a, |
GaugeField & | u | ||
) |
Scale the gauge field by the scalar a.
[in] | a | scalar multiplier |
[in] | u | The gauge field we want to multiply |
Definition at line 339 of file gauge_field.cpp.
void quda::backSubs | ( | const Complex * | alpha, |
Complex **const | beta, | ||
const double * | gamma, | ||
Complex * | delta, | ||
int | n | ||
) |
Definition at line 136 of file inv_gcr_quda.cpp.
void quda::BlockOrthogonalize | ( | ColorSpinorField & | V, |
const std::vector< ColorSpinorField * > & | B, | ||
const int * | fine_to_coarse, | ||
const int * | coarse_to_fine, | ||
const int * | geo_bs, | ||
const int | spin_bs, | ||
const int | n_block_ortho | ||
) |
Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom.
[in,out] | V | Matrix field to be orthogonalized |
[in] | B | input vectors |
[in] | geo_bs | Geometric block size |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | coarse_to_fine | Coarse-to-fine lookup table (linear indices) |
[in] | spin_bs | Spin block size |
[in] | n_block_ortho | Number of times to Gram-Schmidt |
void quda::BuildStaggeredKahlerDiracInverse | ( | GaugeField & | Xinv, |
const cudaGaugeField & | gauge, | ||
const double | mass | ||
) |
Build the Kahler-Dirac inverse block for KD operators.
[out] | Xinv | Resulting Kahler-Dirac inverse (assumed allocated) |
[in] | gauge | Original fine gauge field |
[in] | mass | The mass of the original staggered operator w/out factor of 2 convention |
void quda::calculateYhat | ( | GaugeField & | Yhat, |
GaugeField & | Xinv, | ||
const GaugeField & | Y, | ||
const GaugeField & | X, | ||
bool | use_mma = false |
||
) |
Calculate preconditioned coarse links and coarse clover inverse field.
Yhat[out] | Preconditioned coarse link field |
Xinv[out] | Coarse clover inverse field |
Y[in] | Coarse link field |
X[in] | Coarse clover field |
use_mma[in] | Whether or not to use MMA (tensor core) to do the calculation; defaults to false |
bool quda::canReuseResidentGauge | ( | QudaInvertParam * | inv_param | ) |
Check that the resident gauge field is compatible with the requested inv_param
inv_param | Contains all metadata regarding host and device storage |
Definition at line 2173 of file interface_quda.cpp.
void quda::checkMomOrder | ( | const GaugeField & | u | ) |
Definition at line 22 of file copy_gauge.cpp.
uint64_t quda::Checksum | ( | const GaugeField & | u, |
bool | mini = false |
||
) |
Compute XOR-based checksum of this gauge field: each gauge field entry is converted to type uint64_t, and we compute the cumulative XOR of these values.
[in] | mini | Whether to compute a mini checksum or global checksum. A mini checksum only computes over a subset of the lattice sites and is to be used for online comparisons, e.g., checking a field has changed with a global update algorithm. |
void quda::cloverDerivative | ( | cudaGaugeField & | force, |
cudaGaugeField & | gauge, | ||
cudaGaugeField & | oprod, | ||
double | coeff, | ||
QudaParity | parity | ||
) |
Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field.
force | The computed force field (read/write update) |
gauge | The input gauge field |
oprod | The input outer-product field (tensor matrix field) |
coeff | Multiplicative coefficient (e.g., clover coefficient) |
parity | The field parity we are working on |
void quda::cloverInvert | ( | CloverField & | clover, |
bool | computeTraceLog | ||
) |
This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.
clover | The clover field (contains both the field itself and its inverse) |
computeTraceLog | Whether to compute the trace logarithm of the clover term |
void quda::cloverRho | ( | CloverField & | clover, |
double | rho | ||
) |
This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse)
clover | The clover field |
rho | Real scalar to be added on |
|
inline |
Definition at line 1368 of file complex_quda.h.
|
inline |
Definition at line 1357 of file complex_quda.h.
void quda::CoarseCoarseOp | ( | GaugeField & | Y, |
GaugeField & | X, | ||
const Transfer & | T, | ||
const GaugeField & | gauge, | ||
const GaugeField & | clover, | ||
const GaugeField & | cloverInv, | ||
double | kappa, | ||
double | mass, | ||
double | mu, | ||
double | mu_factor, | ||
QudaDiracType | dirac, | ||
QudaMatPCType | matpc, | ||
bool | need_bidirectional, | ||
bool | use_mma = false |
||
) |
Coarse operator construction from an intermediate-grid operator (Coarse)
Y[out] | Coarse link field |
X[out] | Coarse clover field |
T[in] | Transfer operator that defines the new coarse space |
gauge[in] | Link field from fine grid |
clover[in] | Clover field on fine grid |
cloverInv[in] | Clover inverse field on fine grid |
kappa[in] | Kappa parameter |
mass[in] | Mass parameter |
mu[in] | Mu parameter (set to non-zero for twisted-mass/twisted-clover) |
mu_factor[in] | Multiplicative factor for the mu parameter |
matpc[in] | The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator. |
need_bidirectional[in] | Whether or not we need to force a bi-directional build, even if the given level isn't preconditioned—if any previous level is preconditioned, we've violated that symmetry. |
use_mma[in] | Whether or not to use MMA (tensor core) to do the calculation; defaults to false |
void quda::CoarseOp | ( | GaugeField & | Y, |
GaugeField & | X, | ||
const Transfer & | T, | ||
const cudaGaugeField & | gauge, | ||
const cudaCloverField * | clover, | ||
double | kappa, | ||
double | mass, | ||
double | mu, | ||
double | mu_factor, | ||
QudaDiracType | dirac, | ||
QudaMatPCType | matpc | ||
) |
Coarse operator construction from a fine-grid operator (Wilson / Clover)
Y[out] | Coarse link field |
X[out] | Coarse clover field |
T[in] | Transfer operator that defines the coarse space |
gauge[in] | Gauge field from fine grid |
clover[in] | Clover field on fine grid (optional) |
kappa[in] | Kappa parameter |
mass[in] | Mass parameter |
mu[in] | Mu parameter (set to non-zero for twisted-mass/twisted-clover) |
mu_factor[in] | Multiplicative factor for the mu parameter |
matpc[in] | The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator. |
|
inline |
Compute the color contraction over color at spin s dot = \sum_s,c a(s,c) * b(s,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 930 of file color_spinor.h.
ColorSpinorParam quda::colorSpinorParam | ( | const CloverField & | a, |
bool | inverse | ||
) |
Definition at line 460 of file clover_field.cpp.
ColorSpinorParam quda::colorSpinorParam | ( | const GaugeField & | a | ) |
Definition at line 296 of file gauge_field.cpp.
|
inline |
Helper function for setting auxiliary string.
[in] | meta | LatticeField used for querying field location |
Definition at line 839 of file lattice_field.h.
void quda::completeKSForce | ( | GaugeField & | mom, |
const GaugeField & | oprod, | ||
const GaugeField & | gauge, | ||
QudaFieldLocation | location, | ||
long long * | flops = NULL |
||
) |
Definition at line 298 of file inv_ca_cg.cpp.
Definition at line 372 of file inv_ca_cg.cpp.
void quda::computeBeta | ( | Complex ** | beta, |
std::vector< ColorSpinorField * > | Ap, | ||
int | i, | ||
int | N, | ||
int | k | ||
) |
Definition at line 63 of file inv_gcr_quda.cpp.
void quda::computeClover | ( | CloverField & | clover, |
const GaugeField & | fmunu, | ||
double | coeff | ||
) |
Driver for computing the clover field from the field strength tensor.
[out] | clover | Compute clover field |
[in] | fmunu | Field strength tensor |
[in] | coeff | Clover coefficient |
void quda::computeCloverForce | ( | GaugeField & | force, |
const GaugeField & | U, | ||
std::vector< ColorSpinorField * > & | x, | ||
std::vector< ColorSpinorField * > & | p, | ||
std::vector< double > & | coeff | ||
) |
Compute the force contribution from the solver solution fields.
Force(x, mu) = U(x, mu) * sum_i=1^nvec ( P_mu^+ x(x+mu) p(x)^\dag + P_mu^- p(x+mu) x(x)^\dag )
M = A_even - kappa^2 * Dslash * A_odd^{-1} * Dslash
x(even) = M^{-1} b(even)
x(odd) = A_odd^{-1} * Dslash * x(even)
p(even) = M * x(even)
p(odd) = A_odd^{-1} * Dslash^dag * M * x(even).
force[out,in] | The resulting force field |
U | The input gauge field |
x | Solution field (both parities) |
p | Intermediate vectors (both parities) |
coeff | Multiplicative coefficient (e.g., dt * residue) |
void quda::computeCloverSigmaOprod | ( | GaugeField & | oprod, |
std::vector< ColorSpinorField * > & | x, | ||
std::vector< ColorSpinorField * > & | p, | ||
std::vector< std::vector< double > > & | coeff | ||
) |
Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field.
oprod[out,in] | Computed outer product field (tensor matrix field) |
x[in] | Solution field (both parities) |
p[in] | Intermediate vectors (both parities) |
coeff[in] | Multiplicative coefficient (e.g., dt * residue), one for each parity |
void quda::computeCloverSigmaTrace | ( | GaugeField & | output, |
const CloverField & | clover, | ||
double | coeff | ||
) |
Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu].
output | The computed matrix field (tensor matrix field) |
clover | The input clover field |
coeff | Scalar coefficient multiplying the result (e.g., stepsize) |
void quda::ComputeEta | ( | GMResDRArgs & | args | ) |
Definition at line 165 of file inv_gmresdr_quda.cpp.
void quda::ComputeEta< libtype::eigen_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 188 of file inv_gmresdr_quda.cpp.
void quda::ComputeEta< libtype::magma_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 167 of file inv_gmresdr_quda.cpp.
void quda::computeFmunu | ( | GaugeField & | Fmunu, |
const GaugeField & | gauge | ||
) |
Compute the Fmunu tensor.
[out] | Fmunu | The Fmunu tensor |
[in] | gauge | The gauge field upon which to compute the Fmunu tensor |
void quda::ComputeHarmonicRitz | ( | GMResDRArgs & | args | ) |
Definition at line 95 of file inv_gmresdr_quda.cpp.
void quda::ComputeHarmonicRitz< libtype::eigen_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 135 of file inv_gmresdr_quda.cpp.
void quda::ComputeHarmonicRitz< libtype::magma_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 97 of file inv_gmresdr_quda.cpp.
|
inline |
Definition at line 830 of file quda_matrix.h.
double quda::computeMomAction | ( | const GaugeField & | mom | ) |
Compute and return the global momentum action 1/2 mom^2.
mom | Momentum field |
void quda::computeQCharge | ( | double | energy[3], |
double & | qcharge, | ||
const GaugeField & | Fmunu | ||
) |
Compute the topological charge and field energy.
[out] | energy | The total, spatial, and temporal field energy |
[out] | qcharge | The total topological charge |
[in] | Fmunu | The Fmunu tensor, usually calculated from a smeared configuration |
void quda::computeQChargeDensity | ( | double | energy[3], |
double & | qcharge, | ||
void * | qdensity, | ||
const GaugeField & | Fmunu | ||
) |
Compute the topological charge, field energy and the topological charge density per lattice site.
[out] | energy | The total, spatial, and temporal field energy |
[out] | qcharge | The total topological charge |
[out] | qdensity | The topological charge at each lattice site |
[in] | Fmunu | The Fmunu tensor, usually calculated from a smeared configuration |
void quda::ComputeRitz | ( | EigCGArgs & | args | ) |
Definition at line 147 of file inv_eigcg_quda.cpp.
void quda::ComputeRitz< libtype::eigen_lib > | ( | EigCGArgs & | args | ) |
Definition at line 150 of file inv_eigcg_quda.cpp.
void quda::ComputeRitz< libtype::magma_lib > | ( | EigCGArgs & | args | ) |
Definition at line 178 of file inv_eigcg_quda.cpp.
void quda::computeStaggeredOprod | ( | GaugeField * | out[], |
ColorSpinorField & | in, | ||
const double | coeff[], | ||
int | nFace | ||
) |
Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. E.g.,
out[0][d](x) = (in(x+1_d) x conj(in(x))) out[1][d](x) = (in(x+3_d) x conj(in(x)))
where 1_d and 3_d represent a relative shift of magnitude 1 and 3 in dimension d, respectively
Note out[1] is only computed if nFace=3
[out] | out | Array of nFace outer-product matrix fields |
[in] | in | Input quark field |
[in] | coeff | Coefficient |
[in] | nFace | Number of faces (1 or 3) |
|
inline |
Returns the complex conjugate of z.
Definition at line 1050 of file complex_quda.h.
|
inline |
Definition at line 590 of file quda_matrix.h.
|
inline |
Definition at line 130 of file complex_quda.h.
void quda::contractQuda | ( | const ColorSpinorField & | x, |
const ColorSpinorField & | y, | ||
void * | result, | ||
QudaContractType | cType | ||
) |
Definition at line 74 of file comm_key.h.
|
inline |
Copy function which is trivial between floating point types. When converting to an integer type, the input float is assumed to be in the range [-1,1] and we rescale to saturate the integer range. When converting from an integer type, we scale the output to be on the same range.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Definition at line 877 of file quda_matrix.h.
|
inline |
Definition at line 865 of file quda_matrix.h.
|
inline |
Definition at line 796 of file quda_matrix.h.
void quda::copyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
const int | parity, | ||
void * | Dst, | ||
void * | Src, | ||
void * | dstNorm, | ||
void * | srcNorm | ||
) |
void quda::copyExtendedGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 |
||
) |
This function is used for copying the gauge field into an extended gauge field. Defined in copy_extended_gauge.cu.
out | The extended output field to which we are copying |
in | The input field from which we are copying |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
void quda::copyFieldOffset | ( | CloverField & | out, |
const CloverField & | in, | ||
CommKey | offset, | ||
QudaPCType | pc_type | ||
) |
This function is used for copying from a source clover field to a destination clover field with an offset.
out | The output field to which we are copying |
in | The input field from which we are copying |
offset | The offset for the larger field between out and in. |
pc_type | Whether the field order uses 4d or 5d even-odd preconditioning. |
void quda::copyFieldOffset | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
CommKey | offset, | ||
QudaPCType | pc_type | ||
) |
This function is used for copying from a source colorspinor field to a destination field with an offset.
out | The output field to which we are copying |
in | The input field from which we are copying |
offset | The offset for the larger field between out and in. |
pc_type | Whether the field order uses 4d or 5d even-odd preconditioning. |
void quda::copyFieldOffset | ( | GaugeField & | out, |
const GaugeField & | in, | ||
CommKey | offset, | ||
QudaPCType | pc_type | ||
) |
This function is used for copying from a source gauge field to a destination gauge field with an offset.
out | The output field to which we are copying |
in | The input field from which we are copying |
offset | The offset for the larger field between out and in. |
pc_type | Whether the field order uses 4d or 5d even-odd preconditioning. |
void quda::copyGenericClover | ( | CloverField & | out, |
const CloverField & | in, | ||
bool | inverse, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 , |
||
void * | outNorm = 0 , |
||
void * | inNorm = 0 |
||
) |
This generic function is used for copying the clover field where in the input and output can be in any order and location.
out | The output field to which we are copying |
in | The input field from which we are copying |
inverse | Whether we are copying the inverse term or not |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
outNorm | The output norm buffer (optional) |
inNorm | The input norm buffer (optional) |
void quda::copyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst = 0 , |
||
void * | Src = 0 , |
||
void * | dstNorm = 0 , |
||
void * | srcNorm = 0 |
||
) |
Definition at line 39 of file copy_color_spinor.cpp.
void quda::copyGenericColorSpinorDD | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorDH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorDQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorDS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorHD | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorHH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorHQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorHS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGDD | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGDS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGHH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGHQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGHS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGQH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGQQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGQS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGSD | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGSH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGSQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorMGSS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorQD | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorQH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorQQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorQS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorSD | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorSH | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorSQ | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericColorSpinorSS | ( | ColorSpinorField & | , |
const ColorSpinorField & | , | ||
QudaFieldLocation | , | ||
void * | , | ||
void * | , | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
void quda::copyGenericGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 , |
||
void ** | ghostOut = 0 , |
||
void ** | ghostIn = 0 , |
||
int | type = 0 |
||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in copy_gauge.cu.
out | The output field to which we are copying |
in | The input field from which we are copying |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
ghostOut | The output ghost buffer (optional) |
ghostIn | The input ghost buffer (optional) |
type | The type of copy we are doing (0 body and ghost else ghost only) |
Definition at line 44 of file copy_gauge.cpp.
void quda::copyGenericGaugeDoubleIn | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
void quda::copyGenericGaugeHalfIn | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
void quda::copyGenericGaugeMG | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
void quda::copyGenericGaugeQuarterIn | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
void quda::copyGenericGaugeSingleIn | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
|
inline |
Definition at line 890 of file quda_matrix.h.
|
inline |
Definition at line 903 of file quda_matrix.h.
Definition at line 1121 of file complex_quda.h.
|
inline |
Definition at line 1113 of file complex_quda.h.
|
inline |
Definition at line 46 of file complex_quda.h.
Definition at line 1137 of file complex_quda.h.
|
inline |
Definition at line 1129 of file complex_quda.h.
|
inline |
Definition at line 81 of file complex_quda.h.
void * quda::create_gauge_buffer | ( | size_t | bytes, |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 492 of file cuda_gauge_field.cpp.
void ** quda::create_ghost_buffer | ( | size_t | bytes[], |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 503 of file cuda_gauge_field.cpp.
void quda::createDirac | ( | Dirac *& | d, |
Dirac *& | dSloppy, | ||
Dirac *& | dPre, | ||
QudaInvertParam & | param, | ||
const bool | pc_solve | ||
) |
Create the Dirac operator. By default, we also create operators with possibly different precisions: Sloppy, and Preconditioner.
[in/out] | d User prec | |
[in/out] | dSloppy Sloppy prec | |
[in/out] | dPre Preconditioner prec | |
[in] | param | Invert param container |
[in] | pc_solve | Whether or not to perform an even/odd preconditioned solve |
Definition at line 1787 of file interface_quda.cpp.
void quda::createDiracWithEig | ( | Dirac *& | d, |
Dirac *& | dSloppy, | ||
Dirac *& | dPre, | ||
Dirac *& | dRef, | ||
QudaInvertParam & | param, | ||
const bool | pc_solve | ||
) |
Create the Dirac operator. By default, we also create operators with possibly different precisions: Sloppy, and Preconditioner. This function also creates a dirac operator for an eigensolver that creates a deflation space, dEig. We may not use dPrecon for this as, for example, the MSPCG solver uses dPrecon for a different purpose.
[in/out] | d User prec | |
[in/out] | dSloppy Sloppy prec | |
[in/out] | dPre Preconditioner prec | |
[in/out] | dEig Eigensolver prec | |
[in] | param | Invert param container |
[in] | pc_solve | Whether or not to perform an even/odd preconditioned solve |
Definition at line 1825 of file interface_quda.cpp.
void quda::createDiracWithRefine | ( | Dirac *& | d, |
Dirac *& | dSloppy, | ||
Dirac *& | dPre, | ||
Dirac *& | dRef, | ||
QudaInvertParam & | param, | ||
const bool | pc_solve | ||
) |
Create the Dirac operator. By default, we also create operators with possibly different precisions: Sloppy, and Preconditioner. This function also creates a dirac operator for refinement, dRef, used in invertMultiShiftQuda().
[in/out] | d User prec | |
[in/out] | dSloppy Sloppy prec | |
[in/out] | dPre Preconditioner prec | |
[in/out] | dRef Refine prec (EigCG and deflation) | |
[in] | param | Invert param container |
[in] | pc_solve | Whether or not to perform an even/odd preconditioned solve |
Definition at line 1804 of file interface_quda.cpp.
void quda::createDslashEvents | ( | ) |
cudaGaugeField * quda::createExtendedGauge | ( | cudaGaugeField & | in, |
const int * | R, | ||
TimeProfile & | profile, | ||
bool | redundant_comms = false , |
||
QudaReconstructType | recon = QUDA_RECONSTRUCT_INVALID |
||
) |
This function is used for creating an extended gauge field from the input, and copying the gauge field into the extended gauge field. Defined in lib/gauge_field.cpp.
in | The input field from which we are extending |
R | By how many do we want to extend the gauge field in each direction |
profile | The TimeProfile |
redundant_comms | |
recon | The reconstruction type |
Definition at line 364 of file gauge_field.cpp.
cpuGaugeField * quda::createExtendedGauge | ( | void ** | gauge, |
QudaGaugeParam & | gauge_param, | ||
const int * | R | ||
) |
This function is used for creating an extended (cpu) gauge field from the input, and copying the gauge field into the extended gauge field. Defined in lib/gauge_field.cpp.
in | The input field from which we are extending |
R | By how many do we want to extend the gauge field in each direction |
Definition at line 393 of file gauge_field.cpp.
|
inline |
Compute the cross product of two color vectors at spin sa and sb cProd = \sum_{j,k} \epsilon_{i,j,k} a(s1,j) b(s2,k) NB: Implemented for Nc=3 only
a | j ColorSpinor |
b | k ColorSpinor |
sa | j spin index |
sb | k spin index |
Definition at line 1017 of file color_spinor.h.
void quda::destroyDslashEvents | ( | ) |
size_t quda::device_allocated | ( | ) |
Definition at line 69 of file malloc.cpp.
long quda::device_allocated_peak | ( | ) |
Definition at line 79 of file malloc.cpp.
void quda::device_comms_pinned_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free device comms memory allocated with device_comms_pinned_malloc(). This function should only be called via the device_comms_pinned_free() macro, defined in malloc_quda.h
Definition at line 530 of file malloc.cpp.
void * quda::device_comms_pinned_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate shmem device memory. This function should only be called via device_comms_pinned_malloc_() Allocate pinned or symmetric (shmem) device memory for comms. Should only be called via the device_comms_pinned_malloc macro, defined in malloc_quda.h
Definition at line 401 of file malloc.cpp.
void quda::device_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h
Definition at line 415 of file malloc.cpp.
void * quda::device_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h
Definition at line 223 of file malloc.cpp.
void quda::device_pinned_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free device memory allocated with device_pinned_malloc(). This function should only be called via the device_pinned_free() macro, defined in malloc_quda.h
Definition at line 440 of file malloc.cpp.
void * quda::device_pinned_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a cuMemAlloc with error-checking. This function is to guarantee a unique memory allocation on the device, since cudaMalloc can be redirected (as is the case with QDPJIT). This should only be called via the device_pinned_malloc() macro, defined in malloc_quda.h.
Definition at line 255 of file malloc.cpp.
void quda::disableProfileCount | ( | ) |
|
constexpr |
Helper function that returns whether we have enabled dynamic clover inversion or not.
Definition at line 518 of file clover_field.h.
void quda::enableProfileCount | ( | ) |
__device__ __host__ double quda::ErrorSU3 | ( | const Matrix< Cmplx, 3 > & | matrix | ) |
Definition at line 962 of file quda_matrix.h.
void quda::exchangeExtendedGhost | ( | cudaColorSpinorField * | spinor, |
int | R[], | ||
int | parity, | ||
qudaStream_t * | stream_p | ||
) |
Definition at line 1152 of file complex_quda.h.
|
inline |
Definition at line 1146 of file complex_quda.h.
|
inline |
Definition at line 96 of file complex_quda.h.
|
inline |
Definition at line 987 of file quda_matrix.h.
__device__ __host__ void quda::expsu3 | ( | Matrix< complex< Float >, 3 > & | q | ) |
Direct port of the TIFR expsu3 algorithm
Definition at line 1124 of file quda_matrix.h.
void quda::extractExtendedGaugeGhost | ( | const GaugeField & | u, |
int | dim, | ||
const int * | R, | ||
void ** | ghost, | ||
bool | extract | ||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.
u | The gauge field from which we want to extract/pack the ghost zone |
dim | The dimension in which we are packing/unpacking |
ghost | The array where we want to pack/unpack the ghost zone into/from |
extract | Whether we are extracting into ghost or injecting from ghost |
void quda::extractGaugeGhost | ( | const GaugeField & | u, |
void ** | ghost, | ||
bool | extract = true , |
||
int | offset = 0 |
||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.
u | The gauge field from which we want to extract the ghost zone |
ghost | The array where we want to pack the ghost zone into |
extract | Whether we are extracting into ghost or injecting from ghost |
offset | By default we exchange the nDim site-vector of links in the first nDim dimensions; offset allows us to instead exchange the links in nDim+offset dimensions. This is used to facilitate sending bi-directional links which is needed for the coarse links. |
void quda::fatKSLink | ( | GaugeField * | fat, |
const GaugeField & | u, | ||
const double * | coeff | ||
) |
Compute the fat links for improved staggered (Kogut-Susskind) fermions.
fat[out] | The computed fat link |
u[in] | The input gauge field |
coeff[in] | Array of path coefficients |
|
inlineconstexpr |
Definition at line 83 of file malloc_quda.h.
void quda::fillFGMResDRInnerSolveParam | ( | SolverParam & | inner, |
const SolverParam & | outer | ||
) |
Definition at line 197 of file inv_gmresdr_quda.cpp.
void quda::fillInnerSolveParam | ( | SolverParam & | inner, |
const SolverParam & | outer | ||
) |
Definition at line 25 of file inv_gcr_quda.cpp.
void quda::flushForceMonitor | ( | ) |
Flush any outstanding force monitoring information.
void quda::flushProfile | ( | ) |
bool quda::forceMonitor | ( | ) |
Whether we are monitoring the force or not.
void quda::free_gauge_buffer | ( | void * | buffer, |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 515 of file cuda_gauge_field.cpp.
void quda::free_ghost_buffer | ( | void ** | buffer, |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 524 of file cuda_gauge_field.cpp.
void quda::gamma5 | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in | ||
) |
Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma)
[out] | out | Output field |
[in] | in | Input field |
void quda::gaugeFixingFFT | ( | GaugeField & | data, |
const int | gauge_dir, | ||
const int | Nsteps, | ||
const int | verbose_interval, | ||
const double | alpha, | ||
const int | autotune, | ||
const double | tolerance, | ||
const int | stopWtheta | ||
) |
Gauge fixing with Steepest descent method with FFTs with support for single GPU only.
[in,out] | data,quda | gauge field |
[in] | gauge_dir,3 | for Coulomb gauge fixing, other for Landau gauge fixing |
[in] | Nsteps,maximum | number of steps to perform gauge fixing |
[in] | verbose_interval,print | gauge fixing info when iteration count is a multiple of this |
[in] | alpha,gauge | fixing parameter of the method, most common value is 0.08 |
[in] | autotune,1 | to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value |
[in] | tolerance | tolerance value to stop the method, if this value is zero then the method stops when the iteration count reaches the maximum number of steps defined by Nsteps |
[in] | stopWtheta,0 | for MILC criterion and 1 to use the theta value |
void quda::gaugeFixingOVR | ( | GaugeField & | data, |
const int | gauge_dir, | ||
const int | Nsteps, | ||
const int | verbose_interval, | ||
const double | relax_boost, | ||
const double | tolerance, | ||
const int | reunit_interval, | ||
const int | stopWtheta | ||
) |
Gauge fixing with overrelaxation with support for single and multi GPU.
[in,out] | data,quda | gauge field |
[in] | gauge_dir,3 | for Coulomb gauge fixing, other for Landau gauge fixing |
[in] | Nsteps,maximum | number of steps to perform gauge fixing |
[in] | verbose_interval,print | gauge fixing info when iteration count is a multiple of this |
[in] | relax_boost,gauge | fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. |
[in] | tolerance | tolerance value to stop the method, if this value is zero then the method stops when the iteration count reaches the maximum number of steps defined by Nsteps |
[in] | reunit_interval,reunitarize | gauge field when iteration count is a multiple of this |
[in] | stopWtheta,0 | for MILC criterion and 1 to use the theta value |
void quda::gaugeForce | ( | GaugeField & | mom, |
const GaugeField & | u, | ||
double | coeff, | ||
int *** | input_path, | ||
int * | length, | ||
double * | path_coeff, | ||
int | num_paths, | ||
int | max_length | ||
) |
Compute the gauge-force contribution to the momentum.
[out] | mom | Momentum field |
[in] | u | Gauge field (extended when running on multiple GPUs) |
[in] | coeff | Step-size coefficient |
[in] | input_path | Host-array holding all path contributions for the gauge action |
[in] | length | Host array holding the length of all paths |
[in] | path_coeff | Coefficient of each path |
[in] | num_paths | Number of paths |
[in] | max_length | Maximum length of each path |
void quda::gaugeGauss | ( | GaugeField & | U, |
RNG & | rngstate, | ||
double | epsilon | ||
) |
Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).
[out] | U | The output gauge field |
[in] | rngstate | random states |
[in] | sigma | Width of Gaussian distribution |
void quda::gaugeGauss | ( | GaugeField & | U, |
unsigned long long | seed, | ||
double | epsilon | ||
) |
Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).
[out] | U | The GaugeField |
[in] | seed | The seed used for the RNG |
[in] | sigma | Width of the Gaussian distribution |
void quda::gaugeObservables | ( | GaugeField & | u, |
QudaGaugeObservableParam & | param, | ||
TimeProfile & | profile | ||
) |
Calculates a variety of gauge-field observables.
[in] | Gauge | field upon which we are measuring. |
[in,out] | param | Parameter struct that defines which observables we are making and the resulting observables. |
[in] | profile | TimeProfile instance used for profiling. |
Definition at line 7 of file gauge_observable.cpp.
int quda::genericCompare | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b, | ||
int | tol | ||
) |
void quda::genericCudaPrintVector | ( | const cudaColorSpinorField & | a, |
unsigned | x | ||
) |
void quda::genericPackGhost | ( | void ** | ghost, |
const ColorSpinorField & | a, | ||
QudaParity | parity, | ||
int | nFace, | ||
int | dagger, | ||
MemoryLocation * | destination = nullptr |
||
) |
Generic ghost packing routine.
void quda::genericPrintVector | ( | const cpuColorSpinorField & | a, |
unsigned int | x | ||
) |
void quda::genericSource | ( | cpuColorSpinorField & | a, |
QudaSourceType | sourceType, | ||
int | x, | ||
int | s, | ||
int | c | ||
) |
void * quda::get_mapped_device_pointer_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
const void * | ptr | ||
) |
Definition at line 590 of file malloc.cpp.
QudaFieldLocation quda::get_pointer_location | ( | const void * | ptr | ) |
Definition at line 566 of file malloc.cpp.
|
inline |
Definition at line 417 of file quda_matrix.h.
bool quda::getDslashLaunch | ( | ) |
bool quda::getKernelPackT | ( | ) |
double2 quda::getLinkDeterminant | ( | GaugeField & | data | ) |
Calculate the Determinant.
[in] | data | Gauge field |
double2 quda::getLinkTrace | ( | GaugeField & | data | ) |
Calculate the Trace.
[in] | data | Gauge field |
|
inline |
Definition at line 931 of file quda_matrix.h.
|
inline |
Definition at line 915 of file quda_matrix.h.
|
inline |
Definition at line 410 of file quda_matrix.h.
const map & quda::getTuneCache | ( | ) |
size_t quda::host_allocated | ( | ) |
Definition at line 77 of file malloc.cpp.
long quda::host_allocated_peak | ( | ) |
Definition at line 87 of file malloc.cpp.
void quda::host_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h
Definition at line 477 of file malloc.cpp.
|
inline |
|
inline |
Definition at line 117 of file uint_to_char.h.
|
inline |
Definition at line 284 of file uint_to_char.h.
|
inline |
Definition at line 1378 of file complex_quda.h.
|
constexpr |
Helper function for getting the implied spinor parity from a matrix preconditioning type.
[in] | matpc_type | The matrix preconditioning type |
Definition at line 59 of file color_spinor_field.h.
Definition at line 84 of file comm_key.h.
|
constexpr |
The initialization value we used to check for completion.
Definition at line 38 of file reduce_helper.h.
void quda::InitGaugeField | ( | GaugeField & | data | ) |
Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data)
[in,out] | data | Gauge field |
void quda::InitGaugeField | ( | GaugeField & | data, |
RNG & | rngstate | ||
) |
Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchange borders links in multi-GPU case.
[in,out] | data | Gauge field |
[in,out] | rngstate | state of the CURAND random number generator |
|
inline |
Compute the inner product over color and spin dot = \sum_s,c conj(a(s,c)) * b(s,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 913 of file color_spinor.h.
|
inline |
Compute the inner product over color at spin s between two ColorSpinor fields dot = \sum_c conj(a(s,c)) * b(s,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
s | diagonal spin index |
Definition at line 953 of file color_spinor.h.
|
inline |
Compute the inner product over color at spin sa and sb between two ColorSpinor fields dot = \sum_c conj(a(sa,c)) * b(sb,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
sa | Left-hand side spin index |
sb | Right-hand side spin index |
Definition at line 969 of file color_spinor.h.
|
inline |
Compute the inner product over color at spin sa and sb between color spinors a and b of different spin length dot = \sum_c conj(a(c)) * b(s,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 992 of file color_spinor.h.
|
constexpr |
This instantiate function is used to instantiate the clover precision.
[in] | c | CloverField we wish to instantiate |
[in,out] | args | Any additional arguments required for the computation at hand |
Definition at line 163 of file instantiate.h.
|
inline |
This instantiate function is used to instantiate the reconstruct types used.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 21 of file instantiate_dslash.h.
|
inline |
This instantiate function is used to instantiate the colors.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 54 of file instantiate_dslash.h.
|
inline |
This instantiate function is used to instantiate the precisions.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 71 of file instantiate_dslash.h.
|
constexpr |
This instantiate function is used to instantiate the colors.
[in] | field | LatticeField we wish to instantiate |
[in,out] | args | Additional arguments for kernels |
Definition at line 200 of file instantiate.h.
|
constexpr |
This instantiate function is used to instantiate the precision and number of colors.
[in] | field | LatticeField we wish to instantiate |
[in,out] | args | Any additional arguments required for the computation at hand |
Definition at line 221 of file instantiate.h.
|
constexpr |
This instantiate function is used to instantiate the colors.
[in] | U | Gauge field |
[in,out] | args | Additional arguments for kernels |
Definition at line 117 of file instantiate.h.
|
constexpr |
This instantiate function is used to instantiate the precisions.
[in] | U | Gauge field |
[in,out] | args | Any additional arguments required for the computation at hand |
Definition at line 134 of file instantiate.h.
|
constexpr |
The instantiatePrecision function is used to instantiate the precision. Note unlike the "instantiate" functions above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support.
[in] | field | LatticeField we wish to instantiate |
[in,out] | args | Any additional arguments required for the computation at hand |
Definition at line 264 of file instantiate.h.
|
constexpr |
The instantiatePrecision2 function is used to instantiate the precision for a class that accepts 2 typename arguments, with the first typename corresponding to the precision being instantiated at hand. This is useful for copy routines, where we need to instantiate a second, e.g., destination, precision after already instantiating the first, e.g., source, precision. Similar to the "instantiatePrecision" function above, this helper always instantiates double precision regardless of the QUDA_PRECISION value: this enables its use for copy interface routines which should always enable double precision support.
[in] | field | LatticeField we wish to instantiate |
[in,out] | args | Any additional arguments required for the computation at hand |
Definition at line 309 of file instantiate.h.
|
constexpr |
The instantiatePrecision function is used to instantiate the precision.
[in] | field | LatticeField we wish to instantiate |
[in,out] | args | Any additional arguments required for the computation at hand |
Definition at line 345 of file instantiate.h.
|
inline |
This instantiatePreconditioner function is used to instantiate the precisions for a preconditioner. This is the same as the instantiate helper above, except it only handles half and quarter precision.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 113 of file instantiate_dslash.h.
Definition at line 605 of file quda_matrix.h.
|
inline |
Definition at line 95 of file malloc_quda.h.
|
constexpr |
Definition at line 10 of file instantiate.h.
|
constexpr |
Definition at line 16 of file instantiate.h.
|
constexpr |
Definition at line 15 of file instantiate.h.
|
constexpr |
Definition at line 20 of file instantiate.h.
|
constexpr |
Definition at line 19 of file instantiate.h.
|
constexpr |
Definition at line 12 of file instantiate.h.
bool quda::is_prefetch_enabled | ( | ) |
Definition at line 198 of file malloc.cpp.
bool quda::isUnitary | ( | const cpuGaugeField & | field, |
double | max_error | ||
) |
|
inline |
Definition at line 121 of file split_grid.h.
|
inline |
Helper function for determining if the length of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 1207 of file color_spinor_field.h.
|
inline |
Helper function for determining if the length of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check length on |
Definition at line 1225 of file color_spinor_field.h.
|
inline |
Definition at line 45 of file inline_ptx.h.
|
inline |
Definition at line 35 of file inline_ptx.h.
|
inline |
Definition at line 71 of file inline_ptx.h.
|
inline |
Definition at line 63 of file inline_ptx.h.
|
inline |
Definition at line 53 of file inline_ptx.h.
|
inline |
Definition at line 21 of file inline_ptx.h.
|
inline |
Definition at line 28 of file inline_ptx.h.
|
inline |
Helper function for determining if the location of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 738 of file lattice_field.h.
|
inline |
Helper function for determining if the location of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check location on |
Definition at line 755 of file lattice_field.h.
Definition at line 1164 of file complex_quda.h.
|
inline |
Definition at line 1158 of file complex_quda.h.
|
inline |
Definition at line 101 of file complex_quda.h.
|
inline |
Definition at line 1171 of file complex_quda.h.
|
inline |
Definition at line 106 of file complex_quda.h.
void quda::longKSLink | ( | GaugeField * | lng, |
const GaugeField & | u, | ||
const double * | coeff | ||
) |
Compute the long links for an improved staggered (Kogut-Susskind) fermions.
lng[out] | The computed long link (only computed if lng!=0) |
u[in] | The input gauge field |
coeff[in] | Array of path coefficients |
|
inline |
Definition at line 734 of file quda_matrix.h.
|
inline |
Definition at line 750 of file quda_matrix.h.
size_t quda::managed_allocated | ( | ) |
Definition at line 75 of file malloc.cpp.
long quda::managed_allocated_peak | ( | ) |
Definition at line 85 of file malloc.cpp.
void quda::managed_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free managed memory allocated with managed_malloc(). This function should only be called via the managed_free() macro, defined in malloc_quda.h
Definition at line 461 of file malloc.cpp.
void * quda::managed_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard cudaMallocManaged() with error-checking. This function should only be called via the managed_malloc() macro, defined in malloc_quda.h
Definition at line 356 of file malloc.cpp.
size_t quda::mapped_allocated | ( | ) |
Definition at line 73 of file malloc.cpp.
long quda::mapped_allocated_peak | ( | ) |
Definition at line 83 of file malloc.cpp.
void * quda::mapped_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h
Definition at line 324 of file malloc.cpp.
void quda::massRescale | ( | cudaColorSpinorField & | b, |
QudaInvertParam & | param, | ||
bool | for_multishift | ||
) |
Definition at line 1846 of file interface_quda.cpp.
|
constexpr |
Definition at line 33 of file reduce_helper.h.
void quda::Monte | ( | GaugeField & | data, |
RNG & | rngstate, | ||
double | Beta, | ||
int | nhb, | ||
int | nover | ||
) |
Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps.
[in,out] | data | Gauge field |
[in,out] | rngstate | state of the CURAND random number generator |
[in] | Beta | inverse of the gauge coupling, beta = 2 Nc / g_0^2 |
[in] | nhb | number of heatbath steps |
[in] | nover | number of overrelaxation steps |
|
inline |
Compute the matrix-vector product z = A * x + y.
[in] | A | Input matrix |
[in] | x | Input vector |
[in] | z | Input vector |
Definition at line 1203 of file color_spinor.h.
|
inline |
Helper function for determining if the field is in native order.
[in] | a | Input field |
Definition at line 798 of file lattice_field.h.
|
inline |
Helper function for determining if the fields are in native order.
[in] | a | Input field |
[in] | args | List of additional fields to check |
Definition at line 811 of file lattice_field.h.
|
inline |
Returns the magnitude of z squared.
Definition at line 1088 of file complex_quda.h.
double quda::norm1 | ( | const CloverField & | u, |
bool | inverse = false |
||
) |
This is a debugging function, where we cast a clover field into a spinor field so we can compute its L1 norm.
u | The clover field that we want the norm of |
Definition at line 493 of file clover_field.cpp.
double quda::norm1 | ( | const GaugeField & | u | ) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm.
u | The gauge field that we want the norm of |
Definition at line 331 of file gauge_field.cpp.
double quda::norm2 | ( | const CloverField & | a, |
bool | inverse = false |
||
) |
This is a debugging function, where we cast a clover field into a spinor field so we can compute its L2 norm.
a | The clover field that we want the norm of |
Definition at line 485 of file clover_field.cpp.
double quda::norm2 | ( | const GaugeField & | u | ) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.
u | The gauge field that we want the norm of |
Definition at line 323 of file gauge_field.cpp.
|
inline |
Definition at line 1031 of file complex_quda.h.
|
inline |
Definition at line 1043 of file complex_quda.h.
|
inline |
Definition at line 1037 of file complex_quda.h.
Definition at line 51 of file comm_key.h.
Definition at line 37 of file comm_key.h.
|
inline |
Definition at line 898 of file complex_quda.h.
|
inline |
Definition at line 907 of file complex_quda.h.
|
inline |
Definition at line 70 of file float_vector.h.
|
inline |
Definition at line 78 of file float_vector.h.
|
inline |
Definition at line 62 of file float_vector.h.
|
inline |
Definition at line 52 of file float_vector.h.
Definition at line 88 of file float_vector.h.
|
inline |
Compute the matrix-vector product y = A * x.
[in] | A | Input Hermitian matrix with dimensions NcxNs x NcxNs |
[in] | x | Input vector |
Definition at line 1238 of file color_spinor.h.
|
inline |
Generic implementation of matrix multiplication.
Definition at line 502 of file quda_matrix.h.
|
inline |
Definition at line 479 of file quda_matrix.h.
|
inline |
Compute the matrix-vector product y = A * x.
[in] | A | Input matrix |
[in] | x | Input vector |
Definition at line 1167 of file color_spinor.h.
|
inline |
Specialization of complex matrix multiplication that will issue optimal fma instructions.
Definition at line 523 of file quda_matrix.h.
|
inline |
Definition at line 577 of file quda_matrix.h.
|
inline |
Definition at line 557 of file quda_matrix.h.
|
inline |
Compute the scalar-vector product y = a * x.
[in] | a | Input scalar |
[in] | x | Input vector |
Definition at line 1145 of file color_spinor.h.
|
inline |
Definition at line 471 of file quda_matrix.h.
|
inline |
Definition at line 914 of file complex_quda.h.
|
inline |
Definition at line 228 of file float_vector.h.
|
inline |
Definition at line 206 of file float_vector.h.
|
inline |
Definition at line 234 of file float_vector.h.
|
inline |
Definition at line 199 of file float_vector.h.
|
inline |
Definition at line 213 of file float_vector.h.
Definition at line 221 of file float_vector.h.
|
inline |
Definition at line 484 of file quda_matrix.h.
|
inline |
Definition at line 547 of file quda_matrix.h.
|
inline |
ColorSpinor addition operator.
[in] | x | Input vector |
[in] | y | Input vector |
Definition at line 1101 of file color_spinor.h.
Definition at line 30 of file comm_key.h.
|
inline |
Definition at line 850 of file complex_quda.h.
|
inline |
Definition at line 866 of file complex_quda.h.
|
inline |
Definition at line 992 of file complex_quda.h.
|
inline |
Definition at line 14 of file float_vector.h.
|
inline |
Definition at line 42 of file float_vector.h.
|
inline |
Definition at line 47 of file float_vector.h.
|
inline |
Definition at line 96 of file float_vector.h.
|
inline |
Definition at line 104 of file float_vector.h.
Definition at line 114 of file float_vector.h.
|
inline |
Definition at line 428 of file quda_matrix.h.
|
inline |
Definition at line 872 of file complex_quda.h.
|
inline |
Definition at line 415 of file float_vector.h.
|
inline |
Definition at line 858 of file complex_quda.h.
|
inline |
Definition at line 145 of file float_vector.h.
|
inline |
Definition at line 152 of file float_vector.h.
|
inline |
Definition at line 160 of file float_vector.h.
|
inline |
Definition at line 131 of file float_vector.h.
|
inline |
Definition at line 122 of file float_vector.h.
Definition at line 138 of file float_vector.h.
|
inline |
Definition at line 438 of file quda_matrix.h.
|
inline |
Definition at line 446 of file quda_matrix.h.
|
inline |
ColorSpinor subtraction operator.
[in] | x | Input vector |
[in] | y | Input vector |
Definition at line 1123 of file color_spinor.h.
|
inline |
Definition at line 879 of file complex_quda.h.
|
inline |
Definition at line 885 of file complex_quda.h.
|
inline |
Definition at line 997 of file complex_quda.h.
|
inline |
Definition at line 246 of file float_vector.h.
|
inline |
Definition at line 19 of file float_vector.h.
|
inline |
Definition at line 242 of file float_vector.h.
|
inline |
Definition at line 24 of file float_vector.h.
|
inline |
Definition at line 29 of file float_vector.h.
Definition at line 34 of file float_vector.h.
|
inline |
Definition at line 490 of file quda_matrix.h.
|
inline |
Definition at line 462 of file quda_matrix.h.
|
inline |
Definition at line 891 of file complex_quda.h.
|
inline |
Definition at line 192 of file float_vector.h.
|
inline |
Definition at line 178 of file float_vector.h.
|
inline |
Definition at line 169 of file float_vector.h.
Definition at line 185 of file float_vector.h.
|
inline |
Definition at line 454 of file quda_matrix.h.
Definition at line 44 of file comm_key.h.
|
inline |
Definition at line 948 of file complex_quda.h.
|
inline |
Definition at line 931 of file complex_quda.h.
|
inline |
Definition at line 922 of file complex_quda.h.
|
inline |
Definition at line 965 of file complex_quda.h.
|
inline |
Definition at line 985 of file complex_quda.h.
|
inline |
Definition at line 980 of file complex_quda.h.
|
inline |
Definition at line 972 of file complex_quda.h.
Definition at line 58 of file comm_key.h.
std::basic_ostream< charT, traits > & quda::operator<< | ( | std::basic_ostream< charT, traits > & | os, |
const complex< ValueType > & | z | ||
) |
Definition at line 305 of file complex_quda.h.
std::ostream& quda::operator<< | ( | std::ostream & | os, |
const Array< T, N > & | a | ||
) |
Definition at line 821 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | os, |
const Matrix< T, N > & | m | ||
) |
Definition at line 807 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const ColorSpinorField & | a | ||
) |
Definition at line 865 of file color_spinor_field.cpp.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const cudaColorSpinorField & | a | ||
) |
Definition at line 1199 of file cuda_color_spinor_field.cpp.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const CloverFieldParam & | param | ||
) |
Definition at line 441 of file clover_field.cpp.
|
inline |
Definition at line 299 of file float_vector.h.
|
inline |
Definition at line 305 of file float_vector.h.
|
inline |
Definition at line 311 of file float_vector.h.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const GaugeFieldParam & | param | ||
) |
Definition at line 274 of file gauge_field.cpp.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const LatticeFieldParam & | param | ||
) |
Definition at line 727 of file lattice_field.cpp.
std::ostream& quda::operator<< | ( | std::ostream & | output, |
const vector_type< T, n > & | a | ||
) |
Definition at line 400 of file float_vector.h.
|
inline |
Definition at line 1004 of file complex_quda.h.
|
inline |
Definition at line 1021 of file complex_quda.h.
|
inline |
Definition at line 1013 of file complex_quda.h.
Definition at line 66 of file comm_key.h.
std::basic_istream< charT, traits > & quda::operator>> | ( | std::basic_istream< charT, traits > & | is, |
complex< ValueType > & | z | ||
) |
Definition at line 318 of file complex_quda.h.
|
inline |
Helper function for determining if the order of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 1174 of file color_spinor_field.h.
|
inline |
Helper function for determining if the order of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check order on |
Definition at line 1193 of file color_spinor_field.h.
void quda::orthoDir | ( | Complex ** | beta, |
std::vector< ColorSpinorField * > | Ap, | ||
int | k, | ||
int | pipeline | ||
) |
Definition at line 96 of file inv_gcr_quda.cpp.
|
inline |
Compute the outer product over color and take the spin trace out(j,i) = \sum_s a(s,j) * conj (b(s,i))
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 1035 of file color_spinor.h.
|
inline |
Compute the outer product over color and take the spin trace out(j,i) = \sum_s a(s,j) * conj (b(s,i))
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 1073 of file color_spinor.h.
void quda::OvrImpSTOUTStep | ( | GaugeField & | dataDs, |
GaugeField & | dataOr, | ||
double | rho, | ||
double | epsilon | ||
) |
Apply Over Improved STOUT smearing to the gauge field.
[out] | dataDs | Output smeared field |
[in] | dataOr | Input gauge field |
[in] | rho | smearing parameter |
[in] | epsilon | smearing parameter |
void quda::PackGhost | ( | void * | ghost[2 *QUDA_MAX_DIM], |
const ColorSpinorField & | field, | ||
MemoryLocation | location, | ||
int | nFace, | ||
bool | dagger, | ||
int | parity, | ||
bool | spin_project, | ||
double | a, | ||
double | b, | ||
double | c, | ||
int | shmem, | ||
const qudaStream_t & | stream | ||
) |
Dslash face packing routine.
[out] | ghost | Array of packed halos, order is [2*dim+dir] |
[in] | field | ColorSpinorField to be packed |
[in] | location | Locations where the packed fields are (Device, Host and/or Remote) |
[in] | nFace | Depth of halo |
[in] | dagger | Whether this is for the dagger operator |
[in] | parity | Field parity |
[in] | spin_project | Whether to spin_project when packing |
[in] | a | Twisted mass scale factor (for preconditioned twisted-mass dagger operator) |
[in] | b | Twisted mass chiral twist factor (for preconditioned twisted-mass dagger operator) |
[in] | c | Twisted mass flavor twist factor (for preconditioned non degenerate twisted-mass dagger operator) |
[in] | stream | Which stream are we executing in |
|
inline |
Helper function for determining if the preconditioning type of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 1141 of file color_spinor_field.h.
|
inline |
Helper function for determining if the preconditioning type of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check preconditioning type on |
Definition at line 1160 of file color_spinor_field.h.
void quda::PGaugeExchange | ( | GaugeField & | data, |
const int | n_dim, | ||
const int | parity | ||
) |
Exchange "borders" between nodes. Although the border radius is 2, this only updates the interior radius border, i.e., at 1 and X[d-2], where X[d] already includes the radius border; it does not update the faces at 0 and X[d-1].
[in,out] | data | Gauge field |
[in] | n_dim | Number of dimensions to exchange |
[in] | parity | Field parity |
void quda::PGaugeExchangeFree | ( | ) |
Release all allocated memory used to exchange data between nodes.
size_t quda::pinned_allocated | ( | ) |
Definition at line 71 of file malloc.cpp.
long quda::pinned_allocated_peak | ( | ) |
Definition at line 81 of file malloc.cpp.
void * quda::pinned_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h
Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.
Definition at line 303 of file malloc.cpp.
double3 quda::plaquette | ( | const GaugeField & | U | ) |
Compute the plaquette of the gauge field.
[in] | U | The gauge field upon which to compute the plaquette |
|
inline |
Definition at line 1106 of file complex_quda.h.
|
inline |
Definition at line 1100 of file complex_quda.h.
|
inline |
Returns the complex with magnitude m and angle theta in radians.
Definition at line 1094 of file complex_quda.h.
bool quda::policyTuning | ( | ) |
void quda::popKernelPackT | ( | ) |
void quda::postTrace_ | ( | const char * | func, |
const char * | file, | ||
int | line | ||
) |
|
inline |
Definition at line 1186 of file complex_quda.h.
|
inline |
Definition at line 1204 of file complex_quda.h.
|
inline |
Definition at line 1180 of file complex_quda.h.
|
inline |
Definition at line 1198 of file complex_quda.h.
|
inline |
Definition at line 1192 of file complex_quda.h.
|
inline |
Definition at line 111 of file complex_quda.h.
|
inline |
Helper function for determining if the precision of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 768 of file lattice_field.h.
|
inline |
Helper function for determining if the precision of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check precision on |
Definition at line 785 of file lattice_field.h.
void quda::print | ( | const double | d[], |
int | n | ||
) |
Definition at line 44 of file inv_mpcg_quda.cpp.
void quda::printAPIProfile | ( | ) |
Print out the timer profile for CUDA API calls.
Definition at line 495 of file quda_api.cpp.
|
inline |
Definition at line 947 of file quda_matrix.h.
void quda::printPeakMemUsage | ( | ) |
Definition at line 539 of file malloc.cpp.
|
inlineconstexpr |
Definition at line 28 of file comm_key.h.
void quda::projectSU3 | ( | GaugeField & | U, |
double | tol, | ||
int * | fails | ||
) |
Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken.
U | Gauge field that we are projecting onto SU(3) |
tol | Tolerance to which the iterative algorithm works |
fails | Number of link failures (device pointer) |
void quda::Prolongate | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const ColorSpinorField & | v, | ||
int | Nvec, | ||
const int * | fine_to_coarse, | ||
const int *const * | spin_map, | ||
int | parity = QUDA_INVALID_PARITY |
||
) |
Apply the prolongation operator.
[out] | out | Resulting fine grid field |
[in] | in | Input field on coarse grid |
[in] | v | Matrix field containing the null-space components |
[in] | Nvec | Number of null-space components |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | spin_map | Spin blocking lookup table |
[in] | parity | Parity of the output fine field (if single parity output field) |
void quda::pushKernelPackT | ( | bool | pack | ) |
void quda::qudaDeviceSynchronize_ | ( | const char * | func, |
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize with built-in error checking.
Definition at line 464 of file quda_api.cpp.
bool quda::qudaEventQuery_ | ( | cudaEvent_t & | event, |
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaEventQuery or cuEventQuery with built-in error checking.
[in] | event | Event we are querying |
Definition at line 378 of file quda_api.cpp.
void quda::qudaEventRecord_ | ( | cudaEvent_t & | event, |
qudaStream_t | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaEventRecord or cuEventRecord with built-in error checking.
[in,out] | event | Event we are recording |
[in,out] | stream | Stream where to record the event |
Definition at line 402 of file quda_api.cpp.
void quda::qudaEventSynchronize_ | ( | cudaEvent_t & | event, |
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaEventSynchronize or cuEventSynchronize with built-in error checking.
[in] | event | Event which we are synchronizing with respect to |
Definition at line 433 of file quda_api.cpp.
void quda::qudaFuncGetAttributes_ | ( | cudaFuncAttributes & | attr, |
const void * | kernel, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaFuncGetAttributes with built-in error checking.
[out] | attr | The cudaFuncAttributes object in which to store the output |
[in] | kernel | Kernel function for which we are querying the attributes |
Definition at line 487 of file quda_api.cpp.
void quda::qudaFuncSetAttribute_ | ( | const void * | kernel, |
cudaFuncAttribute | attr, | ||
int | value, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaFuncSetAttribute with built-in error checking.
[in] | kernel | Kernel function for which we are setting the attribute |
[in] | attr | Attribute to set |
[in] | value | Value to set |
Definition at line 479 of file quda_api.cpp.
qudaError_t quda::qudaLaunchKernel | ( | const void * | func, |
const TuneParam & | tp, | ||
void ** | args, | ||
qudaStream_t | stream | ||
) |
Wrapper around cudaLaunchKernel.
[in] | func | Device function symbol |
[in] | tp | TuneParam containing the launch parameters |
[in] | args | Arguments |
[in] | stream | Stream identifier |
Definition at line 57 of file quda_api.cpp.
qudaError_t quda::qudaLaunchKernel | ( | T * | func, |
const TuneParam & | tp, | ||
qudaStream_t | stream, | ||
const Arg &... | arg | ||
) |
Templated wrapper around qudaLaunchKernel which can accept a templated kernel, and expects a kernel with a single Arg argument.
[in] | func | Device function symbol |
[in] | tp | TuneParam containing the launch parameters |
[in] | arg | Arguments |
[in] | stream | Stream identifier |
Definition at line 43 of file quda_api.h.
void quda::qudaMemcpy2D_ | ( | void * | dst, |
size_t | dpitch, | ||
const void * | src, | ||
size_t | spitch, | ||
size_t | width, | ||
size_t | height, | ||
cudaMemcpyKind | kind, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpy2D or driver API equivalent.
[out] | dst | Destination pointer |
[in] | dpitch | Destination pitch in bytes |
[in] | src | Source pointer |
[in] | spitch | Source pitch in bytes |
[in] | width | Width in bytes |
[in] | height | Number of rows |
[in] | kind | Type of memory copy |
Definition at line 272 of file quda_api.cpp.
void quda::qudaMemcpy2DAsync_ | ( | void * | dst, |
size_t | dpitch, | ||
const void * | src, | ||
size_t | spitch, | ||
size_t | width, | ||
size_t | height, | ||
cudaMemcpyKind | kind, | ||
const qudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpy2DAsync or driver API equivalent.
[out] | dst | Destination pointer |
[in] | dpitch | Destination pitch in bytes |
[in] | src | Source pointer |
[in] | spitch | Source pitch in bytes |
[in] | width | Width in bytes |
[in] | height | Number of rows |
[in] | kind | Type of memory copy |
[in] | stream | Stream to issue copy |
Definition at line 301 of file quda_api.cpp.
void quda::qudaMemcpy_ | ( | void * | dst, |
const void * | src, | ||
size_t | count, | ||
cudaMemcpyKind | kind, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpy or driver API equivalent.
[out] | dst | Destination pointer |
[in] | src | Source pointer |
[in] | count | Size of transfer |
[in] | kind | Type of memory copy |
Definition at line 232 of file quda_api.cpp.
void quda::qudaMemcpyAsync_ | ( | void * | dst, |
const void * | src, | ||
size_t | count, | ||
cudaMemcpyKind | kind, | ||
const qudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpyAsync or driver API equivalent.
[out] | dst | Destination pointer |
[in] | src | Source pointer |
[in] | count | Size of transfer |
[in] | kind | Type of memory copy |
[in] | stream | Stream to issue copy |
Definition at line 241 of file quda_api.cpp.
void quda::qudaMemPrefetchAsync_ | ( | void * | ptr, |
size_t | count, | ||
QudaFieldLocation | mem_space, | ||
const qudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemPrefetchAsync or driver API equivalent.
[out] | ptr | Starting address pointer to be prefetched |
[in] | count | Size in bytes to prefetch |
[in] | mem_space | Memory space to prefetch to |
[in] | stream | Stream to issue prefetch |
Definition at line 363 of file quda_api.cpp.
void quda::qudaMemset2D_ | ( | void * | ptr, |
size_t | pitch, | ||
int | value, | ||
size_t | width, | ||
size_t | height, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemset2D or driver API equivalent.
[out] | ptr | Starting address pointer |
[in] | pitch | Pitch in bytes |
[in] | value | Value to set for each byte of specified memory |
[in] | width | Width in bytes |
[in] | height | Height in rows |
Definition at line 349 of file quda_api.cpp.
void quda::qudaMemset2DAsync_ | ( | void * | ptr, |
size_t | pitch, | ||
int | value, | ||
size_t | width, | ||
size_t | height, | ||
const qudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemset2DAsync or driver API equivalent.
[out] | ptr | Starting address pointer |
[in] | pitch | Pitch in bytes |
[in] | value | Value to set for each byte of specified memory |
[in] | width | Width in bytes |
[in] | height | Height in rows |
[in] | stream | Stream to issue memset |
Definition at line 356 of file quda_api.cpp.
void quda::qudaMemset_ | ( | void * | ptr, |
int | value, | ||
size_t | count, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemset or driver API equivalent.
[out] | ptr | Starting address pointer |
[in] | value | Value to set for each byte of specified memory |
[in] | count | Size in bytes to set |
Definition at line 331 of file quda_api.cpp.
void quda::qudaMemsetAsync_ | ( | void * | ptr, |
int | value, | ||
size_t | count, | ||
const qudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemsetAsync or driver API equivalent.
[out] | ptr | Starting address pointer |
[in] | value | Value to set for each byte of specified memory |
[in] | count | Size in bytes to set |
[in] | stream | Stream to issue memset |
Definition at line 340 of file quda_api.cpp.
void quda::qudaStreamSynchronize_ | ( | qudaStream_t & | stream, |
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaStreamSynchronize or cuStreamSynchronize with built-in error checking.
[in] | stream | Stream which we are synchronizing |
Definition at line 448 of file quda_api.cpp.
void quda::qudaStreamWaitEvent_ | ( | qudaStream_t | stream, |
cudaEvent_t | event, | ||
unsigned int | flags, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking.
[in,out] | stream | Stream which we are instructing to wait |
[in] | event | Event we are waiting on |
[in] | flags | Flags to pass to function |
Definition at line 417 of file quda_api.cpp.
|
inlineconstexpr |
Definition at line 82 of file malloc_quda.h.
|
inline |
Return a random number between 0 and 1.
state | curand rng state |
Definition at line 96 of file random_quda.h.
|
inline |
Return a random number between a and b.
state | curand rng state |
a | lower range |
b | upper range |
Definition at line 75 of file random_quda.h.
|
inline |
Definition at line 107 of file random_quda.h.
|
inline |
Definition at line 86 of file random_quda.h.
|
inline |
Definition at line 102 of file random_quda.h.
|
inline |
Definition at line 81 of file random_quda.h.
|
inline |
Helper function for determining if the reconstruct of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 850 of file gauge_field.h.
|
inline |
Helper function for determining if the reconstruct of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check reconstruct on |
Definition at line 869 of file gauge_field.h.
reduce_t quda::reduce | ( | QudaFieldLocation | location, |
const T * | v, | ||
I | n_items, | ||
reduce_t | init, | ||
reducer | r | ||
) |
QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory.
[in] | location | Location where the reduction will take place |
[out] | result | Result |
[in] | v | Input vector |
[in] | n_items | Number of elements to be reduced |
[in] | init | Result is initialized to this value |
[in] | reducer | Functor that applies the reduction to each transformed element |
Definition at line 240 of file transform_reduce.h.
void quda::reduce | ( | QudaFieldLocation | location, |
std::vector< reduce_t > & | result, | ||
const std::vector< T * > & | v, | ||
I | n_items, | ||
reduce_t | init, | ||
reducer | r | ||
) |
QUDA implementation providing thrust::reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation.
[in] | location | Location where the reduction will take place |
[out] | result | Result |
[in] | v | Input vector |
[in] | n_items | Number of elements to be reduced |
[in] | init | The results are initialized to this value |
[in] | reducer | Functor that applies the reduction to each transformed element |
Definition at line 221 of file transform_reduce.h.
int quda::reliable | ( | double & | rNorm, |
double & | maxrx, | ||
double & | maxrr, | ||
const double & | r2, | ||
const double & | delta | ||
) |
Definition at line 39 of file inv_bicgstab_quda.cpp.
QudaFieldLocation quda::reorder_location | ( | ) |
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.
Definition at line 748 of file lattice_field.cpp.
void quda::reorder_location_set | ( | QudaFieldLocation | reorder_location_ | ) |
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.
reorder_location_ | The location to set where data will be reordered |
Definition at line 749 of file lattice_field.cpp.
void quda::Restrict | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const ColorSpinorField & | v, | ||
int | Nvec, | ||
const int * | fine_to_coarse, | ||
const int * | coarse_to_fine, | ||
const int *const * | spin_map, | ||
int | parity = QUDA_INVALID_PARITY |
||
) |
Apply the restriction operator.
[out] | out | Resulting coarsened field |
[in] | in | Input field on fine grid |
[in] | v | Matrix field containing the null-space components |
[in] | Nvec | Number of null-space components |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | spin_map | Spin blocking lookup table |
[in] | parity | of the input fine field (if single parity input field) |
void * quda::safe_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h
Definition at line 280 of file malloc.cpp.
void quda::saveProfile | ( | const std::string | label = "" | ) |
void quda::saveTuneCache | ( | bool | error | ) |
void quda::setDiracEigParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
const bool | pc, | ||
bool | comms | ||
) |
Definition at line 1758 of file interface_quda.cpp.
void quda::setDiracParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
bool | pc | ||
) |
Definition at line 1570 of file interface_quda.cpp.
void quda::setDiracPreParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
const bool | pc, | ||
bool | comms | ||
) |
Definition at line 1726 of file interface_quda.cpp.
void quda::setDiracRefineParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
const bool | pc | ||
) |
Definition at line 1707 of file interface_quda.cpp.
void quda::setDiracSloppyParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
bool | pc | ||
) |
Definition at line 1689 of file interface_quda.cpp.
|
inline |
Definition at line 677 of file quda_matrix.h.
|
inline |
Definition at line 662 of file quda_matrix.h.
|
inline |
Definition at line 647 of file quda_matrix.h.
void quda::setKernelPackT | ( | bool | pack | ) |
pack | Sets whether to use a kernel to pack the T dimension |
void quda::setPackComms | ( | const int * | dim_pack | ) |
Helper function that sets which dimensions the packing kernel should be packing for.
[in] | dim_pack | Array that specifies which dimensions need to be packed. |
void quda::setPolicyTuning | ( | bool | policy_tuning_ | ) |
void quda::setTransferGPU | ( | bool | ) |
void quda::setUberTuning | ( | bool | uber_tuning_ | ) |
void quda::setUnitarizeLinksConstants | ( | double | unitarize_eps, |
double | max_error, | ||
bool | allow_svd, | ||
bool | svd_only, | ||
double | svd_rel_error, | ||
double | svd_abs_error | ||
) |
|
inline |
Definition at line 721 of file quda_matrix.h.
|
inline |
Definition at line 707 of file quda_matrix.h.
|
inline |
Definition at line 693 of file quda_matrix.h.
Definition at line 1218 of file complex_quda.h.
|
inline |
Definition at line 1210 of file complex_quda.h.
|
inline |
Definition at line 51 of file complex_quda.h.
Definition at line 1234 of file complex_quda.h.
|
inline |
Definition at line 1226 of file complex_quda.h.
|
inline |
Definition at line 86 of file complex_quda.h.
void quda::spinorNoise | ( | ColorSpinorField & | src, |
RNG & | randstates, | ||
QudaNoiseType | type | ||
) |
Generate a random noise spinor. This variant allows the user to manage the RNG state.
src | The colorspinorfield |
randstates | Random state |
type | The type of noise to create (QUDA_NOISE_GAUSSIAN or QUDA_NOISE_UNIFORM) |
void quda::spinorNoise | ( | ColorSpinorField & | src, |
unsigned long long | seed, | ||
QudaNoiseType | type | ||
) |
Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state.
src | The colorspinorfield |
seed | Seed |
type | The type of noise to create (QUDA_NOISE_GAUSSIAN or QUDA_NOISE_UNIFORM) |
|
inline |
The term partition in the variable names and comments can mean two things:
In this file the terms *_dim and *_idx all refer to arrays of 4 ints - one can think of them as 1-d ints to understand things, and the extension to 4-d is trivial.
Definition at line 17 of file split_grid.h.
Definition at line 1248 of file complex_quda.h.
|
inline |
Definition at line 1242 of file complex_quda.h.
|
inline |
Definition at line 120 of file complex_quda.h.
void quda::StaggeredCoarseOp | ( | GaugeField & | Y, |
GaugeField & | X, | ||
const Transfer & | T, | ||
const cudaGaugeField & | gauge, | ||
const cudaGaugeField * | XinvKD, | ||
double | mass, | ||
QudaDiracType | dirac, | ||
QudaMatPCType | matpc | ||
) |
Coarse operator construction from a fine-grid operator (Staggered)
Y[out] | Coarse link field |
X[out] | Coarse clover field |
T[in] | Transfer operator that defines the coarse space |
gauge[in] | Gauge field from fine grid, needs to be generalized for long link. |
XinvKD[in] | Inverse Kahler-Dirac block |
mass[in] | Mass parameter |
matpc[in] | The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. For staggered, should always be QUDA_MATPC_INVALID. |
void quda::StaggeredProlongate | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const int * | fine_to_coarse, | ||
const int *const * | spin_map, | ||
int | parity = QUDA_INVALID_PARITY |
||
) |
Apply the unitary "prolongation" operator for Kahler-Dirac preconditioning.
[out] | out | Resulting fine grid field |
[in] | in | Input field on coarse grid |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | spin_map | Spin blocking lookup table |
[in] | parity | of the output fine field (if single parity output field) |
void quda::StaggeredRestrict | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const int * | fine_to_coarse, | ||
const int *const * | spin_map, | ||
int | parity = QUDA_INVALID_PARITY |
||
) |
Apply the unitary "restriction" operator for Kahler-Dirac preconditioning.
[out] | out | Resulting coarse grid field |
[in] | in | Input field on fine grid |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | spin_map | Spin blocking lookup table |
[in] | parity | of the input fine field (if single parity input field) |
|
inline |
Definition at line 88 of file inline_ptx.h.
|
inline |
Definition at line 93 of file inline_ptx.h.
|
inline |
Definition at line 78 of file inline_ptx.h.
|
inline |
Definition at line 98 of file inline_ptx.h.
|
inline |
Definition at line 83 of file inline_ptx.h.
void quda::STOUTStep | ( | GaugeField & | dataDs, |
GaugeField & | dataOr, | ||
double | rho | ||
) |
Apply STOUT smearing to the gauge field.
[out] | dataDs | Output smeared field |
[in] | dataOr | Input gauge field |
[in] | rho | smearing parameter |
|
inlineconstexpr |
Definition at line 80 of file malloc_quda.h.
|
inlineconstexpr |
Definition at line 81 of file malloc_quda.h.
|
inline |
Definition at line 925 of file quda_matrix.h.
|
inline |
Definition at line 1254 of file complex_quda.h.
|
inline |
Definition at line 56 of file complex_quda.h.
|
inline |
Definition at line 1260 of file complex_quda.h.
|
inline |
Definition at line 91 of file complex_quda.h.
|
constexpr |
The termination value we use to prevent a possible hang in case the computed reduction is equal to the initialization.
Definition at line 44 of file reduce_helper.h.
double quda::timeInterval | ( | struct timeval | start, |
struct timeval | end | ||
) |
Definition at line 18 of file inv_gcr_quda.cpp.
void quda::transform_reduce | ( | Arg & | arg | ) |
Definition at line 58 of file transform_reduce.h.
reduce_t quda::transform_reduce | ( | QudaFieldLocation | location, |
const T * | v, | ||
I | n_items, | ||
transformer | h, | ||
reduce_t | init, | ||
reducer | r | ||
) |
QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory.
[in] | location | Location where the reduction will take place |
[out] | result | Result |
[in] | v | Input vector |
[in] | n_items | Number of elements to be reduced |
[in] | transformer | Functor that applies transform to each element |
[in] | init | Result is initialized to this value |
[in] | reducer | Functor that applies the reduction to each transformed element |
Definition at line 200 of file transform_reduce.h.
void quda::transform_reduce | ( | QudaFieldLocation | location, |
std::vector< reduce_t > & | result, | ||
const std::vector< T * > & | v, | ||
I | n_items, | ||
transformer | h, | ||
reduce_t | init, | ||
reducer | r | ||
) |
QUDA implementation providing thrust::transform_reduce like functionality. Improves upon thrust's implementation since a single kernel is used which writes the result directly to host memory, and is a batched implementation.
[in] | location | Location where the reduction will take place |
[out] | result | Vector of results |
[in] | v | Vector of inputs |
[in] | n_items | Number of elements to be reduced in each input |
[in] | transformer | Functor that applies transform to each element |
[in] | init | The results are initialized to this value |
[in] | reducer | Functor that applies the reduction to each transformed element |
Definition at line 178 of file transform_reduce.h.
TuneParam quda::tuneLaunch | ( | Tunable & | tunable, |
QudaTune | enabled, | ||
QudaVerbosity | verbosity | ||
) |
|
inline |
Definition at line 45 of file uint_to_char.h.
|
inline |
Definition at line 127 of file uint_to_char.h.
bool quda::uberTuning | ( | ) |
void quda::unitarizeLinks | ( | GaugeField & | outfield, |
const GaugeField & | infield, | ||
int * | fails | ||
) |
void quda::unitarizeLinks | ( | GaugeField & | outfield, |
int * | fails | ||
) |
void quda::unitarizeLinksCPU | ( | GaugeField & | outfield, |
const GaugeField & | infield | ||
) |
void quda::updateAlphaZeta | ( | double * | alpha, |
double * | zeta, | ||
double * | zeta_old, | ||
const double * | r2, | ||
const double * | beta, | ||
const double | pAp, | ||
const double * | offset, | ||
const int | nShift, | ||
const int | j_low | ||
) |
Compute the new values of alpha and zeta
Definition at line 126 of file inv_multi_cg_quda.cpp.
void quda::updateAp | ( | Complex ** | beta, |
std::vector< ColorSpinorField * > | Ap, | ||
int | begin, | ||
int | size, | ||
int | k | ||
) |
Definition at line 83 of file inv_gcr_quda.cpp.
void quda::updateGaugeField | ( | GaugeField & | out, |
double | dt, | ||
const GaugeField & | in, | ||
const GaugeField & | mom, | ||
bool | conj_mom, | ||
bool | exact | ||
) |
Evolve the gauge field by step size dt using the momentum field
out | Updated gauge field |
dt | Step size |
in | Input gauge field |
mom | Momentum field |
conj_mom | Whether we conjugate the momentum in the exponential |
exact | Calculate exact exponential or use an expansion |
void quda::updateMomentum | ( | GaugeField & | mom, |
double | coeff, | ||
GaugeField & | force, | ||
const char * | fname | ||
) |
Update the momentum field from the force field
mom = mom - coeff * [force]_TA
where [A]_TA means the traceless anti-hermitian projection of A
mom | Momentum field |
coeff | Integration stepsize |
force | Force field |
fname | The function calling this (fname will be printed if force monitoring is enabled) |
void quda::updateSolution | ( | ColorSpinorField & | x, |
const Complex * | alpha, | ||
Complex **const | beta, | ||
double * | gamma, | ||
int | k, | ||
std::vector< ColorSpinorField * > | p | ||
) |
Definition at line 146 of file inv_gcr_quda.cpp.
bool quda::use_managed_memory | ( | ) |
Definition at line 178 of file malloc.cpp.
|
inline |
Definition at line 494 of file register_traits.h.
|
inline |
Definition at line 503 of file register_traits.h.
|
inline |
Definition at line 580 of file register_traits.h.
|
inline |
Definition at line 571 of file register_traits.h.
|
inline |
Definition at line 597 of file register_traits.h.
|
inline |
Definition at line 525 of file register_traits.h.
|
inline |
Definition at line 543 of file register_traits.h.
|
inline |
Definition at line 534 of file register_traits.h.
|
inline |
Definition at line 561 of file register_traits.h.
|
inline |
Definition at line 552 of file register_traits.h.
|
inline |
Definition at line 588 of file register_traits.h.
|
inline |
Definition at line 520 of file register_traits.h.
void quda::WFlowStep | ( | GaugeField & | out, |
GaugeField & | temp, | ||
GaugeField & | in, | ||
double | epsilon, | ||
QudaWFlowType | wflow_type | ||
) |
Apply Wilson Flow steps W1, W2, Vt to the gauge field. This routine assumes that the input and output fields are extended, with the input field being exchanged prior to calling this function. On exit from this routine, the output field will have been exchanged.
[out] | out | Output smeared field |
[in] | temp | Temp space |
[in] | in | Input gauge field |
[in] | epsilon | Step size |
[in] | wflow_type | Wilson (1x1) or Symanzik improved (2x1) staples |
|
inline |
Definition at line 359 of file float_vector.h.
|
inline |
Definition at line 318 of file float_vector.h.
|
inline |
Definition at line 319 of file float_vector.h.
|
inline |
Definition at line 324 of file float_vector.h.
|
inline |
Definition at line 330 of file float_vector.h.
|
inline |
Definition at line 338 of file float_vector.h.
|
inline |
Definition at line 339 of file float_vector.h.
|
inline |
Definition at line 344 of file float_vector.h.
|
inline |
Definition at line 350 of file float_vector.h.
|
inline |
Definition at line 358 of file float_vector.h.
|
inline |
Definition at line 408 of file float_vector.h.
const int quda::Nstream = 9 |
Definition at line 137 of file quda_internal.h.
qudaStream_t* quda::stream |
Definition at line 644 of file cuda_color_spinor_field.cpp.