QUDA
1.0.0
|
Namespaces | |
blas | |
clover | |
colorspinor | |
cublas | |
dslash | |
fermion_force | |
gauge | |
linalg | |
pool | |
Typedefs | |
typedef std::vector< ColorSpinorField * > | CompositeColorSpinorField |
using | ColorSpinorFieldSet = ColorSpinorField |
typedef int | storeType |
typedef std::complex< double > | Complex |
typedef struct curandStateMRG32k3a | cuRNGState |
using | DynamicStride = Stride< Dynamic, Dynamic > |
using | DenseMatrix = MatrixXcd |
using | VectorSet = MatrixXcd |
using | Vector = VectorXcd |
using | RealVector = VectorXd |
using | RowMajorDenseMatrix = Matrix< Complex, Dynamic, Dynamic, RowMajor > |
typedef std::map< TuneKey, TuneParam > | map |
Functions | |
void | checkSpinor (const ColorSpinorField &a, const ColorSpinorField &b) |
void | checkLength (const ColorSpinorField &a, const ColorSpinorField &b) |
__host__ __device__ double | set (double &x) |
__host__ __device__ double2 | set (double2 &x) |
__host__ __device__ double3 | set (double3 &x) |
__host__ __device__ double4 | set (double4 &x) |
__host__ __device__ void | sum (double &a, double &b) |
__host__ __device__ void | sum (double2 &a, double2 &b) |
__host__ __device__ void | sum (double3 &a, double3 &b) |
__host__ __device__ void | sum (double4 &a, double4 &b) |
std::ostream & | operator<< (std::ostream &output, const CloverFieldParam &param) |
double | norm1 (const CloverField &u, bool inverse=false) |
double | norm2 (const CloverField &a, bool inverse=false) |
void | computeClover (CloverField &clover, const GaugeField &gauge, double coeff, QudaFieldLocation location) |
void | copyGenericClover (CloverField &out, const CloverField &in, bool inverse, QudaFieldLocation location, void *Out=0, void *In=0, void *outNorm=0, void *inNorm=0) |
This generic function is used for copying the clover field, where the input and output can be in any order and location. More... | |
void | cloverInvert (CloverField &clover, bool computeTraceLog) |
This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field. More... | |
void | cloverRho (CloverField &clover, double rho) |
This function adds a real scalar onto the clover diagonal (only to the direct field, not the inverse). More... | |
void | computeCloverForce (GaugeField &force, const GaugeField &U, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &p, std::vector< double > &coeff) |
Compute the force contribution from the solver solution fields. More... | |
void | computeCloverSigmaOprod (GaugeField &oprod, std::vector< ColorSpinorField *> &x, std::vector< ColorSpinorField *> &p, std::vector< std::vector< double > > &coeff) |
Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field. More... | |
void | computeCloverSigmaTrace (GaugeField &output, const CloverField &clover, double coeff) |
Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu]. More... | |
void | cloverDerivative (cudaGaugeField &force, cudaGaugeField &gauge, cudaGaugeField &oprod, double coeff, QudaParity parity) |
Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b) |
Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c). More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int s) |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b, int sa, int sb) |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ complex< Float > | innerProduct (const ColorSpinor< Float, Nc, 1 > &a, const ColorSpinor< Float, Nc, Ns > &b, int s) |
Compute the inner product over color at spin s between a color vector and a color spinor: dot = \sum_{c} conj(a(c)) * b(s,c). More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ Matrix< complex< Float >, Nc > | outerProdSpinTrace (const ColorSpinor< Float, Nc, Ns > &a, const ColorSpinor< Float, Nc, Ns > &b) |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator+ (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y) |
ColorSpinor addition operator. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator- (const ColorSpinor< Float, Nc, Ns > &x, const ColorSpinor< Float, Nc, Ns > &y) |
ColorSpinor subtraction operator. More... | |
template<typename Float , int Nc, int Ns, typename S > | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator* (const S &a, const ColorSpinor< Float, Nc, Ns > &x) |
Compute the scalar-vector product y = a * x. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator* (const Matrix< complex< Float >, Nc > &A, const ColorSpinor< Float, Nc, Ns > &x) |
Compute the matrix-vector product y = A * x. More... | |
template<typename Float , int Nc, int Ns> | |
__device__ __host__ ColorSpinor< Float, Nc, Ns > | operator* (const HMatrix< Float, Nc *Ns > &A, const ColorSpinor< Float, Nc, Ns > &x) |
Compute the matrix-vector product y = A * x. More... | |
void | copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, void *Dst=0, void *Src=0, void *dstNorm=0, void *srcNorm=0) |
void | genericSource (cpuColorSpinorField &a, QudaSourceType sourceType, int x, int s, int c) |
int | genericCompare (const cpuColorSpinorField &a, const cpuColorSpinorField &b, int tol) |
void | genericPrintVector (const cpuColorSpinorField &a, unsigned int x) |
void | genericCudaPrintVector (const cudaColorSpinorField &a, unsigned x) |
void | wuppertalStep (ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double A, double B) |
void | wuppertalStep (ColorSpinorField &out, const ColorSpinorField &in, int parity, const GaugeField &U, double alpha) |
void | exchangeExtendedGhost (cudaColorSpinorField *spinor, int R[], int parity, cudaStream_t *stream_p) |
void | copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, const int parity, void *Dst, void *Src, void *dstNorm, void *srcNorm) |
void | genericPackGhost (void **ghost, const ColorSpinorField &a, QudaParity parity, int nFace, int dagger, MemoryLocation *destination=nullptr) |
Generic ghost packing routine. More... | |
void | spinorNoise (ColorSpinorField &src, RNG &randstates, QudaNoiseType type) |
Generate a random noise spinor. This variant allows the user to manage the RNG state. More... | |
void | spinorNoise (ColorSpinorField &src, unsigned long long seed, QudaNoiseType type) |
Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state. More... | |
QudaPCType | PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b) |
Helper function for determining if the preconditioning type of the fields is the same. More... | |
template<typename... Args> | |
QudaPCType | PCType_ (const char *func, const char *file, int line, const ColorSpinorField &a, const ColorSpinorField &b, const Args &... args) |
Helper function for determining if the preconditioning type of the fields is the same. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | cos (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | sin (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | tan (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | acos (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | asin (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | atan (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | atan2 (ValueType x, ValueType y) |
template<typename ValueType > | |
__host__ __device__ ValueType | cosh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | sinh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | tanh (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | exp (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | log (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | log10 (ValueType x) |
template<typename ValueType , typename ExponentType > | |
__host__ __device__ ValueType | pow (ValueType x, ExponentType e) |
template<typename ValueType > | |
__host__ __device__ ValueType | sqrt (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | abs (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | conj (ValueType x) |
template<typename ValueType > | |
__host__ __device__ ValueType | abs (const complex< ValueType > &z) |
Returns the magnitude of z. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | arg (const complex< ValueType > &z) |
Returns the phase angle of z. More... | |
template<typename ValueType > | |
__host__ __device__ ValueType | norm (const complex< ValueType > &z) |
Returns the magnitude of z squared. More... | |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | conj (const complex< ValueType > &z) |
Returns the complex conjugate of z. More... | |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | polar (const ValueType &m, const ValueType &theta=0) |
Returns the complex with magnitude m and angle theta in radians. More... | |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator* (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator* (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator* (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator/ (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<> | |
__host__ __device__ complex< float > | operator/ (const complex< float > &lhs, const complex< float > &rhs) |
template<> | |
__host__ __device__ complex< double > | operator/ (const complex< double > &lhs, const complex< double > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator- (const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | cos (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | cosh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | exp (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | log (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | log10 (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const complex< ValueType > &z, const int &n) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const complex< ValueType > &z, const ValueType &x) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const complex< ValueType > &z, const complex< ValueType > &z2) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | pow (const ValueType &x, const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | sin (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | sinh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | sqrt (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | tan (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | tanh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | acos (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | asin (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | atan (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | acosh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | asinh (const complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | atanh (const complex< ValueType > &z) |
template<typename ValueType , class charT , class traits > | |
std::basic_ostream< charT, traits > & | operator<< (std::basic_ostream< charT, traits > &os, const complex< ValueType > &z) |
template<typename ValueType , typename charT , class traits > | |
std::basic_istream< charT, traits > & | operator>> (std::basic_istream< charT, traits > &is, complex< ValueType > &z) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator+ (const volatile complex< ValueType > &lhs, const volatile complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator/ (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ complex< ValueType > | operator/ (const ValueType &lhs, const complex< ValueType > &rhs) |
template<> | |
__host__ __device__ complex< float > | operator/ (const float &lhs, const complex< float > &rhs) |
template<> | |
__host__ __device__ complex< double > | operator/ (const double &lhs, const complex< double > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator== (const complex< ValueType > &lhs, const ValueType &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const complex< ValueType > &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const ValueType &lhs, const complex< ValueType > &rhs) |
template<typename ValueType > | |
__host__ __device__ bool | operator!= (const complex< ValueType > &lhs, const ValueType &rhs) |
template<> | |
__host__ __device__ float | abs (const complex< float > &z) |
template<> | |
__host__ __device__ double | abs (const complex< double > &z) |
template<> | |
__host__ __device__ float | arg (const complex< float > &z) |
template<> | |
__host__ __device__ double | arg (const complex< double > &z) |
template<> | |
__host__ __device__ complex< float > | polar (const float &magnitude, const float &angle) |
template<> | |
__host__ __device__ complex< double > | polar (const double &magnitude, const double &angle) |
template<> | |
__host__ __device__ complex< float > | cos (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | cosh (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | exp (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | log (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | pow (const float &x, const complex< float > &exponent) |
template<> | |
__host__ __device__ complex< float > | sin (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | sinh (const complex< float > &z) |
template<> | |
__host__ __device__ complex< float > | sqrt (const complex< float > &z) |
template<typename ValueType > | |
__host__ __device__ complex< float > | atanh (const complex< float > &z) |
template<typename real > | |
__host__ __device__ complex< real > | cmul (const complex< real > &x, const complex< real > &y) |
template<typename real > | |
__host__ __device__ complex< real > | cmac (const complex< real > &x, const complex< real > &y, const complex< real > &z) |
void | contractQuda (const ColorSpinorField &x, const ColorSpinorField &y, void *result, QudaContractType cType) |
template<typename type > | |
int | vecLength () |
template<> | |
int | vecLength< char > () |
template<> | |
int | vecLength< short > () |
template<> | |
int | vecLength< float > () |
template<> | |
int | vecLength< double > () |
template<> | |
int | vecLength< char2 > () |
template<> | |
int | vecLength< short2 > () |
template<> | |
int | vecLength< float2 > () |
template<> | |
int | vecLength< double2 > () |
template<> | |
int | vecLength< char4 > () |
template<> | |
int | vecLength< short4 > () |
template<> | |
int | vecLength< float4 > () |
template<> | |
int | vecLength< double4 > () |
__host__ __device__ float | s2f (short a) |
__host__ __device__ double | s2d (short a) |
__host__ __device__ float | c2f (char a) |
__host__ __device__ double | c2d (char a) |
__host__ __device__ float | s2f (short a, float c) |
__host__ __device__ double | s2d (short a, double c) |
__host__ __device__ float | c2f (char a, float c) |
__host__ __device__ double | c2d (char a, double c) |
template<typename FloatN > | |
__device__ void | copyFloatN (FloatN &a, const FloatN &b) |
__device__ void | copyFloatN (float2 &a, const char2 &b) |
__device__ void | copyFloatN (float4 &a, const char4 &b) |
__device__ void | copyFloatN (double2 &a, const char2 &b) |
__device__ void | copyFloatN (double4 &a, const char4 &b) |
__device__ void | copyFloatN (float2 &a, const short2 &b) |
__device__ void | copyFloatN (float4 &a, const short4 &b) |
__device__ void | copyFloatN (double2 &a, const short2 &b) |
__device__ void | copyFloatN (double4 &a, const short4 &b) |
__device__ void | copyFloatN (float2 &a, const double2 &b) |
__device__ void | copyFloatN (double2 &a, const float2 &b) |
__device__ void | copyFloatN (float4 &a, const double4 &b) |
__device__ void | copyFloatN (double4 &a, const float4 &b) |
__device__ __host__ int | f2i (float f) |
__device__ __host__ int | d2i (double d) |
__device__ void | copyFloatN (short2 &a, const float2 &b) |
__device__ void | copyFloatN (short4 &a, const float4 &b) |
__device__ void | copyFloatN (short2 &a, const double2 &b) |
__device__ void | copyFloatN (short4 &a, const double4 &b) |
__device__ void | copyFloatN (char2 &a, const float2 &b) |
__device__ void | copyFloatN (char4 &a, const float4 &b) |
__device__ void | copyFloatN (char2 &a, const double2 &b) |
__device__ void | copyFloatN (char4 &a, const double4 &b) |
template<typename OutputType , typename InputType > | |
__device__ void | convert (OutputType x[], InputType y[], const int N) |
template<> | |
__device__ void | convert< float2, short2 > (float2 x[], short2 y[], const int N) |
template<> | |
__device__ void | convert< float4, short4 > (float4 x[], short4 y[], const int N) |
template<> | |
__device__ void | convert< double4, double2 > (double4 x[], double2 y[], const int N) |
template<> | |
__device__ void | convert< double2, double4 > (double2 x[], double4 y[], const int N) |
template<> | |
__device__ void | convert< float4, float2 > (float4 x[], float2 y[], const int N) |
template<> | |
__device__ void | convert< float2, float4 > (float2 x[], float4 y[], const int N) |
template<> | |
__device__ void | convert< short4, float2 > (short4 x[], float2 y[], const int N) |
template<> | |
__device__ void | convert< float2, short4 > (float2 x[], short4 y[], const int N) |
template<> | |
__device__ void | convert< float4, short2 > (float4 x[], short2 y[], const int N) |
template<> | |
__device__ void | convert< short2, float4 > (short2 x[], float4 y[], const int N) |
template<> | |
__device__ void | convert< short4, double2 > (short4 x[], double2 y[], const int N) |
template<> | |
__device__ void | convert< double2, short4 > (double2 x[], short4 y[], const int N) |
template<> | |
__device__ void | convert< double4, short2 > (double4 x[], short2 y[], const int N) |
template<> | |
__device__ void | convert< short2, double4 > (short2 x[], double4 y[], const int N) |
template<> | |
__device__ void | convert< float4, double2 > (float4 x[], double2 y[], const int N) |
template<> | |
__device__ void | convert< double2, float4 > (double2 x[], float4 y[], const int N) |
template<> | |
__device__ void | convert< double4, float2 > (double4 x[], float2 y[], const int N) |
template<> | |
__device__ void | convert< float2, double4 > (float2 x[], double4 y[], const int N) |
template<typename scalar , int n> | |
__device__ __host__ void | zero (vector_type< scalar, n > &v) |
template<typename scalar , int n> | |
__device__ __host__ vector_type< scalar, n > | operator+ (const vector_type< scalar, n > &a, const vector_type< scalar, n > &b) |
template<int block_size_x, int block_size_y, typename T , bool do_sum = true, typename Reducer = cub::Sum> | |
__device__ void | reduce2d (ReduceArg< T > arg, const T &in, const int idx=0) |
template<int block_size, typename T , bool do_sum = true, typename Reducer = cub::Sum> | |
__device__ void | reduce (ReduceArg< T > arg, const T &in, const int idx=0) |
template<int block_size_x, int block_size_y, typename T > | |
__device__ void | reduceRow (ReduceArg< T > arg, const T &in) |
void | setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc) |
void | setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc) |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , int nColor, typename... Args> | |
void | instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiate function is used to instantiate the reconstruct types used. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon , typename Float , typename... Args> | |
void | instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiate function is used to instantiate the colors. More... | |
template<template< typename, int, QudaReconstructType > class Apply, typename Recon = WilsonReconstruct, typename... Args> | |
void | instantiate (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, Args &&... args) |
This instantiate function is used to instantiate the precisions. More... | |
template<KernelType type> | |
__host__ __device__ bool | doHalo (int dim=-1) |
Helper function to determine if we should do halo computation. More... | |
template<KernelType type> | |
__host__ __device__ bool | doBulk () |
Helper function to determine if we should do interior computation. More... | |
template<KernelType type, typename Arg > | |
__host__ __device__ bool | isComplete (const Arg &arg, int coord[]) |
Helper function to determine if the application of the derivative in the dslash is complete. More... | |
template<int nDim, QudaPCType pc_type, KernelType kernel_type, typename Arg , int nface_ = 1> | |
__host__ __device__ int | getCoords (int coord[], const Arg &arg, int &idx, int parity, int &dim) |
Compute the space-time coordinates we are at. More... | |
template<int dim, typename Arg > | |
__host__ __device__ bool | inBoundary (const int coord[], const Arg &arg) |
Compute whether the provided coordinate is within the halo region boundary of a given dimension. More... | |
template<KernelType kernel_type, typename Arg > | |
__device__ bool | isActive (bool &active, int threadDim, int offsetDim, const int coord[], const Arg &arg) |
Compute whether this thread should be active for updating a given offsetDim halo. For non-fused halo update kernels this is a trivial check of whether the given dimension is partitioned, returning true if so. More... | |
template<typename Float > | |
std::ostream & | operator<< (std::ostream &out, const DslashArg< Float > &arg) |
void | setKernelPackT (bool pack) |
bool | getKernelPackT () |
void | pushKernelPackT (bool pack) |
void | popKernelPackT () |
void | setPackComms (const int *dim_pack) |
Helper function that sets which dimensions the packing kernel should be packing for. More... | |
bool | getDslashLaunch () |
void | createDslashEvents () |
void | destroyDslashEvents () |
void | ApplyWilson (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Wilson stencil. More... | |
void | ApplyWilsonClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Wilson-clover stencil. More... | |
void | ApplyWilsonCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &A, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned Wilson-clover stencil. More... | |
void | ApplyTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the twisted-mass stencil. More... | |
void | ApplyTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned twisted-mass stencil. More... | |
void | ApplyNdegTwistedMass (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the non-degenerate twisted-mass stencil. More... | |
void | ApplyNdegTwistedMassPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double b, double c, bool xpay, const ColorSpinorField &x, int parity, bool dagger, bool asymmetric, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned non-degenerate twisted-mass stencil. More... | |
void | ApplyTwistedClover (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the twisted-clover stencil. More... | |
void | ApplyTwistedCloverPreconditioned (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const CloverField &C, double a, double b, bool xpay, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the preconditioned twisted-clover stencil. More... | |
void | ApplyDomainWall5D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_f, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order. More... | |
void | ApplyDomainWall4D (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, double m_5, const Complex *b_5, const Complex *c_5, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order. More... | |
void | ApplyDslash5 (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &x, double m_f, double m_5, const Complex *b_5, const Complex *c_5, double a, bool dagger, Dslash5Type type) |
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned. More... | |
void | ApplyLaplace (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int dir, double kappa, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the Laplace stencil. More... | |
void | ApplyCovDev (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, int mu, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Driver for applying the covariant derivative. More... | |
void | ApplyClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, bool inverse, int parity) |
Apply clover-matrix field to a color-spinor field. More... | |
void | ApplyStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Apply the staggered dslash operator to a color-spinor field. More... | |
void | ApplyImprovedStaggered (ColorSpinorField &out, const ColorSpinorField &in, const GaugeField &U, const GaugeField &L, double a, const ColorSpinorField &x, int parity, bool dagger, const int *comm_override, TimeProfile &profile) |
Apply the improved staggered dslash operator to a color-spinor field. More... | |
void | ApplyTwistGamma (ColorSpinorField &out, const ColorSpinorField &in, int d, double kappa, double mu, double epsilon, int dagger, QudaTwistGamma5Type type) |
Apply the twisted-mass gamma operator to a color-spinor field. More... | |
void | ApplyTwistClover (ColorSpinorField &out, const ColorSpinorField &in, const CloverField &clover, double kappa, double mu, double epsilon, int parity, int dagger, QudaTwistGamma5Type twist) |
Apply twisted clover-matrix field to a color-spinor field. More... | |
void | PackGhost (void *ghost[2 *QUDA_MAX_DIM], const ColorSpinorField &field, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream) |
Dslash face packing routine. More... | |
void | gamma5 (ColorSpinorField &out, const ColorSpinorField &in) |
Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma) More... | |
void | arpack_solve (std::vector< ColorSpinorField *> &h_evecs, std::vector< Complex > &h_evals, const DiracMatrix &mat, QudaEigParam *eig_param, TimeProfile &profile) |
The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types. More... | |
__device__ __host__ void | zero (double &a) |
__device__ __host__ void | zero (double2 &a) |
__device__ __host__ void | zero (double3 &a) |
__device__ __host__ void | zero (double4 &a) |
__device__ __host__ void | zero (float &a) |
__device__ __host__ void | zero (float2 &a) |
__device__ __host__ void | zero (float3 &a) |
__device__ __host__ void | zero (float4 &a) |
__host__ __device__ double2 | operator+ (const double2 &x, const double2 &y) |
__host__ __device__ double2 | operator- (const double2 &x, const double2 &y) |
__host__ __device__ float2 | operator- (const float2 &x, const float2 &y) |
__host__ __device__ float4 | operator- (const float4 &x, const float4 &y) |
__host__ __device__ double3 | operator+ (const double3 &x, const double3 &y) |
__host__ __device__ double4 | operator+ (const double4 &x, const double4 &y) |
__host__ __device__ float4 | operator* (const float a, const float4 x) |
__host__ __device__ float2 | operator* (const float a, const float2 x) |
__host__ __device__ double2 | operator* (const double a, const double2 x) |
__host__ __device__ double4 | operator* (const double a, const double4 x) |
__host__ __device__ float2 | operator+ (const float2 x, const float2 y) |
__host__ __device__ float4 | operator+ (const float4 x, const float4 y) |
__host__ __device__ float4 | operator+= (float4 &x, const float4 y) |
__host__ __device__ float2 | operator+= (float2 &x, const float2 y) |
__host__ __device__ double2 | operator+= (double2 &x, const double2 y) |
__host__ __device__ double3 | operator+= (double3 &x, const double3 y) |
__host__ __device__ double4 | operator+= (double4 &x, const double4 y) |
__host__ __device__ float4 | operator-= (float4 &x, const float4 y) |
__host__ __device__ float2 | operator-= (float2 &x, const float2 y) |
__host__ __device__ double2 | operator-= (double2 &x, const double2 y) |
__host__ __device__ float2 | operator*= (float2 &x, const float a) |
__host__ __device__ double2 | operator*= (double2 &x, const float a) |
__host__ __device__ float4 | operator*= (float4 &a, const float &b) |
__host__ __device__ double2 | operator*= (double2 &a, const double &b) |
__host__ __device__ double4 | operator*= (double4 &a, const double &b) |
__host__ __device__ float2 | operator- (const float2 &x) |
__host__ __device__ double2 | operator- (const double2 &x) |
__forceinline__ __host__ __device__ float | max_fabs (const float4 &c) |
__forceinline__ __host__ __device__ float | max_fabs (const float2 &b) |
__forceinline__ __host__ __device__ double | max_fabs (const double4 &c) |
__forceinline__ __host__ __device__ double | max_fabs (const double2 &b) |
__forceinline__ __host__ __device__ float2 | make_FloatN (const double2 &a) |
__forceinline__ __host__ __device__ float4 | make_FloatN (const double4 &a) |
__forceinline__ __host__ __device__ double2 | make_FloatN (const float2 &a) |
__forceinline__ __host__ __device__ double4 | make_FloatN (const float4 &a) |
__forceinline__ __host__ __device__ short4 | make_shortN (const char4 &a) |
__forceinline__ __host__ __device__ short2 | make_shortN (const char2 &a) |
__forceinline__ __host__ __device__ short4 | make_shortN (const float4 &a) |
__forceinline__ __host__ __device__ short2 | make_shortN (const float2 &a) |
__forceinline__ __host__ __device__ short4 | make_shortN (const double4 &a) |
__forceinline__ __host__ __device__ short2 | make_shortN (const double2 &a) |
__forceinline__ __host__ __device__ char4 | make_charN (const short4 &a) |
__forceinline__ __host__ __device__ char2 | make_charN (const short2 &a) |
__forceinline__ __host__ __device__ char4 | make_charN (const float4 &a) |
__forceinline__ __host__ __device__ char2 | make_charN (const float2 &a) |
__forceinline__ __host__ __device__ char4 | make_charN (const double4 &a) |
__forceinline__ __host__ __device__ char2 | make_charN (const double2 &a) |
template<typename Float2 , typename Complex > | |
Float2 | make_Float2 (const Complex &a) |
template<> | |
double2 | make_Float2 (const complex< double > &a) |
template<> | |
double2 | make_Float2 (const complex< float > &a) |
template<> | |
float2 | make_Float2 (const complex< double > &a) |
template<> | |
float2 | make_Float2 (const complex< float > &a) |
template<> | |
double2 | make_Float2 (const std::complex< double > &a) |
template<> | |
double2 | make_Float2 (const std::complex< float > &a) |
template<> | |
float2 | make_Float2 (const std::complex< double > &a) |
template<> | |
float2 | make_Float2 (const std::complex< float > &a) |
complex< double > | make_Complex (const double2 &a) |
complex< float > | make_Complex (const float2 &a) |
std::ostream & | operator<< (std::ostream &output, const GaugeFieldParam &param) |
double | norm1 (const GaugeField &u) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm. More... | |
double | norm2 (const GaugeField &u) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm. More... | |
void | ax (const double &a, GaugeField &u) |
Scale the gauge field by the scalar a. More... | |
void | copyGenericGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0, void **ghostOut=0, void **ghostIn=0, int type=0) |
void | copyExtendedGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out=0, void *In=0) |
void | extractGaugeGhost (const GaugeField &u, void **ghost, bool extract=true, int offset=0) |
void | extractExtendedGaugeGhost (const GaugeField &u, int dim, const int *R, void **ghost, bool extract) |
void | applyGaugePhase (GaugeField &u) |
uint64_t | Checksum (const GaugeField &u, bool mini=false) |
void | gaugeForce (GaugeField &mom, const GaugeField &u, double coeff, int ***input_path, int *length, double *path_coeff, int num_paths, int max_length) |
Compute the gauge-force contribution to the momentum. More... | |
double3 | plaquette (const GaugeField &U) |
Compute the plaquette of the gauge field. More... | |
void | gaugeGauss (GaugeField &U, RNG &rngstate, double epsilon) |
Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More... | |
void | gaugeGauss (GaugeField &U, unsigned long long seed, double epsilon) |
Generate Gaussian distributed su(N) or SU(N) fields. If U is a momentum field, then we generate random Gaussian distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder). More... | |
void | APEStep (GaugeField &dataDs, const GaugeField &dataOr, double alpha) |
Apply APE smearing to the gauge field. More... | |
void | STOUTStep (GaugeField &dataDs, const GaugeField &dataOr, double rho) |
Apply STOUT smearing to the gauge field. More... | |
void | OvrImpSTOUTStep (GaugeField &dataDs, const GaugeField &dataOr, double rho, double epsilon) |
Apply Over Improved STOUT smearing to the gauge field. More... | |
void | gaugefixingOVR (cudaGaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double relax_boost, const double tolerance, const int reunit_interval, const int stopWtheta) |
Gauge fixing with overrelaxation with support for single and multi GPU. More... | |
void | gaugefixingFFT (cudaGaugeField &data, const int gauge_dir, const int Nsteps, const int verbose_interval, const double alpha, const int autotune, const double tolerance, const int stopWtheta) |
Gauge fixing with Steepest descent method with FFTs with support for single GPU only. More... | |
void | computeFmunu (GaugeField &Fmunu, const GaugeField &gauge) |
Compute the Fmunu tensor. More... | |
double | computeQCharge (const GaugeField &Fmunu) |
Compute the topological charge. More... | |
double | computeQChargeDensity (const GaugeField &Fmunu, void *result) |
Compute the topological charge density per lattice site. More... | |
void | updateGaugeField (GaugeField &out, double dt, const GaugeField &in, const GaugeField &mom, bool conj_mom, bool exact) |
template<typename I , typename J , typename K > | |
static __device__ __host__ int | linkIndexShift (const I x[], const J dx[], const K X[4]) |
template<typename I , typename J , typename K > | |
static __device__ __host__ int | linkIndexShift (I y[], const I x[], const J dx[], const K X[4]) |
template<typename I > | |
static __device__ __host__ int | linkIndex (const int x[], const I X[4]) |
template<typename I > | |
static __device__ __host__ int | linkIndex (int y[], const int x[], const I X[4]) |
template<typename I , int n> | |
static __device__ __host__ int | linkIndexDn (const int x[], const I X[4], const int mu) |
template<typename I > | |
static __device__ __host__ int | linkIndexM1 (const int x[], const I X[4], const int mu) |
template<typename I > | |
static __device__ __host__ int | linkIndexM3 (const int x[], const I X[4], const int mu) |
template<typename I > | |
static __device__ __host__ int | linkNormalIndexP1 (const int x[], const I X[4], const int mu) |
template<typename I > | |
static __device__ __host__ int | linkIndexP1 (const int x[], const I X[4], const int mu) |
template<typename I > | |
static __device__ __host__ int | linkIndexP3 (const int x[], const I X[4], const int mu) |
template<int nDim = 4, typename Arg > | |
static __device__ __host__ int | getNeighborIndexCB (const int x[], int mu, int dir, const Arg &arg) |
Compute the checkerboard 1-d index for the nearest neighbor. More... | |
template<typename I , typename J > | |
static __device__ __host__ void | getCoordsCB (int x[], int cb_index, const I X[], J X0h, int parity) |
template<typename I > | |
static __device__ __host__ void | getCoords (int x[], int cb_index, const I X[], int parity) |
template<typename I , typename J > | |
static __device__ __host__ void | getCoordsExtended (I x[], int cb_index, const J X[], int parity, const int R[]) |
template<typename I , typename J > | |
static __device__ __host__ void | getCoords5CB (int x[5], int cb_index, const I X[5], J X0h, int parity, QudaPCType pc_type) |
template<typename I > | |
static __device__ __host__ void | getCoords5 (int x[5], int cb_index, const I X[5], int parity, QudaPCType pc_type) |
template<typename I > | |
static __device__ __host__ int | getIndexFull (int cb_index, const I X[4], int parity) |
template<int dir, int nDim = 4, typename I > | |
__device__ __host__ int | ghostFaceIndex (const int x_[], const I X_[], int dim, int nFace) |
template<int dir, int nDim = 4, typename I > | |
__device__ __host__ int | ghostFaceIndexStaggered (const int x_[], const I X_[], int dim, int nFace) |
template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg > | |
__device__ __host__ void | coordsFromFaceIndex (int &idx, int &cb_idx, Int *const x, int face_idx, const int &face_num, int parity, const Arg &arg) |
Compute the full-lattice coordinates from the input face index. This is used by the Wilson-like halo update kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning. More... | |
template<int nDim, QudaPCType type, int dim_, int nLayers, typename Int , typename Arg > | |
__device__ __host__ void | coordsFromFaceIndex (int &idx, int &cb_idx, Int *const x, int face_idx, const int &face_num, const Arg &arg) |
Overloaded variant of coordsFromFaceIndex where we use the parity declared in arg. More... | |
template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg > | |
__device__ __host__ int | indexFromFaceIndex (int face_idx, int parity, const Arg &arg) |
Compute the checkerboard lattice index from the input face index. This is used by the Wilson-like halo packing kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning. More... | |
template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg > | |
__device__ __host__ int | indexFromFaceIndex (int face_idx, const Arg &arg) |
Overloaded variant of indexFromFaceIndex where we use the parity declared in arg. More... | |
template<int nDim, QudaPCType type, int dim, int nLayers, int face_num, typename Arg > | |
static __device__ int | indexFromFaceIndexStaggered (int face_idx_in, int parity, const Arg &arg) |
Compute global checkerboard index from face index. The following indexing routines work for arbitrary lattice dimensions (though perhaps not odd like the Wilson variant?) Specifically, we compute an index into the local volume from an index into the face. This is used by the staggered-like face packing routines, and is different from the Wilson variant since here the halo depth is traversed in a different order - here the halo depth is the faster running dimension. More... | |
template<int nDim = 4, typename Arg > | |
__host__ __device__ int | dimFromFaceIndex (int &face_idx, int tid, const Arg &arg) |
Determines which face a given thread is computing. Also rescales face_idx so that it is relative to a given dimension. If the 5-d variant is called, then it is assumed that arg.threads contains only the 3-d surface of threads but face_idx is a 4-d index (surface * fifth dimension). At present multi-src staggered uses the 4-d variant since the face_idx that is passed in is the 3-d surface not the 4-d one. More... | |
template<int nDim = 4, typename Arg > | |
__host__ __device__ int | dimFromFaceIndex (int &face_idx, const Arg &arg) |
template<typename T > | |
__device__ int | block_idx (const T &swizzle) |
Swizzler for reordering the (x) thread block indices - use in conjunction with swizzle-factor autotuning to find the optimum swizzle factor. Specifically, the thread block id is remapped by transposing its coordinates: if the original order can be parametrized by. More... | |
template<typename Arg > | |
__device__ __host__ auto | StaggeredPhase (const int coords[], int dim, int dir, const Arg &arg) -> typename Arg::real |
Compute the staggered phase factor at unit shift from the current lattice coordinates. The routine below optimizes out the shift where possible, hence is only visible where we need to consider the boundary condition. More... | |
__device__ void | load_streaming_double2 (double2 &a, const double2 *addr) |
__device__ void | load_streaming_float4 (float4 &a, const float4 *addr) |
__device__ void | load_cached_short4 (short4 &a, const short4 *addr) |
__device__ void | load_cached_short2 (short2 &a, const short2 *addr) |
__device__ void | load_global_short4 (short4 &a, const short4 *addr) |
__device__ void | load_global_short2 (short2 &a, const short2 *addr) |
__device__ void | load_global_float4 (float4 &a, const float4 *addr) |
__device__ void | store_streaming_float4 (float4 *addr, float x, float y, float z, float w) |
__device__ void | store_streaming_short4 (short4 *addr, short x, short y, short z, short w) |
__device__ void | store_streaming_double2 (double2 *addr, double x, double y) |
__device__ void | store_streaming_float2 (float2 *addr, float x, float y) |
__device__ void | store_streaming_short2 (short2 *addr, short x, short y) |
template<int nColor, typename sumType , typename real > | |
__device__ __host__ void | colorInnerProduct (complex< sumType > &dot, int i, complex< real > v[nColor], complex< real > w[nColor]) |
template<int nColor, typename sumType , typename real > | |
__device__ __host__ void | colorNorm (sumType &nrm, complex< real > v[nColor]) |
template<typename real , int nColor> | |
__device__ __host__ void | colorScaleSubtract (complex< real > v[nColor], complex< real > a, complex< real > w[nColor]) |
template<typename real , int nColor> | |
__device__ __host__ void | colorScale (complex< real > v[nColor], real a) |
template<typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg > | |
void | blockOrthoCPU (Arg &arg) |
template<int block_size, typename sumFloat , typename Float , int nSpin, int spinBlockSize, int nColor, int coarseSpin, int nVec, typename Arg > | |
__launch_bounds__ (2 *block_size) __global__ void blockOrthoGPU(Arg arg) | |
template<typename real , typename Link > | |
__device__ void | axpy (real a, const real *x, Link &y) |
template<typename real , typename Link > | |
__device__ void | operator+= (real *y, const Link &x) |
template<typename real , typename Link > | |
__device__ void | operator-= (real *y, const Link &x) |
template<typename real , typename Arg , typename Link > | |
__device__ void | computeForce (LINK force, Arg &arg, int xIndex, int yIndex, int mu, int nu) |
template<typename real , typename Arg > | |
__global__ void | cloverDerivativeKernel (Arg arg) |
template<typename Float , typename Arg , bool computeTrLog, bool twist> | |
__device__ __host__ double | cloverInvertCompute (Arg &arg, int x_cb, int parity) |
template<typename Float , typename Arg , bool computeTrLog, bool twist> | |
void | cloverInvert (Arg &arg) |
template<int blockSize, typename Float , typename Arg , bool computeTrLog, bool twist> | |
__global__ void | cloverInvertKernel (Arg arg) |
template<typename real , int nvector, int mu, int nu, int parity, typename Arg > | |
__device__ void | sigmaOprod (Arg &arg, int idx) |
template<int nvector, typename real , typename Arg > | |
__global__ void | sigmaOprodKernel (Arg arg) |
template<typename Float > | |
__device__ __host__ void | caxpy (const complex< Float > &a, const complex< Float > &x, complex< Float > &y) |
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Wtype , typename Arg > | |
__device__ __host__ void | computeUV (Arg &arg, const Wtype &W, int parity, int x_cb, int ic_c) |
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg > | |
void | ComputeUVCPU (Arg &arg) |
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg > | |
__global__ void | ComputeUVGPU (Arg arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
__device__ __host__ void | computeAV (Arg &arg, int parity, int x_cb, int ch, int ic_c) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
void | ComputeAVCPU (Arg &arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
__global__ void | ComputeAVGPU (Arg arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
__device__ __host__ void | computeTMAV (Arg &arg, int parity, int x_cb, int v) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
void | ComputeTMAVCPU (Arg &arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
__global__ void | ComputeTMAVGPU (Arg arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
__device__ __host__ void | computeTMCAV (Arg &arg, int parity, int x_cb, int ch, int ic_c) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
void | ComputeTMCAVCPU (Arg &arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, typename Arg > | |
__global__ void | ComputeTMCAVGPU (Arg arg) |
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma > | |
__device__ __host__ void | multiplyVUV (complex< Float > vuv[], const Arg &arg, const Gamma &gamma, int parity, int x_cb, int ic_c, int jc_c) |
Do a single (AV)^\dagger * UV product, where for preconditioned clover, AV corresponds to the clover inverse multiplied by the packed null space vectors, else AV is simply the packed null space vectors. More... | |
template<typename Arg > | |
__device__ __host__ int | virtualThreadIdx (const Arg &arg) |
template<typename Arg > | |
__device__ __host__ int | virtualBlockDim (const Arg &arg) |
template<typename Arg > | |
__device__ __host__ int | coarseIndex (const Arg &arg) |
template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg , typename Gamma > | |
__device__ __host__ void | computeVUV (Arg &arg, const Gamma &gamma, int parity, int x_cb, int c_row, int c_col, int parity_coarse_, int coarse_x_cb_) |
template<bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg > | |
void | ComputeVUVCPU (Arg arg) |
template<bool parity_flip, typename Arg > | |
__device__ void | getIndicesShared (const Arg &arg, int &parity, int &x_cb, int &parity_coarse, int &x_coarse_cb, int &c_col, int &c_row) |
template<bool parity_flip, typename Arg > | |
__device__ void | getIndicesGlobal (const Arg &arg, int &parity, int &x_cb, int &parity_coarse, int &x_coarse_cb, int &c_col, int &c_row) |
template<bool shared_atomic, bool parity_flip, bool from_coarse, typename Float , int dim, QudaDirection dir, int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename Arg > | |
__global__ void | ComputeVUVGPU (Arg arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__device__ __host__ void | computeYreverse (Arg &arg, int parity, int x_cb, int ic_c, int jc_c) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
void | ComputeYReverseCPU (Arg &arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | ComputeYReverseGPU (Arg arg) |
template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg > | |
__device__ __host__ void | computeCoarseClover (Arg &arg, int parity, int x_cb, int ic_c, int jc_c) |
template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg > | |
void | ComputeCoarseCloverCPU (Arg &arg) |
template<bool from_coarse, typename Float , int fineSpin, int coarseSpin, int fineColor, int coarseColor, typename Arg > | |
__global__ void | ComputeCoarseCloverGPU (Arg arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
void | AddCoarseDiagonalCPU (Arg &arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | AddCoarseDiagonalGPU (Arg arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
void | AddCoarseTmDiagonalCPU (Arg &arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | AddCoarseTmDiagonalGPU (Arg arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__device__ __host__ void | convert (Arg &arg, int parity, int x_cb, int c_row, int c_col) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
void | ConvertCPU (Arg &arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | ConvertGPU (Arg arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__device__ __host__ void | rescaleY (Arg &arg, int parity, int x_cb, int c_row, int c_col) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
void | RescaleYCPU (Arg &arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | RescaleYGPU (Arg arg) |
template<typename Float , int n, bool compute_max_only, typename Arg > | |
__device__ __host__ Float | computeYhat (Arg &arg, int d, int x_cb, int parity, int i, int j) |
template<typename Float , int n, bool compute_max_only, typename Arg > | |
void | CalculateYhatCPU (Arg &arg) |
template<typename Float , int n, bool compute_max_only, typename Arg > | |
__global__ void | CalculateYhatGPU (Arg arg) |
template<typename Float , int Ns, int Ms, int Nc, int Mc, typename Arg > | |
__device__ __host__ __forceinline__ Float | compute_site_max (Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block, bool active) |
template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim, int dir, typename Arg > | |
__device__ __host__ __forceinline__ void | packGhost (Arg &arg, int x_cb, int parity, int spinor_parity, int spin_block, int color_block) |
template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, typename Arg > | |
void | GenericPackGhost (Arg &arg) |
template<typename Float , bool block_float, int Ns, int Ms, int Nc, int Mc, int nDim, int dim_threads, typename Arg > | |
__global__ void | GenericPackGhostKernel (Arg arg) |
template<typename real , typename Arg > | |
__global__ void | computeColorContraction (Arg arg) |
template<typename real , typename Arg > | |
__global__ void | computeDegrandRossiContraction (Arg arg) |
template<typename FloatOut , typename FloatIn , int length, typename Arg > | |
void | copyGauge (Arg &arg) |
template<typename Float , int length, typename Arg > | |
void | checkNan (Arg &arg) |
template<typename FloatOut , typename FloatIn , int length, typename Arg > | |
__global__ void | copyGaugeKernel (Arg arg) |
template<typename FloatOut , typename FloatIn , int length, typename Arg > | |
void | copyGhost (Arg &arg) |
template<typename FloatOut , typename FloatIn , int length, typename Arg > | |
__global__ void | copyGhostKernel (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int mu, typename Arg , typename Vector > | |
__device__ __host__ void | applyCovDev (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | covDev (Arg &arg, int idx, int parity) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | covDevGPU (Arg arg) |
template<DslashType type> | |
static __host__ __device__ bool | doHalo () |
Helper function to determine if we should do halo computation. More... | |
template<DslashType type> | |
static __host__ __device__ bool | doBulk () |
Helper function to determine if we should do interior computation. More... | |
template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_stride, int thread_dir, int thread_dim, bool dagger, DslashType type, typename Arg > | |
__device__ __host__ void | applyDslash (complex< Float > out[], Arg &arg, int x_cb, int src_idx, int parity, int s_row, int color_block, int color_offset) |
template<typename Float , int Ns, int Nc, int Mc, int color_stride, bool dagger, typename Arg > | |
__device__ __host__ void | applyClover (complex< Float > out[], Arg &arg, int x_cb, int src_idx, int parity, int s, int color_block, int color_offset) |
template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, int dir, int dim, typename Arg > | |
__device__ __host__ void | coarseDslash (Arg &arg, int x_cb, int src_idx, int parity, int s, int color_block, int color_offset) |
template<typename Float , int nDim, int Ns, int Nc, int Mc, bool dslash, bool clover, bool dagger, DslashType type, typename Arg > | |
void | coarseDslash (Arg arg) |
template<typename Float , int nDim, int Ns, int Nc, int Mc, int color_stride, int dim_thread_split, bool dslash, bool clover, bool dagger, DslashType type, typename Arg > | |
__global__ void | coarseDslashKernel (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | domainWall4D (Arg &arg, int idx, int s, int parity) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | domainWall4DCPU (Arg &arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | domainWall4DGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | domainWall5D (Arg &arg, int idx, int parity) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | domainWall5DCPU (Arg &arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | domainWall5DGPU (Arg arg) |
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg > | |
__device__ __host__ void | dslash5 (Arg &arg, int parity, int x_cb, int s) |
Apply the D5 operator at given site. More... | |
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg > | |
void | dslash5CPU (Arg &arg) |
CPU kernel for applying the D5 operator. More... | |
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, typename Arg > | |
__global__ void | dslash5GPU (Arg arg) |
GPU kernel for applying the D5 operator. More... | |
template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg > | |
__device__ __host__ Vector | constantInv (Arg &arg, int parity, int x_cb, int s_) |
Apply the M5 inverse operator at a given site on the lattice. This is the original algorithm as described in Kim and Izubuchi (LATTICE 2013_033), where the b and c coefficients are constant along the Ls dimension, so is suitable for Shamir and Mobius domain-wall fermions. More... | |
template<typename real , int nColor, bool dagger, Dslash5Type type, bool shared, typename Vector , typename Arg > | |
__device__ __host__ Vector | variableInv (Arg &arg, int parity, int x_cb, int s_) |
Apply the M5 inverse operator at a given site on the lattice. This is an alternative algorithm that is applicable to variable b and c coefficients: here each thread in the s dimension starts computing at s = s_, and computes the left- and right-handed contributions in two separate passes. For the left-handed contribution we sweep through increasing s, e.g., s=s_, s_+1, s_+2, and for the right-handed one we do the transpose, s=s_, s_-1, s_-2. This allows us to progressively build up the scalar coefficients needed in a SIMD-friendly fashion. More... | |
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg > | |
__device__ __host__ void | dslash5inv (Arg &arg, int parity, int x_cb, int s) |
Apply the M5 inverse operator at a given site on the lattice. More... | |
template<typename Float , int nColor, bool dagger, bool xpay, Dslash5Type type, bool shared, bool var_inverse, typename Arg > | |
__global__ void | dslash5invGPU (Arg arg) |
GPU kernel for applying the M5 inverse operator. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | ndegTwistedMass (Arg &arg, int idx, int flavor, int parity) |
Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg > | |
void | ndegTwistedMassCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | ndegTwistedMassGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | ndegTwistedMass (Arg &arg, int idx, int flavor, int parity) |
Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note this routine only exists in xpay form. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | ndegTwistedMassPreconditionedCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | ndegTwistedMassPreconditionedGPU (Arg arg) |
template<bool dagger, int twist, int dim, QudaPCType pc, typename Arg > | |
__device__ __host__ void | pack (Arg &arg, int ghost_idx, int s, int parity) |
template<int dim, int nFace = 1, typename Arg > | |
__device__ __host__ void | packStaggered (Arg &arg, int ghost_idx, int s, int parity) |
template<bool dagger, int twist, QudaPCType pc, typename Arg > | |
__global__ void | packKernel (Arg arg) |
template<bool dagger, int twist, QudaPCType pc, typename Arg > | |
__global__ void | packShmemKernel (Arg arg) |
template<typename Arg > | |
__global__ void | packStaggeredKernel (Arg arg) |
template<typename Arg > | |
__global__ void | packStaggeredShmemKernel (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector > | |
__device__ __host__ void | applyStaggered (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active) |
Applies the off-diagonal part of the Staggered / Asqtad operator. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | staggered (Arg &arg, int idx, int parity) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | staggeredGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | twistedClover (Arg &arg, int idx, int parity) |
Apply the preconditioned twisted-clover dslash. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | twistedCloverPreconditionedCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | twistedCloverPreconditionedGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | twistedMass (Arg &arg, int idx, int parity) |
Apply the twisted-mass dslash out(x) = M*in = a * D * in + (1 + i*b*gamma_5)*x. Note this routine only exists in xpay form. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg > | |
void | twistedMassCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | twistedMassGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, int twist, KernelType kernel_type, typename Arg , typename Vector > | |
__device__ __host__ void | applyWilsonTM (Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active) |
Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is required for applying the symmetric preconditioned twisted-mass dagger operator. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool asymmetric, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | twistedMass (Arg &arg, int idx, int parity) |
Apply the preconditioned twisted-mass dslash. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | twistedMassPreconditionedCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | twistedMassPreconditionedGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg , typename Vector > | |
__device__ __host__ void | applyWilson (Vector &out, Arg &arg, int coord[nDim], int x_cb, int s, int parity, int idx, int thread_dim, bool &active) |
Applies the off-diagonal part of the Wilson operator. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | wilson (Arg &arg, int idx, int s, int parity) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | wilsonCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | wilsonGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | wilsonClover (Arg &arg, int idx, int parity) |
Apply the Wilson-clover dslash out(x) = M*in = A(x)*x(x) + D * in(x-mu). Note this routine only exists in xpay form. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | wilsonCloverCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | wilsonCloverGPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | wilsonClover (Arg &arg, int idx, int parity) |
Apply the clover preconditioned Wilson dslash. More... | |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
void | wilsonCloverPreconditionedCPU (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | wilsonCloverPreconditionedGPU (Arg arg) |
template<int mu, int nu, typename Float , typename Arg > | |
__device__ __host__ __forceinline__ void | computeFmunuCore (Arg &arg, int idx, int parity) |
template<typename Float , typename Arg > | |
__global__ void | computeFmunuKernel (Arg arg) |
template<typename Float , typename Arg > | |
void | computeFmunuCPU (Arg &arg) |
template<typename Float , typename Arg , typename Link > | |
__host__ __device__ void | computeStaple (Arg &arg, int idx, int parity, int dir, Link &staple) |
template<typename Float , typename Arg > | |
__global__ void | computeAPEStep (Arg arg) |
template<typename Float , typename Arg > | |
__device__ double | plaquette (Arg &arg, int x[], int parity, int mu, int nu) |
template<int blockSize, typename Float , typename Gauge > | |
__global__ void | computePlaq (GaugePlaqArg< Gauge > arg) |
template<int blockSize, typename Float , typename Arg > | |
__global__ void | qChargeComputeKernel (Arg arg) |
template<typename Float , typename Arg > | |
__global__ void | computeSTOUTStep (Arg arg) |
template<typename Float , typename Arg , typename Link > | |
__host__ __device__ void | computeStapleRectangle (Arg &arg, int idx, int parity, int dir, Link &staple, Link &rectangle) |
template<typename Float , typename Arg > | |
__global__ void | computeOvrImpSTOUTStep (Arg arg) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, KernelType kernel_type, int dir, typename Arg , typename Vector > | |
__device__ __host__ void | applyLaplace (Vector &out, Arg &arg, int coord[nDim], int x_cb, int parity, int idx, int thread_dim, bool &active) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__device__ __host__ void | laplace (Arg &arg, int idx, int parity) |
template<typename Float , int nDim, int nColor, int nParity, bool dagger, bool xpay, KernelType kernel_type, typename Arg > | |
__global__ void | laplaceGPU (Arg arg) |
template<typename Float , int fineSpin, int fineColor, int coarseColor, int coarse_colors_per_thread, class FineColor , class Rotator > | |
__device__ __host__ void | rotateCoarseColor (complex< Float > out[fineSpin *coarse_colors_per_thread], const FineColor &in, const Rotator &V, int parity, int nParity, int x_cb, int coarse_color_block) |
template<typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg > | |
void | Restrict (Arg arg) |
template<int block_size, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, int coarse_colors_per_thread, typename Arg > | |
__global__ void | RestrictKernel (Arg arg) |
void | completeKSForce (GaugeField &mom, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location, long long *flops=NULL) |
std::ostream & | operator<< (std::ostream &output, const LatticeFieldParam &param) |
QudaFieldLocation | Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b) |
Helper function for determining if the location of the fields is the same. More... | |
template<typename... Args> | |
QudaFieldLocation | Location_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args) |
Helper function for determining if the location of the fields is the same. More... | |
QudaPrecision | Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b) |
Helper function for determining if the precision of the fields is the same. More... | |
template<typename... Args> | |
QudaPrecision | Precision_ (const char *func, const char *file, int line, const LatticeField &a, const LatticeField &b, const Args &... args) |
Helper function for determining if the precision of the fields is the same. More... | |
QudaFieldLocation | reorder_location () |
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More... | |
void | reorder_location_set (QudaFieldLocation reorder_location_) |
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION. More... | |
const char * | compile_type_str (const LatticeField &meta, QudaFieldLocation location_=QUDA_INVALID_FIELD_LOCATION) |
Helper function for setting auxiliary string. More... | |
void | fatLongKSLink (cudaGaugeField *fat, cudaGaugeField *lng, const cudaGaugeField &gauge, const double *coeff) |
Compute the fat and long links for improved staggered (Kogut-Susskind) fermions. More... | |
void | printPeakMemUsage () |
void | assertAllMemFree () |
long | device_allocated_peak () |
long | pinned_allocated_peak () |
long | mapped_allocated_peak () |
long | host_allocated_peak () |
void * | device_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | device_pinned_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | safe_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | pinned_malloc_ (const char *func, const char *file, int line, size_t size) |
void * | mapped_malloc_ (const char *func, const char *file, int line, size_t size) |
void | device_free_ (const char *func, const char *file, int line, void *ptr) |
void | device_pinned_free_ (const char *func, const char *file, int line, void *ptr) |
void | host_free_ (const char *func, const char *file, int line, void *ptr) |
constexpr const char * | str_end (const char *str) |
constexpr bool | str_slant (const char *str) |
constexpr const char * | r_slant (const char *str) |
constexpr const char * | file_name (const char *str) |
QudaFieldLocation | get_pointer_location (const void *ptr) |
bool | is_aligned (const void *ptr, size_t alignment) |
template<typename real > | |
__device__ __host__ real | __fast_pow (real a, int b) |
double | computeMomAction (const GaugeField &mom) |
Compute and return the global momentum action 1/2 mom^2. More... | |
void | updateMomentum (GaugeField &mom, double coeff, GaugeField &force, const char *fname) |
void | applyU (GaugeField &force, GaugeField &U) |
bool | forceMonitor () |
Whether we are monitoring the force or not. More... | |
void | flushForceMonitor () |
Flush any outstanding force monitoring information. More... | |
void | ApplyCoarse (ColorSpinorField &out, const ColorSpinorField &inA, const ColorSpinorField &inB, const GaugeField &Y, const GaugeField &X, double kappa, int parity=QUDA_INVALID_PARITY, bool dslash=true, bool clover=true, bool dagger=false, const int *commDim=0, QudaPrecision halo_precision=QUDA_INVALID_PRECISION) |
Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields. More... | |
void | CoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const cudaGaugeField &gauge, const cudaCloverField *clover, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc) |
Coarse operator construction from a fine-grid operator (Wilson / Clover) More... | |
void | CoarseCoarseOp (GaugeField &Y, GaugeField &X, const Transfer &T, const GaugeField &gauge, const GaugeField &clover, const GaugeField &cloverInv, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional) |
Coarse operator construction from an intermediate-grid operator (Coarse) More... | |
void | calculateYhat (GaugeField &Yhat, GaugeField &Xinv, const GaugeField &Y, const GaugeField &X) |
Calculate preconditioned coarse links and coarse clover inverse field. More... | |
void | Monte (cudaGaugeField &data, RNG &rngstate, double Beta, int nhb, int nover) |
Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps. More... | |
void | InitGaugeField (cudaGaugeField &data) |
Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data) More... | |
void | InitGaugeField (cudaGaugeField &data, RNG &rngstate) |
Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchange borders links in multi-GPU case. More... | |
void | PGaugeExchange (cudaGaugeField &data, const int dir, const int parity) |
Exchange gauge field data between nodes for the given direction and parity. More... | |
void | PGaugeExchangeFree () |
Release all allocated memory used to exchange data between nodes. More... | |
double2 | getLinkDeterminant (cudaGaugeField &data) |
Calculate the Determinant. More... | |
double2 | getLinkTrace (cudaGaugeField &data) |
Calculate the Trace. More... | |
void | qudaMemcpy_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpy used for auto-profiling. Do not call directly, rather call macro below which will grab the location of the call. More... | |
void | qudaMemcpyAsync_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support. More... | |
void | qudaMemcpy2DAsync_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t hieght, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line) |
Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support. More... | |
cudaError_t | qudaLaunchKernel (const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream) |
Wrapper around cudaLaunchKernel. More... | |
cudaError_t | qudaEventQuery (cudaEvent_t &event) |
Wrapper around cudaEventQuery or cuEventQuery. More... | |
cudaError_t | qudaEventRecord (cudaEvent_t &event, cudaStream_t stream=0) |
Wrapper around cudaEventRecord or cuEventRecord. More... | |
cudaError_t | qudaStreamWaitEvent (cudaStream_t stream, cudaEvent_t event, unsigned int flags) |
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent. More... | |
cudaError_t | qudaStreamSynchronize (cudaStream_t &stream) |
Wrapper around cudaStreamSynchronize or cuStreamSynchronize. More... | |
cudaError_t | qudaEventSynchronize (cudaEvent_t &event) |
Wrapper around cudaEventSynchronize or cuEventSynchronize. More... | |
cudaError_t | qudaDeviceSynchronize_ (const char *func, const char *file, const char *line) |
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize. More... | |
void | printAPIProfile () |
Print out the timer profile for CUDA API calls. More... | |
bool | canReuseResidentGauge (QudaInvertParam *inv_param) |
template<class T > | |
__device__ __host__ T | getTrace (const Matrix< T, 3 > &a) |
template<template< typename, int > class Mat, class T > | |
__device__ __host__ T | getDeterminant (const Mat< T, 3 > &a) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator+ (const Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator+= (Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator+= (Mat< T, N > &a, const T &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator-= (Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator- (const Mat< T, N > &a, const Mat< T, N > &b) |
template<template< typename, int > class Mat, class T , int N, class S > | |
__device__ __host__ Mat< T, N > | operator* (const S &scalar, const Mat< T, N > &a) |
template<template< typename, int > class Mat, class T , int N, class S > | |
__device__ __host__ Mat< T, N > | operator* (const Mat< T, N > &a, const S &scalar) |
template<template< typename, int > class Mat, class T , int N, class S > | |
__device__ __host__ Mat< T, N > | operator*= (Mat< T, N > &a, const S &scalar) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator- (const Mat< T, N > &a) |
template<template< typename, int > class Mat, class T , int N> | |
__device__ __host__ Mat< T, N > | operator* (const Mat< T, N > &a, const Mat< T, N > &b) |
Generic implementation of matrix multiplication. More... | |
template<template< typename > class complex, typename T , int N> | |
__device__ __host__ Matrix< complex< T >, N > | operator* (const Matrix< complex< T >, N > &a, const Matrix< complex< T >, N > &b) |
Specialization of complex matrix multiplication that will issue optimal fma instructions. More... | |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | operator*= (Matrix< T, N > &a, const Matrix< T, N > &b) |
template<class T , class U , int N> | |
__device__ __host__ Matrix< typename PromoteTypeId< T, U >::Type, N > | operator* (const Matrix< T, N > &a, const Matrix< U, N > &b) |
template<class T > | |
__device__ __host__ Matrix< T, 2 > | operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b) |
template<class T , int N> | |
__device__ __host__ Matrix< T, N > | conj (const Matrix< T, N > &other) |
template<class T > | |
__device__ __host__ Matrix< T, 3 > | inverse (const Matrix< T, 3 > &u) |
template<class T , int N> | |
__device__ __host__ void | setIdentity (Matrix< T, N > *m) |
template<int N> | |
__device__ __host__ void | setIdentity (Matrix< float2, N > *m) |
template<int N> | |
__device__ __host__ void | setIdentity (Matrix< double2, N > *m) |
template<class T , int N> | |
__device__ __host__ void | setZero (Matrix< T, N > *m) |
template<int N> | |
__device__ __host__ void | setZero (Matrix< float2, N > *m) |
template<int N> | |
__device__ __host__ void | setZero (Matrix< double2, N > *m) |
template<typename Complex , int N> | |
__device__ __host__ void | makeAntiHerm (Matrix< Complex, N > &m) |
template<class T , int N> | |
__device__ __host__ void | copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a) |
template<class T , int N> | |
__device__ __host__ void | outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m) |
template<class T , int N> | |
__device__ __host__ void | outerProd (const T(&a)[N], const T(&b)[N], Matrix< T, N > *m) |
template<class T , int N> | |
std::ostream & | operator<< (std::ostream &os, const Matrix< T, N > &m) |
template<class T , int N> | |
std::ostream & | operator<< (std::ostream &os, const Array< T, N > &a) |
template<class T , class U > | |
__device__ void | loadLinkVariableFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< U, 3 > *link) |
template<class T , class U , int N> | |
__device__ void | loadMatrixFromArray (const T *const array, const int idx, const int stride, Matrix< U, N > *mat) |
__device__ void | loadLinkVariableFromArray (const float2 *const array, const int dir, const int idx, const int stride, Matrix< complex< double >, 3 > *link) |
template<class T , int N, class U > | |
__device__ void | writeMatrixToArray (const Matrix< T, N > &mat, const int idx, const int stride, U *const array) |
__device__ void | appendMatrixToArray (const Matrix< complex< double >, 3 > &mat, const int idx, const int stride, double2 *const array) |
__device__ void | appendMatrixToArray (const Matrix< complex< float >, 3 > &mat, const int idx, const int stride, float2 *const array) |
template<class T , class U > | |
__device__ void | writeLinkVariableToArray (const Matrix< T, 3 > &link, const int dir, const int idx, const int stride, U *const array) |
__device__ void | writeLinkVariableToArray (const Matrix< complex< double >, 3 > &link, const int dir, const int idx, const int stride, float2 *const array) |
template<class T > | |
__device__ void | loadMomentumFromArray (const T *const array, const int dir, const int idx, const int stride, Matrix< T, 3 > *mom) |
template<class T , class U > | |
__device__ void | writeMomentumToArray (const Matrix< T, 3 > &mom, const int dir, const int idx, const U coeff, const int stride, T *const array) |
template<class Cmplx > | |
__device__ __host__ void | computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u) |
void | copyArrayToLink (Matrix< float2, 3 > *link, float *array) |
template<class Cmplx , class Real > | |
void | copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array) |
void | copyLinkToArray (float *array, const Matrix< float2, 3 > &link) |
template<class Cmplx , class Real > | |
void | copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link) |
template<class T > | |
__device__ __host__ Matrix< T, 3 > | getSubTraceUnit (const Matrix< T, 3 > &a) |
template<class T > | |
__device__ __host__ void | SubTraceUnit (Matrix< T, 3 > &a) |
template<class T > | |
__device__ __host__ double | getRealTraceUVdagger (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b) |
template<class Cmplx > | |
__host__ __device__ void | printLink (const Matrix< Cmplx, 3 > &link) |
template<class Cmplx > | |
__device__ __host__ double | ErrorSU3 (const Matrix< Cmplx, 3 > &matrix) |
template<class T > | |
__device__ __host__ void | exponentiate_iQ (const Matrix< T, 3 > &Q, Matrix< T, 3 > *exp_iQ) |
template<typename Float > | |
__device__ __host__ void | expsu3 (Matrix< complex< Float >, 3 > &q) |
template<class Real > | |
__device__ Real | Random (cuRNGState &state, Real a, Real b) |
Return a random number between a and b. More... | |
template<> | |
__device__ float | Random< float > (cuRNGState &state, float a, float b) |
template<> | |
__device__ double | Random< double > (cuRNGState &state, double a, double b) |
template<class Real > | |
__device__ Real | Random (cuRNGState &state) |
Return a random number between 0 and 1. More... | |
template<> | |
__device__ float | Random< float > (cuRNGState &state) |
template<> | |
__device__ double | Random< double > (cuRNGState &state) |
template<typename T1 , typename T2 > | |
__host__ __device__ void | copy (T1 &a, const T2 &b) |
template<> | |
__host__ __device__ void | copy (double &a, const int2 &b) |
template<> | |
__host__ __device__ void | copy (double2 &a, const int4 &b) |
template<> | |
__host__ __device__ void | copy (float &a, const short &b) |
template<> | |
__host__ __device__ void | copy (short &a, const float &b) |
template<> | |
__host__ __device__ void | copy (float2 &a, const short2 &b) |
template<> | |
__host__ __device__ void | copy (short2 &a, const float2 &b) |
template<> | |
__host__ __device__ void | copy (float4 &a, const short4 &b) |
template<> | |
__host__ __device__ void | copy (short4 &a, const float4 &b) |
template<> | |
__host__ __device__ void | copy (float &a, const char &b) |
template<> | |
__host__ __device__ void | copy (char &a, const float &b) |
template<> | |
__host__ __device__ void | copy (float2 &a, const char2 &b) |
template<> | |
__host__ __device__ void | copy (char2 &a, const float2 &b) |
template<> | |
__host__ __device__ void | copy (float4 &a, const char4 &b) |
template<> | |
__host__ __device__ void | copy (char4 &a, const float4 &b) |
template<typename T1 , typename T2 > | |
__host__ __device__ void | copy_scaled (T1 &a, const T2 &b) |
template<> | |
__host__ __device__ void | copy_scaled (short4 &a, const float4 &b) |
template<> | |
__host__ __device__ void | copy_scaled (char4 &a, const float4 &b) |
template<> | |
__host__ __device__ void | copy_scaled (short2 &a, const float2 &b) |
template<> | |
__host__ __device__ void | copy_scaled (char2 &a, const float2 &b) |
template<> | |
__host__ __device__ void | copy_scaled (short &a, const float &b) |
template<> | |
__host__ __device__ void | copy_scaled (char &a, const float &b) |
template<typename T1 , typename T2 , typename T3 > | |
__host__ __device__ void | copy_and_scale (T1 &a, const T2 &b, const T3 &c) |
Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector. More... | |
template<> | |
__host__ __device__ void | copy_and_scale (float4 &a, const short4 &b, const float &c) |
template<> | |
__host__ __device__ void | copy_and_scale (float4 &a, const char4 &b, const float &c) |
template<> | |
__host__ __device__ void | copy_and_scale (float2 &a, const short2 &b, const float &c) |
template<> | |
__host__ __device__ void | copy_and_scale (float2 &a, const char2 &b, const float &c) |
template<> | |
__host__ __device__ void | copy_and_scale (float &a, const short &b, const float &c) |
template<> | |
__host__ __device__ void | copy_and_scale (float &a, const char &b, const float &c) |
template<typename VectorType > | |
__device__ __host__ VectorType | vector_load (void *ptr, int idx) |
template<typename VectorType > | |
__device__ __host__ void | vector_store (void *ptr, int idx, const VectorType &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const double2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const float4 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const float2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const short4 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const short2 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const char4 &value) |
template<> | |
__device__ __host__ void | vector_store (void *ptr, int idx, const char2 &value) |
void | computeStaggeredOprod (GaugeField *out[], ColorSpinorField &in, const double coeff[], int nFace) |
Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. More... | |
template<typename Matrix , typename Float > | |
__host__ __device__ bool | checkUnitary (const Matrix &inv, const Matrix &in, const Float tol) |
Check the unitarity of the input matrix to a given tolerance. More... | |
template<typename Matrix > | |
__host__ __device__ void | checkUnitaryPrint (const Matrix &inv, const Matrix &in) |
Print out deviation for each component (used for debugging only). More... | |
template<typename Float > | |
__host__ __device__ void | polarSu3 (Matrix< complex< Float >, 3 > &in, Float tol) |
Project the input matrix on the SU(3) group. First unitarize the matrix and then project onto the special unitary group. More... | |
void | BlockOrthogonalize (ColorSpinorField &V, const std::vector< ColorSpinorField *> &B, const int *fine_to_coarse, const int *coarse_to_fine, const int *geo_bs, const int spin_bs, const int n_block_ortho) |
Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom. More... | |
void | Prolongate (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *const *spin_map, int parity=QUDA_INVALID_PARITY) |
Apply the prolongation operator. More... | |
void | Restrict (ColorSpinorField &out, const ColorSpinorField &in, const ColorSpinorField &v, int Nvec, const int *fine_to_coarse, const int *coarse_to_fine, const int *const *spin_map, int parity=QUDA_INVALID_PARITY) |
Apply the restriction operator. More... | |
bool | activeTuning () |
query if tuning is in progress More... | |
void | loadTuneCache () |
void | saveTuneCache (bool error=false) |
void | saveProfile (const std::string label="") |
Save profile to disk. More... | |
void | flushProfile () |
Flush profile contents, setting all counts to zero. More... | |
TuneParam & | tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity) |
void | postTrace_ (const char *func, const char *file, int line) |
Post an event in the trace, recording where it was posted. More... | |
const std::map< TuneKey, TuneParam > & | getTuneCache () |
Returns a reference to the tunecache map. More... | |
void | enableProfileCount () |
Enable the profile kernel counting. More... | |
void | disableProfileCount () |
Disable the profile kernel counting. More... | |
void | setPolicyTuning (bool) |
Enable / disable whether we are tuning a policy. More... | |
void | u32toa (char *buffer, uint32_t value) |
void | i32toa (char *buffer, int32_t value) |
void | u64toa (char *buffer, uint64_t value) |
void | i64toa (char *buffer, int64_t value) |
void | setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error) |
void | unitarizeLinksCPU (cpuGaugeField &outfield, const cpuGaugeField &infield) |
void | unitarizeLinks (cudaGaugeField &outfield, const cudaGaugeField &infield, int *fails) |
void | unitarizeLinks (cudaGaugeField &outfield, int *fails) |
bool | isUnitary (const cpuGaugeField &field, double max_error) |
void | projectSU3 (cudaGaugeField &U, double tol, int *fails) |
Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken. More... | |
template<typename Arg > | |
__device__ __host__ uint64_t | siteChecksum (const Arg &arg, int d, int parity, int x_cb) |
template<typename Arg > | |
uint64_t | ChecksumCPU (const Arg &arg) |
ColorSpinorParam | colorSpinorParam (const CloverField &a, bool inverse) |
template<bool from_coarse, typename Float , int fineSpin, int fineColor, int coarseSpin, int coarseColor, typename F , typename Ftmp , typename Vt , typename coarseGauge , typename coarseGaugeAtomic , typename fineGauge , typename fineClover > | |
void | calculateY (coarseGauge &Y, coarseGauge &X, coarseGaugeAtomic &Y_atomic, coarseGaugeAtomic &X_atomic, Ftmp &UV, F &AV, Vt &V, fineGauge &G, fineClover &C, fineClover &Cinv, GaugeField &Y_, GaugeField &X_, GaugeField &Y_atomic_, GaugeField &X_atomic_, ColorSpinorField &uv, ColorSpinorField &av, const ColorSpinorField &v, double kappa, double mu, double mu_factor, QudaDiracType dirac, QudaMatPCType matpc, bool need_bidirectional, const int *fine_to_coarse, const int *coarse_to_fine) |
Calculate the coarse-link field, including the coarse clover field. More... | |
std::ostream & | operator<< (std::ostream &out, const ColorSpinorField &a) |
template<class T > | |
void | random (T &t) |
template<class T > | |
void | point (T &t, int x, int s, int c) |
template<class T > | |
void | constant (T &t, int k, int s, int c) |
template<class P > | |
void | sin (P &p, int d, int n, int offset) |
template<class T > | |
void | corner (T &p, int v, int s, int c) |
template<class U , class V > | |
int | compareSpinor (const U &u, const V &v, const int tol) |
template<class Order > | |
void | print_vector (const Order &o, unsigned int x) |
template<typename StoreType , int Ns, int Nc, QudaFieldOrder FieldOrder> | |
void | genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i) |
template<typename Float , int Ns, int Nc> | |
void | genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i) |
template<typename Float > | |
void | genericCudaPrintVector (const cudaColorSpinorField &field, unsigned int i) |
template<typename Float , int Nc, typename Vector , typename Arg > | |
__device__ __host__ void | computeNeighborSum (Vector &out, Arg &arg, int x_cb, int parity) |
template<typename Float , int Ns, int Nc, typename Arg > | |
__device__ __host__ void | computeWupperalStep (Arg &arg, int x_cb, int parity) |
template<typename Float , int Ns, int Nc, typename Arg > | |
void | wuppertalStepCPU (Arg arg) |
template<typename Float , int Ns, int Nc, typename Arg > | |
__global__ void | wuppertalStepGPU (Arg arg) |
void | copyGenericColorSpinorDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorDH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorDQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGDD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGDS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSD (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGSQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGHS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGHH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGHQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGQS (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGQH (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
void | copyGenericColorSpinorMGQQ (ColorSpinorField &, const ColorSpinorField &, QudaFieldLocation, void *, void *, void *a=0, void *b=0) |
template<typename Arg , typename Basis > | |
void | copyColorSpinor (Arg &arg, const Basis &basis) |
template<typename Arg , typename Basis > | |
__global__ void | copyColorSpinorKernel (Arg arg, Basis basis) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename Out , typename In > | |
void | genericCopyColorSpinor (Out &outOrder, const In &inOrder, const ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder > | |
void | genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, float *outNorm) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc> | |
void | genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm) |
template<int Ns, int Nc, typename dstFloat , typename srcFloat > | |
void | copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm) |
template<int Nc, typename dstFloat , typename srcFloat > | |
void | CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder > | |
void | packSpinor (OutOrder &outOrder, const InOrder &inOrder, int volume) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder > | |
__global__ void | packSpinorKernel (OutOrder outOrder, const InOrder inOrder, int volume) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder > | |
void | genericCopyColorSpinor (OutOrder &outOrder, const InOrder &inOrder, const ColorSpinorField &out, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder > | |
void | genericCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaFieldLocation location, FloatOut *Out) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc> | |
void | genericCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In) |
template<int Ns, int Nc, typename dstFloat , typename srcFloat > | |
void | copyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src) |
template<int Nc, typename dstFloat , typename srcFloat > | |
void | CopyGenericColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, QudaFieldLocation location, dstFloat *Dst, srcFloat *Src) |
void | copyGenericGaugeDoubleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeSingleOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeHalfOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeQuarterOut (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | copyGenericGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, void *Out, void *In, void **ghostOut, void **ghostIn, int type) |
void | checkMomOrder (const GaugeField &u) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended> | |
__device__ __host__ void | copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > &arg, int X, int parity) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended> | |
void | copyGaugeEx (CopyGaugeExArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder , bool regularToextended> | |
__global__ void | copyGaugeExKernel (CopyGaugeExArg< OutOrder, InOrder > arg) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGaugeEx (OutOrder outOrder, const InOrder inOrder, const int *E, const int *X, const int *faceVolumeCB, const GaugeField &meta, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int length, typename InOrder > | |
void | copyGaugeEx (const InOrder &inOrder, const int *X, GaugeField &out, QudaFieldLocation location, FloatOut *Out) |
template<typename FloatOut , typename FloatIn , int length> | |
void | copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In) |
template<typename FloatOut , typename FloatIn > | |
void | copyGaugeEx (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In) |
template<typename FloatOut , typename FloatIn , int length, typename OutOrder , typename InOrder > | |
void | copyGauge (OutOrder &&outOrder, const InOrder &inOrder, const GaugeField &out, const GaugeField &in, QudaFieldLocation location, int type) |
template<typename FloatOut , typename FloatIn , int length, typename InOrder > | |
void | copyGauge (const InOrder &inOrder, const GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatOut **outGhost, int type) |
template<typename FloatOut , typename FloatIn , int length> | |
void | copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) |
template<typename FloatOut , typename FloatIn , int length, typename Out , typename In , typename Arg > | |
void | copyMom (Arg &arg, const GaugeField &out, const GaugeField &in, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn > | |
void | copyGauge (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) |
template<typename sFloatOut , typename sFloatIn , int Nc, typename InOrder > | |
void | copyGaugeMG (const InOrder &inOrder, GaugeField &out, const GaugeField &in, QudaFieldLocation location, sFloatOut *Out, sFloatOut **outGhost, int type) |
template<typename sFloatOut , typename sFloatIn , int Nc> | |
void | copyGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, sFloatOut *Out, sFloatIn *In, sFloatOut **outGhost, sFloatIn **inGhost, int type) |
template<typename FloatOut , typename FloatIn > | |
void | copyGaugeMG (GaugeField &out, const GaugeField &in, QudaFieldLocation location, FloatOut *Out, FloatIn *In, FloatOut **outGhost, FloatIn **inGhost, int type) |
void * | create_gauge_buffer (size_t bytes, QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
void ** | create_ghost_buffer (size_t bytes[], QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
void | free_gauge_buffer (void *buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
void | free_ghost_buffer (void **buffer, QudaGaugeFieldOrder order, QudaFieldGeometry geometry) |
std::ostream & | operator<< (std::ostream &out, const cudaColorSpinorField &a) |
static std::vector< DslashCoarsePolicy > | policies (static_cast< int >(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED), DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED) |
void | enable_policy (DslashCoarsePolicy p) |
void | disable_policy (DslashCoarsePolicy p) |
template<typename Float , int nSpin, int nColor, bool spin_project> | |
std::ostream & | operator<< (std::ostream &out, const PackArg< Float, nSpin, nColor, spin_project > &arg) |
template<typename Float , int nColor> | |
void | PackGhost (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream) |
template<typename Float > | |
void | PackGhost (void *ghost[], const ColorSpinorField &in, MemoryLocation location, int nFace, bool dagger, int parity, bool spin_project, double a, double b, double c, const cudaStream_t &stream) |
template<typename Float , int nColor, typename Arg > | |
void | gammaCPU (Arg arg) |
template<typename Float , int nColor, int d, typename Arg > | |
__global__ void | gammaGPU (Arg arg) |
template<typename Float , int nColor> | |
void | ApplyGamma (ColorSpinorField &out, const ColorSpinorField &in, int d) |
template<typename Float > | |
void | ApplyGamma (ColorSpinorField &out, const ColorSpinorField &in, int d) |
template<bool doublet, typename Float , int nColor, typename Arg > | |
void | twistGammaCPU (Arg arg) |
template<bool doublet, typename Float , int nColor, int d, typename Arg > | |
__global__ void | twistGammaGPU (Arg arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__device__ __host__ void | cloverApply (Arg &arg, int x_cb, int parity) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
void | cloverCPU (Arg &arg) |
template<typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | cloverGPU (Arg arg) |
template<bool inverse, typename Float , int nSpin, int nColor, typename Arg > | |
__device__ __host__ void | twistCloverApply (Arg &arg, int x_cb, int parity) |
template<bool inverse, typename Float , int nSpin, int nColor, typename Arg > | |
void | twistCloverCPU (Arg &arg) |
template<bool inverse, typename Float , int nSpin, int nColor, typename Arg > | |
__global__ void | twistCloverGPU (Arg arg) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend> | |
__device__ __host__ void | copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg, int X) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend> | |
__global__ void | copyInteriorKernel (CopySpinorExArg< OutOrder, InOrder, Basis > arg) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis , bool extend> | |
void | copyInterior (CopySpinorExArg< OutOrder, InOrder, Basis > &arg) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder , typename Basis > | |
void | copySpinorEx (OutOrder outOrder, const InOrder inOrder, const Basis basis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename OutOrder , typename InOrder > | |
void | copySpinorEx (OutOrder outOrder, InOrder inOrder, const QudaGammaBasis outBasis, const QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, const ColorSpinorField &meta, QudaFieldLocation location) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc, typename InOrder > | |
void | extendedCopyColorSpinor (InOrder &inOrder, ColorSpinorField &out, QudaGammaBasis inBasis, const int *E, const int *X, const int parity, const bool extend, QudaFieldLocation location, FloatOut *Out, float *outNorm) |
template<typename FloatOut , typename FloatIn , int Ns, int Nc> | |
void | extendedCopyColorSpinor (ColorSpinorField &out, const ColorSpinorField &in, const int parity, const QudaFieldLocation location, FloatOut *Out, FloatIn *In, float *outNorm, float *inNorm) |
template<int Ns, typename dstFloat , typename srcFloat > | |
void | copyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm, float *srcNorm) |
template<typename dstFloat , typename srcFloat > | |
void | CopyExtendedColorSpinor (ColorSpinorField &dst, const ColorSpinorField &src, const int parity, const QudaFieldLocation location, dstFloat *Dst, srcFloat *Src, float *dstNorm=0, float *srcNorm=0) |
template<typename Float > | |
void | extractGhost (const GaugeField &u, Float **Ghost, bool extract, int offset) |
void | extractGaugeGhostMG (const GaugeField &u, void **ghost, bool extract, int offset) |
template<typename Float , int length, int dim, typename Arg > | |
__device__ __host__ void | extractor (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity) |
template<typename Float , int length, int dim, typename Arg > | |
__device__ __host__ void | injector (Arg &arg, int dir, int a, int b, int c, int d, int g, int parity) |
template<typename Float , int length, int nDim, int dim, typename Order , bool extract> | |
void | extractGhostEx (ExtractGhostExArg< Order, nDim, dim > arg) |
template<typename Float , int length, int nDim, int dim, typename Order , bool extract> | |
__global__ void | extractGhostExKernel (ExtractGhostExArg< Order, nDim, dim > arg) |
template<typename Float , int length, typename Order > | |
void | extractGhostEx (Order order, const int dim, const int *surfaceCB, const int *E, const int *R, bool extract, const GaugeField &u, QudaFieldLocation location) |
template<typename Float > | |
void | extractGhostEx (const GaugeField &u, int dim, const int *R, Float **Ghost, bool extract) |
template<int nDim, bool extract, typename Arg > | |
void | extractGhost (Arg &arg) |
template<int nDim, bool extract, typename Arg > | |
__global__ void | extractGhostKernel (Arg arg) |
template<typename Float , int length, typename Order > | |
void | extractGhost (Order order, const GaugeField &u, QudaFieldLocation location, bool extract, int offset) |
template<typename storeFloat , int Nc> | |
void | extractGhostMG (const GaugeField &u, storeFloat **Ghost, bool extract, int offset) |
template<typename Float > | |
void | extractGhostMG (const GaugeField &u, Float **Ghost, bool extract, int offset) |
ColorSpinorParam | colorSpinorParam (const GaugeField &a) |
template<int NCOLORS> | |
static __host__ __device__ void | IndexBlock (int block, int &p, int &q) |
template<int blockSize, typename Float , int gauge_dir, int NCOLORS> | |
__forceinline__ __device__ void | GaugeFixHit_AtomicAdd (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid) |
template<int blockSize, typename Float , int gauge_dir, int NCOLORS> | |
__forceinline__ __device__ void | GaugeFixHit_NoAtomicAdd (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid) |
template<int blockSize, typename Float , int gauge_dir, int NCOLORS> | |
__forceinline__ __device__ void | GaugeFixHit_NoAtomicAdd_LessSM (Matrix< complex< Float >, NCOLORS > &link, const Float relax_boost, const int tid) |
template<int blockSize, typename Float , int gauge_dir, int NCOLORS> | |
__forceinline__ __device__ void | GaugeFixHit_AtomicAdd (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid) |
template<int blockSize, typename Float , int gauge_dir, int NCOLORS> | |
__forceinline__ __device__ void | GaugeFixHit_NoAtomicAdd (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid) |
template<int blockSize, typename Float , int gauge_dir, int NCOLORS> | |
__forceinline__ __device__ void | GaugeFixHit_NoAtomicAdd_LessSM (Matrix< complex< Float >, NCOLORS > &link, Matrix< complex< Float >, NCOLORS > &link1, const Float relax_boost, const int tid) |
template<typename Float , typename Gauge > | |
void | plaquette (const Gauge dataOr, const GaugeField &data, double2 &plq, QudaFieldLocation location) |
template<typename Float > | |
void | plaquette (const GaugeField &data, double2 &plq, QudaFieldLocation location) |
template<typename real , typename Link > | |
__device__ __host__ Link | gauss_su3 (cuRNGState &localState) |
template<typename Float , typename Arg > | |
__global__ void | computeGenGauss (Arg arg) |
template<typename Float , QudaReconstructType recon, bool group> | |
void | genGauss (GaugeField &U, RNG &rngstate, double sigma) |
template<typename Float , typename GaugeOr , typename GaugeDs > | |
void | OvrImpSTOUTStep (GaugeOr origin, GaugeDs dest, const GaugeField &dataOr, Float rho, Float epsilon) |
template<typename Float > | |
void | OvrImpSTOUTStep (GaugeField &dataDs, const GaugeField &dataOr, Float rho, Float epsilon) |
void | printLaunchTimer () |
void | setDiracRefineParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc) |
void | setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc, bool comms) |
void | createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve) |
void | createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, Dirac *&dRef, QudaInvertParam &param, const bool pc_solve) |
void | massRescale (cudaColorSpinorField &b, QudaInvertParam &param) |
void | fillInnerSolveParam (SolverParam &inner, const SolverParam &outer) |
int | reliable (double &rNorm, double &maxrx, double &maxrr, const double &r2, const double &delta) |
template<int N> | |
void | compute_alpha_N (Complex *Q_AQandg, Complex *alpha) |
template<int N> | |
void | compute_beta_N (Complex *Q_AQandg, Complex *Q_AS, Complex *beta) |
template<libtype which_lib> | |
void | ComputeRitz (EigCGArgs &args) |
template<> | |
void | ComputeRitz< libtype::eigen_lib > (EigCGArgs &args) |
template<> | |
void | ComputeRitz< libtype::magma_lib > (EigCGArgs &args) |
static void | fillEigCGInnerSolverParam (SolverParam &inner, const SolverParam &outer, bool use_sloppy_partial_accumulator=true) |
static void | fillInitCGSolverParam (SolverParam &inner, const SolverParam &outer) |
double | timeInterval (struct timeval start, struct timeval end) |
void | computeBeta (Complex **beta, std::vector< ColorSpinorField *> Ap, int i, int N, int k) |
void | updateAp (Complex **beta, std::vector< ColorSpinorField *> Ap, int begin, int size, int k) |
void | orthoDir (Complex **beta, std::vector< ColorSpinorField *> Ap, int k, int pipeline) |
void | backSubs (const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n) |
void | updateSolution (ColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, std::vector< ColorSpinorField *> p) |
template<libtype which_lib> | |
void | ComputeHarmonicRitz (GMResDRArgs &args) |
template<> | |
void | ComputeHarmonicRitz< libtype::magma_lib > (GMResDRArgs &args) |
template<> | |
void | ComputeHarmonicRitz< libtype::eigen_lib > (GMResDRArgs &args) |
template<libtype which_lib> | |
void | ComputeEta (GMResDRArgs &args) |
template<> | |
void | ComputeEta< libtype::magma_lib > (GMResDRArgs &args) |
template<> | |
void | ComputeEta< libtype::eigen_lib > (GMResDRArgs &args) |
void | fillFGMResDRInnerSolveParam (SolverParam &inner, const SolverParam &outer) |
template<typename T > | |
static void | applyT (T d_out[], const T d_in[], const T gamma[], const T rho[], int N) |
template<typename T > | |
static void | applyB (T d_out[], const T d_in[], int N) |
void | print (const double d[], int n) |
template<typename T > | |
static void | zero (T d[], int N) |
template<typename T > | |
static void | applyThirdTerm (T d_out[], const T d_in[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[]) |
template<typename T > | |
static void | computeCoeffs (T d_out[], const T d_p1[], const T d_p2[], int k, int j, int s, const T gamma[], const T rho[], const T gamma_kprev[], const T rho_kprev[]) |
void | updateAlphaZeta (double *alpha, double *zeta, double *zeta_old, const double *r2, const double *beta, const double pAp, const double *offset, const int nShift, const int j_low) |
static void | fillInnerSolverParam (SolverParam &inner, const SolverParam &outer) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
__host__ __device__ void | completeKSForceCore (KSForceArg< Oprod, Gauge, Mom > &arg, int idx) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
__global__ void | completeKSForceKernel (KSForceArg< Oprod, Gauge, Mom > arg) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
void | completeKSForceCPU (KSForceArg< Oprod, Gauge, Mom > &arg) |
template<typename Float , typename Oprod , typename Gauge , typename Mom > | |
void | completeKSForce (Oprod oprod, Gauge gauge, Mom mom, int dim[4], const GaugeField &meta, QudaFieldLocation location, long long *flops) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
__host__ __device__ void | computeKSLongLinkForceCore (KSLongLinkArg< Result, Oprod, Gauge > &arg, int idx) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
__global__ void | computeKSLongLinkForceKernel (KSLongLinkArg< Result, Oprod, Gauge > arg) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
void | computeKSLongLinkForceCPU (KSLongLinkArg< Result, Oprod, Gauge > &arg) |
template<typename Float , typename Result , typename Oprod , typename Gauge > | |
void | computeKSLongLinkForce (Result res, Oprod oprod, Gauge gauge, int dim[4], const GaugeField &meta, QudaFieldLocation location) |
template<typename Float > | |
void | computeKSLongLinkForce (GaugeField &result, const GaugeField &oprod, const GaugeField &gauge, QudaFieldLocation location) |
static void | print_trace (void) |
static void | print_alloc_header () |
static void | print_alloc (AllocType type) |
static void | track_malloc (const AllocType &type, const MemAlloc &a, void *ptr) |
static void | track_free (const AllocType &type, void *ptr) |
static void * | aligned_malloc (MemAlloc &a, size_t size) |
template<typename real , int Nc, QudaCloverFieldOrder order> | |
double | norm (const CloverField &u, norm_type_ type) |
template<typename real , int Nc> | |
double | norm (const CloverField &u, norm_type_ type) |
template<typename real > | |
double | _norm (const CloverField &u, norm_type_ type) |
template<typename real , int Nc, QudaGaugeFieldOrder order> | |
double | norm (const GaugeField &u, int d, norm_type_ type) |
template<typename real , int Nc> | |
double | norm (const GaugeField &u, int d, norm_type_ type) |
template<typename real > | |
double | norm (const GaugeField &u, int d, norm_type_ type) |
void | forceRecord (double2 &force, double dt, const char *fname) |
dim3 | GetBlockDim (size_t threads, size_t size) |
__global__ void | kernel_random (cuRNGState *state, unsigned long long seed, int size_cb, rngArg arg) |
CUDA kernel to initialize CURAND RNG states. More... | |
void | launch_kernel_random (cuRNGState *state, unsigned long long seed, int size_cb, int n_parity, int X[4]) |
Call CUDA kernel to initialize CURAND RNG states. More... | |
template<IndexType idxType, typename Int > | |
__device__ __forceinline__ int | neighborIndex (const unsigned int &cb_idx, const int(&shift)[4], const bool(&partitioned)[4], const unsigned int &parity) |
template<typename FloatN , int N, typename Output , typename Input > | |
__global__ void | shiftColorSpinorFieldKernel (ShiftQuarkArg< Output, Input > arg) |
template<typename FloatN , int N, typename Output , typename Input > | |
__global__ void | shiftColorSpinorFieldExternalKernel (ShiftQuarkArg< Output, Input > arg) |
void | shiftColorSpinorField (cudaColorSpinorField &dst, const cudaColorSpinorField &src, const unsigned int parity, const unsigned int dim, const int shift) |
static void | report (const char *type) |
template<typename real , typename Arg > | |
__device__ __host__ void | genGauss (Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c) |
template<typename real , typename Arg > | |
__device__ __host__ void | genUniform (Arg &arg, cuRNGState &localState, int parity, int x_cb, int s, int c) |
template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg > | |
void | SpinorNoiseCPU (Arg &arg) |
template<typename real , int Ns, int Nc, QudaNoiseType type, typename Arg > | |
__global__ void | SpinorNoiseGPU (Arg arg) |
void | computeStaggeredOprod (GaugeField &outA, GaugeField &outB, ColorSpinorField &inEven, ColorSpinorField &inOdd, int parity, const double coeff[2], int nFace) |
int | traceEnabled () |
static void | deserializeTuneCache (std::istream &in) |
static void | serializeTuneCache (std::ostream &out) |
static void | serializeProfile (std::ostream &out, std::ostream &async_out) |
static void | serializeTrace (std::ostream &out) |
static void | broadcastTuneCache () |
bool | policyTuning () |
template<typename Float , typename G > | |
__global__ void | ProjectSU3kernel (ProjectSU3Arg< Float, G > arg) |
void | setTransferGPU (bool) |
Variables | |
__device__ unsigned int | count [QUDA_MAX_MULTI_REDUCE] = { } |
__shared__ bool | isLastBlockDone |
__shared__ volatile bool | isLastWarpDone [16] |
static __constant__ signed char | B_array_d [MAX_MATRIX_SIZE] |
static signed char | B_array_h [MAX_MATRIX_SIZE] |
__shared__ float | s [] |
constexpr int | size = 4096 |
static __constant__ char | mobius_d [size] |
static __constant__ char | mobius_d [size] |
static int | commDim [QUDA_MAX_DIM] |
const int | Nstream = 9 |
static const char | gDigitsLut [200] |
static bool | bidirectional_debug = false |
cudaStream_t * | stream |
static bool | complete_recv_fwd [QUDA_MAX_DIM] = { } |
static bool | complete_recv_back [QUDA_MAX_DIM] = { } |
static bool | complete_send_fwd [QUDA_MAX_DIM] = { } |
static bool | complete_send_back [QUDA_MAX_DIM] = { } |
static auto | pinned_allocator = [] (size_t bytes ) { return static_cast<Complex*>(pool_pinned_malloc(bytes)); } |
static auto | pinned_deleter = [] (Complex *hptr) { pool_pinned_free(hptr); } |
static bool | dslash_init = false |
static int | first_active_policy =static_cast<int>(DslashCoarsePolicy::DSLASH_COARSE_POLICY_DISABLED) |
static char | policy_string [TuneKey::aux_n] |
static bool | kernelPackT = false |
static std::stack< bool > | kptstack |
static double | unscaled_shifts [QUDA_MAX_MULTI_SHIFT] |
static int | max_eigcg_cycles = 4 |
static QudaFieldLocation | reorder_location_ = QUDA_CUDA_FIELD_LOCATION |
static std::map< void *, MemAlloc > | alloc [N_ALLOC_TYPE] |
static long | total_bytes [N_ALLOC_TYPE] = {0} |
static long | max_total_bytes [N_ALLOC_TYPE] = {0} |
static long | total_host_bytes |
static long | max_total_host_bytes |
static long | total_pinned_bytes |
static long | max_total_pinned_bytes |
static std::stringstream | force_stream |
static long long | force_count = 0 |
static long long | force_flush = 1000 |
static bool | debug = false |
static TimeProfile | apiTimer ("CUDA API calls (driver)") |
static TuneKey | last_key |
static std::list< TraceKey > | trace_list |
static int | enable_trace = 0 |
static const std::string | quda_hash = QUDA_HASH |
static std::string | resource_path |
static map | tunecache |
static map::iterator | it |
static size_t | initial_cache_size = 0 |
static const std::string | quda_version = STR(QUDA_VERSION_MAJOR) "." STR(QUDA_VERSION_MINOR) "." STR(QUDA_VERSION_SUBMINOR) |
static bool | tuning = false |
static bool | profile_count = true |
static bool | policy_tuning = false |
static TimeProfile | launchTimer ("tuneLaunch") |
This is the covariant derivative based on the basic gauged Laplace operator
This is the gauged domain-wall 4-d preconditioned operator.
Note, for now, this just applies a batched 4-d dslash across the fifth dimension.
This is the gauged domain-wall 5-d preconditioned operator.
This is a staggered Dirac operator
This is the gauged twisted-mass operator acting on a non-degenerate quark doublet.
This is the preconditioned twisted-mass operator acting on a non-degenerate quark doublet.
This is the basic gauged twisted-clover operator
This is the preconditioned gauged twisted-mass operator
This is the basic gauged twisted-mass operator
This is the basic gauged Wilson operator
TODO
This is the Wilson-clover linear operator
This is the Wilson-clover preconditioned linear operator
This code has not been checked. In particular, I suspect it is erroneous in multi-GPU since it looks like the halo ghost region isn't being treated here.
Generic Multi Shift Solver
For staggered, the mass is folded into the dirac operator. Otherwise the matrix mass is 'unmodified'.
The lowest offset is in offsets[0]
This is the laplacian derivative based on the basic gauged differential operator
using quda::ColorSpinorFieldSet = typedef ColorSpinorField |
Definition at line 1220 of file invert_quda.h.
typedef std::complex<double> quda::Complex |
Definition at line 46 of file quda_internal.h.
typedef std::vector<ColorSpinorField*> quda::CompositeColorSpinorField |
Typedef for a set of spinors. Can be further divided into subsets, e.g., with different precisions (not implemented currently).
Definition at line 17 of file color_spinor_field.h.
typedef struct curandStateMRG32k3a quda::cuRNGState |
Definition at line 17 of file random_quda.h.
typedef MatrixXcd quda::DenseMatrix |
Definition at line 36 of file inv_eigcg_quda.cpp.
typedef Stride< Dynamic, Dynamic > quda::DynamicStride |
Definition at line 18 of file deflation.cpp.
using quda::RealVector = typedef VectorXd |
Definition at line 39 of file inv_eigcg_quda.cpp.
typedef Matrix< Complex, Dynamic, Dynamic, RowMajor > quda::RowMajorDenseMatrix |
Definition at line 42 of file inv_eigcg_quda.cpp.
typedef int quda::storeType |
Definition at line 15 of file coarse_op_kernel.cuh.
typedef VectorXcd quda::Vector |
Definition at line 38 of file inv_eigcg_quda.cpp.
typedef MatrixXcd quda::VectorSet |
Definition at line 37 of file inv_eigcg_quda.cpp.
enum quda::AllocType |
Enumerator | |
---|---|
DEVICE | |
DEVICE_PINNED | |
HOST | |
PINNED | |
MAPPED | |
N_ALLOC_TYPE |
Definition at line 16 of file malloc.cpp.
The following code is based on Kate's worker class in Multi-CG.
This worker class is used to update most of the u and r vectors. On BiCG iteration j, r[0] through r[j] and u[0] through u[j] all get updated, but the subsequent mat-vec operation only gets applied to r[j] and u[j]. Thus, we can hide updating r[0] through r[j-1] and u[0] through u[j-1], respectively, in the comms for the matvec on r[j] and u[j]. This results in improved strong scaling for BiCGstab-L.
See paragraphs 2 and 3 in the comments on the Worker class in Multi-CG for more remarks.
Enumerator | |
---|---|
BICGSTABL_UPDATE_U | |
BICGSTABL_UPDATE_R |
Definition at line 173 of file inv_bicgstabl_quda.cpp.
enum quda::ComputeType |
Definition at line 13 of file coarse_op.cuh.
enum quda::Dslash5Type |
Enumerator | |
---|---|
DSLASH5_DWF | |
DSLASH5_MOBIUS_PRE | |
DSLASH5_MOBIUS | |
M5_INV_DWF | |
M5_INV_MOBIUS | |
M5_INV_ZMOBIUS |
Definition at line 396 of file dslash_quda.h.
|
strong |
Definition at line 458 of file dslash_coarse.cu.
enum quda::DslashType |
Enumerator | |
---|---|
DSLASH_INTERIOR | |
DSLASH_EXTERIOR | |
DSLASH_FULL |
Definition at line 16 of file dslash_coarse.cuh.
enum quda::KernelType |
Enumerator | |
---|---|
INTERIOR_KERNEL | |
EXTERIOR_KERNEL_ALL | |
EXTERIOR_KERNEL_X | |
EXTERIOR_KERNEL_Y | |
EXTERIOR_KERNEL_Z | |
EXTERIOR_KERNEL_T | |
KERNEL_POLICY |
Definition at line 464 of file index_helper.cuh.
|
strong |
Enumerator | |
---|---|
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib | |
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib |
Definition at line 47 of file inv_eigcg_quda.cpp.
|
strong |
Enumerator | |
---|---|
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib | |
eigen_lib | |
magma_lib | |
lapack_lib | |
mkl_lib |
Definition at line 57 of file inv_gmresdr_quda.cpp.
enum quda::MemoryLocation |
Enumerator | |
---|---|
Device | |
Host | |
Remote |
Definition at line 15 of file color_spinor_field.h.
enum quda::norm_type_ |
Enumerator | |
---|---|
NORM1 | |
NORM2 | |
ABS_MAX | |
ABS_MIN | |
NORM1 | |
NORM2 | |
ABS_MAX | |
ABS_MIN |
Definition at line 7 of file max_gauge.cu.
enum quda::norm_type_ |
Enumerator | |
---|---|
NORM1 | |
NORM2 | |
ABS_MAX | |
ABS_MIN | |
NORM1 | |
NORM2 | |
ABS_MAX | |
ABS_MIN |
Definition at line 7 of file max_clover.cu.
|
inline |
Definition at line 15 of file math_helper.cuh.
References pow().
Referenced by constantInv().
quda::__launch_bounds__ | ( | 2 * | block_size | ) |
Definition at line 233 of file block_orthogonalize.cuh.
References quda::BlockOrthoArg< Rotator, Vector, fineSpin, spinBlockSize, coarseSpin, nVec >::B, B_array_d, dot(), quda::blas::dot_(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, quda::BlockOrthoArg< Rotator, Vector, fineSpin, spinBlockSize, coarseSpin, nVec >::parity, and s.
double quda::_norm | ( | const CloverField & | u, |
norm_type_ | type | ||
) |
Definition at line 40 of file max_clover.cu.
References errorQuda, and quda::CloverField::Ncolor().
|
inline |
Definition at line 125 of file complex_quda.h.
References abs().
Referenced by calculateY(), check_displacement(), comm_peer2peer_init(), ComputeHarmonicRitz< libtype::eigen_lib >(), ComputeHarmonicRitz< libtype::magma_lib >(), quda::EigenSolver::computeSVD(), contraction_reference(), quda::Matrix< T, N >::L1(), quda::Matrix< T, N >::Linf(), log(), quda::HMatrix< T, N >::max(), quda::clover::abs_< ReduceType, Float >::operator()(), quda::gauge::abs_< Float, storeFloat >::operator()(), quda::gauge::abs_< Float, char >::operator()(), quda::gauge::abs_< Float, short >::operator()(), quda::colorspinor::abs_< Float, storeFloat >::operator()(), quda::gauge::abs_< Float, int >::operator()(), quda::colorspinor::abs_< Float, short >::operator()(), quda::colorspinor::abs_< Float, char >::operator()(), quda::BiCGstab::operator()(), sqrt(), test(), and int_fastdiv::update_magic_numbers().
|
inline |
Returns the magnitude of z.
Definition at line 1060 of file complex_quda.h.
|
inline |
Definition at line 1065 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
|
inline |
Definition at line 1070 of file complex_quda.h.
References quda::complex< double >::imag(), and quda::complex< double >::real().
Referenced by abs().
|
inline |
Definition at line 61 of file complex_quda.h.
References acos().
Referenced by exponentiate_iQ(), and setUnitarizeLinksConstants().
|
inline |
Definition at line 1274 of file complex_quda.h.
References asin().
Referenced by acos().
|
inline |
bool quda::activeTuning | ( | ) |
query if tuning is in progress
Definition at line 121 of file tune.cpp.
References tuning.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), qudaLaunchKernel(), and quda::TunableVectorYZ::resizeStep().
void quda::AddCoarseDiagonalCPU | ( | Arg & | arg | ) |
Definition at line 1020 of file coarse_op_kernel.cuh.
__global__ void quda::AddCoarseDiagonalGPU | ( | Arg | arg | ) |
Definition at line 1036 of file coarse_op_kernel.cuh.
void quda::AddCoarseTmDiagonalCPU | ( | Arg & | arg | ) |
__global__ void quda::AddCoarseTmDiagonalGPU | ( | Arg | arg | ) |
|
static |
Under CUDA 4.0, cudaHostRegister seems to require that both the beginning and end of the buffer be aligned on page boundaries. This local function takes care of the alignment and gets called by pinned_malloc_() and mapped_malloc_()
Definition at line 141 of file malloc.cpp.
References quda::MemAlloc::base_size, errorQuda, quda::MemAlloc::file, quda::MemAlloc::func, quda::MemAlloc::line, and quda::MemAlloc::size.
Referenced by mapped_malloc_(), and pinned_malloc_().
void quda::APEStep | ( | GaugeField & | dataDs, |
const GaugeField & | dataOr, | ||
double | alpha | ||
) |
Apply APE smearing to the gauge field.
[out] | dataDs | Output smeared field |
[in] | dataOr | Input gauge field |
[in] | alpha | smearing parameter |
Definition at line 128 of file gauge_ape.cu.
References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().
Referenced by performAPEnStep().
|
inline |
Definition at line 904 of file quda_matrix.h.
References mat().
|
inline |
Definition at line 914 of file quda_matrix.h.
References mat().
|
static |
Definition at line 37 of file inv_mpcg_quda.cpp.
Referenced by applyThirdTerm().
|
inline |
Applies the coarse clover matrix on a given parity and checkerboard site index
out | The result out += X * in |
X | The coarse clover field |
in | The input field |
parity | The site parity |
x_cb | The checkerboarded site index |
Definition at line 280 of file dslash_coarse.cuh.
References conj(), dagger, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::dim, quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::ApplyClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const CloverField & | clover, | ||
bool | inverse, | ||
int | parity | ||
) |
Apply clover-matrix field to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | clover | Clover-matrix field |
[in] | inverse | Whether we are applying the inverse or not |
[in] | parity | Field parity (if color-spinor field is single parity) |
Definition at line 604 of file dslash_quda.cu.
References quda::Clover< Float, nSpin, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, errorQuda, in, inverse(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), Nstream, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.
Referenced by quda::DiracClover::Clover(), and quda::DiracCloverPC::CloverInv().
void quda::ApplyCoarse | ( | ColorSpinorField & | out, |
const ColorSpinorField & | inA, | ||
const ColorSpinorField & | inB, | ||
const GaugeField & | Y, | ||
const GaugeField & | X, | ||
double | kappa, | ||
int | parity = QUDA_INVALID_PARITY , |
||
bool | dslash = true , |
||
bool | clover = true , |
||
bool | dagger = false , |
||
const int * | commDim = 0 , |
||
QudaPrecision | halo_precision = QUDA_INVALID_PRECISION |
||
) |
Apply the coarse dslash stencil. This single driver accounts for all variations with and without the clover field, with and without dslash, and both single and full parity fields.
[out] | out | The result vector |
[in] | inA | The first input vector |
[in] | inB | The second input vector |
[in] | Y | Coarse link field |
[in] | X | Coarse clover field |
[in] | kappa | Scaling parameter |
[in] | parity | Parity of the field (if single parity) |
[in] | dslash | Are we applying dslash? |
[in] | clover | Are we applying clover? |
[in] | dagger | Apply dagger operator? |
[in] | commDim | Which dimensions are partitioned? |
[in] | halo_precision | What precision to use for the halos (if QUDA_INVALID_PRECISION, use field precision) |
Definition at line 772 of file dslash_coarse.cu.
References quda::DslashCoarsePolicyTune::apply().
Referenced by quda::DiracCoarse::Clover(), quda::DiracCoarse::CloverInv(), quda::DiracCoarse::Dslash(), quda::DiracCoarsePC::Dslash(), quda::DiracCoarse::DslashXpay(), and quda::DiracCoarse::M().
|
inline |
Applies the off-diagonal part of the covariant derivative operator
[out] | out | The out result field |
[in,out] | arg | Parameter struct |
[in] | U | The gauge field |
[in] | coord | Site coordinate |
[in] | x_cb | The checker-boarded site index. This is a 4-d index only |
[in] | parity | The site parity |
[in] | idx | Thread index (equal to face index for exterior kernels) |
[in] | thread_dim | Which dimension this thread corresponds to (fused exterior only) |
Definition at line 63 of file covDev.cuh.
References conj(), quda::CovDevArg< Float, nColor, reconstruct_ >::ghost, quda::CovDevArg< Float, nColor, reconstruct_ >::in, quda::Arg< real, Ns, Nc, order >::nParity, and quda::CovDevArg< Float, nColor, reconstruct_ >::U.
void quda::ApplyCovDev | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
int | mu, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the covariant derivative.
out = U * in
where U is the gauge field in a particular direction.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the covariant derivative |
[in] | mu | Direction of the derivative. For mu > 3 it goes backwards |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 185 of file covDev.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, mu, out, parity, popKernelPackT(), pushKernelPackT(), and quda::ColorSpinorField::V().
Referenced by quda::GaugeCovDev::DslashCD().
void quda::ApplyDomainWall4D | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | m_5, | ||
const Complex * | b_5, | ||
const Complex * | c_5, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the batched Wilson 4-d stencil to a 5-d vector with 4-d preconditioned data order.
out = D * in
where D is the gauged Wilson linear operator.
If a is non-zero, the operation is given by out = x + a * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied |
[in] | m_5 | Wilson mass shift |
[in] | b_5 | Mobius coefficient array (length Ls) |
[in] | c_5 | Mobius coefficient array (length Ls) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 99 of file dslash_domain_wall_4d.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracDomainWall4D::Dslash4(), quda::DiracMobius::Dslash4(), quda::DiracDomainWall4D::Dslash4Xpay(), quda::DiracMobius::Dslash4Xpay(), quda::DiracDomainWall4D::M(), and quda::DiracMobius::M().
void quda::ApplyDomainWall5D | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | m_f, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Domain-wall 5-d stencil to a 5-d vector with 5-d preconditioned data order.
out = D_5 * in
where D_5 is the 5-d wilson linear operator with fifth dimension boundary condition set by the fermion mass.
If a is non-zero, the operation is given by out = x + a * D_5 in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied (typically -kappa_5) |
[in] | m_f | Fermion mass parameter |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 118 of file dslash_domain_wall_5d.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), and quda::ColorSpinorField::V().
Referenced by quda::DiracDomainWall::Dslash(), quda::DiracDomainWall::DslashXpay(), and quda::DiracDomainWall::M().
|
inline |
Definition at line 94 of file dslash_coarse.cuh.
References conj(), dagger, getCoordsCB(), linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::ApplyDslash5 | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const ColorSpinorField & | x, | ||
double | m_f, | ||
double | m_5, | ||
const Complex * | b_5, | ||
const Complex * | c_5, | ||
double | a, | ||
bool | dagger, | ||
Dslash5Type | type | ||
) |
Apply either the domain-wall / mobius Dslash5 operator or the M5 inverse operator. In the current implementation, it is expected that the color-spinor fields are 4-d preconditioned.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | x | Auxiliary input color-spinor field |
[in] | m_f | Fermion mass parameter |
[in] | m_5 | Wilson mass shift |
[in] | b_5 | Mobius coefficient array (length Ls) |
[in] | c_5 | Mobius coefficient array (length Ls) |
[in] | a | Scale factor used in xpay operator |
[in] | dagger | Whether this is for the dagger operator |
[in] | type | Type of dslash we are applying |
Definition at line 216 of file dslash5_domain_wall.cu.
References quda::Dslash5< Float, nColor, Arg >::apply(), quda::Dslash5< Float, nColor, Arg >::arg, checkLocation, checkPrecision, dagger, errorQuda, in, quda::ColorSpinorField::Ncolor(), Nstream, out, quda::ColorSpinorField::PCType(), QUDA_4D_PC, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.
Referenced by quda::DiracMobius::Dslash4pre(), quda::DiracMobius::Dslash4preXpay(), quda::DiracDomainWall4D::Dslash5(), quda::DiracMobius::Dslash5(), quda::DiracDomainWall4DPC::Dslash5inv(), quda::DiracMobiusPC::Dslash5inv(), quda::DiracDomainWall4DPC::Dslash5invXpay(), quda::DiracMobiusPC::Dslash5invXpay(), quda::DiracDomainWall4D::Dslash5Xpay(), quda::DiracMobius::Dslash5Xpay(), quda::DiracDomainWall4D::M(), and quda::DiracMobius::M().
void quda::ApplyGamma | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
int | d | ||
) |
Definition at line 292 of file dslash_quda.cu.
References quda::Gamma< ValueType, basis, dir >::apply(), arg(), Nstream, and streams.
Referenced by ApplyGamma(), and gamma5().
void quda::ApplyGamma | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
int | d | ||
) |
Definition at line 301 of file dslash_quda.cu.
References ApplyGamma(), checkLocation, checkPrecision, errorQuda, in, quda::ColorSpinorField::Ncolor(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
void quda::applyGaugePhase | ( | GaugeField & | u | ) |
Apply the staggered phase factor to the gauge field.
[in] | u | The gauge field to which we apply the staggered phase factors |
Definition at line 223 of file gauge_phase.cu.
References errorQuda, quda::GaugeField::exchangeGhost(), quda::LatticeField::GhostExchange(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_PAD, and QUDA_SINGLE_PRECISION.
Referenced by quda::GaugeField::applyStaggeredPhase(), quda::cpuGaugeField::Gauge_p(), and quda::GaugeField::removeStaggeredPhase().
void quda::ApplyImprovedStaggered | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const GaugeField & | L, | ||
double | a, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Apply the improved staggered dslash operator to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | U | Gauge-Link (1-link or fat-link) |
[in] | L | Long-Links for asqtad |
[in] | a | xpay parameter (set to 0.0 for non-xpay version) |
[in] | x | Vector field we accumulate onto to |
[in] | parity | parity parameter |
[in] | dagger | Whether we are applying the dagger or not |
[in] | improved | whether to apply the standard-staggered (false) or asqtad (true) operator |
Definition at line 181 of file dslash_improved_staggered.cu.
References checkLocation, checkPrecision, comm_dim_partitioned(), dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::ColorSpinorField::V(), and quda::LatticeField::X().
Referenced by quda::DiracImprovedStaggered::Dslash(), quda::DiracImprovedStaggered::DslashXpay(), and quda::DiracImprovedStaggered::M().
|
inline |
Applies the off-diagonal part of the covariant derivative operator
[out] | out | The out result field |
[in,out] | arg | Parameter struct |
[in] | U | The gauge field |
[in] | coord | Site coordinate |
[in] | x_cb | The checker-boarded site index. This is a 4-d index only |
[in] | parity | The site parity |
[in] | idx | Thread index (equal to face index for exterior kernels) |
[in] | thread_dim | Which dimension this thread corresponds to (fused exterior only) |
Definition at line 69 of file laplace.cuh.
References conj(), quda::LaplaceArg< Float, nColor, reconstruct_ >::ghost, quda::LaplaceArg< Float, nColor, reconstruct_ >::in, linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::LaplaceArg< Float, nColor, reconstruct_ >::U.
Referenced by laplace().
void quda::ApplyLaplace | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
int | dir, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Laplace stencil.
out = - kappa * A * in
where A is the gauge laplace linear operator.
If x is defined, the operation is given by out = x - kappa * A in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the gauge Laplace |
[in] | dir | Direction of the derivative 0,1,2,3 to omit (-1 is full 4D) |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
Definition at line 188 of file laplace.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, kappa, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::GaugeLaplace::Dslash(), and quda::GaugeLaplace::DslashXpay().
void quda::ApplyNdegTwistedMass | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
double | c, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the non-degenerate twisted-mass stencil.
out = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1) * x
where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.
This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa) |
[in] | b | Chiral twist factor applied (typically 2*mu*kappa) |
[in] | c | Flavor twist factor applied (typically -2*epsilon*kappa) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 102 of file dslash_ndeg_twisted_mass.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracTwistedMass::Dslash(), quda::DiracTwistedMass::DslashXpay(), and quda::DiracTwistedMass::M().
void quda::ApplyNdegTwistedMassPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
double | c, | ||
bool | xpay, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
bool | asymmetric, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned non-degenerate twisted-mass stencil.
out = a * (1 + i*b*gamma_5*tau_3 + c*tau_1) * D * in + x
where D is the gauged Wilson linear operator. The quark fields out, in and x are five dimensional, with the fifth dimension corresponding to the flavor dimension. The convention is that the first 4-d slice (s=0) corresponds to the positive twist and the second slice (s=1) corresponds to the negative twist.
This operator can (at present) be applied to only single parity (checker-boarded) fields.
For the dagger operator, we generally apply the conjugate transpose operator
out = x + D^dagger A^{-dagger}
with the additional asymmetric special case, where we do not transpose the order of operations
out = A^{-dagger} D^dagger (no xpay term)
This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa^2/(1 + b*b -c*c) ) |
[in] | b | Chiral twist factor applied (typically -2*mu*kappa) |
[in] | c | Flavor twist factor applied (typically 2*epsilon*kappa) |
[in] | xpay | Whether to do xpay or not |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | asymmetric | Whether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in) |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 146 of file dslash_ndeg_twisted_mass_preconditioned.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), quda::ColorSpinorField::V(), and quda::blas::xpay().
Referenced by quda::DiracTwistedMassPC::Dslash(), and quda::DiracTwistedMassPC::DslashXpay().
|
inline |
Applies the off-diagonal part of the Staggered / Asqtad operator.
[out] | out | The out result field |
[in] | U | The gauge field |
[in] | in | The input field |
[in] | parity | The site parity |
[in] | x_cb | The checkerboarded site index |
Definition at line 76 of file dslash_staggered.cuh.
References conj(), quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::ghost, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::in, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::L, linkIndexM1(), linkIndexM3(), linkIndexP1(), linkIndexP3(), quda::Arg< real, Ns, Nc, order >::nParity, printLink(), StaggeredPhase(), and quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::U.
void quda::ApplyStaggered | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Apply the staggered dslash operator to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | U | Gauge-Link (1-link or fat-link) |
[in] | a | xpay parameter (set to 0.0 for non-xpay version) |
[in] | x | Vector field we accumulate onto |
[in] | parity | parity parameter |
[in] | dagger | Whether we are applying the dagger or not |
[in] | improved | whether to apply the standard-staggered (false) or asqtad (true) operator |
Definition at line 112 of file dslash_staggered.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracStaggered::Dslash(), quda::DiracStaggered::DslashXpay(), and quda::DiracStaggered::M().
|
static |
Definition at line 18 of file inv_mpcg_quda.cpp.
Referenced by applyThirdTerm().
|
static |
Definition at line 57 of file inv_mpcg_quda.cpp.
References applyB(), applyT(), s, and zero().
Referenced by computeCoeffs().
void quda::ApplyTwistClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const CloverField & | clover, | ||
double | kappa, | ||
double | mu, | ||
double | epsilon, | ||
int | parity, | ||
int | dagger, | ||
QudaTwistGamma5Type | twist | ||
) |
Apply twisted clover-matrix field to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | clover | Clover-matrix field |
[in] | kappa | kappa parameter |
[in] | mu | mu parameter |
[in] | epsilon | epsilon parameter |
[in] | parity | Field parity (if color-spinor field is single parity) |
[in] | dagger | Whether we are applying the dagger or not |
[in] | twist | The type of kernel we are doing if (twist == QUDA_TWIST_GAMMA5_DIRECT) apply (Clover + i*a*gamma_5) to the input spinor else if (twist == QUDA_TWIST_GAMMA5_INVERSE) apply (Clover + i*a*gamma_5)/(Clover^2 + a^2) to the input spinor |
Definition at line 769 of file dslash_quda.cu.
References quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, dagger, epsilon, errorQuda, in, kappa, mu, quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), Nstream, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, QUDA_TWIST_GAMMA5_DIRECT, and streams.
Referenced by quda::DiracTwistedClover::twistedCloverApply().
void quda::ApplyTwistedClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | C, | ||
double | a, | ||
double | b, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the twisted-clover stencil.
out = a * D * in + (C + i*b*gamma_5) * x
where D is the gauged Wilson linear operator, and C is the clover field.
This operator can be applied to both single parity (4d checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | C | The clover field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa) |
[in] | b | Chiral twist factor applied (typically 2*mu*kappa) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 122 of file dslash_twisted_clover.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracTwistedClover::DslashXpay(), and quda::DiracTwistedClover::M().
void quda::ApplyTwistedCloverPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | C, | ||
double | a, | ||
double | b, | ||
bool | xpay, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned twisted-clover stencil.
out = a * (C + i*b*gamma_5)^{-1} * D * in + x = a * C^{-2} (C - i*b*gamma_5) * D * in + x = A^{-1} * D * in + x
where D is the gauged Wilson linear operator and C is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.
out = A^{-\dagger} D^{\dagger} (no xpay term)
Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply
M = (1 - kappa^2 D^{\dagger} A^{-\dagger} D^{\dagger} A^{-\dagger} )
and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | C | The clover field used for the operator |
[in] | a | Scale factor applied to Wilson term ( typically 1 / (1 + b*b) or kappa^2 / (1 + b*b) ) |
[in] | b | Twist factor applied (typically -2*kappa*mu) |
[in] | xpay | Whether to do xpay or not |
[in] | x | Vector field we accumulate onto when xpay is true |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 161 of file dslash_twisted_clover_preconditioned.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::ColorSpinorField::V(), and quda::blas::xpay().
Referenced by quda::DiracTwistedCloverPC::Dslash(), and quda::DiracTwistedCloverPC::DslashXpay().
void quda::ApplyTwistedMass | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the twisted-mass stencil.
out = a * D * in + (1 + i*b*gamma_5) * x
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term (typically -kappa) |
[in] | b | Twist factor applied (typically 2*mu*kappa) |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 103 of file dslash_twisted_mass.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracTwistedMass::Dslash(), quda::DiracTwistedMass::DslashXpay(), and quda::DiracTwistedMass::M().
void quda::ApplyTwistedMassPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | a, | ||
double | b, | ||
bool | xpay, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
bool | asymmetric, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned twisted-mass stencil.
out = a*(1 + i*b*gamma_5) * D * in + x
where D is the gauged Wilson linear operator. This operator can (at present) be applied to only single parity (checker-boarded) fields. For the dagger operator, we generally apply the conjugate transpose operator
out = x + D^{\dagger} A^{-\dagger}
with the additional asymmetric special case, where we do not transpose the order of operations
out = A^{-\dagger} D^{\dagger} (no xpay term)
This variant is required when we have the asymmetric preconditioned operator and require the preconditioned twist term to remain in between the applications of D. This would be combined with a subsequent non-preconditioned dagger operator, A*x - kappa^2 D, to form the full operator.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | a | Scale factor applied to Wilson term ( typically kappa^2 / (1 + b*b) ) |
[in] | b | Twist factor applied (typically -2*kappa*mu) |
[in] | xpay | Whether to do xpay or not |
[in] | x | Vector field we accumulate onto when xpay is true |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | asymmetric | Whether this is for the asymmetric preconditioned dagger operator (a*(1 - i*b*gamma_5) * D^dagger * in) |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 116 of file dslash_twisted_mass_preconditioned.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, popKernelPackT(), pushKernelPackT(), quda::ColorSpinorField::V(), and quda::blas::xpay().
Referenced by quda::DiracTwistedMassPC::Dslash(), and quda::DiracTwistedMassPC::DslashXpay().
void quda::ApplyTwistGamma | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
int | d, | ||
double | kappa, | ||
double | mu, | ||
double | epsilon, | ||
int | dagger, | ||
QudaTwistGamma5Type | type | ||
) |
Apply the twisted-mass gamma operator to a color-spinor field.
[out] | out | Result color-spinor field |
[in] | in | Input color-spinor field |
[in] | d | Which gamma matrix we are applying (C counting, so gamma_5 has d=4) |
[in] | kappa | kappa parameter |
[in] | mu | mu parameter |
[in] | epsilon | epsilon parameter |
[in] | dagger | Whether we are applying the dagger or not |
[in] | type | The type of kernel we are doing |
Definition at line 416 of file dslash_quda.cu.
References quda::TwistGamma< Float, nColor, Arg >::apply(), arg(), checkCudaError, checkLocation, checkPrecision, dagger, epsilon, errorQuda, in, kappa, mu, quda::ColorSpinorField::Ncolor(), Nstream, out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and streams.
Referenced by quda::DiracTwistedMass::twistedApply().
void quda::applyU | ( | GaugeField & | force, |
GaugeField & | U | ||
) |
Left multiply the force field by the gauge field
force = U * force
force | Force field |
U | Gauge field |
Definition at line 446 of file momentum.cu.
References checkCudaError, errorQuda, quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_FLOAT2_GAUGE_ORDER.
Referenced by computeStaggeredForceQuda(), and updateMomentum().
void quda::ApplyWilson | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Wilson stencil.
out = D * in
where D is the gauged Wilson linear operator.
If kappa is non-zero, the operation is given by out = x + kappa * D in. This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 86 of file dslash_wilson.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracWilson::Dslash(), quda::DiracWilson::DslashXpay(), and quda::DiracWilson::M().
|
inline |
Applies the off-diagonal part of the Wilson operator.
[out] | out | The out result field |
[in,out] | arg | Parameter struct |
[in] | coord | Site coordinate |
[in] | x_cb | The checker-boarded site index (at present this is a 4-d index only) |
[in] | s | The fifth-dimension index |
[in] | parity | Site parity |
[in] | idx | Thread index (equal to face index for exterior kernels) |
[in] | thread_dim | Which dimension this thread corresponds to (fused exterior only) |
Definition at line 62 of file dslash_wilson.cuh.
References conj(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, quda::WilsonArg< Float, nColor, reconstruct_ >::ghost, quda::WilsonArg< Float, nColor, reconstruct_ >::in, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, and quda::WilsonArg< Float, nColor, reconstruct_ >::U.
void quda::ApplyWilsonClover | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the Wilson-clover stencil.
out = A * x + kappa * D * in
where D is the gauged Wilson linear operator.
This operator can be applied to both single parity (checker-boarded) fields, or to full fields.
[out] | out | The output result field |
[in] | in | Input field that D is applied to |
[in] | x | Input field that A is applied to |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 121 of file dslash_wilson_clover.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracClover::DslashXpay(), and quda::DiracClover::M().
void quda::ApplyWilsonCloverPreconditioned | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const GaugeField & | U, | ||
const CloverField & | A, | ||
double | kappa, | ||
const ColorSpinorField & | x, | ||
int | parity, | ||
bool | dagger, | ||
const int * | comm_override, | ||
TimeProfile & | profile | ||
) |
Driver for applying the preconditioned Wilson-clover stencil.
out = A^{-1} * D * in + x
where D is the gauged Wilson linear operator and A is the clover field. This operator can (at present) be applied to only single parity (checker-boarded) fields. When the dagger operator is requested, we do not transpose the order of operations, e.g.
out = A^{-\dagger} D^{\dagger} (no xpay term)
Although not a conjugate transpose of the regular operator, this variant is used to enable kernel fusion between the application of D and the subsequent application of A, e.g., in the symmetric dagger operator we need to apply
M = (1 - kappa^2 D^{\dagger} A^{-1} D^{\dagger} A^{-1} )
and since we cannot fuse D^{\dagger} A^{-\dagger}, we instead fuse A^{-\dagger} D^{\dagger}.
If kappa is non-zero, the operation is given by out = x + kappa * A^{-1} D in. This operator can (at present) be applied to only single parity (checker-boarded) fields.
[out] | out | The output result field |
[in] | in | The input field |
[in] | U | The gauge field used for the operator |
[in] | A | The clover field used for the operator |
[in] | kappa | Scale factor applied |
[in] | x | Vector field we accumulate onto |
[in] | parity | Destination parity |
[in] | dagger | Whether this is for the dagger operator |
[in] | comm_override | Override for which dimensions are partitioned |
[in] | profile | The TimeProfile used for profiling the dslash |
Definition at line 158 of file dslash_wilson_clover_preconditioned.cu.
References checkLocation, checkPrecision, dagger, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, and quda::ColorSpinorField::V().
Referenced by quda::DiracCloverPC::Dslash(), and quda::DiracCloverPC::DslashXpay().
|
inline |
Applies the off-diagonal part of the Wilson operator premultiplied by twist rotation - this is required for applying the symmetric preconditioned twisted-mass dagger operator.
[out] | out | The out result field |
[in,out] | arg | Parameter struct |
[in] | coord | Site coordinate |
[in] | x_cb | The checker-boarded site index |
[in] | s | Fifth-dimension index |
[in] | parity | Site parity |
[in] | idx | Thread index (equal to face index for exterior kernels) |
[in] | thread_dim | Which dimension this thread corresponds to (fused exterior only) |
Definition at line 52 of file dslash_twisted_mass_preconditioned.cuh.
References quda::TwistedMassArg< Float, nColor, reconstruct_ >::asymmetric, conj(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, getNeighborIndexCB(), quda::WilsonArg< Float, nColor, reconstruct_ >::ghost, quda::WilsonArg< Float, nColor, reconstruct_ >::in, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, quda::WilsonArg< Float, nColor, reconstruct_ >::U, and quda::DslashArg< Float >::xpay.
|
inline |
Returns the phase angle of z.
Definition at line 1076 of file complex_quda.h.
References atan2().
Referenced by quda::DomainWall5D< Float, nDim, nColor, Arg >::apply(), quda::TwistedMass< Float, nDim, nColor, Arg >::apply(), quda::Staggered< Float, nDim, nColor, Arg >::apply(), quda::TwistedClover< Float, nDim, nColor, Arg >::apply(), quda::WilsonClover< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMass< Float, nDim, nColor, Arg >::apply(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Wilson< Float, nDim, nColor, Arg >::apply(), quda::Laplace< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply(), quda::SpinorNoise< real, Ns, Nc, type, Arg >::apply(), quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::GaugeGauss< Float, Arg >::apply(), quda::KSForceComplete< Float, Oprod, Gauge, Mom >::apply(), quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >::apply(), quda::ShiftColorSpinorField< Output, Input >::apply(), quda::WuppertalSmearing< Float, Ns, Nc, Arg >::apply(), quda::GaugeOvrImpSTOUT< Float, Arg >::apply(), quda::CopyColorSpinor< Ns, Arg >::apply(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), quda::Pack< Float, nColor, spin_project >::apply(), quda::ExtractGhost< nDim, Arg >::apply(), quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), quda::CopyColorSpinor< 4, Arg >::apply(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::apply(), quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >::apply(), quda::Gamma< ValueType, basis, dir >::apply(), 
quda::KSLongLinkForce< Float, Result, Oprod, Gauge >::apply(), quda::TwistGamma< Float, nColor, Arg >::apply(), quda::ProjectSU3< Float, G >::apply(), quda::Clover< Float, nSpin, nColor, Arg >::apply(), quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), ApplyClover(), ApplyGamma(), ApplyTwistClover(), ApplyTwistGamma(), CalculateYhatCPU(), CalculateYhatGPU(), Checksum(), cloverDerivativeKernel(), cloverGPU(), cloverInvert(), cloverInvertKernel(), coarseDslash(), coarseDslashKernel(), quda::coeff_type< real, true, Arg >::coeff(), completeKSForce(), completeKSForceCPU(), completeKSForceKernel(), computeAPEStep(), ComputeAVCPU(), ComputeAVGPU(), ComputeCoarseCloverCPU(), ComputeCoarseCloverGPU(), computeFmunuCPU(), computeFmunuKernel(), computeKSLongLinkForce(), computeKSLongLinkForceCPU(), computeKSLongLinkForceKernel(), computeMomAction(), computeOvrImpSTOUTStep(), computePlaq(), computeSTOUTStep(), ComputeTMAVGPU(), ComputeTMCAVCPU(), ComputeTMCAVGPU(), ComputeUVCPU(), ComputeUVGPU(), computeVUV(), ComputeVUVGPU(), computeWupperalStep(), ComputeYReverseCPU(), ComputeYReverseGPU(), ConvertCPU(), ConvertGPU(), coordsFromFaceIndex(), copyGauge(), copyGaugeEx(), copyGaugeExKernel(), copyInterior(), copyInteriorKernel(), copySpinorEx(), covDev(), covDevGPU(), dimFromFaceIndex(), domainWall4D(), quda::DomainWall4DApply< Float, nColor, recon >::DomainWall4DApply(), domainWall4DCPU(), domainWall4DGPU(), domainWall5D(), quda::DomainWall5DApply< Float, nColor, recon >::DomainWall5DApply(), domainWall5DCPU(), domainWall5DGPU(), dslash5CPU(), dslash5GPU(), dslash5inv(), dslash5invGPU(), quda::ExtractGhost< nDim, Arg >::ExtractGhost(), extractGhost(), extractGhostEx(), extractGhostExKernel(), forceRecord(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), genericCopyColorSpinor(), GenericPackGhost(), GenericPackGhostKernel(), genGauss(), getCoords(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::getPhase(), quda::gauge::Reconstruct< 9, 
Float, ghostExchange_, stag_phase >::getPhase(), quda::ImprovedStaggeredApply< Float, nColor, recon_l >::ImprovedStaggeredApply(), indexFromFaceIndex(), quda::dslash::issuePack(), isUnitary(), laplace(), quda::LaplaceApply< Float, nColor, recon >::LaplaceApply(), laplaceGPU(), quda::StaggeredLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch(), launch_kernel_random(), log(), quda::blas::multiBlasKernel(), quda::blas::multiReduceKernel(), ndegTwistedMass(), quda::NdegTwistedMassApply< Float, nColor, recon >::NdegTwistedMassApply(), ndegTwistedMassCPU(), ndegTwistedMassGPU(), quda::NdegTwistedMassPreconditionedApply< Float, nColor, recon >::NdegTwistedMassPreconditionedApply(), ndegTwistedMassPreconditionedCPU(), ndegTwistedMassPreconditionedGPU(), operator<<(), OvrImpSTOUTStep(), pack(), packGhost(), packKernel(), packShmemKernel(), packStaggered(), packStaggeredKernel(), packStaggeredShmemKernel(), polarSu3(), projectSU3(), qChargeComputeKernel(), reduce(), quda::blas::reduceKernel(), RescaleYCPU(), RescaleYGPU(), setUnitarizeLinksConstants(), shiftColorSpinorField(), sigmaOprodKernel(), spinorNoise(), SpinorNoiseCPU(), SpinorNoiseGPU(), sqrt(), staggered(), quda::StaggeredApply< Float, nColor, recon_u >::StaggeredApply(), staggeredGPU(), StaggeredPhase(), twistCloverGPU(), twistedClover(), quda::TwistedCloverApply< Float, nColor, recon >::TwistedCloverApply(), quda::TwistedCloverPreconditionedApply< Float, nColor, recon >::TwistedCloverPreconditionedApply(), twistedCloverPreconditionedCPU(), twistedCloverPreconditionedGPU(), twistedMass(), quda::TwistedMassApply< Float, nColor, recon >::TwistedMassApply(), twistedMassCPU(), twistedMassGPU(), quda::TwistedMassPreconditionedApply< Float, nColor, recon >::TwistedMassPreconditionedApply(), twistedMassPreconditionedCPU(), twistedMassPreconditionedGPU(), updateMomentum(), wilson(), quda::WilsonApply< Float, nColor, recon >::WilsonApply(), wilsonClover(), quda::WilsonCloverApply< Float, 
nColor, recon >::WilsonCloverApply(), wilsonCloverCPU(), wilsonCloverGPU(), quda::WilsonCloverPreconditionedApply< Float, nColor, recon >::WilsonCloverPreconditionedApply(), wilsonCloverPreconditionedCPU(), wilsonCloverPreconditionedGPU(), wilsonCPU(), wilsonGPU(), wuppertalStep(), and wuppertalStepCPU().
|
inline |
Definition at line 1081 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
|
inline |
Definition at line 1086 of file complex_quda.h.
References atan2(), quda::complex< double >::imag(), and quda::complex< double >::real().
void quda::arpack_solve | ( | std::vector< ColorSpinorField *> & | h_evecs, |
std::vector< Complex > & | h_evals, | ||
const DiracMatrix & | mat, | ||
QudaEigParam * | eig_param, | ||
TimeProfile & | profile | ||
) |
The QUDA interface function. One passes two allocated arrays to hold the eigenmode data, the problem matrix, the arpack parameters defining what problem is to be solved, and a container for QUDA data structure types.
[out] | h_evecs | Host fields where the e-vectors will be copied to |
[out] | h_evals | Where the e-values will be copied to |
[in] | mat | An explicit construction of the problem matrix. |
[in] | param | Parameter container defining how the matrix is to be solved. |
[in] | eig_param | Parameter structure for all QUDA eigensolvers |
[in,out] | profile | TimeProfile instance used for profiling |
Definition at line 507 of file quda_arpack_interface.cpp.
References errorQuda.
Referenced by eigensolveQuda().
|
inline |
Definition at line 66 of file complex_quda.h.
References asin().
Referenced by acos().
|
inline |
Definition at line 1281 of file complex_quda.h.
References asinh().
Referenced by asin().
|
inline |
Definition at line 1320 of file complex_quda.h.
Referenced by asin().
void quda::assertAllMemFree | ( | ) |
Definition at line 384 of file malloc.cpp.
References DEVICE, DEVICE_PINNED, HOST, MAPPED, PINNED, print_alloc(), print_alloc_header(), printfQuda, and warningQuda.
Referenced by endQuda().
|
inline |
Definition at line 71 of file complex_quda.h.
References atan().
|
inline |
Definition at line 1288 of file complex_quda.h.
References atanh().
Referenced by atan().
|
inline |
Definition at line 76 of file complex_quda.h.
Referenced by arg(), quda::Trig< isFixed, T >::Atan2(), atanh(), link_sanity_check_internal_8(), new_save_half(), and su3Construct8().
|
inline |
Definition at line 1326 of file complex_quda.h.
References atan2(), and log().
Referenced by atan().
|
inline |
Definition at line 1344 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
void quda::ax | ( | const double & | a, |
GaugeField & | u | ||
) |
Scale the gauge field by the scalar a.
[in] | a | scalar multiplier |
[in] | u | The gauge field we want to multiply |
Definition at line 349 of file gauge_field.cpp.
References quda::blas::ax(), colorSpinorParam(), and quda::ColorSpinorField::Create().
Referenced by quda::MG::buildFreeVectors(), computeHISQForceQuda(), dslashReference_5th(), dslashReference_5th_inv(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), main(), and mdslashReference_5th_inv().
|
inline |
Definition at line 37 of file clover_deriv.cuh.
Referenced by cloverDerivativeKernel(), dslashReference_5th_inv(), mdslashReference_5th_inv(), quda::PreconCG::operator()(), quda::SD::operator()(), operator-=(), and staggeredDslashRef().
void quda::backSubs | ( | const Complex * | alpha, |
Complex **const | beta, | ||
const double * | gamma, | ||
Complex * | delta, | ||
int | n | ||
) |
Definition at line 135 of file inv_gcr_quda.cpp.
Referenced by updateSolution().
|
inline |
Swizzler for reordering the (x) thread block indices - use in conjunction with swizzle-factor autotuning to find the optimum swizzle factor. Specifically, the thread block id is remapped by transposing its coordinates: if the original order can be parametrized by
blockIdx.x = j * swizzle + i,
then the new order is
block_idx = i * (gridDim.x / swizzle) + j
We need to factor out any remainder and leave this in original ordering.
[in] | swizzle | Swizzle factor to be applied |
Definition at line 834 of file index_helper.cuh.
void quda::blockOrthoCPU | ( | Arg & | arg | ) |
Definition at line 105 of file block_orthogonalize.cuh.
References dot(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, quda::BlockOrthoArg< Rotator, Vector, fineSpin, spinBlockSize, coarseSpin, nVec >::parity, and s.
void quda::BlockOrthogonalize | ( | ColorSpinorField & | V, |
const std::vector< ColorSpinorField *> & | B, | ||
const int * | fine_to_coarse, | ||
const int * | coarse_to_fine, | ||
const int * | geo_bs, | ||
const int | spin_bs, | ||
const int | n_block_ortho | ||
) |
Block orthogonalize the matrix field, where the blocks are defined by lookup tables that map the fine grid points to the coarse grid points, and similarly for the spin degrees of freedom.
[in,out] | V | Matrix field to be orthogonalized |
[in] | B | input vectors |
[in] | geo_bs | Geometric block size |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | coarse_to_fine | Coarse-to-fine lookup table (linear indices) |
[in] | spin_bs | Spin block size |
[in] | n_block_ortho | Number of times to Gram-Schmidt |
Definition at line 317 of file block_orthogonalize.cu.
References errorQuda, n_block_ortho, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and V.
Referenced by quda::Transfer::reset(), and quda::Transfer::setTransferGPU().
|
static |
Distribute the tunecache from node 0 to all other nodes.
Definition at line 290 of file tune.cpp.
References comm_broadcast(), comm_rank(), deserializeTuneCache(), serializeTuneCache(), and size.
Referenced by loadTuneCache(), and tuneLaunch().
|
inline |
Definition at line 39 of file convert.h.
Referenced by copyFloatN().
|
inline |
|
inline |
Definition at line 38 of file convert.h.
Referenced by copy(), copy_and_scale(), and copyFloatN().
|
inline |
void quda::calculateY | ( | coarseGauge & | Y, |
coarseGauge & | X, | ||
coarseGaugeAtomic & | Y_atomic, | ||
coarseGaugeAtomic & | X_atomic, | ||
Ftmp & | UV, | ||
F & | AV, | ||
Vt & | V, | ||
fineGauge & | G, | ||
fineClover & | C, | ||
fineClover & | Cinv, | ||
GaugeField & | Y_, | ||
GaugeField & | X_, | ||
GaugeField & | Y_atomic_, | ||
GaugeField & | X_atomic_, | ||
ColorSpinorField & | uv, | ||
ColorSpinorField & | av, | ||
const ColorSpinorField & | v, | ||
double | kappa, | ||
double | mu, | ||
double | mu_factor, | ||
QudaDiracType | dirac, | ||
QudaMatPCType | matpc, | ||
bool | need_bidirectional, | ||
const int * | fine_to_coarse, | ||
const int * | coarse_to_fine | ||
) |
Calculate the coarse-link field, including the coarse clover field.
Y[out] | Coarse link field accessor |
X[out] | Coarse clover field accessor |
UV[out] | Temporary accessor used to store fine link field * null space vectors |
AV[out] | Temporary accessor used to store fine clover inverse * null space vectors (only applicable when fine-grid operator is the preconditioned clover operator, else in general this just aliases V) |
V[in] | Packed null-space vector accessor |
G[in] | Fine grid link / gauge field accessor |
C[in] | Fine grid clover field accessor |
Cinv[in] | Fine grid clover inverse field accessor |
Y_[out] | Coarse link field |
X_[out] | Coarse clover field |
X_[out] | Coarse clover inverse field (used as temporary here) |
v[in] | Packed null-space vectors |
kappa[in] | Kappa parameter |
mu[in] | Twisted-mass parameter |
matpc[in] | The type of preconditioning of the source fine-grid operator |
need_bidirectional[in] | If we need to force bi-directional build or not. Required if some previous level was preconditioned, even if this one isn't |
Definition at line 869 of file coarse_op.cuh.
References abs(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::arg, quda::LatticeField::bufferIndex, checkLocation, COMPUTE_AV, COMPUTE_CLOVER_INV_MAX, COMPUTE_COARSE_CLOVER, COMPUTE_CONVERT, COMPUTE_DIAGONAL, COMPUTE_RESCALE, COMPUTE_REVERSE_Y, COMPUTE_TMAV, COMPUTE_TMCAV, COMPUTE_TMDIAGONAL, COMPUTE_TWISTED_CLOVER_INV_MAX, COMPUTE_UV, COMPUTE_VUV, errorQuda, quda::ColorSpinorField::exchangeGhost(), getVerbosity(), quda::ColorSpinorField::Ghost(), quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::norm2(), quda::LatticeField::Precision(), printfQuda, QUDA_BACKWARDS, QUDA_CLOVER_DIRAC, QUDA_CLOVERPC_DIRAC, QUDA_COARSE_DIRAC, QUDA_COARSEPC_DIRAC, QUDA_CUDA_FIELD_LOCATION, QUDA_DEBUG_VERBOSE, QUDA_FORWARDS, QUDA_HALF_PRECISION, QUDA_INVALID_PARITY, QUDA_MATPC_EVEN_EVEN, QUDA_MATPC_EVEN_EVEN_ASYMMETRIC, QUDA_MATPC_ODD_ODD, QUDA_MATPC_ODD_ODD_ASYMMETRIC, QUDA_MAX_DIM, QUDA_TWISTED_CLOVER_DIRAC, QUDA_TWISTED_CLOVERPC_DIRAC, QUDA_TWISTED_MASS_DIRAC, QUDA_TWISTED_MASSPC_DIRAC, QUDA_VERBOSE, quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::resetGhost(), quda::LatticeField::Scale(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setComputeType(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setDimension(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::setDirection(), sqrt(), quda::ColorSpinorField::X(), quda::LatticeField::X(), and quda::GaugeField::zero().
Referenced by CoarseOp().
void quda::calculateYhat | ( | GaugeField & | Yhat, |
GaugeField & | Xinv, | ||
const GaugeField & | Y, | ||
const GaugeField & | X | ||
) |
Calculate preconditioned coarse links and coarse clover inverse field.
Yhat[out] | Preconditioned coarse link field |
Xinv[out] | Coarse clover inverse field |
Y[in] | Coarse link field |
X[in] | Coarse clover field |
Definition at line 245 of file coarse_op_preconditioned.cu.
References checkPrecision, errorQuda, getVerbosity(), quda::LatticeField::Precision(), printfQuda, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, QUDA_SUMMARIZE, and X.
Referenced by quda::DiracCoarse::createPreconditionedCoarseOp().
void quda::CalculateYhatCPU | ( | Arg & | arg | ) |
Definition at line 100 of file coarse_op_preconditioned.cuh.
__global__ void quda::CalculateYhatGPU | ( | Arg | arg | ) |
Definition at line 118 of file coarse_op_preconditioned.cuh.
References arg(), atomicMax(), and parity.
bool quda::canReuseResidentGauge | ( | QudaInvertParam * | inv_param | ) |
Check that the resident gauge field is compatible with the requested inv_param
inv_param | Contains all metadata regarding host and device storage |
Definition at line 2232 of file interface_quda.cpp.
References QudaInvertParam_s::cuda_prec, QudaInvertParam_s::dslash_type, quda::LatticeField::Precision(), and QUDA_ASQTAD_DSLASH.
|
inline |
Definition at line 117 of file coarse_op_kernel.cuh.
Referenced by quda::CalculateYhatArg< Float, PreconditionedGauge, Gauge, n >::CalculateYhatArg(), computeUV(), computeYhat(), quda::GMResDR::FlexArnoldiProcedure(), quda::MG::generateNullVectors(), multiplyVUV(), and quda::GMResDR::RestartVZH().
|
inline |
Definition at line 26 of file blas_helper.cuh.
References errorQuda, quda::ColorSpinorField::Length(), and quda::ColorSpinorField::Stride().
Referenced by quda::blas::nativeBlas(), and quda::blas::nativeReduce().
void quda::checkMomOrder | ( | const GaugeField & | u | ) |
Definition at line 22 of file copy_gauge.cu.
References errorQuda, quda::GaugeField::Order(), QUDA_FLOAT2_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, and quda::GaugeField::Reconstruct().
Referenced by copyGauge().
void quda::checkNan | ( | Arg & | arg | ) |
Check whether the field contains NaNs
Definition at line 63 of file copy_gauge.cuh.
References errorQuda, length, nColor, quda::gauge::Ncolor(), and parity.
|
inline |
Definition at line 20 of file blas_helper.cuh.
References errorQuda, quda::ColorSpinorField::Length(), and quda::ColorSpinorField::Stride().
Referenced by quda::blas::multiReduce().
uint64_t quda::Checksum | ( | const GaugeField & | u, |
bool | mini = false |
||
) |
Compute an XOR-based checksum of this gauge field: each gauge field entry is converted to type uint64_t, and the cumulative XOR of these values is computed.
[in] | mini | Whether to compute a mini checksum or global checksum. A mini checksum only computes over a subset of the lattice sites and is to be used for online comparisons, e.g., checking a field has changed with a global update algorithm. |
Definition at line 34 of file checksum.cu.
References arg(), ChecksumCPU(), comm_allreduce_xor(), errorQuda, quda::GaugeField::Ncolor(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_BQCD_GAUGE_ORDER, QUDA_DOUBLE_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_SINGLE_PRECISION, QUDA_TIFR_GAUGE_ORDER, and QUDA_TIFR_PADDED_GAUGE_ORDER.
Referenced by quda::GaugeField::checksum(), and quda::cpuGaugeField::Gauge_p().
uint64_t quda::ChecksumCPU | ( | const Arg & | arg | ) |
Definition at line 23 of file checksum.cu.
References parity, siteChecksum(), and quda::Arg< real, Ns, Nc, order >::volumeCB.
Referenced by Checksum().
|
inline |
Check the unitarity of the input matrix to a given tolerance.
inv | The inverse of the input matrix |
in | The input matrix whose unitarity we are reporting |
tol | Tolerance to which this check is applied |
Definition at line 24 of file su3_project.cuh.
References conj(), in, quda::Matrix< T, N >::size(), and tol.
Referenced by polarSu3().
__host__ __device__ void quda::checkUnitaryPrint | ( | const Matrix & | inv, |
const Matrix & | in | ||
) |
Print out deviation for each component (used for debugging only).
inv | The inverse of the input matrix |
in | The input matrix whose unitarity we are reporting |
Definition at line 66 of file su3_project.cuh.
References in, and quda::Matrix< T, N >::size().
|
inline |
Definition at line 519 of file dslash_quda.cu.
References nColor, quda::Arg< real, Ns, Nc, order >::nParity, out, and parity.
void quda::cloverCPU | ( | Arg & | arg | ) |
Definition at line 552 of file dslash_quda.cu.
References quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::cloverDerivative | ( | cudaGaugeField & | force, |
cudaGaugeField & | gauge, | ||
cudaGaugeField & | oprod, | ||
double | coeff, | ||
QudaParity | parity | ||
) |
Compute the derivative of the clover matrix in the direction mu,nu and compute the resulting force given the outer-product field.
force | The computed force field (read/write update) |
gauge | The input gauge field |
oprod | The input outer-product field (tensor matrix field) |
coeff | Multiplicative coefficient (e.g., clover coefficient) |
parity | The field parity we are working on |
Definition at line 174 of file clover_deriv_quda.cu.
References errorQuda, quda::GaugeField::Geometry(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_EVEN_PARITY, QUDA_SINGLE_PRECISION, QUDA_TENSOR_GEOMETRY, QUDA_VECTOR_GEOMETRY, and quda::LatticeField::X().
Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().
__global__ void quda::cloverDerivativeKernel | ( | Arg | arg | ) |
Definition at line 320 of file clover_deriv.cuh.
References arg(), axpy(), DECLARE_LINK, quda::CloverDerivArg< Float, Force, Gauge, Oprod >::force, index(), mu, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::cloverGPU | ( | Arg | arg | ) |
Definition at line 560 of file dslash_quda.cu.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::cloverInvert | ( | Arg & | arg | ) |
Definition at line 65 of file clover_invert.cuh.
References arg(), parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::cloverInvert | ( | CloverField & | clover, |
bool | computeTraceLog | ||
) |
This function computes the Cholesky decomposition of each clover matrix and stores the clover inverse field.
clover | The clover field (contains both the field itself and its inverse) |
computeTraceLog | Whether to compute the trace logarithm of the clover term |
Definition at line 106 of file clover_invert.cu.
References errorQuda, quda::CloverField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::FullClover::FullClover(), and loadCloverQuda().
|
inline |
Use a Cholesky decomposition and invert the clover matrix
Definition at line 33 of file clover_invert.cuh.
References quda::linalg::Cholesky< Mat, T, N, fast >::D(), quda::linalg::Cholesky< Mat, T, N, fast >::invert(), log(), Mat(), nColor, and quda::CloverInvertArg< Float >::twist.
__global__ void quda::cloverInvertKernel | ( | Arg | arg | ) |
Definition at line 82 of file clover_invert.cuh.
References arg(), parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::cloverRho | ( | CloverField & | clover, |
double | rho | ||
) |
This function adds a real scalar onto the clover diagonal (only to the direct field not the inverse)
clover | The clover field |
rho | Real scalar to be added on |
Referenced by quda::FullClover::FullClover().
|
inline |
Definition at line 1372 of file complex_quda.h.
Referenced by quda::gauge::Reconstruct< 12, Float, ghostExchange_ >::Unpack(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::Unpack(), and quda::gauge::Reconstruct< 8, Float, ghostExchange_ >::Unpack().
|
inline |
Definition at line 1361 of file complex_quda.h.
Referenced by quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::Pack(), quda::gauge::Reconstruct< 12, Float, ghostExchange_ >::Unpack(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::Unpack(), quda::gauge::Reconstruct< 8, Float, ghostExchange_ >::Unpack(), and quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::Unpack().
void quda::CoarseCoarseOp | ( | GaugeField & | Y, |
GaugeField & | X, | ||
const Transfer & | T, | ||
const GaugeField & | gauge, | ||
const GaugeField & | clover, | ||
const GaugeField & | cloverInv, | ||
double | kappa, | ||
double | mu, | ||
double | mu_factor, | ||
QudaDiracType | dirac, | ||
QudaMatPCType | matpc, | ||
bool | need_bidirectional | ||
) |
Coarse operator construction from an intermediate-grid operator (Coarse)
Y[out] | Coarse link field |
X[out] | Coarse clover field |
T[in] | Transfer operator that defines the new coarse space |
gauge[in] | Link field from fine grid |
clover[in] | Clover field on fine grid |
cloverInv[in] | Clover inverse field on fine grid |
kappa[in] | Kappa parameter |
mu[in] | Mu parameter (set to non-zero for twisted-mass/twisted-clover) |
mu_factor[in] | Multiplicative factor for the mu parameter |
matpc[in] | The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator. |
need_bidirectional[in] | Whether or not we need to force a bi-directional build, even if the given level isn't preconditioned—if any previous level is preconditioned, we've violated that symmetry. |
Definition at line 192 of file coarsecoarse_op.cu.
References checkLocation, quda::ColorSpinorParam::create, quda::GaugeField::Create(), quda::ColorSpinorField::Create(), errorQuda, quda::GaugeFieldParam::location, quda::LatticeField::MemType(), param, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_SINGLE_PRECISION, QUDA_ZERO_FIELD_CREATE, quda::GaugeFieldParam::setPrecision(), quda::Transfer::Vectors(), and X.
Referenced by quda::DiracCoarse::createCoarseOp(), and quda::DiracCoarsePC::createCoarseOp().
|
inline |
Definition at line 309 of file dslash_coarse.cuh.
References arg(), quda::vector_type< scalar, n >::data, quda::Arg< real, Ns, Nc, order >::nParity, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::out, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::parity, s, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::coarseDslash | ( | Arg | arg | ) |
Definition at line 350 of file dslash_coarse.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::parity, s, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::coarseDslashKernel | ( | Arg | arg | ) |
Definition at line 379 of file dslash_coarse.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::DslashCoarseArg< Float, yFloat, ghostFloat, coarseSpin, coarseColor, csOrder, gOrder >::parity, s, and quda::Arg< real, Ns, Nc, order >::volumeCB.
|
inline |
Definition at line 619 of file coarse_op_kernel.cuh.
Referenced by getIndicesShared().
void quda::CoarseOp | ( | GaugeField & | Y, |
GaugeField & | X, | ||
const Transfer & | T, | ||
const cudaGaugeField & | gauge, | ||
const cudaCloverField * | clover, | ||
double | kappa, | ||
double | mu, | ||
double | mu_factor, | ||
QudaDiracType | dirac, | ||
QudaMatPCType | matpc | ||
) |
Coarse operator construction from a fine-grid operator (Wilson / Clover)
Y[out] | Coarse link field |
X[out] | Coarse clover field |
T[in] | Transfer operator that defines the coarse space |
gauge[in] | Gauge field from fine grid |
clover[in] | Clover field on fine grid (optional) |
kappa[in] | Kappa parameter |
mu[in] | Mu parameter (set to non-zero for twisted-mass/twisted-clover) |
mu_factor[in] | Multiplicative factor for the mu parameter |
matpc[in] | The type of even-odd preconditioned fine-grid operator we are constructing the coarse grid operator from. If matpc==QUDA_MATPC_INVALID then we assume the operator is not even-odd preconditioned and we coarsen the full operator. |
Definition at line 201 of file coarse_op.cu.
References quda::GaugeField::Anisotropy(), calculateY(), checkLocation, quda::CloverFieldParam::clover, quda::CloverFieldParam::cloverInv, quda::GaugeField::copy(), quda::CloverFieldParam::create, quda::ColorSpinorParam::create, quda::GaugeField::Create(), quda::ColorSpinorField::Create(), quda::CloverFieldParam::direct, errorQuda, quda::GaugeField::GaugeFixed(), quda::GaugeField::Geometry(), quda::CloverFieldParam::inverse, quda::CloverFieldParam::invNorm, quda::GaugeField::LinkType(), quda::GaugeFieldParam::location, quda::LatticeField::MemType(), quda::LatticeFieldParam::nDim, quda::CloverFieldParam::norm, quda::GaugeFieldParam::order, quda::CloverFieldParam::order, quda::LatticeFieldParam::pad, param, quda::LatticeFieldParam::Precision(), quda::LatticeField::Precision(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_GHOST_EXCHANGE_PAD, QUDA_INVALID_CLOVER_ORDER, QUDA_MATPC_INVALID, QUDA_NULL_FIELD_CREATE, QUDA_PACKED_CLOVER_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, QUDA_TWISTED_MASSPC_DIRAC, QUDA_ZERO_FIELD_CREATE, quda::GaugeFieldParam::reconstruct, quda::GaugeField::Reconstruct(), quda::cudaCloverField::saveCPUField(), quda::cudaGaugeField::saveCPUField(), quda::CloverFieldParam::setPrecision(), quda::GaugeFieldParam::setPrecision(), quda::LatticeFieldParam::siteSubset, quda::GaugeField::TBoundary(), quda::Transfer::Vectors(), quda::LatticeFieldParam::x, X, and quda::LatticeField::X().
Referenced by quda::DiracWilson::createCoarseOp(), quda::DiracClover::createCoarseOp(), quda::DiracCloverPC::createCoarseOp(), quda::DiracTwistedMass::createCoarseOp(), quda::DiracTwistedMassPC::createCoarseOp(), quda::DiracTwistedClover::createCoarseOp(), and quda::DiracTwistedCloverPC::createCoarseOp().
|
inline |
Definition at line 63 of file block_orthogonalize.cuh.
References nColor.
|
inline |
Definition at line 76 of file block_orthogonalize.cuh.
References nColor.
|
inline |
Definition at line 97 of file block_orthogonalize.cuh.
References nColor.
|
inline |
Definition at line 86 of file block_orthogonalize.cuh.
References nColor.
ColorSpinorParam quda::colorSpinorParam | ( | const GaugeField & | a | ) |
Definition at line 304 of file gauge_field.cpp.
References quda::ColorSpinorParam::create, errorQuda, quda::ColorSpinorParam::fieldOrder, quda::GaugeField::FieldOrder(), quda::ColorSpinorParam::gammaBasis, quda::GaugeField::Gauge_p(), quda::GaugeField::Geometry(), quda::GaugeField::LinkType(), quda::ColorSpinorParam::location, quda::LatticeField::Location(), quda::ColorSpinorParam::nColor, quda::GaugeField::Ncolor(), quda::LatticeFieldParam::nDim, quda::LatticeField::Ndim(), quda::ColorSpinorParam::nSpin, quda::LatticeFieldParam::pad, quda::LatticeField::Pad(), quda::LatticeField::Precision(), QUDA_COARSE_LINKS, QUDA_DOUBLE_PRECISION, QUDA_EVEN_ODD_SITE_ORDER, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_HALF_PRECISION, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_QUARTER_PRECISION, QUDA_REFERENCE_FIELD_CREATE, QUDA_UKQCD_GAMMA_BASIS, quda::GaugeField::Reconstruct(), quda::ColorSpinorParam::setPrecision(), quda::ColorSpinorParam::siteOrder, quda::LatticeFieldParam::siteSubset, quda::ColorSpinorParam::v, quda::LatticeFieldParam::x, and quda::LatticeField::X().
ColorSpinorParam quda::colorSpinorParam | ( | const CloverField & | a, |
bool | inverse | ||
) |
Definition at line 445 of file clover_field.cpp.
References quda::ColorSpinorParam::create, errorQuda, quda::ColorSpinorParam::fieldOrder, quda::ColorSpinorParam::gammaBasis, quda::ColorSpinorParam::location, quda::LatticeField::Location(), quda::ColorSpinorParam::nColor, quda::LatticeFieldParam::nDim, quda::LatticeField::Ndim(), quda::ColorSpinorParam::nSpin, quda::LatticeFieldParam::pad, quda::LatticeField::Pad(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_EVEN_ODD_SITE_ORDER, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_HALF_PRECISION, QUDA_REFERENCE_FIELD_CREATE, QUDA_UKQCD_GAMMA_BASIS, quda::ColorSpinorParam::setPrecision(), quda::ColorSpinorParam::siteOrder, quda::LatticeFieldParam::siteSubset, quda::CloverField::V(), quda::ColorSpinorParam::v, quda::LatticeFieldParam::x, and quda::LatticeField::X().
Referenced by ax(), norm1(), and norm2().
int quda::compareSpinor | ( | const U & | u, |
const V & | v, | ||
const int | tol | ||
) |
Definition at line 184 of file color_spinor_util.cu.
References comm_allreduce_int(), comm_size(), parity, pow(), printfQuda, and tol.
Referenced by genericCompare().
|
inline |
Helper function for setting the auxiliary string.
[in] | meta | LatticeField used for querying field location |
Definition at line 718 of file lattice_field.h.
References quda::LatticeField::Location(), QUDA_CUDA_FIELD_LOCATION, and QUDA_INVALID_FIELD_LOCATION.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::CalculateY(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::CopyGauge(), quda::GaugePlaq< Float, Gauge >::GaugePlaq(), and quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::GenericPackGhostLauncher().
void quda::completeKSForce | ( | GaugeField & | mom, |
const GaugeField & | oprod, | ||
const GaugeField & | gauge, | ||
QudaFieldLocation | location, | ||
long long * | flops = NULL |
||
) |
Definition at line 152 of file ks_force_quda.cu.
References errorQuda, quda::blas::flops, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::GaugeField::Reconstruct(), and quda::LatticeField::X().
void quda::completeKSForce | ( | Oprod | oprod, |
Gauge | gauge, | ||
Mom | mom, | ||
int | dim[4], | ||
const GaugeField & | meta, | ||
QudaFieldLocation | location, | ||
long long * | flops | ||
) |
Definition at line 141 of file ks_force_quda.cu.
References quda::KSForceComplete< Float, Oprod, Gauge, Mom >::apply(), arg(), quda::KSForceComplete< Float, Oprod, Gauge, Mom >::flops(), and qudaDeviceSynchronize.
__host__ __device__ void quda::completeKSForceCore | ( | KSForceArg< Oprod, Gauge, Mom > & | arg, |
int | idx | ||
) |
Definition at line 43 of file ks_force_quda.cu.
References quda::KSForceArg< Oprod, Gauge, Mom >::gauge, getCoords(), linkIndexShift(), makeAntiHerm(), quda::KSForceArg< Oprod, Gauge, Mom >::mom, quda::KSForceArg< Oprod, Gauge, Mom >::oprod, parity, quda::KSForceArg< Oprod, Gauge, Mom >::threads, quda::KSForceArg< Oprod, Gauge, Mom >::X, and X.
void quda::completeKSForceCPU | ( | KSForceArg< Oprod, Gauge, Mom > & | arg | ) |
Definition at line 93 of file ks_force_quda.cu.
References arg(), and quda::KSForceArg< Oprod, Gauge, Mom >::threads.
__global__ void quda::completeKSForceKernel | ( | KSForceArg< Oprod, Gauge, Mom > | arg | ) |
Definition at line 84 of file ks_force_quda.cu.
References arg(), and quda::KSForceArg< Oprod, Gauge, Mom >::threads.
Definition at line 280 of file inv_ca_cg.cpp.
Definition at line 356 of file inv_ca_cg.cpp.
__device__ __host__ __forceinline__ Float quda::compute_site_max | ( | Arg & | arg, |
int | x_cb, | ||
int | parity, | ||
int | spinor_parity, | ||
int | spin_block, | ||
int | color_block, | ||
bool | active | ||
) |
Compute the max element over the spin-color components of a given site.
Definition at line 48 of file color_spinor_pack.cuh.
References errorQuda, MAX_BLOCK_FLOAT_NC, and s.
__global__ void quda::computeAPEStep | ( | Arg | arg | ) |
Definition at line 96 of file gauge_ape.cuh.
References arg(), conj(), getCoords(), linkIndexShift(), parity, setIdentity(), and quda::GaugeAPEArg< Float, GaugeOr, GaugeDs >::X.
|
inline |
Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x), where s = fine spin, c' = coarse color, and c = fine color.
Definition at line 230 of file coarse_op_kernel.cuh.
References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, quda::linalg::Cholesky< Mat, T, N, fast >::backward(), quda::linalg::Cholesky< Mat, T, N, fast >::forward(), s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.
void quda::ComputeAVCPU | ( | Arg & | arg | ) |
__global__ void quda::ComputeAVGPU | ( | Arg | arg | ) |
void quda::computeBeta | ( | Complex ** | beta, |
std::vector< ColorSpinorField *> | Ap, | ||
int | i, | ||
int | N, | ||
int | k | ||
) |
Definition at line 62 of file inv_gcr_quda.cpp.
References quda::blas::cDotProduct(), and printfQuda.
Referenced by orthoDir().
void quda::computeClover | ( | CloverField & | clover, |
const GaugeField & | gauge, | ||
double | coeff, | ||
QudaFieldLocation | location | ||
) |
Definition at line 204 of file clover_quda.cu.
References quda::CloverArg< Float, nSpin, nColor, dynamic_clover_ >::clover, errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::cudaCloverField::compute(), createCloverQuda(), and quda::FullClover::FullClover().
void quda::computeCloverForce | ( | GaugeField & | force, |
const GaugeField & | U, | ||
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | p, | ||
std::vector< double > & | coeff | ||
) |
Compute the force contribution from the solver solution fields.
Force(x, mu) = U(x, mu) * sum_{i=1}^{nvec} ( P_mu^+ x(x+mu) p(x)^dag + P_mu^- p(x+mu) x(x)^dag )
M = A_even - kappa^2 * Dslash * A_odd^{-1} * Dslash; x(even) = M^{-1} b(even); x(odd) = A_odd^{-1} * Dslash * x(even); p(even) = M * x(even); p(odd) = A_odd^{-1} * Dslash^dag * M * x(even).
force[out,in] | The resulting force field |
U | The input gauge field |
x | Solution field (both parities) |
p | Intermediate vectors (both parities) |
coeff | Multiplicative coefficient (e.g., dt * residue) |
Definition at line 465 of file clover_outer_product.cu.
References checkCudaError, errorQuda, quda::ColorSpinorField::GhostFace(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().
Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().
void quda::computeCloverSigmaOprod | ( | GaugeField & | oprod, |
std::vector< ColorSpinorField *> & | x, | ||
std::vector< ColorSpinorField *> & | p, | ||
std::vector< std::vector< double > > & | coeff | ||
) |
Compute the outer product from the solver solution fields arising from the diagonal term of the fermion bilinear in direction mu,nu and sum to outer product field.
oprod[out,in] | Computed outer product field (tensor matrix field) |
x[in] | Solution field (both parities) |
p[in] | Intermediate vectors (both parities) |
coeff[in] | Multiplicative coefficient (e.g., dt * residue), one for each parity |
Definition at line 98 of file clover_sigma_outer_product.cu.
References checkCudaError, errorQuda, MAX_NVECTOR, quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, and Spinor< RegType, StoreType, N, write >::set().
Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().
void quda::computeCloverSigmaTrace | ( | GaugeField & | output, |
const CloverField & | clover, | ||
double | coeff | ||
) |
Compute the matrix tensor field necessary for the force calculation from the clover trace action. This computes a tensor field [mu,nu].
output | The computed matrix field (tensor matrix field) |
clover | The input clover field |
coeff | Scalar coefficient multiplying the result (e.g., stepsize) |
Definition at line 242 of file clover_trace_quda.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by computeCloverForceQuda(), and quda::FullClover::FullClover().
__device__ __host__ void quda::computeCoarseClover | ( | Arg & | arg, |
int | parity, | ||
int | x_cb, | ||
int | ic_c, | ||
int | jc_c | ||
) |
Definition at line 928 of file coarse_op_kernel.cuh.
References conj(), getCoords(), QUDA_MAX_DIM, s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::X.
void quda::ComputeCoarseCloverCPU | ( | Arg & | arg | ) |
__global__ void quda::ComputeCoarseCloverGPU | ( | Arg | arg | ) |
|
static |
Definition at line 79 of file inv_mpcg_quda.cpp.
References applyThirdTerm().
Referenced by quda::MPCG::operator()().
__global__ void quda::computeColorContraction | ( | Arg | arg | ) |
Definition at line 38 of file contraction.cuh.
References innerProduct(), mu, quda::ContractionArg< real >::nColor, nColor, quda::ContractionArg< real >::nSpin, parity, quda::ContractionArg< real >::x, and quda::ContractionArg< real >::y.
__global__ void quda::computeDegrandRossiContraction | ( | Arg | arg | ) |
Definition at line 65 of file contraction.cuh.
References innerProduct(), mu, quda::ContractionArg< real >::nColor, quda::ContractionArg< real >::nSpin, parity, quda::ContractionArg< real >::x, and quda::ContractionArg< real >::y.
void quda::ComputeEta | ( | GMResDRArgs & | args | ) |
Definition at line 157 of file inv_gmresdr_quda.cpp.
References errorQuda.
void quda::ComputeEta< libtype::eigen_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 179 of file inv_gmresdr_quda.cpp.
void quda::ComputeEta< libtype::magma_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 159 of file inv_gmresdr_quda.cpp.
References errorQuda, magma_Xgels(), and memset().
void quda::computeFmunu | ( | GaugeField & | Fmunu, |
const GaugeField & | gauge | ||
) |
Compute the Fmunu tensor.
[out] | Fmunu | The Fmunu tensor |
[in] | gauge | The gauge field upon which to compute the Fmunu tensor |
Definition at line 99 of file gauge_field_strength_tensor.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by createCloverQuda(), qChargeDensityQuda(), and qChargeQuda().
__device__ __host__ __forceinline__ void quda::computeFmunuCore | ( | Arg & | arg, |
int | idx, | ||
int | parity | ||
) |
Definition at line 28 of file field_strength_tensor.cuh.
References conj(), getCoords(), linkIndexShift(), mu, and quda::FmunuArg< Float, Fmunu, Gauge >::X.
void quda::computeFmunuCPU | ( | Arg & | arg | ) |
Definition at line 184 of file field_strength_tensor.cuh.
References arg(), mu, and parity.
__global__ void quda::computeFmunuKernel | ( | Arg | arg | ) |
__device__ void quda::computeForce | ( | LINK | force, |
Arg & | arg, | ||
int | xIndex, | ||
int | yIndex, | ||
int | mu, | ||
int | nu | ||
) |
Definition at line 128 of file clover_deriv.cuh.
References conj(), DECLARE_ARRAY, getCoordsExtended(), LINK, linkIndexShift(), and mu.
__global__ void quda::computeGenGauss | ( | Arg | arg | ) |
Definition at line 71 of file gauge_random.cu.
References getCoords(), linkIndex(), mu, parity, and setIdentity().
void quda::ComputeHarmonicRitz | ( | GMResDRArgs & | args | ) |
Definition at line 88 of file inv_gmresdr_quda.cpp.
References errorQuda.
void quda::ComputeHarmonicRitz< libtype::eigen_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 127 of file inv_gmresdr_quda.cpp.
References abs(), norm(), and quda::SortedEvals::SelectSmall().
void quda::ComputeHarmonicRitz< libtype::magma_lib > | ( | GMResDRArgs & | args | ) |
Definition at line 90 of file inv_gmresdr_quda.cpp.
References abs(), errorQuda, magma_Xgeev(), magma_Xgesv(), norm(), and quda::SortedEvals::SelectSmall().
void quda::computeKSLongLinkForce | ( | Result | res, |
Oprod | oprod, | ||
Gauge | gauge, | ||
int | dim[4], | ||
const GaugeField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 353 of file ks_force_quda.cu.
References quda::KSLongLinkForce< Float, Result, Oprod, Gauge >::apply(), arg(), and qudaDeviceSynchronize.
Referenced by computeKSLongLinkForce().
void quda::computeKSLongLinkForce | ( | GaugeField & | result, |
const GaugeField & | oprod, | ||
const GaugeField & | gauge, | ||
QudaFieldLocation | location | ||
) |
Definition at line 362 of file ks_force_quda.cu.
References computeKSLongLinkForce(), errorQuda, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::GaugeField::Reconstruct(), and quda::LatticeField::X().
__host__ __device__ void quda::computeKSLongLinkForceCore | ( | KSLongLinkArg< Result, Oprod, Gauge > & | arg, |
int | idx | ||
) |
Definition at line 222 of file ks_force_quda.cu.
void quda::computeKSLongLinkForceCPU | ( | KSLongLinkArg< Result, Oprod, Gauge > & | arg | ) |
Definition at line 298 of file ks_force_quda.cu.
References arg(), and quda::KSLongLinkArg< Result, Oprod, Gauge >::threads.
__global__ void quda::computeKSLongLinkForceKernel | ( | KSLongLinkArg< Result, Oprod, Gauge > | arg | ) |
Definition at line 286 of file ks_force_quda.cu.
References arg(), and quda::KSLongLinkArg< Result, Oprod, Gauge >::threads.
|
inline |
Definition at line 1023 of file quda_matrix.h.
References getDeterminant().
double quda::computeMomAction | ( | const GaugeField & | mom | ) |
Compute and return the global momentum action 1/2 mom^2.
mom | Momentum field |
Definition at line 178 of file momentum.cu.
References arg(), quda::blas::bytes, E, errorQuda, quda::blas::flops, forceMonitor(), forceRecord(), getCoords(), getTuning(), getVerbosity(), quda::Matrix< T, N >::L1(), quda::Matrix< T, N >::L2(), LAUNCH_KERNEL_LOCAL_PARITY, linkIndex(), quda::LatticeField::Location(), makeAntiHerm(), norm2(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::LatticeField::R(), quda::GaugeField::Reconstruct(), stream, tuneLaunch(), updateMomentum(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().
Referenced by forceRecord(), and momActionQuda().
|
inline |
Computes out = sum_mu U_mu(x)in(x+d) + U_mu^dag(x-d)in(x-d)
[out] | out | The out result field |
[in] | U | The gauge field |
[in] | in | The input field |
[in] | x_cb | The checkerboarded site index |
[in] | parity | The site parity |
Definition at line 51 of file color_spinor_wuppertal.cu.
References conj(), getCoords(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::in, linkIndexM1(), linkIndexP1(), quda::Arg< real, Ns, Nc, order >::nParity, and quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::U.
__global__ void quda::computeOvrImpSTOUTStep | ( | Arg | arg | ) |
Definition at line 463 of file gauge_stout.cuh.
References arg(), conj(), ErrorSU3(), exponentiate_iQ(), getCoords(), getTrace(), inverse(), linkIndexShift(), parity, setIdentity(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.
__global__ void quda::computePlaq | ( | GaugePlaqArg< Gauge > | arg | ) |
Definition at line 49 of file gauge_plaq.cuh.
References arg(), quda::GaugePlaqArg< Gauge >::border, getCoords(), mu, parity, quda::GaugePlaqArg< Gauge >::threads, and quda::GaugePlaqArg< Gauge >::X.
Referenced by quda::GaugePlaq< Float, Gauge >::apply().
double quda::computeQCharge | ( | const GaugeField & | Fmunu | ) |
Compute the topological charge.
[in] | Fmunu | The Fmunu tensor, usually calculated from a smeared configuration |
Definition at line 97 of file gauge_qcharge.cu.
References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().
Referenced by qChargeQuda().
double quda::computeQChargeDensity | ( | const GaugeField & | Fmunu, |
void * | result | ||
) |
Compute the topological charge density per lattice site.
[in] | Fmunu | The Fmunu tensor, usually calculated from a smeared configuration |
[out] | result | The topological charge at each lattice site |
Definition at line 116 of file gauge_qcharge.cu.
References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().
Referenced by qChargeDensityQuda().
void quda::ComputeRitz | ( | EigCGArgs & | args | ) |
Definition at line 133 of file inv_eigcg_quda.cpp.
References errorQuda.
void quda::ComputeRitz< libtype::eigen_lib > | ( | EigCGArgs & | args | ) |
Definition at line 136 of file inv_eigcg_quda.cpp.
void quda::ComputeRitz< libtype::magma_lib > | ( | EigCGArgs & | args | ) |
Definition at line 164 of file inv_eigcg_quda.cpp.
References errorQuda, and magma_Xheev().
void quda::computeStaggeredOprod | ( | GaugeField * | out[], |
ColorSpinorField & | in, | ||
const double | coeff[], | ||
int | nFace | ||
) |
Compute the outer-product field between the staggered quark field's one and (for HISQ and ASQTAD) three hop sites. E.g.,
out[0][d](x) = (in(x+1_d) x conj(in(x)))
out[1][d](x) = (in(x+3_d) x conj(in(x)))
where 1_d and 3_d represent a relative shift of magnitude 1 and 3 in dimension d, respectively
Note out[1] is only computed if nFace=3
[out] | out | Array of nFace outer-product matrix fields |
[in] | in | Input quark field |
[in] | coeff | Coefficient |
[in] | nFace | Number of faces (1 or 3) |
Definition at line 447 of file staggered_oprod.cu.
References errorQuda, quda::ColorSpinorField::Even(), and quda::ColorSpinorField::Odd().
Referenced by computeHISQForceQuda(), and computeStaggeredForceQuda().
void quda::computeStaggeredOprod | ( | GaugeField & | outA, |
GaugeField & | outB, | ||
ColorSpinorField & | inEven, | ||
ColorSpinorField & | inOdd, | ||
int | parity, | ||
const double | coeff[2], | ||
int | nFace | ||
) |
Definition at line 404 of file staggered_oprod.cu.
References quda::cudaColorSpinorField::allocateGhostBuffer(), errorQuda, quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, and QUDA_SINGLE_PRECISION.
__host__ __device__ void quda::computeStaple | ( | Arg & | arg, |
int | idx, | ||
int | parity, | ||
int | dir, | ||
Link & | staple | ||
) |
Definition at line 36 of file gauge_ape.cuh.
References conj(), getCoords(), linkIndexShift(), mu, setZero(), and quda::GaugeAPEArg< Float, GaugeOr, GaugeDs >::X.
Referenced by fatLongKSLink(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg().
__host__ __device__ void quda::computeStapleRectangle | ( | Arg & | arg, |
int | idx, | ||
int | parity, | ||
int | dir, | ||
Link & | staple, | ||
Link & | rectangle | ||
) |
Definition at line 232 of file gauge_stout.cuh.
References conj(), getCoords(), linkIndexShift(), mu, setZero(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.
__global__ void quda::computeSTOUTStep | ( | Arg | arg | ) |
Definition at line 96 of file gauge_stout.cuh.
References arg(), conj(), ErrorSU3(), exponentiate_iQ(), getCoords(), getTrace(), inverse(), linkIndexShift(), parity, setIdentity(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::X.
|
inline |
Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x) for twisted-mass fermions, where: s = fine spin, c' = coarse color, c = fine color.
Definition at line 312 of file coarse_op_kernel.cuh.
References s.
void quda::ComputeTMAVCPU | ( | Arg & | arg | ) |
Definition at line 332 of file coarse_op_kernel.cuh.
References parity.
__global__ void quda::ComputeTMAVGPU | ( | Arg | arg | ) |
Definition at line 343 of file coarse_op_kernel.cuh.
References arg(), quda::linalg::Cholesky< Mat, T, N, fast >::invert(), Mat(), nColor, and parity.
|
inline |
Calculates the matrix A V^{s,c'}(x) = A^{c}(x) * V^{s,c}(x) for twisted-clover fermions, where: s = fine spin, c' = coarse color, c = fine color.
Definition at line 430 of file coarse_op_kernel.cuh.
References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, quda::linalg::Cholesky< Mat, T, N, fast >::backward(), quda::linalg::Cholesky< Mat, T, N, fast >::forward(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::mu, s, quda::HMatrix< T, N >::square(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.
void quda::ComputeTMCAVCPU | ( | Arg & | arg | ) |
__global__ void quda::ComputeTMCAVGPU | ( | Arg | arg | ) |
|
inline |
Calculates the matrix UV^{s,c'}_mu(x) = U^{c}_mu(x) * V^{s,c}_mu(x+mu), where: mu = dir, s = fine spin, c' = coarse color, c = fine color.
Definition at line 130 of file coarse_op_kernel.cuh.
References caxpy(), getCoords(), linkIndexP1(), QUDA_FORWARDS, s, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV.
void quda::ComputeUVCPU | ( | Arg & | arg | ) |
Definition at line 197 of file coarse_op_kernel.cuh.
References arg(), parity, and QUDA_FORWARDS.
__global__ void quda::ComputeUVGPU | ( | Arg | arg | ) |
Definition at line 212 of file coarse_op_kernel.cuh.
References arg(), parity, and QUDA_FORWARDS.
__device__ __host__ void quda::computeVUV | ( | Arg & | arg, |
const Gamma & | gamma, | ||
int | parity, | ||
int | x_cb, | ||
int | c_row, | ||
int | c_col, | ||
int | parity_coarse_, | ||
int | coarse_x_cb_ | ||
) |
Definition at line 628 of file coarse_op_kernel.cuh.
References arg(), atomicAdd(), conj(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::dim_index, errorQuda, getCoords(), max_color_per_block, parity, QUDA_BACKWARDS, QUDA_MAX_DIM, virtualThreadIdx(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::X, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::Y.
void quda::ComputeVUVCPU | ( | Arg | arg | ) |
Definition at line 779 of file coarse_op_kernel.cuh.
__global__ void quda::ComputeVUVGPU | ( | Arg | arg | ) |
|
inline |
Definition at line 102 of file color_spinor_wuppertal.cu.
References arg(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::in, quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::out, and quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::parity.
|
inline |
Definition at line 45 of file coarse_op_preconditioned.cuh.
References caxpy(), conj(), getCoords(), and linkIndexM1().
__device__ __host__ void quda::computeYreverse | ( | Arg & | arg, |
int | parity, | ||
int | x_cb, | ||
int | ic_c, | ||
int | jc_c | ||
) |
Compute the forward links from backwards links by flipping the sign of the spin projector
Definition at line 877 of file coarse_op_kernel.cuh.
void quda::ComputeYReverseCPU | ( | Arg & | arg | ) |
Definition at line 898 of file coarse_op_kernel.cuh.
References arg(), nColor, and parity.
__global__ void quda::ComputeYReverseGPU | ( | Arg | arg | ) |
Definition at line 912 of file coarse_op_kernel.cuh.
References arg(), nColor, and parity.
|
inline |
Definition at line 130 of file complex_quda.h.
Referenced by applyClover(), applyCovDev(), applyDslash(), applyLaplace(), applyStaggered(), applyWilson(), applyWilsonTM(), quda::CG::blocksolve(), checkUnitary(), computeAPEStep(), computeCoarseClover(), computeFmunuCore(), computeForce(), quda::MPBiCGstab::computeMatrixPowers(), computeNeighborSum(), computeOvrImpSTOUTStep(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), computeVUV(), computeYhat(), conj(), ErrorSU3(), expsu3(), quda::GMResDR::FlexArnoldiProcedure(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::getPhase(), quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::getPhase(), quda::blas::hDotProduct(), quda::blas::hDotProduct_Anorm(), quda::Deflation::increment(), isUnitary(), quda::Matrix< T, N >::isUnitary(), llfat_mult_su3_an(), llfat_mult_su3_na(), makeAntiHerm(), multiplyVUV(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::operator()(), quda::BiCGstab::operator()(), quda::SimpleBiCGstab::operator()(), quda::MPBiCGstab::operator()(), outerProd(), plaquette(), polarSu3(), rotateCoarseColor(), setUnitarizeLinksConstants(), sigmaOprod(), quda::gauge::Reconstruct< 12, Float, ghostExchange_ >::Unpack(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::Unpack(), and quda::gauge::Reconstruct< 8, Float, ghostExchange_ >::Unpack().
|
inline |
Returns the complex conjugate of z.
Definition at line 1054 of file complex_quda.h.
|
inline |
Definition at line 596 of file quda_matrix.h.
References conj().
void quda::constant | ( | T & | t, |
int | k, | ||
int | s, | ||
int | c | ||
) |
Set all space-time real elements at spin s and color c of the field equal to k
Definition at line 38 of file color_spinor_util.cu.
References parity.
Referenced by genericSource().
|
inline |
Apply the M5 inverse operator at a given site on the lattice. This is the original algorithm as described in Kim and Izubuchi (LATTICE 2013_033), where the b and c coefficients are constant along the Ls dimension, so it is suitable for Shamir and Mobius domain-wall fermions.
shared | Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads. |
[in] | arg | Argument struct containing any meta data and accessors |
[in] | parity | Parity we are on |
[in] | x_b | Checkerboarded 4-d space-time index |
[in] | s_ | Ls dimension coordinate |
Definition at line 295 of file dslash_domain_wall_m5.cuh.
References __fast_pow(), dagger, exp(), in, quda::VectorCache< real, Vector >::load(), out, s, quda::VectorCache< real, Vector >::save(), and quda::VectorCache< real, Vector >::sync().
void quda::contractQuda | ( | const ColorSpinorField & | x, |
const ColorSpinorField & | y, | ||
void * | result, | ||
QudaContractType | cType | ||
) |
Definition at line 107 of file contract.cu.
References checkPrecision, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_DEGRAND_ROSSI_GAMMA_BASIS, QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by contractQuda(), and test().
|
inline |
Convert a vector of type InputType to type OutputType.
The main current limitation is that there is an implicit assumption that N * sizeof(OutputType) / sizeof(InputType) is an integer. E.g., you cannot convert a vector of 9 float2s into a vector of 5 float4s.
x | Output vector. |
y | Input vector. |
N | Length of output vector. |
Definition at line 149 of file convert.h.
References copyFloatN().
__device__ __host__ void quda::convert | ( | Arg & | arg, |
int | parity, | ||
int | x_cb, | ||
int | c_row, | ||
int | c_col | ||
) |
Convert the field from the atomic format to the required computation format, e.g. fixed point to floating point
Definition at line 1096 of file coarse_op_kernel.cuh.
References in.
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
void quda::ConvertCPU | ( | Arg & | arg | ) |
Definition at line 1133 of file coarse_op_kernel.cuh.
References arg(), nColor, and parity.
__global__ void quda::ConvertGPU | ( | Arg | arg | ) |
Definition at line 1147 of file coarse_op_kernel.cuh.
References arg(), nColor, and parity.
|
inline |
Compute the full-lattice coordinates from the input face index. This is used by the Wilson-like halo update kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning.
[out] | idx | The full lattice coordinate |
[out] | cb_idx | The checkerboarded lattice coordinate |
[out] | x | Coordinates we are computing |
[in] | face_idx | Input checkerboarded face index |
[in] | face_num | Face number |
[in] | parity | Parity index |
[in] | arg | Argument struct with required meta data |
Definition at line 488 of file index_helper.cuh.
References EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, QUDA_4D_PC, QUDA_5D_PC, and X.
|
inline |
Overloaded variant of indexFromFaceIndex where we use the parity declared in arg.
Definition at line 585 of file index_helper.cuh.
References arg().
|
inline |
Definition at line 152 of file register_traits.h.
Referenced by computeCloverForceQuda(), copy_and_scale(), copy_scaled(), genericCopyColorSpinor(), quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::load(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::load(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::loadGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::loadGhostEx(), new_load_half(), new_save_half(), old_load_half(), old_save_half(), quda::PreconCG::operator()(), qudaMemcpy_(), qudaMemcpyAsync_(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::save(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhostEx(), and quda::GaugeField::SiteSize().
|
inline |
Definition at line 154 of file register_traits.h.
References errorQuda.
|
inline |
Definition at line 162 of file register_traits.h.
References errorQuda.
|
inline |
Definition at line 170 of file register_traits.h.
References s2f().
|
inline |
Definition at line 171 of file register_traits.h.
References f2i().
|
inline |
Definition at line 173 of file register_traits.h.
References s2f().
|
inline |
Definition at line 177 of file register_traits.h.
References f2i().
|
inline |
Definition at line 181 of file register_traits.h.
References s2f().
|
inline |
Definition at line 185 of file register_traits.h.
References f2i().
|
inline |
Definition at line 189 of file register_traits.h.
References c2f().
|
inline |
Definition at line 190 of file register_traits.h.
References f2i().
|
inline |
Definition at line 192 of file register_traits.h.
References c2f().
|
inline |
Definition at line 196 of file register_traits.h.
References f2i().
|
inline |
Definition at line 200 of file register_traits.h.
References c2f().
|
inline |
Definition at line 204 of file register_traits.h.
References f2i().
|
inline |
Specialized variants of the copy function that include an additional scale factor. Note the scale factor is ignored unless the input type (b) is either a short or char vector.
Definition at line 249 of file register_traits.h.
References copy().
Referenced by quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::load().
|
inline |
Definition at line 254 of file register_traits.h.
References s2f().
|
inline |
Definition at line 262 of file register_traits.h.
References c2f().
|
inline |
Definition at line 270 of file register_traits.h.
References s2f().
|
inline |
Definition at line 276 of file register_traits.h.
References c2f().
|
inline |
Definition at line 282 of file register_traits.h.
References s2f().
|
inline |
Definition at line 287 of file register_traits.h.
References c2f().
|
inline |
Definition at line 209 of file register_traits.h.
References copy().
|
inline |
Definition at line 211 of file register_traits.h.
References f2i().
|
inline |
Definition at line 219 of file register_traits.h.
References f2i().
|
inline |
Definition at line 227 of file register_traits.h.
References f2i().
|
inline |
Definition at line 233 of file register_traits.h.
References f2i().
|
inline |
Definition at line 239 of file register_traits.h.
References f2i().
|
inline |
Definition at line 241 of file register_traits.h.
References f2i().
|
inline |
Definition at line 1061 of file quda_matrix.h.
Referenced by isUnitary(), and unitarizeLinksCPU().
|
inline |
Definition at line 1074 of file quda_matrix.h.
void quda::copyColorSpinor | ( | Arg & | arg, |
const Basis & | basis | ||
) |
CPU function to reorder spinor fields.
Definition at line 136 of file copy_color_spinor.cuh.
References quda::ColorSpinor< Float, Nc, Ns >::data, in, quda::Arg< real, Ns, Nc, order >::nParity, out, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
Referenced by quda::CopyColorSpinor< Ns, Arg >::apply(), and quda::CopyColorSpinor< 4, Arg >::apply().
__global__ void quda::copyColorSpinorKernel | ( | Arg | arg, |
Basis | basis | ||
) |
CUDA kernel to reorder spinor fields. Adopts a form similar to the CPU version, using the same inlined functions.
Definition at line 149 of file copy_color_spinor.cuh.
References quda::ColorSpinor< Float, Nc, Ns >::data, in, out, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
Referenced by quda::CopyColorSpinor< Ns, Arg >::apply(), and quda::CopyColorSpinor< 4, Arg >::apply().
|
inline |
Definition at line 793 of file quda_matrix.h.
Referenced by getRealBidiagMatrix().
void quda::copyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
const int | parity, | ||
const QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm, | ||
float * | srcNorm | ||
) |
Definition at line 360 of file extended_color_spinor_utilities.cu.
References quda::ColorSpinorField::Bytes(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Ndim(), quda::ColorSpinorField::Norm(), quda::ColorSpinorField::NormBytes(), parity, QUDA_EVEN_ODD_SITE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_LEXICOGRAPHIC_SITE_ORDER, QUDA_ODD_EVEN_SITE_ORDER, QUDA_QDPJIT_FIELD_ORDER, quda::ColorSpinorField::SiteOrder(), quda::ColorSpinorField::SiteSubset(), and quda::ColorSpinorField::V().
void quda::CopyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
const int | parity, | ||
const QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm = 0 , |
||
float * | srcNorm = 0 |
||
) |
Definition at line 428 of file extended_color_spinor_utilities.cu.
References errorQuda, quda::ColorSpinorField::Nspin(), and parity.
Referenced by copyExtendedColorSpinor().
void quda::copyExtendedColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
const int | parity, | ||
void * | Dst, | ||
void * | Src, | ||
void * | dstNorm, | ||
void * | srcNorm | ||
) |
Definition at line 454 of file extended_color_spinor_utilities.cu.
References CopyExtendedColorSpinor(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::XSD::operator()().
void quda::copyExtendedGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 |
||
) |
This function is used for copying the gauge field into an extended gauge field. Defined in copy_gauge_extended.cu.
out | The extended output field to which we are copying |
in | The input field from which we are copying |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
Definition at line 343 of file copy_gauge_extended.cu.
References copyGaugeEx(), errorQuda, quda::LatticeField::Ndim(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::LatticeField::X().
Referenced by computeGaugeFixingOVRQuda(), computeHISQForceQuda(), quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), copyExtendedResidentGaugeQuda(), createExtendedGauge(), quda::cpuGaugeField::Gauge_p(), hisq_force_init(), main(), performWuppertalnStep(), quda::cudaGaugeField::saveCPUField(), and saveGaugeQuda().
|
inline |
Definition at line 61 of file convert.h.
Referenced by convert(), SpinorTexture< RegType, StoreType, N >::load(), SpinorTexture< RegType, StoreType, N >::loadGhost(), Texture< InterType, StoreType >::operator[](), and Spinor< RegType, StoreType, N, write >::save().
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
void quda::copyGauge | ( | const InOrder & | inOrder, |
const GaugeField & | out, | ||
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatOut ** | outGhost, | ||
int | type | ||
) |
Definition at line 7 of file copy_gauge_inc.cu.
References errorQuda, in, quda::GaugeField::isNative(), quda::GaugeField::Order(), out, QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_NO, QUDA_STAGGERED_PHASE_TIFR, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::GaugeField::StaggeredPhase().
void quda::copyGauge | ( | Arg & | arg | ) |
Generic CPU gauge reordering and packing
Definition at line 32 of file copy_gauge.cuh.
References in, length, nColor, quda::gauge::Ncolor(), out, and parity.
Referenced by copyGauge().
void quda::copyGauge | ( | OutOrder && | outOrder, |
const InOrder & | inOrder, | ||
const GaugeField & | out, | ||
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
int | type | ||
) |
Definition at line 135 of file copy_gauge_helper.cuh.
References quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply(), arg(), errorQuda, quda::GaugeField::Geometry(), QUDA_COARSE_GEOMETRY, QUDA_CPU_FIELD_LOCATION, QUDA_VECTOR_GEOMETRY, quda::CopyGauge< FloatOut, FloatIn, length, Arg >::set_ghost(), and warningQuda.
void quda::copyGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
FloatOut ** | outGhost, | ||
FloatIn ** | inGhost, | ||
int | type | ||
) |
Definition at line 144 of file copy_gauge_inc.cu.
References checkMomOrder(), errorQuda, in, quda::GaugeField::isNative(), quda::GaugeField::Order(), out, QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::GaugeField::StaggeredPhase().
void quda::copyGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
FloatOut ** | outGhost, | ||
FloatIn ** | inGhost, | ||
int | type | ||
) |
Definition at line 284 of file copy_gauge_inc.cu.
References arg(), checkMomOrder(), copyGauge(), copyGenericGauge(), errorQuda, quda::GaugeField::Geometry(), in, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), quda::GaugeField::Order(), out, quda::LatticeField::Precision(), QUDA_ASQTAD_MOM_LINKS, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_HALF_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_MILC_SITE_GAUGE_ORDER, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, and QUDA_VECTOR_GEOMETRY.
__device__ __host__ void quda::copyGaugeEx | ( | CopyGaugeExArg< OutOrder, InOrder > & | arg, |
int | X, | ||
int | parity | ||
) |
Copy a regular/extended gauge field into an extended/regular gauge field
Definition at line 50 of file copy_gauge_extended.cu.
References quda::CopyGaugeExArg< OutOrder, InOrder >::geometry, quda::CopyGaugeExArg< OutOrder, InOrder >::in, in, length, nColor, quda::gauge::Ncolor(), quda::CopyGaugeExArg< OutOrder, InOrder >::out, out, parity, R, X, quda::CopyGaugeExArg< OutOrder, InOrder >::Xin, and quda::CopyGaugeExArg< OutOrder, InOrder >::Xout.
Referenced by copyExtendedGauge().
void quda::copyGaugeEx | ( | CopyGaugeExArg< OutOrder, InOrder > | arg | ) |
Definition at line 93 of file copy_gauge_extended.cu.
References arg(), parity, quda::CopyGaugeExArg< OutOrder, InOrder >::volume, and X.
void quda::copyGaugeEx | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
const int * | E, | ||
const int * | X, | ||
const int * | faceVolumeCB, | ||
const GaugeField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 158 of file copy_gauge_extended.cu.
References arg(), checkCudaError, quda::GaugeField::Geometry(), quda::LatticeField::Ndim(), and QUDA_CUDA_FIELD_LOCATION.
void quda::copyGaugeEx | ( | const InOrder & | inOrder, |
const int * | X, | ||
GaugeField & | out, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out | ||
) |
Definition at line 169 of file copy_gauge_extended.cu.
References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Nface(), quda::GaugeField::Order(), out, QUDA_MAX_DIM, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, quda::GaugeField::Reconstruct(), quda::LatticeField::SurfaceCB(), X, and quda::LatticeField::X().
void quda::copyGaugeEx | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In | ||
) |
Definition at line 250 of file copy_gauge_extended.cu.
References errorQuda, in, quda::GaugeField::isNative(), quda::GaugeField::Order(), out, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::LatticeField::X().
void quda::copyGaugeEx | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In | ||
) |
Definition at line 324 of file copy_gauge_extended.cu.
References errorQuda, quda::GaugeField::Geometry(), in, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), out, and QUDA_ASQTAD_MOM_LINKS.
__global__ void quda::copyGaugeExKernel | ( | CopyGaugeExArg< OutOrder, InOrder > | arg | ) |
Definition at line 102 of file copy_gauge_extended.cu.
References arg(), parity, quda::CopyGaugeExArg< OutOrder, InOrder >::volume, and X.
__global__ void quda::copyGaugeKernel | ( | Arg | arg | ) |
Generic CUDA gauge reordering and packing. Adopts a form similar to the CPU version, using the same inlined functions.
Definition at line 96 of file copy_gauge.cuh.
References in, length, nColor, quda::gauge::Ncolor(), out, and parity.
void quda::copyGaugeMG | ( | const InOrder & | inOrder, |
GaugeField & | out, | ||
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
sFloatOut * | Out, | ||
sFloatOut ** | outGhost, | ||
int | type | ||
) |
Definition at line 10 of file copy_gauge_mg.cu.
References quda::GaugeField::abs_max(), errorQuda, in, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), out, quda::LatticeField::Precision(), QUDA_HALF_PRECISION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, quda::GaugeField::Reconstruct(), and quda::LatticeField::Scale().
Referenced by copyGenericGaugeMG().
void quda::copyGaugeMG | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
sFloatOut * | Out, | ||
sFloatIn * | In, | ||
sFloatOut ** | outGhost, | ||
sFloatIn ** | inGhost, | ||
int | type | ||
) |
Definition at line 74 of file copy_gauge_mg.cu.
References errorQuda, in, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), out, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().
void quda::copyGaugeMG | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
FloatOut ** | outGhost, | ||
FloatIn ** | inGhost, | ||
int | type | ||
) |
Definition at line 126 of file copy_gauge_mg.cu.
References errorQuda, in, quda::GaugeField::Ncolor(), and out.
void quda::copyGenericClover | ( | CloverField & | out, |
const CloverField & | in, | ||
bool | inverse, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 , |
||
void * | outNorm = 0 , |
||
void * | inNorm = 0 |
||
) |
This generic function is used for copying the clover field where the input and output can be in any order and location.
out | The output field to which we are copying |
in | The input field from which we are copying |
inverse | Whether we are copying the inverse term or not |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
outNorm | The output norm buffer (optional) |
inNorm | The input norm buffer (optional) |
Definition at line 175 of file copy_clover.cu.
References errorQuda, in, inverse(), quda::CloverField::Order(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::cudaCloverField::copy(), quda::FullClover::FullClover(), and quda::cudaCloverField::saveCPUField().
void quda::copyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src | ||
) |
Definition at line 130 of file copy_color_spinor_mg.cuh.
References quda::ColorSpinorField::Bytes(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ndim(), QUDA_EVEN_ODD_SITE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_LEXICOGRAPHIC_SITE_ORDER, QUDA_ODD_EVEN_SITE_ORDER, QUDA_QDPJIT_FIELD_ORDER, quda::ColorSpinorField::SiteOrder(), quda::ColorSpinorField::SiteSubset(), quda::ColorSpinorField::V(), and quda::ColorSpinorField::Volume().
void quda::CopyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src | ||
) |
Definition at line 184 of file copy_color_spinor_mg.cuh.
References errorQuda, and quda::ColorSpinorField::Nspin().
void quda::copyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm, | ||
float * | srcNorm | ||
) |
Definition at line 374 of file copy_color_spinor.cuh.
References errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ndim(), QUDA_EVEN_ODD_SITE_ORDER, QUDA_FULL_SITE_SUBSET, QUDA_LEXICOGRAPHIC_SITE_ORDER, QUDA_ODD_EVEN_SITE_ORDER, QUDA_QDPJIT_FIELD_ORDER, quda::ColorSpinorField::SiteOrder(), quda::ColorSpinorField::SiteSubset(), and quda::ColorSpinorField::Volume().
void quda::CopyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
dstFloat * | Dst, | ||
srcFloat * | Src, | ||
float * | dstNorm = 0 , |
||
float * | srcNorm = 0 |
||
) |
Definition at line 409 of file copy_color_spinor.cuh.
References errorQuda, and quda::ColorSpinorField::Nspin().
void quda::copyGenericColorSpinor | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst = 0 , |
||
void * | Src = 0 , |
||
void * | dstNorm = 0 , |
||
void * | srcNorm = 0 |
||
) |
Definition at line 40 of file copy_color_spinor.cu.
References copyGenericColorSpinorDD(), copyGenericColorSpinorDH(), copyGenericColorSpinorDQ(), copyGenericColorSpinorDS(), copyGenericColorSpinorHD(), copyGenericColorSpinorHH(), copyGenericColorSpinorHQ(), copyGenericColorSpinorHS(), copyGenericColorSpinorMGDD(), copyGenericColorSpinorMGDS(), copyGenericColorSpinorMGHH(), copyGenericColorSpinorMGHQ(), copyGenericColorSpinorMGHS(), copyGenericColorSpinorMGQH(), copyGenericColorSpinorMGQQ(), copyGenericColorSpinorMGQS(), copyGenericColorSpinorMGSD(), copyGenericColorSpinorMGSH(), copyGenericColorSpinorMGSQ(), copyGenericColorSpinorMGSS(), copyGenericColorSpinorQD(), copyGenericColorSpinorQH(), copyGenericColorSpinorQQ(), copyGenericColorSpinorQS(), copyGenericColorSpinorSD(), copyGenericColorSpinorSH(), copyGenericColorSpinorSQ(), copyGenericColorSpinorSS(), errorQuda, quda::ColorSpinorField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and quda::ColorSpinorField::SiteSubset().
Referenced by quda::cpuColorSpinorField::copy(), quda::cudaColorSpinorField::copySpinorField(), quda::cudaColorSpinorField::loadSpinorField(), and quda::cudaColorSpinorField::saveSpinorField().
void quda::copyGenericColorSpinorDD | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_dd.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorDH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_dh.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorDQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_dq.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorDS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_ds.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorHD | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_hd.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorHH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_hh.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorHQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_hq.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorHS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_hs.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGDD | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_dd.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGDS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_ds.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGHH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_hh.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGHQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_hq.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGHS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_hs.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGQH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_qh.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGQQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_qq.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGQS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_qs.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGSD | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_sd.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGSH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_sh.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGSQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_sq.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorMGSS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_mg_ss.cu.
References errorQuda, and INSTANTIATE_COLOR.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorQD | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_qd.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorQH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_qh.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorQQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_qq.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorQS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_qs.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorSD | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_sd.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorSH | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_sh.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorSQ | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_sq.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericColorSpinorSS | ( | ColorSpinorField & | dst, |
const ColorSpinorField & | src, | ||
QudaFieldLocation | location, | ||
void * | Dst, | ||
void * | Src, | ||
void * | a = 0 , |
||
void * | b = 0 |
||
) |
Definition at line 5 of file copy_color_spinor_ss.cu.
Referenced by copyGenericColorSpinor().
void quda::copyGenericGauge | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out = 0 , |
||
void * | In = 0 , |
||
void ** | ghostOut = 0 , |
||
void ** | ghostIn = 0 , |
||
int | type = 0 |
||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in copy_gauge.cu.
out | The output field to which we are copying |
in | The input field from which we are copying |
location | The location of where we are doing the copying (CPU or CUDA) |
Out | The output buffer (optional) |
In | The input buffer (optional) |
ghostOut | The output ghost buffer (optional) |
ghostIn | The input ghost buffer (optional) |
type | The type of copy we are doing (0: body and ghost; otherwise ghost only) |
Definition at line 41 of file copy_gauge.cu.
References copyGenericGaugeDoubleOut(), copyGenericGaugeHalfOut(), copyGenericGaugeMG(), copyGenericGaugeQuarterOut(), copyGenericGaugeSingleOut(), errorQuda, quda::GaugeField::Geometry(), quda::LatticeField::GhostExchange(), quda::GaugeField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_PAD, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), copyGauge(), quda::cudaGaugeField::exchangeGhost(), quda::cpuGaugeField::Gauge_p(), quda::cudaGaugeField::injectGhost(), and quda::cudaGaugeField::saveCPUField().
void quda::copyGenericGaugeDoubleOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 5 of file copy_gauge_double.cu.
Referenced by copyGenericGauge().
void quda::copyGenericGaugeHalfOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 5 of file copy_gauge_half.cu.
References errorQuda, in, and out.
Referenced by copyGenericGauge().
void quda::copyGenericGaugeMG | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 146 of file copy_gauge_mg.cu.
References copyGaugeMG(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by copyGenericGauge().
void quda::copyGenericGaugeQuarterOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 6 of file copy_gauge_quarter.cu.
References errorQuda, in, and out.
Referenced by copyGenericGauge().
void quda::copyGenericGaugeSingleOut | ( | GaugeField & | out, |
const GaugeField & | in, | ||
QudaFieldLocation | location, | ||
void * | Out, | ||
void * | In, | ||
void ** | ghostOut, | ||
void ** | ghostIn, | ||
int | type | ||
) |
Definition at line 5 of file copy_gauge_single.cu.
References errorQuda, in, and out.
Referenced by copyGenericGauge().
void quda::copyGhost | ( | Arg & | arg | ) |
Generic CPU gauge ghost reordering and packing
Definition at line 126 of file copy_gauge.cuh.
References in, length, nColor, quda::gauge::Ncolor(), out, and parity.
__global__ void quda::copyGhostKernel | ( | Arg | arg | ) |
Generic CUDA kernel for copying the ghost zone. Adopts a similar form as the CPU version, using the same inlined functions.
Definition at line 157 of file copy_gauge.cuh.
References in, length, nColor, quda::gauge::Ncolor(), out, and parity.
__device__ __host__ void quda::copyInterior | ( | CopySpinorExArg< OutOrder, InOrder, Basis > & | arg, |
int | X | ||
) |
Definition at line 166 of file extended_color_spinor_utilities.cu.
References quda::CopySpinorExArg< OutOrder, InOrder, Basis >::basis, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::E, in, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::in, out, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::out, parity, quda::CopySpinorExArg< OutOrder, InOrder, Basis >::parity, R, and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::X.
void quda::copyInterior | ( | CopySpinorExArg< OutOrder, InOrder, Basis > & | arg | ) |
Definition at line 217 of file extended_color_spinor_utilities.cu.
References arg(), and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::length.
__global__ void quda::copyInteriorKernel | ( | CopySpinorExArg< OutOrder, InOrder, Basis > | arg | ) |
Definition at line 203 of file extended_color_spinor_utilities.cu.
References arg(), and quda::CopySpinorExArg< OutOrder, InOrder, Basis >::length.
|
inline |
Definition at line 1088 of file quda_matrix.h.
Referenced by unitarizeLinksCPU().
|
inline |
Definition at line 1102 of file quda_matrix.h.
void quda::copyMom | ( | Arg & | arg, |
const GaugeField & | out, | ||
const GaugeField & | in, | ||
QudaFieldLocation | location | ||
) |
Definition at line 278 of file copy_gauge_inc.cu.
References quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply().
void quda::copySpinorEx | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
const Basis | basis, | ||
const int * | E, | ||
const int * | X, | ||
const int | parity, | ||
const bool | extend, | ||
const ColorSpinorField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 271 of file extended_color_spinor_utilities.cu.
References quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >::apply(), arg(), checkCudaError, and QUDA_CUDA_FIELD_LOCATION.
void quda::copySpinorEx | ( | OutOrder | outOrder, |
InOrder | inOrder, | ||
const QudaGammaBasis | outBasis, | ||
const QudaGammaBasis | inBasis, | ||
const int * | E, | ||
const int * | X, | ||
const int | parity, | ||
const bool | extend, | ||
const ColorSpinorField & | meta, | ||
QudaFieldLocation | location | ||
) |
Definition at line 286 of file extended_color_spinor_utilities.cu.
References E, errorQuda, parity, QUDA_DEGRAND_ROSSI_GAMMA_BASIS, QUDA_UKQCD_GAMMA_BASIS, and X.
void quda::corner | ( | T & | p, |
int | v, | ||
int | s, | ||
int | c | ||
) |
Create a corner source with value "v" on color "c" on a single corner overloaded into "s". "s" is encoded via a bitmap: for example, 1010 -> x = 0, y = 1, z = 0, t = 1 corner.
Definition at line 82 of file color_spinor_util.cu.
References errorQuda, getCoords(), parity, and X.
Referenced by genericSource().
|
inline |
Definition at line 46 of file complex_quda.h.
References cos().
Referenced by cos(), quda::Trig< isFixed, T >::Cos(), cosh(), exponentiate_iQ(), genGauss(), link_sanity_check_internal_8(), new_load_half(), polar(), setUnitarizeLinksConstants(), sin(), sinh(), su3Reconstruct8(), and tan().
|
inline |
|
inline |
Definition at line 1125 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
Referenced by cos().
|
inline |
|
inline |
|
inline |
Definition at line 1141 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
Referenced by cosh().
|
inline |
Definition at line 119 of file covDev.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::CovDevArg< Float, nColor, reconstruct_ >::out, and quda::DslashArg< Float >::parity.
__global__ void quda::covDevGPU | ( | Arg | arg | ) |
Definition at line 182 of file covDev.cuh.
References arg(), and quda::DslashArg< Float >::parity.
void * quda::create_gauge_buffer | ( | size_t | bytes, |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 591 of file cuda_gauge_field.cpp.
References quda::GaugeField::geometry, pool_device_malloc, and QUDA_QDP_GAUGE_ORDER.
Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().
void ** quda::create_ghost_buffer | ( | size_t | bytes[], |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 602 of file cuda_gauge_field.cpp.
References quda::GaugeField::geometry, and pool_device_malloc.
Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().
void quda::createDirac | ( | Dirac *& | d, |
Dirac *& | dSloppy, | ||
Dirac *& | dPre, | ||
QudaInvertParam & | param, | ||
const bool | pc_solve | ||
) |
Definition at line 1730 of file interface_quda.cpp.
References quda::Dirac::create(), QudaInvertParam_s::inv_type, QUDA_INC_EIGCG_INVERTER, setDiracParam(), setDiracPreParam(), and setDiracSloppyParam().
Referenced by eigensolveQuda(), invertMultiShiftQuda(), invertMultiSrcQuda(), and invertQuda().
void quda::createDirac | ( | Dirac *& | d, |
Dirac *& | dSloppy, | ||
Dirac *& | dPre, | ||
Dirac *& | dRef, | ||
QudaInvertParam & | param, | ||
const bool | pc_solve | ||
) |
Definition at line 1747 of file interface_quda.cpp.
References quda::Dirac::create(), QudaInvertParam_s::inv_type, QUDA_INC_EIGCG_INVERTER, setDiracParam(), setDiracPreParam(), setDiracRefineParam(), and setDiracSloppyParam().
void quda::createDslashEvents | ( | ) |
Definition at line 95 of file dslash_quda.cu.
References quda::dslash::aux_worker, checkCudaError, quda::dslash::dslash_comms, quda::dslash::dslash_copy, quda::dslash::dslash_exterior_compute, quda::dslash::dslash_interior_compute, quda::dslash::dslash_pack_compute, quda::dslash::dslash_policy_init, quda::dslash::dslashStart, quda::dslash::first_active_p2p_policy, quda::dslash::first_active_policy, quda::dslash::gatherEnd, quda::dslash::gatherStart, mapped_malloc, Nstream, quda::dslash::p2p_policies, quda::dslash::packEnd, quda::dslash::policies, quda::dslash::policy_string, quda::dslash::QUDA_DSLASH_POLICY_DISABLED, quda::dslash::QUDA_P2P_POLICY_DISABLED, quda::dslash::scatterEnd, and quda::dslash::scatterStart.
Referenced by initQudaMemory().
|
inline |
Definition at line 104 of file convert.h.
Referenced by convert< short2, double4 >(), convert< short4, double2 >(), and copyFloatN().
|
static |
Deserialize tunecache from an istream, useful for reading a file or receiving from other nodes.
Definition at line 134 of file tune.cpp.
References quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneKey::aux_n, quda::TuneParam::block, quda::TuneParam::comment, errorQuda, quda::TuneParam::grid, quda::TraceKey::key, quda::TuneKey::name, quda::TuneKey::name_n, param, quda::TuneParam::shared_bytes, quda::TuneParam::time, quda::TuneKey::volume, and quda::TuneKey::volume_n.
Referenced by broadcastTuneCache(), and loadTuneCache().
void quda::destroyDslashEvents | ( | ) |
Definition at line 144 of file dslash_quda.cu.
References checkCudaError, quda::dslash::dslashStart, quda::dslash::gatherEnd, quda::dslash::gatherStart, host_free, Nstream, quda::dslash::packEnd, quda::dslash::scatterEnd, and quda::dslash::scatterStart.
Referenced by endQuda().
long quda::device_allocated_peak | ( | ) |
void quda::device_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h.
Definition at line 301 of file malloc.cpp.
References count, DEVICE, device_pinned_free_(), errorQuda, and track_free().
Referenced by quda::pool::device_free_(), quda::pool::device_malloc_(), and device_pinned_free_().
void * quda::device_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h.
Definition at line 169 of file malloc.cpp.
References quda::MemAlloc::base_size, DEVICE, device_pinned_malloc_(), errorQuda, quda::MemAlloc::size, and track_malloc().
Referenced by quda::pool::device_malloc_(), and device_pinned_malloc_().
void quda::device_pinned_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free device memory allocated with device_pinned_malloc(). This function should only be called via the device_pinned_free() macro, defined in malloc_quda.h.
Definition at line 322 of file malloc.cpp.
References comm_peer2peer_present(), count, device_free_(), DEVICE_PINNED, errorQuda, printfQuda, and track_free().
Referenced by device_free_().
void * quda::device_pinned_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a cuMemAlloc with error-checking. This function is to guarantee a unique memory allocation on the device, since cudaMalloc can be redirected (as is the case with QDPJIT). This should only be called via the device_pinned_malloc() macro, defined in malloc_quda.h.
Definition at line 200 of file malloc.cpp.
References quda::MemAlloc::base_size, comm_peer2peer_present(), device_malloc_(), DEVICE_PINNED, errorQuda, quda::MemAlloc::size, and track_malloc().
Referenced by device_malloc_().
|
inline |
Determines which face a given thread is computing. Also rescales face_idx so that it is relative to a given dimension. If the 5-d variant is called, then it is assumed that arg.threads contains only the 3-d surface of threads but face_idx is a 4-d index (surface * fifth dimension). At present multi-src staggered uses the 4-d variant since the face_idx that is passed in is the 3-d surface, not the 4-d one.
[out] | face_idx | Face index |
[in] | tid | Checkerboard volume index |
[in] | arg | Input parameters |
Definition at line 783 of file index_helper.cuh.
References s.
Referenced by packKernel(), and packStaggeredKernel().
|
inline |
Definition at line 809 of file index_helper.cuh.
References arg().
void quda::disable_policy | ( | DslashCoarsePolicy | p | ) |
Definition at line 606 of file dslash_coarse.cu.
References DSLASH_COARSE_POLICY_DISABLED, and policies().
void quda::disableProfileCount | ( | ) |
Disable the profile kernel counting.
Definition at line 125 of file tune.cpp.
Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().
|
inline |
Helper function to determine if we should do interior computation.
[in] | dim | Dimension we are working on |
Definition at line 35 of file dslash_helper.cuh.
References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.
|
static |
Helper function to determine if we should do interior computation.
Definition at line 72 of file dslash_coarse.cuh.
References DSLASH_FULL, DSLASH_INTERIOR, and s.
|
inline |
Helper function to determine if we should do halo computation.
[in] | dim | Dimension we are working on. If dim=-1 (default argument) then we return true if type is any halo kernel. |
Definition at line 17 of file dslash_helper.cuh.
References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.
|
static |
Helper function to determine if we should do halo computation.
Definition at line 58 of file dslash_coarse.cuh.
References DSLASH_EXTERIOR, and DSLASH_FULL.
|
inline |
Definition at line 44 of file dslash_domain_wall_4d.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, s, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.
void quda::domainWall4DCPU | ( | Arg & | arg | ) |
Definition at line 74 of file dslash_domain_wall_4d.cuh.
References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.
__global__ void quda::domainWall4DGPU | ( | Arg | arg | ) |
Definition at line 90 of file dslash_domain_wall_4d.cuh.
References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.
|
inline |
Definition at line 28 of file dslash_domain_wall_5d.cuh.
References arg(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, quda::WilsonArg< Float, nColor, reconstruct_ >::in, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::WilsonArg< Float, nColor, reconstruct_ >::reconstruct, s, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.
void quda::domainWall5DCPU | ( | Arg & | arg | ) |
Definition at line 86 of file dslash_domain_wall_5d.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::domainWall5DGPU | ( | Arg | arg | ) |
Definition at line 100 of file dslash_domain_wall_5d.cuh.
References arg(), quda::DslashArg< Float >::nParity, quda::DslashArg< Float >::parity, and s.
|
inline |
Apply the D5 operator at a given site.
[in] | arg | Argument struct containing any meta data and accessors |
[in] | parity | Parity we are on |
[in] | x_b | Checkerboarded 4-d space-time index |
[in] | s | Ls dimension coordinate |
Definition at line 191 of file dslash_domain_wall_m5.cuh.
References quda::coeff_type< real, is_variable, Arg >::a(), quda::coeff_type< real, is_variable, Arg >::b(), quda::coeff_type< real, is_variable, Arg >::c(), dagger, DSLASH5_DWF, DSLASH5_MOBIUS, DSLASH5_MOBIUS_PRE, in, out, and quda::blas::xpay().
void quda::dslash5CPU | ( | Arg & | arg | ) |
CPU kernel for applying the D5 operator.
[in] | arg | Argument struct containing any meta data and accessors |
Definition at line 250 of file dslash_domain_wall_m5.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.
__global__ void quda::dslash5GPU | ( | Arg | arg | ) |
GPU kernel for applying the D5 operator.
[in] | arg | Argument struct containing any meta data and accessors |
Definition at line 266 of file dslash_domain_wall_m5.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.
|
inline |
Apply the M5 inverse operator at a given site on the lattice.
shared | Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads. |
[in] | arg | Argument struct containing any meta data and accessors |
[in] | parity | Parity we are on |
[in] | x_b | Checkerboarded 4-d space-time index |
[in] | s | Ls dimension coordinate |
Definition at line 433 of file dslash_domain_wall_m5.cuh.
References quda::coeff_type< real, is_variable, Arg >::a(), arg(), out, parity, s, and quda::blas::xpay().
__global__ void quda::dslash5invGPU | ( | Arg | arg | ) |
GPU kernel for applying the M5 inverse operator.
shared | Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads. |
[in] | arg | Argument struct containing any meta data and accessors |
Definition at line 463 of file dslash_domain_wall_m5.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and s.
void quda::enable_policy | ( | DslashCoarsePolicy | p | ) |
Definition at line 602 of file dslash_coarse.cu.
References policies().
Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune().
void quda::enableProfileCount | ( | ) |
Enable the profile kernel counting.
Definition at line 126 of file tune.cpp.
Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().
__device__ __host__ double quda::ErrorSU3 | ( | const Matrix< Cmplx, 3 > & | matrix | ) |
Definition at line 1164 of file quda_matrix.h.
References conj(), and norm().
Referenced by computeOvrImpSTOUTStep(), and computeSTOUTStep().
void quda::exchangeExtendedGhost | ( | cudaColorSpinorField * | spinor, |
int | R[], | ||
int | parity, | ||
cudaStream_t * | stream_p | ||
) |
Definition at line 25 of file extended_color_spinor_utilities.cu.
References commDim, quda::cudaColorSpinorField::commsQuery(), quda::cudaColorSpinorField::commsStart(), dagger, quda::cudaColorSpinorField::gather(), quda::dslash::gatherEnd, quda::cudaColorSpinorField::packExtended(), qudaDeviceSynchronize, qudaEventRecord(), quda::cudaColorSpinorField::scatterExtended(), and streams.
Referenced by quda::GaugeField::Nface(), and quda::XSD::operator()().
|
inline |
Definition at line 96 of file complex_quda.h.
References exp().
Referenced by constantInv(), exp(), expsu3(), pow(), smallSVD(), tanh(), and test().
|
inline |
Definition at line 1150 of file complex_quda.h.
References exp(), and polar().
|
inline |
Definition at line 1156 of file complex_quda.h.
References quda::complex< float >::imag(), polar(), and quda::complex< float >::real().
Referenced by exp().
|
inline |
Definition at line 1191 of file quda_matrix.h.
References acos(), cos(), getDeterminant(), getTrace(), parity, pow(), setIdentity(), setZero(), sin(), and sqrt().
Referenced by computeOvrImpSTOUTStep(), and computeSTOUTStep().
__device__ __host__ void quda::expsu3 | ( | Matrix< complex< Float >, 3 > & | q | ) |
void quda::extendedCopyColorSpinor | ( | InOrder & | inOrder, |
ColorSpinorField & | out, | ||
QudaGammaBasis | inBasis, | ||
const int * | E, | ||
const int * | X, | ||
const int | parity, | ||
const bool | extend, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
float * | outNorm | ||
) |
Definition at line 313 of file extended_color_spinor_utilities.cu.
References E, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::isNative(), out, parity, and X.
void quda::extendedCopyColorSpinor | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const int | parity, | ||
const QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
float * | outNorm, | ||
float * | inNorm | ||
) |
Definition at line 329 of file extended_color_spinor_utilities.cu.
References E, errorQuda, quda::ColorSpinorField::GammaBasis(), quda::ColorSpinorField::isNative(), out, parity, quda::ColorSpinorField::Volume(), X, and quda::ColorSpinorField::X().
void quda::extractExtendedGaugeGhost | ( | const GaugeField & | u, |
int | dim, | ||
const int * | R, | ||
void ** | ghost, | ||
bool | extract | ||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost_extended.cu.
u | The gauge field from which we want to extract/pack the ghost zone |
dim | The dimension in which we are packing/unpacking |
ghost | The array where we want to pack/unpack the ghost zone into/from |
extract | Whether we are extracting into ghost or injecting from ghost |
Definition at line 418 of file extract_gauge_ghost_extended.cu.
References errorQuda, extractGhostEx(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::cudaGaugeField::exchangeExtendedGhost(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cpuGaugeField::Gauge_p().
void quda::extractGaugeGhost | ( | const GaugeField & | u, |
void ** | ghost, | ||
bool | extract = true , |
||
int | offset = 0 |
||
) |
This function is used for extracting the gauge ghost zone from a gauge field array. Defined in extract_gauge_ghost.cu.
u | The gauge field from which we want to extract the ghost zone |
ghost | The array where we want to pack the ghost zone into |
extract | Whether we are extracting into ghost or injecting from ghost |
offset | By default we exchange the nDim site-vector of links in the first nDim dimensions; offset allows us to instead exchange the links in nDim+offset dimensions. This is used to facilitate sending bi-directional links, which is needed for the coarse links. |
Definition at line 105 of file extract_gauge_ghost.cu.
References errorQuda, extractGaugeGhostMG(), extractGhost(), quda::GaugeField::Ncolor(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::cudaGaugeField::exchangeGhost(), quda::cpuGaugeField::exchangeGhost(), quda::cpuGaugeField::Gauge_p(), quda::cudaGaugeField::injectGhost(), and quda::cpuGaugeField::injectGhost().
void quda::extractGaugeGhostMG | ( | const GaugeField & | u, |
void ** | ghost, | ||
bool | extract, | ||
int | offset | ||
) |
Definition at line 75 of file extract_gauge_ghost_mg.cu.
References errorQuda, extractGhostMG(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by extractGaugeGhost(), and extractGhost().
void quda::extractGhost | ( | const GaugeField & | u, |
Float ** | Ghost, | ||
bool | extract, | ||
int | offset | ||
) |
This is the template driver for extractGhost
Definition at line 10 of file extract_gauge_ghost.cu.
References errorQuda, extractGaugeGhostMG(), quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_NO, QUDA_TIFR_GAUGE_ORDER, QUDA_TIFR_PADDED_GAUGE_ORDER, quda::GaugeField::Reconstruct(), and quda::GaugeField::StaggeredPhase().
Referenced by extractGaugeGhost().
void quda::extractGhost | ( | Arg & | arg | ) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 46 of file extract_gauge_ghost_helper.cuh.
void quda::extractGhost | ( | Order | order, |
const GaugeField & | u, | ||
QudaFieldLocation | location, | ||
bool | extract, | ||
int | offset | ||
) |
Generic gauge ghost extraction and packing (or the converse). NB: this routine is specialized to four dimensions.
Definition at line 236 of file extract_gauge_ghost_helper.cuh.
References quda::ExtractGhost< nDim, Arg >::apply(), arg(), commDim, extractor(), X, and quda::LatticeField::X().
void quda::extractGhostEx | ( | ExtractGhostExArg< Order, nDim, dim > | arg | ) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 100 of file extract_gauge_ghost_extended.cu.
References quda::ExtractGhostExArg< Order, nDim, dim >::A0, quda::ExtractGhostExArg< Order, nDim, dim >::A1, arg(), quda::ExtractGhostExArg< Order, nDim, dim >::B0, quda::ExtractGhostExArg< Order, nDim, dim >::B1, quda::ExtractGhostExArg< Order, nDim, dim >::C0, quda::ExtractGhostExArg< Order, nDim, dim >::C1, quda::ExtractGhostExArg< Order, nDim, dim >::order, parity, quda::ExtractGhostExArg< Order, nDim, dim >::R, and quda::ExtractGhostExArg< Order, nDim, dim >::X.
Referenced by extractExtendedGaugeGhost().
void quda::extractGhostEx | ( | Order | order, |
const int | dim, | ||
const int * | surfaceCB, | ||
const int * | E, | ||
const int * | R, | ||
bool | extract, | ||
const GaugeField & | u, | ||
QudaFieldLocation | location | ||
) |
Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
E | the extended gauge dimensions |
R | array holding the radius of the extended region |
extract | Whether we are extracting or injecting the ghost zone |
Definition at line 258 of file extract_gauge_ghost_extended.cu.
References quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), arg(), checkCudaError, commDim, errorQuda, extractor(), and X.
void quda::extractGhostEx | ( | const GaugeField & | u, |
int | dim, | ||
const int * | R, | ||
Float ** | Ghost, | ||
bool | extract | ||
) |
This is the template driver for extractGhost
Definition at line 330 of file extract_gauge_ghost_extended.cu.
References errorQuda, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_BQCD_GAUGE_ORDER, QUDA_CPS_WILSON_GAUGE_ORDER, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_MILC_GAUGE_ORDER, QUDA_QDP_GAUGE_ORDER, QUDA_QDPJIT_GAUGE_ORDER, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_TIFR_GAUGE_ORDER, R, quda::GaugeField::Reconstruct(), quda::LatticeField::SurfaceCB(), and quda::LatticeField::X().
__global__ void quda::extractGhostExKernel | ( | ExtractGhostExArg< Order, nDim, dim > | arg | ) |
Generic GPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence. Generic CPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions.
Definition at line 144 of file extract_gauge_ghost_extended.cu.
References quda::ExtractGhostExArg< Order, nDim, dim >::A0, quda::ExtractGhostExArg< Order, nDim, dim >::A1, arg(), quda::ExtractGhostExArg< Order, nDim, dim >::B0, quda::ExtractGhostExArg< Order, nDim, dim >::B1, quda::ExtractGhostExArg< Order, nDim, dim >::C0, quda::ExtractGhostExArg< Order, nDim, dim >::C1, parity, quda::ExtractGhostExArg< Order, nDim, dim >::R, quda::ExtractGhostExArg< Order, nDim, dim >::threads, quda::ExtractGhostExArg< Order, nDim, dim >::X, and X.
__global__ void quda::extractGhostKernel | ( | Arg | arg | ) |
Generic GPU gauge ghost extraction and packing. NB: this routine is specialized to four dimensions. FIXME: this implementation will have two-way warp divergence.
Definition at line 114 of file extract_gauge_ghost_helper.cuh.
void quda::extractGhostMG | ( | const GaugeField & | u, |
storeFloat ** | Ghost, | ||
bool | extract, | ||
int | offset | ||
) |
This is the template driver for extractGhost
Definition at line 15 of file extract_gauge_ghost_mg.cu.
References errorQuda, quda::GaugeField::isNative(), length, quda::GaugeField::Order(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_QDP_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().
Referenced by extractGaugeGhostMG().
void quda::extractGhostMG | ( | const GaugeField & | u, |
Float ** | Ghost, | ||
bool | extract, | ||
int | offset | ||
) |
This is the template driver for extractGhost
Definition at line 54 of file extract_gauge_ghost_mg.cu.
References errorQuda, quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), QUDA_COARSE_LINKS, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().
__device__ __host__ void quda::extractor | ( | Arg & | arg, |
int | dir, | ||
int | a, | ||
int | b, | ||
int | c, | ||
int | d, | ||
int | g, | ||
int | parity | ||
) |
Definition at line 56 of file extract_gauge_ghost_extended.cu.
References quda::Matrix< T, N >::data, length, and quda::gauge::Ncolor().
Referenced by extractGhost(), and extractGhostEx().
|
inline |
Definition at line 93 of file convert.h.
Referenced by convert< short2, float4 >(), convert< short4, float2 >(), copy(), copy_scaled(), and copyFloatN().
void quda::fatLongKSLink | ( | cudaGaugeField * | fat, |
cudaGaugeField * | lng, | ||
const cudaGaugeField & | gauge, | ||
const double * | coeff | ||
) |
Compute the fat and long links for improved staggered (Kogut-Susskind) fermions.
fat[out] | The computed fat link |
lng[out] | The computed long link (only computed if lng!=0) |
u[in] | The input gauge field |
coeff[in] | Array of path coefficients |
Definition at line 532 of file llfat_quda.cu.
References checkCudaError, computeStaple(), quda::GaugeFieldParam::create, errorQuda, gParam, MIN_COEFF, quda::LatticeFieldParam::Precision(), QUDA_NULL_FIELD_CREATE, QUDA_RECONSTRUCT_NO, qudaDeviceSynchronize, quda::GaugeFieldParam::reconstruct, quda::GaugeField::Reconstruct(), quda::GaugeFieldParam::setPrecision(), and quda::LatticeField::X().
Referenced by computeKSLinkQuda().
|
inline |
Definition at line 50 of file malloc_quda.h.
References get_pointer_location(), r_slant(), str_end(), and str_slant().
|
static |
Definition at line 210 of file inv_eigcg_quda.cpp.
References quda::SolverParam::delta, quda::SolverParam::gflops, quda::SolverParam::inv_type, quda::SolverParam::inv_type_precondition, quda::SolverParam::is_preconditioner, quda::SolverParam::iter, quda::SolverParam::maxiter, quda::SolverParam::maxiter_precondition, quda::SolverParam::precision, quda::SolverParam::precision_precondition, quda::SolverParam::precision_sloppy, quda::SolverParam::preserve_source, QUDA_EIGCG_INVERTER, QUDA_INVALID_INVERTER, QUDA_PRESERVE_SOURCE_NO, QUDA_PRESERVE_SOURCE_YES, quda::SolverParam::secs, quda::SolverParam::tol, quda::SolverParam::tol_precondition, and quda::SolverParam::use_sloppy_partial_accumulator.
Referenced by quda::IncEigCG::IncEigCG().
void quda::fillFGMResDRInnerSolveParam | ( | SolverParam & | inner, |
const SolverParam & | outer | ||
) |
Definition at line 187 of file inv_gmresdr_quda.cpp.
References quda::SolverParam::delta, quda::SolverParam::gflops, quda::SolverParam::global_reduction, quda::SolverParam::inv_type, quda::SolverParam::inv_type_precondition, quda::SolverParam::is_preconditioner, quda::SolverParam::iter, quda::SolverParam::maxiter, quda::SolverParam::maxiter_precondition, quda::SolverParam::precision, quda::SolverParam::precision_precondition, quda::SolverParam::precision_sloppy, quda::SolverParam::preserve_source, QUDA_INVALID_INVERTER, QUDA_PRESERVE_SOURCE_NO, QUDA_PRESERVE_SOURCE_YES, quda::SolverParam::secs, quda::SolverParam::tol, quda::SolverParam::tol_precondition, and warningQuda.
Referenced by quda::GMResDR::GMResDR().
|
static |
Definition at line 233 of file inv_eigcg_quda.cpp.
References quda::SolverParam::delta, quda::SolverParam::gflops, quda::SolverParam::inv_type, quda::SolverParam::iter, quda::SolverParam::maxiter, quda::SolverParam::precision, quda::SolverParam::precision_precondition, quda::SolverParam::precision_sloppy, QUDA_CG_INVERTER, QUDA_USE_INIT_GUESS_YES, quda::SolverParam::secs, quda::SolverParam::tol, quda::SolverParam::tol_restart, quda::SolverParam::use_init_guess, and quda::SolverParam::use_sloppy_partial_accumulator.
Referenced by quda::IncEigCG::IncEigCG().
void quda::fillInnerSolveParam | ( | SolverParam & | inner, |
const SolverParam & | outer | ||
) |
Definition at line 25 of file inv_gcr_quda.cpp.
References quda::SolverParam::compute_true_res, quda::SolverParam::delta, quda::SolverParam::gflops, quda::SolverParam::global_reduction, quda::SolverParam::inv_type_precondition, quda::SolverParam::is_preconditioner, quda::SolverParam::iter, quda::SolverParam::maxiter, quda::SolverParam::maxiter_precondition, quda::SolverParam::Nkrylov, quda::SolverParam::Nsteps, quda::SolverParam::precision, quda::SolverParam::precision_precondition, quda::SolverParam::precision_sloppy, quda::SolverParam::precondition_cycle, quda::SolverParam::preserve_source, QUDA_CA_GCR_INVERTER, QUDA_INVALID_INVERTER, QUDA_INVALID_RESIDUAL, QUDA_INVALID_SCHWARZ, QUDA_L2_RELATIVE_RESIDUAL, QUDA_MR_INVERTER, QUDA_PRESERVE_SOURCE_YES, QUDA_USE_INIT_GUESS_NO, quda::SolverParam::residual_type, quda::SolverParam::schwarz_type, quda::SolverParam::secs, quda::SolverParam::sloppy_converge, quda::SolverParam::tol, quda::SolverParam::tol_precondition, quda::SolverParam::use_init_guess, and quda::SolverParam::verbosity_precondition.
Referenced by quda::GCR::GCR(), and quda::BiCGstab::operator()().
|
static |
Definition at line 18 of file inv_pcg_quda.cpp.
References quda::SolverParam::delta, quda::SolverParam::gflops, quda::SolverParam::inv_type, quda::SolverParam::inv_type_precondition, quda::SolverParam::is_preconditioner, quda::SolverParam::iter, quda::SolverParam::maxiter, quda::SolverParam::maxiter_precondition, quda::SolverParam::precision, quda::SolverParam::precision_precondition, quda::SolverParam::precision_sloppy, quda::SolverParam::preserve_source, QUDA_INVALID_INVERTER, QUDA_PCG_INVERTER, QUDA_PRESERVE_SOURCE_NO, QUDA_PRESERVE_SOURCE_YES, quda::SolverParam::secs, quda::SolverParam::tol, and quda::SolverParam::tol_precondition.
Referenced by quda::PreconCG::PreconCG().
void quda::flushForceMonitor | ( | ) |
Flush any outstanding force monitoring information.
Definition at line 29 of file momentum.cu.
References comm_rank(), count, forceMonitor(), getVerbosity(), printfQuda, and QUDA_VERBOSE.
Referenced by endQuda(), and forceRecord().
void quda::flushProfile | ( | ) |
Flush profile contents, setting all counts to zero.
Definition at line 504 of file tune.cpp.
References quda::TuneParam::n_calls, and param.
Referenced by newDeflationQuda(), and quda::TunableVectorYZ::resizeStep().
bool quda::forceMonitor | ( | ) |
Whether we are monitoring the force or not.
Definition at line 13 of file momentum.cu.
References quda::cublas::init().
Referenced by computeGaugeForceQuda(), computeMomAction(), and flushForceMonitor().
void quda::forceRecord | ( | double2 & | force, |
double | dt, | ||
const char * | fname | ||
) |
Definition at line 57 of file momentum.cu.
References arg(), quda::blas::bytes, comm_allreduce(), comm_allreduce_max_array(), comm_rank(), computeMomAction(), errorQuda, quda::blas::flops, flushForceMonitor(), getTuning(), getVerbosity(), LAUNCH_KERNEL_LOCAL_PARITY, quda::LatticeField::Location(), mu, quda::GaugeField::Order(), parity, QUDA_CUDA_FIELD_LOCATION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_10, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), stream, tuneLaunch(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().
Referenced by computeMomAction().
void quda::free_gauge_buffer | ( | void * | buffer, |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 614 of file cuda_gauge_field.cpp.
References quda::GaugeField::geometry, pool_device_free, and QUDA_QDP_GAUGE_ORDER.
Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().
void quda::free_ghost_buffer | ( | void ** | buffer, |
QudaGaugeFieldOrder | order, | ||
QudaFieldGeometry | geometry | ||
) |
Definition at line 623 of file cuda_gauge_field.cpp.
References quda::GaugeField::geometry, and pool_device_free.
Referenced by quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cpuGaugeField::exchangeExtendedGhost(), and quda::cudaGaugeField::saveCPUField().
void quda::gamma5 | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in | ||
) |
Applies a gamma5 matrix to a spinor (wrapper to ApplyGamma)
[out] | out | Output field |
[in] | in | Input field |
Definition at line 461 of file dslash_quda.cu.
References ApplyGamma().
Referenced by computeCloverForceQuda().
void quda::gammaCPU | ( | Arg | arg | ) |
Definition at line 225 of file dslash_quda.cu.
References in, quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::gammaGPU | ( | Arg | arg | ) |
Definition at line 240 of file dslash_quda.cu.
References in, quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__forceinline__ __device__ void quda::GaugeFixHit_AtomicAdd | ( | Matrix< complex< Float >, NCOLORS > & | link, |
const Float | relax_boost, | ||
const int | tid | ||
) |
Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation needs 8x more shared memory than the implementation using atomicadd.
Definition at line 69 of file gauge_fix_ovr_hit_devf.cuh.
References atomicAdd().
__forceinline__ __device__ void quda::GaugeFixHit_AtomicAdd | ( | Matrix< complex< Float >, NCOLORS > & | link, |
Matrix< complex< Float >, NCOLORS > & | link1, | ||
const Float | relax_boost, | ||
const int | tid | ||
) |
Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation needs 8x more shared memory than the implementation using atomicadd.
Definition at line 392 of file gauge_fix_ovr_hit_devf.cuh.
References atomicAdd().
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd | ( | Matrix< complex< Float >, NCOLORS > & | link, |
const Float | relax_boost, | ||
const int | tid | ||
) |
Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site; the reduction is performed by shared memory using atomicadd.
Definition at line 159 of file gauge_fix_ovr_hit_devf.cuh.
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd | ( | Matrix< complex< Float >, NCOLORS > & | link, |
Matrix< complex< Float >, NCOLORS > & | link1, | ||
const Float | relax_boost, | ||
const int | tid | ||
) |
Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site; the reduction is performed by shared memory using atomicadd.
Definition at line 486 of file gauge_fix_ovr_hit_devf.cuh.
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd_LessSM | ( | Matrix< complex< Float >, NCOLORS > & | link, |
const Float | relax_boost, | ||
const int | tid | ||
) |
Device function to perform gauge fixing with overrelaxation. Uses 8 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation uses the same amount of shared memory as the atomicadd implementation, with more thread block synchronization.
Definition at line 254 of file gauge_fix_ovr_hit_devf.cuh.
__forceinline__ __device__ void quda::GaugeFixHit_NoAtomicAdd_LessSM | ( | Matrix< complex< Float >, NCOLORS > & | link, |
Matrix< complex< Float >, NCOLORS > & | link1, | ||
const Float | relax_boost, | ||
const int | tid | ||
) |
Device function to perform gauge fixing with overrelaxation. Uses 4 threads per lattice site; the reduction is performed by shared memory without using atomicadd. This implementation uses the same amount of shared memory as the atomicadd implementation, with more thread block synchronization.
Definition at line 563 of file gauge_fix_ovr_hit_devf.cuh.
void quda::gaugefixingFFT | ( | cudaGaugeField & | data, |
const int | gauge_dir, | ||
const int | Nsteps, | ||
const int | verbose_interval, | ||
const double | alpha, | ||
const int | autotune, | ||
const double | tolerance, | ||
const int | stopWtheta | ||
) |
Gauge fixing using the steepest descent method with FFTs; supports single GPU only.
[in,out] | data | quda gauge field |
[in] | gauge_dir | 3 for Coulomb gauge fixing, other for Landau gauge fixing |
[in] | Nsteps | maximum number of steps to perform gauge fixing |
[in] | verbose_interval | print gauge fixing info when iteration count is a multiple of this |
[in] | alpha | gauge fixing parameter of the method, most common value is 0.08 |
[in] | autotune | 1 to autotune the method, i.e., if the Fg inverts its tendency we decrease the alpha value |
[in] | tolerance | tolerance value to stop the method; if this value is zero then the method stops when the iteration count reaches the maximum number of steps defined by Nsteps |
[in] | stopWtheta | 0 for MILC criterion and 1 to use the theta value |
Definition at line 1083 of file gauge_fix_fft.cu.
References comm_dim_partitioned(), errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by computeGaugeFixingFFTQuda(), and TEST_F().
void quda::gaugefixingOVR | ( | cudaGaugeField & | data, |
const int | gauge_dir, | ||
const int | Nsteps, | ||
const int | verbose_interval, | ||
const double | relax_boost, | ||
const double | tolerance, | ||
const int | reunit_interval, | ||
const int | stopWtheta | ||
) |
Gauge fixing with overrelaxation with support for single and multi GPU.
[in,out] | data | quda gauge field |
[in] | gauge_dir | 3 for Coulomb gauge fixing, other for Landau gauge fixing |
[in] | Nsteps | maximum number of steps to perform gauge fixing |
[in] | verbose_interval | print gauge fixing info when iteration count is a multiple of this |
[in] | relax_boost | gauge fixing parameter of the overrelaxation method, most common value is 1.5 or 1.7. |
[in] | tolerance | tolerance value to stop the method; if this value is zero then the method stops when the iteration count reaches the maximum number of steps defined by Nsteps |
[in] | reunit_interval | reunitarize gauge field when iteration count is a multiple of this |
[in] | stopWtheta | 0 for MILC criterion and 1 to use the theta value |
Definition at line 1606 of file gauge_fix_ovr.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by computeGaugeFixingOVRQuda(), and TEST_F().
void quda::gaugeForce | ( | GaugeField & | mom, |
const GaugeField & | u, | ||
double | coeff, | ||
int *** | input_path, | ||
int * | length, | ||
double * | path_coeff, | ||
int | num_paths, | ||
int | max_length | ||
) |
Compute the gauge-force contribution to the momentum.
[out] | mom | Momentum field |
[in] | u | Gauge field (extended when running on multiple GPUs) |
[in] | coeff | Step-size coefficient |
[in] | input_path | Host-array holding all path contributions for the gauge action |
[in] | length | Host array holding the length of all paths |
[in] | path_coeff | Coefficient of each path |
[in] | num_paths | Number of paths |
[in] | max_length | Maximum length of each path |
Definition at line 340 of file gauge_force.cu.
References errorQuda, length, quda::LatticeField::Location(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by computeGaugeForceQuda().
void quda::gaugeGauss | ( | GaugeField & | U, |
RNG & | rngstate, | ||
double | epsilon | ||
) |
Generate Gaussian-distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian-distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian-distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).
[out] | U | The output gauge field |
[in] | rngstate | random states |
[in] | sigma | Width of Gaussian distribution |
Definition at line 145 of file gauge_random.cu.
References errorQuda, quda::GaugeField::exchangeExtendedGhost(), quda::GaugeField::exchangeGhost(), getVerbosity(), quda::LatticeField::GhostExchange(), quda::GaugeField::isNative(), quda::GaugeField::LinkType(), quda::GaugeField::Ncolor(), quda::GaugeField::Order(), quda::LatticeField::Precision(), printfQuda, QUDA_DOUBLE_PRECISION, QUDA_GHOST_EXCHANGE_EXTENDED, QUDA_GHOST_EXCHANGE_PAD, QUDA_MOMENTUM_LINKS, QUDA_RECONSTRUCT_10, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_13, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_9, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, QUDA_SU3_LINKS, QUDA_SUMMARIZE, quda::LatticeField::R(), quda::GaugeField::Reconstruct(), quda::GaugeGaussArg< Float, recon, group_ >::rngstate, quda::GaugeGaussArg< Float, recon, group_ >::sigma, and quda::GaugeGaussArg< Float, recon, group_ >::U.
Referenced by gaugeGauss(), gaussGaugeQuda(), and genGauss().
void quda::gaugeGauss | ( | GaugeField & | U, |
unsigned long long | seed, | ||
double | epsilon | ||
) |
Generate Gaussian-distributed su(N) or SU(N) fields. If U is a momentum field, then we generate a random Gaussian-distributed field in the Lie algebra using the anti-Hermitian convention. If U is in the group then we create a Gaussian-distributed su(n) field and exponentiate it, e.g., U = exp(sigma * H), where H is the distributed su(n) field and sigma is the width of the distribution (sigma = 0 results in a free field, and sigma = 1 has maximum disorder).
[out] | U | The GaugeField |
[in] | seed | The seed used for the RNG |
[in] | sigma | Width of the Gaussian distribution |
Definition at line 187 of file gauge_random.cu.
References gaugeGauss(), quda::RNG::Init(), and quda::RNG::Release().
__device__ __host__ Link quda::gauss_su3 | ( | cuRNGState & | localState | ) |
int quda::genericCompare | ( | const cpuColorSpinorField & | a, |
const cpuColorSpinorField & | b, | ||
int | tol | ||
) |
Definition at line 241 of file color_spinor_util.cu.
References quda::colorspinor::FieldOrderCB< Float, nSpin, nColor, nVec, order, storeFloat, ghostFloat, disable_ghost, block_float, use_tex >::abs_max(), quda::blas::ax(), compareSpinor(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, and tol.
Referenced by quda::cpuColorSpinorField::Compare().
void quda::genericCopyColorSpinor | ( | OutOrder & | outOrder, |
const InOrder & | inOrder, | ||
const ColorSpinorField & | out, | ||
QudaFieldLocation | location | ||
) |
Definition at line 84 of file copy_color_spinor_mg.cuh.
References quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >::apply(), and copy().
void quda::genericCopyColorSpinor | ( | InOrder & | inOrder, |
ColorSpinorField & | out, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out | ||
) |
Decide on the output order
Definition at line 92 of file copy_color_spinor_mg.cuh.
References errorQuda, quda::ColorSpinorField::FieldOrder(), out, QUDA_FLOAT2_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.
void quda::genericCopyColorSpinor | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In | ||
) |
Decide on the input order
Definition at line 111 of file copy_color_spinor_mg.cuh.
References errorQuda, quda::ColorSpinorField::FieldOrder(), out, QUDA_FLOAT2_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.
void quda::genericCopyColorSpinor | ( | Out & | outOrder, |
const In & | inOrder, | ||
const ColorSpinorField & | out, | ||
const ColorSpinorField & | in, | ||
QudaFieldLocation | location | ||
) |
Decide whether we are changing basis or not
Definition at line 270 of file copy_color_spinor.cuh.
References quda::CopyColorSpinor< Ns, Arg >::apply(), arg(), and copy().
void quda::genericCopyColorSpinor | ( | InOrder & | inOrder, |
ColorSpinorField & | out, | ||
const ColorSpinorField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
float * | outNorm | ||
) |
Decide on the output order
Definition at line 280 of file copy_color_spinor.cuh.
References errorQuda, quda::ColorSpinorField::FieldOrder(), in, quda::ColorSpinorField::isNative(), out, QUDA_FLOAT2_FIELD_ORDER, QUDA_PADDED_SPACE_SPIN_COLOR_FIELD_ORDER, QUDA_QDPJIT_FIELD_ORDER, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.
void quda::genericCopyColorSpinor | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
QudaFieldLocation | location, | ||
FloatOut * | Out, | ||
FloatIn * | In, | ||
float * | outNorm, | ||
float * | inNorm | ||
) |
Decide on the input order
Definition at line 330 of file copy_color_spinor.cuh.
References errorQuda, quda::ColorSpinorField::FieldOrder(), in, quda::ColorSpinorField::isNative(), out, QUDA_FLOAT2_FIELD_ORDER, QUDA_PADDED_SPACE_SPIN_COLOR_FIELD_ORDER, QUDA_QDPJIT_FIELD_ORDER, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.
void quda::genericCudaPrintVector | ( | const cudaColorSpinorField & | field, |
unsigned int | i | ||
) |
Definition at line 397 of file color_spinor_util.cu.
References quda::ColorSpinorField::Norm(), printfQuda, s, and quda::ColorSpinorField::V().
void quda::genericCudaPrintVector | ( | const cudaColorSpinorField & | field, |
unsigned int | i | ||
) |
Definition at line 445 of file color_spinor_util.cu.
References errorQuda, quda::ColorSpinorField::FieldOrder(), QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_FLOAT_FIELD_ORDER, QUDA_SPACE_COLOR_SPIN_FIELD_ORDER, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.
void quda::genericCudaPrintVector | ( | const cudaColorSpinorField & | field, |
unsigned int | i | ||
) |
Definition at line 461 of file color_spinor_util.cu.
References errorQuda, genericCudaPrintVector(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
void quda::genericCudaPrintVector | ( | const cudaColorSpinorField & | a, |
unsigned | x | ||
) |
Referenced by genericCudaPrintVector(), and quda::cudaColorSpinorField::PrintVector().
void quda::GenericPackGhost | ( | Arg & | arg | ) |
Definition at line 135 of file color_spinor_pack.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackGhostArg< Field >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
|
inline |
Generic ghost packing routine.
[out] | ghost | Array of packed ghosts with array ordering [2*dim+dir] |
[in] | a | Input field that is being packed |
[in] | parity | Which parity are we packing |
[in] | dagger | Is for a dagger operator (presently ignored) |
Definition at line 180 of file color_spinor_pack.cu.
References quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::apply(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::arg, errorQuda, MAX_BLOCK_FLOAT_NC, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::cudaColorSpinorField::exchangeGhost(), and quda::cpuColorSpinorField::packGhost().
__global__ void quda::GenericPackGhostKernel | ( | Arg | arg | ) |
Definition at line 165 of file color_spinor_pack.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackGhostArg< Field >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
void quda::genericPrintVector | ( | const cpuColorSpinorField & | a, |
unsigned int | x | ||
) |
Definition at line 337 of file color_spinor_util.cu.
References errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), quda::LatticeField::Precision(), print_vector(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and QUDA_SPACE_SPIN_COLOR_FIELD_ORDER.
Referenced by quda::cpuColorSpinorField::PrintVector().
void quda::genericSource | ( | cpuColorSpinorField & | a, |
QudaSourceType | sourceType, | ||
int | x, | ||
int | s, | ||
int | c | ||
) |
Definition at line 112 of file color_spinor_util.cu.
References constant(), corner(), errorQuda, quda::ColorSpinorField::FieldOrder(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), point(), quda::LatticeField::Precision(), QUDA_CONSTANT_SOURCE, QUDA_CORNER_SOURCE, QUDA_DOUBLE_PRECISION, QUDA_POINT_SOURCE, QUDA_RANDOM_SOURCE, QUDA_SINGLE_PRECISION, QUDA_SINUSOIDAL_SOURCE, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER, random(), s, and sin().
Referenced by quda::cpuColorSpinorField::Source().
|
inline |
Definition at line 32 of file spinor_noise.cu.
References cos(), log(), sin(), sqrt(), and quda::Arg< real, Ns, Nc, order >::v.
void quda::genGauss | ( | GaugeField & | U, |
RNG & | rngstate, | ||
double | sigma | ||
) |
Definition at line 138 of file gauge_random.cu.
References quda::GaugeGauss< Float, Arg >::apply(), arg(), and gaugeGauss().
|
inline |
Definition at line 40 of file spinor_noise.cu.
References quda::Arg< real, Ns, Nc, order >::v.
QudaFieldLocation quda::get_pointer_location | ( | const void * | ptr | ) |
Definition at line 399 of file malloc.cpp.
References errorQuda, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, and QUDA_INVALID_FIELD_LOCATION.
Referenced by file_name(), and printQudaInvertParam().
dim3 quda::GetBlockDim | ( | size_t | threads, |
size_t | size | ||
) |
Definition at line 25 of file random.cu.
References BLOCKSDIVUP.
Referenced by launch_kernel_random().
|
inline |
Compute the space-time coordinates we are at.
[out] | coord | The computed space-time coordinates |
[in] | arg | DslashArg struct |
[in,out] | idx | Space-time index (usually equal to global x-thread index). When doing EXTERIOR kernels we overwrite this with the index into our face (ghost index). |
[in] | parity | Field parity |
[out] | the | dimension we are working on (fused kernel only) |
Definition at line 88 of file dslash_helper.cuh.
References arg(), EXTERIOR_KERNEL_ALL, getCoords5CB(), getCoordsCB(), INTERIOR_KERNEL, Ls, parity, QUDA_5D_PC, and X.
Referenced by completeKSForceCore(), computeAPEStep(), computeCoarseClover(), computeFmunuCore(), computeGenGauss(), computeMomAction(), computeNeighborSum(), computeOvrImpSTOUTStep(), computePlaq(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), computeUV(), computeVUV(), computeYhat(), corner(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::getPaddedIndex(), quda::gauge::TIFRPaddedOrder< Float, length >::getPaddedIndex(), kernel_random(), packGhost(), and sin().
|
inlinestatic |
Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity. Wrapper around getCoordsCB.
[out] | x | Computed spatial index |
[in] | cb_index | 1-d checkerboarded index |
[in] | X | Full lattice dimensions |
[in] | X0h | Half of x-dim lattice dimension |
[in] | parity | Site parity |
Definition at line 228 of file index_helper.cuh.
References getCoordsCB().
|
inlinestatic |
Compute the 5-d spatial index from the checkerboarded 1-d index at parity parity. Wrapper around getCoords5CB.
[out] | x | Computed spatial index |
[in] | cb_index | 1-d checkerboarded index |
[in] | X | Full lattice dimensions |
[in] | parity | Site parity |
Definition at line 301 of file index_helper.cuh.
References getCoords5CB().
Referenced by packGhost().
|
inlinestatic |
Compute the 5-d spatial index from the checkerboarded 1-d index at parity parity
[out] | x | Computed spatial index |
[in] | cb_index | 1-d checkerboarded index |
[in] | X | Full lattice dimensions |
[in] | X0h | Half of x-dim lattice dimension |
[in] | parity | Site parity |
Definition at line 270 of file index_helper.cuh.
References QUDA_5D_PC.
Referenced by getCoords(), and getCoords5().
|
inlinestatic |
Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity
[out] | x | Computed spatial index |
[in] | cb_index | 1-d checkerboarded index |
[in] | X | Full lattice dimensions |
[in] | X0h | Half of x-dim lattice dimension |
[in] | parity | Site parity |
Definition at line 201 of file index_helper.cuh.
References parity.
Referenced by applyDslash(), and getCoords().
|
inlinestatic |
Compute the 4-d spatial index from the checkerboarded 1-d index at parity parity
x | Computed spatial index |
cb_index | 1-d checkerboarded index |
X | Full lattice dimensions |
parity | Site parity |
Definition at line 242 of file index_helper.cuh.
References parity.
Referenced by computeForce().
|
inline |
Definition at line 422 of file quda_matrix.h.
References Mat().
Referenced by computeLinkInverse(), exponentiate_iQ(), quda::gauge::Reconstruct< 13, Float, ghostExchange_, stag_phase >::getPhase(), quda::gauge::Reconstruct< 9, Float, ghostExchange_, stag_phase >::getPhase(), inverse(), polarSu3(), and setUnitarizeLinksConstants().
bool quda::getDslashLaunch | ( | ) |
|
inlinestatic |
Compute the 1-d global index from 1-d checkerboard index and parity. This should never be used to index into QUDA fields due to the potential of padding between even and odd regions.
cb_index | 1-d checkerboard index |
X | lattice dimensions |
parity | Site parity |
Definition at line 316 of file index_helper.cuh.
References parity.
|
inline |
Definition at line 834 of file coarse_op_kernel.cuh.
References quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::parity_flip, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::shared_atomic.
|
inline |
Definition at line 797 of file coarse_op_kernel.cuh.
References coarseIndex(), parity, virtualBlockDim(), and virtualThreadIdx().
bool quda::getKernelPackT | ( | ) |
Definition at line 26 of file dslash_quda.cu.
References kernelPackT.
Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::Pack< Float, nColor, spin_project >::fillAux(), quda::dslash::issueGather(), quda::dslash::issuePack(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), quda::Pack< Float, nColor, spin_project >::Pack(), PackGhost(), pushKernelPackT(), quda::cudaColorSpinorField::sendGhost(), quda::cudaColorSpinorField::sendStart(), quda::Dslash< Float >::setParam(), and DslashCuda::setParam().
double2 quda::getLinkDeterminant | ( | cudaGaugeField & | data | ) |
Calculate the Determinant.
[in] | data | Gauge field |
Definition at line 194 of file pgauge_det_trace.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by GaugeAlgTest::TearDown(), and TEST_F().
double2 quda::getLinkTrace | ( | cudaGaugeField & | data | ) |
Calculate the Trace.
[in] | data | Gauge field |
Definition at line 215 of file pgauge_det_trace.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by GaugeAlgTest::TearDown().
|
inlinestatic |
Compute the checkerboard 1-d index for the nearest neighbor.
[in] | x | nDim lattice coordinates |
[in] | mu | dimension in which to add 1 |
[in] | dir | direction (+1 or -1) |
[in] | arg | parameter struct |
Definition at line 166 of file index_helper.cuh.
Referenced by applyWilsonTM().
|
inline |
Definition at line 1131 of file quda_matrix.h.
References sum().
|
inline |
Definition at line 1115 of file quda_matrix.h.
|
inline |
Definition at line 415 of file quda_matrix.h.
References Mat().
Referenced by computeOvrImpSTOUTStep(), computeSTOUTStep(), exponentiate_iQ(), plaquette(), qChargeComputeKernel(), and setUnitarizeLinksConstants().
const map & quda::getTuneCache | ( | ) |
Returns a reference to the tunecache map.
Definition at line 128 of file tune.cpp.
References tunecache.
Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune().
|
inline |
Compute the checkerboarded index into the ghost field corresponding to full (local) site index x[]
x_ | local site |
X_ | local lattice dimensions |
dim | dimension |
nFace | depth of ghost |
Definition at line 335 of file index_helper.cuh.
|
inline |
Compute the checkerboarded index into the ghost field corresponding to full (local) site index x[] for staggered
x_ | local site |
X_ | local lattice dimensions |
dim | dimension |
nFace | depth of ghost |
Definition at line 396 of file index_helper.cuh.
long quda::host_allocated_peak | ( | ) |
void quda::host_free_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
void * | ptr | ||
) |
Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h
Definition at line 344 of file malloc.cpp.
References count, errorQuda, HOST, MAPPED, PINNED, print_trace(), printfQuda, and track_free().
Referenced by quda::pool::pinned_free_().
|
inline |
Definition at line 117 of file uint_to_char.h.
References u32toa().
Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), and postTrace_().
|
inline |
Definition at line 284 of file uint_to_char.h.
References u64toa().
|
inline |
Compute whether the provided coordinate is within the halo region boundary of a given dimension.
Definition at line 155 of file dslash_helper.cuh.
|
inlinestatic |
Retrieve the SU(N) indices for the current block number
[in] | block,current | block number, from 0 to (NCOLORS * (NCOLORS - 1) / 2) |
[out] | p,row | index pointing to the SU(N) matrix |
[out] | q,column | index pointing to the SU(N) matrix |
Definition at line 36 of file gauge_fix_ovr_hit_devf.cuh.
References index().
|
inline |
Compute the checkerboard lattice index from the input face index. This is used by the Wilson-like halo packing kernels, and can deal with 4-d or 5-d field and 4-d or 5-d preconditioning.
[in] | face_idx | Checkerboard halo index |
[in] | parity | Parity index |
[in] | arg | Argument struct with required meta data |
Definition at line 601 of file index_helper.cuh.
References QUDA_4D_PC, QUDA_5D_PC, and s.
|
inline |
Overloaded variant of indexFromFaceIndex where we use the parity declared in arg.
Definition at line 694 of file index_helper.cuh.
References arg().
|
inlinestatic |
Compute global checkerboard index from face index. The following indexing routines work for arbitrary lattice dimensions (though perhaps not odd like the Wilson variant?) Specifically, we compute an index into the local volume from an index into the face. This is used by the staggered-like face packing routines, and is different from the Wilson variant since here the halo depth is traversed in a different order - here the halo depth is the faster running dimension.
[in] | face_idx_in | Checkerboarded face index |
[in] | param | Parameter struct with required meta data |
Definition at line 717 of file index_helper.cuh.
void quda::InitGaugeField | ( | cudaGaugeField & | data | ) |
Perform a cold start to the gauge field, identity SU(3) matrix, also fills the ghost links in multi-GPU case (no need to exchange data)
[in,out] | data | Gauge field |
Referenced by main(), and GaugeAlgTest::SetUp().
void quda::InitGaugeField | ( | cudaGaugeField & | data, |
RNG & | rngstate | ||
) |
Perform a hot start to the gauge field, random SU(3) matrix, followed by reunitarization, also exchanges border links in multi-GPU case.
[in,out] | data | Gauge field |
[in,out] | rngstate | state of the CURAND random number generator |
Definition at line 450 of file pgauge_init.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
__device__ __host__ void quda::injector | ( | Arg & | arg, |
int | dir, | ||
int | a, | ||
int | b, | ||
int | c, | ||
int | d, | ||
int | g, | ||
int | parity | ||
) |
Definition at line 76 of file extract_gauge_ghost_extended.cu.
References quda::Matrix< T, N >::data, length, and quda::gauge::Ncolor().
|
inline |
Compute the inner product over color and spin: dot = \sum_{s,c} conj(a(s,c)) * b(s,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 914 of file color_spinor.h.
Referenced by computeColorContraction(), computeDegrandRossiContraction(), and innerProduct().
|
inline |
Compute the inner product over color at spin s between two ColorSpinor fields: dot = \sum_c conj(a(s,c)) * b(s,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
s | diagonal spin index |
Definition at line 932 of file color_spinor.h.
References innerProduct().
|
inline |
Compute the inner product over color at spin sa and sb between two ColorSpinor fields: dot = \sum_c conj(a(sa,c)) * b(sb,c)
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
sa | Left-hand side spin index |
sb | Right-hand side spin index |
Definition at line 948 of file color_spinor.h.
References dot().
|
inline |
Compute the inner product over color at spin s between a color vector and a color spinor: dot = \sum_c conj(a(c)) * b(s,c)
a | Left-hand side ColorVector |
b | Right-hand side ColorSpinor |
Definition at line 971 of file color_spinor.h.
References innerProduct().
|
inline |
This instantiate function is used to instantiate the reconstruct types used.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 426 of file dslash.h.
References errorQuda, quda::Dslash< Float >::in, quda::Dslash< Float >::out, and quda::GaugeField::Reconstruct().
|
inline |
This instantiate function is used to instantiate the colors.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 459 of file dslash.h.
References errorQuda, quda::Dslash< Float >::in, quda::GaugeField::Ncolor(), quda::ColorSpinorField::Ncolor(), and quda::Dslash< Float >::out.
|
inline |
This instantiate function is used to instantiate the precisions.
[out] | out | Output result field |
[in] | in | Input field |
[in] | U | Gauge field |
[in] | args | Additional arguments for different dslash kernels |
Definition at line 476 of file dslash.h.
References errorQuda, quda::Dslash< Float >::in, quda::Dslash< Float >::out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, and QUDA_SINGLE_PRECISION.
Definition at line 611 of file quda_matrix.h.
References getDeterminant().
Referenced by quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::Accessor(), ApplyClover(), computeOvrImpSTOUTStep(), computeSTOUTStep(), quda::cudaCloverField::copy(), copyGenericClover(), loadCloverQuda(), polarSu3(), and setUnitarizeLinksConstants().
|
inline |
Definition at line 57 of file malloc_quda.h.
Referenced by quda::cudaColorSpinorField::create(), quda::cudaCloverField::cudaCloverField(), and quda::cudaGaugeField::zeroPad().
|
inline |
Compute whether this thread should be active for updating a given offsetDim halo. For non-fused halo update kernels this is a trivial kernel that just checks if the given dimension is partitioned and if so, return true.
For fused halo region update kernels: here every thread has a prescribed dimension it is tasked with updating, but for the edges and vertices, the thread responsible for the entire update is the "greatest" one. Hence some threads may be labelled as a given dimension, but they have to update other dimensions too. Conversely, a given thread may be labeled for a given dimension, but if that thread lies at an edge or vertex, and we have partitioned a higher dimension, then that thread will cede to the higher thread.
[in,out] | Whether | this thread is "cumulatively" active (cumulative over all dimensions) |
[in] | threadDim | Prescribed dimension of this thread |
[in] | offsetDim | The dimension we are querying whether this thread should be responsible |
[in] | offset | The size of the hop |
[in] | y | Site coordinate |
[in] | partitioned | Array of which dimensions have been partitioned |
[in] | X | Lattice dimensions |
Definition at line 188 of file dslash_helper.cuh.
References EXTERIOR_KERNEL_ALL.
|
inline |
Helper function to determine if the application of the derivative in the dslash is complete.
[in] | Argument | parameter struct |
[in] | Checkerboard | space-time index |
[in] | Parity | we are acting on |
Definition at line 55 of file dslash_helper.cuh.
References EXTERIOR_KERNEL_ALL, EXTERIOR_KERNEL_T, EXTERIOR_KERNEL_X, EXTERIOR_KERNEL_Y, EXTERIOR_KERNEL_Z, and INTERIOR_KERNEL.
bool quda::isUnitary | ( | const cpuGaugeField & | field, |
double | max_error | ||
) |
Definition at line 329 of file unitarize_links_quda.cu.
References arg(), atomicAdd(), quda::TuneParam::block, quda::blas::bytes, conj(), copyArrayToLink(), errorQuda, quda::blas::flops, quda::cpuGaugeField::Gauge_p(), getTuning(), getVerbosity(), quda::TuneParam::grid, quda::GaugeField::isNative(), quda::Matrix< T, N >::isUnitary(), mu, quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), printLink(), QUDA_DOUBLE_PRECISION, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::TuneParam::shared_bytes, stream, tmp, tuneLaunch(), unitarizeLinks(), quda::LatticeField::VolString(), and quda::LatticeField::Volume().
__global__ void quda::kernel_random | ( | cuRNGState * | state, |
unsigned long long | seed, | ||
int | size_cb, | ||
rngArg | arg | ||
) |
CUDA kernel to initialize CURAND RNG states.
state | CURAND RNG state array |
seed | initial seed for RNG |
size | size of the CURAND RNG state array |
arg | Metadata needed for computing multi-gpu offsets |
Definition at line 51 of file random.cu.
References quda::rngArg::commCoord, quda::rngArg::commDim, getCoords(), parity, and quda::rngArg::X.
|
inline |
Definition at line 132 of file laplace.cuh.
References applyLaplace(), arg(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::LaplaceArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::LaplaceArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.
Referenced by quda::LaplaceApply< Float, nColor, recon >::LaplaceApply(), and quda::Laplace< Float, nDim, nColor, Arg >::tuneKey().
__global__ void quda::laplaceGPU | ( | Arg | arg | ) |
Definition at line 178 of file laplace.cuh.
References arg(), and quda::DslashArg< Float >::parity.
void quda::launch_kernel_random | ( | cuRNGState * | state, |
unsigned long long | seed, | ||
int | size_cb, | ||
int | n_parity, | ||
int | X[4] | ||
) |
Call CUDA kernel to initialize CURAND RNG states.
state | CURAND RNG state array |
seed | initial seed for RNG |
size_cb | Checkerboarded size of the CURAND RNG state array |
n_parity | Number of parities (1 or 2) |
X | array of lattice dimensions |
Definition at line 75 of file random.cu.
References arg(), GetBlockDim(), and qudaDeviceSynchronize.
Referenced by quda::RNG::Init().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[]
x | 4-d lattice index |
X | Full lattice dimensions |
Definition at line 46 of file index_helper.cuh.
Referenced by computeGenGauss(), computeMomAction(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::getPaddedIndex(), and quda::gauge::TIFRPaddedOrder< Float, length >::getPaddedIndex().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[]
y | copy of 4-d lattice index |
x | 4-d lattice index |
X | Full lattice dimensions |
Definition at line 60 of file index_helper.cuh.
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] +n in the mu direction
n | number of hops (+/-) in the mu direction |
x | 4-d lattice index |
X | Full lattice dimensions |
mu | direction in which to add n hops |
Definition at line 76 of file index_helper.cuh.
References mu.
Referenced by linkIndexM1(), and linkIndexM3().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] -1 in the mu direction
x | 4-d lattice index |
X | Full lattice dimensions |
mu | direction in which to subtract 1 |
Definition at line 94 of file index_helper.cuh.
References linkIndexDn(), mu, and X.
Referenced by applyDslash(), applyLaplace(), applyStaggered(), computeNeighborSum(), and computeYhat().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] -3 in the mu direction
x | 4-d lattice index |
X | Full lattice dimensions |
mu | direction in which to subtract 3 |
Definition at line 107 of file index_helper.cuh.
References linkIndexDn(), mu, and X.
Referenced by applyStaggered().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] +1 in the mu direction
x | 4-d lattice index |
X | Full lattice dimensions |
mu | direction in which to add 1 |
Definition at line 139 of file index_helper.cuh.
Referenced by applyDslash(), applyLaplace(), applyStaggered(), computeNeighborSum(), and computeUV().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] +3 in the mu direction
x | 4-d lattice index |
X | Full lattice dimensions |
mu | direction in which to add 3 |
Definition at line 151 of file index_helper.cuh.
Referenced by applyStaggered().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] + dx[]
x | 4-d lattice index |
dx | 4-d shift index |
X | Full lattice dimensions |
Definition at line 13 of file index_helper.cuh.
Referenced by completeKSForceCore(), computeAPEStep(), computeFmunuCore(), computeForce(), computeOvrImpSTOUTStep(), computeStaple(), computeStapleRectangle(), computeSTOUTStep(), quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg(), and plaquette().
|
inlinestatic |
Compute the checkerboard 1-d index from the 4-d coordinate x[] + dx[]
y | new 4-d lattice index |
x | original 4-d lattice index |
dx | 4-d shift index |
X | Full lattice dimensions |
Definition at line 31 of file index_helper.cuh.
|
inlinestatic |
Compute the full 1-d index from the 4-d coordinate x[] +1 in the mu direction
x | 4-d lattice index |
X | Full lattice dimensions |
mu | direction in which to add 1 |
Definition at line 121 of file index_helper.cuh.
References mu.
|
inline |
Definition at line 45 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 35 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 71 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 63 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 53 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 21 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 28 of file inline_ptx.h.
References __PTR.
|
inline |
Definition at line 857 of file quda_matrix.h.
References quda::Matrix< T, N >::data.
|
inline |
Definition at line 879 of file quda_matrix.h.
|
inline |
Definition at line 869 of file quda_matrix.h.
References quda::Matrix< T, N >::data.
|
inline |
Definition at line 955 of file quda_matrix.h.
References quda::Matrix< T, N >::data.
void quda::loadTuneCache | ( | ) |
Definition at line 322 of file tune.cpp.
References broadcastTuneCache(), comm_rank(), deserializeTuneCache(), errorQuda, getTuning(), getVerbosity(), gitversion, printfQuda, QUDA_SUMMARIZE, QUDA_TUNE_NO, resource_path, and warningQuda.
Referenced by initQudaMemory(), and quda::TunableVectorYZ::resizeStep().
|
inline |
Helper function for determining if the location of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 642 of file lattice_field.h.
References errorQuda, quda::LatticeField::Location(), and QUDA_INVALID_FIELD_LOCATION.
Referenced by Location_().
|
inline |
Helper function for determining if the location of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check location on |
Definition at line 659 of file lattice_field.h.
References Location_().
|
inline |
Definition at line 101 of file complex_quda.h.
References log().
Referenced by acosh(), asinh(), atanh(), cloverInvertCompute(), expsu3(), gauss_su3(), genGauss(), log(), log10(), pow(), and smallSVD().
|
inline |
Definition at line 1162 of file complex_quda.h.
References abs(), arg(), and log().
|
inline |
Definition at line 1168 of file complex_quda.h.
Referenced by log().
|
inline |
Definition at line 106 of file complex_quda.h.
References log10().
|
inline |
Definition at line 1175 of file complex_quda.h.
References log().
Referenced by log10().
__forceinline__ __host__ __device__ char4 quda::make_charN | ( | const short4 & | a | ) |
Definition at line 263 of file float_vector.h.
__forceinline__ __host__ __device__ char2 quda::make_charN | ( | const short2 & | a | ) |
Definition at line 267 of file float_vector.h.
__forceinline__ __host__ __device__ char4 quda::make_charN | ( | const float4 & | a | ) |
Definition at line 271 of file float_vector.h.
__forceinline__ __host__ __device__ char2 quda::make_charN | ( | const float2 & | a | ) |
Definition at line 275 of file float_vector.h.
__forceinline__ __host__ __device__ char4 quda::make_charN | ( | const double4 & | a | ) |
Definition at line 279 of file float_vector.h.
__forceinline__ __host__ __device__ char2 quda::make_charN | ( | const double2 & | a | ) |
Definition at line 283 of file float_vector.h.
|
inline |
Definition at line 309 of file float_vector.h.
|
inline |
Definition at line 310 of file float_vector.h.
|
inline |
Definition at line 288 of file float_vector.h.
|
inline |
Definition at line 291 of file float_vector.h.
References quda::complex< double >::imag(), and quda::complex< double >::real().
|
inline |
Definition at line 293 of file float_vector.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
|
inline |
Definition at line 295 of file float_vector.h.
References quda::complex< double >::imag(), and quda::complex< double >::real().
|
inline |
Definition at line 297 of file float_vector.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
|
inline |
Definition at line 300 of file float_vector.h.
|
inline |
Definition at line 302 of file float_vector.h.
|
inline |
Definition at line 304 of file float_vector.h.
|
inline |
Definition at line 306 of file float_vector.h.
__forceinline__ __host__ __device__ float2 quda::make_FloatN | ( | const double2 & | a | ) |
Definition at line 223 of file float_vector.h.
__forceinline__ __host__ __device__ float4 quda::make_FloatN | ( | const double4 & | a | ) |
Definition at line 227 of file float_vector.h.
__forceinline__ __host__ __device__ double2 quda::make_FloatN | ( | const float2 & | a | ) |
Definition at line 231 of file float_vector.h.
__forceinline__ __host__ __device__ double4 quda::make_FloatN | ( | const float4 & | a | ) |
Definition at line 235 of file float_vector.h.
__forceinline__ __host__ __device__ short4 quda::make_shortN | ( | const char4 & | a | ) |
Definition at line 239 of file float_vector.h.
__forceinline__ __host__ __device__ short2 quda::make_shortN | ( | const char2 & | a | ) |
Definition at line 243 of file float_vector.h.
__forceinline__ __host__ __device__ short4 quda::make_shortN | ( | const float4 & | a | ) |
Definition at line 247 of file float_vector.h.
__forceinline__ __host__ __device__ short2 quda::make_shortN | ( | const float2 & | a | ) |
Definition at line 251 of file float_vector.h.
__forceinline__ __host__ __device__ short4 quda::make_shortN | ( | const double4 & | a | ) |
Definition at line 255 of file float_vector.h.
__forceinline__ __host__ __device__ short2 quda::make_shortN | ( | const double2 & | a | ) |
Definition at line 259 of file float_vector.h.
|
inline |
Definition at line 746 of file quda_matrix.h.
References conj().
Referenced by completeKSForceCore(), and computeMomAction().
long quda::mapped_allocated_peak | ( | ) |
void * quda::mapped_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h
Definition at line 273 of file malloc.cpp.
References aligned_malloc(), quda::MemAlloc::base_size, errorQuda, MAPPED, memset(), and track_malloc().
void quda::massRescale | ( | cudaColorSpinorField & | b, |
QudaInvertParam & | param | ||
) |
Definition at line 1769 of file interface_quda.cpp.
References quda::blas::ax(), QudaInvertParam_s::dslash_type, errorQuda, getVerbosity(), kappa, QudaInvertParam_s::kappa, kappa5, QudaInvertParam_s::m5, QudaInvertParam_s::mass, QudaInvertParam_s::mass_normalization, quda::blas::norm2(), QudaInvertParam_s::num_offset, QudaInvertParam_s::offset, pow(), printfQuda, QUDA_ASQTAD_DSLASH, QUDA_ASYMMETRIC_MASS_NORMALIZATION, QUDA_DEBUG_VERBOSE, QUDA_DOMAIN_WALL_4D_DSLASH, QUDA_DOMAIN_WALL_DSLASH, QUDA_KAPPA_NORMALIZATION, QUDA_MASS_NORMALIZATION, QUDA_MAT_SOLUTION, QUDA_MATDAG_MAT_SOLUTION, QUDA_MATPC_SOLUTION, QUDA_MATPCDAG_MATPC_SOLUTION, QUDA_MOBIUS_DWF_DSLASH, QUDA_STAGGERED_DSLASH, and QudaInvertParam_s::solution_type.
Referenced by invertMultiShiftQuda(), invertMultiSrcQuda(), and invertQuda().
__forceinline__ __host__ __device__ float quda::max_fabs | ( | const float4 & | c | ) |
Definition at line 198 of file float_vector.h.
Referenced by store_norm().
__forceinline__ __host__ __device__ float quda::max_fabs | ( | const float2 & | b | ) |
Definition at line 204 of file float_vector.h.
__forceinline__ __host__ __device__ double quda::max_fabs | ( | const double4 & | c | ) |
Definition at line 208 of file float_vector.h.
__forceinline__ __host__ __device__ double quda::max_fabs | ( | const double2 & | b | ) |
Definition at line 214 of file float_vector.h.
void quda::Monte | ( | cudaGaugeField & | data, |
RNG & | rngstate, | ||
double | Beta, | ||
int | nhb, | ||
int | nover | ||
) |
Perform heatbath and overrelaxation. Performs nhb heatbath steps followed by nover overrelaxation steps.
[in,out] | data | Gauge field |
[in,out] | rngstate | state of the CURAND random number generator |
[in] | Beta | inverse of the gauge coupling, beta = 2 Nc / g_0^2 |
[in] | nhb | number of heatbath steps |
[in] | nover | number of overrelaxation steps |
Definition at line 856 of file pgauge_heatbath.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by main(), and GaugeAlgTest::SetUp().
|
inline |
Do a single (AV)^\dagger * UV product, where for preconditioned clover, AV corresponds to the clover inverse multiplied by the packed null space vectors, else AV is simply the packed null space vectors.
[out] | vuv | Result array |
[in,out] | arg | Arg storing the fields and parameters |
[in] | parity | Fine grid parity we're working on |
[in] | x_cb | Checkerboarded x dimension |
Definition at line 537 of file coarse_op_kernel.cuh.
References quda::Gamma< ValueType, basis, dir >::apply(), quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::AV, caxpy(), conj(), quda::Gamma< ValueType, basis, dir >::getcol(), QUDA_BACKWARDS, s, quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::UV, and quda::CalculateYArg< Float, fineSpin, coarseSpin, fineColor, coarseColor, coarseGauge, coarseGaugeAtomic, fineGauge, fineSpinor, fineSpinorTmp, fineSpinorV, fineClover >::V.
|
inline |
Apply the non-degenerate twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note that this routine only exists in xpay form.
Definition at line 31 of file dslash_ndeg_twisted_mass.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.
|
inline |
Apply the non-degenerate twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5*tau_3 + c*tau_1)*x. Note that this routine only exists in xpay form.
Definition at line 49 of file dslash_ndeg_twisted_mass_preconditioned.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::VectorCache< real, Vector >::load(), quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, quda::VectorCache< real, Vector >::save(), quda::VectorCache< real, Vector >::sync(), and quda::WilsonArg< Float, nColor, reconstruct_ >::x.
void quda::ndegTwistedMassCPU | ( | Arg | arg | ) |
Definition at line 78 of file dslash_ndeg_twisted_mass.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::ndegTwistedMassGPU | ( | Arg | arg | ) |
Definition at line 94 of file dslash_ndeg_twisted_mass.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
void quda::ndegTwistedMassPreconditionedCPU | ( | Arg | arg | ) |
Definition at line 113 of file dslash_ndeg_twisted_mass_preconditioned.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::ndegTwistedMassPreconditionedGPU | ( | Arg | arg | ) |
Definition at line 142 of file dslash_ndeg_twisted_mass_preconditioned.cuh.
References arg(), and quda::DslashArg< Float >::parity.
__device__ __forceinline__ int quda::neighborIndex | ( | const unsigned int & | cb_idx, |
const int(&) | shift[4], | ||
const bool(&) | partitioned[4], | ||
const unsigned int & | parity | ||
) |
Definition at line 41 of file shift_quark_field.cu.
References coordsFromIndex(), quda::ShiftColorSpinorFieldArg< Output, Input >::partitioned, quda::ShiftColorSpinorFieldArg< Output, Input >::shift, X1, X2, X3, and X4.
Referenced by gaugeLink(), shiftColorSpinorFieldKernel(), and spinorNeighbor().
double quda::norm | ( | const GaugeField & | u, |
int | d, | ||
norm_type_ | type | ||
) |
Definition at line 15 of file max_gauge.cu.
References ABS_MAX, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_max(), ABS_MIN, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_min(), NORM1, quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm1(), NORM2, and quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm2().
double quda::norm | ( | const CloverField & | u, |
norm_type_ | type | ||
) |
Definition at line 15 of file max_clover.cu.
References ABS_MAX, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_max(), ABS_MIN, quda::clover::FieldOrder< Float, nColor, nSpin, order >::abs_min(), NORM1, quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm1(), NORM2, and quda::clover::FieldOrder< Float, nColor, nSpin, order >::norm2().
double quda::norm | ( | const GaugeField & | u, |
int | d, | ||
norm_type_ | type | ||
) |
Definition at line 28 of file max_gauge.cu.
References errorQuda, quda::GaugeField::FieldOrder(), QUDA_FLOAT2_GAUGE_ORDER, QUDA_MILC_GAUGE_ORDER, and QUDA_QDP_GAUGE_ORDER.
double quda::norm | ( | const CloverField & | u, |
norm_type_ | type | ||
) |
Definition at line 29 of file max_clover.cu.
References errorQuda, quda::CloverField::Order(), QUDA_FLOAT2_CLOVER_ORDER, and QUDA_FLOAT4_CLOVER_ORDER.
double quda::norm | ( | const GaugeField & | u, |
int | d, | ||
norm_type_ | type | ||
) |
Definition at line 40 of file max_gauge.cu.
References errorQuda, and quda::GaugeField::Ncolor().
|
inline |
Returns the magnitude of z squared.
Definition at line 1092 of file complex_quda.h.
Referenced by Spinor< RegType, StoreType, N, write >::backup(), ComputeHarmonicRitz< libtype::eigen_lib >(), ComputeHarmonicRitz< libtype::magma_lib >(), quda::EigenSolver::computeSVD(), constructCloverField(), ErrorSU3(), quda::colorspinor::FieldOrderCB< Float, coarseSpin, coarseColor, 1, csOrder, Float, ghostFloat >::FieldOrderCB(), quda::GMResDR::FlexArnoldiProcedure(), init(), quda::Matrix< T, N >::L2(), main(), Spinor< RegType, StoreType, N, write >::Norm(), quda::ColorSpinorField::Norm(), normalize(), quda::clover::square_< ReduceType, Float >::operator()(), quda::gauge::square_< ReduceType, Float >::operator()(), quda::gauge::square_< ReduceType, char >::operator()(), quda::gauge::square_< ReduceType, short >::operator()(), quda::gauge::square_< ReduceType, int >::operator()(), quda::colorspinor::square_< ReduceType, Float >::operator()(), quda::colorspinor::square_< ReduceType, short >::operator()(), quda::TRLM::operator()(), quda::colorspinor::square_< ReduceType, char >::operator()(), quda::colorspinor::FieldOrderCB< Float, coarseSpin, coarseColor, 1, csOrder, Float, ghostFloat >::operator()(), quda::GMResDR::operator()(), operator/(), performWuppertalnStep(), polarSu3(), Spinor< RegType, StoreType, N, write >::restore(), and Spinor< RegType, StoreType, N, write >::save().
double quda::norm1 | ( | const CloverField & | u, |
bool | inverse = false |
||
) |
This is a debugging function, where we cast a clover field into a spinor field so we can compute its L1 norm.
u | The clover field that we want the norm of |
Definition at line 478 of file clover_field.cpp.
References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm1().
Referenced by quda::cpuGaugeField::Gauge_p(), quda::CloverField::Rho(), and quda::GaugeField::SiteSize().
double quda::norm1 | ( | const GaugeField & | u | ) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L1 norm.
u | The gauge field that we want the norm of |
Definition at line 341 of file gauge_field.cpp.
References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm1().
double quda::norm2 | ( | const CloverField & | a, |
bool | inverse = false |
||
) |
This is a debugging function, where we cast a clover field into a spinor field so we can compute its L2 norm.
a | The clover field that we want the norm of |
Definition at line 470 of file clover_field.cpp.
References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm2().
Referenced by quda::MG::buildFreeVectors(), computeMomAction(), quda::DiracMobiusPC::Dslash5inv(), quda::GMResDR::FlexArnoldiProcedure(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), quda::Deflation::operator()(), quda::MG::operator()(), quda::PreconCG::operator()(), quda::SimpleBiCGstab::operator()(), quda::SD::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Deflation::reduce(), quda::CloverField::Rho(), quda::GaugeField::SiteSize(), quda::Deflation::verify(), and quda::MG::verify().
double quda::norm2 | ( | const GaugeField & | u | ) |
This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.
u | The gauge field that we want the norm of |
Definition at line 333 of file gauge_field.cpp.
References colorSpinorParam(), quda::ColorSpinorField::Create(), and quda::blas::norm2().
|
inline |
Definition at line 1035 of file complex_quda.h.
|
inline |
Definition at line 1041 of file complex_quda.h.
|
inline |
Definition at line 1047 of file complex_quda.h.
|
inline |
Definition at line 48 of file float_vector.h.
|
inline |
Definition at line 57 of file float_vector.h.
|
inline |
Definition at line 64 of file float_vector.h.
|
inline |
Definition at line 71 of file float_vector.h.
|
inline |
Definition at line 902 of file complex_quda.h.
|
inline |
Definition at line 911 of file complex_quda.h.
|
inline |
Definition at line 918 of file complex_quda.h.
|
inline |
Definition at line 476 of file quda_matrix.h.
|
inline |
Definition at line 484 of file quda_matrix.h.
References Mat().
|
inline |
Generic implementation of matrix multiplication.
Definition at line 507 of file quda_matrix.h.
|
inline |
Specialization of complex matrix multiplication that will issue optimal fma instructions.
Definition at line 528 of file quda_matrix.h.
|
inline |
Definition at line 563 of file quda_matrix.h.
|
inline |
Definition at line 583 of file quda_matrix.h.
|
inline |
Compute the scalar-vector product y = a * x.
[in] | a | Input scalar |
[in] | x | Input vector |
Definition at line 1067 of file color_spinor.h.
References quda::ColorSpinor< Float, Nc, Ns >::data, and s.
|
inline |
Compute the matrix-vector product y = A * x.
[in] | A | Input matrix |
[in] | x | Input vector |
Definition at line 1089 of file color_spinor.h.
References quda::ColorSpinor< Float, Nc, Ns >::data, and s.
|
inline |
Compute the matrix-vector product y = A * x.
[in] | A | Input Hermitian matrix with dimensions NcxNs x NcxNs |
[in] | x | Input vector |
Definition at line 1124 of file color_spinor.h.
References quda::ColorSpinor< Float, Nc, Ns >::data.
|
inline |
Definition at line 151 of file float_vector.h.
|
inline |
Definition at line 157 of file float_vector.h.
|
inline |
Definition at line 163 of file float_vector.h.
|
inline |
Definition at line 171 of file float_vector.h.
|
inline |
Definition at line 177 of file float_vector.h.
|
inline |
Definition at line 489 of file quda_matrix.h.
References Mat().
|
inline |
Definition at line 552 of file quda_matrix.h.
|
inline |
Definition at line 24 of file float_vector.h.
|
inline |
Definition at line 40 of file float_vector.h.
|
inline |
Definition at line 44 of file float_vector.h.
|
inline |
Definition at line 60 of file cub_helper.cuh.
|
inline |
Definition at line 80 of file float_vector.h.
|
inline |
Definition at line 87 of file float_vector.h.
|
inline |
Definition at line 854 of file complex_quda.h.
|
inline |
Definition at line 870 of file complex_quda.h.
|
inline |
Definition at line 876 of file complex_quda.h.
|
inline |
Definition at line 996 of file complex_quda.h.
|
inline |
Definition at line 433 of file quda_matrix.h.
|
inline |
Definition at line 862 of file complex_quda.h.
|
inline |
ColorSpinor addition operator.
[in] | x | Input vector |
[in] | y | Input vector |
Definition at line 1023 of file color_spinor.h.
References quda::ColorSpinor< Float, Nc, Ns >::data, and s.
|
inline |
Definition at line 47 of file clover_deriv.cuh.
|
inline |
Definition at line 96 of file float_vector.h.
|
inline |
Definition at line 104 of file float_vector.h.
|
inline |
Definition at line 110 of file float_vector.h.
|
inline |
Definition at line 116 of file float_vector.h.
|
inline |
Definition at line 123 of file float_vector.h.
|
inline |
Definition at line 443 of file quda_matrix.h.
|
inline |
Definition at line 451 of file quda_matrix.h.
References Mat().
|
inline |
Definition at line 28 of file float_vector.h.
|
inline |
Definition at line 32 of file float_vector.h.
|
inline |
Definition at line 36 of file float_vector.h.
|
inline |
Definition at line 185 of file float_vector.h.
|
inline |
Definition at line 189 of file float_vector.h.
|
inline |
Definition at line 883 of file complex_quda.h.
|
inline |
Definition at line 889 of file complex_quda.h.
|
inline |
Definition at line 895 of file complex_quda.h.
|
inline |
Definition at line 1001 of file complex_quda.h.
|
inline |
Definition at line 467 of file quda_matrix.h.
|
inline |
Definition at line 495 of file quda_matrix.h.
|
inline |
ColorSpinor subtraction operator.
[in] | x | Input vector |
[in] | y | Input vector |
Definition at line 1045 of file color_spinor.h.
References quda::ColorSpinor< Float, Nc, Ns >::data, and s.
|
inline |
Definition at line 58 of file clover_deriv.cuh.
References axpy().
|
inline |
Definition at line 131 of file float_vector.h.
|
inline |
Definition at line 139 of file float_vector.h.
|
inline |
Definition at line 145 of file float_vector.h.
|
inline |
Definition at line 459 of file quda_matrix.h.
|
inline |
Definition at line 926 of file complex_quda.h.
References norm().
|
inline |
Definition at line 935 of file complex_quda.h.
References quda::complex< float >::imag(), quda::complex< float >::real(), and s.
|
inline |
Definition at line 952 of file complex_quda.h.
References quda::complex< double >::imag(), quda::complex< double >::real(), and s.
|
inline |
Definition at line 969 of file complex_quda.h.
|
inline |
Definition at line 976 of file complex_quda.h.
References norm().
|
inline |
Definition at line 984 of file complex_quda.h.
|
inline |
Definition at line 989 of file complex_quda.h.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const PackArg< Float, nSpin, nColor, spin_project > & | arg | ||
) |
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const CloverFieldParam & | param | ||
) |
Definition at line 427 of file clover_field.cpp.
References quda::CloverFieldParam::clover, quda::CloverFieldParam::cloverInv, quda::CloverFieldParam::create, quda::CloverFieldParam::csw, quda::CloverFieldParam::direct, quda::CloverFieldParam::inverse, quda::CloverFieldParam::invNorm, quda::CloverFieldParam::mu2, quda::CloverFieldParam::norm, quda::CloverFieldParam::order, param, quda::CloverFieldParam::rho, and quda::CloverFieldParam::twisted.
Referenced by quda::CloverFieldParam::CloverFieldParam(), quda::ColorSpinorField::Components(), quda::cudaColorSpinorField::Ghost2(), quda::LatticeFieldParam::LatticeFieldParam(), and quda::GaugeFieldParam::setPrecision().
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const LatticeFieldParam & | param | ||
) |
Definition at line 704 of file lattice_field.cpp.
References quda::LatticeFieldParam::ghostExchange, quda::LatticeFieldParam::GhostPrecision(), quda::LatticeFieldParam::nDim, quda::LatticeFieldParam::pad, quda::LatticeFieldParam::Precision(), quda::LatticeFieldParam::r, quda::LatticeFieldParam::scale, and quda::LatticeFieldParam::x.
std::ostream & quda::operator<< | ( | std::ostream & | output, |
const GaugeFieldParam & | param | ||
) |
Definition at line 282 of file gauge_field.cpp.
References quda::GaugeFieldParam::anisotropy, quda::GaugeFieldParam::create, quda::GaugeFieldParam::fixed, quda::GaugeFieldParam::geometry, quda::GaugeFieldParam::link_type, quda::GaugeFieldParam::nColor, quda::GaugeFieldParam::nFace, quda::GaugeField::nInternal, quda::GaugeFieldParam::order, param, QUDA_RECONSTRUCT_NO, quda::GaugeFieldParam::reconstruct, quda::GaugeFieldParam::staggeredPhaseApplied, quda::GaugeFieldParam::staggeredPhaseType, quda::GaugeFieldParam::t_boundary, and quda::GaugeFieldParam::tadpole.
std::basic_ostream< charT, traits > & quda::operator<< | ( | std::basic_ostream< charT, traits > & | os, |
const complex< ValueType > & | z | ||
) |
Definition at line 310 of file complex_quda.h.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const DslashArg< Float > & | arg | ||
) |
std::ostream& quda::operator<< | ( | std::ostream & | os, |
const Matrix< T, N > & | m | ||
) |
Definition at line 833 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | os, |
const Array< T, N > & | a | ||
) |
Definition at line 847 of file quda_matrix.h.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const ColorSpinorField & | a | ||
) |
Definition at line 860 of file color_spinor_field.cpp.
References quda::ColorSpinorField::bytes, quda::ColorSpinorField::composite_descr, quda::CompositeColorSpinorFieldDescriptor::dim, quda::ColorSpinorField::fieldOrder, quda::ColorSpinorField::gammaBasis, quda::LatticeField::ghost_precision, quda::CompositeColorSpinorFieldDescriptor::id, quda::CompositeColorSpinorFieldDescriptor::is_component, quda::CompositeColorSpinorFieldDescriptor::is_composite, quda::CompositeColorSpinorFieldDescriptor::length, quda::ColorSpinorField::length, quda::ColorSpinorField::nColor, quda::ColorSpinorField::nDim, quda::ColorSpinorField::norm_bytes, quda::ColorSpinorField::nSpin, out, quda::ColorSpinorField::pad, quda::ColorSpinorField::pc_type, quda::LatticeField::precision, quda::ColorSpinorField::real_length, quda::ColorSpinorField::siteOrder, quda::ColorSpinorField::siteSubset, quda::CompositeColorSpinorFieldDescriptor::stride, quda::ColorSpinorField::stride, quda::ColorSpinorField::twistFlavor, quda::CompositeColorSpinorFieldDescriptor::volume, quda::ColorSpinorField::volume, and quda::ColorSpinorField::x.
std::ostream& quda::operator<< | ( | std::ostream & | out, |
const cudaColorSpinorField & | a | ||
) |
Definition at line 1435 of file cuda_color_spinor_field.cpp.
References quda::cudaColorSpinorField::alloc, quda::cudaColorSpinorField::init, quda::ColorSpinorField::norm, out, and quda::ColorSpinorField::v.
|
inline |
Definition at line 1008 of file complex_quda.h.
|
inline |
Definition at line 1017 of file complex_quda.h.
|
inline |
Definition at line 1025 of file complex_quda.h.
std::basic_istream< charT, traits > & quda::operator>> | ( | std::basic_istream< charT, traits > & | is, |
complex< ValueType > & | z | ||
) |
Definition at line 318 of file complex_quda.h.
void quda::orthoDir | ( | Complex ** | beta, |
std::vector< ColorSpinorField *> | Ap, | ||
int | k, | ||
int | pipeline | ||
) |
Definition at line 95 of file inv_gcr_quda.cpp.
References quda::blas::caxpy(), quda::blas::caxpyDotzy(), quda::blas::cDotProduct(), computeBeta(), pipeline, and updateAp().
Referenced by quda::GCR::operator()().
|
inline |
Definition at line 805 of file quda_matrix.h.
References conj().
Referenced by constructHHMat().
|
inline |
Definition at line 818 of file quda_matrix.h.
References conj().
|
inline |
Compute the outer product over color and take the spin trace: out(j,i) = a(s,j) * conj(b(s,i)).
a | Left-hand side ColorSpinor |
b | Right-hand side ColorSpinor |
Definition at line 985 of file color_spinor.h.
Referenced by sigmaOprod().
void quda::OvrImpSTOUTStep | ( | GaugeField & | dataDs, |
const GaugeField & | dataOr, | ||
double | rho, | ||
double | epsilon | ||
) |
Apply Over Improved STOUT smearing to the gauge field.
[out] | dataDs | Output smeared field |
[in] | dataOr | Input gauge field |
[in] | rho | smearing parameter |
[in] | epsilon | smearing parameter |
Definition at line 269 of file gauge_stout.cu.
References epsilon, errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().
Referenced by OvrImpSTOUTStep(), and performOvrImpSTOUTnStep().
void quda::OvrImpSTOUTStep | ( | GaugeOr | origin, |
GaugeDs | dest, | ||
const GaugeField & | dataOr, | ||
Float | rho, | ||
Float | epsilon | ||
) |
Definition at line 208 of file gauge_stout.cu.
References arg(), DOUBLE_TOL, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, qudaDeviceSynchronize, and SINGLE_TOL.
void quda::OvrImpSTOUTStep | ( | GaugeField & | dataDs, |
const GaugeField & | dataOr, | ||
Float | rho, | ||
Float | epsilon | ||
) |
Definition at line 217 of file gauge_stout.cu.
References errorQuda, OvrImpSTOUTStep(), QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_NO, and quda::GaugeField::Reconstruct().
|
inline |
Definition at line 83 of file dslash_pack.cuh.
References arg(), quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::dagger, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::in, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::nFace, quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::twist.
Referenced by quda::dslash::issuePack(), and PackGhost().
__device__ __host__ __forceinline__ void quda::packGhost | ( | Arg & | arg, |
int | x_cb, | ||
int | parity, | ||
int | spinor_parity, | ||
int | spin_block, | ||
int | color_block | ||
) |
Definition at line 95 of file color_spinor_pack.cuh.
References arg(), getCoords(), getCoords5(), quda::PackGhostArg< Field >::nDim, quda::PackGhostArg< Field >::parity, and s.
void quda::PackGhost | ( | void * | ghost[], |
const ColorSpinorField & | in, | ||
MemoryLocation | location, | ||
int | nFace, | ||
bool | dagger, | ||
int | parity, | ||
bool | spin_project, | ||
double | a, | ||
double | b, | ||
double | c, | ||
const cudaStream_t & | stream | ||
) |
Definition at line 342 of file dslash_pack2.cu.
References quda::Pack< Float, nColor, spin_project >::apply(), and pack().
void quda::PackGhost | ( | void * | ghost[], |
const ColorSpinorField & | in, | ||
MemoryLocation | location, | ||
int | nFace, | ||
bool | dagger, | ||
int | parity, | ||
bool | spin_project, | ||
double | a, | ||
double | b, | ||
double | c, | ||
const cudaStream_t & | stream | ||
) |
Definition at line 356 of file dslash_pack2.cu.
References quda::Pack< Float, nColor, spin_project >::a, quda::Pack< Float, nColor, spin_project >::b, quda::Pack< Float, nColor, spin_project >::c, quda::Pack< Float, nColor, spin_project >::dagger, errorQuda, quda::Pack< Float, nColor, spin_project >::ghost, quda::Pack< Float, nColor, spin_project >::in, quda::Pack< Float, nColor, spin_project >::location, quda::ColorSpinorField::Ncolor(), quda::Pack< Float, nColor, spin_project >::nFace, quda::Pack< Float, nColor, spin_project >::parity, and stream.
void quda::PackGhost | ( | void * | ghost[2 *QUDA_MAX_DIM], |
const ColorSpinorField & | field, | ||
MemoryLocation | location, | ||
int | nFace, | ||
bool | dagger, | ||
int | parity, | ||
bool | spin_project, | ||
double | a, | ||
double | b, | ||
double | c, | ||
const cudaStream_t & | stream | ||
) |
Dslash face packing routine.
[out] | ghost | Array of packed halos, order is [2*dim+dir] |
[in] | field | ColorSpinorField to be packed |
[in] | location | Locations where the packed fields are (Device, Host and/or Remote) |
[in] | nFace | Depth of halo |
[in] | dagger | Whether this is for the dagger operator |
[in] | parity | Field parity |
[in] | spin_project | Whether to spin_project when packing |
[in] | a | Twisted mass scale factor (for preconditioned twisted-mass dagger operator) |
[in] | b | Twisted mass chiral twist factor (for preconditioned twisted-mass dagger operator) |
[in] | c | Twisted mass flavor twist factor (for preconditioned non-degenerate twisted-mass dagger operator) |
[in] | stream | Which stream are we executing in |
Definition at line 367 of file dslash_pack2.cu.
References quda::Pack< Float, nColor, spin_project >::a, quda::Pack< Float, nColor, spin_project >::b, quda::Pack< Float, nColor, spin_project >::c, commDim, quda::Pack< Float, nColor, spin_project >::dagger, errorQuda, getKernelPackT(), quda::Pack< Float, nColor, spin_project >::ghost, quda::Pack< Float, nColor, spin_project >::in, quda::Pack< Float, nColor, spin_project >::location, quda::Pack< Float, nColor, spin_project >::nFace, quda::Pack< Float, nColor, spin_project >::parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_QUARTER_PRECISION, QUDA_SINGLE_PRECISION, and stream.
Referenced by quda::cudaColorSpinorField::packGhost().
__global__ void quda::packKernel | ( | Arg | arg | ) |
Definition at line 184 of file dslash_pack.cuh.
References arg(), dimFromFaceIndex(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, s, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::sites_per_block.
__global__ void quda::packShmemKernel | ( | Arg | arg | ) |
Definition at line 222 of file dslash_pack.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, QUDA_5D_PC, and s.
void quda::packSpinor | ( | OutOrder & | outOrder, |
const InOrder & | inOrder, | ||
int | volume | ||
) |
CPU function to reorder spinor fields.
Definition at line 22 of file copy_color_spinor_mg.cuh.
References s.
__global__ void quda::packSpinorKernel | ( | OutOrder | outOrder, |
const InOrder | inOrder, | ||
int | volume | ||
) |
CUDA kernel to reorder spinor fields. Adopts a similar form to the CPU version, using the same inlined functions.
Definition at line 34 of file copy_color_spinor_mg.cuh.
References s.
|
inline |
Definition at line 154 of file dslash_pack.cuh.
References arg(), quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::in, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::nFace, quda::Arg< real, Ns, Nc, order >::nParity, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity.
__global__ void quda::packStaggeredKernel | ( | Arg | arg | ) |
Definition at line 288 of file dslash_pack.cuh.
References arg(), dimFromFaceIndex(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, s, and quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::sites_per_block.
__global__ void quda::packStaggeredShmemKernel | ( | Arg | arg | ) |
Definition at line 325 of file dslash_pack.cuh.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::PackArg< Float_, nColor_, nSpin_, spin_project_ >::parity, and s.
|
inline |
Helper function for determining if the preconditioning type of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 1011 of file color_spinor_field.h.
References errorQuda, quda::ColorSpinorField::PCType(), and QUDA_PC_INVALID.
Referenced by PCType_().
|
inline |
Helper function for determining if the preconditioning type of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check preconditioning type on |
Definition at line 1030 of file color_spinor_field.h.
References PCType_().
void quda::PGaugeExchange | ( | cudaGaugeField & | data, |
const int | dir, | ||
const int | parity | ||
) |
Exchange gauge field data between nodes for the given direction and parity.
[in,out] | data | Gauge field |
[in] | dir | Direction in which to exchange |
[in] | parity | Field parity |
Definition at line 342 of file pgauge_exchange.cu.
References comm_dim_partitioned(), errorQuda, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, and QUDA_SINGLE_PRECISION.
void quda::PGaugeExchangeFree | ( | ) |
Release all allocated memory used to exchange data between nodes.
Referenced by main(), and GaugeAlgTest::TearDown().
long quda::pinned_allocated_peak | ( | ) |
void * quda::pinned_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h
Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.
Definition at line 250 of file malloc.cpp.
References aligned_malloc(), quda::MemAlloc::base_size, errorQuda, memset(), PINNED, and track_malloc().
Referenced by quda::pool::pinned_malloc_().
double3 quda::plaquette | ( | const GaugeField & | U | ) |
Compute the plaquette of the gauge field.
[in] | U | The gauge field upon which to compute the plaquette |
Definition at line 65 of file gauge_plaq.cu.
References INSTANTIATE_PRECISION, and quda::LatticeField::Location().
Referenced by main(), performAPEnStep(), performOvrImpSTOUTnStep(), performSTOUTnStep(), plaqQuda(), GaugeAlgTest::SetUp(), and TEST_F().
|
inline |
Definition at line 32 of file gauge_plaq.cuh.
References conj(), getTrace(), linkIndexShift(), and mu.
void quda::plaquette | ( | const Gauge | dataOr, |
const GaugeField & | data, | ||
double2 & | plq, | ||
QudaFieldLocation | location | ||
) |
Definition at line 51 of file gauge_plaq.cu.
References quda::GaugePlaq< Float, Gauge >::apply(), quda::GaugePlaq< Float, Gauge >::arg, comm_allreduce_array(), comm_size(), qudaDeviceSynchronize, quda::ReduceArg< double2 >::result_h, and quda::GaugePlaqArg< Gauge >::threads.
void quda::plaquette | ( | const GaugeField & | data, |
double2 & | plq, | ||
QudaFieldLocation | location | ||
) |
Definition at line 61 of file gauge_plaq.cu.
References INSTANTIATE_RECONSTRUCT.
void quda::point | ( | T & | t, |
int | x, | ||
int | s, | ||
int | c | ||
) |
Create a point source at spacetime point x, spin s and colour c
Definition at line 31 of file color_spinor_util.cu.
Referenced by genericSource().
|
inline |
Returns the complex with magnitude m and angle theta in radians.
Definition at line 1098 of file complex_quda.h.
Referenced by construct_fat_long_gauge_field(), exp(), and sqrt().
|
inline |
Definition at line 1104 of file complex_quda.h.
|
inline |
|
inline |
Project the input matrix on the SU(3) group. First unitarize the matrix and then project onto the special unitary group.
in | The input matrix to which we're projecting |
tol | Tolerance to which this check is applied |
Definition at line 87 of file su3_project.cuh.
References arg(), checkUnitary(), conj(), getDeterminant(), in, inverse(), mod(), norm(), out, and pow().
|
static |
Referenced by quda::DslashCoarsePolicyTune::advanceAux(), quda::DslashCoarsePolicyTune::apply(), disable_policy(), quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), and enable_policy().
bool quda::policyTuning | ( | ) |
Definition at line 495 of file tune.cpp.
References policy_tuning.
Referenced by tuneLaunch().
void quda::popKernelPackT | ( | ) |
Definition at line 42 of file dslash_quda.cu.
References errorQuda, and setKernelPackT().
Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), ApplyCovDev(), ApplyDomainWall5D(), ApplyNdegTwistedMassPreconditioned(), ApplyTwistedMassPreconditioned(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::cudaColorSpinorField::exchangeGhost(), quda::cudaColorSpinorField::sendGhost(), and quda::cudaColorSpinorField::sendStart().
void quda::postTrace_ | ( | const char * | func, |
const char * | file, | ||
int | line | ||
) |
Post an event in the trace, recording where it was posted.
Definition at line 92 of file tune.cpp.
References quda::TuneKey::aux_n, i32toa(), quda::TraceKey::key, tmp, and traceEnabled().
Referenced by quda::TunableVectorYZ::resizeStep().
|
inline |
Definition at line 111 of file complex_quda.h.
References pow().
Referenced by __fast_pow(), checkGauge(), comm_declare_receive_displaced(), comm_declare_send_displaced(), comm_declare_strided_receive_displaced(), comm_declare_strided_send_displaced(), compare_mom(), compareLink(), compareSpinor(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), dslashReference_5th_inv(), exponentiate_iQ(), insertNoise(), invertMultiShiftQuda(), massRescale(), MatDagMatQuda(), quda::CG::operator()(), quda::MultiShiftCG::operator()(), polarSu3(), TEST(), and TEST_P().
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Definition at line 1202 of file complex_quda.h.
References exp().
Referenced by pow().
|
inline |
Helper function for determining if the precision of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
Definition at line 672 of file lattice_field.h.
References errorQuda, quda::LatticeFieldParam::precision, quda::LatticeField::Precision(), and QUDA_INVALID_PRECISION.
Referenced by Precision_().
|
inline |
Helper function for determining if the precision of the fields is the same.
[in] | a | Input field |
[in] | b | Input field |
[in] | args | List of additional fields to check precision on |
Definition at line 689 of file lattice_field.h.
References Precision_().
void quda::print | ( | const double | d[], |
int | n | ||
) |
Definition at line 44 of file inv_mpcg_quda.cpp.
Referenced by quda::MPBiCGstab::computeMatrixPowers().
|
static |
Definition at line 85 of file malloc.cpp.
References quda::MemAlloc::base_size, quda::MemAlloc::file, quda::MemAlloc::func, quda::MemAlloc::line, and printfQuda.
Referenced by assertAllMemFree().
|
static |
Definition at line 78 of file malloc.cpp.
References printfQuda.
Referenced by assertAllMemFree().
|
static |
Definition at line 67 of file malloc.cpp.
References printfQuda, and quda::MemAlloc::size.
Referenced by host_free_().
void quda::print_vector | ( | const Order & | o, |
unsigned int | x | ||
) |
Definition at line 321 of file color_spinor_util.cu.
References parity, and printfQuda.
Referenced by genericPrintVector().
void quda::printAPIProfile | ( | ) |
Print out the timer profile for CUDA API calls.
Definition at line 336 of file quda_cuda_api.cpp.
Referenced by endQuda().
void quda::printLaunchTimer | ( | ) |
Definition at line 843 of file tune.cpp.
References quda::TimeProfile::Print().
Referenced by endQuda(), and profilerStop().
|
inline |
Definition at line 1149 of file quda_matrix.h.
Referenced by applyStaggered(), and isUnitary().
void quda::printPeakMemUsage | ( | ) |
Definition at line 375 of file malloc.cpp.
References DEVICE, DEVICE_PINNED, and printfQuda.
Referenced by endQuda().
void quda::projectSU3 | ( | cudaGaugeField & | U, |
double | tol, | ||
int * | fails | ||
) |
Project the input gauge field onto the SU(3) group. This is a destructive operation. The number of link failures is reported so appropriate action can be taken.
U | Gauge field that we are projecting onto SU(3) |
tol | Tolerance to which the iterative algorithm works |
fails | Number of link failures (device pointer) |
Definition at line 590 of file unitarize_links_quda.cu.
References quda::ProjectSU3< Float, G >::apply(), arg(), checkCudaError, errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::GaugeField::StaggeredPhaseApplied(), and tol.
Referenced by projectSU3Quda().
__global__ void quda::ProjectSU3kernel | ( | ProjectSU3Arg< Float, G > | arg | ) |
Definition at line 533 of file unitarize_links_quda.cu.
References atomicAdd(), quda::ProjectSU3Arg< Float, G >::fails, mu, parity, quda::ProjectSU3Arg< Float, G >::threads, quda::ProjectSU3Arg< Float, G >::tol, and quda::ProjectSU3Arg< Float, G >::u.
void quda::Prolongate | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const ColorSpinorField & | v, | ||
int | Nvec, | ||
const int * | fine_to_coarse, | ||
const int *const * | spin_map, | ||
int | parity = QUDA_INVALID_PARITY |
||
) |
Apply the prolongation operator.
[out] | out | Resulting fine grid field |
[in] | in | Input field on coarse grid |
[in] | v | Matrix field containing the null-space components |
[in] | Nvec | Number of null-space components |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | spin_map | Spin blocking lookup table |
[in] | parity | Parity of the output fine field (if single parity output field) |
Definition at line 296 of file prolongator.cu.
References checkCudaError, checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by quda::Transfer::P(), and quda::Transfer::setTransferGPU().
void quda::pushKernelPackT | ( | bool | pack | ) |
Definition at line 30 of file dslash_quda.cu.
References getKernelPackT(), setKernelPackT(), and warningQuda.
Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), ApplyCovDev(), ApplyDomainWall5D(), ApplyNdegTwistedMassPreconditioned(), ApplyTwistedMassPreconditioned(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::cudaColorSpinorField::exchangeGhost(), quda::cudaColorSpinorField::sendGhost(), and quda::cudaColorSpinorField::sendStart().
__global__ void quda::qChargeComputeKernel | ( | Arg | arg | ) |
Definition at line 28 of file gauge_qcharge.cuh.
References arg(), getTrace(), parity, and Pi2.
cudaError_t quda::qudaDeviceSynchronize_ | ( | const char * | func, |
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
Definition at line 306 of file quda_cuda_api.cpp.
References errorQuda, PROFILE, QUDA_PROFILE_DEVICE_SYNCHRONIZE, and QUDA_PROFILE_FUNC_SET_ATTRIBUTE.
cudaError_t quda::qudaEventQuery | ( | cudaEvent_t & | event | ) |
Wrapper around cudaEventQuery or cuEventQuery.
[in] | event | Event we are querying |
Definition at line 209 of file quda_cuda_api.cpp.
References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_QUERY.
Referenced by quda::blas::multiReduceLaunch(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), and quda::blas::reduceLaunch().
cudaError_t quda::qudaEventRecord | ( | cudaEvent_t & | event, |
cudaStream_t | stream = 0 |
||
) |
Wrapper around cudaEventRecord or cuEventRecord.
[in,out] | event | Event we are recording |
[in,out] | stream | Stream where to record the event |
Definition at line 230 of file quda_cuda_api.cpp.
References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_RECORD.
Referenced by exchangeExtendedGhost(), quda::dslash::issueGather(), quda::dslash::issuePack(), quda::blas::multiReduceLaunch(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopy< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopy< Dslash >::operator()(), quda::blas::reduceLaunch(), quda::cudaGaugeField::sendStart(), quda::cudaColorSpinorField::sendStart(), and shiftColorSpinorField().
cudaError_t quda::qudaEventSynchronize | ( | cudaEvent_t & | event | ) |
Wrapper around cudaEventSynchronize or cuEventSynchronize.
[in] | event | Event which we are synchronizing with respect to |
Definition at line 287 of file quda_cuda_api.cpp.
References errorQuda, PROFILE, and QUDA_PROFILE_EVENT_SYNCHRONIZE.
Referenced by quda::cudaGaugeField::commsComplete().
cudaError_t quda::qudaLaunchKernel | ( | const void * | func, |
dim3 | gridDim, | ||
dim3 | blockDim, | ||
void ** | args, | ||
size_t | sharedMem, | ||
cudaStream_t | stream | ||
) |
Wrapper around cudaLaunchKernel.
[in] | func | Device function symbol |
[in] | gridDim | Grid dimensions |
[in] | blockDim | Block dimensions |
[in] | args | Arguments |
[in] | sharedMem | Shared memory requested per thread block |
[in] | stream | Stream identifier |
Definition at line 201 of file quda_cuda_api.cpp.
References activeTuning(), errorQuda, PROFILE, and QUDA_PROFILE_LAUNCH_KERNEL.
Referenced by quda::Dslash< Float >::launch(), quda::Dslash5< Float, nColor, Arg >::launch(), and quda::Pack< Float, nColor, spin_project >::launch().
void quda::qudaMemcpy2DAsync_ | ( | void * | dst, |
size_t | dpitch, | ||
const void * | src, | ||
size_t | spitch, | ||
size_t | width, | ||
size_t | hieght, | ||
cudaMemcpyKind | kind, | ||
const cudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support.
[out] | dst | Destination pointer |
[in] | dpitch | Destination pitch |
[in] | src | Source pointer |
[in] | spitch | Source pitch |
[in] | width | Width in bytes |
[in] | height | Number of rows |
[in] | kind | Type of memory copy |
[in] | stream | Stream to issue copy |
Definition at line 170 of file quda_cuda_api.cpp.
References quda::QudaMemCopy::dst, errorQuda, param, PROFILE, and QUDA_PROFILE_MEMCPY2D_D2H_ASYNC.
void quda::qudaMemcpy_ | ( | void * | dst, |
const void * | src, | ||
size_t | count, | ||
cudaMemcpyKind | kind, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpy used for auto-profiling. Do not call directly, rather call macro below which will grab the location of the call.
[out] | dst | Destination pointer |
[in] | src | Source pointer |
[in] | count | Size of transfer |
[in] | kind | Type of memory copy |
Definition at line 126 of file quda_cuda_api.cpp.
References quda::QudaMemCopy::apply(), copy(), and errorQuda.
void quda::qudaMemcpyAsync_ | ( | void * | dst, |
const void * | src, | ||
size_t | count, | ||
cudaMemcpyKind | kind, | ||
const cudaStream_t & | stream, | ||
const char * | func, | ||
const char * | file, | ||
const char * | line | ||
) |
Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support.
[out] | dst | Destination pointer |
[in] | src | Source pointer |
[in] | count | Size of transfer |
[in] | kind | Type of memory copy |
[in] | stream | Stream to issue copy |
Definition at line 140 of file quda_cuda_api.cpp.
References quda::QudaMemCopy::apply(), copy(), errorQuda, PROFILE, QUDA_PROFILE_MEMCPY_D2D_ASYNC, QUDA_PROFILE_MEMCPY_D2H_ASYNC, and QUDA_PROFILE_MEMCPY_H2D_ASYNC.
cudaError_t quda::qudaStreamSynchronize | ( | cudaStream_t & | stream | ) |
Wrapper around cudaStreamSynchronize or cuStreamSynchronize.
[in] | stream | Stream which we are synchronizing with respect to |
Definition at line 268 of file quda_cuda_api.cpp.
References errorQuda, PROFILE, and QUDA_PROFILE_STREAM_SYNCHRONIZE.
Referenced by quda::cudaGaugeField::exchangeGhost(), quda::cudaGaugeField::injectGhost(), quda::dslash::DslashZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopy< Dslash >::operator()(), and quda::dslash::DslashFusedZeroCopy< Dslash >::operator()().
cudaError_t quda::qudaStreamWaitEvent | ( | cudaStream_t | stream, |
cudaEvent_t | event, | ||
unsigned int | flags | ||
) |
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent.
[in,out] | stream | Stream which we are instructing to wait |
[in] | event | Event we are waiting on |
[in] | flags | Flags to pass to function |
Definition at line 249 of file quda_cuda_api.cpp.
References errorQuda, PROFILE, and QUDA_PROFILE_STREAM_WAIT_EVENT.
Referenced by quda::dslash::commsComplete(), quda::dslash::completeDslash(), quda::cudaColorSpinorField::exchangeGhost(), quda::dslash::issueGather(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopy< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopy< Dslash >::operator()(), and shiftColorSpinorField().
|
inline |
Definition at line 49 of file malloc_quda.h.
Referenced by file_name().
void quda::random | ( | T & | t | ) |
Random number insertion over all field elements
Definition at line 14 of file color_spinor_util.cu.
References comm_drand(), parity, and s.
Referenced by genericSource().
|
inline |
Return a random number between a and b.
state | curand rng state |
a | lower range |
b | upper range |
Definition at line 75 of file random_quda.h.
|
inline |
Return a random number between 0 and 1.
state | curand rng state |
Definition at line 96 of file random_quda.h.
|
inline |
Definition at line 86 of file random_quda.h.
|
inline |
Definition at line 107 of file random_quda.h.
|
inline |
Definition at line 81 of file random_quda.h.
|
inline |
Definition at line 102 of file random_quda.h.
|
inline |
Definition at line 137 of file cub_helper.cuh.
Referenced by quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::blas::multiReduce(), and quda::blas::nativeReduce().
|
inline |
Definition at line 94 of file cub_helper.cuh.
References quda::ReduceArg< T >::partial, quda::ReduceArg< T >::result_d, sum(), and zero().
|
inline |
Definition at line 207 of file cub_helper.cuh.
References quda::vector_type< scalar, n >::data, quda::ColorSpinorField::exchange(), in, quda::ReduceArg< T >::partial, quda::ReduceArg< T >::result_d, and sum().
int quda::reliable | ( | double & | rNorm, |
double & | maxrx, | ||
double & | maxrr, | ||
const double & | r2, | ||
const double & | delta | ||
) |
Definition at line 37 of file inv_bicgstab_quda.cpp.
References sqrt(), and updateR().
Referenced by quda::BiCGstab::operator()(), and quda::MultiShiftCG::operator()().
QudaFieldLocation quda::reorder_location | ( | ) |
Return whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.
Definition at line 725 of file lattice_field.cpp.
References reorder_location_.
Referenced by quda::cudaCloverField::copy(), quda::cudaGaugeField::copy(), quda::cpuGaugeField::copy(), quda::cudaColorSpinorField::loadSpinorField(), quda::cudaGaugeField::saveCPUField(), and quda::cudaColorSpinorField::saveSpinorField().
void quda::reorder_location_set | ( | QudaFieldLocation | reorder_location_ | ) |
Set whether data is reordered on the CPU or GPU. This can be set at QUDA initialization using the environment variable QUDA_REORDER_LOCATION.
reorder_location_ | The location to set where data will be reordered |
Definition at line 726 of file lattice_field.cpp.
Referenced by initQudaDevice().
|
static |
Definition at line 9 of file solver.cpp.
References getVerbosity(), printfQuda, and QUDA_VERBOSE.
Referenced by quda::Solver::create().
__device__ __host__ void quda::rescaleY | ( | Arg & | arg, |
int | parity, | ||
int | x_cb, | ||
int | c_row, | ||
int | c_col | ||
) |
Rescale the matrix elements by arg.rescale
Definition at line 1167 of file coarse_op_kernel.cuh.
void quda::RescaleYCPU | ( | Arg & | arg | ) |
Definition at line 1181 of file coarse_op_kernel.cuh.
References arg(), nColor, and parity.
__global__ void quda::RescaleYGPU | ( | Arg | arg | ) |
Definition at line 1195 of file coarse_op_kernel.cuh.
References arg(), nColor, and parity.
void quda::Restrict | ( | Arg | arg | ) |
Definition at line 90 of file restrictor.cuh.
References quda::Arg< real, Ns, Nc, order >::nParity, parity, s, and tmp.
Referenced by quda::Transfer::R(), and quda::Transfer::setTransferGPU().
void quda::Restrict | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
const ColorSpinorField & | v, | ||
int | Nvec, | ||
const int * | fine_to_coarse, | ||
const int * | coarse_to_fine, | ||
const int *const * | spin_map, | ||
int | parity = QUDA_INVALID_PARITY |
||
) |
Apply the restriction operator.
[out] | out | Resulting coarsened field |
[in] | in | Input field on fine grid |
[in] | v | Matrix field containing the null-space components |
[in] | Nvec | Number of null-space components |
[in] | fine_to_coarse | Fine-to-coarse lookup table (linear indices) |
[in] | spin_map | Spin blocking lookup table |
[in] | parity | Parity of the input fine field (if single parity input field) |
Definition at line 263 of file restrictor.cu.
References checkPrecision, errorQuda, quda::ColorSpinorField::FieldOrder(), in, out, parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
__global__ void quda::RestrictKernel | ( | Arg | arg | ) |
Here, we ensure that each thread block maps exactly to a geometric block. Each thread block corresponds to one geometric block, with number of threads equal to the number of fine grid points per aggregate, so each thread represents a fine-grid point. The look up table coarse_to_fine is the mapping to each fine grid point.
Definition at line 136 of file restrictor.cuh.
References quda::Arg< real, Ns, Nc, order >::nParity, parity, s, and tmp.
|
inline |
|
inline |
Definition at line 35 of file convert.h.
Referenced by copyFloatN().
|
inline |
|
inline |
Definition at line 34 of file convert.h.
Referenced by copy(), copy_and_scale(), and copyFloatN().
|
inline |
void * quda::safe_malloc_ | ( | const char * | func, |
const char * | file, | ||
int | line, | ||
size_t | size | ||
) |
Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h.
Definition at line 226 of file malloc.cpp.
References quda::MemAlloc::base_size, errorQuda, HOST, memset(), quda::MemAlloc::size, and track_malloc().
void quda::saveProfile | ( | const std::string | label = "" | ) |
Save profile to disk.
Definition at line 514 of file tune.cpp.
References quda::TuneKey::aux_n, comm_rank(), count, getVerbosity(), gitversion, launchTimer, quda::TuneParam::n_calls, param, printfQuda, QUDA_SUMMARIZE, quda_version, serializeProfile(), serializeTrace(), quda::TraceKey::time, tmp, traceEnabled(), and warningQuda.
Referenced by endQuda(), newDeflationQuda(), and quda::TunableVectorYZ::resizeStep().
void quda::saveTuneCache | ( | bool | error | ) |
Write tunecache to disk.
Definition at line 426 of file tune.cpp.
References comm_rank(), getVerbosity(), gitversion, printfQuda, QUDA_SUMMARIZE, quda_version, serializeTuneCache(), quda::TraceKey::time, and warningQuda.
Referenced by eigensolveQuda(), endQuda(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), quda::multigrid_solver::multigrid_solver(), newMultigridQuda(), quda::TunableVectorYZ::resizeStep(), and updateMultigridQuda().
|
static |
Serialize the tuning profile to an ostream, useful for writing to a file or sending to other nodes.
Definition at line 199 of file tune.cpp.
References quda::TuneKey::aux, quda::TuneKey::aux_n, quda::TuneParam::comment, quda::TraceKey::key, quda::TuneParam::n_calls, quda::TuneKey::name, param, quda::TuneParam::time, quda::TraceKey::time, tmp, and quda::TuneKey::volume.
Referenced by saveProfile().
|
static |
Serialize trace to an ostream, useful for writing to a file or sending to other nodes.
Definition at line 261 of file tune.cpp.
References quda::TuneKey::aux, quda::TuneKey::aux_n, quda::TraceKey::key, quda::TuneKey::name, tmp, and quda::TuneKey::volume.
Referenced by saveProfile().
|
static |
Serialize tunecache to an ostream, useful for writing to a file or sending to other nodes.
Definition at line 172 of file tune.cpp.
References quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneParam::block, quda::TuneParam::comment, quda::TuneParam::grid, quda::TraceKey::key, quda::TuneKey::name, param, quda::TuneParam::shared_bytes, quda::TuneParam::time, and quda::TuneKey::volume.
Referenced by broadcastTuneCache(), and saveTuneCache().
|
inline |
Definition at line 58 of file blas_helper.cuh.
Referenced by Spinor< RegType, StoreType, N, write >::set().
|
inline |
Definition at line 59 of file blas_helper.cuh.
|
inline |
Definition at line 60 of file blas_helper.cuh.
|
inline |
Definition at line 61 of file blas_helper.cuh.
void quda::setDiracParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
bool | pc | ||
) |
Definition at line 1562 of file interface_quda.cpp.
References quda::GaugeField::Anisotropy(), quda::DiracParam::b_5, QudaInvertParam_s::b_5, quda::DiracParam::c_5, QudaInvertParam_s::c_5, quda::DiracParam::clover, cloverPrecise, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec, quda::DiracParam::dagger, QudaInvertParam_s::dagger, QudaInvertParam_s::dirac_order, QudaInvertParam_s::dslash_type, quda::DiracParam::epsilon, QudaInvertParam_s::epsilon, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeFatPrecise, gaugeLongPrecise, gaugePrecise, getVerbosity(), quda::DiracParam::kappa, kappa, QudaInvertParam_s::kappa, quda::DiracParam::laplace3D, QudaInvertParam_s::laplace3D, quda::DiracParam::longGauge, quda::DiracParam::Ls, QudaInvertParam_s::Ls, quda::DiracParam::m5, QudaInvertParam_s::m5, quda::DiracParam::mass, QudaInvertParam_s::mass, QudaInvertParam_s::matpc_type, quda::DiracParam::matpcType, quda::DiracParam::mu, QudaInvertParam_s::mu, quda::LatticeField::Precision(), printfQuda, QUDA_ASQTAD_DIRAC, QUDA_ASQTAD_DSLASH, QUDA_ASQTADPC_DIRAC, QUDA_CLOVER_DIRAC, QUDA_CLOVER_WILSON_DSLASH, QUDA_CLOVERPC_DIRAC, QUDA_COVDEV_DSLASH, QUDA_CPS_WILSON_DIRAC_ORDER, QUDA_DEBUG_VERBOSE, QUDA_DOMAIN_WALL_4D_DIRAC, QUDA_DOMAIN_WALL_4D_DSLASH, QUDA_DOMAIN_WALL_4DPC_DIRAC, QUDA_DOMAIN_WALL_DIRAC, QUDA_DOMAIN_WALL_DSLASH, QUDA_DOMAIN_WALLPC_DIRAC, QUDA_GAUGE_COVDEV_DIRAC, QUDA_GAUGE_LAPLACE_DIRAC, QUDA_GAUGE_LAPLACEPC_DIRAC, QUDA_LAPLACE_DSLASH, QUDA_MAX_DWF_LS, QUDA_MOBIUS_DOMAIN_WALL_DIRAC, QUDA_MOBIUS_DOMAIN_WALLPC_DIRAC, QUDA_MOBIUS_DWF_DSLASH, QUDA_STAGGERED_DIRAC, QUDA_STAGGERED_DSLASH, QUDA_STAGGEREDPC_DIRAC, QUDA_TWIST_NONDEG_DOUBLET, QUDA_TWIST_SINGLET, QUDA_TWISTED_CLOVER_DIRAC, QUDA_TWISTED_CLOVER_DSLASH, QUDA_TWISTED_CLOVERPC_DIRAC, QUDA_TWISTED_MASS_DIRAC, QUDA_TWISTED_MASS_DSLASH, QUDA_TWISTED_MASSPC_DIRAC, QUDA_WILSON_DIRAC, QUDA_WILSON_DSLASH, QUDA_WILSONPC_DIRAC, QudaInvertParam_s::twist_flavor, and quda::DiracParam::type.
Referenced by cloverQuda(), computeCloverForceQuda(), computeStaggeredForceQuda(), createDirac(), quda::deflated_solver::deflated_solver(), dslashQuda(), dslashQuda_4dpc(), dslashQuda_mdwf(), init(), MatDagMatQuda(), MatQuda(), quda::DiracParam::print(), setDiracPreParam(), setDiracRefineParam(), and setDiracSloppyParam().
void quda::setDiracPreParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
const bool | pc, | ||
bool | comms | ||
) |
Definition at line 1698 of file interface_quda.cpp.
References quda::DiracParam::clover, cloverPrecondition, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec_precondition, QudaInvertParam_s::dslash_type, QudaInvertParam_s::dslash_type_precondition, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeExtended, gaugeFatExtended, gaugeFatPrecondition, gaugeLongExtended, gaugeLongPrecondition, gaugePrecondition, QudaInvertParam_s::inv_type, quda::DiracParam::longGauge, QudaInvertParam_s::overlap, quda::LatticeField::Precision(), QUDA_ASQTAD_DSLASH, QUDA_PCG_INVERTER, QUDA_STAGGERED_DIRAC, QUDA_STAGGERED_DSLASH, QUDA_STAGGEREDPC_DIRAC, setDiracParam(), and quda::DiracParam::type.
Referenced by createDirac(), quda::multigrid_solver::multigrid_solver(), and updateMultigridQuda().
void quda::setDiracRefineParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
const bool | pc | ||
) |
Definition at line 1679 of file interface_quda.cpp.
References quda::DiracParam::clover, cloverRefinement, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec_refinement_sloppy, QudaInvertParam_s::dslash_type, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeFatRefinement, gaugeLongRefinement, gaugeRefinement, quda::DiracParam::longGauge, quda::LatticeField::Precision(), QUDA_ASQTAD_DSLASH, and setDiracParam().
Referenced by createDirac().
void quda::setDiracSloppyParam | ( | DiracParam & | diracParam, |
QudaInvertParam * | inv_param, | ||
bool | pc | ||
) |
Definition at line 1661 of file interface_quda.cpp.
References quda::DiracParam::clover, cloverSloppy, quda::DiracParam::commDim, QudaInvertParam_s::cuda_prec_sloppy, QudaInvertParam_s::dslash_type, errorQuda, quda::DiracParam::fatGauge, quda::DiracParam::gauge, gaugeFatSloppy, gaugeLongSloppy, gaugeSloppy, quda::DiracParam::longGauge, quda::LatticeField::Precision(), QUDA_ASQTAD_DSLASH, and setDiracParam().
Referenced by createDirac(), quda::deflated_solver::deflated_solver(), quda::multigrid_solver::multigrid_solver(), quda::DiracParam::print(), and updateMultigridQuda().
|
inline |
Definition at line 653 of file quda_matrix.h.
Referenced by bdSVD(), computeAPEStep(), computeGenGauss(), computeOvrImpSTOUTStep(), computeSTOUTStep(), constructHHMat(), exponentiate_iQ(), getRealBidiagMatrix(), and smallSVD().
|
inline |
Definition at line 669 of file quda_matrix.h.
|
inline |
Definition at line 685 of file quda_matrix.h.
void quda::setKernelPackT | ( | bool | pack | ) |
pack | Sets whether to use a kernel to pack the T dimension |
Definition at line 24 of file dslash_quda.cu.
Referenced by quda::dslash::DslashPolicyTune< Dslash >::apply(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), eigensolveQuda(), popKernelPackT(), pushKernelPackT(), and set_kernel_pack_t_().
void quda::setPackComms | ( | const int * | dim_pack | ) |
Helper function that sets which dimensions the packing kernel should be packing for.
[in] | dim_pack | Array that specifies which dimensions need to be packed. |
Definition at line 14 of file dslash_pack2.cu.
References commDim, and QUDA_MAX_DIM.
Referenced by quda::Dslash< Float >::Dslash(), and DslashCuda::DslashCuda().
void quda::setPolicyTuning | ( | bool | policy_tuning_ | ) |
Enable / disable whether we are tuning a policy.
Definition at line 499 of file tune.cpp.
Referenced by quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::DslashPolicyTune(), quda::TunableVectorYZ::resizeStep(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), quda::DslashCoarsePolicyTune::~DslashCoarsePolicyTune(), quda::dslash::DslashPolicyTune< Dslash >::~DslashPolicyTune(), and quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::~TileSizeTune().
void quda::setTransferGPU | ( | bool | ) |
void quda::setUnitarizeLinksConstants | ( | double | unitarize_eps, |
double | max_error, | ||
bool | allow_svd, | ||
bool | svd_only, | ||
double | svd_rel_error, | ||
double | svd_abs_error | ||
) |
Definition at line 72 of file unitarize_links_quda.cu.
References acos(), arg(), conj(), cos(), epsilon, errorQuda, getDeterminant(), getTrace(), in, inverse(), s, size, and sqrt().
Referenced by computeKSLinkQuda(), hisq_test(), setActionPaths(), GaugeAlgTest::SetReunitarizationConsts(), setReunitarizationConsts(), and unitarize_link_test().
|
inline |
Definition at line 702 of file quda_matrix.h.
Referenced by computeStaple(), computeStapleRectangle(), exponentiate_iQ(), and quda::GaugeSTOUTArg< Float, GaugeOr, GaugeDs >::GaugeSTOUTArg().
|
inline |
Definition at line 717 of file quda_matrix.h.
|
inline |
Definition at line 732 of file quda_matrix.h.
void quda::shiftColorSpinorField | ( | cudaColorSpinorField & | dst, |
const cudaColorSpinorField & | src, | ||
const unsigned int | parity, | ||
const unsigned int | dim, | ||
const int | shift | ||
) |
Definition at line 207 of file shift_quark_field.cu.
References arg(), commDimPartitioned(), dagger, quda::ShiftColorSpinorFieldArg< Output, Input >::dir, errorQuda, quda::ColorSpinorField::Even(), quda::dslash::gatherEnd, quda::ColorSpinorField::Nspin(), quda::ColorSpinorField::Odd(), quda::dslash::packEnd, quda::ShiftColorSpinorFieldArg< Output, Input >::parity, quda::LatticeField::Precision(), QUDA_BACKWARDS, QUDA_CPU_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FORWARDS, QUDA_FULL_SITE_SUBSET, QUDA_SINGLE_PRECISION, qudaEventRecord(), qudaStreamWaitEvent(), quda::dslash::scatterEnd, quda::ShiftColorSpinorFieldArg< Output, Input >::shift, quda::ColorSpinorField::SiteSubset(), streams, and quda::ColorSpinorField::Volume().
__global__ void quda::shiftColorSpinorFieldExternalKernel | ( | ShiftQuarkArg< Output, Input > | arg | ) |
Definition at line 93 of file shift_quark_field.cu.
__global__ void quda::shiftColorSpinorFieldKernel | ( | ShiftQuarkArg< Output, Input > | arg | ) |
Definition at line 68 of file shift_quark_field.cu.
References neighborIndex(), and quda::ShiftColorSpinorFieldArg< Output, Input >::shift.
|
inline |
Definition at line 42 of file clover_sigma_outer_product.cuh.
References conj(), quda::ColorSpinor< Float, Nc, Ns >::data, mu, quda::CloverSigmaOprodArg< Float, Output, InputA, InputB >::nvector, outerProdSpinTrace(), and parity.
__global__ void quda::sigmaOprodKernel | ( | Arg | arg | ) |
Definition at line 66 of file clover_sigma_outer_product.cuh.
|
inline |
Definition at line 51 of file complex_quda.h.
References sin().
Referenced by cos(), cosh(), exponentiate_iQ(), genericSource(), genGauss(), link_sanity_check_internal_8(), new_load_half(), polar(), sin(), quda::Trig< isFixed, T >::Sin(), sinh(), su3Reconstruct8(), and tan().
void quda::sin | ( | P & | p, |
int | d, | ||
int | n, | ||
int | offset | ||
) |
Insert a sinusoidal wave sin ( n * (x[d] / X[d]) * pi ) in dimension d
Definition at line 56 of file color_spinor_util.cu.
References getCoords(), parity, s, sin(), and X.
|
inline |
|
inline |
Definition at line 1222 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
Referenced by sin().
|
inline |
|
inline |
|
inline |
Definition at line 1238 of file complex_quda.h.
References quda::complex< float >::imag(), and quda::complex< float >::real().
Referenced by sinh().
|
inline |
Definition at line 17 of file checksum.cu.
References quda::Matrix< T, N >::checksum(), and nColor.
Referenced by ChecksumCPU().
void quda::spinorNoise | ( | ColorSpinorField & | in, |
RNG & | rngstate, | ||
QudaNoiseType | type | ||
) |
Generate a random noise spinor. This variant allows the user to manage the RNG state.
in | The colorspinorfield |
rngstate | Random state |
type | The type of noise to create (QUDA_NOISE_GAUSS or QUDA_NOISE_UNIFORM) |
Decide on the input order
Definition at line 122 of file spinor_noise.cu.
References quda::SpinorNoise< real, Ns, Nc, type, Arg >::apply(), arg(), quda::ColorSpinorParam::create, quda::ColorSpinorField::Create(), errorQuda, quda::ColorSpinorField::FieldOrder(), in, quda::ColorSpinorParam::location, quda::LatticeField::Location(), quda::ColorSpinorField::Ncolor(), quda::ColorSpinorField::Nspin(), param, prec, quda::LatticeField::Precision(), QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_FIELD_ORDER, QUDA_FLOAT4_FIELD_ORDER, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, QUDA_NULL_FIELD_CREATE, QUDA_SINGLE_PRECISION, and quda::ColorSpinorParam::setPrecision().
Referenced by construct_spinor_source(), quda::MG::MG(), quda::TRLM::operator()(), quda::MG::reset(), spinorNoise(), and quda::MG::verify().
void quda::spinorNoise | ( | ColorSpinorField & | src, |
unsigned long long | seed, | ||
QudaNoiseType | type | ||
) |
Generate a random noise spinor. This variant just requires a seed and will create and destroy the random number state.
src | The colorspinorfield |
seed | Seed |
type | The type of noise to create (QUDA_NOISE_GAUSS or QUDA_NOISE_UNIFORM) |
Definition at line 210 of file spinor_noise.cu.
References quda::RNG::Init(), quda::RNG::Release(), and spinorNoise().
void quda::SpinorNoiseCPU | ( | Arg & | arg | ) |
CPU function to fill a spinor field with random noise.
Definition at line 47 of file spinor_noise.cu.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, quda::Arg< real, Ns, Nc, order >::rng, s, quda::RNG::State(), and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::SpinorNoiseGPU | ( | Arg | arg | ) |
CUDA kernel to fill a spinor field with random noise. Adopts a similar form as the CPU version, using the same inlined functions.
Definition at line 68 of file spinor_noise.cu.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, QUDA_NOISE_GAUSS, QUDA_NOISE_UNIFORM, quda::Arg< real, Ns, Nc, order >::rng, s, quda::RNG::State(), and quda::Arg< real, Ns, Nc, order >::volumeCB.
|
inline |
Definition at line 120 of file complex_quda.h.
References sqrt().
Referenced by acosh(), asinh(), quda::CG::blocksolve(), quda::MG::buildFreeVectors(), cabs(), calculateY(), quda::linalg::Cholesky< Mat, T, N, fast >::Cholesky(), quda::EigenSolver::computeEvals(), quda::EigenSolver::computeSVD(), quda::IncEigCG::eigCGsolve(), exponentiate_iQ(), expsu3(), quda::GMResDR::FlexArnoldiProcedure(), gauss_su3(), quda::MG::generateNullVectors(), genGauss(), quda::Deflation::increment(), invert_test(), invertMultiShiftQuda(), invertMultiSrcQuda(), invertQuda(), l2(), quda::Matrix< T, N >::L2(), quda::TRLM::lanczosStep(), link_sanity_check_internal_8(), main(), new_save_half(), quda::blas::norm1_(), normalize(), quda::Deflation::operator()(), quda::TRLM::operator()(), quda::CG::operator()(), quda::CG3::operator()(), quda::CG3NE::operator()(), quda::CGNE::operator()(), quda::CGNR::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::BiCGstab::operator()(), quda::SimpleBiCGstab::operator()(), quda::MPBiCGstab::operator()(), quda::BiCGstabL::operator()(), quda::GCR::operator()(), quda::MR::operator()(), quda::CACG::operator()(), quda::CACGNE::operator()(), quda::CACGNR::operator()(), quda::CAGCR::operator()(), quda::SD::operator()(), quda::MultiShiftCG::operator()(), quda::MinResExt::operator()(), quda::IncEigCG::operator()(), quda::GMResDR::operator()(), quda::Solver::PrintStats(), quda::Solver::PrintSummary(), quadSum(), quda::Deflation::reduce(), reliable(), quda::BiCGstabL::reliable(), quda::CACG::reliable(), quda::GMResDR::RestartVZH(), setUnitarizeLinksConstants(), sqrt(), su3Reconstruct8(), test(), quda::Deflation::verify(), and quda::MG::verify().
|
inline |
|
inline |
|
inline |
Definition at line 164 of file dslash_staggered.cuh.
References arg(), quda::DslashArg< Float >::dagger, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::out, quda::DslashArg< Float >::parity, quda::StaggeredArg< Float, nColor, reconstruct_u_, reconstruct_l_, improved_, phase_ >::x, and quda::DslashArg< Float >::xpay.
Referenced by quda::ImprovedStaggeredApply< Float, nColor, recon_l >::ImprovedStaggeredApply(), and quda::StaggeredApply< Float, nColor, recon_u >::StaggeredApply().
__global__ void quda::staggeredGPU | ( | Arg | arg | ) |
Definition at line 197 of file dslash_staggered.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
|
inline |
Compute the staggered phase factor at unit shift from the current lattice coordinates. The routine below optimizes out the shift where possible, hence is only visible where we need to consider the boundary condition.
[in] | coords | Lattice coordinates |
[in] | X | Lattice dimensions |
[in] | dim | Dimension we are hopping |
[in] | dir | Direction of the unit hop (+1 or -1) |
[in] | tboundary | Boundary condition |
Definition at line 868 of file index_helper.cuh.
References arg(), QUDA_STAGGERED_PHASE_MILC, QUDA_STAGGERED_PHASE_TIFR, and X.
Referenced by applyStaggered().
|
inline |
Definition at line 88 of file inline_ptx.h.
References __PTR.
Referenced by vector_store().
|
inline |
Definition at line 93 of file inline_ptx.h.
References __PTR.
Referenced by vector_store().
|
inline |
Definition at line 78 of file inline_ptx.h.
References __PTR.
Referenced by vector_store().
|
inline |
Definition at line 98 of file inline_ptx.h.
References __PTR.
Referenced by vector_store().
|
inline |
Definition at line 83 of file inline_ptx.h.
References __PTR.
Referenced by vector_store().
void quda::STOUTStep | ( | GaugeField & | dataDs, |
const GaugeField & | dataOr, | ||
double | rho | ||
) |
Apply STOUT smearing to the gauge field.
[out] | dataDs | Output smeared field |
[in] | dataOr | Input gauge field |
[in] | rho | smearing parameter |
Definition at line 129 of file gauge_stout.cu.
References errorQuda, quda::GaugeField::isNative(), quda::GaugeField::Order(), quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_HALF_PRECISION, QUDA_SINGLE_PRECISION, and quda::GaugeField::Reconstruct().
Referenced by performSTOUTnStep().
|
inline |
Definition at line 47 of file malloc_quda.h.
Referenced by file_name().
|
inline |
Definition at line 48 of file malloc_quda.h.
Referenced by file_name().
|
inline |
Definition at line 1125 of file quda_matrix.h.
|
inline |
Definition at line 62 of file blas_helper.cuh.
Referenced by quda::EigenSolver::blockOrthogonalize(), quda::blas::cdotNormA_(), quda::blas::cdotNormB_(), dslashReference(), dslashReference_4d_sgpu(), dslashReference_5th(), quda::blas::Norm2< ReduceType, Float2, FloatN >::flops(), getRealTraceUVdagger(), quda::blas::multiReduceKernel(), quda::blas::multiReduceLaunch(), quda::blas::Norm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::Dot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::reduce_vector< T >::operator()(), quda::blas::axpbyzNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::AxpyReDot< ReduceType, Float2, FloatN >::operator()(), quda::blas::Cdot< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotCopy< NXZ, ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyNorm2< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpyxmaznormx< ReduceType, Float2, FloatN >::operator()(), quda::blas::cabxpyzaxnorm< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpydotzy< ReduceType, Float2, FloatN >::operator()(), quda::blas::CdotNormA< ReduceType, Float2, FloatN >::operator()(), quda::blas::caxpbypzYmbwcDotProductUYNormY_< ReduceType, Float2, FloatN >::operator()(), quda::blas::quadrupleCG3InitNorm_< ReduceType, Float2, FloatN >::operator()(), quda::blas::quadrupleCG3UpdateNorm_< ReduceType, Float2, FloatN >::operator()(), quda::blas::doubleCG3InitNorm_< ReduceType, Float2, FloatN >::operator()(), quda::blas::ReduceFunctor< ReduceType, Float2, FloatN >::pre(), quda::blas::MultiReduceFunctor< NXZ, ReduceType, Float2, FloatN >::pre(), reduce2d(), quda::blas::reduceKernel(), quda::blas::reduceLaunch(), reduceRow(), and sum().
|
inline |
Definition at line 63 of file blas_helper.cuh.
|
inline |
Definition at line 68 of file blas_helper.cuh.
|
inline |
Definition at line 74 of file blas_helper.cuh.
References doubledouble::head(), sum(), doubledouble2::x, doubledouble3::x, doubledouble2::y, doubledouble3::y, and doubledouble3::z.
|
inline |
Definition at line 56 of file complex_quda.h.
References tan().
|
inline |
Definition at line 1258 of file complex_quda.h.
Referenced by tan().
|
inline |
Definition at line 91 of file complex_quda.h.
References tanh().
|
inline |
Definition at line 1264 of file complex_quda.h.
References exp().
Referenced by tanh().
double quda::timeInterval | ( | struct timeval | start, |
struct timeval | end | ||
) |
Definition at line 18 of file inv_gcr_quda.cpp.
int quda::traceEnabled | ( | ) |
Definition at line 73 of file tune.cpp.
References enable_trace, and quda::cublas::init().
Referenced by postTrace_(), saveProfile(), and tuneLaunch().
|
static |
Definition at line 121 of file malloc.cpp.
References DEVICE, DEVICE_PINNED, MAPPED, PINNED, and quda::MemAlloc::size.
Referenced by device_free_(), device_pinned_free_(), and host_free_().
Definition at line 99 of file malloc.cpp.
References quda::MemAlloc::base_size, DEVICE, DEVICE_PINNED, MAPPED, PINNED, total_host_bytes, and total_pinned_bytes.
Referenced by device_malloc_(), device_pinned_malloc_(), mapped_malloc_(), pinned_malloc_(), and safe_malloc_().
TuneParam & quda::tuneLaunch | ( | Tunable & | tunable, |
QudaTune | enabled, | ||
QudaVerbosity | verbosity | ||
) |
Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.
Definition at line 643 of file tune.cpp.
References quda::Tunable::advanceTuneParam(), quda::Tunable::apply(), quda::TuneKey::aux, quda::TuneParam::aux, quda::TuneParam::block, broadcastTuneCache(), quda::Tunable::checkLaunchParam(), comm_rank(), quda::TuneParam::comment, commGlobalReduction(), quda::Tunable::defaultTuneParam(), quda::blas::end(), errorQuda, quda::TuneParam::grid, quda::Tunable::initTuneParam(), quda::Tunable::jitifyError(), quda::TraceKey::key, quda::Timer::Last(), last_key, quda::TuneParam::n_calls, quda::TuneKey::name, param, quda::Tunable::paramString(), quda::Tunable::perfString(), policyTuning(), quda::Tunable::postTune(), quda::Tunable::preTune(), printfQuda, QUDA_DEBUG_VERBOSE, QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE, QUDA_PROFILE_TOTAL, QUDA_TUNE_NO, QUDA_TUNE_YES, QUDA_VERBOSE, quda::TuneParam::shared_bytes, quda::Timer::Start(), quda::Timer::Stop(), quda::TuneParam::time, quda::TraceKey::time, traceEnabled(), quda::Tunable::tuneKey(), quda::Tunable::tuningIter(), and quda::TuneKey::volume.
Referenced by quda::GaugePlaq< Float, Gauge >::apply(), quda::DomainWall5D< Float, nDim, nColor, Arg >::apply(), quda::Staggered< Float, nDim, nColor, Arg >::apply(), quda::TwistedMass< Float, nDim, nColor, Arg >::apply(), quda::TwistedClover< Float, nDim, nColor, Arg >::apply(), quda::WilsonClover< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMass< Float, nDim, nColor, Arg >::apply(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Wilson< Float, nDim, nColor, Arg >::apply(), quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Laplace< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::CopySpinor< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder >::apply(), quda::QudaMemCopy::apply(), quda::blas::copy_ns::CopyCuda< FloatN, N, Output, Input >::apply(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::apply(), quda::GenericPackGhostLauncher< Float, block_float, Ns, Ms, Nc, Mc, Arg >::apply(), quda::blas::BlasCuda< FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Functor >::apply(), quda::SpinorNoise< real, Ns, Nc, type, Arg >::apply(), quda::blas::MultiBlas< NXZ, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Functor, T >::apply(), quda::GaugeGauss< Float, Arg >::apply(), quda::CopyGaugeEx< FloatOut, FloatIn, length, OutOrder, InOrder >::apply(), quda::Dslash5< Float, nColor, Arg >::apply(), quda::ShiftColorSpinorField< Output, Input >::apply(), quda::WuppertalSmearing< Float, Ns, Nc, Arg >::apply(), quda::GaugeOvrImpSTOUT< Float, Arg >::apply(), quda::CopyColorSpinor< Ns, Arg >::apply(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), 
quda::Pack< Float, nColor, spin_project >::apply(), quda::ExtractGhost< nDim, Arg >::apply(), quda::ExtractGhostEx< Float, length, nDim, dim, Order >::apply(), quda::CopyColorSpinor< 4, Arg >::apply(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::apply(), quda::CopySpinorEx< FloatOut, FloatIn, Ns, Nc, OutOrder, InOrder, Basis, extend >::apply(), quda::Gamma< ValueType, basis, dir >::apply(), quda::TwistGamma< Float, nColor, Arg >::apply(), quda::ProjectSU3< Float, G >::apply(), quda::Clover< Float, nSpin, nColor, Arg >::apply(), quda::DslashCoarsePolicyTune::apply(), quda::TwistClover< Float, nSpin, nColor, Arg >::apply(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::apply(), quda::dslash::DslashPolicyTune< Dslash >::apply(), computeMomAction(), forceRecord(), isUnitary(), quda::TunableVectorYZ::resizeStep(), and updateMomentum().
|
inline |
Definition at line 665 of file dslash_quda.cu.
References Mat(), nColor, quda::Arg< real, Ns, Nc, order >::nParity, out, and parity.
void quda::twistCloverCPU | ( | Arg & | arg | ) |
Definition at line 709 of file dslash_quda.cu.
References quda::Arg< real, Ns, Nc, order >::nParity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::twistCloverGPU | ( | Arg | arg | ) |
Definition at line 717 of file dslash_quda.cu.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
|
inline |
Apply the preconditioned twisted-clover dslash.
Definition at line 40 of file dslash_twisted_clover_preconditioned.cuh.
References quda::TwistedCloverArg< Float, nColor, reconstruct_, dynamic_clover_ >::A, quda::TwistedCloverArg< Float, nColor, reconstruct_, dynamic_clover_ >::A2inv, arg(), quda::TwistedCloverArg< Float, nColor, reconstruct_, dynamic_clover_ >::b, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, Mat(), nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.
void quda::twistedCloverPreconditionedCPU | ( | Arg | arg | ) |
Definition at line 109 of file dslash_twisted_clover_preconditioned.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::twistedCloverPreconditionedGPU | ( | Arg | arg | ) |
Definition at line 124 of file dslash_twisted_clover_preconditioned.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
Referenced by quda::TwistedCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch().
|
inline |
Apply the twisted-mass dslash: out(x) = M*in = a * D * in + (1 + i*b*gamma_5)*x. Note this routine only exists in xpay form.
Definition at line 29 of file dslash_twisted_mass.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.
|
inline |
Apply the preconditioned twisted-mass dslash.
Definition at line 146 of file dslash_twisted_mass_preconditioned.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.
void quda::twistedMassCPU | ( | Arg | arg | ) |
Definition at line 62 of file dslash_twisted_mass.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::twistedMassGPU | ( | Arg | arg | ) |
Definition at line 76 of file dslash_twisted_mass.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
void quda::twistedMassPreconditionedCPU | ( | Arg | arg | ) |
Definition at line 191 of file dslash_twisted_mass_preconditioned.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::twistedMassPreconditionedGPU | ( | Arg | arg | ) |
Definition at line 217 of file dslash_twisted_mass_preconditioned.cuh.
References arg(), and quda::DslashArg< Float >::parity.
void quda::twistGammaCPU | ( | Arg | arg | ) |
Definition at line 332 of file dslash_quda.cu.
References quda::Arg< real, Ns, Nc, order >::nParity, parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::twistGammaGPU | ( | Arg | arg | ) |
Definition at line 353 of file dslash_quda.cu.
References parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
|
inline |
Definition at line 45 of file uint_to_char.h.
Referenced by i32toa(), and quda::Laplace< Float, nDim, nColor, Arg >::tuneKey().
|
inline |
Definition at line 127 of file uint_to_char.h.
Referenced by i64toa(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), and quda::QudaMemCopy::tuneKey().
void quda::unitarizeLinks | ( | cudaGaugeField & | outfield, |
const cudaGaugeField & | infield, | ||
int * | fails | ||
) |
Definition at line 500 of file unitarize_links_quda.cu.
References errorQuda, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by GaugeAlgTest::CallUnitarizeLinks(), CallUnitarizeLinks(), computeKSLinkQuda(), isUnitary(), unitarize_link_test(), and unitarizeLinks().
void quda::unitarizeLinks | ( | cudaGaugeField & | outfield, |
int * | fails | ||
) |
Definition at line 517 of file unitarize_links_quda.cu.
References unitarizeLinks().
void quda::unitarizeLinksCPU | ( | cpuGaugeField & | outfield, |
const cpuGaugeField & | infield | ||
) |
Definition at line 299 of file unitarize_links_quda.cu.
References copyArrayToLink(), copyLinkToArray(), errorQuda, quda::cpuGaugeField::Gauge_p(), num_failures, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_SINGLE_PRECISION, and quda::LatticeField::Volume().
Referenced by computeHISQLinksCPU(), and TEST().
void quda::updateAlphaZeta | ( | double * | alpha, |
double * | zeta, | ||
double * | zeta_old, | ||
const double * | r2, | ||
const double * | beta, | ||
const double | pAp, | ||
const double * | offset, | ||
const int | nShift, | ||
const int | j_low | ||
) |
Compute the new values of alpha and zeta
Definition at line 128 of file inv_multi_cg_quda.cpp.
References QUDA_MAX_MULTI_SHIFT.
Referenced by quda::MultiShiftCG::operator()().
void quda::updateAp | ( | Complex ** | beta, |
std::vector< ColorSpinorField *> | Ap, | ||
int | begin, | ||
int | size, | ||
int | k | ||
) |
Definition at line 82 of file inv_gcr_quda.cpp.
References quda::blas::caxpy(), and size.
Referenced by orthoDir().
void quda::updateGaugeField | ( | GaugeField & | out, |
double | dt, | ||
const GaugeField & | in, | ||
const GaugeField & | mom, | ||
bool | conj_mom, | ||
bool | exact | ||
) |
Evolve the gauge field by step size dt using the momentum field
out | Updated gauge field |
dt | Step size |
in | Input gauge field |
mom | Momentum field |
conj_mom | Whether we conjugate the momentum in the exponential |
exact | Calculate exact exponential or use an expansion |
Definition at line 227 of file gauge_update_quda.cu.
References errorQuda, in, quda::LatticeField::Location(), out, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, and QUDA_SINGLE_PRECISION.
Referenced by updateGaugeFieldQuda().
void quda::updateMomentum | ( | GaugeField & | mom, |
double | coeff, | ||
GaugeField & | force, | ||
const char * | fname | ||
) |
Update the momentum field from the force field
mom = mom - coeff * [force]_TA
where [A]_TA means the traceless anti-hermitian projection of A
mom | Momentum field |
coeff | Integration stepsize |
force | Force field |
fname | The function calling this (fname will be printed if force monitoring is enabled) |
Definition at line 328 of file momentum.cu.
References applyU(), arg(), quda::TuneParam::block, quda::blas::bytes, checkCudaError, errorQuda, quda::blas::flops, getTuning(), getVerbosity(), quda::TuneParam::grid, quda::LatticeField::Location(), quda::GaugeField::Order(), parity, quda::LatticeField::Precision(), QUDA_CUDA_FIELD_LOCATION, QUDA_DOUBLE_PRECISION, QUDA_FLOAT2_GAUGE_ORDER, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, qudaDeviceSynchronize, quda::GaugeField::Reconstruct(), quda::TuneParam::shared_bytes, stream, tuneLaunch(), quda::LatticeField::VolString(), quda::LatticeField::VolumeCB(), X, and quda::LatticeField::X().
Referenced by computeCloverForceQuda(), computeGaugeForceQuda(), computeHISQForceQuda(), computeMomAction(), computeStaggeredForceQuda(), and hisq_force_test().
void quda::updateSolution | ( | ColorSpinorField & | x, |
const Complex * | alpha, | ||
Complex **const | beta, | ||
double * | gamma, | ||
int | k, | ||
std::vector< ColorSpinorField *> | p | ||
) |
Definition at line 145 of file inv_gcr_quda.cpp.
References backSubs(), quda::blas::caxpy(), and X.
Referenced by quda::GCR::operator()().
|
inline |
Apply the M5 inverse operator at a given site on the lattice. This is an alternative algorithm that is applicable to variable b and c coefficients: here each thread in the s dimension starts computing at s = s_, and computes the left- and right-handed contributions in two separate passes. For the left-handed contribution we sweep through increasing s, e.g., s=s_, s_+1, s_+2, and for the right-handed one we do the transpose, s=s_, s_-1, s_-2. This allows us to progressively build up the scalar coefficients needed in a SIMD-friendly fashion.
shared | Whether to use a shared memory scratch pad to store the input field across the Ls dimension to minimize global memory reads. |
[in] | arg | Argument struct containing any meta data and accessors |
[in] | parity | Parity we are on |
[in] | x_b | Checkerboarded 4-d space-time index |
[in] | s_ | Ls dimension coordinate |
Definition at line 352 of file dslash_domain_wall_m5.cuh.
References quda::coeff_type< real, is_variable, Arg >::b(), quda::coeff_type< real, is_variable, Arg >::c(), dagger, in, quda::VectorCache< real, Vector >::load(), nColor, out, R, s, quda::VectorCache< real, Vector >::save(), and quda::VectorCache< real, Vector >::sync().
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Definition at line 412 of file register_traits.h.
|
inline |
Definition at line 422 of file register_traits.h.
Referenced by quda::clover::FloatNOrder< Float, length, N, add_rho, huge_alloc >::save(), quda::colorspinor::FloatNOrder< Float, Ns, Nc, N, spin_project, huge_alloc >::save(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::save(), quda::colorspinor::FloatNOrder< Float, Ns, Nc, N, spin_project, huge_alloc >::saveGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhost(), quda::gauge::FloatNOrder< Float, length, N, reconLenParam, stag_phase, huge_alloc, ghostExchange_, use_inphase >::saveGhostEx(), and vector_store().
|
inline |
Definition at line 427 of file register_traits.h.
References store_streaming_double2().
|
inline |
Definition at line 436 of file register_traits.h.
References store_streaming_float4().
|
inline |
Definition at line 445 of file register_traits.h.
References store_streaming_float2().
|
inline |
Definition at line 454 of file register_traits.h.
References store_streaming_short4().
|
inline |
Definition at line 463 of file register_traits.h.
References store_streaming_short2().
|
inline |
Definition at line 473 of file register_traits.h.
References store_streaming_short2().
|
inline |
Definition at line 484 of file register_traits.h.
References vector_store().
|
inline |
Definition at line 613 of file coarse_op_kernel.cuh.
Referenced by getIndicesShared().
|
inline |
Definition at line 604 of file coarse_op_kernel.cuh.
Referenced by computeVUV(), and getIndicesShared().
|
inline |
Definition at line 135 of file dslash_wilson.cuh.
References arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, s, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.
Referenced by quda::WilsonApply< Float, nColor, recon >::WilsonApply(), quda::WilsonCloverApply< Float, nColor, recon >::WilsonCloverApply(), and quda::WilsonCloverPreconditionedApply< Float, nColor, recon >::WilsonCloverPreconditionedApply().
|
inline |
Apply the clover preconditioned Wilson dslash.
Definition at line 37 of file dslash_wilson_clover_preconditioned.cuh.
References quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::A, arg(), EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, quda::WilsonArg< Float, nColor, reconstruct_ >::x, and quda::DslashArg< Float >::xpay.
|
inline |
Apply the Wilson-clover dslash: out(x) = M*in = A(x)*x(x) + D * in(x-mu). Note this routine only exists in xpay form.
Definition at line 38 of file dslash_wilson_clover.cuh.
References quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::A, arg(), quda::WilsonCloverArg< Float, nColor, reconstruct_, twist_ >::b, EXTERIOR_KERNEL_ALL, INTERIOR_KERNEL, quda::DslashArg< Float >::kernel_type, nColor, quda::DslashArg< Float >::nParity, quda::WilsonArg< Float, nColor, reconstruct_ >::out, quda::DslashArg< Float >::parity, tmp, and quda::WilsonArg< Float, nColor, reconstruct_ >::x.
void quda::wilsonCloverCPU | ( | Arg | arg | ) |
Definition at line 89 of file dslash_wilson_clover.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::wilsonCloverGPU | ( | Arg | arg | ) |
Definition at line 103 of file dslash_wilson_clover.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
void quda::wilsonCloverPreconditionedCPU | ( | Arg | arg | ) |
Definition at line 100 of file dslash_wilson_clover_preconditioned.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::wilsonCloverPreconditionedGPU | ( | Arg | arg | ) |
Definition at line 115 of file dslash_wilson_clover_preconditioned.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
Referenced by quda::WilsonCloverPreconditionedLaunch< Float, nDim, nColor, nParity, dagger, xpay, kernel_type, Arg >::launch().
void quda::wilsonCPU | ( | Arg | arg | ) |
Definition at line 165 of file dslash_wilson.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
__global__ void quda::wilsonGPU | ( | Arg | arg | ) |
Definition at line 180 of file dslash_wilson.cuh.
References arg(), quda::DslashArg< Float >::nParity, and quda::DslashArg< Float >::parity.
|
inline |
Definition at line 926 of file quda_matrix.h.
References quda::Matrix< T, N >::data.
|
inline |
Definition at line 939 of file quda_matrix.h.
|
inline |
Definition at line 895 of file quda_matrix.h.
References quda::Matrix< T, N >::data.
|
inline |
Definition at line 991 of file quda_matrix.h.
References quda::Matrix< T, N >::data.
void quda::wuppertalStep | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
int | parity, | ||
const GaugeField & | U, | ||
double | A, | ||
double | B | ||
) |
Apply a generic Wuppertal smearing step. Computes out(x) = A*in(x) + B*\sum_\mu (U_\mu(x) in(x+\mu) + U^\dagger_\mu(x-\mu) in(x-\mu))
[out] | out | The out result field |
[in] | in | The in spinor field |
[in] | U | The gauge field |
[in] | A | The scaling factor for in(x) |
[in] | B | The scaling factor for \sum_\mu (U_\mu(x) in(x+\mu) + U^\dagger_\mu(x-\mu) in(x-\mu)) |
Definition at line 186 of file color_spinor_wuppertal.cu.
References quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::A, quda::WuppertalSmearing< Float, Ns, Nc, Arg >::apply(), arg(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::B, quda::LatticeField::bufferIndex, checkLocation, checkPrecision, errorQuda, quda::ColorSpinorField::exchangeGhost(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::in, quda::ColorSpinorField::Ncolor(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::nFace, quda::ColorSpinorField::Nspin(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::out, quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::parity, quda::LatticeField::Precision(), QUDA_DOUBLE_PRECISION, QUDA_RECONSTRUCT_12, QUDA_RECONSTRUCT_8, QUDA_RECONSTRUCT_NO, QUDA_SINGLE_PRECISION, quda::GaugeField::Reconstruct(), quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::U, and quda::ColorSpinorField::V().
Referenced by performWuppertalnStep(), and wuppertalStep().
void quda::wuppertalStep | ( | ColorSpinorField & | out, |
const ColorSpinorField & | in, | ||
int | parity, | ||
const GaugeField & | U, | ||
double | alpha | ||
) |
Apply a standard Wuppertal smearing step. Computes out(x) = 1/(1+6*alpha)*(in(x) + alpha*\sum_\mu (U_\mu(x) in(x+\mu) + U^\dagger_\mu(x-\mu) in(x-\mu)))
[out] | out | The out result field |
[in] | in | The in spinor field |
[in] | U | The gauge field |
[in] | alpha | The smearing parameter |
Definition at line 291 of file color_spinor_wuppertal.cu.
References wuppertalStep().
void quda::wuppertalStepCPU | ( | Arg | arg | ) |
Definition at line 117 of file color_spinor_wuppertal.cu.
References arg(), quda::Arg< real, Ns, Nc, order >::nParity, quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
__global__ void quda::wuppertalStepGPU | ( | Arg | arg | ) |
Definition at line 133 of file color_spinor_wuppertal.cu.
References quda::Arg< real, Ns, Nc, order >::nParity, quda::WuppertalSmearingArg< Float, Ns, Nc, gRecon >::parity, and quda::Arg< real, Ns, Nc, order >::volumeCB.
|
inline |
Definition at line 14 of file float_vector.h.
|
inline |
Definition at line 15 of file float_vector.h.
|
inline |
Definition at line 16 of file float_vector.h.
|
inline |
Definition at line 17 of file float_vector.h.
|
inline |
Definition at line 19 of file float_vector.h.
|
inline |
Definition at line 20 of file float_vector.h.
|
inline |
Definition at line 21 of file float_vector.h.
|
inline |
Definition at line 22 of file float_vector.h.
|
static |
Definition at line 52 of file inv_mpcg_quda.cpp.
|
inline |
Definition at line 54 of file cub_helper.cuh.
References quda::vector_type< scalar, n >::data.
Referenced by quda::ShiftUpdate::apply(), applyThirdTerm(), quda::MG::buildFreeVectors(), quda::MPBiCGstab::computeMatrixPowers(), quda::GMResDR::FlexArnoldiProcedure(), quda::cpuGaugeField::Gauge_p(), quda::MG::generateNullVectors(), quda::cudaColorSpinorField::Ghost2(), quda::HMatrix< T, N >::HMatrix(), quda::Matrix< T, N >::Matrix(), quda::cudaGaugeField::Odd_p(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_PACKED_CLOVER_ORDER >::operator()(), quda::MG::operator()(), quda::MPCG::operator()(), quda::PreconCG::operator()(), quda::MPBiCGstab::operator()(), quda::SD::operator()(), quda::GMResDR::operator()(), reduce2d(), quda::ReduceArg< vector_type< ReduceType, NXZ > >::ReduceArg(), quda::MG::reset(), quda::GaugeField::SiteSize(), quda::vector_type< ReduceType, NXZ >::vector_type(), quda::Deflation::verify(), and quda::MG::verify().
|
static |
Definition at line 53 of file malloc.cpp.
Referenced by quda::colorspinor::FieldOrderCB< Float, coarseSpin, coarseColor, 1, csOrder, Float, ghostFloat >::abs_max(), quda::CalculateY< from_coarse, Float, fineSpin, fineColor, coarseSpin, coarseColor, Arg >::apply(), quda::colorspinor::FieldOrderCB< Float, coarseSpin, coarseColor, 1, csOrder, Float, ghostFloat >::norm2(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::transform_reduce(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), and quda::gauge::Accessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce().
|
static |
|
static |
Definition at line 16 of file block_orthogonalize.cuh.
Referenced by __launch_bounds__().
|
static |
Definition at line 19 of file block_orthogonalize.cuh.
|
static |
Definition at line 11 of file coarse_op.cuh.
|
static |
Definition at line 9 of file dslash_pack.cuh.
Referenced by quda::dslash::DslashCommsPattern::DslashCommsPattern(), exchange_cpu_sitelink_ex(), exchangeExtendedGhost(), extractGhost(), extractGhostEx(), quda::Pack< Float, nColor, spin_project >::fillAux(), quda::MG::generateNullVectors(), quda::Pack< Float, nColor, spin_project >::gridStep(), last_node_in_t(), quda::Pack< Float, nColor, spin_project >::maxGridSize(), quda::Pack< Float, nColor, spin_project >::minGridSize(), quda::Pack< Float, nColor, spin_project >::Pack(), PackGhost(), and setPackComms().
|
static |
Definition at line 1123 of file cuda_color_spinor_field.cpp.
|
static |
Definition at line 1122 of file cuda_color_spinor_field.cpp.
|
static |
Definition at line 1125 of file cuda_color_spinor_field.cpp.
|
static |
Definition at line 1124 of file cuda_color_spinor_field.cpp.
__device__ unsigned int quda::count[QUDA_MAX_MULTI_REDUCE] = { } |
Definition at line 90 of file cub_helper.cuh.
Referenced by quda::ShiftUpdate::apply(), quda::BiCGstabLUpdate::apply(), quda::blas::caxpy_recurse(), quda::blas::caxpyz_recurse(), quda::blas::completeFastReduce(), device_free_(), device_pinned_free_(), flushForceMonitor(), host_free_(), MPI_Allreduce(), MPI_Alltoallw(), MPI_Init(), MPI_Recv(), MPI_Send(), MPI_Send_init(), MPI_Test(), MPI_Wait(), MPI_Waitany(), quda::blas::multiReduce_recurse(), quda::TimeProfile::Print(), quda::TimeProfile::PrintGlobal(), read_field(), read_su3_field(), saveProfile(), vcompare_M(), vcreate_M(), vdestroy_M(), vget_M(), vput_M(), vset_M(), write_field(), and write_su3_field().
|
static |
Definition at line 12 of file multigrid.cpp.
|
static |
Definition at line 595 of file dslash_coarse.cu.
|
static |
Definition at line 71 of file tune.cpp.
Referenced by traceEnabled().
|
static |
Definition at line 597 of file dslash_coarse.cu.
Referenced by quda::DslashCoarsePolicyTune::defaultTuneParam(), quda::DslashCoarsePolicyTune::DslashCoarsePolicyTune(), and quda::DslashCoarsePolicyTune::initTuneParam().
|
static |
Definition at line 26 of file momentum.cu.
|
static |
Definition at line 27 of file momentum.cu.
|
static |
Definition at line 25 of file momentum.cu.
|
static |
Definition at line 32 of file uint_to_char.h.
__shared__ bool quda::isLastBlockDone |
Definition at line 91 of file cub_helper.cuh.
__shared__ volatile bool quda::isLastWarpDone[16] |
Definition at line 140 of file cub_helper.cuh.
|
static |
Definition at line 109 of file tune.cpp.
Referenced by quda::pool::device_malloc_(), quda::pool::flush_device(), quda::pool::flush_pinned(), quda::MPCG::operator()(), quda::MPBiCGstab::operator()(), and quda::pool::pinned_malloc_().
|
static |
Definition at line 22 of file dslash_quda.cu.
Referenced by getKernelPackT().
|
static |
Definition at line 28 of file dslash_quda.cu.
|
static |
Definition at line 22 of file tune.cpp.
Referenced by getLastTuneKey(), and tuneLaunch().
|
static |
Referenced by saveProfile().
|
static |
Definition at line 44 of file inv_eigcg_quda.cpp.
Referenced by quda::IncEigCG::operator()().
|
static |
Definition at line 55 of file malloc.cpp.
|
static |
Definition at line 56 of file malloc.cpp.
|
static |
Definition at line 57 of file malloc.cpp.
|
static |
Definition at line 9 of file dslash_domain_wall_4d.cuh.
Referenced by quda::DomainWall4DArg< Float, nColor, reconstruct_ >::a5(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), and quda::coeff_type< real, true, Arg >::coeff().
|
static |
Definition at line 19 of file dslash_domain_wall_m5.cuh.
const int quda::Nstream = 9 |
Definition at line 83 of file quda_internal.h.
Referenced by quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), ApplyClover(), ApplyDslash5(), ApplyGamma(), ApplyTwistClover(), ApplyTwistGamma(), quda::dslash::commsComplete(), quda::dslash::completeDslash(), createDslashEvents(), destroyDslashEvents(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), quda::dslash::DslashCommsPattern::DslashCommsPattern(), endQuda(), quda::blas::init(), initQudaMemory(), quda::dslash::DslashBasic< Dslash >::operator()(), quda::dslash::DslashFusedExterior< Dslash >::operator()(), quda::dslash::DslashGDR< Dslash >::operator()(), quda::dslash::DslashFusedGDR< Dslash >::operator()(), quda::dslash::DslashGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPack< Dslash >::operator()(), quda::dslash::DslashZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopyPackGDRRecv< Dslash >::operator()(), quda::dslash::DslashZeroCopy< Dslash >::operator()(), quda::dslash::DslashFusedZeroCopy< Dslash >::operator()(), quda::dslash::DslashNC< Dslash >::operator()(), and quda::cudaColorSpinorField::packExtended().
|
static |
Definition at line 20 of file deflation.cpp.
Referenced by quda::Deflation::reduce(), and quda::Deflation::verify().
|
static |
Definition at line 21 of file deflation.cpp.
Referenced by quda::Deflation::reduce(), and quda::Deflation::verify().
|
static |
Definition at line 600 of file dslash_coarse.cu.
|
static |
Definition at line 494 of file tune.cpp.
Referenced by policyTuning().
|
static |
Definition at line 114 of file tune.cpp.
Referenced by initQudaDevice(), saveProfile(), and saveTuneCache().
|
static |
Definition at line 723 of file lattice_field.cpp.
Referenced by reorder_location().
|
static |
Definition at line 107 of file tune.cpp.
Referenced by loadTuneCache().
__shared__ float quda::s[] |
Applies the coarse dslash on a given parity and checkerboard site index.
out | The result - kappa * Dslash in |
Y | The coarse gauge field |
kappa | Kappa value |
in | The input field |
parity | The site parity |
x_cb | The checkerboarded site index |
Referenced by __launch_bounds__(), quda::coeff_type< real, true, Arg >::a(), quda::DomainWall4DArg< Float, nColor, reconstruct_ >::a5(), accumGivensRotation(), AddCoarseDiagonalCPU(), AddCoarseDiagonalGPU(), AddCoarseTmDiagonalCPU(), AddCoarseTmDiagonalGPU(), applyThirdTerm(), applyTwist(), assignGivensRotation(), quda::coeff_type< real, true, Arg >::b(), bdSVD(), blockOrthoCPU(), quda::EigenSolver::blockOrthogonalize(), quda::MG::buildFreeVectors(), quda::coeff_type< real, true, Arg >::c(), quda::ColorSpinor< Float, Nc, 4 >::chiral_project(), quda::ColorSpinor< Float, Nc, 2 >::chiral_reconstruct(), quda::linalg::Cholesky< Mat, T, N, fast >::Cholesky(), coarseDslash(), coarseDslashKernel(), quda::ColorSpinorField::Components(), compute_site_max(), computeAV(), computeCoarseClover(), computeTMAV(), computeTMCAV(), computeUV(), constantInv(), constructSpinorField(), coordsFromIndex(), covdevReference(), quda::Transfer::createSpinMap(), quda::EigenSolver::deflate(), quda::EigenSolver::deflateSVD(), dimFromFaceIndex(), doBulk(), domainWall4D(), quda::DomainWall4DArg< Float, nColor, reconstruct_ >::DomainWall4DArg(), domainWall4DCPU(), domainWall4DGPU(), domainWall5D(), domainWall5DGPU(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), dslash5CPU(), dslash5GPU(), dslash5inv(), dslash5invGPU(), dslashReference(), dslashReference_4d_sgpu(), genericBlas(), genericCudaPrintVector(), genericReduce(), genericSource(), get_coords(), get_mass_normalization_str(), get_memory_type_str(), get_ritz_location_str(), getGivensRotation(), quda::cudaColorSpinorField::Ghost2(), quda::colorspinor::AccessorCB< Float, nSpin, nColor, nVec, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER >::index(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_SPACE_SPIN_COLOR_FIELD_ORDER >::index(), quda::colorspinor::AccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT4_FIELD_ORDER >::index(), quda::colorspinor::GhostAccessorCB< Float, nSpin, nColor, nVec, QUDA_FLOAT4_FIELD_ORDER >::index(), 
indexFromFaceIndex(), indexFromFaceIndexStaggered(), indexFromFaceIndexStaggered(), innerProduct(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::load(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::load(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::load(), quda::colorspinor::QDPJITDiracOrder< Float, Ns, Nc >::load(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::loadGhost(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::loadGhost(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::loadGhost(), matdagmat(), multiplySpinorByDiracProjector(), multiplySpinorByDiracProjector5(), multiplyVUV(), ndegTwistGamma5(), quda::PreserveBasis< Arg >::operator()(), quda::NonRelBasis< Arg >::operator()(), quda::RelBasis< Arg >::operator()(), quda::ChiralToNonRelBasis< Arg >::operator()(), quda::NonRelToChiralBasis< Arg >::operator()(), quda::ColorSpinor< Float, Nc, 4 >::operator()(), quda::MPCG::operator()(), quda::MPBiCGstab::operator()(), quda::ColorSpinor< Float, Nc, 2 >::operator()(), operator*(), operator+(), operator-(), quda::ColorSpinor< Float, Nc, Ns >::operator-=(), operator/(), outerProdSpinTrace(), packGhost(), packKernel(), packShmemKernel(), packSpinor(), packSpinorKernel(), packStaggeredKernel(), packStaggeredShmemKernel(), printSpinorElement(), random(), quda::EigCGArgs::RestartLanczos(), Restrict(), RestrictKernel(), rotateCoarseColor(), rsqrt_dbldbl(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::save(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::save(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::save(), quda::colorspinor::QDPJITDiracOrder< Float, Ns, Nc >::save(), quda::colorspinor::SpaceColorSpinorOrder< Float, Ns, Nc >::saveGhost(), quda::colorspinor::SpaceSpinorColorOrder< Float, Ns, Nc >::saveGhost(), quda::colorspinor::PaddedSpaceSpinorColorOrder< Float, Ns, Nc >::saveGhost(), scale_su3_matrix(), 
quda::cudaColorSpinorField::sendGhost(), quda::cudaColorSpinorField::sendStart(), setUnitarizeLinksConstants(), sin(), smallSVD(), SpinorNoiseCPU(), SpinorNoiseGPU(), sqrt_dbldbl(), quda::Transfer::Transfer(), twistGamma5(), variableInv(), wilson(), and quda::Transfer::~Transfer().
constexpr int quda::size = 4096 |
Definition at line 8 of file dslash_domain_wall_4d.cuh.
Referenced by quda::cublas::BatchInvertMatrix(), broadcastTuneCache(), quda::EigenSolver::computeEvals(), quda::BiCGstabL::computeTau(), cpu_axy(), cpu_xpy(), quda::Dslash5Arg< Float, nColor >::Dslash5Arg(), quda::CopyGauge< FloatOut, FloatIn, length, Arg >::minThreads(), quda::ExtractGhost< nDim, Arg >::minThreads(), quda::ExtractGhostEx< Float, length, nDim, dim, Order >::minThreads(), quda::Object::operator new(), quda::Object::operator new[](), qChargeDensityQuda(), setUnitarizeLinksConstants(), quda::blas::TileSizeTune< ReducerDiagonal, writeDiagonal, ReducerOffDiagonal, writeOffDiagonal >::TileSizeTune(), updateAp(), and quda::BiCGstabL::updateR().
cudaStream_t* quda::stream |
Definition at line 897 of file cuda_color_spinor_field.cpp.
Referenced by quda::DomainWall5D< Float, nDim, nColor, Arg >::apply(), quda::Staggered< Float, nDim, nColor, Arg >::apply(), quda::TwistedMass< Float, nDim, nColor, Arg >::apply(), quda::WilsonClover< Float, nDim, nColor, Arg >::apply(), quda::TwistedClover< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMass< Float, nDim, nColor, Arg >::apply(), quda::DomainWall4D< Float, nDim, nColor, Arg >::apply(), quda::WilsonCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::TwistedCloverPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Wilson< Float, nDim, nColor, Arg >::apply(), quda::TwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::Laplace< Float, nDim, nColor, Arg >::apply(), quda::NdegTwistedMassPreconditioned< Float, nDim, nColor, Arg >::apply(), quda::blas::MultiReduceCuda< NXZ, doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, Reducer >::apply(), quda::blas::ReduceCuda< doubleN, ReduceType, FloatN, M, SpinorX, SpinorY, SpinorZ, SpinorW, SpinorV, Reducer >::apply(), quda::dslash::commsComplete(), computeMomAction(), forceRecord(), quda::Dslash< Float >::instantiate(), quda::dslash::issueRecv(), isUnitary(), PackGhost(), updateMomentum(), quda::Tunable::~Tunable(), and quda::Worker::~Worker().
|
static |
Definition at line 54 of file malloc.cpp.
|
static |
Definition at line 56 of file malloc.cpp.
Referenced by track_malloc().
|
static |
Definition at line 57 of file malloc.cpp.
Referenced by track_malloc().
|
static |
Definition at line 108 of file tune.cpp.
Referenced by getTuneCache().
|
static |
|
static |
Definition at line 1767 of file interface_quda.cpp.
Referenced by invertMultiShiftQuda().