QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
quda Namespace Reference

Namespaces

namespace  copy
 
namespace  fermion_force
 
namespace  quda
 
namespace  reduce
 

Classes

struct  CloverFieldParam
 
class  CloverField
 
class  cudaCloverField
 
class  cpuCloverField
 
struct  FullClover
 
class  ColorSpinorParam
 
class  ColorSpinorField
 
class  cudaColorSpinorField
 
class  cpuColorSpinorField
 
class  ColorSpinorFieldOrder
 
class  SpaceSpinColorOrder
 
class  SpaceColorSpinOrder
 
class  QOPDomainWallOrder
 
class  DiracParam
 
class  Dirac
 
class  DiracWilson
 
class  DiracWilsonPC
 
class  DiracClover
 
class  DiracCloverPC
 
class  DiracDomainWall
 
class  DiracDomainWallPC
 
class  DiracTwistedMass
 
class  DiracTwistedMassPC
 
class  DiracStaggered
 
class  DiracStaggeredPC
 
class  DiracMatrix
 
class  DiracM
 
class  DiracMdagM
 
class  DiracMdag
 
class  FaceBuffer
 
struct  GaugeFieldParam
 
class  GaugeField
 
class  cudaGaugeField
 
class  cpuGaugeField
 
struct  kernel_param_s
 
class  Solver
 
class  CG
 
class  BiCGstab
 
class  GCR
 
class  MR
 
class  alphaSA
 
class  MultiShiftSolver
 
class  MultiShiftCG
 
class  MinResExt
 
struct  LatticeFieldParam
 
class  LatticeField
 
struct  llfat_kernel_param_s
 
struct  Timer
 
struct  TimeProfile
 
class  TuneKey
 
class  TuneParam
 
class  Tunable
 
struct  axpby
 
struct  xpy
 
struct  axpy
 
struct  xpay
 
struct  mxpy
 
struct  ax
 
struct  caxpy
 
struct  caxpby
 
struct  cxpaypbz
 
struct  axpyBzpcx
 
struct  axpyZpbx
 
struct  caxpbypzYmbw
 
struct  cabxpyAx
 
struct  caxpbypz
 
struct  caxpbypczpw
 
struct  caxpyxmaz
 
struct  tripleCGUpdate
 
struct  CloverParam
 
struct  DslashParam
 
class  DslashCuda
 
class  SharedDslashCuda
 
class  WilsonDslashCuda
 
class  CloverDslashCuda
 
class  AsymCloverDslashCuda
 
class  TwistedDslashCuda
 
class  DomainWallDslashCuda
 
class  StaggeredDslashCuda
 
class  CloverCuda
 
class  TwistGamma5Cuda
 
class  GaugeForceCuda
 
class  MemAlloc
 
struct  ComplexTypeId< float >
 
struct  ComplexTypeId< double >
 
struct  RealTypeId< float2 >
 
struct  RealTypeId< double2 >
 
struct  PromoteTypeId
 
struct  PromoteTypeId< float2, float >
 
struct  PromoteTypeId< float, float2 >
 
struct  PromoteTypeId< double2, double >
 
struct  PromoteTypeId< double, double2 >
 
struct  PromoteTypeId< double, int >
 
struct  PromoteTypeId< int, double >
 
struct  PromoteTypeId< float, int >
 
struct  PromoteTypeId< int, float >
 
struct  Zero
 
struct  Identity
 
class  Matrix
 
class  Array
 
struct  ReduceFunctor
 
struct  Norm2
 
struct  Dot
 
struct  axpyNorm2
 
struct  xmyNorm2
 
struct  caxpyNorm2
 
struct  caxpyxmaznormx
 
struct  cabxpyaxnorm
 
struct  Cdot
 
struct  xpaycdotzy
 
struct  caxpydotzy
 
struct  CdotNormA
 
struct  CdotNormB
 
struct  caxpbypzYmbwcDotProductUYNormY
 
struct  axpyCGNorm2
 
struct  tripleCGReduction
 
class  UnitarizeLinksCuda
 

Typedefs

typedef std::complex< double > Complex
 
typedef struct quda::kernel_param_s kernel_param_t
 
typedef struct quda::llfat_kernel_param_s llfat_kernel_param_t
 

Enumerations

enum  QudaProfileType {
  QUDA_PROFILE_H2D, QUDA_PROFILE_D2H, QUDA_PROFILE_INIT, QUDA_PROFILE_PREAMBLE,
  QUDA_PROFILE_COMPUTE, QUDA_PROFILE_EPILOGUE, QUDA_PROFILE_FREE, QUDA_PROFILE_TOTAL,
  QUDA_PROFILE_COUNT
}
 
enum  AllocType {
  DEVICE, HOST, PINNED, MAPPED,
  N_ALLOC_TYPE
}
 

Functions

void initBlas ()
 
void endBlas (void)
 
void setBlasTuning (QudaTune tune, QudaVerbosity verbose)
 
void setBlasParam (int kernel, int prec, int threads, int blocks)
 
double norm2 (const ColorSpinorField &)
 
void zeroCuda (cudaColorSpinorField &a)
 
void copyCuda (cudaColorSpinorField &dst, const cudaColorSpinorField &src)
 
double axpyNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
double normCuda (const cudaColorSpinorField &b)
 
double reDotProductCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
double xmyNormCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
void axpbyCuda (const double &a, cudaColorSpinorField &x, const double &b, cudaColorSpinorField &y)
 
void axpyCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void axCuda (const double &a, cudaColorSpinorField &x)
 
void xpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void xpayCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y)
 
void mxpyCuda (cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void axpyZpbxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z, const double &b)
 
void axpyBzpcxCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y, const double &b, cudaColorSpinorField &z, const double &c)
 
void caxpbyCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y)
 
void caxpyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void cxpaypbzCuda (cudaColorSpinorField &, const Complex &b, cudaColorSpinorField &y, const Complex &c, cudaColorSpinorField &z)
 
void caxpbypzYmbwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &, cudaColorSpinorField &)
 
Complex cDotProductCuda (cudaColorSpinorField &, cudaColorSpinorField &)
 
Complex xpaycDotzyCuda (cudaColorSpinorField &x, const double &a, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double3 cDotProductNormACuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
double3 cDotProductNormBCuda (cudaColorSpinorField &a, cudaColorSpinorField &b)
 
double3 caxpbypzYmbwcDotProductUYNormYCuda (const Complex &a, cudaColorSpinorField &x, const Complex &b, cudaColorSpinorField &y, cudaColorSpinorField &z, cudaColorSpinorField &w, cudaColorSpinorField &u)
 
void cabxpyAxCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
double caxpyNormCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void caxpyXmazCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double caxpyXmazNormXCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double cabxpyAxNormCuda (const double &a, const Complex &b, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
void caxpbypzCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)
 
void caxpbypczpwCuda (const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, const Complex &, cudaColorSpinorField &, cudaColorSpinorField &)
 
Complex caxpyDotzyCuda (const Complex &a, cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
Complex axpyCGNormCuda (const double &a, cudaColorSpinorField &x, cudaColorSpinorField &y)
 
double3 HeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &r)
 
double3 xpyHeavyQuarkResidualNormCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &r)
 
void tripleCGUpdateCuda (const double &alpha, const double &beta, cudaColorSpinorField &q, cudaColorSpinorField &r, cudaColorSpinorField &x, cudaColorSpinorField &p)
 
double3 tripleCGReductionCuda (cudaColorSpinorField &x, cudaColorSpinorField &y, cudaColorSpinorField &z)
 
double axpyNormCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
double normCpu (const cpuColorSpinorField &b)
 
double reDotProductCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double xmyNormCpu (const cpuColorSpinorField &a, cpuColorSpinorField &b)
 
void axpbyCpu (const double &a, const cpuColorSpinorField &x, const double &b, cpuColorSpinorField &y)
 
void axpyCpu (const double &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void axCpu (const double &a, cpuColorSpinorField &x)
 
void xpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void xpayCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y)
 
void mxpyCpu (const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void axpyZpbxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const cpuColorSpinorField &z, const double &b)
 
void axpyBzpcxCpu (const double &a, cpuColorSpinorField &x, cpuColorSpinorField &y, const double &b, const cpuColorSpinorField &z, const double &c)
 
void caxpbyCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y)
 
void caxpyCpu (const Complex &a, const cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void cxpaypbzCpu (const cpuColorSpinorField &x, const Complex &b, const cpuColorSpinorField &y, const Complex &c, cpuColorSpinorField &z)
 
void caxpbypzYmbwCpu (const Complex &, const cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &, const cpuColorSpinorField &)
 
Complex cDotProductCpu (const cpuColorSpinorField &, const cpuColorSpinorField &)
 
Complex xpaycDotzyCpu (const cpuColorSpinorField &x, const double &a, cpuColorSpinorField &y, const cpuColorSpinorField &z)
 
double3 cDotProductNormACpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double3 cDotProductNormBCpu (const cpuColorSpinorField &a, const cpuColorSpinorField &b)
 
double3 caxpbypzYmbwcDotProductUYNormYCpu (const Complex &a, const cpuColorSpinorField &x, const Complex &b, cpuColorSpinorField &y, cpuColorSpinorField &z, const cpuColorSpinorField &w, const cpuColorSpinorField &u)
 
void cabxpyAxCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
double caxpyNormCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void caxpyXmazCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double caxpyXmazNormXCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double cabxpyAxNormCpu (const double &a, const Complex &b, cpuColorSpinorField &x, cpuColorSpinorField &y)
 
void caxpbypzCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
 
void caxpbypczpwCpu (const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, const Complex &, cpuColorSpinorField &, cpuColorSpinorField &)
 
Complex caxpyDotzyCpu (const Complex &a, cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &z)
 
double3 HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &r)
 
double3 xpyHeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)
 
void computeCloverCuda (cudaCloverField &clover, const cudaGaugeField &gauge)
 
void setDiracParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
 
void setDiracSloppyParam (DiracParam &diracParam, QudaInvertParam *inv_param, bool pc)
 
void setDslashTuning (QudaTune tune, QudaVerbosity verbose)
 
void setKernelPackT (bool pack)
 
bool getKernelPackT ()
 
void setFace (const FaceBuffer &face)
 
bool getDslashLaunch ()
 
void createDslashEvents ()
 
void destroyDslashEvents ()
 
void initLatticeConstants (const LatticeField &lat)
 
void initGaugeConstants (const cudaGaugeField &gauge)
 
void initSpinorConstants (const cudaColorSpinorField &spinor)
 
void initDslashConstants ()
 
void initCloverConstants (const cudaCloverField &clover)
 
void initStaggeredConstants (const cudaGaugeField &fatgauge, const cudaGaugeField &longgauge)
 
void initTwistedMassConstants (const int flv_stride)
 Non-degenerate twisted mass (ndeg tm).
 
void wilsonDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim)
 
void cloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim)
 
void asymCloverDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover cloverInv, const cudaColorSpinorField *in, const int oddBit, const int daggerBit, const cudaColorSpinorField *x, const double &k, const int *commDim)
 
void cloverCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const FullClover clover, const cudaColorSpinorField *in, const int oddBit)
 
void domainWallDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &m_f, const double &k, const int *commDim)
 
void staggeredDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &fatGauge, const cudaGaugeField &longGauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &k, const int *commDim)
 NEW: extra argument.
 
void twistedMassDslashCuda (cudaColorSpinorField *out, const cudaGaugeField &gauge, const cudaColorSpinorField *in, const int parity, const int dagger, const cudaColorSpinorField *x, const double &kappa, const double &mu, const double &epsilon, const int *commDim)
 Non-degenerate twisted mass (ndeg tm).
 
void twistGamma5Cuda (cudaColorSpinorField *out, const cudaColorSpinorField *in, const int dagger, const double &kappa, const double &mu, const double &epsilon, const QudaTwistGamma5Type)
 Non-degenerate twisted mass (ndeg tm).
 
void packFace (void *ghost_buf, cudaColorSpinorField &in, const int dagger, const int parity, const cudaStream_t &stream)
 
void loadLinkToGPU (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param)
 
void loadLinkToGPU_ex (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge)
 
void loadLinkToGPU_gf (cudaGaugeField *cudaGauge, cpuGaugeField *cpuGauge, QudaGaugeParam *param)
 
void storeLinkToCPU (cpuGaugeField *cpuGauge, cudaGaugeField *cudaGauge, QudaGaugeParam *param)
 
void packGhostStaple (int *X, void *even, void *odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void **fwd_nbr_buf_gpu, void **back_nbr_buf_gpu, void **fwd_nbr_buf, void **back_nbr_buf, cudaStream_t *stream)
 
void unpackGhostStaple (int *X, void *_even, void *_odd, int volume, QudaPrecision prec, int stride, int dir, int whichway, void **fwd_nbr_buf, void **back_nbr_buf, cudaStream_t *stream)
 
void pack_ghost_all_staples_cpu (void *staple, void **cpuGhostStapleBack, void **cpuGhostStapleFwd, int nFace, QudaPrecision precision, int *X)
 
void pack_ghost_all_links (void **cpuLink, void **cpuGhostBack, void **cpuGhostFwd, int dir, int nFace, QudaPrecision precision, int *X)
 
void pack_gauge_diag (void *buf, int *X, void **sitelink, int nu, int mu, int dir1, int dir2, QudaPrecision prec)
 
void fermion_force_init_cuda (QudaGaugeParam *param)
 
void fermion_force_cuda (double eps, double weight1, double weight2, void *act_path_coeff, FullHw cudaHw, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaMom, QudaGaugeParam *param)
 
std::ostream & operator<< (std::ostream &output, const GaugeFieldParam &param)
 
double norm2 (const cudaGaugeField &a)
 
void gauge_force_init_cuda (QudaGaugeParam *param, int max_length)
 
void gauge_force_cuda (cudaGaugeField &cudaMom, double eb3, cudaGaugeField &cudaSiteLink, QudaGaugeParam *param, int ***input_path, int *length, void *path_coeff, int num_paths, int max_length)
 
void setUnitarizeLinksPadding (int input_padding, int output_padding)
 
void setUnitarizeLinksConstants (double unitarize_eps, double max_error, bool allow_svd, bool svd_only, double svd_rel_error, double svd_abs_error, bool check_unitarization=true)
 
void unitarizeLinksCuda (const QudaGaugeParam &param, cudaGaugeField &infield, cudaGaugeField *outfield, int *num_failures)
 
void unitarizeLinksCPU (const QudaGaugeParam &param, cpuGaugeField &infield, cpuGaugeField *outfield)
 
bool isUnitary (const QudaGaugeParam &param, cpuGaugeField &field, double max_error)
 
std::ostream & operator<< (std::ostream &output, const LatticeFieldParam &param)
 
void llfat_cuda (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
 
void llfat_cuda_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
 
void llfat_init_cuda (QudaGaugeParam *param)
 
void llfat_init_cuda_ex (QudaGaugeParam *param_ex)
 
void computeGenStapleFieldParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)
 
void computeGenStapleFieldParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, const void *mulink_even, const void *mulink_odd, int mu, int nu, int save_staple, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)
 
void siteComputeGenStapleParityKernel (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, dim3 halfGridDim, llfat_kernel_param_t kparam, cudaStream_t *stream)
 
void siteComputeGenStapleParityKernel_ex (void *staple_even, void *staple_odd, const void *sitelink_even, const void *sitelink_odd, void *fatlink_even, void *fatlink_odd, int mu, int nu, double mycoeff, QudaReconstructType recon, QudaPrecision prec, llfat_kernel_param_t kparam)
 
void llfatOneLinkKernel (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff)
 
void llfatOneLinkKernel_ex (cudaGaugeField &cudaFatLink, cudaGaugeField &cudaSiteLink, cudaGaugeField &cudaStaple, cudaGaugeField &cudaStaple1, QudaGaugeParam *param, double *act_path_coeff, llfat_kernel_param_t kparam)
 
void computeFatLinkCore (cudaGaugeField *cudaSiteLink, double *act_path_coeff, QudaGaugeParam *qudaGaugeParam, QudaComputeFatMethod method, cudaGaugeField *cudaFatLink, struct timeval time_array[])
 
void printPeakMemUsage ()
 
void assertAllMemFree ()
 
void * device_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * safe_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * pinned_malloc_ (const char *func, const char *file, int line, size_t size)
 
void * mapped_malloc_ (const char *func, const char *file, int line, size_t size)
 
void device_free_ (const char *func, const char *file, int line, void *ptr)
 
void host_free_ (const char *func, const char *file, int line, void *ptr)
 
void link_format_cpu_to_gpu (void *dst, void *src, int reconstruct, int Vh, int pad, int ghostV, QudaPrecision prec, QudaGaugeFieldOrder cpu_order, cudaStream_t stream)
 
void link_format_gpu_to_cpu (void *dst, void *src, int Vh, int stride, QudaPrecision prec, cudaStream_t stream)
 
void collectGhostStaple (int *X, void *even, void *odd, int volume, QudaPrecision precision, void *ghost_staple_gpu, int dir, int whichway, cudaStream_t *stream)
 
void loadTuneCache (QudaVerbosity verbosity)
 
void saveTuneCache (QudaVerbosity verbosity)
 
TuneParam tuneLaunch (Tunable &tunable, QudaTune enabled, QudaVerbosity verbosity)
 
template<typename Float >
void axpby (const Float &a, const Float *x, const Float &b, Float *y, const int N)
 
template<typename Float >
void caxpby (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, std::complex< Float > *y, int N)
 
template<typename Float >
void caxpbypcz (const std::complex< Float > &a, const std::complex< Float > *x, const std::complex< Float > &b, const std::complex< Float > *y, const std::complex< Float > &c, std::complex< Float > *z, int N)
 
template<typename Float >
double norm (const Float *a, const int N)
 
template<typename Float >
double reDotProduct (const Float *a, const Float *b, const int N)
 
template<typename Float >
Complex cDotProduct (const std::complex< Float > *a, const std::complex< Float > *b, const int N)
 
template<typename Float >
double3 HeavyQuarkResidualNorm (const Float *x, const Float *r, const int volume, const int Nint)
 
double3 HeavyQuarkResidualNormCpu (cpuColorSpinorField &x, cpuColorSpinorField &y, cpuColorSpinorField &r)
 
void initReduce ()
 
void endReduce ()
 
QudaTune getBlasTuning ()
 
QudaVerbosity getBlasVerbosity ()
 
cudaStream_t * getBlasStream ()
 
__device__ void caxpy_ (const float2 &a, const float4 &x, float4 &y)
 
__device__ void caxpy_ (const float2 &a, const float2 &x, float2 &y)
 
__device__ void caxpy_ (const double2 &a, const double2 &x, double2 &y)
 
__device__ void caxpby_ (const float2 &a, const float4 &x, const float2 &b, float4 &y)
 
__device__ void caxpby_ (const float2 &a, const float2 &x, const float2 &b, float2 &y)
 
__device__ void caxpby_ (const double2 &a, const double2 &x, const double2 &b, double2 &y)
 
__device__ void cxpaypbz_ (const float4 &x, const float2 &a, const float4 &y, const float2 &b, float4 &z)
 
__device__ void cxpaypbz_ (const float2 &x, const float2 &a, const float2 &y, const float2 &b, float2 &z)
 
__device__ void cxpaypbz_ (const double2 &x, const double2 &a, const double2 &y, const double2 &b, double2 &z)
 
__device__ int linkIndex (int x[], int dx[], const CloverParam &param)
 
template<typename Cmplx >
__global__ void computeFmunuKernel (Cmplx *Fmunu, const Cmplx *gauge, const CloverParam param)
 
std::ostream & operator<< (std::ostream &out, const ColorSpinorField &a)
 
template<class D , class S >
void genericCopy (D &dst, const S &src)
 
template<class T >
void random (T &t)
 
template<class T >
void point (T &t, const int x, const int s, const int c)
 
template<class U , class V >
int compareSpinor (const U &u, const V &v, const int tol)
 
template<class Order >
void print_vector (const Order &o, unsigned int x)
 
template<typename Float >
void transpose (Float *gT, const Float *g)
 
template<typename Float >
void packGhost (Float **ghost, const Float **gauge, const int nFace, const int *X, const int volumeCB, const int *surfaceCB, const QudaGaugeFieldOrder order)
 
std::ostream & operator<< (std::ostream &out, const cudaColorSpinorField &a)
 
template<typename Float , typename Float2 >
void loadMomField (Float2 *even, Float2 *odd, Float *mom, int bytes, int Vh, int pad, void *buffer)
 
template<typename Float , typename Float2 >
void storeMomToCPUArray (Float *mom, Float2 *even, Float2 *odd, int bytes, int V, int pad, void *buffer)
 
void setTwistParam (double &a, double &b, const double &kappa, const double &mu, const int dagger, const QudaTwistGamma5Type twist)
 
void initDslashCommsPattern ()
 
void dslashCuda (DslashCuda &dslash, const size_t regSize, const int parity, const int dagger, const int volume, const int *faceVolumeCB)
 
template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void do_middle_link_kernel (Float2 *tempxEven, Float2 *tempxOdd, Float2 *PmuEven, Float2 *PmuOdd, Float2 *P3Even, Float2 *P3Odd, int sig, int mu, Float2 coeff, float4 *linkEven, float4 *linkOdd, Float2 *momEven, Float2 *momOdd)
 
template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void do_side_link_kernel (Float2 *P3Even, Float2 *P3Odd, Float2 *P3muEven, Float2 *P3muOdd, Float2 *TempxEven, Float2 *TempxOdd, Float2 *PmuEven, Float2 *PmuOdd, Float2 *shortPEven, Float2 *shortPOdd, int sig, int mu, Float2 coeff, Float2 accumu_coeff, float4 *linkEven, float4 *linkOdd, Float2 *momEven, Float2 *momOdd)
 
template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void do_all_link_kernel (Float2 *tempxEven, Float2 *tempxOdd, Float2 *PmuEven, Float2 *PmuOdd, Float2 *P3Even, Float2 *P3Odd, Float2 *P3muEven, Float2 *P3muOdd, Float2 *shortPEven, Float2 *shortPOdd, int sig, int mu, Float2 coeff, Float2 mcoeff, Float2 accumu_coeff, float4 *linkEven, float4 *linkOdd, Float2 *momEven, Float2 *momOdd)
 
template<int oddBit, typename Float2 >
__global__ void do_one_and_naik_terms_kernel (Float2 *TempxEven, Float2 *TempxOdd, Float2 *PmuEven, Float2 *PmuOdd, Float2 *PnumuEven, Float2 *PnumuOdd, int mu, Float2 OneLink, Float2 Naik, Float2 mNaik, float4 *linkEven, float4 *linkOdd, Float2 *momEven, Float2 *momOdd)
 
template<int oddBit, typename Float2 , typename FloatN , typename Float >
__global__ void GAUGE_FORCE_KERN_NAME (Float2 *momEven, Float2 *momOdd, const int dir, const double eb3, const FloatN *linkEven, const FloatN *linkOdd, const int *input_path, const int *length, const Float *path_coeff, const int num_paths, const kernel_param_t kparam)
 
void gauge_force_cuda_dir (cudaGaugeField &cudaMom, const int dir, const double eb3, const cudaGaugeField &cudaSiteLink, const QudaGaugeParam *param, int **input_path, const int *length, const void *path_coeff, const int num_paths, const int max_length)
 
void setDiracPreParam (DiracParam &diracParam, QudaInvertParam *inv_param, const bool pc)
 
void createDirac (Dirac *&d, Dirac *&dSloppy, Dirac *&dPre, QudaInvertParam &param, const bool pc_solve)
 
void massRescale (QudaDslashType dslash_type, double &kappa, QudaSolutionType solution_type, QudaMassNormalization mass_normalization, cudaColorSpinorField &b)
 
void massRescaleCoeff (QudaDslashType dslash_type, double &kappa, QudaSolutionType solution_type, QudaMassNormalization mass_normalization, double &coeff)
 
void fillInnerInvertParam (QudaInvertParam &inner, const QudaInvertParam &outer)
 
double resNorm (const DiracMatrix &mat, cudaColorSpinorField &b, cudaColorSpinorField &x)
 
double timeInterval (struct timeval start, struct timeval end)
 
void orthoDir (Complex **beta, cudaColorSpinorField *Ap[], int k)
 
void backSubs (const Complex *alpha, Complex **const beta, const double *gamma, Complex *delta, int n)
 
void updateSolution (cudaColorSpinorField &x, const Complex *alpha, Complex **const beta, double *gamma, int k, cudaColorSpinorField *p[])
 
void updateAlphaZeta (double *alpha, double *zeta, double *zeta_old, const double *r2, const double *beta, const double pAp, const double *offset, const int nShift, const int j_low)
 
template<int mu, int nu, int odd_bit>
__global__ void LLFAT_KERNEL (do_siteComputeGenStapleParity, RECONSTRUCT)(FloatM *staple_even
 
 if (kparam.kernel_type==LLFAT_EXTERIOR_KERNEL_FWD_X &&x1!=X1m1) return
 
 COMPUTE_RECONSTRUCT_SIGN (sign, nu, x1, x2, x3, x4)
 
 RECONSTRUCT_SITE_LINK (sign, a)
 
 LLFAT_COMPUTE_NEW_IDX_PLUS (nu, X)
 
 LOAD_ODD_SITE_MATRIX (mu, new_mem_idx, B)
 
 COMPUTE_RECONSTRUCT_SIGN (sign, mu, new_x1, new_x2, new_x3, new_x4)
 
 RECONSTRUCT_SITE_LINK (sign, b)
 
 MULT_SU3_NN (a, b, tempa)
 
 LLFAT_COMPUTE_NEW_IDX_PLUS (mu, X)
 
 LOAD_ODD_SITE_MATRIX (nu, new_mem_idx, C)
 
 COMPUTE_RECONSTRUCT_SIGN (sign, nu, new_x1, new_x2, new_x3, new_x4)
 
 RECONSTRUCT_SITE_LINK (sign, c)
 
 MULT_SU3_NA (tempa, c, staple)
 
 LOAD_ODD_SITE_MATRIX (nu,(new_mem_idx), A)
 
 LOAD_ODD_SITE_MATRIX (mu,(new_mem_idx), B)
 
 MULT_SU3_AN (a, b, tempa)
 
 LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE (nu, mu)
 
 LOAD_EVEN_SITE_MATRIX (nu, new_mem_idx, C)
 
 MULT_SU3_NN (tempa, c, b)
 
 LLFAT_ADD_SU3_MATRIX (b, staple, staple)
 
 WRITE_STAPLE_MATRIX (staple_even, mem_idx)
 
template<int mu, int nu, int odd_bit, int save_staple>
__global__ void LLFAT_KERNEL (do_computeGenStapleFieldParity, RECONSTRUCT)(FloatM *staple_even
 
 LOAD_ODD_MULINK_MATRIX (0, new_mem_idx, BB)
 
 MULT_SU3_NN (a, bb, tempa)
 
 if (save_staple)
 
 LOAD_ODD_SITE_MATRIX (nu, new_mem_idx, A)
 
 LLFAT_COMPUTE_NEW_IDX_MINUS (nu, X)
 
 MULT_SU3_AN (a, bb, tempa)
 
 MULT_SU3_NN (tempa, c, a)
 
 LOAD_EVEN_FAT_MATRIX (mu, mem_idx)
 
__global__ void LLFAT_KERNEL (llfatOneLink, RECONSTRUCT)(const FloatN *sitelink_even
 
 if (mem_idx >=Vh)
 
 for (int dir=0;dir< 4;dir++)
 
template<int mu, int nu, int odd_bit>
__global__ void LLFAT_KERNEL_EX (do_siteComputeGenStapleParity, RECONSTRUCT)(FloatM *staple_even
 
 if (mem_idx >=kparam.threads) return
 
 COMPUTE_RECONSTRUCT_SIGN (sign, nu,(x1-2),(x2-2),(x3-2),(x4-2))
 
 LLFAT_COMPUTE_NEW_IDX_PLUS_EX (nu, X)
 
 COMPUTE_RECONSTRUCT_SIGN (sign, mu,(new_x1-2),(new_x2-2),(new_x3-2),(new_x4-2))
 
 LLFAT_COMPUTE_NEW_IDX_PLUS_EX (mu, X)
 
 COMPUTE_RECONSTRUCT_SIGN (sign, nu,(new_x1-2),(new_x2-2),(new_x3-2),(new_x4-2))
 
 LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE_EX (nu, mu)
 
 if (!(x1==1||x1==X1+2||x2==1||x2==X2+2||x3==1||x3==X3+2||x4==1||x4==X4+2))
 
template<int mu, int nu, int odd_bit, int save_staple>
__global__ void LLFAT_KERNEL_EX (do_computeGenStapleFieldParity, RECONSTRUCT)(FloatM *staple_even
 
 LLFAT_COMPUTE_NEW_IDX_MINUS_EX (nu, X)
 
 LLFAT_ADD_SU3_MATRIX (a, staple, staple)
 
__global__ void LLFAT_KERNEL_EX (llfatOneLink, RECONSTRUCT)(const FloatN *sitelink_even
 
 if (sid >=2 *kparam.threads) return
 
 if (idx >=kparam.threads)
 
template<int N, typename FloatN , typename Float2 >
__global__ void do_link_format_cpu_to_gpu (FloatN *dst, Float2 *src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)
 
template<int N, typename FloatN , typename Float2 >
__global__ void do_link_format_cpu_to_gpu_milc (FloatN *dst, Float2 *src, int reconstruct, int Vh, int pad, int ghostV, size_t threads)
 
template<typename FloatN >
__global__ void do_link_format_gpu_to_cpu (FloatN *dst, FloatN *src, int Vh, int stride)
 
template<int dir, int whichway, typename Float2 >
__global__ void collectGhostStapleKernel (Float2 *in, const int oddBit, Float2 *nbr_staple_gpu)
 
template<class Cmplx >
__device__ __host__ Cmplx makeComplex (const typename RealTypeId< Cmplx >::Type &a, const typename RealTypeId< Cmplx >::Type &b)
 
__device__ __host__ double2 makeComplex (const double &a, const double &b)
 
__device__ __host__ float2 makeComplex (const float &a, const float &b)
 
template<class Cmplx >
__device__ __host__ Cmplx & operator+= (Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator+ (const Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator* (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator/ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator+ (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator+ (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const Cmplx &a, const typename RealTypeId< Cmplx >::Type &scalar)
 
template<class Cmplx >
__device__ __host__ Cmplx operator- (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &a)
 
template<class Cmplx >
__device__ __host__ Cmplx operator* (const typename RealTypeId< Cmplx >::Type &scalar, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx operator* (const Cmplx &a, const Cmplx &b)
 
template<class Cmplx >
__device__ __host__ Cmplx conj (const Cmplx &a)
 
__device__ __host__ double conj (const double &a)
 
__device__ __host__ float conj (const float &a)
 
template<class Cmplx >
__device__ __host__ Cmplx getPreciseInverse (const Cmplx &z)
 
std::ostream & operator<< (std::ostream &os, const float2 &z)
 
std::ostream & operator<< (std::ostream &os, const double2 &z)
 
template<int N>
__device__ __host__ int index (int i, int j)
 
template<class T >
__device__ __host__ T getTrace (const Matrix< T, 3 > &a)
 
template<class T >
__device__ __host__ T getDeterminant (const Matrix< T, 3 > &a)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator+ (const Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator+= (Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > operator- (const Matrix< T, N > &a, const Matrix< T, N > &b)
 
template<class T , int N, class S >
__device__ __host__ Matrix< T, N > operator* (const S &scalar, const Matrix< T, N > &a)
 
template<class T , int N, class S >
__device__ __host__ Matrix< T, N > operator* (const Matrix< T, N > &a, const S &scalar)
 
template<class T >
__device__ __host__ Matrix< T, 3 > operator* (const Matrix< T, 3 > &a, const Matrix< T, 3 > &b)
 
template<class T , class U >
__device__ __host__ Matrix
< typename PromoteTypeId< T, U >
::Type, 3 > 
operator* (const Matrix< T, 3 > &a, const Matrix< U, 3 > &b)
 
template<class T >
__device__ __host__ Matrix< T, 2 > operator* (const Matrix< T, 2 > &a, const Matrix< T, 2 > &b)
 
template<class T , int N>
__device__ __host__ Matrix< T, N > conj (const Matrix< T, N > &other)
 
template<class T >
__device__ __host__ void computeMatrixInverse (const Matrix< T, 3 > &u, Matrix< T, 3 > *uinv)
 
template<class T , int N>
__device__ __host__ void setIdentity (Matrix< T, N > *m)
 
template<int N>
__device__ __host__ void setIdentity (Matrix< float2, N > *m)
 
template<int N>
__device__ __host__ void setIdentity (Matrix< double2, N > *m)
 
template<class T , int N>
__device__ __host__ void setZero (Matrix< T, N > *m)
 
template<int N>
__device__ __host__ void setZero (Matrix< float2, N > *m)
 
template<int N>
__device__ __host__ void setZero (Matrix< double2, N > *m)
 
template<class T , int N>
__device__ __host__ void copyColumn (const Matrix< T, N > &m, int c, Array< T, N > *a)
 
template<class T , int N>
__device__ __host__ void outerProd (const Array< T, N > &a, const Array< T, N > &b, Matrix< T, N > *m)
 
template<class T , int N>
std::ostream & operator<< (std::ostream &os, const Matrix< T, N > &m)
 
template<class T , int N>
std::ostream & operator<< (std::ostream &os, const Array< T, N > &a)
 
template<class T >
__device__ void loadLinkVariableFromArray (const T *const array, int dir, int idx, int stride, Matrix< T, 3 > *link)
 
__device__ void loadLinkVariableFromArray (const float2 *const array, int dir, int idx, int stride, Matrix< double2, 3 > *link)
 
template<class T >
__device__ void writeLinkVariableToArray (const Matrix< T, 3 > &link, int dir, int idx, int stride, T *const array)
 
__device__ void writeLinkVariableToArray (const Matrix< double2, 3 > &link, int dir, int idx, int stride, float2 *const array)
 
template<class Cmplx >
__device__ __host__ void computeLinkInverse (Matrix< Cmplx, 3 > *uinv, const Matrix< Cmplx, 3 > &u)
 
void copyArrayToLink (Matrix< float2, 3 > *link, float *array)
 
template<class Cmplx , class Real >
void copyArrayToLink (Matrix< Cmplx, 3 > *link, Real *array)
 
void copyLinkToArray (float *array, const Matrix< float2, 3 > &link)
 
template<class Cmplx , class Real >
void copyLinkToArray (Real *array, const Matrix< Cmplx, 3 > &link)
 
template<class Cmplx >
__host__ __device__ void printLink (const Matrix< Cmplx, 3 > &link)
 
__device__ double norm2_ (const double2 &a)
 
__device__ float norm2_ (const float2 &a)
 
__device__ float norm2_ (const float4 &a)
 
__device__ double dot_ (const double2 &a, const double2 &b)
 
__device__ float dot_ (const float2 &a, const float2 &b)
 
__device__ float dot_ (const float4 &a, const float4 &b)
 
__device__ void Caxpy_ (const float2 &a, const float4 &x, float4 &y)
 
__device__ void Caxpy_ (const float2 &a, const float2 &x, float2 &y)
 
__device__ void Caxpy_ (const double2 &a, const double2 &x, double2 &y)
 
__device__ double2 cdot_ (const double2 &a, const double2 &b)
 
__device__ double2 cdot_ (const float2 &a, const float2 &b)
 
__device__ double2 cdot_ (const float4 &a, const float4 &b)
 
__device__ double3 cdotNormA_ (const double2 &a, const double2 &b)
 
__device__ double3 cdotNormA_ (const float2 &a, const float2 &b)
 
__device__ double3 cdotNormA_ (const float4 &a, const float4 &b)
 
__device__ double3 cdotNormB_ (const double2 &a, const double2 &b)
 
__device__ double3 cdotNormB_ (const float2 &a, const float2 &b)
 
__device__ double3 cdotNormB_ (const float4 &a, const float4 &b)
 
template<class Cmplx >
DEVICEHOST RealTypeId< Cmplx >
::Type 
cabs (const Cmplx &z)
 
template<class T , class U >
DEVICEHOST PromoteTypeId< T, U >
::Type 
quadSum (const T &a, const U &b)
 
DEVICEHOST float getNorm (const Array< float2, 3 > &a)
 
DEVICEHOST double getNorm (const Array< double2, 3 > &a)
 
template<class T >
DEVICEHOST void constructHHMat (const T &tau, const Array< T, 3 > &v, Matrix< T, 3 > &hh)
 
template<class Real >
DEVICEHOST void getLambdaMax (const Matrix< Real, 3 > &b, Real &lambda_max)
 
template<class Real >
DEVICEHOST void getGivensRotation (const Real &alpha, const Real &beta, Real &c, Real &s)
 
template<class Real >
DEVICEHOST void accumGivensRotation (int index, const Real &c, const Real &s, Matrix< Real, 3 > &m)
 
template<class Real >
DEVICEHOST void assignGivensRotation (const Real &c, const Real &s, Matrix< Real, 2 > &m)
 
template<class Real >
DEVICEHOST void swap (Real &a, Real &b)
 
template<class Real >
DEVICEHOST void smallSVD (Matrix< Real, 2 > &u, Matrix< Real, 2 > &v, Matrix< Real, 2 > &m)
 
template<class Cmplx >
DEVICEHOST void getRealBidiagMatrix (const Matrix< Cmplx, 3 > &mat, Matrix< Cmplx, 3 > &u, Matrix< Cmplx, 3 > &v)
 
template<class Real >
DEVICEHOST void bdSVD (Matrix< Real, 3 > &u, Matrix< Real, 3 > &v, Matrix< Real, 3 > &b, int max_it)
 
template<class Cmplx >
DEVICEHOST void computeSVD (const Matrix< Cmplx, 3 > &m, Matrix< Cmplx, 3 > &u, Matrix< Cmplx, 3 > &v, typename RealTypeId< Cmplx >::Type singular_values[3])
 
template<class Cmplx >
__device__ __host__ bool isUnitary (const Matrix< Cmplx, 3 > &matrix, double max_error)
 
template<class Cmplx >
__device__ __host__ bool isUnitarizedLinkConsistent (const Matrix< Cmplx, 3 > &initial_matrix, const Matrix< Cmplx, 3 > &unitary_matrix, double max_error)
 
template<class T >
__device__ __host__ T getAbsMin (const T *const array, int size)
 
template<class Real >
__device__ __host__ bool checkAbsoluteError (Real a, Real b, Real epsilon)
 
template<class Real >
__device__ __host__ bool checkRelativeError (Real a, Real b, Real epsilon)
 
template<class Cmplx >
__device__ __host__ bool reciprocalRoot (const Matrix< Cmplx, 3 > &q, Matrix< Cmplx, 3 > *res)
 
template<class Cmplx >
__host__ __device__ bool unitarizeLinkMILC (const Matrix< Cmplx, 3 > &in, Matrix< Cmplx, 3 > *const result)
 
template<class Cmplx >
__host__ __device__ bool unitarizeLinkSVD (const Matrix< Cmplx, 3 > &in, Matrix< Cmplx, 3 > *const result)
 
template<class Cmplx >
__host__ __device__ bool unitarizeLinkNewton (const Matrix< Cmplx, 3 > &in, Matrix< Cmplx, 3 > *const result)
 
template<class Cmplx >
__global__ void getUnitarizedField (const Cmplx *inlink_even, const Cmplx *inlink_odd, Cmplx *outlink_even, Cmplx *outlink_odd, int *num_failures, const int threads)
 

Variables

unsigned long long blas_flops
 
unsigned long long blas_bytes
 
const int Nstream = 1
 
DslashParam dslashParam
 
int Vspatial
 
int gatherCompleted [Nstream]
 
int previousDir [Nstream]
 
int commsCompleted [Nstream]
 
int dslashCompleted [Nstream]
 
int commDimTotal
 
__constant__ int dir1_array [16]
 
__constant__ int dir2_array [16]
 
unsigned long staple_bytes =0
 
__global__ void FloatMstaple_odd
 
__global__ void FloatM const
FloatN
sitelink_even
 
__global__ void FloatM const
FloatN const FloatN
sitelink_odd
 
__global__ void FloatM const
FloatN const FloatN FloatM
fatlink_even
 
__global__ void FloatM const
FloatN const FloatN FloatM
FloatM
fatlink_odd
 
__global__ void FloatM const
FloatN const FloatN FloatM
FloatM Float 
mycoeff
 
__global__ void FloatM const
FloatN const FloatN FloatM
FloatM Float
llfat_kernel_param_t 
kparam
 
FloatM TEMPA5
 
FloatM TEMPA6
 
FloatM TEMPA7
 
FloatM TEMPA8
 
FloatM STAPLE0
 
FloatM STAPLE1
 
FloatM STAPLE2
 
FloatM STAPLE3
 
FloatM STAPLE4
 
FloatM STAPLE5
 
FloatM STAPLE6
 
FloatM STAPLE7
 
FloatM STAPLE8
 
int mem_idx = blockIdx.x*blockDim.x + threadIdx.x
 
int z1 = mem_idx / X1h
 
short x1h = mem_idx - z1*X1h
 
int z2 = z1 / X2
 
short x2 = z1 - z2*X2
 
short x4 = z2 / X3
 
short x3 = z2 - x4*X3
 
short x1odd = (x2 + x3 + x4 + odd_bit) & 1
 
short x1 = 2*x1h + x1odd
 
int X = 2*mem_idx + x1odd
 
int new_mem_idx
 
 DECLARE_VAR_SIGN
 
 DECLARE_NEW_X
 
 DECLARE_X_ARRAY
 
 return
 
__global__ void FloatM const
FloatN const FloatN FloatM
FloatM const FloatM
mulink_even
 
__global__ void FloatM const
FloatN const FloatN FloatM
FloatM const FloatM const
FloatM
mulink_odd
 
FloatM TEMPB0
 
FloatM TEMPB1
 
FloatM TEMPB2
 
FloatM TEMPB3
 
FloatM TEMPB4
 
FloatM TEMPB5
 
FloatM TEMPB6
 
FloatM TEMPB7
 
FloatM TEMPB8
 
 else
 
__global__ void const FloatN
FloatM FloatM Float 
coeff0
 
__global__ void const FloatN
FloatM FloatM Float Float 
coeff5
 
FloatMmy_fatlink = fatlink_even
 
int sid = blockIdx.x*blockDim.x + threadIdx.x
 
int odd_bit = 0
 
 my_sitelink = sitelink_even
 
int idx = sid
 
__constant__ double DEV_HISQ_UNITARIZE_EPS
 
__constant__ double DEV_HISQ_FORCE_FILTER
 
__constant__ double DEV_MAX_DET_ERROR
 
__constant__ bool DEV_REUNIT_ALLOW_SVD
 
__constant__ bool DEV_REUNIT_SVD_ONLY
 
__constant__ double DEV_REUNIT_SVD_REL_ERROR
 
__constant__ double DEV_REUNIT_SVD_ABS_ERROR
 
__constant__ int INPUT_PADDING =0
 
__constant__ int OUTPUT_PADDING =0
 
__constant__ int DEV_MAX_ITER = 20
 
__constant__ double DEV_FL_MAX_ERROR
 
__constant__ double DEV_FL_UNITARIZE_EPS
 
__constant__ bool DEV_FL_REUNIT_ALLOW_SVD
 
__constant__ bool DEV_FL_REUNIT_SVD_ONLY
 
__constant__ double DEV_FL_REUNIT_SVD_REL_ERROR
 
__constant__ double DEV_FL_REUNIT_SVD_ABS_ERROR
 
__constant__ bool DEV_FL_CHECK_UNITARIZATION
 

Detailed Description

Generic Multi Shift Solver

For staggered, the mass is folded into the Dirac operator. Otherwise the matrix mass is 'unmodified'.

The lowest offset is in offsets[0]

Typedef Documentation

typedef std::complex<double> quda::Complex

Definition at line 13 of file color_spinor_field.h.

Enumeration Type Documentation

Enumerator:
DEVICE 
HOST 
PINNED 
MAPPED 
N_ALLOC_TYPE 

Definition at line 10 of file malloc.cpp.

Enumerator:
QUDA_PROFILE_H2D 

The time in seconds for host -> device transfers

QUDA_PROFILE_D2H 

The time in seconds for device -> host transfers

QUDA_PROFILE_INIT 

The time in seconds taken for initialization

QUDA_PROFILE_PREAMBLE 

The time in seconds taken for any preamble

QUDA_PROFILE_COMPUTE 

The time in seconds taken for the actual computation

QUDA_PROFILE_EPILOGUE 

The time in seconds taken for any epilogue

QUDA_PROFILE_FREE 

The time in seconds for freeing resources

QUDA_PROFILE_TOTAL 

The total time in seconds for the algorithm. Must be the penultimate type.

QUDA_PROFILE_COUNT 

The total number of timers we have. Must be last enum type.

Definition at line 126 of file quda_internal.h.

Function Documentation

template<class Real >
DEVICEHOST void quda::accumGivensRotation ( int  index,
const Real &  c,
const Real &  s,
Matrix< Real, 3 > &  m 
)
inline

Definition at line 136 of file svd_quda.h.

void quda::assertAllMemFree ( )

Definition at line 290 of file malloc.cpp.

template<class Real >
DEVICEHOST void quda::assignGivensRotation ( const Real &  c,
const Real &  s,
Matrix< Real, 2 > &  m 
)
inline

Definition at line 150 of file svd_quda.h.

void quda::asymCloverDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover  cloverInv,
const cudaColorSpinorField *  in,
const int  oddBit,
const int  daggerBit,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim 
)

Definition at line 1451 of file dslash_quda.cu.

void quda::axCpu ( const double &  a,
cpuColorSpinorField &  x 
)

Definition at line 60 of file blas_cpu.cpp.

void quda::axCuda ( const double &  a,
cudaColorSpinorField &  x 
)

Definition at line 166 of file blas_quda.cu.

template<typename Float >
void quda::axpby ( const Float a,
const Float x,
const Float b,
Float y,
const int  N 
)

Definition at line 8 of file blas_cpu.cpp.

void quda::axpbyCpu ( const double &  a,
const cpuColorSpinorField &  x,
const double &  b,
cpuColorSpinorField &  y 
)

Definition at line 12 of file blas_cpu.cpp.

void quda::axpbyCuda ( const double &  a,
cudaColorSpinorField &  x,
const double &  b,
cudaColorSpinorField &  y 
)

Definition at line 83 of file blas_quda.cu.

void quda::axpyBzpcxCpu ( const double &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
const double &  b,
const cpuColorSpinorField &  z,
const double &  c 
)

Definition at line 129 of file blas_cpu.cpp.

void quda::axpyBzpcxCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
const double &  b,
cudaColorSpinorField &  z,
const double &  c 
)

Definition at line 304 of file blas_quda.cu.

Complex quda::axpyCGNormCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 496 of file reduce_quda.cu.

void quda::axpyCpu ( const double &  a,
const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 31 of file blas_cpu.cpp.

void quda::axpyCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 116 of file blas_quda.cu.

double quda::axpyNormCpu ( const double &  a,
const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 178 of file blas_cpu.cpp.

double quda::axpyNormCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 182 of file reduce_quda.cu.

void quda::axpyZpbxCpu ( const double &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
const cpuColorSpinorField &  z,
const double &  b 
)

Definition at line 136 of file blas_cpu.cpp.

void quda::axpyZpbxCuda ( const double &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
const double &  b 
)

Definition at line 324 of file blas_quda.cu.

void quda::backSubs ( const Complex *  alpha,
Complex **const  beta,
const double *  gamma,
Complex *  delta,
int  n 
)

Definition at line 103 of file inv_gcr_quda.cpp.

template<class Real >
DEVICEHOST void quda::bdSVD ( Matrix< Real, 3 > &  u,
Matrix< Real, 3 > &  v,
Matrix< Real, 3 > &  b,
int  max_it 
)

Definition at line 409 of file svd_quda.h.

template<class Cmplx >
DEVICEHOST RealTypeId<Cmplx>::Type quda::cabs ( const Cmplx &  z)
inline

Definition at line 20 of file svd_quda.h.

void quda::cabxpyAxCpu ( const double &  a,
const Complex &  b,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 259 of file blas_cpu.cpp.

void quda::cabxpyAxCuda ( const double &  a,
const Complex &  b,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 366 of file blas_quda.cu.

double quda::cabxpyAxNormCpu ( const double &  a,
const Complex &  b,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 283 of file blas_cpu.cpp.

double quda::cabxpyAxNormCuda ( const double &  a,
const Complex &  b,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 301 of file reduce_quda.cu.

template<typename Float >
void quda::caxpby ( const std::complex< Float > &  a,
const std::complex< Float > *  x,
const std::complex< Float > &  b,
std::complex< Float > *  y,
int  N 
)

Definition at line 70 of file blas_cpu.cpp.

__device__ void quda::caxpby_ ( const float2 &  a,
const float4 &  x,
const float2 &  b,
float4 &  y 
)

Functor to perform the operation y = a*x + b*y (complex-valued)

Definition at line 210 of file blas_quda.cu.

__device__ void quda::caxpby_ ( const float2 &  a,
const float2 &  x,
const float2 &  b,
float2 &  y 
)

Definition at line 218 of file blas_quda.cu.

__device__ void quda::caxpby_ ( const double2 &  a,
const double2 &  x,
const double2 &  b,
double2 &  y 
)

Definition at line 224 of file blas_quda.cu.

void quda::caxpbyCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y 
)

Definition at line 92 of file blas_cpu.cpp.

void quda::caxpbyCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y 
)

Definition at line 240 of file blas_quda.cu.

template<typename Float >
void quda::caxpbypcz ( const std::complex< Float > &  a,
const std::complex< Float > *  x,
const std::complex< Float > &  b,
const std::complex< Float > *  y,
const std::complex< Float > &  c,
std::complex< Float > *  z,
int  N 
)

Definition at line 105 of file blas_cpu.cpp.

void quda::caxpbypczpwCpu ( const Complex &  a,
cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
const Complex &  c,
cpuColorSpinorField &  z,
cpuColorSpinorField &  w 
)

Definition at line 295 of file blas_cpu.cpp.

void quda::caxpbypczpwCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
const Complex &  c,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w 
)

Definition at line 409 of file blas_quda.cu.

void quda::caxpbypzCpu ( const Complex &  a,
cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 289 of file blas_cpu.cpp.

void quda::caxpbypzCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 387 of file blas_quda.cu.

double3 quda::caxpbypzYmbwcDotProductUYNormYCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z,
const cpuColorSpinorField &  w,
const cpuColorSpinorField &  u 
)

Definition at line 250 of file blas_cpu.cpp.

double3 quda::caxpbypzYmbwcDotProductUYNormYCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w,
cudaColorSpinorField &  u 
)

Definition at line 463 of file reduce_quda.cu.

void quda::caxpbypzYmbwCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
const Complex &  b,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z,
const cpuColorSpinorField &  w 
)

Definition at line 143 of file blas_cpu.cpp.

void quda::caxpbypzYmbwCuda ( const Complex &  a,
cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z,
cudaColorSpinorField &  w 
)

Definition at line 346 of file blas_quda.cu.

__device__ void quda::caxpy_ ( const float2 &  a,
const float4 &  x,
float4 &  y 
)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 175 of file blas_quda.cu.

__device__ void quda::caxpy_ ( const float2 &  a,
const float2 &  x,
float2 &  y 
)

Definition at line 182 of file blas_quda.cu.

__device__ void quda::caxpy_ ( const double2 &  a,
const double2 &  x,
double2 &  y 
)

Definition at line 187 of file blas_quda.cu.

__device__ void quda::Caxpy_ ( const float2 &  a,
const float4 &  x,
float4 &  y 
)

Functor to perform the operation y += a * x (complex-valued)

Definition at line 214 of file reduce_quda.cu.

__device__ void quda::Caxpy_ ( const float2 &  a,
const float2 &  x,
float2 &  y 
)

Definition at line 221 of file reduce_quda.cu.

__device__ void quda::Caxpy_ ( const double2 &  a,
const double2 &  x,
double2 &  y 
)

Definition at line 226 of file reduce_quda.cu.

void quda::caxpyCpu ( const Complex &  a,
const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 79 of file blas_cpu.cpp.

void quda::caxpyCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 201 of file blas_quda.cu.

Complex quda::caxpyDotzyCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 303 of file blas_cpu.cpp.

Complex quda::caxpyDotzyCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 379 of file reduce_quda.cu.

double quda::caxpyNormCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 264 of file blas_cpu.cpp.

double quda::caxpyNormCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 249 of file reduce_quda.cu.

void quda::caxpyXmazCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 277 of file blas_cpu.cpp.

void quda::caxpyXmazCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 432 of file blas_quda.cu.

double quda::caxpyXmazNormXCpu ( const Complex &  a,
cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  z 
)

Definition at line 270 of file blas_cpu.cpp.

double quda::caxpyXmazNormXCuda ( const Complex &  a,
cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 274 of file reduce_quda.cu.

__device__ double2 quda::cdot_ ( const double2 &  a,
const double2 &  b 
)

Returns complex-valued dot product of x and y

Definition at line 310 of file reduce_quda.cu.

__device__ double2 quda::cdot_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 312 of file reduce_quda.cu.

__device__ double2 quda::cdot_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 314 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_ ( const double2 &  a,
const double2 &  b 
)

First returns the dot product (x,y), then returns the norm of x.

Definition at line 390 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 392 of file reduce_quda.cu.

__device__ double3 quda::cdotNormA_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 394 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_ ( const double2 &  a,
const double2 &  b 
)

First returns the dot product (x,y), then returns the norm of y.

Definition at line 420 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 422 of file reduce_quda.cu.

__device__ double3 quda::cdotNormB_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 424 of file reduce_quda.cu.

template<typename Float >
Complex quda::cDotProduct ( const std::complex< Float > *  a,
const std::complex< Float > *  b,
const int  N 
)

Definition at line 211 of file blas_cpu.cpp.

Complex quda::cDotProductCpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 217 of file blas_cpu.cpp.

Complex quda::cDotProductCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 329 of file reduce_quda.cu.

double3 quda::cDotProductNormACpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 237 of file blas_cpu.cpp.

double3 quda::cDotProductNormACuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 411 of file reduce_quda.cu.

double3 quda::cDotProductNormBCpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 243 of file blas_cpu.cpp.

double3 quda::cDotProductNormBCuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 440 of file reduce_quda.cu.

template<class Real >
__device__ __host__ bool quda::checkAbsoluteError ( Real  a,
Real  b,
Real  epsilon 
)
inline

Definition at line 141 of file unitarize_links_quda.cu.

template<class Real >
__device__ __host__ bool quda::checkRelativeError ( Real  a,
Real  b,
Real  epsilon 
)
inline

Definition at line 150 of file unitarize_links_quda.cu.

void quda::cloverCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover  clover,
const cudaColorSpinorField *  in,
const int  oddBit 
)

Definition at line 1781 of file dslash_quda.cu.

void quda::cloverDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const FullClover  cloverInv,
const cudaColorSpinorField *  in,
const int  oddBit,
const int  daggerBit,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim 
)

Definition at line 1388 of file dslash_quda.cu.

void quda::collectGhostStaple ( int *  X,
void *  even,
void *  odd,
int  volume,
QudaPrecision  precision,
void *  ghost_staple_gpu,
int  dir,
int  whichway,
cudaStream_t *  stream 
)

Definition at line 472 of file misc_helpers.cu.

template<int dir, int whichway, typename Float2 >
__global__ void quda::collectGhostStapleKernel ( Float2 *  in,
const int  oddBit,
Float2 *  nbr_staple_gpu 
)

Definition at line 392 of file misc_helpers.cu.

template<class U , class V >
int quda::compareSpinor ( const U &  u,
const V v,
const int  tol 
)

Definition at line 288 of file cpu_color_spinor_field.cpp.

quda::COMPUTE_RECONSTRUCT_SIGN ( sign  ,
nu  ,
x1  ,
x2  ,
x3  ,
x4   
)
Initial value:
quda::COMPUTE_RECONSTRUCT_SIGN ( sign  ,
mu  ,
new_x1  ,
new_x2  ,
new_x3  ,
new_x4   
)
quda::COMPUTE_RECONSTRUCT_SIGN ( sign  ,
nu  ,
new_x1  ,
new_x2  ,
new_x3  ,
new_x4   
)
quda::COMPUTE_RECONSTRUCT_SIGN ( sign  ,
nu  ,
(x1-2)  ,
(x2-2)  ,
(x3-2)  ,
(x4-2)   
)
Initial value:
quda::COMPUTE_RECONSTRUCT_SIGN ( sign  ,
mu  ,
(new_x1-2)  ,
(new_x2-2)  ,
(new_x3-2)  ,
(new_x4-2)   
)
quda::COMPUTE_RECONSTRUCT_SIGN ( sign  ,
nu  ,
(new_x1-2)  ,
(new_x2-2)  ,
(new_x3-2)  ,
(new_x4-2)   
)
void quda::computeCloverCuda ( cudaCloverField clover,
const cudaGaugeField gauge 
)

Definition at line 248 of file clover_quda.cu.

void quda::computeFatLinkCore ( cudaGaugeField *  cudaSiteLink,
double *  act_path_coeff,
QudaGaugeParam qudaGaugeParam,
QudaComputeFatMethod  method,
cudaGaugeField *  cudaFatLink,
struct timeval  time_array[] 
)
template<typename Cmplx >
__global__ void quda::computeFmunuKernel ( Cmplx *  Fmunu,
const Cmplx *  gauge,
const CloverParam  param 
)

Construct the field-strength tensor field Fmunu. First pass only supports no reconstruct for expediency.

Parameters
Fmunu - Pointer to field-strength tensor array. Result is stored here.
gauge - Pointer to gauge field.
param - CloverParam struct

Definition at line 41 of file clover_quda.cu.

void quda::computeGenStapleFieldParityKernel ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
const void *  mulink_even,
const void *  mulink_odd,
int  mu,
int  nu,
int  save_staple,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
dim3  halfGridDim,
llfat_kernel_param_t  kparam,
cudaStream_t *  stream 
)

Definition at line 856 of file llfat_quda.cu.

void quda::computeGenStapleFieldParityKernel_ex ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
const void *  mulink_even,
const void *  mulink_odd,
int  mu,
int  nu,
int  save_staple,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
llfat_kernel_param_t  kparam 
)

Definition at line 1013 of file llfat_quda.cu.

template<class Cmplx >
__device__ __host__ void quda::computeLinkInverse ( Matrix< Cmplx, 3 > *  uinv,
const Matrix< Cmplx, 3 > &  u 
)
inline

Definition at line 692 of file quda_matrix.h.

template<class T >
__device__ __host__ void quda::computeMatrixInverse ( const Matrix< T, 3 > &  u,
Matrix< T, 3 > *  uinv 
)
inline

Definition at line 438 of file quda_matrix.h.

template<class Cmplx >
DEVICEHOST void quda::computeSVD ( const Matrix< Cmplx, 3 > &  m,
Matrix< Cmplx, 3 > &  u,
Matrix< Cmplx, 3 > &  v,
typename RealTypeId< Cmplx >::Type  singular_values[3] 
)

Definition at line 637 of file svd_quda.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::conj ( const Cmplx &  a)
inline

Definition at line 195 of file quda_matrix.h.

__device__ __host__ double quda::conj ( const double &  a)
inline

Definition at line 200 of file quda_matrix.h.

__device__ __host__ float quda::conj ( const float &  a)
inline

Definition at line 205 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::conj ( const Matrix< T, N > &  other)
inline

Definition at line 425 of file quda_matrix.h.

template<class T >
DEVICEHOST void quda::constructHHMat ( const T &  tau,
const Array< T, 3 > &  v,
Matrix< T, 3 > &  hh 
)

Definition at line 82 of file svd_quda.h.

void quda::copyArrayToLink ( Matrix< float2, 3 > *  link,
float *  array 
)
inline

Definition at line 730 of file quda_matrix.h.

template<class Cmplx , class Real >
void quda::copyArrayToLink ( Matrix< Cmplx, 3 > *  link,
Real *  array 
)
inline

Definition at line 741 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ void quda::copyColumn ( const Matrix< T, N > &  m,
int  c,
Array< T, N > *  a 
)
inline

Definition at line 592 of file quda_matrix.h.

void quda::copyCuda ( cudaColorSpinorField &  dst,
const cudaColorSpinorField &  src 
)

Definition at line 234 of file copy_quda.cu.

void quda::copyLinkToArray ( float *  array,
const Matrix< float2, 3 > &  link 
)
inline

Definition at line 753 of file quda_matrix.h.

template<class Cmplx , class Real >
void quda::copyLinkToArray ( Real *  array,
const Matrix< Cmplx, 3 > &  link 
)
inline

Definition at line 765 of file quda_matrix.h.

void quda::createDirac ( Dirac *&  d,
Dirac *&  dSloppy,
Dirac *&  dPre,
QudaInvertParam param,
const bool  pc_solve 
)

Definition at line 695 of file interface_quda.cpp.

void quda::createDslashEvents ( )

Definition at line 220 of file dslash_quda.cu.

__device__ void quda::cxpaypbz_ ( const float4 &  x,
const float2 &  a,
const float4 &  y,
const float2 &  b,
float4 &  z 
)

Functor to perform the operation z[i] = x[i] + a*y[i] + b*z[i]

Definition at line 249 of file blas_quda.cu.

__device__ void quda::cxpaypbz_ ( const float2 &  x,
const float2 &  a,
const float2 &  y,
const float2 &  b,
float2 &  z 
)

Definition at line 258 of file blas_quda.cu.

__device__ void quda::cxpaypbz_ ( const double2 &  x,
const double2 &  a,
const double2 &  y,
const double2 &  b,
double2 &  z 
)

Definition at line 265 of file blas_quda.cu.

void quda::cxpaypbzCpu ( const cpuColorSpinorField &  x,
const Complex &  b,
const cpuColorSpinorField &  y,
const Complex &  c,
cpuColorSpinorField &  z 
)

Definition at line 115 of file blas_cpu.cpp.

void quda::cxpaypbzCuda ( cudaColorSpinorField &  x,
const Complex &  b,
cudaColorSpinorField &  y,
const Complex &  c,
cudaColorSpinorField &  z 
)

Definition at line 283 of file blas_quda.cu.

void quda::destroyDslashEvents ( )

Definition at line 267 of file dslash_quda.cu.

void quda::device_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free device memory allocated with device_malloc(). This function should only be called via the device_free() macro, defined in malloc_quda.h.

void* quda::device_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a standard cudaMalloc() with error-checking. This function should only be called via the device_malloc() macro, defined in malloc_quda.h.

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void quda::do_all_link_kernel ( Float2 *  tempxEven,
Float2 *  tempxOdd,
Float2 *  PmuEven,
Float2 *  PmuOdd,
Float2 *  P3Even,
Float2 *  P3Odd,
Float2 *  P3muEven,
Float2 *  P3muOdd,
Float2 *  shortPEven,
Float2 *  shortPOdd,
int  sig,
int  mu,
Float2  coeff,
Float2  mcoeff,
Float2  accumu_coeff,
float4 *  linkEven,
float4 *  linkOdd,
Float2 *  momEven,
Float2 *  momOdd 
)

Definition at line 811 of file fermion_force_quda.cu.

template<int N, typename FloatN , typename Float2 >
__global__ void quda::do_link_format_cpu_to_gpu ( FloatN dst,
Float2 *  src,
int  reconstruct,
int  Vh,
int  pad,
int  ghostV,
size_t  threads 
)

Definition at line 42 of file misc_helpers.cu.

template<int N, typename FloatN , typename Float2 >
__global__ void quda::do_link_format_cpu_to_gpu_milc ( FloatN dst,
Float2 *  src,
int  reconstruct,
int  Vh,
int  pad,
int  ghostV,
size_t  threads 
)

Definition at line 102 of file misc_helpers.cu.

template<typename FloatN >
__global__ void quda::do_link_format_gpu_to_cpu ( FloatN dst,
FloatN src,
int  Vh,
int  stride 
)

Definition at line 320 of file misc_helpers.cu.

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void quda::do_middle_link_kernel ( Float2 *  tempxEven,
Float2 *  tempxOdd,
Float2 *  PmuEven,
Float2 *  PmuOdd,
Float2 *  P3Even,
Float2 *  P3Odd,
int  sig,
int  mu,
Float2  coeff,
float4 *  linkEven,
float4 *  linkOdd,
Float2 *  momEven,
Float2 *  momOdd 
)

Definition at line 427 of file fermion_force_quda.cu.

template<int oddBit, typename Float2 >
__global__ void quda::do_one_and_naik_terms_kernel ( Float2 *  TempxEven,
Float2 *  TempxOdd,
Float2 *  PmuEven,
Float2 *  PmuOdd,
Float2 *  PnumuEven,
Float2 *  PnumuOdd,
int  mu,
Float2  OneLink,
Float2  Naik,
Float2  mNaik,
float4 *  linkEven,
float4 *  linkOdd,
Float2 *  momEven,
Float2 *  momOdd 
)

Definition at line 1065 of file fermion_force_quda.cu.

template<int sig_positive, int mu_positive, int oddBit, typename Float2 >
__global__ void quda::do_side_link_kernel ( Float2 *  P3Even,
Float2 *  P3Odd,
Float2 *  P3muEven,
Float2 *  P3muOdd,
Float2 *  TempxEven,
Float2 *  TempxOdd,
Float2 *  PmuEven,
Float2 *  PmuOdd,
Float2 *  shortPEven,
Float2 *  shortPOdd,
int  sig,
int  mu,
Float2  coeff,
Float2  accumu_coeff,
float4 *  linkEven,
float4 *  linkOdd,
Float2 *  momEven,
Float2 *  momOdd 
)

Definition at line 633 of file fermion_force_quda.cu.

void quda::domainWallDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  m_f,
const double &  k,
const int *  commDim 
)

Definition at line 1567 of file dslash_quda.cu.

__device__ double quda::dot_ ( const double2 &  a,
const double2 &  b 
)

Return the real dot product of a and b

Definition at line 143 of file reduce_quda.cu.

__device__ float quda::dot_ ( const float2 &  a,
const float2 &  b 
)

Definition at line 144 of file reduce_quda.cu.

__device__ float quda::dot_ ( const float4 &  a,
const float4 &  b 
)

Definition at line 145 of file reduce_quda.cu.

void quda::dslashCuda ( DslashCuda &  dslash,
const size_t  regSize,
const int  parity,
const int  dagger,
const int  volume,
const int *  faceVolumeCB 
)

Definition at line 1238 of file dslash_quda.cu.

void quda::endBlas ( void  )

Definition at line 53 of file blas_quda.cu.

void quda::endReduce ( void  )

Definition at line 75 of file reduce_quda.cu.

void quda::fermion_force_cuda ( double  eps,
double  weight1,
double  weight2,
void *  act_path_coeff,
FullHw  cudaHw,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaMom,
QudaGaugeParam param 
)

Definition at line 1387 of file fermion_force_quda.cu.

void quda::fermion_force_init_cuda ( QudaGaugeParam param)

Definition at line 401 of file fermion_force_quda.cu.

void quda::fillInnerInvertParam ( QudaInvertParam inner,
const QudaInvertParam outer 
)

Definition at line 28 of file inv_gcr_quda.cpp.

quda::for ( )

Definition at line 1091 of file llfat_quda.cu.

void quda::gauge_force_cuda ( cudaGaugeField &  cudaMom,
double  eb3,
cudaGaugeField &  cudaSiteLink,
QudaGaugeParam param,
int ***  input_path,
int *  length,
void *  path_coeff,
int  num_paths,
int  max_length 
)

Definition at line 334 of file gauge_force_quda.cu.

void quda::gauge_force_cuda_dir ( cudaGaugeField &  cudaMom,
const int  dir,
const double  eb3,
const cudaGaugeField &  cudaSiteLink,
const QudaGaugeParam param,
int **  input_path,
const int *  length,
const void *  path_coeff,
const int  num_paths,
const int  max_length 
)

Definition at line 279 of file gauge_force_quda.cu.

void quda::gauge_force_init_cuda ( QudaGaugeParam param,
int  max_length 
)

Definition at line 102 of file gauge_force_quda.cu.

template<int oddBit, typename Float2 , typename FloatN , typename Float >
__global__ void quda::GAUGE_FORCE_KERN_NAME ( Float2 *  momEven,
Float2 *  momOdd,
const int  dir,
const double  eb3,
const FloatN linkEven,
const FloatN linkOdd,
const int *  input_path,
const int *  length,
const Float path_coeff,
const int  num_paths,
const kernel_param_t  kparam 
)

Definition at line 477 of file gauge_force_quda.cu.

template<class D , class S >
void quda::genericCopy ( D &  dst,
const S &  src 
)

Definition at line 201 of file cpu_color_spinor_field.cpp.

template<class T >
__device__ __host__ T quda::getAbsMin ( const T *const  array,
int  size 
)

Definition at line 129 of file unitarize_links_quda.cu.

cudaStream_t * quda::getBlasStream ( )

Definition at line 66 of file blas_quda.cu.

QudaTune quda::getBlasTuning ( )

Definition at line 64 of file blas_quda.cu.

QudaVerbosity quda::getBlasVerbosity ( )

Definition at line 65 of file blas_quda.cu.

template<class T >
__device__ __host__ T quda::getDeterminant ( const Matrix< T, 3 > &  a)
inline

Definition at line 308 of file quda_matrix.h.

bool quda::getDslashLaunch ( )
template<class Real >
DEVICEHOST void quda::getGivensRotation ( const Real &  alpha,
const Real &  beta,
Real &  c,
Real &  s 
)

Definition at line 117 of file svd_quda.h.

bool quda::getKernelPackT ( )
Returns
Whether the T dimension is kernel packed or not

Definition at line 157 of file dslash_quda.cu.

template<class Real >
DEVICEHOST void quda::getLambdaMax ( const Matrix< Real, 3 > &  b,
Real &  lambda_max 
)

Definition at line 98 of file svd_quda.h.

DEVICEHOST float quda::getNorm ( const Array< float2, 3 > &  a)

Definition at line 59 of file svd_quda.h.

DEVICEHOST double quda::getNorm ( const Array< double2, 3 > &  a)

Definition at line 70 of file svd_quda.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::getPreciseInverse ( const Cmplx &  z)
inline

Definition at line 214 of file quda_matrix.h.

template<class Cmplx >
DEVICEHOST void quda::getRealBidiagMatrix ( const Matrix< Cmplx, 3 > &  mat,
Matrix< Cmplx, 3 > &  u,
Matrix< Cmplx, 3 > &  v 
)

Definition at line 286 of file svd_quda.h.

template<class T >
__device__ __host__ T quda::getTrace ( const Matrix< T, 3 > &  a)
inline

Definition at line 301 of file quda_matrix.h.

template<class Cmplx >
__global__ void quda::getUnitarizedField ( const Cmplx *  inlink_even,
const Cmplx *  inlink_odd,
Cmplx *  outlink_even,
Cmplx *  outlink_odd,
int *  num_failures,
const int  threads 
)

Definition at line 350 of file unitarize_links_quda.cu.

template<typename Float >
double3 quda::HeavyQuarkResidualNorm ( const Float x,
const Float r,
const int  volume,
const int  Nint 
)

Definition at line 310 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu ( cpuColorSpinorField &  x,
cpuColorSpinorField &  r 
)

Definition at line 331 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCpu ( cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  r 
)

Definition at line 350 of file blas_cpu.cpp.

double3 quda::HeavyQuarkResidualNormCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  r 
)

Definition at line 591 of file reduce_quda.cu.

void quda::host_free_ ( const char *  func,
const char *  file,
int  line,
void *  ptr 
)

Free host memory allocated with safe_malloc(), pinned_malloc(), or mapped_malloc(). This function should only be called via the host_free() macro, defined in malloc_quda.h

quda::if ( kparam.  kernel_type == LLFAT_EXTERIOR_KERNEL_FWD_X && x1 != X1m1)

Definition at line 890 of file llfat_quda.cu.

quda::if ( save_staple  )

Definition at line 976 of file llfat_quda.cu.

quda::if ( mem_idx >=  Vh)

Definition at line 1070 of file llfat_quda.cu.

quda::if ( mem_idx >=kparam.  threads)
quda::if ( x1==1||x1==X1+2||x2==1||x2==X2+2||x3==1||x3==X3+2||x4==1||x4==X4+2)

Definition at line 1226 of file llfat_quda.cu.

quda::if ( sid >=2 *kparam.  threads)
quda::if ( idx >=kparam.  threads)

Definition at line 1382 of file llfat_quda.cu.

template<int N>
__device__ __host__ int quda::index ( int  i,
int  j 
)
inline

Definition at line 280 of file quda_matrix.h.

void quda::initBlas ( )

Definition at line 47 of file blas_quda.cu.

void quda::initCloverConstants ( const cudaCloverField &  clover)
void quda::initDslashCommsPattern ( )

Initialize the arrays used for the dynamic scheduling.

Definition at line 1209 of file dslash_quda.cu.

void quda::initDslashConstants ( )

Definition at line 362 of file dslash_constants.h.

void quda::initGaugeConstants ( const cudaGaugeField &  gauge)
void quda::initLatticeConstants ( const LatticeField &  lat)
void quda::initReduce ( )

Definition at line 45 of file reduce_quda.cu.

void quda::initSpinorConstants ( const cudaColorSpinorField &  spinor)
void quda::initStaggeredConstants ( const cudaGaugeField &  fatgauge,
const cudaGaugeField &  longgauge 
)
void quda::initTwistedMassConstants ( const int  flv_stride)

ndeg tm:

Definition at line 402 of file dslash_constants.h.

template<class Cmplx >
__device__ __host__ bool quda::isUnitarizedLinkConsistent ( const Matrix< Cmplx, 3 > &  initial_matrix,
const Matrix< Cmplx, 3 > &  unitary_matrix,
double  max_error 
)

Definition at line 73 of file unitarize_links_quda.cu.

bool quda::isUnitary ( const QudaGaugeParam param,
cpuGaugeField &  field,
double  max_error 
)

Definition at line 517 of file unitarize_links_quda.cu.

template<class Cmplx >
__device__ __host__ bool quda::isUnitary ( const Matrix< Cmplx, 3 > &  matrix,
double  max_error 
)

Definition at line 53 of file unitarize_links_quda.cu.

void quda::link_format_cpu_to_gpu ( void *  dst,
void *  src,
int  reconstruct,
int  Vh,
int  pad,
int  ghostV,
QudaPrecision  prec,
QudaGaugeFieldOrder  cpu_order,
cudaStream_t  stream 
)

Definition at line 143 of file misc_helpers.cu.

void quda::link_format_gpu_to_cpu ( void *  dst,
void *  src,
int  Vh,
int  stride,
QudaPrecision  prec,
cudaStream_t  stream 
)

Definition at line 345 of file misc_helpers.cu.

__device__ int quda::linkIndex ( int  x[],
int  dx[],
const CloverParam param 
)
inline

linkIndex computes the spacetime index of the link with coordinate y = x + dx.

Parameters
x - coordinate in spacetime
dx - coordinate offsets in spacetime
param - CloverParam struct

Definition at line 25 of file clover_quda.cu.

quda::LLFAT_ADD_SU3_MATRIX ( ,
staple  ,
staple   
)
quda::LLFAT_ADD_SU3_MATRIX ( ,
staple  ,
staple   
)
quda::LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE ( nu  ,
mu   
)
quda::LLFAT_COMPUTE_NEW_IDX_LOWER_STAPLE_EX ( nu  ,
mu   
)
quda::LLFAT_COMPUTE_NEW_IDX_MINUS ( nu  ,
X   
)
quda::LLFAT_COMPUTE_NEW_IDX_MINUS_EX ( nu  ,
X   
)
quda::LLFAT_COMPUTE_NEW_IDX_PLUS ( nu  ,
X   
)
quda::LLFAT_COMPUTE_NEW_IDX_PLUS ( mu  ,
X   
)
quda::LLFAT_COMPUTE_NEW_IDX_PLUS_EX ( nu  ,
X   
)
quda::LLFAT_COMPUTE_NEW_IDX_PLUS_EX ( mu  ,
X   
)
void quda::llfat_cuda ( cudaGaugeField &  cudaFatLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff 
)

Definition at line 22 of file llfat_quda_itf.cpp.

void quda::llfat_cuda_ex ( cudaGaugeField &  cudaFatLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff 
)

Definition at line 249 of file llfat_quda_itf.cpp.

void quda::llfat_init_cuda ( QudaGaugeParam param)

Definition at line 197 of file llfat_quda.cu.

void quda::llfat_init_cuda_ex ( QudaGaugeParam param_ex)

Definition at line 246 of file llfat_quda.cu.

template<int mu, int nu, int odd_bit>
__global__ void quda::LLFAT_KERNEL ( do_siteComputeGenStapleParity  ,
RECONSTRUCT   
)
template<int mu, int nu, int odd_bit, int save_staple>
__global__ void quda::LLFAT_KERNEL ( do_computeGenStapleFieldParity  ,
RECONSTRUCT   
)
__global__ void quda::LLFAT_KERNEL ( llfatOneLink  ,
RECONSTRUCT   
) const
template<int mu, int nu, int odd_bit>
__global__ void quda::LLFAT_KERNEL_EX ( do_siteComputeGenStapleParity  ,
RECONSTRUCT   
)
template<int mu, int nu, int odd_bit, int save_staple>
__global__ void quda::LLFAT_KERNEL_EX ( do_computeGenStapleFieldParity  ,
RECONSTRUCT   
)
__global__ void quda::LLFAT_KERNEL_EX ( llfatOneLink  ,
RECONSTRUCT   
) const
void quda::llfatOneLinkKernel ( cudaGaugeField &  cudaFatLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff 
)

Definition at line 1100 of file llfat_quda.cu.

void quda::llfatOneLinkKernel_ex ( cudaGaugeField &  cudaFatLink,
cudaGaugeField &  cudaSiteLink,
cudaGaugeField &  cudaStaple,
cudaGaugeField &  cudaStaple1,
QudaGaugeParam param,
double *  act_path_coeff,
llfat_kernel_param_t  kparam 
)

Definition at line 1141 of file llfat_quda.cu.

quda::LOAD_EVEN_FAT_MATRIX ( mu  ,
mem_idx   
)
quda::LOAD_EVEN_SITE_MATRIX ( nu  ,
new_mem_idx  ,
 
)
quda::LOAD_ODD_MULINK_MATRIX ( ,
new_mem_idx  ,
BB   
)
quda::LOAD_ODD_SITE_MATRIX ( mu  ,
new_mem_idx  ,
 
)
quda::LOAD_ODD_SITE_MATRIX ( nu  ,
new_mem_idx  ,
 
)
quda::LOAD_ODD_SITE_MATRIX ( nu  ,
(new_mem_idx ,
 
)
Initial value:
quda::LOAD_ODD_SITE_MATRIX ( mu  ,
(new_mem_idx ,
 
)
quda::LOAD_ODD_SITE_MATRIX ( nu  ,
new_mem_idx  ,
 
)
Initial value:
void quda::loadLinkToGPU ( cudaGaugeField *  cudaGauge,
cpuGaugeField *  cpuGauge,
QudaGaugeParam param 
)
void quda::loadLinkToGPU_ex ( cudaGaugeField *  cudaGauge,
cpuGaugeField *  cpuGauge 
)
void quda::loadLinkToGPU_gf ( cudaGaugeField *  cudaGauge,
cpuGaugeField *  cpuGauge,
QudaGaugeParam param 
)
template<class T >
__device__ void quda::loadLinkVariableFromArray ( const T *const  array,
int  dir,
int  idx,
int  stride,
Matrix< T, 3 > *  link 
)
inline

Definition at line 638 of file quda_matrix.h.

__device__ void quda::loadLinkVariableFromArray ( const float2 *const  array,
int  dir,
int  idx,
int  stride,
Matrix< double2, 3 > *  link 
)
inline

Definition at line 648 of file quda_matrix.h.

template<typename Float , typename Float2 >
void quda::loadMomField ( Float2 *  even,
Float2 *  odd,
Float mom,
int  bytes,
int  Vh,
int  pad,
void *  buffer 
)

Definition at line 187 of file cuda_gauge_field.cpp.

void quda::loadTuneCache ( QudaVerbosity  verbosity)

Definition at line 104 of file tune.cpp.

template<class Cmplx >
__device__ __host__ Cmplx quda::makeComplex ( const typename RealTypeId< Cmplx >::Type &  a,
const typename RealTypeId< Cmplx >::Type &  b 
)
inline

Definition at line 107 of file quda_matrix.h.

__device__ __host__ double2 quda::makeComplex ( const double &  a,
const double &  b 
)
inline

Definition at line 116 of file quda_matrix.h.

__device__ __host__ float2 quda::makeComplex ( const float &  a,
const float &  b 
)
inline

Definition at line 121 of file quda_matrix.h.

void* quda::mapped_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Allocate page-locked ("pinned") host memory, and map it into the GPU address space. This function should only be called via the mapped_malloc() macro, defined in malloc_quda.h

void quda::massRescale ( QudaDslashType  dslash_type,
double &  kappa,
QudaSolutionType  solution_type,
QudaMassNormalization  mass_normalization,
cudaColorSpinorField b 
)

Definition at line 710 of file interface_quda.cpp.

void quda::massRescaleCoeff ( QudaDslashType  dslash_type,
double &  kappa,
QudaSolutionType  solution_type,
QudaMassNormalization  mass_normalization,
double &  coeff 
)

Definition at line 769 of file interface_quda.cpp.

quda::MULT_SU3_AN ( ,
,
tempa   
)
quda::MULT_SU3_AN ( ,
bb  ,
tempa   
)
quda::MULT_SU3_NA ( tempa  ,
,
staple   
)
quda::MULT_SU3_NN ( ,
,
tempa   
)
quda::MULT_SU3_NN ( tempa  ,
,
 
)
quda::MULT_SU3_NN ( ,
bb  ,
tempa   
)
quda::MULT_SU3_NN ( tempa  ,
,
 
)
void quda::mxpyCpu ( const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 51 of file blas_cpu.cpp.

void quda::mxpyCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 149 of file blas_quda.cu.

template<typename Float >
double quda::norm ( const Float a,
const int  N 
)

Definition at line 160 of file blas_cpu.cpp.

double quda::norm2 ( const ColorSpinorField &  a)

Definition at line 315 of file color_spinor_field.cpp.

double quda::norm2 ( const cudaGaugeField &  a)

This is a debugging function, where we cast a gauge field into a spinor field so we can compute its L2 norm.

Parameters
a - The gauge field that we want the norm of
Returns
The L2 norm squared of the gauge field

Definition at line 530 of file cuda_gauge_field.cpp.

__device__ double quda::norm2_ ( const double2 &  a)

Return the L2 norm of a

Definition at line 118 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float2 &  a)

Definition at line 119 of file reduce_quda.cu.

__device__ float quda::norm2_ ( const float4 &  a)

Definition at line 120 of file reduce_quda.cu.

double quda::normCpu ( const cpuColorSpinorField &  b)

Definition at line 166 of file blas_cpu.cpp.

double quda::normCuda ( const cudaColorSpinorField &  b)

Definition at line 134 of file reduce_quda.cu.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator* ( const Cmplx &  a,
const typename RealTypeId< Cmplx >::Type &  scalar 
)
inline

Definition at line 146 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator* ( const typename RealTypeId< Cmplx >::Type &  scalar,
const Cmplx &  b 
)
inline

Definition at line 183 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator* ( const Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 189 of file quda_matrix.h.

template<class T , int N, class S >
__device__ __host__ Matrix<T,N> quda::operator* ( const S &  scalar,
const Matrix< T, N > &  a 
)
inline

Definition at line 353 of file quda_matrix.h.

template<class T , int N, class S >
__device__ __host__ Matrix<T,N> quda::operator* ( const Matrix< T, N > &  a,
const S &  scalar 
)
inline

Definition at line 363 of file quda_matrix.h.

template<class T >
__device__ __host__ Matrix<T,3> quda::operator* ( const Matrix< T, 3 > &  a,
const Matrix< T, 3 > &  b 
)
inline

Definition at line 371 of file quda_matrix.h.

template<class T , class U >
__device__ __host__ Matrix<typename PromoteTypeId<T,U>::Type,3> quda::operator* ( const Matrix< T, 3 > &  a,
const Matrix< U, 3 > &  b 
)
inline

Definition at line 393 of file quda_matrix.h.

template<class T >
__device__ __host__ Matrix<T,2> quda::operator* ( const Matrix< T, 2 > &  a,
const Matrix< T, 2 > &  b 
)
inline

Definition at line 412 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator+ ( const Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 135 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator+ ( const Cmplx &  a,
const typename RealTypeId< Cmplx >::Type &  scalar 
)
inline

Definition at line 158 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator+ ( const typename RealTypeId< Cmplx >::Type &  scalar,
const Cmplx &  a 
)
inline

Definition at line 164 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator+ ( const Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 319 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx& quda::operator+= ( Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 127 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator+= ( Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 330 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const Cmplx &  a,
const Cmplx &  b 
)
inline

Definition at line 140 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const Cmplx &  a,
const typename RealTypeId< Cmplx >::Type &  scalar 
)
inline

Definition at line 170 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator- ( const typename RealTypeId< Cmplx >::Type &  scalar,
const Cmplx &  a 
)
inline

Definition at line 176 of file quda_matrix.h.

template<class T , int N>
__device__ __host__ Matrix<T,N> quda::operator- ( const Matrix< T, N > &  a,
const Matrix< T, N > &  b 
)
inline

Definition at line 341 of file quda_matrix.h.

template<class Cmplx >
__device__ __host__ Cmplx quda::operator/ ( const Cmplx &  a,
const typename RealTypeId< Cmplx >::Type &  scalar 
)
inline

Definition at line 152 of file quda_matrix.h.

std::ostream & quda::operator<< ( std::ostream &  output,
const LatticeFieldParam &  param 
)

Definition at line 75 of file lattice_field.cpp.

std::ostream & quda::operator<< ( std::ostream &  output,
const GaugeFieldParam &  param 
)

Definition at line 48 of file gauge_field.cpp.

std::ostream& quda::operator<< ( std::ostream &  os,
const float2 &  z 
)
inline

Definition at line 223 of file quda_matrix.h.

std::ostream& quda::operator<< ( std::ostream &  os,
const double2 &  z 
)
inline

Definition at line 228 of file quda_matrix.h.

std::ostream& quda::operator<< ( std::ostream &  out,
const ColorSpinorField &  a 
)

Definition at line 329 of file color_spinor_field.cpp.

template<class T , int N>
std::ostream& quda::operator<< ( std::ostream &  os,
const Matrix< T, N > &  m 
)

Definition at line 616 of file quda_matrix.h.

template<class T , int N>
std::ostream& quda::operator<< ( std::ostream &  os,
const Array< T, N > &  a 
)

Definition at line 628 of file quda_matrix.h.

std::ostream& quda::operator<< ( std::ostream &  out,
const cudaColorSpinorField &  a 
)

Definition at line 767 of file cuda_color_spinor_field.cu.

void quda::orthoDir ( Complex **  beta,
cudaColorSpinorField *  Ap[],
int  k 
)

Definition at line 50 of file inv_gcr_quda.cpp.

template<class T , int N>
__device__ __host__ void quda::outerProd ( const Array< T, N > &  a,
const Array< T, N > &  b,
Matrix< T, N > *  m 
)
inline

Definition at line 603 of file quda_matrix.h.

void quda::pack_gauge_diag ( void *  buf,
int *  X,
void **  sitelink,
int  nu,
int  mu,
int  dir1,
int  dir2,
QudaPrecision  prec 
)
void quda::pack_ghost_all_links ( void **  cpuLink,
void **  cpuGhostBack,
void **  cpuGhostFwd,
int  dir,
int  nFace,
QudaPrecision  precision,
int *  X 
)
void quda::pack_ghost_all_staples_cpu ( void *  staple,
void **  cpuGhostStapleBack,
void **  cpuGhostStapleFwd,
int  nFace,
QudaPrecision  precision,
int *  X 
)
void quda::packFace ( void *  ghost_buf,
cudaColorSpinorField &  in,
const int  dagger,
const int  parity,
const cudaStream_t &  stream 
)
template<typename Float >
void quda::packGhost ( Float **  ghost,
const Float **  gauge,
const int  nFace,
const int *  X,
const int  volumeCB,
const int *  surfaceCB,
const QudaGaugeFieldOrder  order 
)

Definition at line 108 of file cpu_gauge_field.cpp.

void quda::packGhostStaple ( int *  X,
void *  even,
void *  odd,
int  volume,
QudaPrecision  prec,
int  stride,
int  dir,
int  whichway,
void **  fwd_nbr_buf_gpu,
void **  back_nbr_buf_gpu,
void **  fwd_nbr_buf,
void **  back_nbr_buf,
cudaStream_t *  stream 
)
void* quda::pinned_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Allocate page-locked ("pinned") host memory. This function should only be called via the pinned_malloc() macro, defined in malloc_quda.h

Note that we do not rely on cudaHostAlloc(), since buffers allocated in this way have been observed to cause problems when shared with MPI via GPU Direct on some systems.

template<class T >
void quda::point ( T &  t,
const int  x,
const int  s,
const int  c 
)

Definition at line 260 of file cpu_color_spinor_field.cpp.

template<class Order >
void quda::print_vector ( const Order &  o,
unsigned int  x 
)

Definition at line 352 of file cpu_color_spinor_field.cpp.

template<class Cmplx >
__host__ __device__ void quda::printLink ( const Matrix< Cmplx, 3 > &  link)
inline

Definition at line 780 of file quda_matrix.h.

void quda::printPeakMemUsage ( )

Definition at line 282 of file malloc.cpp.

template<class T , class U >
DEVICEHOST PromoteTypeId<T,U>::Type quda::quadSum ( const T &  a,
const U &  b 
)
inline

Definition at line 39 of file svd_quda.h.

template<class T >
void quda::random ( T &  t)

Definition at line 246 of file cpu_color_spinor_field.cpp.

template<class Cmplx >
__device__ __host__ bool quda::reciprocalRoot ( const Matrix< Cmplx, 3 > &  q,
Matrix< Cmplx, 3 > *  res 
)

Definition at line 163 of file unitarize_links_quda.cu.

quda::RECONSTRUCT_SITE_LINK ( sign  ,
 
)
quda::RECONSTRUCT_SITE_LINK ( sign  ,
 
)
quda::RECONSTRUCT_SITE_LINK ( sign  ,
 
)
template<typename Float >
double quda::reDotProduct ( const Float a,
const Float b,
const int  N 
)

Definition at line 185 of file blas_cpu.cpp.

double quda::reDotProductCpu ( const cpuColorSpinorField &  a,
const cpuColorSpinorField &  b 
)

Definition at line 191 of file blas_cpu.cpp.

double quda::reDotProductCuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 159 of file reduce_quda.cu.

double quda::resNorm ( const DiracMatrix &  mat,
cudaColorSpinorField &  b,
cudaColorSpinorField &  x 
)

Definition at line 22 of file inv_bicgstab_quda.cpp.

void* quda::safe_malloc_ ( const char *  func,
const char *  file,
int  line,
size_t  size 
)

Perform a standard malloc() with error-checking. This function should only be called via the safe_malloc() macro, defined in malloc_quda.h

void quda::saveTuneCache ( QudaVerbosity  verbosity)

Write tunecache to disk.

void quda::setBlasParam ( int  kernel,
int  prec,
int  threads,
int  blocks 
)
void quda::setBlasTuning ( QudaTune  tune,
QudaVerbosity  verbose 
)

Definition at line 58 of file blas_quda.cu.

void quda::setDiracParam ( DiracParam diracParam,
QudaInvertParam inv_param,
bool  pc 
)

Definition at line 608 of file interface_quda.cpp.

void quda::setDiracPreParam ( DiracParam diracParam,
QudaInvertParam inv_param,
const bool  pc 
)

Definition at line 680 of file interface_quda.cpp.

void quda::setDiracSloppyParam ( DiracParam diracParam,
QudaInvertParam inv_param,
bool  pc 
)

Definition at line 664 of file interface_quda.cpp.

void quda::setDslashTuning ( QudaTune  tune,
QudaVerbosity  verbose 
)
Parameters
tune - Sets whether to tune the dslash kernels or not
verbose - The verbosity level to use in the dslash tuning functions

Definition at line 145 of file dslash_quda.cu.

void quda::setFace ( const FaceBuffer &  face)

Definition at line 215 of file dslash_quda.cu.

template<class T , int N>
__device__ __host__ void quda::setIdentity ( Matrix< T, N > *  m)
inline

Definition at line 480 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setIdentity ( Matrix< float2, N > *  m)
inline

Definition at line 494 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setIdentity ( Matrix< double2, N > *  m)
inline

Definition at line 508 of file quda_matrix.h.

void quda::setKernelPackT ( bool  pack)
Parameters
pack - Sets whether to use a kernel to pack the T dimension

Definition at line 155 of file dslash_quda.cu.

void quda::setTwistParam ( double &  a,
double &  b,
const double &  kappa,
const double &  mu,
const int  dagger,
const QudaTwistGamma5Type  twist 
)

Definition at line 840 of file dslash_quda.cu.

void quda::setUnitarizeLinksConstants ( double  unitarize_eps,
double  max_error,
bool  allow_svd,
bool  svd_only,
double  svd_rel_error,
double  svd_abs_error,
bool  check_unitarization = true 
)

Definition at line 93 of file unitarize_links_quda.cu.

void quda::setUnitarizeLinksPadding ( int  input_padding,
int  output_padding 
)

Definition at line 43 of file unitarize_links_quda.cu.

template<class T , int N>
__device__ __host__ void quda::setZero ( Matrix< T, N > *  m)
inline

Definition at line 523 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setZero ( Matrix< float2, N > *  m)
inline

Definition at line 536 of file quda_matrix.h.

template<int N>
__device__ __host__ void quda::setZero ( Matrix< double2, N > *  m)
inline

Definition at line 549 of file quda_matrix.h.

void quda::siteComputeGenStapleParityKernel ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
int  mu,
int  nu,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
dim3  halfGridDim,
llfat_kernel_param_t  kparam,
cudaStream_t *  stream 
)

Definition at line 783 of file llfat_quda.cu.

void quda::siteComputeGenStapleParityKernel_ex ( void *  staple_even,
void *  staple_odd,
const void *  sitelink_even,
const void *  sitelink_odd,
void *  fatlink_even,
void *  fatlink_odd,
int  mu,
int  nu,
double  mycoeff,
QudaReconstructType  recon,
QudaPrecision  prec,
llfat_kernel_param_t  kparam 
)

Definition at line 937 of file llfat_quda.cu.

template<class Real >
DEVICEHOST void quda::smallSVD ( Matrix< Real, 2 > &  u,
Matrix< Real, 2 > &  v,
Matrix< Real, 2 > &  m 
)
inline

Definition at line 169 of file svd_quda.h.

void quda::staggeredDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  fatGauge,
const cudaGaugeField &  longGauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim 
)

NEW: extra argument.

Definition at line 1626 of file dslash_quda.cu.

void quda::storeLinkToCPU ( cpuGaugeField *  cpuGauge,
cudaGaugeField *  cudaGauge,
QudaGaugeParam param 
)
template<typename Float , typename Float2 >
void quda::storeMomToCPUArray ( Float mom,
Float2 *  even,
Float2 *  odd,
int  bytes,
int  V,
int  pad,
void *  buffer 
)

Definition at line 389 of file cuda_gauge_field.cpp.

template<class Real >
DEVICEHOST void quda::swap ( Real &  a,
Real &  b 
)
inline

Definition at line 160 of file svd_quda.h.

double quda::timeInterval ( struct timeval  start,
struct timeval  end 
)

Definition at line 21 of file inv_gcr_quda.cpp.

template<typename Float >
void quda::transpose ( Float gT,
const Float g 
)
inline

Definition at line 96 of file cpu_gauge_field.cpp.

double3 quda::tripleCGReductionCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 625 of file reduce_quda.cu.

void quda::tripleCGUpdateCuda ( const double &  alpha,
const double &  beta,
cudaColorSpinorField &  q,
cudaColorSpinorField &  r,
cudaColorSpinorField &  x,
cudaColorSpinorField &  p 
)

Definition at line 455 of file blas_quda.cu.

TuneParam quda::tuneLaunch ( Tunable &  tunable,
QudaTune  enabled,
QudaVerbosity  verbosity 
)

Return the optimal launch parameters for a given kernel, either by retrieving them from tunecache or autotuning on the spot.

void quda::twistedMassDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  parity,
const int  dagger,
const cudaColorSpinorField *  x,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const int *  commDim 
)

ndeg tm:

Definition at line 1513 of file dslash_quda.cu.

void quda::twistGamma5Cuda ( cudaColorSpinorField *  out,
const cudaColorSpinorField *  in,
const int  dagger,
const double &  kappa,
const double &  mu,
const double &  epsilon,
const QudaTwistGamma5Type  twist 
)

ndeg tm:

template<class Cmplx >
__host__ __device__ bool quda::unitarizeLinkMILC ( const Matrix< Cmplx, 3 > &  in,
Matrix< Cmplx, 3 > *const  result 
)

Definition at line 256 of file unitarize_links_quda.cu.

template<class Cmplx >
__host__ __device__ bool quda::unitarizeLinkNewton ( const Matrix< Cmplx, 3 > &  in,
Matrix< Cmplx, 3 > *const  result 
)

Definition at line 315 of file unitarize_links_quda.cu.

void quda::unitarizeLinksCPU ( const QudaGaugeParam param,
cpuGaugeField &  infield,
cpuGaugeField *  outfield 
)

Definition at line 495 of file unitarize_links_quda.cu.

void quda::unitarizeLinksCuda ( const QudaGaugeParam param,
cudaGaugeField &  infield,
cudaGaugeField *  outfield,
int *  num_failures 
)

Definition at line 487 of file unitarize_links_quda.cu.

template<class Cmplx >
__host__ __device__ bool quda::unitarizeLinkSVD ( const Matrix< Cmplx, 3 > &  in,
Matrix< Cmplx, 3 > *const  result 
)

Definition at line 287 of file unitarize_links_quda.cu.

void quda::unpackGhostStaple ( int *  X,
void *  _even,
void *  _odd,
int  volume,
QudaPrecision  prec,
int  stride,
int  dir,
int  whichway,
void **  fwd_nbr_buf,
void **  back_nbr_buf,
cudaStream_t *  stream 
)

void quda::updateAlphaZeta ( double *  alpha,
double *  zeta,
double *  zeta_old,
const double *  r2,
const double *  beta,
const double  pAp,
const double *  offset,
const int  nShift,
const int  j_low 
)

Compute the new values of alpha and zeta.

Definition at line 38 of file inv_multi_cg_quda.cpp.

void quda::updateSolution ( cudaColorSpinorField &  x,
const Complex *  alpha,
Complex **const  beta,
double *  gamma,
int  k,
cudaColorSpinorField *  p[] 
)

Definition at line 113 of file inv_gcr_quda.cpp.

void quda::wilsonDslashCuda ( cudaColorSpinorField *  out,
const cudaGaugeField &  gauge,
const cudaColorSpinorField *  in,
const int  oddBit,
const int  daggerBit,
const cudaColorSpinorField *  x,
const double &  k,
const int *  commDim 
)

Definition at line 1338 of file dslash_quda.cu.

quda::WRITE_STAPLE_MATRIX ( staple_even  ,
mem_idx   
)

template<class T >
__device__ void quda::writeLinkVariableToArray ( const Matrix< T, 3 > &  link,
int  dir,
int  idx,
int  stride,
T *const  array 
)
inline

Definition at line 665 of file quda_matrix.h.

__device__ void quda::writeLinkVariableToArray ( const Matrix< double2, 3 > &  link,
int  dir,
int  idx,
int  stride,
float2 *const  array 
)
inline

Definition at line 677 of file quda_matrix.h.

double quda::xmyNormCpu ( const cpuColorSpinorField &  a,
cpuColorSpinorField &  b 
)

Definition at line 205 of file blas_cpu.cpp.

double quda::xmyNormCuda ( cudaColorSpinorField &  a,
cudaColorSpinorField &  b 
)

Definition at line 204 of file reduce_quda.cu.

Complex quda::xpaycDotzyCpu ( const cpuColorSpinorField &  x,
const double &  a,
cpuColorSpinorField &  y,
const cpuColorSpinorField &  z 
)

Definition at line 231 of file blas_cpu.cpp.

Complex quda::xpaycDotzyCuda ( cudaColorSpinorField &  x,
const double &  a,
cudaColorSpinorField &  y,
cudaColorSpinorField &  z 
)

Definition at line 354 of file reduce_quda.cu.

void quda::xpayCpu ( const cpuColorSpinorField &  x,
const double &  a,
cpuColorSpinorField &  y 
)

Definition at line 41 of file blas_cpu.cpp.

void quda::xpayCuda ( cudaColorSpinorField &  x,
const double &  a,
cudaColorSpinorField &  y 
)

Definition at line 133 of file blas_quda.cu.

void quda::xpyCpu ( const cpuColorSpinorField &  x,
cpuColorSpinorField &  y 
)

Definition at line 22 of file blas_cpu.cpp.

void quda::xpyCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y 
)

Definition at line 99 of file blas_quda.cu.

double3 quda::xpyHeavyQuarkResidualNormCpu ( cpuColorSpinorField &  x,
cpuColorSpinorField &  y,
cpuColorSpinorField &  r 
)

double3 quda::xpyHeavyQuarkResidualNormCuda ( cudaColorSpinorField &  x,
cudaColorSpinorField &  y,
cudaColorSpinorField &  r 
)

Definition at line 596 of file reduce_quda.cu.

void quda::zeroCuda ( cudaColorSpinorField &  a)

Definition at line 32 of file blas_quda.cu.

Variable Documentation

unsigned long long quda::blas_bytes

Definition at line 30 of file blas_quda.cu.

unsigned long long quda::blas_flops

Definition at line 29 of file blas_quda.cu.

__global__ void const FloatN FloatM FloatM Float quda::coeff0

Definition at line 1055 of file llfat_quda.cu.

__global__ void const FloatN FloatM FloatM Float Float quda::coeff5
Initial value:

Definition at line 1058 of file llfat_quda.cu.

int quda::commDimTotal

Definition at line 1204 of file dslash_quda.cu.

int quda::commsCompleted[Nstream]

Definition at line 1202 of file dslash_quda.cu.

quda::DECLARE_NEW_X

Definition at line 795 of file llfat_quda.cu.

quda::DECLARE_VAR_SIGN

Definition at line 794 of file llfat_quda.cu.

quda::DECLARE_X_ARRAY

Definition at line 796 of file llfat_quda.cu.

__constant__ bool quda::DEV_FL_CHECK_UNITARIZATION

Definition at line 33 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_MAX_ERROR

Definition at line 27 of file unitarize_links_quda.cu.

__constant__ bool quda::DEV_FL_REUNIT_ALLOW_SVD

Definition at line 29 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_REUNIT_SVD_ABS_ERROR

Definition at line 32 of file unitarize_links_quda.cu.

__constant__ bool quda::DEV_FL_REUNIT_SVD_ONLY

Definition at line 30 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_REUNIT_SVD_REL_ERROR

Definition at line 31 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_FL_UNITARIZE_EPS

Definition at line 28 of file unitarize_links_quda.cu.

__constant__ double quda::DEV_HISQ_FORCE_FILTER

Definition at line 18 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_HISQ_UNITARIZE_EPS

Definition at line 17 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_MAX_DET_ERROR

Definition at line 19 of file unitarize_force_quda.cu.

__constant__ int quda::DEV_MAX_ITER = 20

Definition at line 23 of file unitarize_links_quda.cu.

__constant__ bool quda::DEV_REUNIT_ALLOW_SVD

Definition at line 20 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_REUNIT_SVD_ABS_ERROR

Definition at line 23 of file unitarize_force_quda.cu.

__constant__ bool quda::DEV_REUNIT_SVD_ONLY

Definition at line 21 of file unitarize_force_quda.cu.

__constant__ double quda::DEV_REUNIT_SVD_REL_ERROR

Definition at line 22 of file unitarize_force_quda.cu.

__constant__ int quda::dir1_array[16]

Definition at line 191 of file llfat_quda.cu.

__constant__ int quda::dir2_array[16]

Definition at line 192 of file llfat_quda.cu.

int quda::dslashCompleted[Nstream]

Definition at line 1203 of file dslash_quda.cu.

DslashParam quda::dslashParam

Definition at line 88 of file dslash_quda.cu.

quda::else
Initial value:
{
MULT_SU3_NA(tempa, c, tempb)

Definition at line 978 of file llfat_quda.cu.

__global__ void const FloatN FloatM * quda::fatlink_even

Definition at line 759 of file llfat_quda.cu.

__global__ void const FloatN FloatM FloatM * quda::fatlink_odd

Definition at line 759 of file llfat_quda.cu.

int quda::gatherCompleted[Nstream]

Definition at line 1200 of file dslash_quda.cu.

int quda::idx = sid

Definition at line 1374 of file llfat_quda.cu.

__constant__ int quda::INPUT_PADDING =0

Definition at line 21 of file unitarize_links_quda.cu.

__global__ void const FloatN FloatM FloatM Float Float llfat_kernel_param_t quda::kparam
Initial value:
{
__shared__ FloatM sd_data[NUM_FLOATS*64]

Definition at line 763 of file llfat_quda.cu.

int quda::Ls

Definition at line 135 of file dslash_quda.cu.

int quda::mem_idx = blockIdx.x*blockDim.x + threadIdx.x

Definition at line 771 of file llfat_quda.cu.

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM * quda::mulink_even

Definition at line 902 of file llfat_quda.cu.

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM const FloatM * quda::mulink_odd

Definition at line 902 of file llfat_quda.cu.

FloatM * quda::my_fatlink = fatlink_even

Definition at line 1060 of file llfat_quda.cu.

quda::my_sitelink = sitelink_even

Definition at line 1068 of file llfat_quda.cu.

__global__ void FloatM const FloatN const FloatN FloatM FloatM const FloatM const FloatM Float quda::mycoeff

Definition at line 759 of file llfat_quda.cu.

int quda::new_mem_idx

Definition at line 793 of file llfat_quda.cu.

const int quda::Nstream = 1

Definition at line 156 of file quda_internal.h.

short quda::odd_bit = 0

Definition at line 1065 of file llfat_quda.cu.

__constant__ int quda::OUTPUT_PADDING =0

Definition at line 22 of file unitarize_links_quda.cu.

int quda::previousDir[Nstream]

Definition at line 1201 of file dslash_quda.cu.

quda::return

Definition at line 897 of file llfat_quda.cu.

int quda::sid = blockIdx.x*blockDim.x + threadIdx.x

Definition at line 1061 of file llfat_quda.cu.

__global__ void FloatM const FloatN * quda::sitelink_even

Definition at line 759 of file llfat_quda.cu.

__global__ void const FloatN * quda::sitelink_odd

Definition at line 759 of file llfat_quda.cu.

FloatM quda::STAPLE0

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE1

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE2

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE3

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE4

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE5

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE6

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE7

Definition at line 768 of file llfat_quda.cu.

FloatM quda::STAPLE8

Definition at line 768 of file llfat_quda.cu.

unsigned long quda::staple_bytes =0

Definition at line 194 of file llfat_quda.cu.

__global__ void FloatM * quda::staple_odd

Definition at line 759 of file llfat_quda.cu.

int quda::stride

Definition at line 41 of file blas_quda.cu.

FloatM quda::TEMPA5

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPA6

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPA7

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPA8

Definition at line 767 of file llfat_quda.cu.

FloatM quda::TEMPB0

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB1

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB2

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB3

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB4

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB5

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB6

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB7

Definition at line 911 of file llfat_quda.cu.

FloatM quda::TEMPB8

Definition at line 911 of file llfat_quda.cu.

int quda::Vspatial

Definition at line 91 of file dslash_quda.cu.

int quda::x[4]

Definition at line 40 of file blas_quda.cu.

int quda::X = 2*mem_idx + x1odd

Definition at line 782 of file llfat_quda.cu.

short quda::x1 = 2*x1h + x1odd

Definition at line 781 of file llfat_quda.cu.

short quda::x1h = mem_idx - z1*X1h

Definition at line 774 of file llfat_quda.cu.

short quda::x1odd = (x2 + x3 + x4 + odd_bit) & 1

Definition at line 780 of file llfat_quda.cu.

short quda::x2 = z1 - z2*X2

Definition at line 776 of file llfat_quda.cu.

short quda::x3 = z2 - x4*X3

Definition at line 778 of file llfat_quda.cu.

int quda::x4 = z2 / X3

Definition at line 777 of file llfat_quda.cu.

int quda::z1 = mem_idx / X1h

Definition at line 773 of file llfat_quda.cu.

int quda::z2 = z1 / X2

Definition at line 775 of file llfat_quda.cu.