QUDA  v1.1.0
A library for QCD on GPUs
Namespaces | Macros | Functions
blas_lapack.h File Reference
#include <quda_internal.h>

Go to the source code of this file.

Namespaces

 quda
 
 quda::blas_lapack
 
 quda::blas_lapack::native
 
 quda::blas_lapack::generic
 

Macros

#define FMULS_GETRF(m_, n_)
 
#define FADDS_GETRF(m_, n_)
 
#define FLOPS_ZGETRF(m_, n_)    (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)))
 
#define FLOPS_CGETRF(m_, n_)    (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)))
 
#define FMULS_GETRI(n_)   ((n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)))
 
#define FADDS_GETRI(n_)   ((n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_)-1.5)))
 
#define FLOPS_ZGETRI(n_)   (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)))
 
#define FLOPS_CGETRI(n_)   (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)))
 

Functions

bool quda::blas_lapack::use_native ()
 
void quda::blas_lapack::set_native (bool native)
 
void quda::blas_lapack::native::init ()
 Create the BLAS context. More...
 
void quda::blas_lapack::native::destroy ()
 Destroy the BLAS context. More...
 
long long quda::blas_lapack::native::BatchInvertMatrix (void *Ainv, void *A, const int n, const uint64_t batch, QudaPrecision precision, QudaFieldLocation location)
 Batch inversion of the matrix field using an LU decomposition method. More...
 
long long quda::blas_lapack::native::stridedBatchGEMM (void *A, void *B, void *C, QudaBLASParam blas_param, QudaFieldLocation location)
 Strided Batch GEMM. This function performs N GEMM-type operations in a strided batched fashion. If the user passes... More...
 
void quda::blas_lapack::generic::init ()
 Create the BLAS context. More...
 
void quda::blas_lapack::generic::destroy ()
 Destroy the BLAS context. More...
 
long long quda::blas_lapack::generic::BatchInvertMatrix (void *Ainv, void *A, const int n, const uint64_t batch, QudaPrecision precision, QudaFieldLocation location)
 Batch inversion of the matrix field using an LU decomposition method. More...
 
long long quda::blas_lapack::generic::stridedBatchGEMM (void *A, void *B, void *C, QudaBLASParam blas_param, QudaFieldLocation location)
 Strided Batch GEMM. This function performs N GEMM-type operations in a strided batched fashion. If the user passes... More...
 

Macro Definition Documentation

◆ FADDS_GETRF

#define FADDS_GETRF (   m_,
  n_ 
)
Value:
(((m_) < (n_)) ? (0.5 * (m_) * ((m_) * ((n_) - (1. / 3.) * (m_)) - (n_)) + (1. / 6.) * (m_)) : \
(0.5 * (n_) * ((n_) * ((m_) - (1. / 3.) * (n_)) - (m_)) + (1. / 6.) * (n_)))

Definition at line 8 of file blas_lapack.h.

◆ FADDS_GETRI

#define FADDS_GETRI (   n_)    ((n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_)-1.5)))

Definition at line 18 of file blas_lapack.h.

◆ FLOPS_CGETRF

#define FLOPS_CGETRF (   m_,
  n_ 
)     (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)))

Definition at line 14 of file blas_lapack.h.

◆ FLOPS_CGETRI

#define FLOPS_CGETRI (   n_)    (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)))

Definition at line 21 of file blas_lapack.h.

◆ FLOPS_ZGETRF

#define FLOPS_ZGETRF (   m_,
  n_ 
)     (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)))

Definition at line 12 of file blas_lapack.h.

◆ FLOPS_ZGETRI

#define FLOPS_ZGETRI (   n_)    (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)))

Definition at line 20 of file blas_lapack.h.

◆ FMULS_GETRF

#define FMULS_GETRF (   m_,
  n_ 
)
Value:
(((m_) < (n_)) ? (0.5 * (m_) * ((m_) * ((n_) - (1. / 3.) * (m_)-1.) + (n_)) + (2. / 3.) * (m_)) : \
(0.5 * (n_) * ((n_) * ((m_) - (1. / 3.) * (n_)-1.) + (m_)) + (2. / 3.) * (n_)))

Definition at line 5 of file blas_lapack.h.

◆ FMULS_GETRI

#define FMULS_GETRI (   n_)    ((n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)))

Definition at line 17 of file blas_lapack.h.