5 #define FMULS_GETRF(m_, n_) \
6 (((m_) < (n_)) ? (0.5 * (m_) * ((m_) * ((n_) - (1. / 3.) * (m_)-1.) + (n_)) + (2. / 3.) * (m_)) : \
7 (0.5 * (n_) * ((n_) * ((m_) - (1. / 3.) * (n_)-1.) + (m_)) + (2. / 3.) * (n_)))
8 #define FADDS_GETRF(m_, n_) \
9 (((m_) < (n_)) ? (0.5 * (m_) * ((m_) * ((n_) - (1. / 3.) * (m_)) - (n_)) + (1. / 6.) * (m_)) : \
10 (0.5 * (n_) * ((n_) * ((m_) - (1. / 3.) * (n_)) - (m_)) + (1. / 6.) * (n_)))
12 #define FLOPS_ZGETRF(m_, n_) \
13 (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)))
14 #define FLOPS_CGETRF(m_, n_) \
15 (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)))
17 #define FMULS_GETRI(n_) ((n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)))
18 #define FADDS_GETRI(n_) ((n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_)-1.5)))
20 #define FLOPS_ZGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)))
21 #define FLOPS_CGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)))
enum QudaPrecision_s QudaPrecision
enum QudaFieldLocation_s QudaFieldLocation
void init()
Create the BLAS context.
long long stridedBatchGEMM(void *A, void *B, void *C, QudaBLASParam blas_param, QudaFieldLocation location)
Strided Batch GEMM. This function performs N GEMM type operations in a strided batched fashion....
long long BatchInvertMatrix(void *Ainv, void *A, const int n, const uint64_t batch, QudaPrecision precision, QudaFieldLocation location)
Batch inversion the matrix field using an LU decomposition method.
void destroy()
Destroy the BLAS context.
long long BatchInvertMatrix(void *Ainv, void *A, const int n, const uint64_t batch, QudaPrecision precision, QudaFieldLocation location)
Batch inversion the matrix field using an LU decomposition method.
void init()
Create the BLAS context.
long long stridedBatchGEMM(void *A, void *B, void *C, QudaBLASParam blas_param, QudaFieldLocation location)
Strided Batch GEMM. This function performs N GEMM type operations in a strided batched fashion....
void destroy()
Destroy the BLAS context.
void set_native(bool native)