QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
Namespaces | Macros | Functions
blas_cublas.cu File Reference
#include <malloc_quda.h>
Include dependency graph for blas_cublas.cu:

Go to the source code of this file.

Namespaces

 quda
 
 quda::cublas
 

Macros

#define FMULS_GETRF(m_, n_)
 
#define FADDS_GETRF(m_, n_)
 
#define FLOPS_ZGETRF(m_, n_)   (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )
 
#define FLOPS_CGETRF(m_, n_)   (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )
 
#define FMULS_GETRI(n_)   ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)) )
 
#define FADDS_GETRI(n_)   ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) - 1.5)) )
 
#define FLOPS_ZGETRI(n_)   (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )
 
#define FLOPS_CGETRI(n_)   (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )
 

Functions

void quda::cublas::init ()
 Create the CUBLAS context. More...
 
void quda::cublas::destroy ()
 Destroy the CUBLAS context. More...
 
template<typename T >
__global__ void quda::cublas::set_pointer (T **output_array_a, T *input_a, T **output_array_b, T *input_b, int batch_offset)
 
long long quda::cublas::BatchInvertMatrix (void *Ainv, void *A, const int n, const int batch, QudaPrecision precision, QudaFieldLocation location)
 

Macro Definition Documentation

◆ FADDS_GETRF

#define FADDS_GETRF(m_, n_)
Value:
( ((m_) < (n_)) \
? (0.5 * (m_) * ((m_) * ((n_) - (1./3.) * (m_) ) - (n_)) + (1. / 6.) * (m_)) \
: (0.5 * (n_) * ((n_) * ((m_) - (1./3.) * (n_) ) - (m_)) + (1. / 6.) * (n_)) )

Definition at line 10 of file blas_cublas.cu.

◆ FADDS_GETRI

#define FADDS_GETRI(n_)   ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) - 1.5)) )

Definition at line 18 of file blas_cublas.cu.

◆ FLOPS_CGETRF

#define FLOPS_CGETRF(m_, n_)   (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )

Definition at line 15 of file blas_cublas.cu.

Referenced by quda::cublas::BatchInvertMatrix().

◆ FLOPS_CGETRI

#define FLOPS_CGETRI(n_)   (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )

Definition at line 21 of file blas_cublas.cu.

Referenced by quda::cublas::BatchInvertMatrix().

◆ FLOPS_ZGETRF

#define FLOPS_ZGETRF(m_, n_)   (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )

Definition at line 14 of file blas_cublas.cu.

◆ FLOPS_ZGETRI

#define FLOPS_ZGETRI(n_)   (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )

Definition at line 20 of file blas_cublas.cu.

◆ FMULS_GETRF

#define FMULS_GETRF(m_, n_)
Value:
( ((m_) < (n_)) \
? (0.5 * (m_) * ((m_) * ((n_) - (1./3.) * (m_) - 1. ) + (n_)) + (2. / 3.) * (m_)) \
: (0.5 * (n_) * ((n_) * ((m_) - (1./3.) * (n_) - 1. ) + (m_)) + (2. / 3.) * (n_)) )

Definition at line 7 of file blas_cublas.cu.

◆ FMULS_GETRI

#define FMULS_GETRI(n_)   ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)) )

Definition at line 17 of file blas_cublas.cu.