QUDA
1.0.0
|
#include <malloc_quda.h>
Go to the source code of this file.
Namespaces | |
quda | |
quda::cublas | |
Macros | |
#define | FMULS_GETRF(m_, n_) |
#define | FADDS_GETRF(m_, n_) |
#define | FLOPS_ZGETRF(m_, n_) (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) ) |
#define | FLOPS_CGETRF(m_, n_) (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) ) |
#define | FMULS_GETRI(n_) ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)) ) |
#define | FADDS_GETRI(n_) ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) - 1.5)) ) |
#define | FLOPS_ZGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) ) |
#define | FLOPS_CGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) ) |
Functions | |
void | quda::cublas::init () |
Create the CUBLAS context. More... | |
void | quda::cublas::destroy () |
Destroy the CUBLAS context. More... | |
template<typename T > | |
__global__ void | quda::cublas::set_pointer (T **output_array_a, T *input_a, T **output_array_b, T *input_b, int batch_offset) |
long long | quda::cublas::BatchInvertMatrix (void *Ainv, void *A, const int n, const int batch, QudaPrecision precision, QudaFieldLocation location) |
#define FADDS_GETRF(m_, n_)
Definition at line 10 of file blas_cublas.cu.
#define FADDS_GETRI(n_) ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) - 1.5)) )
Definition at line 18 of file blas_cublas.cu.
#define FLOPS_CGETRF(m_, n_) (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )
Definition at line 15 of file blas_cublas.cu.
Referenced by quda::cublas::BatchInvertMatrix().
#define FLOPS_CGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )
Definition at line 21 of file blas_cublas.cu.
Referenced by quda::cublas::BatchInvertMatrix().
#define FLOPS_ZGETRF(m_, n_) (6. * FMULS_GETRF((double)(m_), (double)(n_)) + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )
Definition at line 14 of file blas_cublas.cu.
#define FLOPS_ZGETRI(n_) (6. * FMULS_GETRI((double)(n_)) + 2.0 * FADDS_GETRI((double)(n_)) )
Definition at line 20 of file blas_cublas.cu.
#define FMULS_GETRF(m_, n_)
Definition at line 7 of file blas_cublas.cu.
#define FMULS_GETRI(n_) ( (n_) * ((5. / 6.) + (n_) * ((2. / 3.) * (n_) + 0.5)) )
Definition at line 17 of file blas_cublas.cu.