|
QUDA
1.0.0
|
Functions | |
| void | init () |
| Create the CUBLAS context. More... | |
| void | destroy () |
| Destroy the CUBLAS context. More... | |
| long long | BatchInvertMatrix (void *Ainv, void *A, const int n, const int batch, QudaPrecision precision, QudaFieldLocation location) |
| template<typename T > | |
| __global__ void | set_pointer (T **output_array_a, T *input_a, T **output_array_b, T *input_b, int batch_offset) |
| long long quda::cublas::BatchInvertMatrix | ( | void * | Ainv, |
| void * | A, | ||
| const int | n, | ||
| const int | batch, | ||
| QudaPrecision | precision, | ||
| QudaFieldLocation | location | ||
| ) |
Batch inversion of the matrix field using an LU decomposition method.
| [out] | Ainv | Matrix field containing the inverse matrices |
| [in] | A | Matrix field containing the input matrices |
| [in] | n | Dimension of each matrix |
| [in] | batch | Problem batch size |
| [in] | precision | Precision of the input/output data |
| [in] | location | Location of the input/output data |
Definition at line 54 of file blas_cublas.cu.
References errorQuda, quda::blas::flops, FLOPS_CGETRF, FLOPS_CGETRI, getVerbosity(), pool_device_free, pool_device_malloc, pool_pinned_free, pool_pinned_malloc, printfQuda, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_SINGLE_PRECISION, QUDA_VERBOSE, qudaDeviceSynchronize, qudaMemcpy, and quda::size.

| void quda::cublas::destroy | ( | ) |
Destroy the CUBLAS context.
Definition at line 38 of file blas_cublas.cu.
References errorQuda.
Referenced by endQuda().

| void quda::cublas::init | ( | ) |
Create the CUBLAS context.
Definition at line 31 of file blas_cublas.cu.
References errorQuda.
Referenced by comm_peer2peer_enabled_global(), quda::forceMonitor(), getOmpThreadStr(), getRankVerbosity(), getTuning(), initQudaMemory(), profilerStart(), quda::traceEnabled(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::transform_reduce(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), and quda::gauge::Accessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce().

| __global__ void quda::cublas::set_pointer | ( | T ** | output_array_a, |
| T * | input_a, | ||
| T ** | output_array_b, | ||
| T * | input_b, | ||
| int | batch_offset | ||
| ) |
Definition at line 47 of file blas_cublas.cu.
1.8.13