QUDA
1.0.0
|
Functions | |
void | init () |
Create the CUBLAS context. More... | |
void | destroy () |
Destroy the CUBLAS context. More... | |
long long | BatchInvertMatrix (void *Ainv, void *A, const int n, const int batch, QudaPrecision precision, QudaFieldLocation location) |
template<typename T > | |
__global__ void | set_pointer (T **output_array_a, T *input_a, T **output_array_b, T *input_b, int batch_offset) |
long long quda::cublas::BatchInvertMatrix | ( | void * | Ainv, |
void * | A, | ||
const int | n, | ||
const int | batch, | ||
QudaPrecision | precision, | ||
QudaFieldLocation | location | ||
) |
Batch inversion of the matrix field using an LU decomposition method.
[out] | Ainv | Matrix field containing the inverse matrices |
[in] | A | Matrix field containing the input matrices |
[in] | n | Dimension of each matrix |
[in] | batch | Problem batch size |
[in] | precision | Precision of the input/output data |
[in] | location | Location of the input/output data |
Definition at line 54 of file blas_cublas.cu.
References errorQuda, quda::blas::flops, FLOPS_CGETRF, FLOPS_CGETRI, getVerbosity(), pool_device_free, pool_device_malloc, pool_pinned_free, pool_pinned_malloc, printfQuda, QUDA_CPU_FIELD_LOCATION, QUDA_CUDA_FIELD_LOCATION, QUDA_SINGLE_PRECISION, QUDA_VERBOSE, qudaDeviceSynchronize, qudaMemcpy, and quda::size.
void quda::cublas::destroy | ( | ) |
Destroy the CUBLAS context.
Definition at line 38 of file blas_cublas.cu.
References errorQuda.
Referenced by endQuda().
void quda::cublas::init | ( | ) |
Create the CUBLAS context.
Definition at line 31 of file blas_cublas.cu.
References errorQuda.
Referenced by comm_peer2peer_enabled_global(), quda::forceMonitor(), getOmpThreadStr(), getRankVerbosity(), getTuning(), initQudaMemory(), profilerStart(), quda::traceEnabled(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT2_CLOVER_ORDER >::transform_reduce(), quda::clover::Accessor< Float, nColor, nSpin, QUDA_FLOAT4_CLOVER_ORDER >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_QDP_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), quda::gauge::Accessor< Float, nColor, QUDA_MILC_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce(), and quda::gauge::Accessor< Float, nColor, QUDA_FLOAT2_GAUGE_ORDER, storeFloat, use_tex >::transform_reduce().
__global__ void quda::cublas::set_pointer | ( | T ** | output_array_a, |
T * | input_a, | ||
T ** | output_array_b, | ||
T * | input_b, | ||
int | batch_offset | ||
) |
Definition at line 47 of file blas_cublas.cu.