|
void | quda::qudaMemcpy_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line) |
| Wrapper around cudaMemcpy used for auto-profiling. Do not call this directly; instead, call the macro below, which captures the location of the call. More...
|
|
void | quda::qudaMemcpyAsync_ (void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line) |
| Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support. More...
|
|
void | quda::qudaMemcpy2DAsync_ (void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line) |
| Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support. More...
|
|
cudaError_t | quda::qudaLaunchKernel (const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream) |
| Wrapper around cudaLaunchKernel. More...
|
|
cudaError_t | quda::qudaEventQuery (cudaEvent_t &event) |
| Wrapper around cudaEventQuery or cuEventQuery. More...
|
|
cudaError_t | quda::qudaEventRecord (cudaEvent_t &event, cudaStream_t stream=0) |
| Wrapper around cudaEventRecord or cuEventRecord. More...
|
|
cudaError_t | quda::qudaStreamWaitEvent (cudaStream_t stream, cudaEvent_t event, unsigned int flags) |
| Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent. More...
|
|
cudaError_t | quda::qudaStreamSynchronize (cudaStream_t &stream) |
| Wrapper around cudaStreamSynchronize or cuStreamSynchronize. More...
|
|
cudaError_t | quda::qudaEventSynchronize (cudaEvent_t &event) |
| Wrapper around cudaEventSynchronize or cuEventSynchronize. More...
|
|
cudaError_t | quda::qudaDeviceSynchronize_ (const char *func, const char *file, const char *line) |
| Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize. More...
|
|
void | quda::printAPIProfile () |
| Print out the timer profile for CUDA API calls. More...
|
|