QUDA  1.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
quda_cuda_api.h
Go to the documentation of this file.
1 #pragma once
2 
3 #ifndef __CUDACC_RTC__
4 #include <cuda.h>
5 #include <cuda_runtime.h>
6 #include <quda_cuda_api.h>
7 
15 namespace quda {
16 
   // Wrapper around cudaMemcpy used for auto-profiling. Not meant to be
   // called directly: use the qudaMemcpy macro, which fills in the three
   // trailing string arguments from the call site.
   //   dst, src, count, kind - copy arguments (presumably forwarded to
   //                           cudaMemcpy unchanged - confirm in the definition)
   //   func - name of the calling function (the macro passes __func__)
   //   file - file of the call (the macro passes quda::file_name(__FILE__))
   //   line - line of the call as a string (the macro passes the stringified __LINE__)
26  void qudaMemcpy_(void *dst, const void *src, size_t count, cudaMemcpyKind kind,
27  const char *func, const char *file, const char *line);
28 
29 }
30 
31 #define STRINGIFY__(x) #x
32 #define __STRINGIFY__(x) STRINGIFY__(x)
   // Call-site wrapper for quda::qudaMemcpy_: forwards the copy arguments and
   // appends __func__, the file name and the stringified line number.
   // Two-level stringification is needed so __LINE__ expands before '#' applies.
   // The expansion deliberately has no trailing ';' - the caller's own ';'
   // terminates the statement, so `if (c) qudaMemcpy(...); else ...` compiles.
33 #define qudaMemcpy(dst, src, count, kind) \
34  ::quda::qudaMemcpy_(dst, src, count, kind, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
35 
36 #define STRINGIFY__(x) #x
37 #define __STRINGIFY__(x) STRINGIFY__(x)
   // Call-site wrapper for quda::qudaMemcpyAsync_: forwards the copy arguments
   // and stream, then appends __func__, the file name and the stringified line.
   // The expansion deliberately has no trailing ';' - the caller's own ';'
   // terminates the statement, so the macro is safe in un-braced if/else bodies.
38 #define qudaMemcpyAsync(dst, src, count, kind, stream) \
39  ::quda::qudaMemcpyAsync_(dst, src, count, kind, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
40 
41 #define STRINGIFY__(x) #x
42 #define __STRINGIFY__(x) STRINGIFY__(x)
   // Call-site wrapper for quda::qudaMemcpy2DAsync_: forwards the 2D copy
   // arguments and stream, then appends __func__, the file name and the
   // stringified line number.
   // The expansion deliberately has no trailing ';' - the caller's own ';'
   // terminates the statement, so the macro is safe in un-braced if/else bodies.
43 #define qudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream) \
44  ::quda::qudaMemcpy2DAsync_(dst, dpitch, src, spitch, width, height, kind, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
45 
46 namespace quda {
47 
   // Wrapper around cudaMemcpyAsync or the driver API equivalent.
   // Not meant to be called directly: the qudaMemcpyAsync macro supplies
   // func/file/line (calling function, file name, stringified line number).
57  void qudaMemcpyAsync_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream,
58  const char *func, const char *file, const char *line);
59 
   // Wrapper around cudaMemcpy2DAsync or the driver API equivalent.
   // Not meant to be called directly: the qudaMemcpy2DAsync macro supplies
   // func/file/line (calling function, file name, stringified line number).
   // NOTE(review): the height parameter is spelled `hieght` (sic) in this declaration.
72  void qudaMemcpy2DAsync_(void *dst, size_t dpitch, const void *src, size_t spitch,
73  size_t width, size_t hieght, cudaMemcpyKind kind, const cudaStream_t &stream,
74  const char *func, const char *file, const char *line);
75 
   // Wrapper around cudaLaunchKernel; returns the resulting cudaError_t.
85  cudaError_t qudaLaunchKernel(const void* func, dim3 gridDim, dim3 blockDim, void** args, size_t sharedMem, cudaStream_t stream);
86 
   // Wrapper around cudaEventQuery or cuEventQuery.
92  cudaError_t qudaEventQuery(cudaEvent_t &event);
93 
   // Wrapper around cudaEventRecord or cuEventRecord; stream defaults to 0.
99  cudaError_t qudaEventRecord(cudaEvent_t &event, cudaStream_t stream=0);
100 
   // Presumably a wrapper around cudaStreamWaitEvent or cuStreamWaitEvent, by
   // analogy with the sibling wrappers - TODO confirm against the definition.
107  cudaError_t qudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
108 
   // Wrapper around cudaStreamSynchronize or cuStreamSynchronize.
113  cudaError_t qudaStreamSynchronize(cudaStream_t &stream);
114 
   // Wrapper around cudaEventSynchronize or cuEventSynchronize.
119  cudaError_t qudaEventSynchronize(cudaEvent_t &event);
120 
   // Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
   // Not meant to be called directly: the qudaDeviceSynchronize() macro
   // supplies func/file/line from the call site.
124  cudaError_t qudaDeviceSynchronize_(const char *func, const char *file, const char *line);
125 
126 #if CUDA_VERSION >= 9000
127 
   // Only declared when CUDA_VERSION >= 9000.
   // Presumably a wrapper around cudaFuncSetAttribute - TODO confirm against the definition.
133  cudaError_t qudaFuncSetAttribute(const void* func, cudaFuncAttribute attr, int value);
134 #endif
135 
   // Print out the timer profile for CUDA API calls.
139  void printAPIProfile();
140 
141 } // namespace quda
142 
143 #define STRINGIFY__(x) #x
144 #define __STRINGIFY__(x) STRINGIFY__(x)
    // Call-site wrapper for quda::qudaDeviceSynchronize_: passes __func__, the
    // file name and the stringified line number.
    // qudaDeviceSynchronize_ returns cudaError_t, so the expansion deliberately
    // has no trailing ';' - this lets the macro be used inside an expression
    // (e.g. compared against cudaSuccess) as well as as a plain statement.
145 #define qudaDeviceSynchronize() \
146  ::quda::qudaDeviceSynchronize_(__func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
147 
148 #endif
void qudaMemcpy2DAsync_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t hieght, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy2DAsync or driver API equivalent. Potentially add auto-profiling support...
cudaError_t qudaEventSynchronize(cudaEvent_t &event)
Wrapper around cudaEventSynchronize or cuEventSynchronize.
cudaError_t qudaEventQuery(cudaEvent_t &event)
Wrapper around cudaEventQuery or cuEventQuery.
cudaError_t qudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags)
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent.
cudaStream_t * stream
cudaError_t qudaStreamSynchronize(cudaStream_t &stream)
Wrapper around cudaStreamSynchronize or cuStreamSynchronize.
cudaError_t qudaDeviceSynchronize_(const char *func, const char *file, const char *line)
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize.
void qudaMemcpyAsync_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const cudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpyAsync or driver API equivalent. Potentially add auto-profiling support...
void printAPIProfile()
Print out the timer profile for CUDA API calls.
cudaError_t qudaEventRecord(cudaEvent_t &event, cudaStream_t stream=0)
Wrapper around cudaEventRecord or cuEventRecord.
bool flags
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]
Definition: cub_helper.cuh:90
cudaError_t qudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream)
Wrapper around cudaLaunchKernel.
void qudaMemcpy_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy used for auto-profiling. Do not call directly; instead call the qudaMemcpy macro, which supplies the calling function, file and line.