5 #include <cuda_runtime.h>
42 template <
typename T,
typename... Arg>
45 const void *args[] = {&
arg...};
56 void qudaMemcpy_(
void *dst,
const void *src,
size_t count, cudaMemcpyKind kind,
const char *func,
const char *file,
68 const char *func,
const char *file,
const char *line);
/**
 * @brief Two-dimensional (pitched) memory copy with call-site tracking.
 *
 * Normally reached through the qudaMemcpy2D(...) macro, which supplies the
 * last three arguments automatically. No stream parameter is taken, unlike
 * the qudaMemcpy2DAsync_ variant.
 * NOTE(review): presumably wraps cudaMemcpy2D (or the driver API equivalent);
 * confirm against the implementation.
 *
 * @param dst    Destination pointer
 * @param dpitch Destination pitch (presumably in bytes, as for cudaMemcpy2D - confirm)
 * @param src    Source pointer
 * @param spitch Source pitch (presumably in bytes - confirm)
 * @param width  Width of each row to copy (presumably in bytes - confirm)
 * @param height Number of rows to copy
 * @param kind   Direction of the copy (cudaMemcpyKind)
 * @param func   Name of the calling function (__func__ at the call site)
 * @param file   Calling file, as produced by quda::file_name(__FILE__)
 * @param line   Calling line number, passed as a string
 */
80 void qudaMemcpy2D_(
void *dst,
size_t dpitch,
const void *src,
size_t spitch,
size_t width,
size_t height,
81 cudaMemcpyKind kind,
const char *func,
const char *file,
const char *line);
94 void qudaMemcpy2DAsync_(
void *dst,
size_t dpitch,
const void *src,
size_t spitch,
size_t width,
size_t height,
/**
 * @brief Wrapper around cudaMemset or driver API equivalent, with call-site
 * tracking.
 *
 * Normally reached through the qudaMemset(...) macro, which supplies the last
 * three arguments automatically.
 *
 * @param ptr   Start of the memory region to set
 * @param value Value to set (cudaMemset semantics: applied per byte -
 *              presumably forwarded unchanged; confirm against the implementation)
 * @param count Size of the region (presumably in bytes - confirm)
 * @param func  Name of the calling function (__func__ at the call site)
 * @param file  Calling file, as produced by quda::file_name(__FILE__)
 * @param line  Calling line number, passed as a string
 */
104 void qudaMemset_(
void *ptr,
int value,
size_t count,
const char *func,
const char *file,
const char *line);
/**
 * @brief Wrapper around cudaMemset2D or driver API equivalent, with call-site
 * tracking.
 *
 * Normally reached through the qudaMemset2D(...) macro, which supplies the
 * last three arguments automatically.
 *
 * @param ptr    Start of the pitched memory region to set
 * @param pitch  Pitch of the region (presumably in bytes, as for cudaMemset2D - confirm)
 * @param value  Value to set (presumably applied per byte - confirm)
 * @param width  Width of each row to set (presumably in bytes - confirm)
 * @param height Number of rows to set
 * @param func   Name of the calling function (__func__ at the call site)
 * @param file   Calling file, as produced by quda::file_name(__FILE__)
 * @param line   Calling line number, passed as a string
 */
114 void qudaMemset2D_(
void *ptr,
size_t pitch,
int value,
size_t width,
size_t height,
const char *func,
115 const char *file,
const char *line);
125 const char *file,
const char *line);
137 const char *func,
const char *file,
const char *line);
147 const char *func,
const char *file,
const char *line);
/**
 * @brief Wrapper around cudaEventQuery or cuEventQuery with built-in error
 * checking.
 *
 * Normally reached through the qudaEventQuery(event) macro, which supplies
 * the last three arguments automatically.
 *
 * @param event Event to query (taken by non-const reference)
 * @param func  Name of the calling function (__func__ at the call site)
 * @param file  Calling file, as produced by quda::file_name(__FILE__)
 * @param line  Calling line number, passed as a string
 * @return bool; presumably true once the event has completed and false while
 *         it is still pending - TODO confirm against the implementation
 */
154 bool qudaEventQuery_(cudaEvent_t &event,
const char *func,
const char *file,
const char *line);
172 const char *file,
const char *line);
/**
 * @brief Wrapper around cudaEventSynchronize or cuEventSynchronize with
 * built-in error checking.
 *
 * Normally reached through the qudaEventSynchronize(event) macro, which
 * supplies the last three arguments automatically.
 *
 * @param event Event to wait on (taken by non-const reference)
 * @param func  Name of the calling function (__func__ at the call site)
 * @param file  Calling file, as produced by quda::file_name(__FILE__)
 * @param line  Calling line number, passed as a string
 */
179 void qudaEventSynchronize_(cudaEvent_t &event,
const char *func,
const char *file,
const char *line);
// Two-level stringification helpers.
// STRINGIFY__(x) applies the preprocessor # operator directly, so it turns its
// argument into a string literal WITHOUT macro-expanding it first.
// __STRINGIFY__(x) adds one level of indirection so that x is expanded before
// being stringized; this is what lets __STRINGIFY__(__LINE__) yield the line
// number as a string (e.g. "42") instead of the text "__LINE__".
// NOTE(review): identifiers containing a double underscore are reserved to the
// implementation in C++; renaming would require updating every user of these macros.
201 #define STRINGIFY__(x) #x
202 #define __STRINGIFY__(x) STRINGIFY__(x)
// Call-site front end for ::quda::qudaMemcpy_ (wrapper around cudaMemcpy or
// driver API equivalent). Forwards dst, src, count and kind unchanged and
// appends the caller's function name (__func__), quda::file_name(__FILE__) and
// the current line number as a string, so the callee knows where the call
// originated.
204 #define qudaMemcpy(dst, src, count, kind) \
205 ::quda::qudaMemcpy_(dst, src, count, kind, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemcpyAsync_ (wrapper around
// cudaMemcpyAsync or driver API equivalent). Forwards dst, src, count, kind
// and stream unchanged and appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
207 #define qudaMemcpyAsync(dst, src, count, kind, stream) \
208 ::quda::qudaMemcpyAsync_(dst, src, count, kind, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemcpy2D_ (pitched 2-D copy, no stream
// argument). Forwards the seven copy arguments unchanged and appends the
// caller's function name (__func__), quda::file_name(__FILE__) and the current
// line number as a string.
210 #define qudaMemcpy2D(dst, dpitch, src, spitch, width, height, kind) \
211 ::quda::qudaMemcpy2D_(dst, dpitch, src, spitch, width, height, kind, __func__, quda::file_name(__FILE__), \
212 __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemcpy2DAsync_ (wrapper around
// cudaMemcpy2DAsync or driver API equivalent). Forwards the copy arguments and
// the stream unchanged and appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
214 #define qudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream) \
215 ::quda::qudaMemcpy2DAsync_(dst, dpitch, src, spitch, width, height, kind, stream, __func__, \
216 quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemset_ (wrapper around cudaMemset or
// driver API equivalent). Forwards ptr, value and count unchanged and appends
// the caller's function name (__func__), quda::file_name(__FILE__) and the
// current line number as a string.
218 #define qudaMemset(ptr, value, count) \
219 ::quda::qudaMemset_(ptr, value, count, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemset2D_ (wrapper around cudaMemset2D
// or driver API equivalent). Forwards ptr, pitch, value, width and height
// unchanged and appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
221 #define qudaMemset2D(ptr, pitch, value, width, height) \
222 ::quda::qudaMemset2D_(ptr, pitch, value, width, height, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemsetAsync_ (wrapper around
// cudaMemsetAsync or driver API equivalent). Forwards ptr, value, count and
// stream unchanged and appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
224 #define qudaMemsetAsync(ptr, value, count, stream) \
225 ::quda::qudaMemsetAsync_(ptr, value, count, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemset2DAsync_ (pitched 2-D memset
// issued on a stream). Forwards ptr, pitch, value, width, height and stream
// unchanged and appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
227 #define qudaMemset2DAsync(ptr, pitch, value, width, height, stream) \
228 ::quda::qudaMemset2DAsync_(ptr, pitch, value, width, height, stream, __func__, quda::file_name(__FILE__), \
229 __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaMemPrefetchAsync_ (wrapper around
// cudaMemPrefetchAsync or driver API equivalent). mem_space is a
// QudaFieldLocation in the callee's signature. Forwards ptr, count, mem_space
// and stream unchanged and appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
231 #define qudaMemPrefetchAsync(ptr, count, mem_space, stream) \
232 ::quda::qudaMemPrefetchAsync_(ptr, count, mem_space, stream, __func__, quda::file_name(__FILE__), \
233 __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaEventQuery_ (wrapper around
// cudaEventQuery or cuEventQuery with built-in error checking). The expansion
// is an expression of type bool, the callee's return type. Because the callee
// takes the event by non-const reference, `event` must be an lvalue.
// Appends the caller's function name (__func__), quda::file_name(__FILE__) and
// the current line number as a string.
235 #define qudaEventQuery(event) \
236 ::quda::qudaEventQuery_(event, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaEventRecord_ (wrapper around
// cudaEventRecord or cuEventRecord with built-in error checking). Forwards
// event and stream unchanged and appends the caller's function name
// (__func__), quda::file_name(__FILE__) and the current line number as a string.
238 #define qudaEventRecord(event, stream) \
239 ::quda::qudaEventRecord_(event, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaStreamWaitEvent_ (wrapper around
// cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking).
// Forwards stream, event and flags unchanged and appends the caller's function
// name (__func__), quda::file_name(__FILE__) and the current line number as a
// string.
241 #define qudaStreamWaitEvent(stream, event, flags) \
242 ::quda::qudaStreamWaitEvent_(stream, event, flags, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaEventSynchronize_ (wrapper around
// cudaEventSynchronize or cuEventSynchronize with built-in error checking).
// Because the callee takes the event by non-const reference, `event` must be
// an lvalue. Appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
244 #define qudaEventSynchronize(event) \
245 ::quda::qudaEventSynchronize_(event, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaStreamSynchronize_ (wrapper around
// cudaStreamSynchronize or cuStreamSynchronize with built-in error checking).
// Because the callee takes the stream by non-const reference, `stream` must be
// an lvalue. Appends the caller's function name (__func__),
// quda::file_name(__FILE__) and the current line number as a string.
247 #define qudaStreamSynchronize(stream) \
248 ::quda::qudaStreamSynchronize_(stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
// Call-site front end for ::quda::qudaDeviceSynchronize_ (wrapper around
// cudaDeviceSynchronize or cuDeviceSynchronize with built-in error checking).
// Takes no arguments of its own; passes only the caller's function name
// (__func__), quda::file_name(__FILE__) and the current line number as a string.
250 #define qudaDeviceSynchronize() \
251 ::quda::qudaDeviceSynchronize_(__func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
enum QudaFieldLocation_s QudaFieldLocation
void qudaMemset2DAsync_(void *ptr, size_t pitch, int value, size_t width, size_t height, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemset2DAsync or driver API equivalent.
bool qudaEventQuery_(cudaEvent_t &event, const char *func, const char *file, const char *line)
Wrapper around cudaEventQuery or cuEventQuery with built-in error checking.
void qudaMemcpy2DAsync_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy2DAsync or driver API equivalent.
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void qudaMemset2D_(void *ptr, size_t pitch, int value, size_t width, size_t height, const char *func, const char *file, const char *line)
Wrapper around cudaMemset2D or driver API equivalent.
void printAPIProfile()
Print out the timer profile for CUDA API calls.
void qudaDeviceSynchronize_(const char *func, const char *file, const char *line)
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize with built-in error checking.
void qudaStreamSynchronize_(qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaStreamSynchronize or cuStreamSynchronize with built-in error checking.
void qudaEventSynchronize_(cudaEvent_t &event, const char *func, const char *file, const char *line)
Wrapper around cudaEventSynchronize or cuEventSynchronize with built-in error checking.
void qudaMemcpy2D_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy2D or driver API equivalent.
qudaError_t qudaLaunchKernel(const void *func, const TuneParam &tp, void **args, qudaStream_t stream)
Wrapper around cudaLaunchKernel.
void qudaMemsetAsync_(void *ptr, int value, size_t count, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemsetAsync or driver API equivalent.
void qudaMemPrefetchAsync_(void *ptr, size_t count, QudaFieldLocation mem_space, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemPrefetchAsync or driver API equivalent.
void qudaMemcpy_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy or driver API equivalent.
void qudaEventRecord_(cudaEvent_t &event, qudaStream_t stream, const char *func, const char *file, const char *line)
Wrapper around cudaEventRecord or cuEventRecord with built-in error checking.
void qudaStreamWaitEvent_(qudaStream_t stream, cudaEvent_t event, unsigned int flags, const char *func, const char *file, const char *line)
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking.
void qudaMemset_(void *ptr, int value, size_t count, const char *func, const char *file, const char *line)
Wrapper around cudaMemset or driver API equivalent.
void qudaMemcpyAsync_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpyAsync or driver API equivalent.
cudaDeviceProp deviceProp
cudaStream_t qudaStream_t