QUDA  v1.1.0
A library for QCD on GPUs
quda_api.h
Go to the documentation of this file.
1 #pragma once
2 
3 #ifndef __CUDACC_RTC__
4 #include <cuda.h>
5 #include <cuda_runtime.h>
6 #endif
7 
// CUDA device properties structure. Only declared here (extern); the listing's
// cross-reference places the definition in device.cpp.
8 extern cudaDeviceProp deviceProp;
// QUDA's stream handle type: a plain alias for the CUDA runtime's cudaStream_t.
9 using qudaStream_t = cudaStream_t;
10 
11 #include <enum_quda.h>
12 
20 namespace quda
21 {
22 
23  class TuneParam;
24 
32  qudaError_t qudaLaunchKernel(const void *func, const TuneParam &tp, void **args, qudaStream_t stream);
33 
42  template <typename T, typename... Arg>
43  qudaError_t qudaLaunchKernel(T *func, const TuneParam &tp, qudaStream_t stream, const Arg &... arg)
44  {
45  const void *args[] = {&arg...};
46  return qudaLaunchKernel(reinterpret_cast<const void *>(func), tp, const_cast<void **>(args), stream);
47  }
48 
56  void qudaMemcpy_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file,
57  const char *line);
58 
67  void qudaMemcpyAsync_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const qudaStream_t &stream,
68  const char *func, const char *file, const char *line);
69 
80  void qudaMemcpy2D_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height,
81  cudaMemcpyKind kind, const char *func, const char *file, const char *line);
82 
94  void qudaMemcpy2DAsync_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height,
95  cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file,
96  const char *line);
97 
104  void qudaMemset_(void *ptr, int value, size_t count, const char *func, const char *file, const char *line);
105 
114  void qudaMemset2D_(void *ptr, size_t pitch, int value, size_t width, size_t height, const char *func,
115  const char *file, const char *line);
116 
124  void qudaMemsetAsync_(void *ptr, int value, size_t count, const qudaStream_t &stream, const char *func,
125  const char *file, const char *line);
126 
136  void qudaMemset2DAsync_(void *ptr, size_t pitch, int value, size_t width, size_t height, const qudaStream_t &stream,
137  const char *func, const char *file, const char *line);
138 
146  void qudaMemPrefetchAsync_(void *ptr, size_t count, QudaFieldLocation mem_space, const qudaStream_t &stream,
147  const char *func, const char *file, const char *line);
148 
154  bool qudaEventQuery_(cudaEvent_t &event, const char *func, const char *file, const char *line);
155 
162  void qudaEventRecord_(cudaEvent_t &event, qudaStream_t stream, const char *func, const char *file, const char *line);
163 
171  void qudaStreamWaitEvent_(qudaStream_t stream, cudaEvent_t event, unsigned int flags, const char *func,
172  const char *file, const char *line);
173 
179  void qudaEventSynchronize_(cudaEvent_t &event, const char *func, const char *file, const char *line);
180 
186  void qudaStreamSynchronize_(qudaStream_t &stream, const char *func, const char *file, const char *line);
187 
192  void qudaDeviceSynchronize_(const char *func, const char *file, const char *line);
193 
197  void printAPIProfile();
198 
199 } // namespace quda
200 
// Two-level stringification. __STRINGIFY__(x) macro-expands x first and then
// hands the result to STRINGIFY__, which turns it into a string literal; this is
// what makes __STRINGIFY__(__LINE__) produce the line number as a string rather
// than the text "__LINE__".
// NOTE(review): identifiers containing a double underscore are reserved in C++;
// renaming would be cleaner but would break any code using these macros.
201 #define STRINGIFY__(x) #x
202 #define __STRINGIFY__(x) STRINGIFY__(x)
203 
// Call-site wrappers. Each macro forwards its arguments unchanged to the
// same-named quda::*_ function (trailing underscore) and appends three
// call-site arguments: __func__, quda::file_name(__FILE__) and the line number
// as a string literal. quda::file_name is not declared in this header as far as
// this listing shows, so it must be visible wherever these macros are expanded.

// Forwards to quda::qudaMemcpy_.
204 #define qudaMemcpy(dst, src, count, kind) \
205  ::quda::qudaMemcpy_(dst, src, count, kind, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
206 
// Forwards to quda::qudaMemcpyAsync_.
207 #define qudaMemcpyAsync(dst, src, count, kind, stream) \
208  ::quda::qudaMemcpyAsync_(dst, src, count, kind, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
209 
// Forwards to quda::qudaMemcpy2D_.
210 #define qudaMemcpy2D(dst, dpitch, src, spitch, width, height, kind) \
211  ::quda::qudaMemcpy2D_(dst, dpitch, src, spitch, width, height, kind, __func__, quda::file_name(__FILE__), \
212  __STRINGIFY__(__LINE__))
213 
// Forwards to quda::qudaMemcpy2DAsync_.
214 #define qudaMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, stream) \
215  ::quda::qudaMemcpy2DAsync_(dst, dpitch, src, spitch, width, height, kind, stream, __func__, \
216  quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
217 
// Forwards to quda::qudaMemset_.
218 #define qudaMemset(ptr, value, count) \
219  ::quda::qudaMemset_(ptr, value, count, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
220 
// Forwards to quda::qudaMemset2D_.
221 #define qudaMemset2D(ptr, pitch, value, width, height) \
222  ::quda::qudaMemset2D_(ptr, pitch, value, width, height, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
223 
// Forwards to quda::qudaMemsetAsync_.
224 #define qudaMemsetAsync(ptr, value, count, stream) \
225  ::quda::qudaMemsetAsync_(ptr, value, count, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
226 
// Forwards to quda::qudaMemset2DAsync_.
227 #define qudaMemset2DAsync(ptr, pitch, value, width, height, stream) \
228  ::quda::qudaMemset2DAsync_(ptr, pitch, value, width, height, stream, __func__, quda::file_name(__FILE__), \
229  __STRINGIFY__(__LINE__))
230 
// Forwards to quda::qudaMemPrefetchAsync_.
231 #define qudaMemPrefetchAsync(ptr, count, mem_space, stream) \
232  ::quda::qudaMemPrefetchAsync_(ptr, count, mem_space, stream, __func__, quda::file_name(__FILE__), \
233  __STRINGIFY__(__LINE__))
234 
// Forwards to quda::qudaEventQuery_; the expression evaluates to its bool result.
235 #define qudaEventQuery(event) \
236  ::quda::qudaEventQuery_(event, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
237 
// Forwards to quda::qudaEventRecord_.
238 #define qudaEventRecord(event, stream) \
239  ::quda::qudaEventRecord_(event, stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
240 
// Forwards to quda::qudaStreamWaitEvent_.
241 #define qudaStreamWaitEvent(stream, event, flags) \
242  ::quda::qudaStreamWaitEvent_(stream, event, flags, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
243 
// Forwards to quda::qudaEventSynchronize_.
244 #define qudaEventSynchronize(event) \
245  ::quda::qudaEventSynchronize_(event, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
246 
// Forwards to quda::qudaStreamSynchronize_.
247 #define qudaStreamSynchronize(stream) \
248  ::quda::qudaStreamSynchronize_(stream, __func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
249 
// Forwards to quda::qudaDeviceSynchronize_.
250 #define qudaDeviceSynchronize() \
251  ::quda::qudaDeviceSynchronize_(__func__, quda::file_name(__FILE__), __STRINGIFY__(__LINE__))
enum QudaFieldLocation_s QudaFieldLocation
qudaError_t
Definition: enum_quda.h:10
void qudaMemset2DAsync_(void *ptr, size_t pitch, int value, size_t width, size_t height, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemset2DAsync or driver API equivalent.
Definition: quda_api.cpp:356
bool qudaEventQuery_(cudaEvent_t &event, const char *func, const char *file, const char *line)
Wrapper around cudaEventQuery or cuEventQuery with built-in error checking.
Definition: quda_api.cpp:378
void qudaMemcpy2DAsync_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy2DAsync or driver API equivalent.
Definition: quda_api.cpp:301
__host__ __device__ ValueType arg(const complex< ValueType > &z)
Returns the phase angle of z.
void qudaMemset2D_(void *ptr, size_t pitch, int value, size_t width, size_t height, const char *func, const char *file, const char *line)
Wrapper around cudaMemset2D or driver API equivalent.
Definition: quda_api.cpp:349
void printAPIProfile()
Print out the timer profile for CUDA API calls.
Definition: quda_api.cpp:495
void qudaDeviceSynchronize_(const char *func, const char *file, const char *line)
Wrapper around cudaDeviceSynchronize or cuDeviceSynchronize with built-in error checking.
Definition: quda_api.cpp:464
qudaStream_t * stream
void qudaStreamSynchronize_(qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaStreamSynchronize or cuStreamSynchronize with built-in error checking.
Definition: quda_api.cpp:448
void qudaEventSynchronize_(cudaEvent_t &event, const char *func, const char *file, const char *line)
Wrapper around cudaEventSynchronize or cuEventSynchronize with built-in error checking.
Definition: quda_api.cpp:433
void qudaMemcpy2D_(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy2D or driver API equivalent.
Definition: quda_api.cpp:272
qudaError_t qudaLaunchKernel(const void *func, const TuneParam &tp, void **args, qudaStream_t stream)
Wrapper around cudaLaunchKernel.
Definition: quda_api.cpp:57
void qudaMemsetAsync_(void *ptr, int value, size_t count, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemsetAsync or driver API equivalent.
Definition: quda_api.cpp:340
void qudaMemPrefetchAsync_(void *ptr, size_t count, QudaFieldLocation mem_space, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemPrefetchAsync or driver API equivalent.
Definition: quda_api.cpp:363
void qudaMemcpy_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpy or driver API equivalent.
Definition: quda_api.cpp:232
void qudaEventRecord_(cudaEvent_t &event, qudaStream_t stream, const char *func, const char *file, const char *line)
Wrapper around cudaEventRecord or cuEventRecord with built-in error checking.
Definition: quda_api.cpp:402
void qudaStreamWaitEvent_(qudaStream_t stream, cudaEvent_t event, unsigned int flags, const char *func, const char *file, const char *line)
Wrapper around cudaStreamWaitEvent or cuStreamWaitEvent with built-in error checking.
Definition: quda_api.cpp:417
void qudaMemset_(void *ptr, int value, size_t count, const char *func, const char *file, const char *line)
Wrapper around cudaMemset or driver API equivalent.
Definition: quda_api.cpp:331
void qudaMemcpyAsync_(void *dst, const void *src, size_t count, cudaMemcpyKind kind, const qudaStream_t &stream, const char *func, const char *file, const char *line)
Wrapper around cudaMemcpyAsync or driver API equivalent.
Definition: quda_api.cpp:241
cudaDeviceProp deviceProp
Definition: device.cpp:14
cudaStream_t qudaStream_t
Definition: quda_api.h:9