QUDA  0.9.0
quda_internal.h
Go to the documentation of this file.
1 #ifndef _QUDA_INTERNAL_H
2 #define _QUDA_INTERNAL_H
3 
4 #include <quda_cuda_api.h>
5 #include <sys/time.h>
6 #include <string>
7 #include <complex>
8 
9 #if ((defined(QMP_COMMS) || defined(MPI_COMMS)) && !defined(MULTI_GPU))
10 #error "MULTI_GPU must be enabled to use MPI or QMP"
11 #endif
12 
13 #if (!defined(QMP_COMMS) && !defined(MPI_COMMS) && defined(MULTI_GPU))
14 #error "MPI or QMP must be enabled to use MULTI_GPU"
15 #endif
16 
17 //#ifdef USE_QDPJIT
18 //#include "qdp_quda.h"
19 //#endif
20 
21 #ifdef QMP_COMMS
22 #include <qmp.h>
23 #endif
24 
25 #ifdef PTHREADS
26 #include <pthread.h>
27 #endif
28 
29 #define MAX_SHORT 32767.0f
30 
// Texture alignment requirement: Fermi, factor 2 comes from even/odd
#define TEX_ALIGN_REQ (512*2)

// Round n up to the nearest multiple of TEX_ALIGN_REQ using integer arithmetic.
// The argument is parenthesized so that expressions with lower-precedence
// operators (e.g. `a | b`, `c ? x : y`) are rounded as a whole; it is still
// evaluated exactly once.
#define ALIGNMENT_ADJUST(n) ( ((n)+TEX_ALIGN_REQ-1)/TEX_ALIGN_REQ*TEX_ALIGN_REQ )
33 #include <enum_quda.h>
34 #include <quda.h>
35 #include <util_quda.h>
36 #include <malloc_quda.h>
37 #include <object.h>
38 
39 #include <vector>
40 
41 // Use bindless textures (texture objects) on Kepler and newer: compute capability >= 3.0 with CUDA >= 5.0
42 #if (__COMPUTE_CAPABILITY__ >= 300) && (CUDA_VERSION >= 5000)
43 #define USE_TEXTURE_OBJECTS
44 #endif
45 
46 // Without texture objects, multi-blas support must be disabled (MAX_MULTI_BLAS_N is forced to 1) because multi-blas does not work with texture references
47 #ifndef USE_TEXTURE_OBJECTS
48 #undef MAX_MULTI_BLAS_N
49 #define MAX_MULTI_BLAS_N 1
50 #endif
51 
52 
53 #ifdef INTERFACE_NVTX
54 #include "nvToolsExt.h"
55 #endif
56 
57 
58 #ifdef __cplusplus
59 extern "C" {
60 #endif
61 
62  typedef void *ParityGauge;
63 
64  // replace below with ColorSpinorField
65  typedef struct {
66  size_t bytes;
68  int length; // total length
69  int volume; // geometric volume (single parity)
70  int X[QUDA_MAX_DIM]; // the geometric lengths (single parity)
71  int Nc; // length of color dimension
72  int Ns; // length of spin dimension
73  void *data; // either (double2*), (float4 *) or (short4 *), depending on precision
74  float *dataNorm; // used only when precision is QUDA_HALF_PRECISION
75  } ParityHw;
76 
77  typedef struct {
80  } FullHw;
81 
83  void *field;
84  };
85 
86  extern cudaDeviceProp deviceProp;
87  extern cudaStream_t *streams;
88 
89 #ifdef PTHREADS
90  extern pthread_mutex_t pthread_mutex;
91 #endif
92 
93 #ifdef __cplusplus
94 }
95 #endif
96 
97 namespace quda {
98 
99  typedef std::complex<double> Complex;
100 
106  struct Timer {
108  double time;
109 
111  double last;
112 
115 
118 
120  bool running;
121 
123  int count;
124 
125  Timer() : time(0.0), last(0.0), running(false), count(0) { ; }
126 
127  void Start(const char *func, const char *file, int line) {
128  if (running) {
129  printfQuda("ERROR: Cannot start an already running timer (%s:%d in %s())\n", file, line, func);
130  errorQuda("Aborting");
131  }
132  gettimeofday(&start, NULL);
133  running = true;
134  }
135 
136  void Stop(const char *func, const char *file, int line) {
137  if (!running) {
138  printfQuda("ERROR: Cannot stop an unstarted timer (%s:%d in %s())\n", file, line, func);
139  errorQuda("Aborting");
140  }
141  gettimeofday(&stop, NULL);
142 
143  long ds = stop.tv_sec - start.tv_sec;
144  long dus = stop.tv_usec - start.tv_usec;
145  last = ds + 0.000001*dus;
146  time += last;
147  count++;
148 
149  running = false;
150  }
151 
152  double Last() { return last; }
153 
154  void Reset(const char *func, const char *file, int line) {
155  if (running) {
156  printfQuda("ERROR: Cannot reset a started timer (%s:%d in %s())\n", file, line, func);
157  errorQuda("Aborting");
158  }
159  time = 0.0;
160  last = 0.0;
161  count = 0;
162  }
163 
164  };
165 
178  // lower level counters used in the dslash and api profiling
207  };
208 
209 #ifdef INTERFACE_NVTX
210 
211 
212 
/* Open an NVTX range whose ASCII message is `name`.  `cid` is stored as the
   event category and, reduced modulo nvtx_num_colors, selects the ARGB colour
   from nvtx_colors.  Both names are resolved at the point of expansion. */
#define PUSH_RANGE(name,cid) { \
    int nvtxColorIdx = cid; \
    nvtxColorIdx %= nvtx_num_colors; \
    nvtxEventAttributes_t nvtxAttr = {0}; \
    nvtxAttr.version = NVTX_VERSION; \
    nvtxAttr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; \
    nvtxAttr.colorType = NVTX_COLOR_ARGB; \
    nvtxAttr.color = nvtx_colors[nvtxColorIdx]; \
    nvtxAttr.messageType = NVTX_MESSAGE_TYPE_ASCII; \
    nvtxAttr.message.ascii = name; \
    nvtxAttr.category = cid; \
    nvtxRangePushEx(&nvtxAttr); \
  }
/* Close the most recently opened NVTX range. */
#define POP_RANGE nvtxRangePop();
227 #else
228 #define PUSH_RANGE(name,cid)
229 #define POP_RANGE
230 #endif
231 
232  class TimeProfile {
233  std::string fname;
234 #ifdef INTERFACE_NVTX
235  static const uint32_t nvtx_colors[];// = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
236  static const int nvtx_num_colors;// = sizeof(nvtx_colors)/sizeof(uint32_t);
237 #endif
239  static std::string pname[];
240 
241  bool switchOff;
243 
244  // global timer
247  static int global_total_level[QUDA_PROFILE_COUNT]; // zero initialize
248 
249  static void StopGlobal(const char *func, const char *file, int line, QudaProfileType idx) {
250 
252  if (global_total_level[idx]==0) global_profile[idx].Stop(func,file,line);
253 
254  // switch off total timer if we need to
255  if (global_switchOff[idx]) {
257  if (global_total_level[idx]==0) global_profile[idx].Stop(func,file,line);
258  global_switchOff[idx] = false;
259  }
260  }
261 
262  static void StartGlobal(const char *func, const char *file, int line, QudaProfileType idx) {
263  // if total timer isn't running, then start it running
264  if (!global_profile[idx].running) {
265  global_profile[idx].Start(func,file,line);
267  global_switchOff[idx] = true;
268  }
269 
270  if (global_total_level[idx]==0) global_profile[idx].Start(func,file,line);
272  }
273 
274  public:
275  TimeProfile(std::string fname) : fname(fname), switchOff(false), use_global(true) { ; }
276 
277  TimeProfile(std::string fname, bool use_global) : fname(fname), switchOff(false), use_global(use_global) { ; }
278 
280  void Print();
281 
282  void Start_(const char *func, const char *file, int line, QudaProfileType idx) {
283  // if total timer isn't running, then start it running
284  if (!profile[QUDA_PROFILE_TOTAL].running && idx != QUDA_PROFILE_TOTAL) {
285  profile[QUDA_PROFILE_TOTAL].Start(func,file,line);
286  switchOff = true;
287  }
288 
289  profile[idx].Start(func, file, line);
290  PUSH_RANGE(fname.c_str(),idx)
291  if (use_global) StartGlobal(func,file,line,idx);
292  }
293 
294 
295  void Stop_(const char *func, const char *file, int line, QudaProfileType idx) {
296  profile[idx].Stop(func, file, line);
297  POP_RANGE
298 
299  // switch off total timer if we need to
300  if (switchOff && idx != QUDA_PROFILE_TOTAL) {
301  profile[QUDA_PROFILE_TOTAL].Stop(func,file,line);
302  switchOff = false;
303  }
304  if (use_global) StopGlobal(func,file,line,idx);
305  }
306 
307  void Reset_(const char *func, const char *file, int line) {
308  for (int idx=0; idx<QUDA_PROFILE_COUNT; idx++)
309  profile[idx].Reset(func, file, line);
310  }
311 
313  return profile[idx].last;
314  }
315 
316  static void PrintGlobal();
317 
318  };
319 
320 #define TPSTART(idx) Start_(__func__, __FILE__, __LINE__, idx)
321 #define TPSTOP(idx) Stop_(__func__, __FILE__, __LINE__, idx)
322 #define TPRESET() Reset_(__func__, __FILE__, __LINE__)
323 
324 #undef PUSH_RANGE
325 #undef POP_RANGE
326 
327 #ifdef PTHREADS
328  const int Nstream = 10;
329 #else
330  const int Nstream = 9;
331 #endif
332 
338 }
339 
340 #endif // _QUDA_INTERNAL_H
void Stop_(const char *func, const char *file, int line, QudaProfileType idx)
QudaPrecision precision
Definition: quda_internal.h:67
static std::string pname[]
void Start_(const char *func, const char *file, int line, QudaProfileType idx)
enum QudaPrecision_s QudaPrecision
cudaDeviceProp deviceProp
__darwin_time_t tv_sec
const void * func
#define errorQuda(...)
Definition: util_quda.h:90
std::complex< double > Complex
Definition: eig_variables.h:13
cudaStream_t * streams
static void StartGlobal(const char *func, const char *file, int line, QudaProfileType idx)
const int Nstream
static int global_total_level[QUDA_PROFILE_COUNT]
void * ParityGauge
Definition: quda_internal.h:62
timeval start
__darwin_suseconds_t tv_usec
double Last(QudaProfileType idx)
size_t bytes
Definition: quda_internal.h:66
QudaInvertParam inv_param
Definition: covdev_test.cpp:37
bool canReuseResidentGauge(QudaInvertParam *inv_param)
Timer profile[QUDA_PROFILE_COUNT]
static void StopGlobal(const char *func, const char *file, int line, QudaProfileType idx)
ParityHw odd
Definition: quda_internal.h:78
void Print()
Definition: timer.cpp:6
#define PUSH_RANGE(name, cid)
void Start(const char *func, const char *file, int line)
QudaProfileType
void Reset_(const char *func, const char *file, int line)
void * data
Definition: quda_internal.h:73
static bool global_switchOff[QUDA_PROFILE_COUNT]
double Last()
static Timer global_profile[QUDA_PROFILE_COUNT]
unsigned int uint32_t
timeval stop
Main header file for the QUDA library.
ParityHw even
Definition: quda_internal.h:79
#define printfQuda(...)
Definition: util_quda.h:84
std::string fname
#define POP_RANGE
TimeProfile(std::string fname)
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
static void PrintGlobal()
Definition: timer.cpp:55
float * dataNorm
Definition: quda_internal.h:74
void Stop(const char *func, const char *file, int line)
void Reset(const char *func, const char *file, int line)
TimeProfile(std::string fname, bool use_global)