QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
quda_internal.h
Go to the documentation of this file.
1 #ifndef _QUDA_INTERNAL_H
2 #define _QUDA_INTERNAL_H
3 
4 #include <cuda.h>
5 #include <cuda_runtime.h>
6 #include <sys/time.h>
7 #include <string>
8 #include <complex>
9 
// Build-configuration check: fail the compile if either communications
// backend macro (QMP_COMMS or MPI_COMMS) is defined while MULTI_GPU is not.
10 #if ((defined(QMP_COMMS) || defined(MPI_COMMS)) && !defined(MULTI_GPU))
11 #error "MULTI_GPU must be enabled to use MPI or QMP"
12 #endif
13 
// Build-configuration check: fail the compile if MULTI_GPU is defined but
// neither communications backend macro (QMP_COMMS, MPI_COMMS) is defined.
14 #if (!defined(QMP_COMMS) && !defined(MPI_COMMS) && defined(MULTI_GPU))
15 #error "MPI or QMP must be enabled to use MULTI_GPU"
16 #endif
17 
18 //#ifdef USE_QDPJIT
19 //#include "qdp_quda.h"
20 //#endif
21 
22 #ifdef QMP_COMMS
23 #include <qmp.h>
24 #endif
25 
26 #ifdef PTHREADS
27 #include <pthread.h>
28 #endif
29 
30 #define MAX_SHORT 32767.0f
31 
// Alignment granularity used by ALIGNMENT_ADJUST.  Per the original note the
// 512 is the Fermi requirement and the factor 2 comes from even/odd.
#define TEX_ALIGN_REQ (512*2)

// Round n up to the next multiple of TEX_ALIGN_REQ (values that are already
// a multiple are returned unchanged).  The argument is parenthesised so that
// expressions containing lower-precedence operators (e.g. `a | b`,
// `c ? x : y`, `x << k`) are rounded as a whole rather than being split by
// the macro's own arithmetic.  n is evaluated exactly once.
#define ALIGNMENT_ADJUST(n) ( ((n)+TEX_ALIGN_REQ-1)/TEX_ALIGN_REQ*TEX_ALIGN_REQ )
34 #include <enum_quda.h>
35 #include <quda.h>
36 #include <util_quda.h>
37 #include <malloc_quda.h>
38 
39 #include <vector>
40 
41 // Use bindless texture on Kepler
42 #if (__COMPUTE_CAPABILITY__ >= 300) && (CUDA_VERSION >= 5000)
43 #define USE_TEXTURE_OBJECTS
44 #endif
45 
46 
47 #ifdef __cplusplus
48 extern "C" {
49 #endif
50 
51  typedef void *ParityGauge;
52 
53  // replace below with ColorSpinorField
54  typedef struct {
55  size_t bytes;
57  int length; // total length
58  int volume; // geometric volume (single parity)
59  int X[QUDA_MAX_DIM]; // the geometric lengths (single parity)
60  int Nc; // length of color dimension
61  int Ns; // length of spin dimension
62  void *data; // either (double2*), (float4 *) or (short4 *), depending on precision
63  float *dataNorm; // used only when precision is QUDA_HALF_PRECISION
64  } ParityHw;
65 
66  typedef struct {
69  } FullHw;
70 
72  void *field;
73  };
74 
75  extern cudaDeviceProp deviceProp;
76  extern cudaStream_t *streams;
77 
78 #ifdef PTHREADS
79  extern pthread_mutex_t pthread_mutex;
80 #endif
81 
82 #ifdef __cplusplus
83 }
84 #endif
85 
// Lvalue access to the first (REAL) and second (IMAG) double stored at the
// address of `a`.  `a` must be an lvalue whose storage begins with two
// consecutive doubles, e.g. a std::complex<double>.  The argument is
// parenthesised so that a compound lvalue expression such as
// `flag ? z : w` has its address taken as a whole.
#define REAL(a) (*((double*)&(a)))
#define IMAG(a) (*((double*)&(a)+1))
88 
89 namespace quda {
90 
91  typedef std::complex<double> Complex;
92 
98  struct Timer {
100  double time;
101 
103  double last;
104 
106  timeval start;
107 
109  timeval stop;
110 
112  bool running;
113 
115  int count;
116 
117  Timer() : time(0.0), last(0.0), running(false), count(0) { ; }
118 
119  void Start() {
120  if (running) errorQuda("Cannot start an already running timer");
121  gettimeofday(&start, NULL);
122  running = true;
123  }
124 
125  void Stop() {
126  if (!running) errorQuda("Cannot stop an unstarted timer");
127  gettimeofday(&stop, NULL);
128 
129  long ds = stop.tv_sec - start.tv_sec;
130  long dus = stop.tv_usec - start.tv_usec;
131  last = ds + 0.000001*dus;
132  time += last;
133  count++;
134 
135  running = false;
136  }
137 
138  double Last() { return last; }
139 
140  };
141 
152  // lower level counters used in the dslash
169  };
170 
171  struct TimeProfile {
175  static std::string pname[];
176 
177  bool switchOff;
178 
179  TimeProfile(std::string fname) : fname(fname), switchOff(false) { ; }
180 
182  void Print();
183 
185  // if total timer isn't running, then start it running
186  if (!profile[QUDA_PROFILE_TOTAL].running && idx != QUDA_PROFILE_TOTAL) {
188  switchOff = true;
189  }
190 
191  profile[idx].Start();
192  }
193 
195  profile[idx].Stop();
196 
197  // switch off total timer if we need to
198  if (switchOff && idx != QUDA_PROFILE_TOTAL) {
200  switchOff = false;
201  }
202  }
203 
205  return profile[idx].last;
206  }
207 
208  };
209 
// Compile-time value of Nstream, selected by the build flags:
//   MULTI_GPU and PTHREADS both defined -> 10
//   MULTI_GPU defined, PTHREADS not     -> 9
//   MULTI_GPU not defined               -> 1
// NOTE(review): presumably the number of entries behind the `streams`
// pointer declared earlier in this header - confirm against the code that
// allocates it.
210 #ifdef MULTI_GPU
211 #ifdef PTHREADS
212  const int Nstream = 10;
213 #else
214  const int Nstream = 9;
215 #endif
216 #else
217  const int Nstream = 1;
218 #endif
219 
220 } // namespace quda
221 
222 #endif // _QUDA_INTERNAL_H
QudaPrecision precision
Definition: quda_internal.h:56
enum QudaPrecision_s QudaPrecision
static std::string pname[]
cudaDeviceProp deviceProp
void Print()
Definition: timer.cpp:6
#define errorQuda(...)
Definition: util_quda.h:73
std::complex< double > Complex
Definition: eig_variables.h:13
cudaStream_t * streams
::std::string string
Definition: gtest.h:1979
const int Nstream
void * ParityGauge
Definition: quda_internal.h:51
timeval start
size_t bytes
Definition: quda_internal.h:55
ParityHw odd
Definition: quda_internal.h:67
TimeProfile(std::string fname)
std::string fname
QudaProfileType
void * data
Definition: quda_internal.h:62
double Last()
timeval stop
void Stop(QudaProfileType idx)
Main header file for the QUDA library.
ParityHw even
Definition: quda_internal.h:68
double Last(QudaProfileType idx)
void Start(QudaProfileType idx)
#define QUDA_MAX_DIM
Maximum number of dimensions supported by QUDA. In practice, no routines make use of more than 5...
float * dataNorm
Definition: quda_internal.h:63
Timer profile[QUDA_PROFILE_COUNT]