QUDA  v0.5.0
A library for QCD on GPUs
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
malloc.cpp
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <cstdio>
3 #include <string>
4 #include <map>
5 #include <unistd.h> // for getpagesize()
6 #include <quda_internal.h>
7 
8 namespace quda {
9 
10  enum AllocType {
16  };
17 
18  class MemAlloc {
19 
20  public:
21  std::string func;
22  std::string file;
23  int line;
24  size_t size;
25  size_t base_size;
26 
28  : line(-1), size(0), base_size(0) { }
29 
30  MemAlloc(std::string func, std::string file, int line)
31  : func(func), file(file), line(line), size(0), base_size(0) { }
32 
34  if (&a != this) {
35  func = a.func;
36  file = a.file;
37  line = a.line;
38  size = a.size;
39  base_size = a.base_size;
40  }
41  return *this;
42  }
43  };
44 
45 
46  static std::map<void *, MemAlloc> alloc[N_ALLOC_TYPE];
47  static long total_bytes[N_ALLOC_TYPE] = {0};
48  static long max_total_bytes[N_ALLOC_TYPE] = {0};
49  static long total_host_bytes, max_total_host_bytes;
50  static long total_pinned_bytes, max_total_pinned_bytes;
51 
52  static void print_alloc_header()
53  {
54  printfQuda("Type Pointer Size Location\n");
55  printfQuda("----------------------------------------------------------\n");
56  }
57 
58 
59  static void print_alloc(AllocType type)
60  {
61  const char *type_str[] = {"Device", "Host ", "Pinned", "Mapped"};
62  std::map<void *, MemAlloc>::iterator entry;
63 
64  for (entry = alloc[type].begin(); entry != alloc[type].end(); entry++) {
65  void *ptr = entry->first;
66  MemAlloc a = entry->second;
67  printfQuda("%s %15p %15lu %s(), %s:%d\n", type_str[type], ptr, (unsigned long) a.base_size,
68  a.func.c_str(), a.file.c_str(), a.line);
69  }
70  }
71 
72 
73  static void track_malloc(const AllocType &type, const MemAlloc &a, void *ptr)
74  {
75  total_bytes[type] += a.base_size;
76  if (total_bytes[type] > max_total_bytes[type]) {
77  max_total_bytes[type] = total_bytes[type];
78  }
79  if (type != DEVICE) {
80  total_host_bytes += a.base_size;
81  if (total_host_bytes > max_total_host_bytes) {
82  max_total_host_bytes = total_host_bytes;
83  }
84  }
85  if (type == PINNED || type == MAPPED) {
86  total_pinned_bytes += a.base_size;
87  if (total_pinned_bytes > max_total_pinned_bytes) {
88  max_total_pinned_bytes = total_pinned_bytes;
89  }
90  }
91  alloc[type][ptr] = a;
92  }
93 
94 
95  static void track_free(const AllocType &type, void *ptr)
96  {
97  size_t size = alloc[type][ptr].base_size;
98  total_bytes[type] -= size;
99  if (type != DEVICE) {
100  total_host_bytes -= size;
101  }
102  if (type == PINNED || type == MAPPED) {
103  total_pinned_bytes -= size;
104  }
105  alloc[type].erase(ptr);
106  }
107 
108 
115  static void *aligned_malloc(MemAlloc &a, size_t size)
116  {
117  void *ptr;
118 
119  a.size = size;
120 
121 #if (CUDA_VERSION > 4000)
122  a.base_size = size;
123  ptr = malloc(size);
124 #else
125  static int page_size = getpagesize();
126  a.base_size = ((size + page_size - 1) / page_size) * page_size; // round up to the nearest multiple of page_size
127  posix_memalign(&ptr, page_size, a.base_size);
128 #endif
129  if (!ptr) {
130  printfQuda("ERROR: Failed to allocate aligned host memory (%s:%d in %s())\n", a.file.c_str(), a.line, a.func.c_str());
131  errorQuda("Aborting");
132  }
133  return ptr;
134  }
135 
136 
142  void *device_malloc_(const char *func, const char *file, int line, size_t size)
143  {
144  MemAlloc a(func, file, line);
145  void *ptr;
146 
147  a.size = a.base_size = size;
148 
149  cudaError_t err = cudaMalloc(&ptr, size);
150  if (err != cudaSuccess) {
151  printfQuda("ERROR: Failed to allocate device memory (%s:%d in %s())\n", file, line, func);
152  errorQuda("Aborting");
153  }
154  track_malloc(DEVICE, a, ptr);
155  return ptr;
156  }
157 
158 
164  void *safe_malloc_(const char *func, const char *file, int line, size_t size)
165  {
166  MemAlloc a(func, file, line);
167  a.size = a.base_size = size;
168 
169  void *ptr = malloc(size);
170  if (!ptr) {
171  printfQuda("ERROR: Failed to allocate host memory (%s:%d in %s())\n", file, line, func);
172  errorQuda("Aborting");
173  }
174  track_malloc(HOST, a, ptr);
175  return ptr;
176  }
177 
178 
188  void *pinned_malloc_(const char *func, const char *file, int line, size_t size)
189  {
190  MemAlloc a(func, file, line);
191  void *ptr = aligned_malloc(a, size);
192 
193  cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterDefault);
194  if (err != cudaSuccess) {
195  printfQuda("ERROR: Failed to register pinned memory (%s:%d in %s())\n", file, line, func);
196  errorQuda("Aborting");
197  }
198  track_malloc(PINNED, a, ptr);
199  return ptr;
200  }
201 
202 
208  void *mapped_malloc_(const char *func, const char *file, int line, size_t size)
209  {
210  MemAlloc a(func, file, line);
211  void *ptr = aligned_malloc(a, size);
212 
213  cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterMapped);
214  if (err != cudaSuccess) {
215  printfQuda("ERROR: Failed to register host-mapped memory (%s:%d in %s())\n", file, line, func);
216  errorQuda("Aborting");
217  }
218  track_malloc(MAPPED, a, ptr);
219  return ptr;
220  }
221 
222 
228  void device_free_(const char *func, const char *file, int line, void *ptr)
229  {
230  if (!ptr) {
231  printfQuda("ERROR: Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func);
232  errorQuda("Aborting");
233  }
234  if (!alloc[DEVICE].count(ptr)) {
235  printfQuda("ERROR: Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
236  errorQuda("Aborting");
237  }
238  cudaError_t err = cudaFree(ptr);
239  if (err != cudaSuccess) {
240  printfQuda("ERROR: Failed to free device memory (%s:%d in %s())\n", file, line, func);
241  errorQuda("Aborting");
242  }
243  track_free(DEVICE, ptr);
244  }
245 
246 
252  void host_free_(const char *func, const char *file, int line, void *ptr)
253  {
254  if (!ptr) {
255  printfQuda("ERROR: Attempt to free NULL host pointer (%s:%d in %s())\n", file, line, func);
256  errorQuda("Aborting");
257  }
258  if (alloc[HOST].count(ptr)) {
259  track_free(HOST, ptr);
260  } else if (alloc[PINNED].count(ptr)) {
261  cudaError_t err = cudaHostUnregister(ptr);
262  if (err != cudaSuccess) {
263  printfQuda("ERROR: Failed to unregister pinned memory (%s:%d in %s())\n", file, line, func);
264  errorQuda("Aborting");
265  }
266  track_free(PINNED, ptr);
267  } else if (alloc[MAPPED].count(ptr)) {
268  cudaError_t err = cudaHostUnregister(ptr);
269  if (err != cudaSuccess) {
270  printfQuda("ERROR: Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line, func);
271  errorQuda("Aborting");
272  }
273  track_free(MAPPED, ptr);
274  } else {
275  printfQuda("ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line, func);
276  errorQuda("Aborting");
277  }
278  free(ptr);
279  }
280 
281 
283  {
284  printfQuda("Device memory used = %.1f MB\n", max_total_bytes[DEVICE] / (double)(1<<20));
285  printfQuda("Page-locked host memory used = %.1f MB\n", max_total_pinned_bytes / (double)(1<<20));
286  printfQuda("Total host memory used >= %.1f MB\n", max_total_host_bytes / (double)(1<<20));
287  }
288 
289 
291  {
292  if (!alloc[DEVICE].empty() || !alloc[HOST].empty() || !alloc[PINNED].empty() || !alloc[MAPPED].empty()) {
293  warningQuda("The following internal memory allocations were not freed.");
294  printfQuda("\n");
295  print_alloc_header();
296  print_alloc(DEVICE);
297  print_alloc(HOST);
298  print_alloc(PINNED);
299  print_alloc(MAPPED);
300  printfQuda("\n");
301  }
302  }
303 
304 } // namespace quda