QUDA  v0.7.0
A library for QCD on GPUs
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
malloc.cpp
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <cstdio>
3 #include <string>
4 #include <map>
5 #include <unistd.h> // for getpagesize()
6 #include <quda_internal.h>
7 
8 #ifdef USE_QDPJIT
9 #include "qdp_quda.h"
10 #endif
11 
12 namespace quda {
13 
/**
 * Kind of allocation being tracked.  The enumerator values index the
 * per-type bookkeeping arrays (alloc[], total_bytes[], max_total_bytes[])
 * and the label table in print_alloc(), so their order must match the
 * {"Device", "Host", "Pinned", "Mapped"} labels used there.
 */
enum AllocType {
  DEVICE,      // memory obtained with cudaMalloc
  HOST,        // memory obtained with plain malloc
  PINNED,      // host memory registered with cudaHostRegisterDefault
  MAPPED,      // host memory registered with cudaHostRegisterMapped
  N_ALLOC_TYPE // number of allocation kinds; used as an array bound
};
21 
/**
 * Bookkeeping record describing one tracked allocation: where it was
 * requested (function, file, line, as supplied by the caller of the
 * *_malloc_ wrappers) and how large it is.
 */
class MemAlloc {

public:
  std::string func;  // name of the requesting function
  std::string file;  // source file of the request
  int line;          // source line of the request
  size_t size;       // number of bytes the caller asked for
  size_t base_size;  // number of bytes actually allocated (may be rounded up)

  /** Create an empty record (no location, zero sizes). */
  MemAlloc()
    : line(-1), size(0), base_size(0) { }

  /** Create a record for the given call site; sizes are filled in by the allocator. */
  MemAlloc(std::string func, std::string file, int line)
    : func(func), file(file), line(line), size(0), base_size(0) { }

  /** Member-wise copy assignment, guarded against self-assignment. */
  MemAlloc& operator=(const MemAlloc &a) {
    if (&a != this) {
      func = a.func;
      file = a.file;
      line = a.line;
      size = a.size;
      base_size = a.base_size;
    }
    return *this;
  }
};
48 
49 
50  static std::map<void *, MemAlloc> alloc[N_ALLOC_TYPE];
51  static long total_bytes[N_ALLOC_TYPE] = {0};
52  static long max_total_bytes[N_ALLOC_TYPE] = {0};
53  static long total_host_bytes, max_total_host_bytes;
54  static long total_pinned_bytes, max_total_pinned_bytes;
55 
56  static void print_alloc_header()
57  {
58  printfQuda("Type Pointer Size Location\n");
59  printfQuda("----------------------------------------------------------\n");
60  }
61 
62 
63  static void print_alloc(AllocType type)
64  {
65  const char *type_str[] = {"Device", "Host ", "Pinned", "Mapped"};
66  std::map<void *, MemAlloc>::iterator entry;
67 
68  for (entry = alloc[type].begin(); entry != alloc[type].end(); entry++) {
69  void *ptr = entry->first;
70  MemAlloc a = entry->second;
71  printfQuda("%s %15p %15lu %s(), %s:%d\n", type_str[type], ptr, (unsigned long) a.base_size,
72  a.func.c_str(), a.file.c_str(), a.line);
73  }
74  }
75 
76 
77  static void track_malloc(const AllocType &type, const MemAlloc &a, void *ptr)
78  {
79  total_bytes[type] += a.base_size;
80  if (total_bytes[type] > max_total_bytes[type]) {
81  max_total_bytes[type] = total_bytes[type];
82  }
83  if (type != DEVICE) {
84  total_host_bytes += a.base_size;
85  if (total_host_bytes > max_total_host_bytes) {
86  max_total_host_bytes = total_host_bytes;
87  }
88  }
89  if (type == PINNED || type == MAPPED) {
90  total_pinned_bytes += a.base_size;
91  if (total_pinned_bytes > max_total_pinned_bytes) {
92  max_total_pinned_bytes = total_pinned_bytes;
93  }
94  }
95  alloc[type][ptr] = a;
96  }
97 
98 
99  static void track_free(const AllocType &type, void *ptr)
100  {
101  size_t size = alloc[type][ptr].base_size;
102  total_bytes[type] -= size;
103  if (type != DEVICE) {
104  total_host_bytes -= size;
105  }
106  if (type == PINNED || type == MAPPED) {
107  total_pinned_bytes -= size;
108  }
109  alloc[type].erase(ptr);
110  }
111 
112 
119  static void *aligned_malloc(MemAlloc &a, size_t size)
120  {
121  void *ptr;
122 
123  a.size = size;
124 
125 #if (CUDA_VERSION > 4000)
126  a.base_size = size;
127  ptr = malloc(size);
128 #else
129  static int page_size = getpagesize();
130  a.base_size = ((size + page_size - 1) / page_size) * page_size; // round up to the nearest multiple of page_size
131  posix_memalign(&ptr, page_size, a.base_size);
132 #endif
133  if (!ptr) {
134  printfQuda("ERROR: Failed to allocate aligned host memory (%s:%d in %s())\n", a.file.c_str(), a.line, a.func.c_str());
135  errorQuda("Aborting");
136  }
137  return ptr;
138  }
139 
140 
146  void *device_malloc_(const char *func, const char *file, int line, size_t size)
147  {
148  MemAlloc a(func, file, line);
149  void *ptr;
150 
151  a.size = a.base_size = size;
152 
153  cudaError_t err = cudaMalloc(&ptr, size);
154  if (err != cudaSuccess) {
155  printfQuda("ERROR: Failed to allocate device memory (%s:%d in %s())\n", file, line, func);
156  errorQuda("Aborting");
157  }
158  track_malloc(DEVICE, a, ptr);
159  return ptr;
160  }
161 
162 
168  void *safe_malloc_(const char *func, const char *file, int line, size_t size)
169  {
170  MemAlloc a(func, file, line);
171  a.size = a.base_size = size;
172 
173  void *ptr = malloc(size);
174  if (!ptr) {
175  printfQuda("ERROR: Failed to allocate host memory (%s:%d in %s())\n", file, line, func);
176  errorQuda("Aborting");
177  }
178  track_malloc(HOST, a, ptr);
179  return ptr;
180  }
181 
182 
192  void *pinned_malloc_(const char *func, const char *file, int line, size_t size)
193  {
194  MemAlloc a(func, file, line);
195  void *ptr = aligned_malloc(a, size);
196 
197  cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterDefault);
198  if (err != cudaSuccess) {
199  printfQuda("ERROR: Failed to register pinned memory (%s:%d in %s())\n", file, line, func);
200  errorQuda("Aborting");
201  }
202  track_malloc(PINNED, a, ptr);
203  return ptr;
204  }
205 
206 
212  void *mapped_malloc_(const char *func, const char *file, int line, size_t size)
213  {
214  MemAlloc a(func, file, line);
215  void *ptr = aligned_malloc(a, size);
216 
217  cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterMapped);
218  if (err != cudaSuccess) {
219  printfQuda("ERROR: Failed to register host-mapped memory (%s:%d in %s())\n", file, line, func);
220  errorQuda("Aborting");
221  }
222  track_malloc(MAPPED, a, ptr);
223  return ptr;
224  }
225 
226 
232  void device_free_(const char *func, const char *file, int line, void *ptr)
233  {
234  if (!ptr) {
235  printfQuda("ERROR: Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func);
236  errorQuda("Aborting");
237  }
238  if (!alloc[DEVICE].count(ptr)) {
239  printfQuda("ERROR: Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
240  errorQuda("Aborting");
241  }
242  cudaError_t err = cudaFree(ptr);
243  if (err != cudaSuccess) {
244  printfQuda("ERROR: Failed to free device memory (%s:%d in %s())\n", file, line, func);
245  errorQuda("Aborting");
246  }
247  track_free(DEVICE, ptr);
248  }
249 
250 
256  void host_free_(const char *func, const char *file, int line, void *ptr)
257  {
258  if (!ptr) {
259  printfQuda("ERROR: Attempt to free NULL host pointer (%s:%d in %s())\n", file, line, func);
260  errorQuda("Aborting");
261  }
262  if (alloc[HOST].count(ptr)) {
263  track_free(HOST, ptr);
264  } else if (alloc[PINNED].count(ptr)) {
265  cudaError_t err = cudaHostUnregister(ptr);
266  if (err != cudaSuccess) {
267  printfQuda("ERROR: Failed to unregister pinned memory (%s:%d in %s())\n", file, line, func);
268  errorQuda("Aborting");
269  }
270  track_free(PINNED, ptr);
271  } else if (alloc[MAPPED].count(ptr)) {
272  cudaError_t err = cudaHostUnregister(ptr);
273  if (err != cudaSuccess) {
274  printfQuda("ERROR: Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line, func);
275  errorQuda("Aborting");
276  }
277  track_free(MAPPED, ptr);
278  } else {
279  printfQuda("ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line, func);
280  errorQuda("Aborting");
281  }
282  free(ptr);
283  }
284 
285 
287  {
288  printfQuda("Device memory used = %.1f MB\n", max_total_bytes[DEVICE] / (double)(1<<20));
289  printfQuda("Page-locked host memory used = %.1f MB\n", max_total_pinned_bytes / (double)(1<<20));
290  printfQuda("Total host memory used >= %.1f MB\n", max_total_host_bytes / (double)(1<<20));
291  }
292 
293 
295  {
296  if (!alloc[DEVICE].empty() || !alloc[HOST].empty() || !alloc[PINNED].empty() || !alloc[MAPPED].empty()) {
297  warningQuda("The following internal memory allocations were not freed.");
298  printfQuda("\n");
299  print_alloc_header();
300  print_alloc(DEVICE);
301  print_alloc(HOST);
302  print_alloc(PINNED);
303  print_alloc(MAPPED);
304  printfQuda("\n");
305  }
306  }
307 
308 } // namespace quda
std::string file
Definition: malloc.cpp:26
void * safe_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:168
#define errorQuda(...)
Definition: util_quda.h:73
void * device_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:146
::std::string string
Definition: gtest.h:1979
AllocType
Definition: malloc.cpp:14
void assertAllMemFree()
Definition: malloc.cpp:294
void device_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:232
MemAlloc & operator=(const MemAlloc &a)
Definition: malloc.cpp:37
std::string func
Definition: malloc.cpp:25
MemAlloc(std::string func, std::string file, int line)
Definition: malloc.cpp:34
void host_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:256
#define warningQuda(...)
Definition: util_quda.h:84
__device__ unsigned int count
Definition: reduce_core.h:112
void * mapped_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:212
void printPeakMemUsage()
Definition: malloc.cpp:286
#define printfQuda(...)
Definition: util_quda.h:67
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:192
size_t base_size
Definition: malloc.cpp:29
size_t size
Definition: malloc.cpp:28