QUDA  v1.1.0
A library for QCD on GPUs
malloc.cpp
Go to the documentation of this file.
1 #include <cstdlib>
2 #include <cstdio>
3 #include <string>
4 #include <map>
5 #include <unistd.h> // for getpagesize()
6 #include <execinfo.h> // for backtrace
7 #include <quda_internal.h>
8 #include <shmem_helper.cuh>
9 
10 #ifdef USE_QDPJIT
11 #include "qdp_quda.h"
12 #include "qdp_config.h"
13 #endif
14 
15 #ifdef QUDA_BACKWARDSCPP
16 #include "backward.hpp"
17 #endif
18 
19 namespace quda
20 {
21 
23 
24  class MemAlloc
25  {
26 
27  public:
30  int line;
31  size_t size;
32  size_t base_size;
33 #ifdef QUDA_BACKWARDSCPP
34  backward::StackTrace st;
35 #endif
36 
37  MemAlloc() : line(-1), size(0), base_size(0) {}
38 
40  {
41 #ifdef QUDA_BACKWARDSCPP
42  st.load_here(32);
43  st.skip_n_firsts(1);
44 #endif
45  }
46 
48  {
49  if (&a != this) {
50  func = a.func;
51  file = a.file;
52  line = a.line;
53  size = a.size;
54  base_size = a.base_size;
55 #ifdef QUDA_BACKWARDSCPP
56  st = a.st;
57 #endif
58  }
59  return *this;
60  }
61  };
62 
63  static std::map<void *, MemAlloc> alloc[N_ALLOC_TYPE];
64  static size_t total_bytes[N_ALLOC_TYPE] = {0};
65  static size_t max_total_bytes[N_ALLOC_TYPE] = {0};
66  static size_t total_host_bytes, max_total_host_bytes;
67  static size_t total_pinned_bytes, max_total_pinned_bytes;
68 
69  size_t device_allocated() { return total_bytes[DEVICE]; }
70 
71  size_t pinned_allocated() { return total_bytes[PINNED]; }
72 
73  size_t mapped_allocated() { return total_bytes[MAPPED]; }
74 
75  size_t managed_allocated() { return total_bytes[MANAGED]; }
76 
77  size_t host_allocated() { return total_bytes[HOST]; }
78 
79  size_t device_allocated_peak() { return max_total_bytes[DEVICE]; }
80 
81  size_t pinned_allocated_peak() { return max_total_bytes[PINNED]; }
82 
83  size_t mapped_allocated_peak() { return max_total_bytes[MAPPED]; }
84 
85  size_t managed_allocated_peak() { return max_total_bytes[MANAGED]; }
86 
87  size_t host_allocated_peak() { return max_total_bytes[HOST]; }
88 
89  static void print_trace(void)
90  {
91  void *array[10];
92  size_t size;
93  char **strings;
94  size = backtrace(array, 10);
95  strings = backtrace_symbols(array, size);
96  printfQuda("Obtained %zd stack frames.\n", size);
97  for (size_t i = 0; i < size; i++) printfQuda("%s\n", strings[i]);
98  free(strings);
99  }
100 
101  static void print_alloc_header()
102  {
103  printfQuda("Type Pointer Size Location\n");
104  printfQuda("----------------------------------------------------------\n");
105  }
106 
107  static void print_alloc(AllocType type)
108  {
109  const char *type_str[] = {"Device", "Device Pinned", "Host ", "Pinned", "Mapped", "Managed", "Shmem "};
110 
111  for (auto entry : alloc[type]) {
112  void *ptr = entry.first;
113  MemAlloc a = entry.second;
114  printfQuda("%s %15p %15lu %s(), %s:%d\n", type_str[type], ptr, (unsigned long)a.base_size, a.func.c_str(),
115  a.file.c_str(), a.line);
116 #ifdef QUDA_BACKWARDSCPP
117  if (getRankVerbosity()) {
118  backward::Printer p;
119  p.print(a.st);
120  }
121 #endif
122  }
123  }
124 
125  static void track_malloc(const AllocType &type, const MemAlloc &a, void *ptr)
126  {
127  total_bytes[type] += a.base_size;
128  if (total_bytes[type] > max_total_bytes[type]) { max_total_bytes[type] = total_bytes[type]; }
129  if (type != DEVICE && type != DEVICE_PINNED && type != SHMEM) {
130  total_host_bytes += a.base_size;
131  if (total_host_bytes > max_total_host_bytes) { max_total_host_bytes = total_host_bytes; }
132  }
133  if (type == PINNED || type == MAPPED) {
134  total_pinned_bytes += a.base_size;
135  if (total_pinned_bytes > max_total_pinned_bytes) { max_total_pinned_bytes = total_pinned_bytes; }
136  }
137  alloc[type][ptr] = a;
138  }
139 
140  static void track_free(const AllocType &type, void *ptr)
141  {
142  size_t size = alloc[type][ptr].base_size;
143  total_bytes[type] -= size;
144  if (type != DEVICE && type != DEVICE_PINNED && type != SHMEM) { total_host_bytes -= size; }
145  if (type == PINNED || type == MAPPED) { total_pinned_bytes -= size; }
146  alloc[type].erase(ptr);
147  }
148 
155  static void *aligned_malloc(MemAlloc &a, size_t size)
156  {
157  void *ptr = nullptr;
158 
159  a.size = size;
160 
161 #if (CUDA_VERSION > 4000) \
162  && 0 // we need to manually align to page boundaries to allow us to bind a texture to mapped memory
163  a.base_size = size;
164  ptr = malloc(size);
165  if (!ptr) {
166 #else
167  static int page_size = 2 * getpagesize();
168  a.base_size = ((size + page_size - 1) / page_size) * page_size; // round up to the nearest multiple of page_size
169  int align = posix_memalign(&ptr, page_size, a.base_size);
170  if (!ptr || align != 0) {
171 #endif
172  errorQuda("Failed to allocate aligned host memory of size %zu (%s:%d in %s())\n", size, a.file.c_str(), a.line,
173  a.func.c_str());
174  }
175  return ptr;
176  }
177 
179  {
180  static bool managed = false;
181  static bool init = false;
182 
183  if (!init) {
184  char *enable_managed_memory = getenv("QUDA_ENABLE_MANAGED_MEMORY");
185  if (enable_managed_memory && strcmp(enable_managed_memory, "1") == 0) {
186  warningQuda("Using managed memory for CUDA allocations");
187  managed = true;
188 
189  if (deviceProp.major < 6) warningQuda("Using managed memory on pre-Pascal architecture is limited");
190  }
191 
192  init = true;
193  }
194 
195  return managed;
196  }
197 
199  {
200  static bool prefetch = false;
201  static bool init = false;
202 
203  if (!init) {
204  if (use_managed_memory()) {
205  char *enable_managed_prefetch = getenv("QUDA_ENABLE_MANAGED_PREFETCH");
206  if (enable_managed_prefetch && strcmp(enable_managed_prefetch, "1") == 0) {
207  warningQuda("Enabling prefetch support for managed memory");
208  prefetch = true;
209  }
210  }
211 
212  init = true;
213  }
214 
215  return prefetch;
216  }
217 
223  void *device_malloc_(const char *func, const char *file, int line, size_t size)
224  {
225  if (use_managed_memory()) return managed_malloc_(func, file, line, size);
226 
227 #ifndef QDP_USE_CUDA_MANAGED_MEMORY
228  MemAlloc a(func, file, line);
229  void *ptr;
230 
231  a.size = a.base_size = size;
232 
233  cudaError_t err = cudaMalloc(&ptr, size);
234  if (err != cudaSuccess) {
235  errorQuda("Failed to allocate device memory of size %zu (%s:%d in %s())\n", size, file, line, func);
236  }
237  track_malloc(DEVICE, a, ptr);
238 #ifdef HOST_DEBUG
239  qudaMemset(ptr, 0xff, size);
240 #endif
241  return ptr;
242 #else
243  // when QDO uses managed memory we can bypass the QDP memory manager
244  return device_pinned_malloc_(func, file, line, size);
245 #endif
246  }
247 
255  void *device_pinned_malloc_(const char *func, const char *file, int line, size_t size)
256  {
257  if (!comm_peer2peer_present()) return device_malloc_(func, file, line, size);
258 
259  MemAlloc a(func, file, line);
260  void *ptr;
261 
262  a.size = a.base_size = size;
263 
264  CUresult err = cuMemAlloc((CUdeviceptr *)&ptr, size);
265  if (err != CUDA_SUCCESS) {
266  errorQuda("Failed to allocate device memory of size %zu (%s:%d in %s())\n", size, file, line, func);
267  }
268  track_malloc(DEVICE_PINNED, a, ptr);
269 #ifdef HOST_DEBUG
270  qudaMemset(ptr, 0xff, size);
271 #endif
272  return ptr;
273  }
274 
280  void *safe_malloc_(const char *func, const char *file, int line, size_t size)
281  {
282  MemAlloc a(func, file, line);
283  a.size = a.base_size = size;
284 
285  void *ptr = malloc(size);
286  if (!ptr) { errorQuda("Failed to allocate host memory of size %zu (%s:%d in %s())\n", size, file, line, func); }
287  track_malloc(HOST, a, ptr);
288 #ifdef HOST_DEBUG
289  memset(ptr, 0xff, size);
290 #endif
291  return ptr;
292  }
293 
303  void *pinned_malloc_(const char *func, const char *file, int line, size_t size)
304  {
305  MemAlloc a(func, file, line);
306  void *ptr = aligned_malloc(a, size);
307 
308  cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterDefault);
309  if (err != cudaSuccess) {
310  errorQuda("Failed to register pinned memory of size %zu (%s:%d in %s())\n", size, file, line, func);
311  }
312  track_malloc(PINNED, a, ptr);
313 #ifdef HOST_DEBUG
314  memset(ptr, 0xff, a.base_size);
315 #endif
316  return ptr;
317  }
318 
324  void *mapped_malloc_(const char *func, const char *file, int line, size_t size)
325  {
326  MemAlloc a(func, file, line);
327 
328 #if 0
329  void *ptr;
330  static int page_size = 2*getpagesize();
331  a.base_size = ((size + page_size - 1) / page_size) * page_size; // round up to the nearest multiple of page_size
332  a.size = size;
333  cudaError_t err = cudaHostAlloc(&ptr, a.base_size, cudaHostAllocMapped | cudaHostAllocPortable);
334  if (err != cudaSuccess) {
335  errorQuda("cudaHostAlloc failed of size %zu (%s:%d in %s())\n", size, file, line, func); }
336  }
337 #else
338  void *ptr = aligned_malloc(a, size);
339  cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterMapped | cudaHostRegisterPortable);
340  if (err != cudaSuccess) {
341  errorQuda("Failed to register host-mapped memory of size %zu (%s:%d in %s())\n", size, file, line, func);
342  }
343 #endif
344  track_malloc(MAPPED, a, ptr);
345 #ifdef HOST_DEBUG
346  memset(ptr, 0xff, a.base_size);
347 #endif
348  return ptr;
349  }
350 
356  void *managed_malloc_(const char *func, const char *file, int line, size_t size)
357  {
358  MemAlloc a(func, file, line);
359  void *ptr;
360 
361  a.size = a.base_size = size;
362 
363  cudaError_t err = cudaMallocManaged(&ptr, size);
364  if (err != cudaSuccess) {
365  errorQuda("Failed to allocate managed memory of size %zu (%s:%d in %s())\n", size, file, line, func);
366  }
367  track_malloc(MANAGED, a, ptr);
368 #ifdef HOST_DEBUG
369  qudaMemset(ptr, 0xff, size);
370 #endif
371  return ptr;
372  }
377 #ifdef NVSHMEM_COMMS
378  void *shmem_malloc_(const char *func, const char *file, int line, size_t size)
379  {
380  MemAlloc a(func, file, line);
381 
382  a.size = a.base_size = size;
383 
384  auto ptr = nvshmem_malloc(size);
385  if (ptr == nullptr) {
386  printfQuda("ERROR: Failed to allocate shmem memory of size %zu (%s:%d in %s())\n", size, file, line, func);
387  errorQuda("Aborting");
388  }
389  track_malloc(SHMEM, a, ptr);
390 #ifdef HOST_DEBUG
391  qudaMemset(ptr, 0xff, size);
392 #endif
393  return ptr;
394  }
395 #endif
396 
401  void *device_comms_pinned_malloc_(const char *func, const char *file, int line, size_t size)
402  {
403 #ifdef NVSHMEM_COMMS
404  return shmem_malloc_(func, file, line, size);
405 #else
406  return device_pinned_malloc_(func, file, line, size);
407 #endif
408  }
409 
415  void device_free_(const char *func, const char *file, int line, void *ptr)
416  {
417  if (use_managed_memory()) {
418  managed_free_(func, file, line, ptr);
419  return;
420  }
421 
422 #ifndef QDP_USE_CUDA_MANAGED_MEMORY
423  if (!ptr) { errorQuda("Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func); }
424  if (!alloc[DEVICE].count(ptr)) {
425  errorQuda("Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
426  }
427  cudaError_t err = cudaFree(ptr);
428  if (err != cudaSuccess) { errorQuda("Failed to free device memory (%s:%d in %s())\n", file, line, func); }
429  track_free(DEVICE, ptr);
430 #else
431  device_pinned_free_(func, file, line, ptr);
432 #endif
433  }
434 
440  void device_pinned_free_(const char *func, const char *file, int line, void *ptr)
441  {
442  if (!comm_peer2peer_present()) {
443  device_free_(func, file, line, ptr);
444  return;
445  }
446 
447  if (!ptr) { errorQuda("Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func); }
448  if (!alloc[DEVICE_PINNED].count(ptr)) {
449  errorQuda("Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
450  }
451  CUresult err = cuMemFree((CUdeviceptr)ptr);
452  if (err != CUDA_SUCCESS) { printfQuda("Failed to free device memory (%s:%d in %s())\n", file, line, func); }
453  track_free(DEVICE_PINNED, ptr);
454  }
455 
461  void managed_free_(const char *func, const char *file, int line, void *ptr)
462  {
463  if (!ptr) { errorQuda("Attempt to free NULL managed pointer (%s:%d in %s())\n", file, line, func); }
464  if (!alloc[MANAGED].count(ptr)) {
465  errorQuda("Attempt to free invalid managed pointer (%s:%d in %s())\n", file, line, func);
466  }
467  cudaError_t err = cudaFree(ptr);
468  if (err != cudaSuccess) { errorQuda("Failed to free device memory (%s:%d in %s())\n", file, line, func); }
469  track_free(MANAGED, ptr);
470  }
471 
477  void host_free_(const char *func, const char *file, int line, void *ptr)
478  {
479  if (!ptr) { errorQuda("Attempt to free NULL host pointer (%s:%d in %s())\n", file, line, func); }
480  if (alloc[HOST].count(ptr)) {
481  track_free(HOST, ptr);
482  free(ptr);
483  } else if (alloc[PINNED].count(ptr)) {
484  cudaError_t err = cudaHostUnregister(ptr);
485  if (err != cudaSuccess) { errorQuda("Failed to unregister pinned memory (%s:%d in %s())\n", file, line, func); }
486  track_free(PINNED, ptr);
487  free(ptr);
488  } else if (alloc[MAPPED].count(ptr)) {
489 #ifdef HOST_ALLOC
490  cudaError_t err = cudaFreeHost(ptr);
491  if (err != cudaSuccess) { errorQuda("Failed to free host memory (%s:%d in %s())\n", file, line, func); }
492 #else
493  cudaError_t err = cudaHostUnregister(ptr);
494  if (err != cudaSuccess) {
495  errorQuda("Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line, func);
496  }
497  free(ptr);
498 #endif
499  track_free(MAPPED, ptr);
500  } else {
501  printfQuda("ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line, func);
502  print_trace();
503  errorQuda("Aborting");
504  }
505  }
506 
507 #ifdef NVSHMEM_COMMS
511  void shmem_free_(const char *func, const char *file, int line, void *ptr)
512  {
513  if (!ptr) {
514  printfQuda("ERROR: Attempt to free NULL shmem pointer (%s:%d in %s())\n", file, line, func);
515  errorQuda("Aborting");
516  }
517  if (!alloc[SHMEM].count(ptr)) {
518  printfQuda("ERROR: Attempt to free invalid shmem pointer (%s:%d in %s())\n", file, line, func);
519  errorQuda("Aborting");
520  }
521  nvshmem_free(ptr);
522  track_free(SHMEM, ptr);
523  }
524 #endif
525 
530  void device_comms_pinned_free_(const char *func, const char *file, int line, void *ptr)
531  {
532 #ifdef NVSHMEM_COMMS
533  shmem_free_(func, file, line, ptr);
534 #else
535  device_pinned_free_(func, file, line, ptr);
536 #endif
537  }
538 
540  {
541  printfQuda("Device memory used = %.1f MiB\n", max_total_bytes[DEVICE] / (double)(1 << 20));
542  printfQuda("Pinned device memory used = %.1f MiB\n", max_total_bytes[DEVICE_PINNED] / (double)(1 << 20));
543  printfQuda("Managed memory used = %.1f MiB\n", max_total_bytes[MANAGED] / (double)(1 << 20));
544  printfQuda("Shmem memory used = %.1f MiB\n", max_total_bytes[SHMEM] / (double)(1 << 20));
545  printfQuda("Page-locked host memory used = %.1f MiB\n", max_total_pinned_bytes / (double)(1 << 20));
546  printfQuda("Total host memory used >= %.1f MiB\n", max_total_host_bytes / (double)(1 << 20));
547  }
548 
550  {
551  if (!alloc[DEVICE].empty() || !alloc[DEVICE_PINNED].empty() || !alloc[HOST].empty() || !alloc[PINNED].empty()
552  || !alloc[MAPPED].empty()) {
553  warningQuda("The following internal memory allocations were not freed.");
554  printfQuda("\n");
555  print_alloc_header();
556  print_alloc(DEVICE);
557  print_alloc(DEVICE_PINNED);
558  print_alloc(SHMEM);
559  print_alloc(HOST);
560  print_alloc(PINNED);
561  print_alloc(MAPPED);
562  printfQuda("\n");
563  }
564  }
565 
567  {
568 
569  CUpointer_attribute attribute[] = {CU_POINTER_ATTRIBUTE_MEMORY_TYPE};
570  CUmemorytype mem_type;
571  void *data[] = {&mem_type};
572  CUresult error = cuPointerGetAttributes(1, attribute, data, reinterpret_cast<CUdeviceptr>(ptr));
573  if (error != CUDA_SUCCESS) {
574  const char *string;
575  cuGetErrorString(error, &string);
576  errorQuda("cuPointerGetAttributes failed with error %s", string);
577  }
578 
579  // catch pointers that have not been created in CUDA
580  if (mem_type == 0) mem_type = CU_MEMORYTYPE_HOST;
581 
582  switch (mem_type) {
583  case CU_MEMORYTYPE_DEVICE:
584  case CU_MEMORYTYPE_UNIFIED: return QUDA_CUDA_FIELD_LOCATION;
585  case CU_MEMORYTYPE_HOST: return QUDA_CPU_FIELD_LOCATION;
586  default: errorQuda("Unknown memory type %d", mem_type); return QUDA_INVALID_FIELD_LOCATION;
587  }
588  }
589 
590  void *get_mapped_device_pointer_(const char *func, const char *file, int line, const void *host)
591  {
592  void *device;
593  auto error = cudaHostGetDevicePointer(&device, const_cast<void *>(host), 0);
594  if (error != cudaSuccess) {
595  errorQuda("cudaHostGetDevicePointer failed with error %s (%s:%d in %s()", cudaGetErrorString(error), file, line,
596  func);
597  }
598  return device;
599  }
600 
601  namespace pool
602  {
603 
607  static std::multimap<size_t, void *> pinnedCache;
608 
612  static std::map<void *, size_t> pinnedSize;
613 
617  static std::multimap<size_t, void *> deviceCache;
618 
622  static std::map<void *, size_t> deviceSize;
623 
624  static bool pool_init = false;
625 
627  static bool device_memory_pool = true;
628 
630  static bool pinned_memory_pool = true;
631 
632  void init()
633  {
634  if (!pool_init) {
635  // device memory pool
636  char *enable_device_pool = getenv("QUDA_ENABLE_DEVICE_MEMORY_POOL");
637  if (!enable_device_pool || strcmp(enable_device_pool, "0") != 0) {
638  warningQuda("Using device memory pool allocator");
639  device_memory_pool = true;
640  } else {
641  warningQuda("Not using device memory pool allocator");
642  device_memory_pool = false;
643  }
644 
645  // pinned memory pool
646  char *enable_pinned_pool = getenv("QUDA_ENABLE_PINNED_MEMORY_POOL");
647  if (!enable_pinned_pool || strcmp(enable_pinned_pool, "0") != 0) {
648  warningQuda("Using pinned memory pool allocator");
649  pinned_memory_pool = true;
650  } else {
651  warningQuda("Not using pinned memory pool allocator");
652  pinned_memory_pool = false;
653  }
654  pool_init = true;
655  }
656 #if defined(NVSHMEM_COMMS)
657  MPI_Comm tmp = MPI_COMM_WORLD;
658  warningQuda("Init NVSHMEM");
659  nvshmemx_init_attr_t attr;
660  attr.mpi_comm = &tmp;
661  nvshmemx_init_attr(NVSHMEMX_INIT_WITH_MPI_COMM, &attr);
662 #endif
663  }
664 
665  void *pinned_malloc_(const char *func, const char *file, int line, size_t nbytes)
666  {
667  void *ptr = nullptr;
668  if (pinned_memory_pool) {
669  if (pinnedCache.empty()) {
670  ptr = quda::pinned_malloc_(func, file, line, nbytes);
671  } else {
672  auto it = pinnedCache.lower_bound(nbytes);
673  if (it != pinnedCache.end()) { // sufficiently large allocation found
674  nbytes = it->first;
675  ptr = it->second;
676  pinnedCache.erase(it);
677  } else { // sacrifice the smallest cached allocation
678  it = pinnedCache.begin();
679  ptr = it->second;
680  pinnedCache.erase(it);
681  host_free(ptr);
682  ptr = quda::pinned_malloc_(func, file, line, nbytes);
683  }
684  }
685  pinnedSize[ptr] = nbytes;
686  } else {
687  ptr = quda::pinned_malloc_(func, file, line, nbytes);
688  }
689  return ptr;
690  }
691 
692  void pinned_free_(const char *func, const char *file, int line, void *ptr)
693  {
694  if (pinned_memory_pool) {
695  if (!pinnedSize.count(ptr)) { errorQuda("Attempt to free invalid pointer"); }
696  pinnedCache.insert(std::make_pair(pinnedSize[ptr], ptr));
697  pinnedSize.erase(ptr);
698  } else {
699  quda::host_free_(func, file, line, ptr);
700  }
701  }
702 
703  void *device_malloc_(const char *func, const char *file, int line, size_t nbytes)
704  {
705  void *ptr = nullptr;
706  if (device_memory_pool) {
707  if (deviceCache.empty()) {
708  ptr = quda::device_malloc_(func, file, line, nbytes);
709  } else {
710  auto it = deviceCache.lower_bound(nbytes);
711  if (it != deviceCache.end()) { // sufficiently large allocation found
712  nbytes = it->first;
713  ptr = it->second;
714  deviceCache.erase(it);
715  } else { // sacrifice the smallest cached allocation
716  it = deviceCache.begin();
717  ptr = it->second;
718  deviceCache.erase(it);
719  quda::device_free_(func, file, line, ptr);
720  ptr = quda::device_malloc_(func, file, line, nbytes);
721  }
722  }
723  deviceSize[ptr] = nbytes;
724  } else {
725  ptr = quda::device_malloc_(func, file, line, nbytes);
726  }
727  return ptr;
728  }
729 
730  void device_free_(const char *func, const char *file, int line, void *ptr)
731  {
732  if (device_memory_pool) {
733  if (!deviceSize.count(ptr)) { errorQuda("Attempt to free invalid pointer"); }
734  deviceCache.insert(std::make_pair(deviceSize[ptr], ptr));
735  deviceSize.erase(ptr);
736  } else {
737  quda::device_free_(func, file, line, ptr);
738  }
739  }
740 
741 #ifdef NVSHMEM_COMMS
742  void *shmem_malloc_(const char *func, const char *file, int line, size_t nbytes)
743  {
744  return quda::shmem_malloc_(func, file, line, nbytes);
745  }
746 
747  void shmem_free_(const char *func, const char *file, int line, void *ptr)
748  {
749  quda::shmem_free_(func, file, line, ptr);
750  }
751 #endif
752 
754  {
755  if (pinned_memory_pool) {
756  for (auto it : pinnedCache) { host_free(it.second); }
757  pinnedCache.clear();
758  }
759  }
760 
762  {
763  if (device_memory_pool) {
764  for (auto it : deviceCache) { device_free(it.second); }
765  deviceCache.clear();
766  }
767  }
768 
769  } // namespace pool
770 
771 } // namespace quda
std::string func
Definition: malloc.cpp:28
std::string file
Definition: malloc.cpp:29
MemAlloc(std::string func, std::string file, int line)
Definition: malloc.cpp:39
MemAlloc & operator=(const MemAlloc &a)
Definition: malloc.cpp:47
size_t size
Definition: malloc.cpp:31
size_t base_size
Definition: malloc.cpp:32
bool comm_peer2peer_present()
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has b...
void * memset(void *s, int c, size_t n)
cudaColorSpinorField * tmp
Definition: covdev_test.cpp:34
@ QUDA_CUDA_FIELD_LOCATION
Definition: enum_quda.h:326
@ QUDA_CPU_FIELD_LOCATION
Definition: enum_quda.h:325
@ QUDA_INVALID_FIELD_LOCATION
Definition: enum_quda.h:327
enum QudaFieldLocation_s QudaFieldLocation
#define device_free(ptr)
Definition: malloc_quda.h:110
#define host_free(ptr)
Definition: malloc_quda.h:115
void init()
Create the BLAS context.
void init()
Initialize the memory pool allocator.
Definition: malloc.cpp:632
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)
Allocate pinned-memory. If a free pre-existing allocation exists reuse this.
Definition: malloc.cpp:665
void flush_pinned()
Free all outstanding pinned-memory allocations.
Definition: malloc.cpp:753
void flush_device()
Free all outstanding device-memory allocations.
Definition: malloc.cpp:761
void pinned_free_(const char *func, const char *file, int line, void *ptr)
Virtual free of pinned-memory allocation.
Definition: malloc.cpp:692
void device_free_(const char *func, const char *file, int line, void *ptr)
Virtual free of pinned-memory allocation.
Definition: malloc.cpp:730
void * device_malloc_(const char *func, const char *file, int line, size_t size)
Allocate device-memory. If free pre-existing allocation exists reuse this.
Definition: malloc.cpp:703
void device_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:415
void host_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:477
void * device_pinned_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:255
void * mapped_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:324
size_t host_allocated()
Definition: malloc.cpp:77
size_t mapped_allocated()
Definition: malloc.cpp:73
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:303
size_t device_allocated()
Definition: malloc.cpp:69
void printPeakMemUsage()
Definition: malloc.cpp:539
size_t host_allocated_peak()
Definition: malloc.cpp:87
void * get_mapped_device_pointer_(const char *func, const char *file, int line, const void *ptr)
Definition: malloc.cpp:590
void device_pinned_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:440
size_t managed_allocated()
Definition: malloc.cpp:75
void managed_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:461
size_t pinned_allocated()
Definition: malloc.cpp:71
size_t managed_allocated_peak()
Definition: malloc.cpp:85
void device_comms_pinned_free_(const char *func, const char *file, int line, void *ptr)
Definition: malloc.cpp:530
size_t mapped_allocated_peak()
Definition: malloc.cpp:83
void assertAllMemFree()
Definition: malloc.cpp:549
void * device_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:223
void * safe_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:280
bool use_managed_memory()
Definition: malloc.cpp:178
size_t pinned_allocated_peak()
Definition: malloc.cpp:81
size_t device_allocated_peak()
Definition: malloc.cpp:79
bool is_prefetch_enabled()
Definition: malloc.cpp:198
QudaFieldLocation get_pointer_location(const void *ptr)
Definition: malloc.cpp:566
AllocType
Definition: malloc.cpp:22
@ N_ALLOC_TYPE
Definition: malloc.cpp:22
@ MANAGED
Definition: malloc.cpp:22
@ DEVICE_PINNED
Definition: malloc.cpp:22
@ SHMEM
Definition: malloc.cpp:22
@ HOST
Definition: malloc.cpp:22
@ MAPPED
Definition: malloc.cpp:22
@ DEVICE
Definition: malloc.cpp:22
@ PINNED
Definition: malloc.cpp:22
void * managed_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:356
void * device_comms_pinned_malloc_(const char *func, const char *file, int line, size_t size)
Definition: malloc.cpp:401
::std::string string
Definition: gtest-port.h:891
#define qudaMemset(ptr, value, count)
Definition: quda_api.h:218
cudaDeviceProp deviceProp
Definition: device.cpp:14
#define printfQuda(...)
Definition: util_quda.h:114
bool getRankVerbosity()
This function returns true if the calling rank is enabled for verbosity (e.g., whether printQuda and ...
Definition: util_quda.cpp:30
#define warningQuda(...)
Definition: util_quda.h:132
#define errorQuda(...)
Definition: util_quda.h:120