11 #include "qdp_config.h" 35 : line(-1), size(0), base_size(0) { }
37 MemAlloc(std::string func, std::string file,
int line)
38 : func(func), file(file), line(line), size(0), base_size(0) { }
71 size = backtrace (array, 10);
72 strings = backtrace_symbols (array, size);
73 printfQuda(
"Obtained %zd stack frames.\n", size);
81 printfQuda(
"----------------------------------------------------------\n");
87 const char *type_str[] = {
"Device",
"Device Pinned",
"Host ",
"Pinned",
"Mapped"};
88 std::map<void *, MemAlloc>::iterator entry;
90 for (entry = alloc[type].begin(); entry != alloc[type].end(); entry++) {
91 void *ptr = entry->first;
102 if (total_bytes[type] > max_total_bytes[type]) {
103 max_total_bytes[type] = total_bytes[type];
107 if (total_host_bytes > max_total_host_bytes) {
113 if (total_pinned_bytes > max_total_pinned_bytes) {
117 alloc[type][ptr] = a;
123 size_t size = alloc[type][ptr].base_size;
124 total_bytes[type] -=
size;
126 total_host_bytes -=
size;
129 total_pinned_bytes -=
size;
131 alloc[type].erase(ptr);
147 #if (CUDA_VERSION > 4000) && 0 // we need to manually align to page boundaries to allow us to bind a texture to mapped memory 152 static int page_size = 2*getpagesize();
153 a.
base_size = ((size + page_size - 1) / page_size) * page_size;
154 int align = posix_memalign(&ptr, page_size, a.
base_size);
155 if (!ptr || align != 0) {
157 errorQuda(
"Failed to allocate aligned host memory of size %zu (%s:%d in %s())\n", size, a.
file.c_str(), a.
line,
171 #ifndef QDP_USE_CUDA_MANAGED_MEMORY 177 cudaError_t err = cudaMalloc(&ptr, size);
178 if (err != cudaSuccess) {
179 errorQuda(
"Failed to allocate device memory of size %zu (%s:%d in %s())\n", size, file, line, func);
183 cudaMemset(ptr, 0xff, size);
209 CUresult err = cuMemAlloc((CUdeviceptr*)&ptr, size);
210 if (err != CUDA_SUCCESS) {
211 errorQuda(
"Failed to allocate device memory of size %zu (%s:%d in %s())\n", size, file, line, func);
215 cudaMemset(ptr, 0xff, size);
231 void *ptr = malloc(size);
232 if (!ptr) {
errorQuda(
"Failed to allocate host memory of size %zu (%s:%d in %s())\n", size, file, line, func); }
255 cudaError_t err = cudaHostRegister(ptr, a.
base_size, cudaHostRegisterDefault);
256 if (err != cudaSuccess) {
257 errorQuda(
"Failed to register pinned memory of size %zu (%s:%d in %s())\n", size, file, line, func);
266 #define HOST_ALLOC // this needs to be set presently on P9 279 cudaError_t err = cudaHostAlloc(&ptr, size, cudaHostRegisterMapped | cudaHostRegisterPortable);
280 if (err != cudaSuccess) {
errorQuda(
"cudaHostAlloc failed of size %zu (%s:%d in %s())\n", size, file, line, func); }
283 cudaError_t err = cudaHostRegister(ptr, a.
base_size, cudaHostRegisterMapped);
284 if (err != cudaSuccess) {
285 errorQuda(
"Failed to register host-mapped memory of size %zu (%s:%d in %s())\n", size, file, line, func);
303 #ifndef QDP_USE_CUDA_MANAGED_MEMORY 304 if (!ptr) {
errorQuda(
"Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func); }
306 errorQuda(
"Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
308 cudaError_t err = cudaFree(ptr);
309 if (err != cudaSuccess) {
errorQuda(
"Failed to free device memory (%s:%d in %s())\n", file, line, func); }
329 if (!ptr) {
errorQuda(
"Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func); }
331 errorQuda(
"Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
333 CUresult err = cuMemFree((CUdeviceptr)ptr);
334 if (err != CUDA_SUCCESS) {
printfQuda(
"Failed to free device memory (%s:%d in %s())\n", file, line, func); }
346 if (!ptr) {
errorQuda(
"Attempt to free NULL host pointer (%s:%d in %s())\n", file, line, func); }
351 cudaError_t err = cudaHostUnregister(ptr);
352 if (err != cudaSuccess) {
errorQuda(
"Failed to unregister pinned memory (%s:%d in %s())\n", file, line, func); }
357 cudaError_t err = cudaFreeHost(ptr);
358 if (err != cudaSuccess) {
errorQuda(
"Failed to free host memory (%s:%d in %s())\n", file, line, func); }
360 cudaError_t err = cudaHostUnregister(ptr);
361 if (err != cudaSuccess) {
362 errorQuda(
"Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line, func);
368 printfQuda(
"ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line, func);
377 printfQuda(
"Device memory used = %.1f MB\n", max_total_bytes[
DEVICE] / (
double)(1<<20));
379 printfQuda(
"Page-locked host memory used = %.1f MB\n", max_total_pinned_bytes / (
double)(1<<20));
380 printfQuda(
"Total host memory used >= %.1f MB\n", max_total_host_bytes / (
double)(1<<20));
387 warningQuda(
"The following internal memory allocations were not freed.");
401 CUpointer_attribute attribute[] = { CU_POINTER_ATTRIBUTE_MEMORY_TYPE };
402 CUmemorytype mem_type;
403 void *data[] = { &mem_type };
404 CUresult error = cuPointerGetAttributes(1, attribute, data, reinterpret_cast<CUdeviceptr>(ptr));
405 if (error != CUDA_SUCCESS) {
407 cuGetErrorString(error, &
string);
408 errorQuda(
"cuPointerGetAttributes failed with error %s",
string);
412 if (mem_type == 0) mem_type = CU_MEMORYTYPE_HOST;
415 case CU_MEMORYTYPE_DEVICE:
416 case CU_MEMORYTYPE_UNIFIED:
418 case CU_MEMORYTYPE_HOST:
421 errorQuda(
"Unknown memory type %d", mem_type);
460 char *enable_device_pool = getenv(
"QUDA_ENABLE_DEVICE_MEMORY_POOL");
461 if (!enable_device_pool || strcmp(enable_device_pool,
"0")!=0) {
463 device_memory_pool =
true;
465 warningQuda(
"Not using device memory pool allocator");
466 device_memory_pool =
false;
470 char *enable_pinned_pool = getenv(
"QUDA_ENABLE_PINNED_MEMORY_POOL");
471 if (!enable_pinned_pool || strcmp(enable_pinned_pool,
"0")!=0) {
473 pinned_memory_pool =
true;
475 warningQuda(
"Not using pinned memory pool allocator");
476 pinned_memory_pool =
false;
485 if (pinned_memory_pool) {
486 std::multimap<size_t, void *>::iterator
it;
488 if (pinnedCache.empty()) {
491 it = pinnedCache.lower_bound(nbytes);
492 if (it != pinnedCache.end()) {
495 pinnedCache.erase(it);
497 it = pinnedCache.begin();
499 pinnedCache.erase(it);
504 pinnedSize[ptr] = nbytes;
513 if (pinned_memory_pool) {
514 if (!pinnedSize.count(ptr)) {
515 errorQuda(
"Attempt to free invalid pointer");
517 pinnedCache.insert(std::make_pair(pinnedSize[ptr], ptr));
518 pinnedSize.erase(ptr);
527 if (device_memory_pool) {
528 std::multimap<size_t, void *>::iterator
it;
530 if (deviceCache.empty()) {
533 it = deviceCache.lower_bound(nbytes);
534 if (it != deviceCache.end()) {
537 deviceCache.erase(it);
539 it = deviceCache.begin();
541 deviceCache.erase(it);
546 deviceSize[ptr] = nbytes;
555 if (device_memory_pool) {
556 if (!deviceSize.count(ptr)) {
557 errorQuda(
"Attempt to free invalid pointer");
559 deviceCache.insert(std::make_pair(deviceSize[ptr], ptr));
560 deviceSize.erase(ptr);
568 if (pinned_memory_pool) {
569 std::multimap<size_t, void *>::iterator
it;
570 for (it = pinnedCache.begin(); it != pinnedCache.end(); it++) {
571 void *ptr = it->second;
580 if (device_memory_pool) {
581 std::multimap<size_t, void *>::iterator
it;
582 for (it = deviceCache.begin(); it != deviceCache.end(); it++) {
583 void *ptr = it->second;
static long max_total_host_bytes
long device_allocated_peak()
static std::map< void *, size_t > deviceSize
static std::multimap< size_t, void * > deviceCache
void * safe_malloc_(const char *func, const char *file, int line, size_t size)
static void print_alloc_header()
static long max_total_pinned_bytes
static std::multimap< size_t, void * > pinnedCache
void * device_malloc_(const char *func, const char *file, int line, size_t size)
static std::map< void *, MemAlloc > alloc[N_ALLOC_TYPE]
static void print_alloc(AllocType type)
void * device_pinned_malloc_(const char *func, const char *file, int line, size_t size)
static std::map< void *, size_t > pinnedSize
void device_free_(const char *func, const char *file, int line, void *ptr)
MemAlloc & operator=(const MemAlloc &a)
void flush_pinned()
Free all outstanding pinned-memory allocations.
static void track_free(const AllocType &type, void *ptr)
void device_pinned_free_(const char *func, const char *file, int line, void *ptr)
MemAlloc(std::string func, std::string file, int line)
void host_free_(const char *func, const char *file, int line, void *ptr)
static long max_total_bytes[N_ALLOC_TYPE]
void flush_device()
Free all outstanding device-memory allocations.
static void print_trace(void)
static void * aligned_malloc(MemAlloc &a, size_t size)
long host_allocated_peak()
long mapped_allocated_peak()
void init()
Create the CUBLAS context.
static bool pinned_memory_pool
void * mapped_malloc_(const char *func, const char *file, int line, size_t size)
void * memset(void *s, int c, size_t n)
static bool device_memory_pool
static long total_pinned_bytes
QudaFieldLocation get_pointer_location(const void *ptr)
enum QudaFieldLocation_s QudaFieldLocation
long pinned_allocated_peak()
static long total_bytes[N_ALLOC_TYPE]
void pinned_free_(const char *func, const char *file, int line, void *ptr)
Virtual free of pinned-memory allocation.
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)
bool comm_peer2peer_present()
Returns true if any peer-to-peer capability is present on this system (regardless of whether it has b...
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]
static void track_malloc(const AllocType &type, const MemAlloc &a, void *ptr)
static long total_host_bytes