35 : func(func), file(file), line(line),
size(0),
base_size(0) { }
// Running totals and high-water marks (in bytes) for host and pinned memory.
// They are adjusted in track_malloc()/track_free() and the max_* values are
// printed, scaled to MB, in the usage summary.
// NOTE(review): these counters are signed long while the sizes added to and
// subtracted from them come from base_size (read as size_t in track_free) --
// confirm the signed/unsigned mix is intended.
53 static long total_host_bytes, max_total_host_bytes;
54 static long total_pinned_bytes, max_total_pinned_bytes;
// Prints the header of the allocation table. Only the dashed separator line
// is visible in this excerpt; any column-title output is not shown here.
56 static void print_alloc_header()
59 printfQuda(
"----------------------------------------------------------\n");
// Fixed-width display labels, selected by `type` when printing a row.
// NOTE(review): presumably ordered to match the allocation-type enum
// (DEVICE, HOST, PINNED, MAPPED) -- confirm against its declaration.
65 const char *type_str[] = {
"Device",
"Host ",
"Pinned",
"Mapped"};
66 std::map<void *, MemAlloc>::iterator entry;
// Walk every tracked allocation of the given type and print one row each:
// type label, pointer, base_size (cast to unsigned long for %lu), and the
// function / file / line recorded in the MemAlloc entry.
68 for (entry = alloc[type].begin(); entry != alloc[type].end(); entry++) {
69 void *ptr = entry->first;
// Copies the MemAlloc record (including its strings) for each row.
70 MemAlloc a = entry->second;
71 printfQuda(
"%s %15p %15lu %s(), %s:%d\n", type_str[type], ptr, (
unsigned long) a.base_size,
72 a.func.c_str(), a.file.c_str(), a.line);
// Accounts for a new allocation of the given type.
//   type - allocation category used to index total_bytes / max_total_bytes
//   a    - allocation record; a.base_size is the byte count that is charged
//   ptr  - the allocated pointer (its use is not visible in this excerpt)
// NOTE(review): the conditions selecting the host and pinned updates below,
// and any insertion of `ptr` into the allocation table, are not shown here.
77 static void track_malloc(
const AllocType &type,
const MemAlloc &a,
void *ptr)
// Per-type running total, with its high-water mark.
79 total_bytes[type] += a.base_size;
80 if (total_bytes[type] > max_total_bytes[type]) {
81 max_total_bytes[type] = total_bytes[type];
// Aggregate host total, with its high-water mark.
84 total_host_bytes += a.base_size;
85 if (total_host_bytes > max_total_host_bytes) {
86 max_total_host_bytes = total_host_bytes;
// Aggregate pinned total, with its high-water mark.
90 total_pinned_bytes += a.base_size;
91 if (total_pinned_bytes > max_total_pinned_bytes) {
92 max_total_pinned_bytes = total_pinned_bytes;
// Reverses the accounting for a freed pointer of the given type and removes
// its entry from the allocation table.
//   type - allocation category the pointer was tracked under
//   ptr  - pointer being released
// NOTE(review): the conditions guarding the host / pinned decrements are not
// visible in this excerpt.
99 static void track_free(
const AllocType &type,
void *ptr)
// NOTE(review): if alloc[type] is a std::map, operator[] would insert a
// default entry for an untracked ptr -- callers presumably validate first.
101 size_t size = alloc[type][ptr].base_size;
102 total_bytes[type] -= size;
104 total_host_bytes -= size;
107 total_pinned_bytes -= size;
109 alloc[type].erase(ptr);
// Allocates page-aligned host memory for a request of `size` bytes.
//   a    - allocation record; a.base_size is set to the rounded-up byte count
//   size - number of bytes requested by the caller
// NOTE(review): the #else branch, the declaration of `ptr`, and the return
// statement are not visible in this excerpt.
119 static void *aligned_malloc(MemAlloc &a,
size_t size)
125 #if (CUDA_VERSION > 4000)
// Page size is queried once and cached in a function-local static.
129 static int page_size = getpagesize();
// Round the request up to a whole number of pages.
130 a.base_size = ((size + page_size - 1) / page_size) * page_size;
// NOTE(review): the return code of posix_memalign is not captured on this
// line; how failure is detected before the message below is not visible.
131 posix_memalign(&ptr, page_size, a.base_size);
134 printfQuda(
"ERROR: Failed to allocate aligned host memory (%s:%d in %s())\n", a.file.c_str(), a.line, a.func.c_str());
// Device allocation path: request `size` bytes with cudaMalloc, report a
// failure with the caller's file / line / function, then register the
// pointer with the tracker under DEVICE.
// NOTE(review): the enclosing function header, the setup of `a` (including
// a.base_size) and what follows the error message are not visible here.
153 cudaError_t err = cudaMalloc(&ptr, size);
154 if (err != cudaSuccess) {
155 printfQuda(
"ERROR: Failed to allocate device memory (%s:%d in %s())\n", file, line, func);
158 track_malloc(
DEVICE, a, ptr);
// Tracked wrapper around malloc().
//   func, file, line - call-site information used in the error message
//   size             - number of bytes to allocate
// The pointer is registered with the tracker under HOST.
// NOTE(review): the failure check before the error message, the setup of `a`
// and the return statement are not visible in this excerpt.
168 void *
safe_malloc_(
const char *func,
const char *file,
int line,
size_t size)
173 void *ptr = malloc(size);
175 printfQuda(
"ERROR: Failed to allocate host memory (%s:%d in %s())\n", file, line, func);
178 track_malloc(
HOST, a, ptr);
// Pinned allocation path: obtain page-aligned host memory, then register the
// full rounded-up extent (a.base_size) with the CUDA runtime using the
// default flags, and track the pointer under PINNED.
// NOTE(review): the enclosing function header and what follows the error
// message are not visible in this excerpt.
195 void *ptr = aligned_malloc(a, size);
197 cudaError_t err = cudaHostRegister(ptr, a.
base_size, cudaHostRegisterDefault);
198 if (err != cudaSuccess) {
199 printfQuda(
"ERROR: Failed to register pinned memory (%s:%d in %s())\n", file, line, func);
202 track_malloc(
PINNED, a, ptr);
// Host-mapped allocation path: obtain page-aligned host memory, then register
// the full rounded-up extent (a.base_size) with cudaHostRegisterMapped, and
// track the pointer under MAPPED.
// NOTE(review): the enclosing function header and what follows the error
// message are not visible in this excerpt.
215 void *ptr = aligned_malloc(a, size);
217 cudaError_t err = cudaHostRegister(ptr, a.
base_size, cudaHostRegisterMapped);
218 if (err != cudaSuccess) {
219 printfQuda(
"ERROR: Failed to register host-mapped memory (%s:%d in %s())\n", file, line, func);
222 track_malloc(
MAPPED, a, ptr);
// Tracked wrapper around cudaFree().
//   func, file, line - call-site information used in the error messages
//   ptr              - device pointer to release
// Three failure messages are visible: NULL pointer, pointer reported as
// invalid, and a cudaFree error.
// NOTE(review): the conditions that trigger the first two messages, and the
// tracker update, are not visible in this excerpt.
232 void device_free_(
const char *func,
const char *file,
int line,
void *ptr)
235 printfQuda(
"ERROR: Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func);
239 printfQuda(
"ERROR: Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
242 cudaError_t err = cudaFree(ptr);
243 if (err != cudaSuccess) {
244 printfQuda(
"ERROR: Failed to free device memory (%s:%d in %s())\n", file, line, func);
// Releases a host pointer obtained from one of the host allocators.
//   func, file, line - call-site information used in the error messages
//   ptr              - host pointer to release
// The visible fragments show a HOST path that updates the tracker, two paths
// that call cudaHostUnregister (reported as pinned and host-mapped), and a
// final message for an unrecognised pointer.
// NOTE(review): the branch conditions selecting each path, the tracker
// updates for the pinned / mapped paths, and the actual free() call are not
// visible in this excerpt.
256 void host_free_(
const char *func,
const char *file,
int line,
void *ptr)
259 printfQuda(
"ERROR: Attempt to free NULL host pointer (%s:%d in %s())\n", file, line, func);
263 track_free(
HOST, ptr);
// Pinned path: undo the registration with the CUDA runtime.
265 cudaError_t err = cudaHostUnregister(ptr);
266 if (err != cudaSuccess) {
267 printfQuda(
"ERROR: Failed to unregister pinned memory (%s:%d in %s())\n", file, line, func);
// Host-mapped path: same unregister call, different diagnostic.
272 cudaError_t err = cudaHostUnregister(ptr);
273 if (err != cudaSuccess) {
274 printfQuda(
"ERROR: Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line, func);
279 printfQuda(
"ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line, func);
// Usage summary: prints the recorded peak byte counts for device, pinned and
// host memory, each divided by 2^20 to express it in MB.
// NOTE(review): the enclosing function header is not visible in this excerpt.
288 printfQuda(
"Device memory used = %.1f MB\n", max_total_bytes[
DEVICE] / (
double)(1<<20));
289 printfQuda(
"Page-locked host memory used = %.1f MB\n", max_total_pinned_bytes / (
double)(1<<20));
// Reported with ">=" in the message, i.e. as a lower bound.
290 printfQuda(
"Total host memory used >= %.1f MB\n", max_total_host_bytes / (
double)(1<<20));
// Leak report: warns that tracked allocations remain and prints the table
// header ahead of the per-allocation rows.
// NOTE(review): the condition that triggers this report and the row printing
// that follows are not visible in this excerpt.
297 warningQuda(
"The following internal memory allocations were not freed.");
299 print_alloc_header();
void * safe_malloc_(const char *func, const char *file, int line, size_t size)
void * device_malloc_(const char *func, const char *file, int line, size_t size)
void device_free_(const char *func, const char *file, int line, void *ptr)
MemAlloc & operator=(const MemAlloc &a)
MemAlloc(std::string func, std::string file, int line)
void host_free_(const char *func, const char *file, int line, void *ptr)
__device__ unsigned int count
void * mapped_malloc_(const char *func, const char *file, int line, size_t size)
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)