31 : func(func), file(file), line(line),
size(0),
base_size(0) { }
// Running byte count and high-water mark updated by track_malloc/track_free
// for the host aggregate.
49 static long total_host_bytes, max_total_host_bytes;
// Running byte count and high-water mark updated by track_malloc/track_free
// for the pinned (page-locked) aggregate.
50 static long total_pinned_bytes, max_total_pinned_bytes;
// Prints header output for the allocation listing through printfQuda;
// this includes a dashed separator line.
52 static void print_alloc_header()
55 printfQuda(
"----------------------------------------------------------\n");
61 const char *type_str[] = {
"Device",
"Host ",
"Pinned",
"Mapped"};
62 std::map<void *, MemAlloc>::iterator entry;
64 for (entry = alloc[type].begin(); entry != alloc[type].end(); entry++) {
65 void *ptr = entry->first;
66 MemAlloc a = entry->second;
67 printfQuda(
"%s %15p %15lu %s(), %s:%d\n", type_str[type], ptr, (
unsigned long) a.base_size,
68 a.func.c_str(), a.file.c_str(), a.line);
// Accounts for a new allocation in the running byte counters.
//   type - allocation category; indexes total_bytes and max_total_bytes
//   a    - allocation record; a.base_size is the byte count that is added
//   ptr  - address of the allocation
//          (NOTE(review): presumably the key under which `a` is stored in
//          alloc[type] - confirm)
// NOTE(review): confirm which allocation categories feed the host and pinned
// aggregates below.
73 static void track_malloc(
const AllocType &type,
const MemAlloc &a,
void *ptr)
// Per-category running total and its high-water mark.
75 total_bytes[type] += a.base_size;
76 if (total_bytes[type] > max_total_bytes[type]) {
77 max_total_bytes[type] = total_bytes[type];
// Host aggregate and its high-water mark.
80 total_host_bytes += a.base_size;
81 if (total_host_bytes > max_total_host_bytes) {
82 max_total_host_bytes = total_host_bytes;
// Pinned aggregate and its high-water mark.
86 total_pinned_bytes += a.base_size;
87 if (total_pinned_bytes > max_total_pinned_bytes) {
88 max_total_pinned_bytes = total_pinned_bytes;
// Reverses the accounting done by track_malloc for one allocation and drops
// its record from alloc[type].
//   type - allocation category the pointer was tracked under
//   ptr  - address whose record is looked up and erased
// NOTE(review): the lookup uses operator[]; if alloc[type] is a std::map this
// inserts a default record for an unknown key (base_size as default-initialised)
// before it is erased again - callers are presumably expected to validate ptr
// first; confirm.
95 static void track_free(
const AllocType &type,
void *ptr)
// Bytes originally charged for this allocation.
97 size_t size = alloc[type][ptr].base_size;
98 total_bytes[type] -= size;
100 total_host_bytes -= size;
103 total_pinned_bytes -= size;
// Forget the allocation.
105 alloc[type].erase(ptr);
// Allocates host memory and records the allocated byte count in a.base_size.
//   a    - allocation record; base_size is written here, and file/line/func
//          are used in the failure message
//   size - number of bytes requested by the caller
115 static void *aligned_malloc(MemAlloc &a,
size_t size)
121 #if (CUDA_VERSION > 4000)
// Page size is queried once and cached in a function-local static.
125 static int page_size = getpagesize();
// Round the request up to a whole number of pages.
126 a.base_size = ((size + page_size - 1) / page_size) * page_size;
// NOTE(review): the status returned by posix_memalign is discarded here;
// confirm that the failure check does not depend on a pointer that
// posix_memalign leaves unmodified when it fails.
127 posix_memalign(&ptr, page_size, a.base_size);
130 printfQuda(
"ERROR: Failed to allocate aligned host memory (%s:%d in %s())\n", a.file.c_str(), a.line, a.func.c_str());
// Allocates device memory with cudaMalloc and registers it with the tracker
// under the DEVICE category.
//   func, file, line - call-site identification stored in the allocation
//                      record and used in the error message
//   size             - number of bytes to allocate
// NOTE(review): presumably returns the pointer obtained from cudaMalloc -
// confirm.
142 void *
device_malloc_(
const char *func,
const char *file,
int line,
size_t size)
144 MemAlloc a(func, file, line);
// No padding for device allocations: requested and tracked sizes are equal.
147 a.size = a.base_size = size;
149 cudaError_t err = cudaMalloc(&ptr, size);
150 if (err != cudaSuccess) {
151 printfQuda(
"ERROR: Failed to allocate device memory (%s:%d in %s())\n", file, line, func);
154 track_malloc(
DEVICE, a, ptr);
// Allocates host memory with malloc and registers it with the tracker under
// the HOST category.
//   func, file, line - call-site identification stored in the allocation
//                      record and used in the error message
//   size             - number of bytes to allocate
// NOTE(review): presumably returns the pointer obtained from malloc - confirm.
164 void *
safe_malloc_(
const char *func,
const char *file,
int line,
size_t size)
166 MemAlloc a(func, file, line);
// No padding for plain host allocations: requested and tracked sizes are equal.
167 a.size = a.base_size = size;
169 void *ptr = malloc(size);
171 printfQuda(
"ERROR: Failed to allocate host memory (%s:%d in %s())\n", file, line, func);
174 track_malloc(
HOST, a, ptr);
// Allocates host memory through aligned_malloc, registers it with the CUDA
// runtime using cudaHostRegisterDefault, and tracks it under the PINNED
// category.
//   func, file, line - call-site identification stored in the allocation
//                      record and used in the error message
//   size             - number of bytes requested
// NOTE(review): presumably returns the registered pointer - confirm.
188 void *
pinned_malloc_(
const char *func,
const char *file,
int line,
size_t size)
190 MemAlloc a(func, file, line);
191 void *ptr = aligned_malloc(a, size);
// Register a.base_size bytes (the size recorded by aligned_malloc), not the
// raw request.
193 cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterDefault);
194 if (err != cudaSuccess) {
195 printfQuda(
"ERROR: Failed to register pinned memory (%s:%d in %s())\n", file, line, func);
198 track_malloc(
PINNED, a, ptr);
// Allocates host memory through aligned_malloc, registers it with the CUDA
// runtime using cudaHostRegisterMapped, and tracks it under the MAPPED
// category.
//   func, file, line - call-site identification stored in the allocation
//                      record and used in the error message
//   size             - number of bytes requested
// NOTE(review): presumably returns the registered host pointer - confirm.
208 void *
mapped_malloc_(
const char *func,
const char *file,
int line,
size_t size)
210 MemAlloc a(func, file, line);
211 void *ptr = aligned_malloc(a, size);
// Register a.base_size bytes (the size recorded by aligned_malloc), not the
// raw request.
213 cudaError_t err = cudaHostRegister(ptr, a.base_size, cudaHostRegisterMapped);
214 if (err != cudaSuccess) {
215 printfQuda(
"ERROR: Failed to register host-mapped memory (%s:%d in %s())\n", file, line, func);
218 track_malloc(
MAPPED, a, ptr);
// Releases device memory with cudaFree, reporting NULL pointers, invalid
// pointers and cudaFree failures through printfQuda.
//   func, file, line - call-site identification used in the error messages
//   ptr              - device pointer to release
// NOTE(review): "invalid" presumably means the pointer is not present in the
// DEVICE tracking table - confirm.
228 void device_free_(
const char *func,
const char *file,
int line,
void *ptr)
231 printfQuda(
"ERROR: Attempt to free NULL device pointer (%s:%d in %s())\n", file, line, func);
235 printfQuda(
"ERROR: Attempt to free invalid device pointer (%s:%d in %s())\n", file, line, func);
238 cudaError_t err = cudaFree(ptr);
239 if (err != cudaSuccess) {
240 printfQuda(
"ERROR: Failed to free device memory (%s:%d in %s())\n", file, line, func);
// Releases a host pointer. Reports NULL and invalid pointers through
// printfQuda, removes HOST pointers from the tracker, and unregisters pinned
// and host-mapped memory with cudaHostUnregister.
//   func, file, line - call-site identification used in the error messages
//   ptr              - host pointer to release
// NOTE(review): presumably dispatches on which tracking table contains ptr
// (HOST, PINNED or MAPPED) - confirm.
252 void host_free_(
const char *func,
const char *file,
int line,
void *ptr)
255 printfQuda(
"ERROR: Attempt to free NULL host pointer (%s:%d in %s())\n", file, line, func);
// Plain host allocation: only the bookkeeping entry is removed here.
259 track_free(
HOST, ptr);
// Pinned allocation: undo the cudaHostRegister done at allocation time.
261 cudaError_t err = cudaHostUnregister(ptr);
262 if (err != cudaSuccess) {
263 printfQuda(
"ERROR: Failed to unregister pinned memory (%s:%d in %s())\n", file, line, func);
// Host-mapped allocation: undo the cudaHostRegister done at allocation time.
268 cudaError_t err = cudaHostUnregister(ptr);
269 if (err != cudaSuccess) {
270 printfQuda(
"ERROR: Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line, func);
275 printfQuda(
"ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line, func);
284 printfQuda(
"Device memory used = %.1f MB\n", max_total_bytes[
DEVICE] / (
double)(1<<20));
285 printfQuda(
"Page-locked host memory used = %.1f MB\n", max_total_pinned_bytes / (
double)(1<<20));
286 printfQuda(
"Total host memory used >= %.1f MB\n", max_total_host_bytes / (
double)(1<<20));
293 warningQuda(
"The following internal memory allocations were not freed.");
295 print_alloc_header();