70 strings = backtrace_symbols (
array,
size);
79 printfQuda(
"----------------------------------------------------------\n");
85 const char *type_str[] = {
"Device",
"Host ",
"Pinned",
"Mapped"};
86 std::map<void *, MemAlloc>::iterator
entry;
91 printfQuda(
"%s %15p %15lu %s(), %s:%d\n", type_str[type],
ptr, (
unsigned long)
a.base_size,
92 a.func.c_str(),
a.file.c_str(),
a.line);
145 #if (CUDA_VERSION > 4000) && 0 // we need to manually align to page boundaries to allow us to bind a texture to mapped memory
150 static int page_size = 2*getpagesize();
151 a.base_size = ((
size + page_size - 1) / page_size) * page_size;
153 if (!
ptr || align != 0) {
155 printfQuda(
"ERROR: Failed to allocate aligned host memory of size %zu (%s:%d in %s())\n",
size,
a.file.c_str(),
a.line,
a.func.c_str());
172 a.size =
a.base_size =
size;
175 if (
err != cudaSuccess) {
176 printfQuda(
"ERROR: Failed to allocate device memory of size %zu (%s:%d in %s())\n",
size, file, line,
func);
199 a.size =
a.base_size =
size;
201 CUresult
err = cuMemAlloc((CUdeviceptr*)&
ptr,
size);
202 if (
err != CUDA_SUCCESS) {
203 printfQuda(
"ERROR: Failed to allocate device memory of size %zu (%s:%d in %s())\n",
size, file, line,
func);
222 a.size =
a.base_size =
size;
226 printfQuda(
"ERROR: Failed to allocate host memory of size %zu (%s:%d in %s())\n",
size, file, line,
func);
251 cudaError_t
err = cudaHostRegister(
ptr,
a.base_size, cudaHostRegisterDefault);
252 if (
err != cudaSuccess) {
253 printfQuda(
"ERROR: Failed to register pinned memory of size %zu (%s:%d in %s())\n",
size, file, line,
func);
274 cudaError_t
err = cudaHostRegister(
ptr,
a.base_size, cudaHostRegisterMapped);
275 if (
err != cudaSuccess) {
276 printfQuda(
"ERROR: Failed to register host-mapped memory of size %zu (%s:%d in %s())\n",
size, file, line,
func);
295 printfQuda(
"ERROR: Attempt to free NULL device pointer (%s:%d in %s())\n", file, line,
func);
299 printfQuda(
"ERROR: Attempt to free invalid device pointer (%s:%d in %s())\n", file, line,
func);
302 cudaError_t
err = cudaFree(
ptr);
303 if (
err != cudaSuccess) {
304 printfQuda(
"ERROR: Failed to free device memory (%s:%d in %s())\n", file, line,
func);
319 printfQuda(
"ERROR: Attempt to free NULL device pointer (%s:%d in %s())\n", file, line,
func);
323 printfQuda(
"ERROR: Attempt to free invalid device pointer (%s:%d in %s())\n", file, line,
func);
326 CUresult
err = cuMemFree((CUdeviceptr)
ptr);
327 if (
err != CUDA_SUCCESS) {
328 printfQuda(
"ERROR: Failed to free device memory (%s:%d in %s())\n", file, line,
func);
343 printfQuda(
"ERROR: Attempt to free NULL host pointer (%s:%d in %s())\n", file, line,
func);
349 cudaError_t
err = cudaHostUnregister(
ptr);
350 if (
err != cudaSuccess) {
351 printfQuda(
"ERROR: Failed to unregister pinned memory (%s:%d in %s())\n", file, line,
func);
356 cudaError_t
err = cudaHostUnregister(
ptr);
357 if (
err != cudaSuccess) {
358 printfQuda(
"ERROR: Failed to unregister host-mapped memory (%s:%d in %s())\n", file, line,
func);
363 printfQuda(
"ERROR: Attempt to free invalid host pointer (%s:%d in %s())\n", file, line,
func);
382 warningQuda(
"The following internal memory allocations were not freed.");
427 char *enable_device_pool =
getenv(
"QUDA_ENABLE_DEVICE_MEMORY_POOL");
428 if (!enable_device_pool ||
strcmp(enable_device_pool,
"0")!=0) {
432 warningQuda(
"Not using device memory pool allocator");
437 char *enable_pinned_pool =
getenv(
"QUDA_ENABLE_PINNED_MEMORY_POOL");
438 if (!enable_pinned_pool ||
strcmp(enable_pinned_pool,
"0")!=0) {
442 warningQuda(
"Not using pinned memory pool allocator");
453 std::multimap<size_t, void *>::iterator
it;
482 errorQuda(
"Attempt to free invalid pointer");
495 std::multimap<size_t, void *>::iterator
it;
524 errorQuda(
"Attempt to free invalid pointer");
536 std::multimap<size_t, void *>::iterator
it;
538 void *
ptr =
it->second;
548 std::multimap<size_t, void *>::iterator
it;
550 void *
ptr =
it->second;
static long max_total_host_bytes
long device_allocated_peak()
static std::map< void *, size_t > deviceSize
static std::multimap< size_t, void * > deviceCache
void * safe_malloc_(const char *func, const char *file, int line, size_t size)
static void print_alloc_header()
static long max_total_pinned_bytes
static std::multimap< size_t, void * > pinnedCache
void * device_malloc_(const char *func, const char *file, int line, size_t size)
static std::map< void *, MemAlloc > alloc[N_ALLOC_TYPE]
int posix_memalign(void **__memptr, size_t __alignment, size_t __size) __attribute__((availability(macosx
static void print_alloc(AllocType type)
void * device_pinned_malloc_(const char *func, const char *file, int line, size_t size)
static std::map< void *, size_t > pinnedSize
void device_free_(const char *func, const char *file, int line, void *ptr)
Virtual free of device-memory allocation.
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)
Allocate pinned-memory. If a free pre-existing allocation exists reuse this.
void device_free_(const char *func, const char *file, int line, void *ptr)
MemAlloc & operator=(const MemAlloc &a)
void flush_pinned()
Free all outstanding pinned-memory allocations.
static void track_free(const AllocType &type, void *ptr)
int strcmp(const char *__s1, const char *__s2)
static __inline__ T * entry
void device_pinned_free_(const char *func, const char *file, int line, void *ptr)
void init()
Initialize the memory pool allocator.
MemAlloc(std::string func, std::string file, int line)
void host_free_(const char *func, const char *file, int line, void *ptr)
void * malloc(size_t __size) __attribute__((__warn_unused_result__)) __attribute__((alloc_size(1)))
static long max_total_bytes[N_ALLOC_TYPE]
void flush_device()
Free all outstanding device-memory allocations.
static void print_trace(void)
static void * aligned_malloc(MemAlloc &a, size_t size)
long host_allocated_peak()
long mapped_allocated_peak()
static bool pinned_memory_pool
void * mapped_malloc_(const char *func, const char *file, int line, size_t size)
static bool device_memory_pool
static long total_pinned_bytes
void * memset(void *__b, int __c, size_t __len)
long pinned_allocated_peak()
void * device_malloc_(const char *func, const char *file, int line, size_t size)
Allocate device-memory. If a free pre-existing allocation exists reuse this.
static long total_bytes[N_ALLOC_TYPE]
void pinned_free_(const char *func, const char *file, int line, void *ptr)
Virtual free of pinned-memory allocation.
struct cudaExtent unsigned int cudaArray_t array
void * pinned_malloc_(const char *func, const char *file, int line, size_t size)
__device__ unsigned int count[QUDA_MAX_MULTI_REDUCE]
char * getenv(const char *)
static void track_malloc(const AllocType &type, const MemAlloc &a, void *ptr)
static long total_host_bytes