28#include "timemory/backends/gpu.hpp"
43#if defined(__GNUC__) && (__GNUC__ >= 6)
44# pragma GCC diagnostic push
45# pragma GCC diagnostic ignored "-Wignored-attributes"
54:
base<malloc_gotcha, double>
57#if defined(TIMEMORY_USE_GPU)
73#if defined(TIMEMORY_USE_CUDA)
74 return "GOTCHA wrapper for memory allocation functions: malloc, calloc, free, "
75 "cudaMalloc, cudaMallocHost, cudaMallocManaged, cudaHostAlloc, cudaFree, "
77#elif defined(TIMEMORY_USE_HIP)
78 return "GOTCHA wrapper for memory allocation functions: malloc, calloc, free, "
79 "hipMalloc, hipMallocHost, hipMallocManaged, hipHostAlloc, hipFree, "
82 return "GOTCHA wrapper for memory allocation functions: malloc, calloc, free";
86 using base_type::accum;
89 using base_type::value;
91 template <
typename Tp>
94 template <
typename Tp>
98 template <
typename Tp>
103 for(
auto& itr : get_cleanup_list())
105 get_cleanup_list().clear();
109 template <
typename Tp>
112 template <
typename Tp>
147 value = (nmemb * size);
157 get_allocation_map()[ptr] = value;
166 auto itr = get_allocation_map().find(ptr);
167 if(itr != get_allocation_map().
end())
171 get_allocation_map().erase(itr);
177 printf(
"[%s]> free of unknown pointer size: %p\n",
185#if defined(TIMEMORY_USE_GPU)
195 m_last_addr = devPtr;
206 m_last_addr = hostPtr;
214 void audit(audit::outgoing, gpu::error_t err)
218 void* ptr = (
void*) ((
char**) (m_last_addr)[0]);
219 get_allocation_map()[ptr] = value;
222 PRINT_HERE(
"%s did not return success, values may be corrupted",
253 using alloc_map_t = std::unordered_map<void*, size_t>;
254 using clean_list_t = std::vector<std::function<void()>>;
256 static clean_list_t& get_cleanup_list()
258 static clean_list_t _instance{};
262 static alloc_map_t& get_allocation_map()
264 static thread_local alloc_map_t _instance{};
269 const char* m_prefix =
nullptr;
270#if defined(TIMEMORY_USE_GPU)
271 void** m_last_addr =
nullptr;
277#if defined(TIMEMORY_USE_GOTCHA)
279template <
typename Tp>
286 using tuple_t = push_back_t<Tp, this_type>;
287 using local_gotcha_type = gotcha<data_size, tuple_t, type_list<this_type>>;
289 local_gotcha_type::get_default_ready() =
false;
290 local_gotcha_type::get_initializer() = []() {
291 local_gotcha_type::template configure<0, void*, size_t>(
"malloc");
292 local_gotcha_type::template configure<1, void*, size_t, size_t>(
"calloc");
293 local_gotcha_type::template configure<2, void, void*>(
"free");
297# if defined(TIMEMORY_USE_CUDA)
298 local_gotcha_type::template configure<3, cudaError_t, void**, size_t>(
300 local_gotcha_type::template configure<4, cudaError_t, void**, size_t>(
302 local_gotcha_type::template
configure<5, cudaError_t,
void**, size_t,
303 unsigned int>(
"cudaMallocManaged");
304 local_gotcha_type::template
configure<6, cudaError_t,
void**, size_t,
305 unsigned int>(
"cudaHostAlloc");
306 local_gotcha_type::template configure<7, cudaError_t, void*>(
"cudaFree");
307 local_gotcha_type::template configure<8, cudaError_t, void*>(
"cudaFreeHost");
308# elif defined(TIMEMORY_USE_HIP)
309 local_gotcha_type::template configure<3, hipError_t, void**, size_t>(
"hipMalloc");
310 local_gotcha_type::template configure<4, hipError_t, void**, size_t>(
312 local_gotcha_type::template
configure<5, hipError_t,
void**, size_t,
313 unsigned int>(
"hipMallocManaged");
314 local_gotcha_type::template
configure<6, hipError_t,
void**, size_t,
315 unsigned int>(
"hipHostAlloc");
316 local_gotcha_type::template configure<7, hipError_t, void*>(
"hipFree");
317 local_gotcha_type::template configure<8, hipError_t, void*>(
"hipFreeHost");
321 get_cleanup_list().emplace_back([]() { malloc_gotcha::tear_down<Tp>(); });
324template <
typename Tp>
331 using tuple_t = push_back_t<Tp, this_type>;
332 using local_gotcha_type = gotcha<data_size, tuple_t, type_list<this_type>>;
334 local_gotcha_type::get_default_ready() =
false;
335 local_gotcha_type::get_initializer() = []() {};
336 local_gotcha_type::disable();
348:
base<memory_allocations, void>
364 return "Number of bytes allocated/freed instead of peak/current memory usage: "
365 "free(malloc(10)) + free(malloc(10)) would use 10 bytes but this would "
369 static void global_init() { malloc_gotcha::configure<component_tuple_t<>>(); }
375 if(_cnt.first == 0 && _cnt.second == 0 && !get_data())
377 get_data() = std::make_unique<malloc_bundle_t>();
385 if(_cnt.first == 0 && _cnt.second == 0 && get_data())
388 get_data().reset(
nullptr);
403#if defined(__GNUC__) && (__GNUC__ >= 6)
404# pragma GCC diagnostic pop
Implementation of the gotcha component(s)
Definition for various functions for construct in operations.
void stop(TupleT< Tp... > &obj, Args &&... args)
void start(TupleT< Tp... > &obj, Args &&... args)
Inherit from this policy to add reference counting support. Useful if you want to turn a global setti...
char const std::string & _prefix
convert_t< mpl::available_t< concat< T... > >, component_tuple<> > component_tuple_t
typename mpl::push_back< Tuple, T >::type push_back_t
tim::mpl::apply< std::string > string
void consume_parameters(ArgsT &&...)
lightweight tuple-alternative for meta-programming logic
Used by component audit member function to designate that the parameters being passed are incoming (e....
Used by component audit member function to designate that the parameters being passed are outgoing (e....
static int64_t get_unit()
static std::string get_label()
void set_stopped()
store that stop has been called
storage< Tp, Value > storage_type
void set_started()
store that start has been called
A very lightweight storage class which provides nothing.
The gotcha component rewrites the global offset table such that calling the wrapped function actually...
void set_prefix(const char *_prefix)
void audit(audit::outgoing, void *ptr)
void* is returned from malloc and calloc
push_back_t< Tp, gotcha_type< Tp > > component_type
void audit(audit::incoming, size_t nbytes)
nbytes is passed to malloc
double get_display() const
void audit(audit::incoming, void *ptr)
void* is passed to free
std::hash< std::string > string_hash
static std::string label()
push_back_t< Tp, this_type > gotcha_component_type
this_type & operator-=(const this_type &rhs)
static void global_finalize()
void audit(audit::incoming, size_t nmemb, size_t size)
nmemb and size are passed to calloc
this_type & operator+=(const this_type &rhs)
static std::string description()
static constexpr size_t data_size
This component wraps malloc, calloc, free, CUDA/HIP malloc/free via GOTCHA and tracks the number of b...
std::unique_ptr< malloc_bundle_t > data_pointer_t
static void global_init()
typename malloc_gotcha::gotcha_type< component_tuple_t<> > malloc_gotcha_t
static std::string description()
component_tuple_t< malloc_gotcha_t > malloc_bundle_t
static std::string label()
static void global_finalize()
#define DEBUG_PRINT_HERE(...)