25#if !defined(TIMEMORY_LIBRARY_SOURCE)
26# define TIMEMORY_LIBRARY_SOURCE 1
37#if defined(TIMEMORY_USE_MPI)
38# include "timemory/backends/types/mpi/extern.hpp"
45#include <unordered_map>
50using overhead_map_t = std::unordered_map<size_t, std::pair<wall_clock, size_t>>;
53using trace_map_t = std::unordered_map<size_t, std::deque<traceset_t>>;
57static std::atomic<uint32_t> library_trace_count{ 0 };
62get_overhead() TIMEMORY_VISIBILITY("default");
64get_throttle() TIMEMORY_VISIBILITY("default");
66get_trace_map() TIMEMORY_VISIBILITY("default");
73 static thread_local auto _instance = std::make_unique<overhead_map_t>();
82 static thread_local auto _instance = std::make_unique<throttle_set_t>();
97extern std::array<bool, 2>&
102static bool use_mpi_gotcha =
false;
106#if defined(TIMEMORY_MPI_GOTCHA)
108static bool mpi_is_attached =
false;
114timemory_mpi_finalize_comm_keyval()
116 return tim::get_env<bool>(
"TIMEMORY_MPI_FINALIZE_COMM_KEYVAL",
true);
123timemory_mpi_finalize_gotcha_wrapper()
125 return tim::get_env<bool>(
"TIMEMORY_MPI_FINALIZE_GOTCHA_WRAPPER",
false);
131timemory_trace_mpi_comm_key()
133 static int comm_key = -1;
138timemory_trace_mpi_copy(MPI_Comm,
int,
void*,
void*,
void*,
int*)
144timemory_trace_mpi_finalize(MPI_Comm,
int,
void*,
void*)
147 static bool once =
false;
148 if(once || tim::mpi::is_finalized())
153 PRINT_HERE(
"%s",
"comm keyval finalization started");
156 PRINT_HERE(
"%s",
"comm keyval finalization complete");
164 static void set_attr()
166 if(!timemory_mpi_finalize_comm_keyval())
172 MPI_Comm_create_keyval(&timemory_trace_mpi_copy, &timemory_trace_mpi_finalize,
173 &timemory_trace_mpi_comm_key(),
nullptr);
174 if(ret == MPI_SUCCESS)
175 MPI_Comm_set_attr(MPI_COMM_SELF, timemory_trace_mpi_comm_key(),
nullptr);
180 int operator()(
int* argc,
char***
argv)
183 return MPI_Init(argc,
argv);
185 auto ret = MPI_Init(argc,
argv);
188 auto mode = tim::get_env<std::string>(
"TIMEMORY_INSTRUMENTATION_MODE",
"trace");
191 else if(mode ==
"region")
198 int operator()(
int* argc,
char***
argv,
int req,
int* prov)
201 return MPI_Init_thread(argc,
argv, req, prov);
203 auto ret = MPI_Init_thread(argc,
argv, req, prov);
206 auto mode = tim::get_env<std::string>(
"TIMEMORY_INSTRUMENTATION_MODE",
"trace");
209 else if(mode ==
"region")
218 if(recursive() || tim::mpi::is_finalized())
221 if(!timemory_mpi_finalize_gotcha_wrapper())
223 auto ret = MPI_Finalize();
231 tim::get_env<std::string>(
"TIMEMORY_INSTRUMENTATION_MODE",
"trace");
245 timemory_trace_mpi_finalize(MPI_COMM_WORLD, 0,
nullptr,
nullptr);
249 auto ret = MPI_Finalize();
256 static auto _instance =
257 tim::get_env<std::string>(
"TIMEMORY_TRACE_COMPONENTS",
"");
261 static bool& recursive()
263 static bool _instance =
false;
269 static bool _instance =
true;
290 mpi_trace_gotcha_t::get_initializer() = []() {
293 if(mpi_is_attached || timemory_mpi_finalize_gotcha_wrapper())
311 static void set_attr() {}
315 static auto _instance =
316 tim::get_env<std::string>(
"TIMEMORY_TRACE_COMPONENTS",
"");
322 static bool _instance =
true;
352static std::shared_ptr<mpi_trace_bundle_t> mpi_gotcha_handle{
nullptr };
353static std::map<size_t, string_t> master_hash_ids;
369 library_trace_count.load(std::memory_order_relaxed) > 0);
377 return (get_throttle()->count(_id) > 0);
385 auto itr = get_throttle()->find(_id);
386 if(itr != get_throttle()->
end())
387 get_throttle()->erase(itr);
399 "[timemory-trace]> timemory_add_hash_id failed: locked...\n");
403 fprintf(stderr,
"[timemory-trace]> adding '%s' with hash %lu...\n", name,
410 if(tim::threading::get_id() == 0)
412 master_hash_ids[id] = name;
414 master_hash_ids[_id] = name;
423 fprintf(stderr,
"[timemory-trace]> adding %lu hash ids...\n",
424 (
unsigned long) nentries);
425 for(uint64_t i = 0; i < nentries; ++i)
434 static thread_local bool once_per_thread =
false;
435 if(!once_per_thread && tim::threading::get_id() > 0)
438 fprintf(stderr,
"[timemory-trace]> copying hash ids...\n");
439 once_per_thread =
true;
440 auto _master_ids = master_hash_ids;
441 for(
const auto& itr : _master_ids)
460#if defined(DEBUG) || !defined(NDEBUG)
462 PRINT_HERE(
"Tracing is locked: %s", (lk) ?
"Y" :
"N");
469#if defined(DEBUG) || !defined(NDEBUG)
471 PRINT_HERE(
"Invalid library state: init = %s, fini = %s, enabled = %s",
479 if(get_throttle()->count(
id) > 0)
481#if defined(DEBUG) || !defined(NDEBUG)
483 PRINT_HERE(
"trace %llu is throttled", (
unsigned long long)
id);
488 auto& _trace_map = get_trace_map();
489 auto& _overh_map = *get_overhead();
496 int64_t n = _trace_map[id].size();
500 "beginning trace for '%s' (id = %llu, offset = %lli, rank = %i, pid "
501 "= %i, thread = %i)...\n",
502 name.c_str(), (
long long unsigned)
id, (
long long int) n,
503 tim::dmp::rank(), (int) tim::process::get_id(),
504 (int) tim::threading::get_id());
507 _trace_map[id].emplace_back(
traceset_t{
id });
508 _trace_map[id].back().start();
509 _overh_map[id].first.start();
520 auto& _trace_map = get_trace_map();
525 "[timemory-trace]> timemory_pop_trace_hash(%lu) failed. "
526 "trace_map empty...\n",
531 int64_t ntotal = _trace_map[id].size();
532 int64_t offset = ntotal - 1;
539 "ending trace for '%s' (id = %llu, offset = %lli, rank = %i, pid = "
540 "%i, thread = %i)...\n",
541 name.c_str(), (
long long unsigned)
id, (
long long int) offset,
542 tim::dmp::rank(), (int) tim::process::get_id(),
543 (int) tim::threading::get_id());
546 (*get_overhead())[
id].first.stop();
552 if(offset >= 0 && ntotal > 0)
554 _trace_map[id].back().stop();
555 _trace_map[id].pop_back();
558 if(get_throttle() && get_throttle()->count(
id) > 0)
561 auto _count = ++(get_overhead()->at(
id).second);
565 auto _accum = get_overhead()->at(
id).first.get_accum() / _count;
573 "[timemory-trace]> Throttling all future calls to '%s' on rank = "
575 "%i, thread = %i. avg runtime = %lu ns from %lu invocations... "
576 "Consider eliminating from instrumentation...\n",
577 name.c_str(), tim::dmp::rank(), (
int) tim::process::get_id(),
578 (
int) tim::threading::get_id(), (
unsigned long) _accum,
579 (
unsigned long) _count);
581 get_throttle()->insert(
id);
591 "[timemory-trace]> Warning! function call '%s' within an order "
592 "of magnitude of threshold for throttling value on rank = %i, "
594 "%i, thread = %i. avg runtime = %lu ns from %lu invocations... "
595 "Consider eliminating from instrumentation...\n",
596 name.c_str(), tim::dmp::rank(), (
int) tim::process::get_id(),
597 (
int) tim::threading::get_id(), (
unsigned long) _accum,
598 (
unsigned long) _count);
601 get_overhead()->at(
id).first.reset();
602 get_overhead()->at(
id).second = 0;
617 PRINT_HERE(
"rank = %i, pid = %i, thread = %i, name = %s",
618 tim::dmp::rank(), (
int) tim::process::get_id(),
619 (
int) tim::threading::get_id(), name);
646#if defined(TIMEMORY_MPI_GOTCHA)
648 void timemory_trace_set_mpi(
bool use,
bool attached)
650 use_mpi_gotcha = use;
651 mpi_is_attached = attached;
661 tim::set_env<std::string>(env_var, env_val, 0);
665 user_trace_bundle::global_init(
true);
678 auto _configure_output_path = [&]() {
679 static bool _performed_explicit =
false;
691 _performed_explicit =
true;
693 while(
exe_name.find(
'\\') != std::string::npos)
695 while(
exe_name.find(
'/') != std::string::npos)
698 static const std::vector<std::string> _exe_suffixes = {
".py",
".exe" };
699 for(
const auto& ext : _exe_suffixes)
701 if(
exe_name.find(ext) != std::string::npos)
719 auto _write_info = [&]() {
720 std::stringstream
_info;
721 _info <<
"[timemory_trace_init]> ";
722 if(tim::dmp::is_initialized())
723 _info <<
"rank = " << tim::dmp::rank() <<
", ";
724 _info <<
"pid = " << tim::process::get_id() <<
", ";
725 _info <<
"tid = " << tim::threading::get_id();
726 if(
cmd && strlen(
cmd) > 0)
728 if(comps && strlen(comps) > 0)
729 _info <<
", default components: " << comps;
730 fprintf(stderr,
"%s\n",
_info.str().c_str());
734 auto _configure_components = [&]() {
735 if(comps && strlen(comps) > 0)
738 tim::set_env<std::string>(
"TIMEMORY_TRACE_COMPONENTS", comps, 0);
746 user_trace_bundle::global_init(
true);
753 _configure_components();
754 _configure_output_path();
759 auto _exit_action = [](
int nsig) {
761 if(_manager && !_manager->is_finalized() && !_manager->is_finalizing())
763 std::cout <<
"Finalizing after signal: " << nsig <<
" :: "
772#if !defined(TIMEMORY_MACOS)
778#if defined(TIMEMORY_MPI_GOTCHA)
779 if(!mpi_gotcha_handle.get())
782 std::make_shared<mpi_trace_bundle_t>(
"timemory_trace_mpi_gotcha");
783 mpi_trace_gotcha::get_trace_components() = comps;
785 mpi_trace_gotcha::get_command() = (
cmd) ?
cmd :
"";
787 mpi_trace_gotcha::set_attr();
788 mpi_gotcha_handle->start();
790 else if(mpi_gotcha_handle.get())
802 _configure_components();
811 if(library_trace_count.load() == 0)
821 PRINT_HERE(
"rank = %i, pid = %i, thread = %i", tim::dmp::rank(),
822 (
int) tim::process::get_id(), (
int) tim::threading::get_id());
826 auto _count = --library_trace_count;
853 bool _skip_stop =
false;
854 if(!_manager || _manager->is_finalized())
861 for(
auto& itr : get_trace_map())
863 for(
auto& eitr : itr.second)
870 get_trace_map().clear();
876 mpi_gotcha_handle.reset();
This is a variadic component wrapper where all components are allocated on the stack and cannot be di...
static pointer_t instance()
Get a shared pointer to the instance for the current thread.
static pointer_t master_instance()
Get a shared pointer to the instance on the primary thread.
static void use_exit_hook(bool val)
Enable setting std::exit callback.
static std::string str(const sys_signal &)
static void set_exit_action(signal_function_t _f)
void timemory_push_region(const char *name)
void timemory_pop_region(const char *name)
void timemory_init_library(int argc, char **argv)
Initializes timemory. Not strictly necessary but highly recommended.
void timemory_finalize_library(void)
Finalizes timemory. Output will be generated. Any attempt to store data within timemory storage is un...
#define TIMEMORY_C_GOTCHA(...)
::tim::statistics< Tp > max(::tim::statistics< Tp > lhs, const Tp &rhs)
void read_command_line(Func &&_func)
this only works on Linux where there is a /proc/<PID>/cmdline file
hash_value_t add_hash_id(hash_map_ptr_t &_hash_map, string_view_cref_t _prefix)
add a string to the given hash-map (if it doesn't already exist) and return the hash
hash_map_ptr_t & get_hash_ids()
hash_value_t get_hash_id(Tp &&_prefix)
void reset(TupleT< Tp... > &obj, Args &&... args)
A lightweight synchronization object for preventing recursion. The first template parameter should ha...
std::unique_lock< mutex_t > auto_lock_t
Unique lock type around mutex_t.
void timemory_finalize()
finalization of the specified types
std::vector< std::string > read_command_line(pid_t _pid)
char argparse::argument_parser tim::settings * _settings
tim::mpl::apply< std::string > string
void consume_parameters(ArgsT &&...)
This is a variadic component wrapper which combines the features of tim::component_tuple<T....
The gotcha component rewrites the global offset table such that calling the wrapped function actually...
static void parse(settings *=instance< TIMEMORY_API >())
static settings * instance()
void timemory_trace_init(const char *comps, bool read_command_line, const char *cmd)
std::set< size_t > throttle_set_t
void timemory_push_trace_hash(uint64_t id)
void timemory_push_trace(const char *name)
bool timemory_is_throttled(const char *name)
void timemory_pop_trace(const char *name)
std::unordered_map< size_t, std::pair< wall_clock, size_t > > overhead_map_t
void timemory_trace_set_env(const char *env_var, const char *env_val)
void timemory_pop_trace_hash(uint64_t id)
bool timemory_trace_is_initialized()
std::array< bool, 2 > & get_library_state()
void timemory_copy_hash_ids()
void timemory_trace_finalize(void)
void timemory_add_hash_ids(uint64_t nentries, uint64_t *ids, const char **names)
std::unordered_map< size_t, std::deque< traceset_t > > trace_map_t
void timemory_add_hash_id(uint64_t id, const char *name)
void timemory_reset_throttle(const char *name)
#define CONDITIONAL_PRINT_HERE(CONDITION,...)