25 #ifndef TIMEMORY_SETTINGS_SETTINGS_CPP_
26 # define TIMEMORY_SETTINGS_SETTINGS_CPP_
29 # include "timemory/backends/dmp.hpp"
30 # include "timemory/defines.h"
34 # include "timemory/tpls/cereal/archives.hpp"
49 template <
typename Archive>
59 template <
typename Archive>
63 ar(cereal::make_nvp(
"settings", _obj));
69 std::shared_ptr<settings>
74 static auto _instance = shared_instance<TIMEMORY_API>();
87 return _instance.get();
94 settings::command_line()
103 settings::environment()
114 # if defined(TIMEMORY_UNIX)
120 _environ.push_back(
environ[idx++]);
124 return std::vector<std::string>();
138 if(std::strftime(mbstr,
sizeof(mbstr), dt_format, std::localtime(dt_curr)))
204 if(
_dir.find(_local_datetime) == std::string::npos)
208 _dir += _local_datetime;
224 auto& _cmdline = command_line();
226 for(
int i = 0; i < argc; ++i)
235 bool _mpi_init,
const int32_t _mpi_rank,
bool fake,
245 auto only_ascii = [](
char c) {
return !isascii(c); };
251 if(!_explicit.empty())
259 auto _rank_suffix = (_mpi_init && _mpi_rank >= 0)
264 if(_ext.find(
'.') != 0)
266 auto plast =
static_cast<intmax_t
>(
_prefix.length()) - 1;
272 using strpairvec_t = std::vector<std::pair<std::string, std::string>>;
273 for(
auto&& itr : strpairvec_t{ {
"--",
"-" }, {
"__",
"_" }, {
"//",
"/" } })
275 auto pos = std::string::npos;
276 while((
pos = fpath.find(itr.first)) != std::string::npos)
277 fpath.replace(
pos, itr.first.length(), itr.second);
279 return filepath::osrepr(fpath);
287 bool _mpi_init,
const int32_t _mpi_rank,
298 auto only_ascii = [](
char c) {
return !isascii(c); };
303 if(_explicit.length() > 0)
306 auto _rank_suffix = (_mpi_init && _mpi_rank >= 0)
309 if(_ext.find(
'.') != 0)
311 auto plast =
_prefix.length() - 1;
315 while(fpath.find(
"//") != std::string::npos)
316 fpath.replace(fpath.find(
"//"), 2,
"/");
317 return std::move(fpath);
350 static auto _once =
false;
353 PRINT_HERE(
"%s",
"settings parsing has been suppressed");
380 , m_order(rhs.m_order)
381 , m_command_line(rhs.m_command_line)
382 , m_environment(rhs.m_environment)
384 for(
auto& itr : rhs.m_data)
385 m_data.emplace(itr.first, itr.second->clone());
386 for(
auto& itr : m_order)
388 if(m_data.find(itr) == m_data.end())
390 auto ritr = rhs.m_data.find(itr);
391 if(ritr == rhs.m_data.end())
397 m_data.emplace(itr, ritr->second->clone());
413 for(
auto& itr : rhs.m_data)
414 m_data[itr.first] = itr.second->clone();
415 m_order = rhs.m_order;
416 m_command_line = rhs.m_command_line;
417 m_environment = rhs.m_environment;
418 for(
auto& itr : m_order)
420 if(m_data.find(itr) == m_data.end())
422 auto ritr = rhs.m_data.find(itr);
423 if(ritr == rhs.m_data.end())
429 m_data.emplace(itr, ritr->second->clone());
440 settings::initialize_core()
443 auto homedir = get_env<string_t>(
"HOME");
447 "Configuration file for timemory",
456 "Disable processing of setting configuration files",
false,
457 strvector_t({
"--timemory-suppress-config",
"--timemory-no-config" }));
461 "Disable parsing environment",
false,
462 strvector_t({
"--timemory-suppress-parsing" }), -1, 1);
469 "Verbosity level", 0,
473 "Enable debug output",
false,
478 "Set the label hierarchy mode to default to flat",
480 strvector_t({
"--timemory-flat-profile" }), -1, 1);
484 "Set the label hierarchy mode to default to timeline",
486 strvector_t({
"--timemory-timeline-profile" }), -1, 1);
490 "Set the maximum depth of label hierarchy reporting",
491 std::numeric_limits<uint16_t>::max(),
strvector_t({
"--timemory-max-depth" }), 1);
498 settings::initialize_components()
503 "A specification of components which is used by multiple variadic bundlers and "
504 "user_bundles as the fall-back set of components if their specific variable is "
505 "not set. E.g. user_mpip_bundle will use this if TIMEMORY_MPIP_COMPONENTS is not "
507 "",
strvector_t({
"--timemory-global-components" }));
511 "A specification of components which will be added "
512 "to structures containing the 'user_ompt_bundle'. Priority: TRACE_COMPONENTS -> "
513 "PROFILER_COMPONENTS -> COMPONENTS -> GLOBAL_COMPONENTS",
514 "",
strvector_t({
"--timemory-ompt-components" }));
518 "A specification of components which will be added "
519 "to structures containing the 'user_mpip_bundle'. Priority: TRACE_COMPONENTS -> "
520 "PROFILER_COMPONENTS -> COMPONENTS -> GLOBAL_COMPONENTS",
521 "",
strvector_t({
"--timemory-mpip-components" }));
525 "A specification of components which will be added "
526 "to structures containing the 'user_ncclp_bundle'. Priority: MPIP_COMPONENTS -> "
527 "TRACE_COMPONENTS -> PROFILER_COMPONENTS -> COMPONENTS -> GLOBAL_COMPONENTS",
528 "",
strvector_t({
"--timemory-ncclp-components" }));
532 "A specification of components which will be used by the interfaces which are "
533 "designed for full profiling. These components will be subjected to throttling. "
534 "Priority: COMPONENTS -> GLOBAL_COMPONENTS",
535 "",
strvector_t({
"--timemory-trace-components" }));
539 "A specification of components which will be used by the interfaces which are "
540 "designed for full python profiling. This specification will be overridden by a "
541 "trace_components specification. Priority: COMPONENTS -> GLOBAL_COMPONENTS",
542 "",
strvector_t({
"--timemory-profiler-components" }));
546 "A specification of components which will be used by the interfaces which are "
547 "designed for kokkos profiling. Priority: TRACE_COMPONENTS -> "
548 "PROFILER_COMPONENTS -> COMPONENTS -> GLOBAL_COMPONENTS",
549 "",
strvector_t({
"--timemory-kokkos-components" }));
553 "A specification of components which is used by the library interface. This "
554 "falls back to TIMEMORY_GLOBAL_COMPONENTS.",
562 settings::initialize_io()
567 "Generate output at application termination",
true,
572 true,
strvector_t({
"--timemory-cout-output" }), -1, 1);
576 true,
strvector_t({
"--timemory-file-output" }), -1, 1);
580 "Write text output files",
true,
585 "Write json output files",
true,
590 "Write hierarchical json output files",
true,
595 "Write dart measurements for CDash",
false,
600 "Output data to subfolder w/ a timestamp (see also: TIMEMORY_TIME_FORMAT)",
false,
610 "Generate a difference output vs. a pre-existing output (see also: "
611 "TIMEMORY_INPUT_PATH and TIMEMORY_INPUT_PREFIX)",
612 false,
strvector_t({
"--timemory-diff-output" }), -1, 1);
616 "Write a json output for flamegraph visualization (use chrome://tracing)",
true,
617 strvector_t({
"--timemory-flamegraph-output" }), -1, 1);
621 "Write a CTestNotes.txt for each text output",
false,
626 "Explicitly specify the output folder for results",
"timemory-output",
631 "Explicitly specify a prefix for all output files",
637 "Explicitly specify the input folder for difference "
638 "comparisons (see also: TIMEMORY_DIFF_OUTPUT)",
643 "Explicitly specify the prefix for input files used in difference comparisons "
644 "(see also: TIMEMORY_DIFF_OUTPUT)",
645 "",
strvector_t({
"--timemory-input-prefix" }), 1);
649 "File extensions used when searching for input files used in difference "
650 "comparisons (see also: TIMEMORY_DIFF_OUTPUT)",
651 "json,xml",
strvector_t({
"--timemory-input-extensions" }));
658 settings::initialize_format()
663 "Customize the folder generation when TIMEMORY_TIME_OUTPUT is enabled (see also: "
665 "%F_%I.%M_%p",
strvector_t({
"--timemory-time-format" }), 1);
669 "Set the global output precision for components",
673 "Set the global output width for components", -1,
678 "Set the maximum width for component label outputs",
683 "Set the global numerical reporting to scientific format",
false,
688 "Set the precision for components with 'is_timing_category' type-trait", -1);
692 "Set the output width for components with 'is_timing_category' type-trait", -1);
696 "Set the units for components with 'uses_timing_units' type-trait",
"",
701 "Set the numerical reporting format for components "
702 "with 'is_timing_category' type-trait",
707 "Set the precision for components with 'is_memory_category' type-trait", -1);
711 "Set the output width for components with 'is_memory_category' type-trait", -1);
715 "Set the units for components with 'uses_memory_units' type-trait",
"",
720 "Set the numerical reporting format for components "
721 "with 'is_memory_category' type-trait",
726 "Frequency of dashed separator lines in text output", 0);
733 settings::initialize_parallel()
738 "Maximum number of times a worker thread bookmarks the call-graph location w.r.t."
739 " the master thread. Higher values tend to increase the finalization merge time",
744 "Enable/disable combining thread-specific data",
true,
745 strvector_t({
"--timemory-collapse-threads" }), -1, 1);
749 "Enable/disable combining process-specific data",
true,
750 strvector_t({
"--timemory-collapse-processes" }), -1, 1);
754 "Enable pinning threads to CPUs (Linux-only)",
false,
755 strvector_t({
"--timemory-cpu-affinity" }), -1, 1);
759 "Process ID for the components which require this", process::get_target_id());
763 "Enable/disable timemory calling MPI_Init / MPI_Init_thread during certain "
764 "timemory_init(...) invocations",
765 false,
strvector_t({
"--timemory-mpi-init" }), -1, 1);
769 "Enable/disable timemory calling MPI_Finalize during "
770 "timemory_finalize(...) invocations",
771 false,
strvector_t({
"--timemory-mpi-finalize" }), -1, 1);
775 "Call MPI_Init_thread instead of MPI_Init (see also: TIMEMORY_MPI_INIT)",
776 mpi::use_mpi_thread(),
strvector_t({
"--timemory-mpi-thread" }), -1, 1);
780 "MPI_Init_thread mode: 'single', 'serialized', "
781 "'funneled', or 'multiple' (see also: "
782 "TIMEMORY_MPI_INIT and TIMEMORY_MPI_THREAD)",
783 mpi::use_mpi_thread_type(),
strvector_t({
"--timemory-mpi-thread-type" }), 1);
787 "Enable/disable timemory calling upcxx::init() during certain "
788 "timemory_init(...) invocations",
789 false,
strvector_t({
"--timemory-upcxx-init" }), -1, 1);
793 "Enable/disable timemory calling upcxx::finalize() during "
794 "timemory_finalize()",
795 false,
strvector_t({
"--timemory-upcxx-finalize" }), -1, 1);
799 "Total number of nodes used in application. Setting this value > 1 will result "
800 "in aggregating N processes into groups of N / NODE_COUNT",
808 settings::initialize_tpls()
813 "Enable multithreading support when using PAPI",
true,
814 strvector_t({
"--timemory-papi-threading" }), -1, 1);
818 "Enable multiplexing when using PAPI",
false,
819 strvector_t({
"--timemory-papi-multiplexing" }), -1, 1);
823 "Configure PAPI errors to trigger a runtime error",
false,
824 strvector_t({
"--timemory-papi-fail-on-error" }), -1, 1);
828 "Configure suppression of reporting PAPI errors/warnings",
false,
833 "PAPI presets and events to collect (see also: papi_avail)",
"",
838 "Configure PAPI to attach to another process (see also: TIMEMORY_TARGET_PID)",
843 "Value at which PAPI hw counters trigger an overflow callback", 0,
848 "Batch size for create cudaEvent_t in cuda_event components", 5);
852 "Use cudaDeviceSync when stopping NVTX marker (vs. cudaStreamSychronize)",
true);
856 "Default group of kinds tracked via CUpti Activity API", 1,
857 strvector_t({
"--timemory-cupti-activity-level" }), 1);
861 "Specific cupti activity kinds to track",
"",
862 strvector_t({
"--timemory-cupti-activity-kinds" }));
866 "Hardware counter event types to collect on NVIDIA GPUs",
"",
871 "Hardware counter metric types to collect on NVIDIA GPUs",
"",
876 "Target device for CUPTI data collection", 0,
879 insert<int>(
"TIMEMORY_CUPTI_PCSAMPLING_PERIOD",
"cupti_pcsampling_period",
880 "The period for PC sampling. Must be >= 5 and <= 31", 8,
881 strvector_t{
"--timemory-cupti-pcsampling-period" });
884 "TIMEMORY_CUPTI_PCSAMPLING_PER_LINE",
"cupti_pcsampling_per_line",
885 "Report the PC samples per-line or collapse into one entry for entire function",
886 false,
strvector_t{
"--timemory-cupti-pcsampling-per-line" });
889 "TIMEMORY_CUPTI_PCSAMPLING_REGION_TOTALS",
"cupti_pcsampling_region_totals",
890 "When enabled, region markers will report total samples from all child functions",
891 true,
strvector_t{
"--timemory-cupti-pcsampling-region-totals" });
893 insert<bool>(
"TIMEMORY_CUPTI_PCSAMPLING_SERIALIZED",
"cupti_pcsampling_serialized",
894 "Serialize all the kernel functions",
false,
895 strvector_t{
"--timemory-cupti-pcsampling-serialize" });
897 insert<size_t>(
"TIMEMORY_CUPTI_PCSAMPLING_NUM_COLLECT",
898 "cupti_pcsampling_num_collect",
"Number of PCs to be collected",
900 strvector_t{
"--timemory-cupti-pcsampling-num-collect" });
902 insert<std::string>(
"TIMEMORY_CUPTI_PCSAMPLING_STALL_REASONS",
903 "cupti_pcsampling_stall_reasons",
904 "The PC sampling stall reasons to count",
std::string{},
905 strvector_t{
"--timemory-cupti-pcsampling-stall-reasons" });
909 "Configure the CrayPAT categories to collect",
910 get_env<std::string>(
"PAT_RT_PERFCTR",
""))
922 settings::initialize_roofline()
927 "Configure the roofline collection mode. Options: 'op' 'ai'.",
"op",
932 "Configure the roofline collection mode for CPU specifically. Options: 'op', "
934 "op",
strvector_t({
"--timemory-cpu-roofline-mode" }), 1, 1,
939 "Configure the roofline collection mode for GPU specifically. Options: 'op' "
941 static_cast<tsettings<string_t>*
>(
944 strvector_t({
"--timemory-gpu-roofline-mode" }), 1, 1,
949 "Configure custom hw counters to add to the cpu roofline",
"");
953 "Configure custom hw counters to add to the gpu roofline",
"");
957 "Configure roofline labels/descriptions/output-files "
958 "encode the list of data types",
963 "Configure labels, etc. for the roofline components "
964 "for CPU (see also: TIMEMORY_ROOFLINE_TYPE_LABELS)",
965 static_cast<tsettings<bool>*
>(
971 "Configure labels, etc. for the roofline components "
972 "for GPU (see also: TIMEMORY_ROOFLINE_TYPE_LABELS)",
973 static_cast<tsettings<bool>*
>(
979 "Configure the roofline to include the hw counters "
980 "required for generating an instruction roofline",
987 settings::initialize_miscellaneous()
992 "Enable/disable components adding secondary (child) entries when available. E.g. "
993 "suppress individual CUDA kernels, etc. when using Cupti components",
994 true,
strvector_t({
"--timemory-add-secondary" }), -1, 1);
998 "Minimum number of laps before checking whether a key should be throttled", 10000,
1003 "Average call time in nanoseconds when # laps > throttle_count that triggers "
1005 10000,
strvector_t({
"--timemory-throttle-value" }), 1);
1009 "Enable signals in timemory_init",
false,
1010 strvector_t({
"--timemory-enable-signal-handler" }), -1, 1)
1014 "Allow signal handling to be activated", signal_settings::allow(),
1015 strvector_t({
"--timemory-allow-signal-handler" }), -1, 1);
1027 "Configure default setting for auto_{list,tuple,hybrid} to write to stdout during"
1028 " destruction of the bundle",
1029 false,
strvector_t({
"--timemory-destructor-report" }), -1, 1);
1033 "Enable/disable stopping any markers still running during finalization",
true,
1034 strvector_t({
"--timemory-stack-clearing" }), -1, 1);
1038 "Notify about tim::manager creation and destruction",
1044 strvector_t({
"--timemory-network-interface" }), -1, 1);
1051 settings::initialize_ert()
1056 "Number of threads to use when running ERT", 0);
1060 "Number of threads to use when running ERT on CPU",
1061 std::thread::hardware_concurrency());
1065 "Number of threads which launch kernels when running ERT on the GPU", 1);
1069 "Number of streams to use when launching kernels in ERT on the GPU", 1);
1073 "Configure the grid size (number of blocks) for ERT on GPU (0 == auto-compute)",
1078 "Configure the block size (number of threads per block) for ERT on GPU", 1024);
1082 "Configure the alignment (in bits) when running ERT on CPU (0 == 8 * sizeof(T))",
1087 "Configure the minimum working size when running ERT (0 == device specific)", 0);
1092 "Configure the minimum working size when running ERT on CPU", 64);
1097 "Configure the minimum working size when running ERT on GPU", 10 * 1000 * 1000);
1101 "Configure the max data size when running ERT on CPU", 0);
1105 "Configure the max data size when running ERT on CPU", 0);
1109 "Configure the max data size when running ERT on GPU", 500 * 1000 * 1000);
1113 "Skip these number of ops (i.e. ERT_FLOPS) when were set at compile time",
"");
1120 settings::initialize_dart()
1125 "Only echo this measurement type (see also: TIMEMORY_DART_OUTPUT)",
"",
1130 "Only echo this number of dart tags (see also: TIMEMORY_DART_OUTPUT)", 1,
1135 "Echo the category instead of the label (see also: TIMEMORY_DART_OUTPUT)",
true,
1147 m_data.reserve(160);
1150 initialize_components();
1152 initialize_format();
1153 initialize_parallel();
1155 initialize_roofline();
1156 initialize_miscellaneous();
1167 std::ifstream ifs(inp);
1172 return read(ifs, inp);
1181 if(inp.find(
".json") != std::string::npos || inp ==
"json")
1187 ia->setNextName(
"timemory");
1192 ia->setNextName(
"metadata");
1195 (*ia)(cereal::make_nvp(
"settings", *
this));
1200 (*ia)(cereal::make_nvp(
"settings", *
this));
1204 }
catch(tim::cereal::Exception& e)
1206 PRINT_HERE(
"Exception reading %s :: %s", inp.c_str(), e.what());
1207 # if defined(TIMEMORY_INTERNAL_TESTING)
1214 # if defined(TIMEMORY_USE_XML)
1215 else if(inp.find(
".xml") != std::string::npos || inp ==
"xml")
1219 ia->setNextName(
"timemory");
1224 ia->setNextName(
"metadata");
1227 (*ia)(cereal::make_nvp(
"settings", *
this));
1232 (*ia)(cereal::make_nvp(
"settings", *
this));
1249 for(
const auto& itr : s)
1251 if(std::isprint(itr))
1252 return (itr ==
'#') ? true :
false;
1262 std::getline(ifs, line);
1265 if(is_comment(line))
1270 if(delim.size() > 0)
1275 for(
size_t i = 1; i < delim.size(); ++i)
1276 val +=
"," + delim.at(i);
1278 if(val.length() > 0)
1279 val = val.substr(1);
1281 for(
auto itr : *
this)
1283 if(itr.second->matches(key))
1285 if(get_debug() || get_verbose() > 0)
1286 fprintf(stderr,
"[timemory::settings]['%s']> %-30s :: %s\n",
1287 inp.c_str(), key.c_str(), val.c_str());
1289 itr.second->parse(val);
1296 "[timemory::settings]['%s']> WARNING! Unknown setting "
1297 "ignored: '%s' (value = '%s')\n",
1298 inp.c_str(), key.c_str(), val.c_str());
1302 return (expected == valid);
1501 # include "timemory/tpls/cereal/archives.hpp"
#define TIMEMORY_DEFAULT_ENABLED
#define TIMEMORY_DEFAULT_PLOTTING
#define TIMEMORY_PYTHON_PLOTTER
static bool & enable_all()
static bool & disable_all()
const string_t const string_t & _dir
input_type & get_fields()
TIMEMORY_SETTINGS_MEMBER_DEF(string_t, config_file, TIMEMORY_SETTINGS_KEY("CONFIG_FILE")) TIMEMORY_SETTINGS_MEMBER_DEF(bool
char const std::string & _prefix
TIMEMORY_SETTINGS_INLINE std::string get_local_datetime(const char *dt_format, std::time_t *dt_curr)
char argparse::argument_parser tim::settings * _settings
tim::mpl::apply< std::string > string
TIMEMORY_SETTINGS_KEY("SUPPRESS_PARSING")) TIMEMORY_SETTINGS_MEMBER_DEF(bool
int makedir(std::string _dir, int umask)
auto get(const auto_bundle< Tag, Types... > &_obj)
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
#define TIMEMORY_SETTINGS_MEMBER_IMPL(TYPE, FUNC, ENV_VAR, DESC, INIT)
#define TIMEMORY_SETTINGS_HIDDEN_MEMBER_ARG_IMPL(TYPE, ENV_VAR, DESC, INIT,...)
#define TIMEMORY_SETTINGS_INLINE
#define TIMEMORY_SETTINGS_MEMBER_ARG_IMPL(TYPE, FUNC, ENV_VAR, DESC, INIT,...)
#define TIMEMORY_SETTINGS_EXTERN_TEMPLATE(...)
#define TIMEMORY_SETTINGS_REFERENCE_ARG_IMPL(TYPE, FUNC, ENV_VAR, DESC, INIT,...)
#define TIMEMORY_SETTINGS_REFERENCE_IMPL(TYPE, FUNC, ENV_VAR, DESC, INIT)
#define TIMEMORY_SETTINGS_REFERENCE_DEF(TYPE, FUNC, ENV_VAR)
Declare the settings types.
arg_result get(size_t _idx, Tp &_value)
bool read(const string_t &)
read a configuration file
static pointer_t shared_instance()
static void store_command_line(int argc, char **argv)
strvector_t & get_environment()
Tp get(Sp &&_key, bool _exact=true)
static std::time_t * get_launch_time(Tag={})
static string_t compose_output_filename(const string_t &_tag, string_t _ext, bool _mpi_init=false, int32_t _mpi_rank=-1, bool fake=false, std::string _explicit="")
static void serialize_settings(Archive &)
static string_t toupper(string_t str)
static string_t get_global_input_prefix()
static string_t compose_input_filename(const string_t &_tag, string_t _ext, bool _mpi_init=false, int32_t _mpi_rank=-1, std::string _explicit="")
std::unordered_map< string_view_t, value_type > data_type
static string_t get_global_output_prefix(bool fake=false)
settings & operator=(const settings &)
static void parse(settings *=instance< TIMEMORY_API >())
static strvector_t get_global_environment()
static settings * instance()
std::vector< std::string > strvector_t
static string_t tolower(string_t str)
The declaration for the types for utility without definitions.
#define TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(CONDITION, DEPTH)
#define TIMEMORY_EXCEPTION(...)
#define TIMEMORY_JOIN(delim,...)