timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
flamegraph.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/**
26 * \file timemory/operations/types/finalize/flamegraph.hpp
27 * \brief Definition for various functions for flamegraph in operations
28 */
29
30#pragma once
31
37#include "timemory/units.hpp"
39
40namespace tim
41{
42namespace operation
43{
44namespace finalize
45{
46//
47//--------------------------------------------------------------------------------------//
48//
// Finalize operation that is constructed from a storage pointer and a label.
// The constructor overload that is enabled when
// trait::supports_flamegraph<Up>::value is true gathers the storage results
// and writes them to a "<label>.flamegraph.json" style output file; the other
// overload is defined with an empty body.
//
// NOTE(review): this listing omits the class-head line and the trailing lines
// of both constructor declarations, so their full parameter lists cannot be
// read from here — confirm against the real header before editing.
49template <typename Type>
51{
 // Selects the impl::storage specialization used below.
52 static constexpr bool has_data = true;
 // Storage implementation for Type; every alias below is re-exported from it.
53 using storage_type = impl::storage<Type, has_data>;
54 using result_type = typename storage_type::result_array_t;
55 using distrib_type = typename storage_type::dmp_result_t;
56 using result_node = typename storage_type::result_node;
57 using graph_type = typename storage_type::graph_t;
58 using graph_node = typename storage_type::graph_node;
59 using hierarchy_type = typename storage_type::uintvector_t;
60
 // Overload taking (storage_type*, std::string, ...); its definition writes
 // the flamegraph JSON output.
61 template <typename Up = Type>
62 TIMEMORY_COLD flamegraph(storage_type*, std::string,
64
 // Second overload; its parameters are not visible in this listing.
 // NOTE(review): presumably the counterpart for types that do not support
 // flamegraph output — confirm.
65 template <typename Up = Type>
66 TIMEMORY_COLD flamegraph(
69};
70//
71//--------------------------------------------------------------------------------------//
72//
// Constructor definition (enabled overload): collects the results held by
// `_data`, and on rank 0 serializes them as a JSON "traceEvents" array into
// the file composed from `_label` and the ".flamegraph.json" extension.
//
// Each result becomes one event with the fields "dur", "name", "ph" ("X"),
// "pid", "tid" and "ts". "ts" is an offset accumulated per depth so that
// entries at the same depth are laid out one after another.
//
// NOTE(review): the signature lines and the `policy_type` alias are not
// visible in this listing; the body refers to the parameters as `_data`
// (storage pointer) and `_label` (output label).
73template <typename Type>
74template <typename Up>
77{
78 // auto node_init = dmp::is_initialized();
79 // auto node_size = dmp::size();
 // Gather the results between two barriers.
80 dmp::barrier();
81 auto node_rank = dmp::rank();
82 auto node_results = _data->dmp_get();
83 dmp::barrier();
84
 // Only rank 0 writes output, and only when something was gathered.
85 if(node_rank != 0 || node_results.empty())
86 return;
87
 // Flatten the per-node result arrays into a single array (entries are moved).
88 result_type results;
89 for(auto&& itr : node_results)
90 {
91 for(auto&& nitr : itr)
92 {
93 results.emplace_back(std::move(nitr));
94 }
95 }
96
97 if(results.empty())
98 return;
99
100 using Archive = cereal::MinimalJSONOutputArchive;
101 // using Archive = cereal::PrettyJSONOutputArchive;
103
104 auto outfname = settings::compose_output_filename(_label, ".flamegraph.json");
105
106 if(outfname.length() > 0)
107 {
108 std::ofstream ofs{};
109 if(filepath::open(ofs, outfname))
110 {
 // Register the file with the manager and announce it on stdout.
111 manager::instance()->add_json_output(_label, outfname);
112 printf("[%s]|%i> Outputting '%s'...\n", _label.c_str(), node_rank,
113 outfname.c_str());
114
115 // ensure write final block during destruction before the file is closed
116 auto oa = policy_type::get(ofs);
117
 // Open the top-level "traceEvents" array.
118 oa->setNextName("traceEvents");
119 oa->startNode();
120 oa->makeArray();
121
122 using value_type = decay_t<decltype(std::declval<const Type>().get())>;
123 using offset_map_t = std::map<int64_t, value_type>;
124 using useoff_map_t = std::map<int64_t, bool>;
 // Every value is multiplied by this factor before being written.
125 auto conv = units::usec;
 // All maps below are keyed by depth.
126 offset_map_t total_offset;
127 offset_map_t last_offset;
128 offset_map_t last_value;
129 useoff_map_t use_last;
130 int64_t max_depth = 1;
131
 // First pass: find the deepest level (+1) and initialize use_last to
 // false for every depth that occurs.
132 for(auto& itr : results)
133 {
134 max_depth = std::max<int64_t>(max_depth, itr.depth() + 1);
135 use_last[itr.depth()] = false;
136 }
137
 // Second pass: emit one event per result.
138 for(auto& itr : results)
139 {
140 auto _prefix = itr.prefix();
141 auto value = itr.data().get() * conv;
142
 // If a previous entry at this depth is pending, advance this depth's
 // running offset by its value, reset all deeper depths to that
 // offset and drop their pending entries.
143 auto litr = last_offset.find(itr.depth());
144 if(litr != last_offset.end())
145 {
146 // for(int64_t i = 0; i < max_depth; ++i)
147 // use_last[i] = false;
148
149 total_offset[itr.depth()] += litr->second;
150
151 for(int64_t i = itr.depth() + 1; i < max_depth; ++i)
152 {
153 // use_last[i] = true;
154 total_offset[i] = total_offset[itr.depth()];
155 last_value[i] = litr->second;
156 auto ditr = last_offset.find(i);
157 if(ditr != last_offset.end())
158 last_offset.erase(ditr);
159 }
160 last_offset.erase(litr);
161 }
162
 // use_last is only ever assigned false in this function (the
 // assignment to true above is commented out), so the adjustment
 // below is currently never applied.
163 value_type offset = total_offset[itr.depth()];
164 if(use_last[itr.depth()])
165 offset += last_value[itr.depth()] - value;
166
167 oa->startNode();
168
169 // oa->setNextName("args");
170 // oa->startNode();
171 // (*oa)(cereal::make_nvp("detail", _prefix));
172 // (*oa)(cereal::make_nvp("count", itr.data().get_laps()));
173 // (*oa)(cereal::make_nvp("depth", itr.depth()));
174 // (*oa)(cereal::make_nvp("units", itr.data().get_display_unit()));
175 // oa->finishNode();
176
 // Strip everything up to and including the ">>>" and "|_" markers.
 // find() locates the whole marker; find_first_of() would treat its
 // argument as a set of characters and could cut the label at a lone
 // '>', '|' or '_' that appears before the marker.
177 string_t _ph = "X";
178 if(_prefix.find(">>>") != std::string::npos)
179 _prefix = _prefix.substr(_prefix.find(">>>") + 3);
180 if(_prefix.find("|_") != std::string::npos)
181 _prefix = _prefix.substr(_prefix.find("|_") + 2);
182
183 (*oa)(cereal::make_nvp("dur", value));
184 (*oa)(cereal::make_nvp("name", _prefix));
185 (*oa)(cereal::make_nvp("ph", _ph));
186 (*oa)(cereal::make_nvp("pid", itr.pid()));
187 (*oa)(cereal::make_nvp("tid", itr.tid()));
188 (*oa)(cereal::make_nvp("ts", offset));
189
190 oa->finishNode();
191
 // Remember this entry's value as the pending one for its depth.
192 last_offset[itr.depth()] = value;
193 last_value[itr.depth()] = value;
194 // total_offset[itr.depth()] += value;
195 }
196
197 /*
198 oa->startNode();
199 oa->setNextName("args");
200 oa->startNode();
201 (*oa)(cereal::make_nvp("name", _label));
202 oa->finishNode();
203 string_t _ph = "M";
204 string_t _cat = "";
205 string_t _name = "metric";
206 (*oa)(cereal::make_nvp("cat", _cat));
207 (*oa)(cereal::make_nvp("name", _name));
208 (*oa)(cereal::make_nvp("ph", _ph));
209 (*oa)(cereal::make_nvp("pid", process::get_id()));
210 (*oa)(cereal::make_nvp("tid", 0));
211 (*oa)(cereal::make_nvp("ts", 0));
212 oa->finishNode();
213 */
214
 // Close the "traceEvents" node; `oa` goes out of scope at the end of
 // this block, before the stream is closed below.
215 oa->finishNode();
216 }
 // Terminate the output with a newline if the stream is good, then close.
217 if(ofs)
218 ofs << std::endl;
219 ofs.close();
220 }
221}
222//
223//--------------------------------------------------------------------------------------//
224//
// Constructor definition with an empty body: constructing through this
// overload performs no work and writes no output.
// NOTE(review): the signature lines are not visible in this listing;
// presumably this is the overload selected when
// trait::supports_flamegraph<Up>::value is false — confirm.
225template <typename Type>
226template <typename Up>
229{}
230//
231//--------------------------------------------------------------------------------------//
232//
233} // namespace finalize
234} // namespace operation
235} // namespace tim
static pointer_t instance()
Get a shared pointer to the instance for the current thread.
The declaration for the types for manager without definitions.
bool open(std::ofstream &_ofs, std::string _fpath, Args &&... _args)
Definition: filepath.hpp:207
Definition: kokkosp.cpp:39
max_depth
Definition: settings.cpp:1641
char const std::string & _prefix
Definition: config.cpp:55
std::string string_t
Definition: utility.hpp:98
typename std::decay< T >::type decay_t
Alias template for decay.
Definition: types.hpp:194
typename std::enable_if< B, T >::type enable_if_t
Alias template for enable_if.
Definition: types.hpp:190
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
void finalize()
Definition: types.hpp:119
auto get(const auto_bundle< Tag, Types... > &_obj)
The declaration for the types for operations without definitions.
Include the macros for operations.
Declare the operations types.
typename storage_type::graph_node graph_node
Definition: flamegraph.hpp:58
typename storage_type::result_node result_node
Definition: flamegraph.hpp:56
flamegraph(storage_type *, std::string, enable_if_t< trait::supports_flamegraph< Up >::value, int >=0)
Definition: flamegraph.hpp:75
typename storage_type::graph_t graph_type
Definition: flamegraph.hpp:57
typename storage_type::dmp_result_t distrib_type
Definition: flamegraph.hpp:55
typename storage_type::uintvector_t hierarchy_type
Definition: flamegraph.hpp:59
impl::storage< Type, has_data > storage_type
Definition: flamegraph.hpp:53
typename storage_type::result_array_t result_type
Definition: flamegraph.hpp:54
Provides a static get() function which returns a shared pointer to an instance of the given archive fo...
Definition: policy.hpp:136
static string_t compose_output_filename(string_t _tag, string_t _ext, bool _use_suffix=use_output_suffix(), int32_t _suffix=default_process_suffix(), bool _make_dir=false, std::string _explicit={})
Definition: settings.cpp:322
trait that designates a type supports flamegraph output