timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
get.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/**
26 * \file timemory/operations/types/finalize_get.hpp
27 * \brief Definition for various functions for finalize_get in operations
28 */
29
30#pragma once
31
32#include "timemory/backends/dmp.hpp"
42#include "timemory/tpls/cereal/cereal.hpp"
43
44#include <string>
45#include <vector>
46
47namespace tim
48{
49namespace operation
50{
51namespace finalize
52{
53//
54//--------------------------------------------------------------------------------------//
55//
// Specialization of the finalize "get" operation for the case where the second
// template argument is true (storage_type = impl::storage<Type, true>).
// It stores a raw pointer to a storage instance (nullptr by default) and declares
// operator() overloads that fill a result array, a vector of basic trees, or an
// archive from that storage.
// NOTE(review): this listing omits several original lines (67, 69, 86, 93, 97 and the
// bodies of the get_* helpers), so some declarations below appear truncated.
56template <typename Type>
57struct get<Type, true>
58{
59 static constexpr bool value = true;
    // Aliases onto the types published by the storage implementation.
60 using storage_type = impl::storage<Type, value>;
61 using result_type = typename storage_type::result_array_t;
62 using distrib_type = typename storage_type::dmp_result_t;
63 using result_node = typename storage_type::result_node;
64 using graph_type = typename storage_type::graph_t;
65 using graph_node = typename storage_type::graph_node;
66 using hierarchy_type = typename storage_type::uintvector_t;
    // basic_tree_type is declared on original line 67, which is missing from this listing.
68 using basic_tree_vector_type = std::vector<basic_tree_type>;
70
71 TIMEMORY_DEFAULT_OBJECT(get)
72
    // Construct from a storage reference; its address is stored in m_storage.
73 explicit TIMEMORY_COLD get(storage_type& _storage)
74 : m_storage(&_storage)
75 {}
76
    // Construct from a storage pointer; the pointer is stored as-is (may be null).
77 explicit TIMEMORY_COLD get(storage_type* _storage)
78 : m_storage(_storage)
79 {}
80
    // Overloads defined out-of-line further down in this file.
81 TIMEMORY_COLD result_type& operator()(result_type&);
82 TIMEMORY_COLD basic_tree_vector_type& operator()(basic_tree_vector_type&);
    // Runs the basic_tree_vector_type overload on `_obj`, appends a copy of `_obj` to
    // `_data` and returns `_data`.
    // `_obj` is declared on original line 86, which is missing from this listing
    // (presumably a local basic_tree_vector_type -- TODO confirm).
83 TIMEMORY_COLD std::vector<basic_tree_vector_type>& operator()(
84 std::vector<basic_tree_vector_type>& _data)
85 {
87 (*this)(_obj);
88 _data.emplace_back(_obj);
89 return _data;
90 }
91
    // Archive overloads; their return types (original lines 93 and 97) are missing from
    // this listing.
92 template <typename Archive>
94 operator()(Archive&);
95
96 template <typename Archive>
98 operator()(Archive&, metadata);
99
100public:
    // Static helpers taking a (default-constructed by default) Type instance.
    // Their bodies (original lines 103, 107, 111, 115, 119) are missing from this listing.
101 static TIMEMORY_COLD auto get_identifier(const Type& _obj = Type{})
102 {
104 }
105 static TIMEMORY_COLD auto get_label(const Type& _obj = Type{})
106 {
108 }
109 static TIMEMORY_COLD auto get_description(const Type& _obj = Type{})
110 {
112 }
113 static TIMEMORY_COLD auto get_unit(const Type& _obj = Type{})
114 {
116 }
117 static TIMEMORY_COLD auto get_display_unit(const Type& _obj = Type{})
118 {
120 }
121
122private:
    // Storage the operators read from; the out-of-line operators return early when null.
123 storage_type* m_storage = nullptr;
124};
125//
126//--------------------------------------------------------------------------------------//
127//
// Specialization of the finalize "get" operation for the case where the second
// template argument is false (storage_type = impl::storage<Type, false>).
// Its call operator does no work and hands the argument back to the caller.
// NOTE(review): original line 134 is missing from this listing.
128template <typename Type>
129struct get<Type, false>
130{
131 static constexpr bool value = false;
132 using storage_type = impl::storage<Type, value>;
133
135
    // Returns the argument unchanged so that the call always yields a valid reference:
    // a function declared to return Tp& must not flow off its end (undefined behavior).
136 template <typename Tp>
137 Tp& operator()(Tp& _v)
138 { return _v; }
139};
140//
141//--------------------------------------------------------------------------------------//
142//
// Out-of-line member definition whose signature (original lines 144-145) is missing
// from this listing; presumably get<Type, true>::operator()(result_type& ret)
// -- TODO confirm.
// Returns `ret` untouched when m_storage is null. Otherwise it records the dmp
// initialization state, rank and size on the storage, flattens the storage graph into
// a result_type via the convert_graph lambda, assigns that to `ret` and returns it.
143template <typename Type>
146{
147 if(!m_storage)
148 return ret;
149
150 auto& data = *m_storage;
151 bool _thread_scope_only = trait::thread_scope_only<Type>::value;
    // thread-id prefix is used unless threads are collapsed and Type is not thread-scope-only
152 bool _use_tid_prefix = (!settings::collapse_threads() || _thread_scope_only);
153 bool _use_pid_prefix = (!settings::collapse_processes());
154 auto _num_thr_count = manager::get_thread_count();
155 auto _num_pid_count = dmp::size();
156
157 data.m_node_init = dmp::is_initialized();
158 data.m_node_rank = dmp::rank();
159 data.m_node_size = dmp::size();
160
161 //------------------------------------------------------------------------------//
162 //
163 // Compute the thread prefix
164 //
165 //------------------------------------------------------------------------------//
    // Yields ">>> " when no thread prefix is requested or the node's tid equals the
    // uint16_t maximum; otherwise "|<zero-padded tid>>>> ".
166 auto _get_thread_prefix = [&](const graph_node& itr) {
167 if(!_use_tid_prefix || itr.tid() == std::numeric_limits<uint16_t>::max())
168 return std::string(">>> ");
169
170 // prefix spacing
    // static: the width persists across calls and only ever grows (std::max)
171 static uint16_t width = 1;
172 if(_num_thr_count > 9)
173 width = std::max(width, (uint16_t)(log10(_num_thr_count) + 1));
174 std::stringstream ss;
175 ss.fill('0');
176 ss << "|" << std::setw(width) << itr.tid() << ">>> ";
177 return ss.str();
178 };
179
180 //------------------------------------------------------------------------------//
181 //
182 // Compute the node prefix
183 //
184 //------------------------------------------------------------------------------//
    // Falls back to the plain thread prefix when dmp is not initialized or the pid
    // prefix is disabled; otherwise prepends "|<rank>" or "|<first>:<last>" (when a
    // rank range was computed) to the thread prefix.
185 auto _get_node_prefix = [&](const graph_node& itr) {
186 if(!data.m_node_init || !_use_pid_prefix)
187 return _get_thread_prefix(itr);
188
189 auto _nc = settings::node_count(); // node-count
190 auto _idx = data.m_node_rank;
    // { -1, -1 } means "no range computed"; checked below via >= 0
191 auto _range = std::make_pair(-1, -1);
192
    // only group ranks when a node count is set and it is smaller than the dmp size
193 if(_nc > 0 && _nc < data.m_node_size)
194 {
195 // calculate some size parameters and generate map of the pids to node ids
196 int32_t nmod = _num_pid_count % _nc;
    // bins = _num_pid_count / _nc rounded up
197 int32_t bins = _num_pid_count / _nc + ((nmod == 0) ? 0 : 1);
198 int32_t bsize = _num_pid_count / bins;
199 int32_t ncnt = 0; // current count
200 int32_t midx = 0; // current bin map index
201 std::map<int32_t, std::set<int32_t>> binmap;
    // assign consecutive indices [0, _num_pid_count) to bins of bsize entries each
202 for(int32_t i = 0; i < _num_pid_count; ++i)
203 {
204 binmap[midx].insert(i);
205 // check to see if we reached the bin size
206 if(++ncnt == bsize)
207 {
208 // set counter to zero and advance the node
209 ncnt = 0;
210 ++midx;
211 }
212 }
213
214 // loop over the bins
215 for(const auto& bitr : binmap)
216 {
217 // if rank is found in a bin, assign range to first and last entry
218 if(bitr.second.find(_idx) != bitr.second.end())
219 {
220 auto vitr = bitr.second.begin();
221 _range.first = *vitr;
222 vitr = bitr.second.end();
223 --vitr;
224 _range.second = *vitr;
225 }
226 }
227
    // debug-only: print every bin and its members plus the selected range
228 if(settings::debug())
229 {
230 std::stringstream ss;
231 for(const auto& bitr : binmap)
232 {
233 ss << ", [" << bitr.first << "] ";
234 std::stringstream bss;
235 for(const auto& nitr : bitr.second)
236 bss << ", " << nitr;
    // substr(2) drops the leading ", " separator
237 ss << bss.str().substr(2);
238 }
239 std::string _msg = "Intervals: ";
240 _msg += ss.str().substr(2);
241 PRINT_HERE("[%s][pid=%i][tid=%i]> %s. range = { %i, %i }",
242 demangle<get<Type, true>>().c_str(), (int) process::get_id(),
243 (int) threading::get_id(), _msg.c_str(), (int) _range.first,
244 (int) _range.second);
245 }
246 }
247
248 // prefix spacing
    // static: the width persists across calls and only ever grows (std::max)
249 static uint16_t width = 1;
250 if(_num_pid_count > 9)
251 width = std::max(width, (uint16_t)(log10(_num_pid_count) + 1));
252 std::stringstream ss;
253 ss.fill('0');
254 if(_range.first >= 0 && _range.second >= 0)
255 {
256 ss << "|" << std::setw(width) << _range.first << ":" << std::setw(width)
257 << _range.second << _get_thread_prefix(itr);
258 }
259 else
260 {
261 ss << "|" << std::setw(width) << _idx << _get_thread_prefix(itr);
262 }
263 return ss.str();
264 };
265
266 //------------------------------------------------------------------------------//
267 //
268 // Compute the indentation
269 //
270 //------------------------------------------------------------------------------//
271 // fix up the prefix based on the actual depth
    // Result is <node prefix> + <indent> + <storage prefix>; the indent is only
    // non-empty when (depth - 1) > 0 and always ends in "|_".
272 auto _compute_modified_prefix = [&](const graph_node& itr) {
273 std::string _prefix = data.get_prefix(itr);
274 std::string _indent = {};
275 std::string _node_prefix = _get_node_prefix(itr);
276
277 int64_t _depth = itr.depth() - 1;
278 if(_depth > 0)
279 {
280 for(int64_t ii = 0; ii < _depth - 1; ++ii)
281 _indent += " ";
282 _indent += "|_";
283 }
284
285 return _node_prefix + _indent + _prefix;
286 };
287
288 // convert graph to a vector
289 auto convert_graph = [&]() {
290 result_type _list{};
291 {
292 // the head node should always be ignored
    // first pass: find the minimum depth over all valid iterators
293 int64_t _min = std::numeric_limits<int64_t>::max();
294 auto& _graph = data.graph();
295 for(auto itr = _graph.begin(); itr != _graph.end(); ++itr)
296 {
297 if(!itr)
298 {
299 PRINT_HERE("[%s] Warning! Invalid iterator!",
300 demangle<Type>().c_str());
301 continue;
302 }
303 _min = std::min<int64_t>(_min, itr->depth());
304 }
305
    // second pass: emit one result_node for every node deeper than _min
306 for(auto itr = _graph.begin(); itr != _graph.end(); ++itr)
307 {
308 if(!itr)
309 {
310 PRINT_HERE("[%s] Warning! Invalid iterator!",
311 demangle<Type>().c_str());
312 continue;
313 }
314 // skip if invalid
    // NOTE(review): the condition guarding this `continue` (original line 315) is
    // missing from this listing.
316 continue;
317 if(itr->depth() > _min)
318 {
319 auto _depth = itr->depth() - (_min + 1);
320 auto _prefix = _compute_modified_prefix(*itr);
321 auto _rolling = itr->id();
322 auto _stats = itr->stats();
323 auto _parent = graph_type::parent(itr);
324 auto _hierarchy = hierarchy_type{};
325 auto _tid = itr->tid();
326 auto _pid = itr->pid();
    // walk up the parents deeper than _min, stopping at the first parent whose
    // data is reported invalid; collect their ids and add them to _rolling
327 while(_parent && _parent->depth() > _min)
328 {
329 if(operation::get_is_invalid<Type, false>{}(_parent->data()))
330 break;
331 _hierarchy.push_back(_parent->id());
332 _rolling += _parent->id();
333 _parent = graph_type::parent(_parent);
334 }
    // ids were collected nearest-parent first; reverse so the outermost comes
    // first, then append this node's own id last
335 if(_hierarchy.size() > 1)
336 std::reverse(_hierarchy.begin(), _hierarchy.end());
337 _hierarchy.push_back(itr->id());
338 auto _entry = result_node(itr->id(), itr->obj(), _prefix, _depth,
339 _rolling, _hierarchy, _stats, _tid, _pid);
340 _list.push_back(std::move(_entry));
341 }
342 }
343 }
344
345 // if collapse is disabled or thread-scope only, there is nothing to merge
346 if(!settings::collapse_threads() || _thread_scope_only)
347 return _list;
348
    // NOTE(review): original line 350 is missing from this listing; presumably it
    // merges _list into _combined -- TODO confirm.
349 result_type _combined{};
351 return _combined;
352 };
353
354 ret = convert_graph();
355 return ret;
356}
357//
358//--------------------------------------------------------------------------------------//
359//
// Out-of-line member definition whose signature (original lines 361-362) is missing
// from this listing; presumably get<Type, true>::operator()(basic_tree_vector_type& bt)
// -- TODO confirm.
// Returns `bt` untouched when m_storage is null. Otherwise it appends one
// basic_tree_type per sibling_iterator position between graph().begin() and
// graph().end(), then replaces `bt` with merge<Type, true>{}(bt) and returns it.
360template <typename Type>
363{
364 using sibling_iterator = typename graph_type::sibling_iterator;
365
366 if(!m_storage)
367 return bt;
368
369 auto& data = *m_storage;
370 auto& t = data.graph();
371 for(sibling_iterator itr = t.begin(); itr != t.end(); ++itr)
372 bt.push_back(basic_tree_type{}(t, itr));
373
    // the merge is applied unconditionally; the conditional variant is kept below
    // as commented-out code
374 bt = merge<Type, true>{}(bt);
375 return bt;
376 // return (trait::thread_scope_only<Type>::value || !settings::collapse_threads())
377 // ? bt
378 // : merge<Type, true>{}(bt);
379}
380//
381//--------------------------------------------------------------------------------------//
382//
// Out-of-line definition of a member template taking an Archive.
// NOTE(review): the signature (original lines 385-386) and one body line (388) are
// missing from this listing; the only visible behavior is that `ar` is returned.
383template <typename Type>
384template <typename Archive>
387{
389 return ar;
390}
391//
392//--------------------------------------------------------------------------------------//
393//
// Out-of-line definition of a member template taking an Archive.
// NOTE(review): the signature (original lines 396-397) is missing from this listing;
// presumably this is the operator()(Archive&, metadata) overload -- TODO confirm.
// Returns `ar` untouched when m_storage is null. Otherwise it records the dmp
// initialization state, rank and size on the storage, calls m_storage->merge(), and
// passes `ar` together with a local basic_tree_vector_type to serialization<Type>.
394template <typename Type>
395template <typename Archive>
398{
399 if(!m_storage)
400 return ar;
401
402 m_storage->m_node_init = dmp::is_initialized();
403 m_storage->m_node_rank = dmp::rank();
404 m_storage->m_node_size = dmp::size();
405 m_storage->merge();
406
    // NOTE(review): `bt` is still empty when handed to serialization -- confirm that
    // this is intended.
407 auto bt = basic_tree_vector_type{};
408 serialization<Type>{}(ar, bt);
409 return ar;
410}
411//
412//--------------------------------------------------------------------------------------//
413//
414} // namespace finalize
415} // namespace operation
416} // namespace tim
static int32_t get_thread_count()
This effectively provides the total number of threads which collected data. It is only "decremented" ...
Definition: manager.hpp:182
The declaration for the types for manager without definitions.
::tim::statistics< Tp > max(::tim::statistics< Tp > lhs, const Tp &rhs)
Definition: statistics.hpp:320
return false
Definition: definition.hpp:326
Definition: kokkosp.cpp:39
char const std::string & _prefix
Definition: config.cpp:55
node_count
Definition: settings.cpp:1780
collapse_threads
Definition: settings.cpp:1637
typename std::enable_if< B, T >::type enable_if_t
Alias template for enable_if.
Definition: types.hpp:190
std::string demangle(const char *_mangled_name, int *_status=nullptr)
Definition: demangle.hpp:47
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
collapse_processes
Definition: settings.cpp:1639
void finalize()
Definition: types.hpp:119
The declaration for the types for operations without definitions.
Include the macros for operations.
Declare the operations types.
Declare the storage types.
Basic hierarchical tree implementation. Expects population from tim::graph.
Definition: basic_tree.hpp:44
impl::storage< Type, value > storage_type
Definition: get.hpp:132
typename storage_type::result_array_t result_type
Definition: get.hpp:61
typename storage_type::graph_node graph_node
Definition: get.hpp:65
typename storage_type::result_node result_node
Definition: get.hpp:63
std::vector< basic_tree_vector_type > & operator()(std::vector< basic_tree_vector_type > &_data)
Definition: get.hpp:83
get(storage_type *_storage)
Definition: get.hpp:77
static auto get_display_unit(const Type &_obj=Type{})
Definition: get.hpp:117
typename storage_type::uintvector_t hierarchy_type
Definition: get.hpp:66
typename storage_type::dmp_result_t distrib_type
Definition: get.hpp:62
static auto get_description(const Type &_obj=Type{})
Definition: get.hpp:109
static auto get_label(const Type &_obj=Type{})
Definition: get.hpp:105
std::vector< basic_tree_type > basic_tree_vector_type
Definition: get.hpp:68
typename serialization< Type >::metadata metadata
Definition: get.hpp:69
static auto get_unit(const Type &_obj=Type{})
Definition: get.hpp:113
typename storage_type::graph_t graph_type
Definition: get.hpp:64
static auto get_identifier(const Type &_obj=Type{})
Definition: get.hpp:101
impl::storage< Type, value > storage_type
Definition: get.hpp:60
This operation attempts to call a member function which provides whether or not the component is in a...
Definition: types.hpp:673
The purpose of this operation class is to provide a non-template hook to get the object itself.
Definition: get.hpp:54
typename base_type::metadata metadata
trait that signifies the component only has relevant values if it is not collapsed into the master th...
#define PRINT_HERE(...)
Definition: macros.hpp:152