timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
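For orientation, a minimal sketch of that template API is shown below: components are grouped into a bundle that is started and stopped around a region of interest, and the measurements are pushed into the call-graph storage declared in this header. The bundle alias, the wall_clock/peak_rss components, and the single-header include are assumptions about a typical timemory build rather than part of this file.

    #include "timemory/timemory.hpp"

    // bundle of components recorded together for one labeled region
    using bundle_t =
        tim::component_tuple<tim::component::wall_clock, tim::component::peak_rss>;

    int main(int argc, char** argv)
    {
        tim::timemory_init(argc, argv);      // configure settings and output paths
        {
            bundle_t _obj{ "example_region" };
            _obj.start();
            // ... work to be measured ...
            _obj.stop();                     // records land in the call-graph storage
        }
        tim::timemory_finalize();            // merge, print, and serialize storage
        return 0;
    }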
declaration.hpp
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/**
26 * \file timemory/storage/declaration.hpp
27 * \brief The declaration for the types for storage without definitions
28 */
29
30#pragma once
31
32#include "timemory/backends/dmp.hpp"
33#include "timemory/backends/gperftools.hpp"
34#include "timemory/backends/threading.hpp"
48#include "timemory/tpls/cereal/cereal.hpp"
53
54#include <atomic>
55#include <cstdint>
56#include <memory>
57#include <tuple>
58#include <type_traits>
59#include <unordered_map>
60#include <unordered_set>
61
62namespace tim
63{
64//
65//--------------------------------------------------------------------------------------//
66//
67template <typename Tp>
68TIMEMORY_NOINLINE storage_singleton<Tp>*
69get_storage_singleton();
70//
71//--------------------------------------------------------------------------------------//
72//
73template <typename Tp>
74storage_singleton<Tp>*
75get_storage_singleton()
76{
77 using singleton_type = tim::storage_singleton<Tp>;
78 using component_type = typename Tp::component_type;
79 static auto _instance = (trait::runtime_enabled<component_type>::get())
80 ? std::make_unique<singleton_type>()
81 : std::unique_ptr<singleton_type>{};
82 static auto _dtor = scope::destructor{ []() { _instance.reset(); } };
83 consume_parameters(_dtor);
84 return _instance.get();
85}
86//
87//--------------------------------------------------------------------------------------//
88//
89namespace impl
90{
91//
92//--------------------------------------------------------------------------------------//
93//
94// impl::storage<Tp, true>
95//
96//--------------------------------------------------------------------------------------//
97//
98template <typename Type>
99class storage<Type, true> : public base::storage
100{
101public:
102 //----------------------------------------------------------------------------------//
103 //
104 static constexpr bool has_data_v = true;
105
106 template <typename KeyT, typename MappedT>
107 using uomap_t = std::unordered_map<KeyT, MappedT>;
108
109 using result_node = node::result<Type>;
110 using graph_node = node::graph<Type>;
111 using strvector_t = std::vector<string_t>;
112 using uintvector_t = std::vector<uint64_t>;
113 using EmptyT = std::tuple<>;
114 using base_type = base::storage;
115 using component_type = Type;
116 using this_type = storage<Type, has_data_v>;
117 using smart_pointer = std::unique_ptr<this_type, impl::storage_deleter<this_type>>;
118 using singleton_t = singleton<this_type, smart_pointer>;
119 using singleton_type = singleton_t;
120 using pointer = typename singleton_t::pointer;
121 using auto_lock_t = typename singleton_t::auto_lock_t;
122 using node_type = typename node::data<Type>::node_type;
123 using stats_type = typename node::data<Type>::stats_type;
124 using result_type = typename node::data<Type>::result_type;
125 using result_array_t = std::vector<result_node>;
126 using dmp_result_t = std::vector<result_array_t>;
127 using printer_t = operation::finalize::print<Type, has_data_v>;
128 using sample_array_t = std::vector<Type>;
129 using graph_node_t = graph_node;
130 using graph_data_t = graph_data<graph_node_t>;
131 using graph_t = typename graph_data_t::graph_t;
132 using graph_type = graph_t;
133 using iterator = typename graph_type::iterator;
134 using const_iterator = typename graph_type::const_iterator;
135
136 template <typename Vp>
137 using secondary_data_t = std::tuple<iterator, const std::string&, Vp>;
138 using iterator_hash_submap_t = uomap_t<int64_t, iterator>;
139 using iterator_hash_map_t = uomap_t<int64_t, iterator_hash_submap_t>;
140
141 friend class tim::manager;
142 friend struct node::result<Type>;
143 friend struct node::graph<Type>;
144 friend struct impl::storage_deleter<this_type>;
145 friend struct operation::finalize::get<Type, has_data_v>;
146 friend struct operation::finalize::mpi_get<Type, has_data_v>;
147 friend struct operation::finalize::upc_get<Type, has_data_v>;
148 friend struct operation::finalize::dmp_get<Type, has_data_v>;
149 friend struct operation::finalize::print<Type, has_data_v>;
150 friend struct operation::finalize::merge<Type, has_data_v>;
151
152public:
153 // static functions
154 static pointer instance();
155 static pointer master_instance();
156 static pointer noninit_instance();
157 static pointer noninit_master_instance();
158
159 static bool& master_is_finalizing();
160 static bool& worker_is_finalizing();
161 static bool is_finalizing();
162
163private:
164 static singleton_t* get_singleton() { return get_storage_singleton<this_type>(); }
165 static std::atomic<int64_t>& instance_count();
166
167public:
169 storage();
170 ~storage() override;
171
172 storage(const this_type&) = delete;
173 storage(this_type&&) = delete;
174
175 this_type& operator=(const this_type&) = delete;
176 this_type& operator=(this_type&& rhs) = delete;
177
178public:
179 void get_shared_manager();
180
181 void print() final { internal_print(); }
182 void cleanup() final { operation::cleanup<Type>{}; }
183 void disable() final { trait::runtime_enabled<component_type>::set(false); }
184 void initialize() final;
185 void finalize() final;
186 void stack_clear() final;
187 bool global_init() final;
188 bool thread_init() final;
189 bool data_init() final;
190
191 const graph_data_t& data() const;
192 const graph_t& graph() const;
193 int64_t depth() const;
194 graph_data_t& data();
195 graph_t& graph();
196 iterator& current();
197
198 void reset();
199 inline bool empty() const
200 {
201 return (m_graph_data_instance) ? (_data().graph().size() <= 1) : true;
202 }
203 inline size_t size() const
204 {
205 return (m_graph_data_instance) ? (_data().graph().size() - 1) : 0;
206 }
207 inline size_t true_size() const
208 {
209 if(!m_graph_data_instance)
210 return 0;
211 size_t _sz = _data().graph().size();
212 size_t _dc = _data().dummy_count();
213 return (_dc < _sz) ? (_sz - _dc) : 0;
214 }
215 iterator pop();
216 result_array_t get();
217 dmp_result_t mpi_get();
218 dmp_result_t upc_get();
219 dmp_result_t dmp_get();
220
221 template <typename Tp>
222 Tp& get(Tp&);
223 template <typename Tp>
224 Tp& mpi_get(Tp&);
225 template <typename Tp>
226 Tp& upc_get(Tp&);
227 template <typename Tp>
228 Tp& dmp_get(Tp&);
229
230 std::shared_ptr<printer_t> get_printer() const { return m_printer; }
231
232 iterator_hash_map_t get_node_ids() const { return m_node_ids; }
233
234 void stack_push(Type* obj) { m_stack.insert(obj); }
235 void stack_pop(Type* obj);
236
237 void ensure_init();
238
239 iterator insert(scope::config scope_data, const Type& obj, uint64_t hash_id);
240
241 // append a value to the graph
242 template <typename Vp, enable_if_t<!std::is_same<decay_t<Vp>, Type>::value, int> = 0>
243 iterator append(const secondary_data_t<Vp>& _secondary);
244
245 // append an instance to the graph
246 template <typename Vp, enable_if_t<std::is_same<decay_t<Vp>, Type>::value, int> = 0>
247 iterator append(const secondary_data_t<Vp>& _secondary);
248
249 template <typename Archive>
250 void serialize(Archive& ar, unsigned int version);
251
252 void add_sample(Type&& _obj) { m_samples.emplace_back(std::forward<Type>(_obj)); }
253
254 auto& get_samples() { return m_samples; }
255 const auto& get_samples() const { return m_samples; }
256
257protected:
258 iterator insert_tree(uint64_t hash_id, const Type& obj, uint64_t hash_depth);
259 iterator insert_timeline(uint64_t hash_id, const Type& obj, uint64_t hash_depth);
260 iterator insert_flat(uint64_t hash_id, const Type& obj, uint64_t hash_depth);
261 iterator insert_hierarchy(uint64_t hash_id, const Type& obj, uint64_t hash_depth,
262 bool has_head);
263
264 void merge();
265 void merge(this_type* itr);
266 string_t get_prefix(const graph_node&);
267 string_t get_prefix(iterator _node) { return get_prefix(*_node); }
268 string_t get_prefix(const uint64_t& _id);
269
270private:
271 void check_consistency();
272
273 template <typename Archive>
274 void do_serialize(Archive& ar);
275
276 void internal_print();
277
278 graph_data_t& _data();
279 const graph_data_t& _data() const
280 {
281 using type_t = decay_t<remove_pointer_t<decltype(this)>>;
282 return const_cast<type_t*>(this)->_data();
283 }
284
285private:
286 uint64_t m_timeline_counter = 1;
287 mutable graph_data_t* m_graph_data_instance = nullptr;
288 iterator_hash_map_t m_node_ids;
289 std::unordered_set<Type*> m_stack;
290 std::shared_ptr<printer_t> m_printer;
291 sample_array_t m_samples;
292};
293//
294//--------------------------------------------------------------------------------------//
295//
296template <typename Type>
297void
298storage<Type, true>::reset()
299{
300 // have the data graph erase all children of the head node
301 if(m_graph_data_instance)
302 m_graph_data_instance->reset();
303 // erase all the cached iterators except for m_node_ids[0][0]
304 for(auto& ditr : m_node_ids)
305 {
306 auto _depth = ditr.first;
307 if(_depth != 0)
308 {
309 ditr.second.clear();
310 }
311 else
312 {
313 for(auto itr = ditr.second.begin(); itr != ditr.second.end(); ++itr)
314 {
315 if(itr->first != 0)
316 ditr.second.erase(itr);
317 }
318 }
319 }
320}
321//
322//--------------------------------------------------------------------------------------//
323//
324template <typename Type>
325typename storage<Type, true>::iterator
326storage<Type, true>::insert(scope::config scope_data, const Type& obj, uint64_t hash_id)
327{
328 ensure_init();
329
330 using force_tree_t = trait::tree_storage<Type>;
331 using force_flat_t = trait::flat_storage<Type>;
332 using force_time_t = trait::timeline_storage<Type>;
333
334 // if data is all the way up to the zeroth (relative) depth then worker
335 // threads should insert a new dummy at the current master thread id and depth.
336 // Be aware, this changes 'm_current' inside the data graph
337 //
338 if(!m_is_master && _data().at_sea_level() &&
339 _data().dummy_count() < m_settings->get_max_thread_bookmarks())
340 _data().add_dummy();
341
342 // compute the insertion depth
343 auto hash_depth = scope_data.compute_depth<force_tree_t, force_flat_t, force_time_t>(
344 _data().depth());
345
346 // compute the insertion key
347 auto hash_value = scope_data.compute_hash<force_tree_t, force_flat_t, force_time_t>(
348 hash_id, hash_depth, m_timeline_counter);
349
350 // alias the true id with the insertion key
351 add_hash_id(hash_id, hash_value);
352
353 // even when flat is combined with timeline, it still inserts at depth of 1
354 // so this is the easiest check
355 if(scope_data.is_flat() || force_flat_t::value)
356 return insert_flat(hash_value, obj, hash_depth);
357
358 // in the case of tree + timeline, timeline will have appropriately modified the
359 // depth and hash, so it doesn't really matter which check happens first here;
360 // however, the query for is_timeline() is cheaper, so we check that first
361 // and fall back to inserting into the tree without a check
362 // if(scope_data.is_timeline() || force_time_t::value)
363 // return insert_timeline(hash_value, obj, hash_depth);
364
365 // default fall-through if neither flat nor timeline
366 return insert_tree(hash_value, obj, hash_depth);
367}
368//
369//--------------------------------------------------------------------------------------//
370//
371template <typename Type>
372template <typename Vp, enable_if_t<!std::is_same<decay_t<Vp>, Type>::value, int>>
373typename storage<Type, true>::iterator
374storage<Type, true>::append(const secondary_data_t<Vp>& _secondary)
375{
376 ensure_init();
377
378 // get the iterator and check if valid
379 auto&& _itr = std::get<0>(_secondary);
380 if(!_data().graph().is_valid(_itr))
381 return nullptr;
382
383 // compute hash of prefix
384 auto _hash_id = add_hash_id(std::get<1>(_secondary));
385 // compute hash w.r.t. parent iterator (so identical kernels from different
386 // call-graph parents do not locate same iterator)
387 auto _hash = _hash_id ^ _itr->id();
388 // add the hash alias
389 add_hash_id(_hash_id, _hash);
390 // compute depth
391 auto _depth = _itr->depth() + 1;
392
393 // see if depth + hash entry exists already
394 auto _nitr = m_node_ids[_depth].find(_hash);
395 if(_nitr != m_node_ids[_depth].end())
396 {
397 // if so, then update
398 auto& _obj = _nitr->second->obj();
399 _obj += std::get<2>(_secondary);
400 _obj.set_laps(_nitr->second->obj().get_laps() + 1);
401 auto& _stats = _nitr->second->stats();
402 operation::add_statistics<Type>(_nitr->second->obj(), _stats);
403 return _nitr->second;
404 }
405
406 // else, create a new entry
407 auto&& _tmp = Type{};
408 _tmp += std::get<2>(_secondary);
409 _tmp.set_laps(_tmp.get_laps() + 1);
410 graph_node_t _node{ _hash, _tmp, _depth, m_thread_idx };
411 _node.stats() += _tmp.get();
412 auto& _stats = _node.stats();
413 operation::add_statistics<Type>(_tmp, _stats);
414 auto itr = _data().emplace_child(_itr, std::move(_node));
415 operation::set_iterator<Type>{}(itr->data(), itr);
416 m_node_ids[_depth][_hash] = itr;
417 return itr;
418}
419//
420//--------------------------------------------------------------------------------------//
421//
422template <typename Type>
423template <typename Vp, enable_if_t<std::is_same<decay_t<Vp>, Type>::value, int>>
424typename storage<Type, true>::iterator
425storage<Type, true>::append(const secondary_data_t<Vp>& _secondary)
426{
427 ensure_init();
428
429 // get the iterator and check if valid
430 auto&& _itr = std::get<0>(_secondary);
431 if(!_data().graph().is_valid(_itr))
432 return nullptr;
433
434 // compute hash of prefix
435 auto _hash_id = add_hash_id(std::get<1>(_secondary));
436 // compute hash w.r.t. parent iterator (so identical kernels from different
437 // call-graph parents do not locate same iterator)
438 auto _hash = _hash_id ^ _itr->id();
439 // add the hash alias
440 add_hash_id(_hash_id, _hash);
441 // compute depth
442 auto _depth = _itr->depth() + 1;
443
444 // see if depth + hash entry exists already
445 auto _nitr = m_node_ids[_depth].find(_hash);
446 if(_nitr != m_node_ids[_depth].end())
447 {
448 _nitr->second->obj() += std::get<2>(_secondary);
449 return _nitr->second;
450 }
451
452 // else, create a new entry
453 auto&& _tmp = std::get<2>(_secondary);
454 auto itr = _data().emplace_child(
455 _itr, graph_node_t{ _hash, _tmp, static_cast<int64_t>(_depth), m_thread_idx });
456 operation::set_iterator<Type>{}(itr->data(), itr);
457 m_node_ids[_depth][_hash] = itr;
458 return itr;
459}
460//
461//----------------------------------------------------------------------------------//
462//
463template <typename Type>
464typename storage<Type, true>::iterator
465storage<Type, true>::insert_tree(uint64_t hash_id, const Type& obj, uint64_t hash_depth)
466{
467 bool has_head = _data().has_head();
468 return insert_hierarchy(hash_id, obj, hash_depth, has_head);
469}
470
471//----------------------------------------------------------------------------------//
472//
473template <typename Type>
474typename storage<Type, true>::iterator
475storage<Type, true>::insert_timeline(uint64_t hash_id, const Type& obj,
476 uint64_t hash_depth)
477{
478 auto _current = _data().current();
479 return _data().emplace_child(
480 _current,
481 graph_node_t{ hash_id, obj, static_cast<int64_t>(hash_depth), m_thread_idx });
482}
483
484//----------------------------------------------------------------------------------//
485//
486template <typename Type>
487typename storage<Type, true>::iterator
488storage<Type, true>::insert_flat(uint64_t hash_id, const Type& obj, uint64_t hash_depth)
489{
490 static thread_local auto _current = _data().head();
491 static thread_local bool _first = true;
492 if(_first)
493 {
494 _first = false;
495 if(_current.begin())
496 {
497 _current = _current.begin();
498 }
499 else
500 {
501 auto itr = _data().emplace_child(
502 _current, graph_node_t{ hash_id, obj, static_cast<int64_t>(hash_depth),
503 m_thread_idx });
504 m_node_ids[hash_depth][hash_id] = itr;
505 _current = itr;
506 return itr;
507 }
508 }
509
510 auto _existing = m_node_ids[hash_depth].find(hash_id);
511 if(_existing != m_node_ids[hash_depth].end())
512 return _existing->second;
513
514 auto itr = _data().emplace_child(
515 _current,
516 graph_node_t{ hash_id, obj, static_cast<int64_t>(hash_depth), m_thread_idx });
517 m_node_ids[hash_depth][hash_id] = itr;
518 return itr;
519}
520//
521//----------------------------------------------------------------------------------//
522//
523template <typename Type>
524typename storage<Type, true>::iterator
525storage<Type, true>::insert_hierarchy(uint64_t hash_id, const Type& obj,
526 uint64_t hash_depth, bool has_head)
527{
528 using id_hash_map_t = typename iterator_hash_map_t::mapped_type;
529
530 auto& m_data = m_graph_data_instance;
531 auto tid = m_thread_idx;
532
533 // if first instance
534 if(!has_head || (m_is_master && m_node_ids.empty()))
535 {
536 auto itr = m_data->append_child(
537 graph_node_t{ hash_id, obj, static_cast<int64_t>(hash_depth), tid });
538 m_node_ids[hash_depth][hash_id] = itr;
539 return itr;
540 }
541
542 // lambda for updating settings
543 auto _update = [&](iterator itr) {
544 m_data->depth() = itr->depth();
545 return (m_data->current() = itr);
546 };
547
548 if(m_node_ids[hash_depth].find(hash_id) != m_node_ids[hash_depth].end() &&
549 m_node_ids[hash_depth].find(hash_id)->second->depth() == m_data->depth())
550 {
551 return _update(m_node_ids[hash_depth].find(hash_id)->second);
552 }
553
554 using sibling_itr = typename graph_t::sibling_iterator;
555 graph_node_t node{ hash_id, obj, m_data->depth(), tid };
556
557 // lambda for inserting child
558 auto _insert_child = [&]() {
559 node.depth() = hash_depth;
560 auto itr = m_data->append_child(std::move(node));
561 auto ditr = m_node_ids.find(hash_depth);
562 if(ditr == m_node_ids.end())
563 m_node_ids.insert({ hash_depth, id_hash_map_t{} });
564 auto hitr = m_node_ids.at(hash_depth).find(hash_id);
565 if(hitr == m_node_ids.at(hash_depth).end())
566 m_node_ids.at(hash_depth).insert({ hash_id, iterator{} });
567 m_node_ids.at(hash_depth).at(hash_id) = itr;
568 return itr;
569 };
570
571 auto current = m_data->current();
572 if(!m_data->graph().is_valid(current))
573 _insert_child(); // create valid current, intentional non-return
574
575 // check children first because in general, child match is ideal
576 auto fchild = graph_t::child(current, 0);
577 if(m_data->graph().is_valid(fchild))
578 {
579 for(sibling_itr itr = fchild.begin(); itr != fchild.end(); ++itr)
580 {
581 if((hash_id) == itr->id())
582 return _update(itr);
583 }
584 }
585
586 // occasionally, we end up here because of some of the threading handling of
587 // the head node. This is protected against mismatches in the hierarchy
588 // because the actual hash includes the depth, so "example" at depth 2
589 // has a different hash than "example" at depth 3.
590 if((hash_id) == current->id())
591 return current;
592
593 // check siblings
594 for(sibling_itr itr = current.begin(); itr != current.end(); ++itr)
595 {
596 // skip if current
597 if(itr == current)
598 continue;
599 // check hash id's
600 if((hash_id) == itr->id())
601 return _update(itr);
602 }
603
604 return _insert_child();
605}
606
607//
608//--------------------------------------------------------------------------------------//
609//
610template <typename Type>
611template <typename Archive>
612void
613storage<Type, true>::serialize(Archive& ar, const unsigned int version)
614{
615 auto&& _results = dmp_get();
616 operation::serialization<Type>{}(ar, _results);
617 consume_parameters(version);
618}
619//
620//--------------------------------------------------------------------------------------//
621//
622template <typename Type>
623template <typename Archive>
624void
625storage<Type, true>::do_serialize(Archive& ar)
626{
627 if(m_is_master)
628 merge();
629
630 auto&& _results = dmp_get();
631 operation::serialization<Type>{}(ar, _results);
632}
633//
634//--------------------------------------------------------------------------------------//
635//
636template <typename Type>
637typename storage<Type, true>::pointer
638storage<Type, true>::instance()
639{
640 return get_singleton() ? get_singleton()->instance() : nullptr;
641}
642//
643//--------------------------------------------------------------------------------------//
644//
645template <typename Type>
646typename storage<Type, true>::pointer
647storage<Type, true>::master_instance()
648{
649 return get_singleton() ? get_singleton()->master_instance() : nullptr;
650}
651//
652//--------------------------------------------------------------------------------------//
653//
654// impl::storage<Type, false>
655// impl::storage_false
656//
657//--------------------------------------------------------------------------------------//
658//
659template <typename Type>
660class storage<Type, false> : public base::storage
661{
662public:
663 //----------------------------------------------------------------------------------//
664 //
665 static constexpr bool has_data_v = false;
666
667 using result_node = std::tuple<>;
668 using graph_node = std::tuple<>;
669 using graph_t = std::tuple<>;
670 using graph_type = graph_t;
671 using dmp_result_t = std::vector<std::tuple<>>;
672 using result_array_t = std::vector<std::tuple<>>;
673 using uintvector_t = std::vector<uint64_t>;
674 using base_type = base::storage;
675 using component_type = Type;
676 using this_type = storage<Type, has_data_v>;
677 using string_t = std::string;
678 using smart_pointer = std::unique_ptr<this_type, impl::storage_deleter<this_type>>;
679 using singleton_t = singleton<this_type, smart_pointer>;
680 using singleton_type = singleton_t;
681 using pointer = typename singleton_t::pointer;
682 using auto_lock_t = typename singleton_t::auto_lock_t;
683 using printer_t = operation::finalize::print<Type, has_data_v>;
684
685 using iterator = void*;
686 using const_iterator = const void*;
687
688 friend class tim::manager;
689 friend struct node::result<Type>;
690 friend struct node::graph<Type>;
691 friend struct impl::storage_deleter<this_type>;
692 friend struct operation::finalize::get<Type, has_data_v>;
693 friend struct operation::finalize::mpi_get<Type, has_data_v>;
694 friend struct operation::finalize::upc_get<Type, has_data_v>;
695 friend struct operation::finalize::dmp_get<Type, has_data_v>;
696 friend struct operation::finalize::print<Type, has_data_v>;
697 friend struct operation::finalize::merge<Type, has_data_v>;
698
699public:
700 static pointer instance();
701 static pointer master_instance();
702 static pointer noninit_instance();
703 static pointer noninit_master_instance();
704
705 static bool& master_is_finalizing();
706 static bool& worker_is_finalizing();
707 static bool is_finalizing();
708
709private:
710 static singleton_t* get_singleton() { return get_storage_singleton<this_type>(); }
711 static std::atomic<int64_t>& instance_count();
712
713public:
714 storage();
715 ~storage() override;
716
717 explicit storage(const this_type&) = delete;
718 explicit storage(this_type&&) = delete;
719 this_type& operator=(const this_type&) = delete;
720 this_type& operator=(this_type&& rhs) = delete;
721
722 void print() final { finalize(); }
723 void cleanup() final { operation::cleanup<Type>{}; }
724 void stack_clear() final;
725 void disable() final { trait::runtime_enabled<component_type>::set(false); }
726
727 void initialize() final;
728 void finalize() final;
729
730 void reset() {}
731 TIMEMORY_NODISCARD bool empty() const { return true; }
732 TIMEMORY_NODISCARD inline size_t size() const { return 0; }
733 TIMEMORY_NODISCARD inline size_t true_size() const { return 0; }
734 TIMEMORY_NODISCARD inline size_t depth() const { return 0; }
735
736 iterator pop() { return nullptr; }
737 iterator insert(int64_t, const Type&, const string_t&) { return nullptr; }
738
739 template <typename Archive>
740 void serialize(Archive&, const unsigned int)
741 {}
742
743 void stack_push(Type* obj) { m_stack.insert(obj); }
744 void stack_pop(Type* obj);
745
746 TIMEMORY_NODISCARD std::shared_ptr<printer_t> get_printer() const
747 {
748 return m_printer;
749 }
750
751protected:
752 void get_shared_manager();
753 void merge();
754 void merge(this_type* itr);
755
756private:
757 template <typename Archive>
758 void do_serialize(Archive&)
759 {}
760
761private:
762 std::unordered_set<Type*> m_stack;
763 std::shared_ptr<printer_t> m_printer;
764};
765//
766//--------------------------------------------------------------------------------------//
767//
768template <typename Type>
769typename storage<Type, false>::pointer
770storage<Type, false>::instance()
771{
772 return get_singleton() ? get_singleton()->instance() : nullptr;
773}
774//
775//--------------------------------------------------------------------------------------//
776//
777template <typename Type>
778typename storage<Type, false>::pointer
779storage<Type, false>::master_instance()
780{
781 return get_singleton() ? get_singleton()->master_instance() : nullptr;
782}
783//
784//--------------------------------------------------------------------------------------//
785//
786} // namespace impl
787//
788//--------------------------------------------------------------------------------------//
789//
790/// \class tim::storage<Tp, Vp>
791/// \tparam Tp Component type
792/// \tparam Vp Component intermediate value type
793///
794/// \brief Responsible for maintaining the call-stack storage in timemory. This class
795/// and the serialization library are responsible for most of the timemory compilation
796/// time.
797template <typename Tp, typename Vp>
798class storage final : public impl::storage<Tp, trait::uses_value_storage<Tp, Vp>::value>
799{
800public:
801 static constexpr bool uses_value_storage_v = trait::uses_value_storage<Tp, Vp>::value;
802 using this_type = storage<Tp, Vp>;
803 using base_type = impl::storage<Tp, uses_value_storage_v>;
804 using deleter_t = impl::storage_deleter<base_type>;
805 using smart_pointer = std::unique_ptr<base_type, deleter_t>;
806 using singleton_t = singleton<base_type, smart_pointer>;
807 using pointer = typename singleton_t::pointer;
808 using auto_lock_t = typename singleton_t::auto_lock_t;
809 using iterator = typename base_type::iterator;
810 using const_iterator = typename base_type::const_iterator;
811
812 friend struct impl::storage_deleter<this_type>;
813 friend class manager;
814
815 /// get the pointer to the storage on the current thread. Will initialize instance if
816 /// one does not exist.
817 using base_type::instance;
818 /// get the pointer to the storage on the primary thread. Will initialize instance if
819 /// one does not exist.
820 using base_type::master_instance;
821 /// get the pointer to the storage on the current thread w/o initializing if one does
822 /// not exist
823 using base_type::noninit_instance;
824 /// get the pointer to the storage on the primary thread w/o initializing if one does
825 /// not exist
826 using base_type::noninit_master_instance;
827 /// returns whether storage is finalizing on the primary thread
828 using base_type::master_is_finalizing;
829 /// returns whether storage is finalizing on the current thread
830 using base_type::worker_is_finalizing;
831 /// returns whether storage is finalizing on any thread
832 using base_type::is_finalizing;
833 /// reset the storage data
834 using base_type::reset;
835 /// returns whether any data has been stored
836 using base_type::empty;
837 /// get the current estimated number of nodes
838 using base_type::size;
839 /// inspect the graph and get the true number of nodes
840 using base_type::true_size;
841 /// get the depth of the last node which pushed to hierarchical storage. Nodes which
842 /// used \ref tim::scope::flat or have \ref tim::trait::flat_storage type-trait
843 /// set to true will not affect this value
844 using base_type::depth;
845 /// drop the current node depth and set the current node to its parent
846 using base_type::pop;
847 /// insert a new node
848 using base_type::insert;
849 /// add a component to the stack which can be flushed if the merging or output is
850 /// requested/required
851 using base_type::stack_push;
852 /// remove a component from the stack that will be flushed if the merging or output is
853 /// requested/required
854 using base_type::stack_pop;
855};
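//
// Illustrative usage sketch (not from this file): querying the per-component
// call-graph storage through this wrapper, assuming a component such as
// tim::component::wall_clock whose value storage is enabled:
//
//     using wc_storage_t =
//         tim::storage<tim::component::wall_clock,
//                      typename tim::component::wall_clock::value_type>;
//     auto* _instance = wc_storage_t::instance();   // thread-local, initializes
//     if(_instance && !_instance->empty())
//     {
//         auto _nodes   = _instance->size();        // estimated node count
//         auto _results = _instance->get();         // flattened result records
//         (void) _nodes;
//         (void) _results;
//     }
//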
856//
857//--------------------------------------------------------------------------------------//
858//
859template <typename Tp>
860class storage<Tp, type_list<>>
861: public storage<
862 Tp, conditional_t<trait::is_available<Tp>::value, typename Tp::value_type, void>>
863{
864public:
865 using Vp =
866 conditional_t<trait::is_available<Tp>::value, typename Tp::value_type, void>;
867 static constexpr bool uses_value_storage_v = trait::uses_value_storage<Tp, Vp>::value;
868 using this_type = storage<Tp, type_list<>>;
869 using base_type = impl::storage<Tp, uses_value_storage_v>;
870 using deleter_t = impl::storage_deleter<base_type>;
871 using smart_pointer = std::unique_ptr<base_type, deleter_t>;
872 using singleton_t = singleton<base_type, smart_pointer>;
873 using pointer = typename singleton_t::pointer;
874 using auto_lock_t = typename singleton_t::auto_lock_t;
875 using iterator = typename base_type::iterator;
876 using const_iterator = typename base_type::const_iterator;
877
878 friend struct impl::storage_deleter<this_type>;
879 friend class manager;
880};
881//
882//--------------------------------------------------------------------------------------//
883//
884namespace impl
885{
886//
887//--------------------------------------------------------------------------------------//
888//
889template <typename StorageType>
890struct storage_deleter : public std::default_delete<StorageType>
891{
892 using Pointer = std::unique_ptr<StorageType, storage_deleter<StorageType>>;
893 using singleton_t = tim::singleton<StorageType, Pointer>;
894
895 storage_deleter() = default;
896 ~storage_deleter() = default;
897
898 void operator()(StorageType* ptr)
899 {
900 // if(ptr == nullptr)
901 // return;
902
903 StorageType* master = singleton_t::master_instance_ptr();
904 std::thread::id master_tid = singleton_t::master_thread_id();
905 std::thread::id this_tid = std::this_thread::get_id();
906
907 static_assert(!std::is_same<StorageType, tim::base::storage>::value,
908 "Error! Base class");
909 // tim::dmp::barrier();
910
911 if(ptr && master && ptr != master)
912 {
913 ptr->StorageType::stack_clear();
914 master->StorageType::merge(ptr);
915 }
916 else
917 {
918 // sometimes the worker threads get deleted after the master thread
919 // but the singleton class will ensure it is merged so we are
920 // safe to leak here
921 if(ptr && !master && this_tid != master_tid)
922 {
923 ptr->StorageType::free_shared_manager();
924 ptr = nullptr;
925 return;
926 }
927
928 if(ptr)
929 {
930 ptr->StorageType::print();
931 }
932 else if(master)
933 {
934 if(!_printed_master)
935 {
936 master->StorageType::stack_clear();
937 master->StorageType::print();
938 master->StorageType::cleanup();
939 _printed_master = true;
940 }
941 }
942 }
943
944 if(this_tid == master_tid)
945 {
946 if(ptr)
947 {
948 // ptr->StorageType::disable();
949 ptr->StorageType::free_shared_manager();
950 }
951 delete ptr;
952 ptr = nullptr;
953 }
954 else
955 {
956 if(master && ptr != master)
957 singleton_t::remove(ptr);
958
959 if(ptr)
960 ptr->StorageType::free_shared_manager();
961 delete ptr;
962 ptr = nullptr;
963 }
964
965 if(_printed_master && !_deleted_master)
966 {
967 if(master)
968 {
969 // master->StorageType::disable();
970 master->StorageType::free_shared_manager();
971 }
972 delete master;
973 master = nullptr;
974 _deleted_master = true;
975 }
976
977 using Type = typename StorageType::component_type;
978 if(_deleted_master)
979 trait::runtime_enabled<Type>::set(false);
980 }
981
982 bool _printed_master = false;
983 bool _deleted_master = false;
984};
985//
986//--------------------------------------------------------------------------------------//
987//
988} // namespace impl
989//
990//--------------------------------------------------------------------------------------//
991//
992template <typename Tp, typename Vp>
993inline base::storage*
994base::storage::base_instance()
995{
996 using storage_type = tim::storage<Tp, Vp>;
997
998 // thread-local variable
999 static thread_local base::storage* _ret = nullptr;
1000
1001 // return nullptr if disabled
1002 if(!trait::runtime_enabled<Tp>::get())
1003 return nullptr;
1004
1005 // if nullptr, try to get instance
1006 if(_ret == nullptr)
1007 {
1008 // thread will copy the hash-table so use a lock here
1009 auto_lock_t lk(type_mutex<base::storage>());
1010 _ret = static_cast<base::storage*>(storage_type::instance());
1011 }
1012
1013 // return pointer
1014 return _ret;
1015}
1016//
1017//--------------------------------------------------------------------------------------//
1018//
1019} // namespace tim
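As a closing illustration of the thread-local life-cycle declared above, the sketch below assumes a component with value storage enabled (wall_clock is used as a stand-in): each thread that records data obtains its own storage via instance(), and when a worker thread's instance is destroyed, storage_deleter merges its call-graph into the primary-thread instance before results are reported.

    #include "timemory/timemory.hpp"

    #include <cstdio>
    #include <thread>

    using comp_t    = tim::component::wall_clock;   // stand-in component
    using storage_t = tim::storage<comp_t, typename comp_t::value_type>;

    void worker_fn()
    {
        // creates (or returns) this thread's storage instance for comp_t
        auto* _worker = storage_t::instance();
        // ... start/stop comp_t bundles here; nodes land in this thread's graph ...
        (void) _worker;
    }   // at thread exit, storage_deleter merges the worker into the master

    int main(int argc, char** argv)
    {
        tim::timemory_init(argc, argv);
        std::thread{ worker_fn }.join();
        // the master storage now contains the worker's merged call-graph
        auto* _master = storage_t::master_instance();
        if(_master)
            printf("stored nodes: %lu\n", (unsigned long) _master->size());
        tim::timemory_finalize();
        return 0;
    }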