timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework to creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
merge.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/**
26 * \file timemory/operations/types/finalize_get.hpp
27 * \brief Definition for various functions for finalize_get in operations
28 */
29
30#pragma once
31
38
39#include <unordered_map>
40
41namespace tim
42{
43namespace operation
44{
45namespace finalize
46{
47//
48template <typename KeyT, typename MappedT>
49using uomap_t = std::unordered_map<KeyT, MappedT>;
50//
51//--------------------------------------------------------------------------------------//
52//
53template <typename Type>
55{
56 using pre_order_iterator = typename graph_t::pre_order_iterator;
57 using sibling_iterator = typename graph_t::sibling_iterator;
58
59 // don't merge self
60 if(&lhs == &rhs)
61 return;
62
63 // create lock
64 auto_lock_t l(singleton_t::get_mutex(), std::defer_lock);
65 if(!l.owns_lock())
66 l.lock();
67
68 // if merge was not initialized return
69 if(!rhs.is_initialized())
70 return;
71
73 if(!_settings)
74 PRINT_HERE("[%s]> nullptr to settings!", Type::get_label().c_str());
75 auto _debug =
76 _settings != nullptr && (_settings->get_debug() || _settings->get_verbose() > 2);
77
78 rhs.stack_clear();
79
80 auto _copy_hash_ids = [&lhs, &rhs, _debug]() {
81 // copy over mapping of hashes to strings
82 if(rhs.get_hash_ids() && lhs.get_hash_ids())
83 {
84 auto_lock_t _lk{ type_mutex<hash_map_t>(), std::defer_lock };
85 if(!_lk.owns_lock())
86 _lk.lock();
87
89 _debug, "[%s]> merging %lu hash-ids into existing set of %lu hash-ids!",
90 Type::get_label().c_str(), (unsigned long) rhs.get_hash_ids()->size(),
91 (unsigned long) lhs.get_hash_ids()->size());
92
93 auto _hash_ids = *rhs.get_hash_ids();
94 for(const auto& itr : _hash_ids)
95 {
96 if(lhs.m_hash_ids->find(itr.first) == lhs.m_hash_ids->end())
97 lhs.m_hash_ids->emplace(itr.first, itr.second);
98 }
99 }
100 // copy over aliases
101 if(rhs.get_hash_aliases() && lhs.get_hash_aliases())
102 {
103 auto_lock_t _lk{ type_mutex<hash_alias_map_t>(), std::defer_lock };
104 if(!_lk.owns_lock())
105 _lk.lock();
106
108 "[%s]> merging %lu hash-aliases into existing set of "
109 "%lu hash-aliases!",
110 Type::get_label().c_str(),
111 (unsigned long) rhs.get_hash_aliases()->size(),
112 (unsigned long) lhs.get_hash_aliases()->size());
113
114 auto _hash_aliases = *rhs.get_hash_aliases();
115 for(const auto& itr : _hash_aliases)
116 {
117 if(lhs.m_hash_aliases->find(itr.first) == lhs.m_hash_aliases->end())
118 lhs.m_hash_aliases->emplace(itr.first, itr.second);
119 }
120 }
121 };
122
123 // if self is not initialized but itr is, copy data
124 if(rhs.is_initialized() && !lhs.is_initialized())
125 {
126 PRINT_HERE("[%s]> Warning! master is not initialized! Segmentation fault likely",
127 Type::get_label().c_str());
128 lhs.graph().insert_subgraph_after(lhs._data().head(), rhs.data().head());
129 lhs.graph().steal_resources(rhs.graph());
130 lhs.m_initialized = rhs.m_initialized;
131 lhs.m_finalized = rhs.m_finalized;
132 _copy_hash_ids();
133 return;
134 }
135
136 _copy_hash_ids();
137
138 if(rhs.empty() || !rhs.data().has_head())
139 return;
140
141 int64_t num_merged = 0;
142 auto inverse_insert = rhs.data().get_inverse_insert();
143
144 for(auto entry : inverse_insert)
145 {
146 auto master_entry = lhs.data().find(entry.second);
147 if(master_entry != lhs.data().end())
148 {
149 pre_order_iterator pitr(entry.second);
150
151 if(rhs.graph().is_valid(pitr) && pitr)
152 {
154 _debug, "[%s]> worker is merging %i records into %i records",
155 Type::get_label().c_str(), (int) rhs.size(), (int) lhs.size());
156
157 pre_order_iterator pos = master_entry;
158
159 if(*pos == *pitr)
160 {
161 ++num_merged;
162 sibling_iterator other = pitr;
163 for(auto sitr = other.begin(); sitr != other.end(); ++sitr)
164 {
165 pre_order_iterator pchild = sitr;
166 if(!pchild || pchild->data().get_is_invalid())
167 continue;
168 lhs.graph().append_child(pos, pchild);
169 }
170 }
171
172 CONDITIONAL_PRINT_HERE(_debug, "[%s]> master has %i records",
173 Type::get_label().c_str(), (int) lhs.size());
174
175 // remove the entry from this graph since it has been added
176 // rhs.graph().erase(pitr);
177 }
178 }
179 }
180
181 int64_t merge_size = static_cast<int64_t>(inverse_insert.size());
182 if(num_merged != merge_size)
183 {
184 int64_t diff = merge_size - num_merged;
185 std::stringstream ss;
186 ss << "Testing error! Missing " << diff << " merge points. The worker thread "
187 << "contained " << merge_size << " bookmarks but only merged " << num_merged
188 << " nodes!";
189
190 CONDITIONAL_PRINT_HERE(_debug, "%s", ss.str().c_str());
191
192#if defined(TIMEMORY_TESTING) || defined(TIMEMORY_INTERNAL_TESTING)
193 TIMEMORY_EXCEPTION(ss.str());
194#endif
195 }
196
197 if(num_merged == 0)
198 {
199 CONDITIONAL_PRINT_HERE(_debug, "[%s]> worker is not merged!",
200 Type::get_label().c_str());
201 pre_order_iterator _nitr(rhs.data().head());
202 ++_nitr;
203 if(!lhs.graph().is_valid(_nitr))
204 _nitr = pre_order_iterator(rhs.data().head());
205 lhs.graph().append_child(lhs._data().head(), _nitr);
206 }
207
208 CONDITIONAL_PRINT_HERE(_debug, "[%s]> clearing merged storage!",
209 Type::get_label().c_str());
210
211 lhs.graph().steal_resources(rhs.graph());
212 rhs.data().clear();
213}
214//
215//--------------------------------------------------------------------------------------//
216//
217template <typename Type>
219{
220 using result_node = typename result_type::value_type;
221
222 try
223 {
224 dst.reserve(dst.size() + src.size());
225 } catch(std::bad_alloc& e)
226 {
227 std::cerr << "Warning! Running out of memory is probable! " << e.what()
228 << std::endl;
229 }
230
231 //----------------------------------------------------------------------------------//
232 //
233 using merge_hash_map_t =
234 uomap_t<int64_t,
236
237 // this is a look-up table for the index in dst of existing records with same depth +
238 // hash + rolling-hash + prefix
239 merge_hash_map_t _hash_table{};
240
241 //----------------------------------------------------------------------------------//
242 // add hash-table entries for a new record
243 //
244 auto _add_entry = [&_hash_table](const result_node& _obj, int64_t _idx) {
245 _hash_table[_obj.depth()][_obj.hash()][_obj.rolling_hash()][_obj.prefix()] = _idx;
246 };
247
248 //----------------------------------------------------------------------------------//
249 // get the index in dst of an existing match if it exists, otherwise return -1
250 //
251 auto _get_index = [&](const result_node& _lhs) -> int64_t {
252 auto& _tbl = _hash_table[_lhs.depth()][_lhs.hash()][_lhs.rolling_hash()];
253 auto itr = _tbl.find(_lhs.prefix());
254 if(itr != _tbl.end())
255 return itr->second;
256 return -1;
257 };
258
259 //----------------------------------------------------------------------------------//
260 // add hash-table entries for all the existing records
261 //
262 for(size_t i = 0; i < dst.size(); ++i)
263 _add_entry(dst.at(i), static_cast<int64_t>(i));
264
265 //----------------------------------------------------------------------------------//
266 // collapse duplicates
267 //
268 size_t cnt = 0;
269 size_t ndup = 0;
271 auto _debug = (_settings) ? _settings->get_debug() : true;
272 auto _verbose = (_settings) ? _settings->get_verbose() : 1;
273
274 if(_debug || _verbose > 0)
275 {
276 fprintf(stderr, "Merging %lu new records into %lu existing records\n",
277 (unsigned long) src.size(), (unsigned long) dst.size());
278 }
279
280 for(auto& itr : src)
281 {
282 if(_debug || _verbose > 3)
283 {
284 fprintf(stderr, "Checking %lu of %lu...", (unsigned long) cnt++,
285 (unsigned long) src.size());
286 }
287
288 auto idx = _get_index(itr);
289 if(idx < 0)
290 {
291 if(_debug || _verbose > 3)
292 fprintf(stderr, "new entry\n");
293 _add_entry(itr, static_cast<int64_t>(dst.size()));
294 dst.emplace_back(std::move(itr));
295 }
296 else
297 {
298 ++ndup;
299 if(_debug || _verbose > 3)
300 fprintf(stderr, "duplicate\n");
301 auto& citr = dst.at(idx);
302 citr.data() += itr.data();
303 citr.data().plus(itr.data());
304 citr.stats() += itr.stats();
305 }
306 }
307
308 if(_debug || _verbose > 0)
309 {
310 fprintf(stderr, "Merged %lu duplicates into %lu records\n", (unsigned long) ndup,
311 (unsigned long) dst.size());
312 }
313
314 // shrink the reserved memory
315 dst.shrink_to_fit();
316}
317//
318//--------------------------------------------------------------------------------------//
319//
320template <typename Type>
321template <typename Tp>
324{
325 using children_type = typename basic_tree<Tp>::children_type;
326
327 // do nothing if no children
328 if(_bt.get_children().empty())
329 return _bt;
330
331 // make copy
332 auto _ret = _bt;
333 // recursively apply
334 for(auto& itr : _ret.get_children())
335 *itr = (*this)(*itr);
336
337 // aggregate children
338 children_type _children{};
339 for(auto& itr : _ret.get_children())
340 {
341 bool found = false;
342 for(auto& citr : _children)
343 {
344 if(*citr == *itr)
345 {
346 found = true;
347 *citr += *itr;
348 }
349 }
350 if(!found)
351 _children.emplace_back(itr);
352 // _children.emplace_back(std::move(itr));
353 }
354
355 // update new children
356 _ret.get_children() = _children;
357 return _ret;
358}
359//
360//--------------------------------------------------------------------------------------//
361//
362template <typename Type>
363template <typename Tp>
366{
367 return basic_tree<Tp>{ _lhs } += _rhs;
368}
369//
370//--------------------------------------------------------------------------------------//
371//
372template <typename Type>
373template <typename Tp>
374std::vector<basic_tree<Tp>>
376{
377 auto _ret = _bt;
378 for(auto& itr : _ret)
379 itr = (*this)(itr);
380 return _ret;
381}
382//
383//--------------------------------------------------------------------------------------//
384//
385template <typename Type>
386template <typename Tp>
387std::vector<basic_tree<Tp>>
389 const std::vector<basic_tree<Tp>>& _rhs)
390{
391 using basic_t = basic_tree<Tp>;
392 using basic_vec_t = std::vector<basic_t>;
393 using basic_map_t = std::map<size_t, basic_t>;
394 using basic_bool_t = std::vector<bool>;
395
396 auto _p = basic_map_t{}; // map of paired instances in lhs and rhs
397 auto _ul = basic_map_t{}; // map of unpaired instances in lhs
398 auto _ur = basic_map_t{}; // map of unpaired instances in rhs
399 auto _bl = basic_bool_t(_lhs.size(), false); // track if unpaired
400 auto _br = basic_bool_t(_rhs.size(), false);
401 for(size_t i = 0; i < _lhs.size(); ++i)
402 {
403 const auto& _l = _lhs.at(i);
404 // look for any matches b/t lhs and rhs
405 for(size_t j = 0; j < _rhs.size(); ++j)
406 {
407 const auto& _r = _rhs.at(j);
408 if(_l == _r)
409 {
410 _bl.at(i) = true;
411 _br.at(j) = true;
412 if(_p.find(i) == _p.end())
413 _p[i] = (*this)(_l);
414 _p[i] += (*this)(_r);
415 }
416 }
417 // insert any unmatched instances into unused-lhs map
418 if(!_bl.at(i))
419 _ul[i] = (*this)(_l);
420 }
421
422 for(size_t j = 0; j < _rhs.size(); ++j)
423 {
424 // insert any unmatched instances into unused-rhs map
425 const auto& _r = _rhs.at(j);
426 if(!_br.at(j))
427 _ur[j] = (*this)(_r);
428 }
429
430 // create the final product
431 auto _ret = basic_vec_t{};
432 auto n = std::max<size_t>(_lhs.size(), _rhs.size());
433 _ret.reserve(n);
434 for(size_t i = 0; i < n; ++i)
435 {
436 auto _append = [&](auto& _obj) {
437 auto itr = _obj.find(i);
438 if(itr != _obj.end())
439 _ret.emplace_back(itr->second);
440 //_ret.emplace_back(std::move(itr->second));
441 };
442 _append(_p);
443 _append(_ul);
444 _append(_ur);
445 }
446
447 return (*this)(_ret);
448}
449//
450//--------------------------------------------------------------------------------------//
451//
452template <typename Type>
453template <typename Tp>
454std::vector<basic_tree<Tp>>
455merge<Type, true>::operator()(const std::vector<std::vector<basic_tree<Tp>>>& _bt,
456 size_t _root)
457{
458 if(_bt.empty())
459 return _bt;
460
461 if(_root >= _bt.size())
462 _root = 0;
463 auto _ret = _bt.at(_root);
464 for(size_t i = 0; i < _ret.size(); ++i)
465 {
466 if(i == _root)
467 continue;
468 _ret = (*this)(_ret, _ret.at(i));
469 }
470
471 return (*this)(_ret);
472}
473//
474//--------------------------------------------------------------------------------------//
475//
476/*template <typename Type>
477template <typename Tp>
478void
479merge<Type, true>::operator()(GraphT& _g, ItrT _root, ItrT _rhs)
480{
481 using pre_order_iterator = typename GraphT::pre_order_iterator;
482 using sibling_iterator = typename GraphT::sibling_iterator;
483
484 auto _equiv = [](ItrT _lhs, ItrT _rhs) {
485 if(_lhs->depth() != _rhs->depth())
486 return false;
487 auto _lhs_id = get_hash_id(get_hash_aliases(), _lhs->id());
488 auto _rhs_id = get_hash_id(get_hash_aliases(), _rhs->id());
489 return (_lhs_id == _rhs_id);
490 };
491
492 for(sibling_iterator ritr = _rhs.begin(); ritr != _rhs.end(); ++ritr)
493 {
494 bool found = false;
495 for(sibling_iterator itr = _root.begin(); itr != _root.end(); ++itr)
496 {
497 if(_equiv(ritr, itr))
498 {
499 found = true;
500 if(itr != ritr)
501 {
502 itr->data() += ritr->data();
503 itr->data().plus(ritr->data());
504 itr->stats() += ritr->stats();
505 }
506 }
507 }
508 if(!found)
509 {
510 pre_order_iterator citr = ritr;
511 _g.append_child(_root, citr);
512 }
513 }
514}*/
515//
516//--------------------------------------------------------------------------------------//
517//
518template <typename Type>
520{
521 rhs.stack_clear();
522
523 // create lock
524 auto_lock_t l(singleton_t::get_mutex(), std::defer_lock);
525 if(!l.owns_lock())
526 l.lock();
527
528 for(const auto& itr : *rhs.get_hash_ids())
529 {
530 if(lhs.m_hash_ids->find(itr.first) == lhs.m_hash_ids->end())
531 (*lhs.m_hash_ids)[itr.first] = itr.second;
532 }
533 for(const auto& itr : (*rhs.get_hash_aliases()))
534 {
535 if(lhs.m_hash_aliases->find(itr.first) == lhs.m_hash_aliases->end())
536 (*lhs.m_hash_aliases)[itr.first] = itr.second;
537 }
538}
539//
540//--------------------------------------------------------------------------------------//
541//
542} // namespace finalize
543} // namespace operation
544} // namespace tim
const hash_alias_ptr_t hash_value_t std::string *& _ret
Definition: definition.hpp:300
std::unordered_map< KeyT, MappedT > uomap_t
Definition: merge.hpp:49
data::entry entry
Definition: stream.hpp:980
Definition: kokkosp.cpp:39
std::unique_lock< mutex_t > auto_lock_t
Unique lock type around mutex_t.
Definition: locking.hpp:42
char argparse::argument_parser tim::settings * _settings
Definition: config.cpp:255
size_t pos
Definition: config.cpp:102
void finalize()
Definition: types.hpp:119
The declaration for the types for operations without definitions.
Include the macros for operations.
Declare the operations types.
Basic hierarchical tree implementation. Expects population from tim::graph.
Definition: basic_tree.hpp:44
auto & get_children()
return the array of child nodes
Definition: basic_tree.hpp:70
std::vector< child_pointer > children_type
Definition: basic_tree.hpp:49
impl::storage< Type, has_data > storage_type
Definition: types.hpp:1144
typename storage_type::result_array_t result_type
Definition: types.hpp:1107
impl::storage< Type, has_data > storage_type
Definition: types.hpp:1104
static settings * instance()
Definition: settings.hpp:536
#define CONDITIONAL_PRINT_HERE(CONDITION,...)
Definition: macros.hpp:183
#define PRINT_HERE(...)
Definition: macros.hpp:152
#define TIMEMORY_EXCEPTION(...)
Definition: types.hpp:138