timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
cupti_counters.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/** \file cupti_counters.hpp
26 * \headerfile cupti_counters.hpp "timemory/cupti_counters.hpp"
27 * Provides implementation of CUPTI routines.
28 *
29 */
30
31#pragma once
32
34#include "timemory/components/cupti/backends.hpp"
38
39#include <algorithm>
40#include <iterator>
41#include <memory>
42#include <numeric>
43#include <set>
44#include <string>
45#include <vector>
46
47//======================================================================================//
48
49namespace tim
50{
51namespace component
52{
53//--------------------------------------------------------------------------------------//
54//
55// CUPTI hardware counters component
56//
57//--------------------------------------------------------------------------------------//
58/// \struct tim::component::cupti_counters
59/// \brief NVprof-style hardware counters via the CUpti callback API. Collecting these
60/// hardware counters has a higher overhead than the new CUpti Profiling API (\ref
61/// tim::component::cupti_profiler). However, there are currently some issues with nesting
62/// the Profiling API and it is currently recommended to use this component for NVIDIA
63/// hardware counters in timemory. The callback API / NVprof is quite specific about
64/// the distinction between an "event" and a "metric". For your convenience, timemory
65/// removes this distinction and events can be specified arbitrarily as metrics and
66/// vice-versa and this component will sort them into their appropriate category.
67/// For the full list of the available events/metrics, use `timemory-avail -H` from the
68/// command-line.
69///
70struct cupti_counters : public base<cupti_counters, cupti::profiler::results_t>
71{
72 // required aliases
73 using value_type = cupti::profiler::results_t;
76
77 // custom aliases
78 using size_type = std::size_t;
80 using kernel_data_t = cupti::result;
81 using entry_type = typename value_type::value_type;
82 using results_t = cupti::profiler::results_t;
83 using kernel_results_t = cupti::profiler::kernel_results_t;
84
85 // short-hand for vectors
86 using strvec_t = std::vector<string_t>;
87 using profptr_t = std::shared_ptr<cupti::profiler>;
88 // a tuple of the <devices, events, metrics>
89 using tuple_type = std::tuple<int, strvec_t, strvec_t>;
90 // function for setting device, metrics, and events to record
91 using event_func_t = std::function<strvec_t()>;
92 using metric_func_t = std::function<strvec_t()>;
93 using device_func_t = std::function<int()>;
94 // function for setting all of device, metrics, and events
95 using get_initializer_t = std::function<tuple_type()>;
96
97 static const short precision = 3;
98 static const short width = 8;
99
101 {
102 static event_func_t _instance = []() {
104 };
105 return _instance;
106 }
107
109 {
110 static metric_func_t _instance = []() {
112 };
113 return _instance;
114 }
115
117 {
118 static device_func_t _instance = []() {
119 if(cuda::device_count() < 1)
120 return -1;
121 return settings::cupti_device();
122 };
123 return _instance;
124 }
125
127 {
128 static get_initializer_t _instance = []() -> tuple_type {
131 };
132 return _instance;
133 }
134
135 static void configure()
136 {
137 if(_get_profiler().get() == nullptr)
138 init();
139 }
140
141 /// explicitly configure for a device and set of events/metrics.
142 static void configure(int device, const strvec_t& events,
143 const strvec_t& metrics = {})
144 {
145 get_initializer() = [=]() -> tuple_type {
146 return tuple_type(device, events, metrics);
147 };
148 if(_get_profiler().get() == nullptr)
149 init();
150 }
151
152 static void global_init() { configure(); }
153 static void global_finalize() { clear(); }
154
155 static const profptr_t& get_profiler() { return _get_profiler(); }
156 static const strvec_t& get_events() { return *_get_events(); }
157 static const strvec_t& get_metrics() { return *_get_metrics(); }
158 static int get_device() { return *_get_device(); }
159 static const strvec_t& get_labels() { return *_get_labels(); }
160
162 {
163 configure();
164 auto* _labels = _get_labels();
165 if(_labels)
166 {
167 value.resize(_labels->size());
168 accum.resize(_labels->size());
169 for(size_type i = 0; i < _labels->size(); ++i)
170 {
171 value[i].name = (*_labels)[i];
172 accum[i].name = (*_labels)[i];
173 }
174 }
175 }
176
177 ~cupti_counters() = default;
179 cupti_counters(cupti_counters&&) noexcept = default;
180 cupti_counters& operator =(const cupti_counters& rhs)
181 {
182 if(this != &rhs)
183 {
184 base_type::operator=(rhs);
185 m_kernel_value = rhs.m_kernel_value;
186 m_kernel_accum = rhs.m_kernel_accum;
187 }
188 return *this;
189 }
191
192 static int64_t unit() { return 1; }
193 // leave these empty
194 static string_t label() { return "cupti_counters"; }
195 static string_t description() { return "Hardware counters for the CUDA API"; }
196 static string_t display_unit() { return ""; }
197
199 {
200 configure();
201 value_type tmp;
202 auto& _profiler = _get_profiler();
203 if(!_profiler || !_get_labels())
204 return tmp;
205 auto& _labels = *_get_labels();
206 _profiler->stop();
207 if(tmp.empty())
208 {
209 tmp = _profiler->get_events_and_metrics(_labels);
210 }
211 else if(tmp.size() == _labels.size())
212 {
213 auto ret = _profiler->get_events_and_metrics(_labels);
214 for(size_t j = 0; j < _labels.size(); ++j)
215 tmp[j] += ret[j];
216 }
217 else
218 {
219 fprintf(stderr, "Warning! mis-matched size in cupti_event::%s @ %s:%i\n",
220 TIMEMORY_ERROR_FUNCTION_MACRO, __FILE__, __LINE__);
221 }
222
223 return tmp;
224 }
225
226 //----------------------------------------------------------------------------------//
227 // start
228 //
229 void start()
230 {
231 value = record();
232 auto& _profiler = _get_profiler();
233 if(_profiler)
234 {
235 m_kernel_value = _profiler->get_kernel_events_and_metrics(*_get_labels());
236 _profiler->start();
237 }
238 }
239
240 void stop()
241 {
242 using namespace stl;
243 using namespace tim::component::operators;
244
245 value_type tmp = record();
246 auto& _profiler = _get_profiler();
247 if(!_profiler)
248 return;
249
250 kernel_results_t kernel_data =
251 _profiler->get_kernel_events_and_metrics(*_get_labels());
252 kernel_results_t kernel_tmp = kernel_data;
253
254 if(accum.empty())
255 {
256 accum = tmp;
257 for(size_type i = 0; i < tmp.size(); ++i)
258 accum[i] -= value[i];
259 }
260 else
261 {
262 for(size_type i = 0; i < tmp.size(); ++i)
263 accum[i] += (tmp[i] - value[i]);
264 }
265
266 for(size_t i = 0; i < m_kernel_value.size(); ++i)
267 kernel_tmp[i].second -= m_kernel_value[i].second;
268 for(size_t i = 0; i < kernel_tmp.size(); ++i)
269 {
270 if(i >= m_kernel_accum.size())
271 {
272 m_kernel_accum.resize(i + 1, kernel_tmp[i]);
273 }
274 else
275 {
276 m_kernel_accum[i].second += kernel_tmp[i].second;
277 }
278 }
279
280 value = std::move(tmp);
281 m_kernel_value = std::move(kernel_data);
282 }
283
284 TIMEMORY_NODISCARD string_t get_display() const
285 {
286 auto _get_display = [&](std::ostream& os, const cupti::result& obj) {
287 auto _label = obj.name;
288 auto _prec = base_type::get_precision();
289 auto _width = base_type::get_width();
290 auto _flags = base_type::get_format_flags();
291
292 std::stringstream ss;
293 std::stringstream ssv;
294 std::stringstream ssi;
295 ssv.setf(_flags);
296 ssv << std::setw(_width) << std::setprecision(_prec);
297 cupti::print(ssv, obj.data);
298 if(!_label.empty())
299 ssi << " " << _label;
300 ss << ssv.str() << ssi.str();
301 os << ss.str();
302 };
303
304 const auto& _data = load();
305 std::stringstream ss;
306 for(size_type i = 0; i < _data.size(); ++i)
307 {
308 _get_display(ss, _data[i]);
309 if(i + 1 < _data.size())
310 ss << ", ";
311 }
312 return ss.str();
313 }
314
315 TIMEMORY_NODISCARD std::vector<double> get() const
316 {
317 std::vector<double> values;
318 const auto& _data = load();
319 values.reserve(_data.size());
320 for(const auto& itr : _data)
321 values.push_back(cupti::get<double>(itr.data));
322 return values;
323 }
324
325 using secondary_type = std::unordered_multimap<std::string, value_type>;
326
327 TIMEMORY_NODISCARD secondary_type get_secondary() const
328 {
329 secondary_type _data;
330 for(const auto& itr : m_kernel_accum)
331 _data.insert({ itr.first, itr.second });
332 return _data;
333 }
334
335 template <typename Tp>
336 using array_t = std::vector<Tp>;
337
338 //----------------------------------------------------------------------------------//
339 // array of descriptions
340 //
342 {
344 auto contains = [&](const string_t& entry) {
345 return std::find(arr.begin(), arr.end(), entry) != arr.end();
346 };
347 auto insert = [&](const string_t& entry) {
348 if(!contains(entry))
349 arr.push_back(entry);
350 };
351 auto* _labels = _get_labels();
352 if(_labels)
353 {
354 for(const auto& itr : *_labels)
355 insert(itr);
356 }
357 // auto profiler = get_profiler();
358 // for(const auto& itr : profiler->get_event_names())
359 // insert(itr);
360 // for(const auto& itr : profiler->get_metric_names())
361 // insert(itr);
362 return arr;
363 }
364
365 //----------------------------------------------------------------------------------//
366 // array of labels
367 //
369
370 //----------------------------------------------------------------------------------//
371 // array of unit
372 //
374 {
375 return array_t<string_t>(get_labels().size(), "");
376 }
377
378 //----------------------------------------------------------------------------------//
379 // array of unit values
380 //
382 {
383 return array_t<int64_t>(get_labels().size(), 1);
384 }
385
387 {
388 auto _combine = [](value_type& _data, const value_type& _other) {
389 auto& _labels = *_get_labels();
390 if(_data.empty())
391 {
392 _data = _other;
393 }
394 else
395 {
396 for(size_type i = 0; i < _labels.size(); ++i)
397 _data[i] += _other[i];
398 }
399 };
400
401 _combine(value, rhs.value);
402 _combine(accum, rhs.accum);
403 return *this;
404 }
405
407 {
408 auto _combine = [](value_type& _data, const value_type& _other) {
409 auto& _labels = *_get_labels();
410 // set to other
411 if(_data.empty())
412 _data = _other;
413 // subtract other (if data was empty, will contain zero data)
414 for(size_type i = 0; i < _labels.size(); ++i)
415 _data[i] -= _other[i];
416 };
417
418 _combine(value, rhs.value);
419 _combine(accum, rhs.accum);
420 return *this;
421 }
422
424 {
425 auto _combine = [](value_type& _data, const value_type& _other) {
426 if(_data.empty())
427 {
428 _data = _other;
429 }
430 else
431 {
432 for(size_type i = 0; i < _other.size(); ++i)
433 _data[i] += _other[i];
434 }
435 };
436
437 _combine(value, rhs);
438 _combine(accum, rhs);
439
440 return *this;
441 }
442
443 //----------------------------------------------------------------------------------//
444 // serialization
445 //
446 template <typename Archive>
447 void serialize(Archive& ar, const unsigned int)
448 {
449 auto _get = [&](const value_type& _data) {
450 std::vector<double> values;
451 for(const auto& itr : _data)
452 values.push_back(cupti::get<double>(itr.data));
453 return values;
454 };
455 array_t<double> _disp = _get(accum);
456 array_t<double> _value = _get(value);
457 array_t<double> _accum = _get(accum);
458 ar(cereal::make_nvp("laps", laps), cereal::make_nvp("repr_data", _disp),
459 cereal::make_nvp("value", _value), cereal::make_nvp("accum", _accum),
460 cereal::make_nvp("display", _disp));
461 // ar(cereal::make_nvp("units", unit_array()),
462 // cereal::make_nvp("display_units", display_unit_array()));
463 }
464
465 //----------------------------------------------------------------------------------//
466 //
467 template <typename Archive>
468 static void extra_serialization(Archive& ar)
469 {
470 auto& _devices = *_get_device();
471 auto& _events = *_get_events();
472 auto& _metrics = *_get_metrics();
473 auto& _labels = *_get_labels();
474
475 ar(cereal::make_nvp("devices", _devices), cereal::make_nvp("events", _events),
476 cereal::make_nvp("metrics", _metrics), cereal::make_nvp("labels", _labels));
477 }
478
479 //----------------------------------------------------------------------------------//
480
481private:
/// Lightweight, non-owning wrapper that streams the elements of a container
/// separated by ", ". Holds a reference to the container, so the container
/// must outlive the writer.
template <typename Tp>
struct writer
{
    using const_iterator = typename Tp::const_iterator;
    Tp& obj;
    writer(Tp& _obj)
    : obj(_obj)
    {}

    TIMEMORY_NODISCARD const_iterator begin() const { return obj.begin(); }
    TIMEMORY_NODISCARD const_iterator end() const { return obj.end(); }

    /// Writes "a, b, c" (no trailing separator; nothing for an empty
    /// container). A single pass over the elements: no per-element
    /// std::distance, which is linear for non-random-access iterators such
    /// as those of std::set.
    friend std::ostream& operator<<(std::ostream& os, const writer<Tp>& _obj)
    {
        bool _first = true;
        for(auto itr = _obj.begin(); itr != _obj.end(); ++itr)
        {
            if(!_first)
                os << ", ";
            _first = false;
            os << (*itr);
        }
        return os;
    }
};
507
508 static profptr_t& _get_profiler()
509 {
510 static profptr_t _instance = profptr_t(nullptr);
511 return _instance;
512 }
513
514 static strvec_t*& _get_events()
515 {
516 static strvec_t* _instance = new strvec_t();
517 return _instance;
518 }
519
520 static strvec_t*& _get_metrics()
521 {
522 static strvec_t* _instance = new strvec_t();
523 return _instance;
524 }
525
526 static int*& _get_device()
527 {
528 static int* _instance = new int(0);
529 return _instance;
530 }
531
532 static strvec_t*& _get_labels()
533 {
534 static strvec_t* _instance = new strvec_t();
535 return _instance;
536 }
537
538 static strvec_t generate_labels()
539 {
540 array_t<string_t> arr;
541 auto contains = [&](const string_t& entry) {
542 return std::find(arr.begin(), arr.end(), entry) != arr.end();
543 };
544 auto insert = [&](const string_t& entry) {
545 if(!contains(entry))
546 arr.push_back(entry);
547 };
548 auto profiler = get_profiler();
549 if(profiler)
550 {
551 for(const auto& itr : profiler->get_event_names())
552 insert(itr);
553 for(const auto& itr : profiler->get_metric_names())
554 insert(itr);
555 }
556 return arr;
557 }
558
559 static strvec_t get_available_events(int devid)
560 {
561 return cupti::available_events(cupti::get_device(devid));
562 }
563
564 static strvec_t get_available_metrics(int devid)
565 {
566 return cupti::available_metrics(cupti::get_device(devid));
567 }
568
569 static tuple_type get_available(const tuple_type&, int);
570
// (Re)initialize the shared CUPTI state: device, events, metrics, labels and
// the profiler instance. Called from configure() when no profiler exists.
// Prints warnings to stderr when no device or no events/metrics are usable and
// prints a summary to stdout when a profiler was created.
571 static void init()
572 {
// do nothing without a manager instance or while it is finalizing/finalized
573 auto _manager = manager::instance();
574 if(!_manager || _manager->is_finalized() || _manager->is_finalizing())
575 return;
576
// TIMEMORY_CUPTI_INIT_CB (default: true) gates the device sync below and is
// also forwarded to the cupti::profiler constructor
577 auto _init_cb = tim::get_env<bool>("TIMEMORY_CUPTI_INIT_CB", true);
578 cupti::init_driver();
579 if(_init_cb)
580 cuda::device_sync();
// drop any previous events, metrics and profiler (labels are not cleared)
581 clear();
582
// NOTE(review): these dereference the storage pointers unconditionally;
// cleanup() sets them to nullptr — confirm init() is never reached afterwards
583 auto& _profiler = _get_profiler();
584 auto& _events = *_get_events();
585 auto& _metrics = *_get_metrics();
586 auto& _device = *_get_device();
587 auto& _labels = *_get_labels();
588
// invoke the stored initializer and record the requested configuration
589 auto _init = get_initializer()();
590
591 _device = std::get<0>(_init);
592 _events = std::get<1>(_init);
593 _metrics = std::get<2>(_init);
594
595 using intset_t = std::set<int>;
596 using strset_t = std::set<string_t>;
597
// what actually ended up being used; only consumed by the summary below
598 intset_t _used_devs;
599 strset_t _used_evts;
600 strset_t _used_mets;
601
// the request as processed by get_available() for the requested device
602 auto _dev_init = get_available(_init, _device);
603 auto& _dev = std::get<0>(_dev_init);
604
605 // if < 0, no metrics or events available/specified
606 if(_dev >= 0)
607 {
608 if(settings::debug())
609 printf("Creating CUPTI hardware profiler for device %i...\n", _device);
610
611 auto& _evt = std::get<1>(_dev_init);
612 auto& _met = std::get<2>(_dev_init);
613
// a profiler is only created when at least one event or metric remains
614 if(!_evt.empty() || !_met.empty())
615 {
616 _profiler = std::make_shared<cupti::profiler>(_evt, _met, _dev, _init_cb);
617 _used_devs.insert(_dev);
618 for(const auto& itr : _evt)
619 _used_evts.insert(itr);
620 for(const auto& itr : _met)
621 _used_mets.insert(itr);
// labels are regenerated from the newly created profiler
622 _labels = generate_labels();
623 }
624 else
625 {
// the post-increment test means the first occurrence is silent and the
// warning is printed from the second occurrence onwards
626 static int _pass = 0;
627 if(_pass++ > 0)
628 fprintf(stderr, "[cupti_counters]> Warning! No events or metrics!\n");
629 }
630 }
631 else
632 {
633 fprintf(stderr, "[cupti_counters]> Warning! No devices available!\n");
634 }
635
// summary of the configuration; printed whenever a profiler was created
// because the verbosity check below is commented out
636 if(!_used_devs.empty())
637 {
638 // if(settings::verbose() > 0 || settings::debug())
639 {
640 std::cout << "Devices : " << writer<intset_t>(_used_devs) << std::endl;
641 std::cout << "Event : " << writer<strset_t>(_used_evts) << std::endl;
642 std::cout << "Metrics : " << writer<strset_t>(_used_mets) << std::endl;
643 std::cout << "Labels : " << writer<strvec_t>(_labels) << std::endl;
644 }
645 }
646 }
647
648 static void clear()
649 {
650 if(_get_metrics())
651 _get_metrics()->clear();
652 if(_get_events())
653 _get_events()->clear();
654 _get_profiler().reset();
655 }
656
657public:
658 static void cleanup()
659 {
660 clear();
661 delete _get_device();
662 delete _get_events();
663 delete _get_labels();
664 delete _get_metrics();
665 _get_device() = nullptr;
666 _get_events() = nullptr;
667 _get_labels() = nullptr;
668 _get_metrics() = nullptr;
669 }
670
671private:
672 kernel_results_t m_kernel_value;
673 kernel_results_t m_kernel_accum;
674};
675
676//--------------------------------------------------------------------------------------//
677
679cupti_counters::get_available(const tuple_type& _init, int devid)
680{
681 if(devid < 0 || devid >= cuda::device_count())
682 {
683 int ndev = cuda::device_count();
684 fprintf(stderr, "[cupti_counters]> Invalid device id: %i. # devices: %i...\n",
685 devid, ndev);
686 return tuple_type(-1, strvec_t(), strvec_t());
687 }
688
689 strvec_t _events = std::get<1>(_init);
690 strvec_t _metrics = std::get<2>(_init);
691
692 auto _tmp_init = get_initializer()();
693
694 if(_events.empty())
695 _events = std::get<1>(_tmp_init);
696
697 // provide defaults events
698 if(_events.empty())
699 {
700 // _events = { "active_warps", "active_cycles", "global_load", "global_store" };
701 }
702
703 if(_metrics.empty())
704 _metrics = std::get<2>(_tmp_init);
705
706 // provide default metrics
707 if(_metrics.empty())
708 {
709 //_metrics = { "inst_per_warp", "branch_efficiency", "gld_efficiency",
710 // "gst_efficiency", "warp_execution_efficiency" };
711 }
712
713 const auto& _avail_events = get_available_events(devid);
714 const auto& _avail_metric = get_available_metrics(devid);
715
716 std::set<std::string> _discarded_events{};
717 std::set<std::string> _discarded_metrics{};
718
719 bool _discard = true;
720
721 // handle events
722 auto _not_event = [&_avail_events, &_discarded_events,
723 &_discard](const string_t& evt) {
724 bool nf = (std::find(std::begin(_avail_events), std::end(_avail_events), evt) ==
725 std::end(_avail_events));
726 if(nf && _discard)
727 _discarded_events.insert(evt);
728 return nf;
729 };
730
731 // handle metrics
732 auto _not_metric = [&_avail_metric, &_discarded_metrics,
733 &_discard](const string_t& met) {
734 bool nf = (std::find(std::begin(_avail_metric), std::end(_avail_metric), met) ==
735 std::end(_avail_metric));
736 if(nf && _discard)
737 _discarded_metrics.insert(met);
738 return nf;
739 };
740
741 // do the removals
742 _events.erase(std::remove_if(std::begin(_events), std::end(_events), _not_event),
743 std::end(_events));
744
745 _metrics.erase(std::remove_if(std::begin(_metrics), std::end(_metrics), _not_metric),
746 std::end(_metrics));
747
748 // turn off discarding
749 _discard = false;
750
751 // check to see if any requested events are actually metrics
752 for(const auto& itr : _discarded_events)
753 {
754 bool is_metric = !(_not_metric(itr));
755 if(is_metric)
756 {
757 _metrics.push_back(itr);
758 }
759 else
760 {
761 fprintf(stderr,
762 "[cupti_counters]> Removing unavailable event '%s' on device %i...\n",
763 itr.c_str(), devid);
764 }
765 }
766
767 // check to see if any requested metrics are actually events
768 for(const auto& itr : _discarded_metrics)
769 {
770 bool is_event = !(_not_event(itr));
771 if(is_event)
772 {
773 _events.push_back(itr);
774 }
775 else
776 {
777 fprintf(stderr,
778 "[cupti_counters]> Removing unavailable metric '%s' on device "
779 "%i...\n",
780 itr.c_str(), devid);
781 }
782 }
783
784 // determine total
785 return tuple_type(devid, _events, _metrics);
786}
787
788//--------------------------------------------------------------------------------------//
789
791
792//--------------------------------------------------------------------------------------//
793
794} // namespace component
795
796} // namespace tim
static pointer_t instance()
Get a shared pointer to the instance for the current thread.
std::string string_t
Definition: library.cpp:57
The declaration for the types for manager without definitions.
return _hash_map end()
_reported insert(_hash_id)
void print(std::ostream &os, Args &&... args)
Definition: functional.cpp:159
data::entry entry
Definition: stream.hpp:980
Definition: kokkosp.cpp:39
cupti_events
Definition: settings.cpp:1727
cupti_metrics
Definition: settings.cpp:1729
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
cupti_device
Definition: settings.cpp:1731
const std::string std::ostream * os
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: delimit.hpp:68
#define TIMEMORY_ERROR_FUNCTION_MACRO
Definition: macros.hpp:229
static short get_precision()
friend std::ostream & operator<<(std::ostream &os, const base_type &obj)
static short get_width()
static fmtflags get_format_flags()
NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a h...
this_type & operator+=(const this_type &rhs)
std::tuple< int, strvec_t, strvec_t > tuple_type
std::vector< double > get() const
typename value_type::value_type entry_type
cupti::profiler::results_t value_type
static device_func_t & get_device_initializer()
static const strvec_t & get_metrics()
static array_t< int64_t > unit_array()
static array_t< string_t > description_array()
static void extra_serialization(Archive &ar)
cupti_counters(const cupti_counters &)=default
this_type & operator+=(const results_t &rhs)
static get_initializer_t & get_initializer()
static const strvec_t & get_events()
std::function< int()> device_func_t
static event_func_t & get_event_initializer()
std::function< tuple_type()> get_initializer_t
cupti_counters & operator=(cupti_counters &&) noexcept=default
static void configure(int device, const strvec_t &events, const strvec_t &metrics={})
explicitly configure for a device and set of events/metrics.
std::function< strvec_t()> metric_func_t
std::unordered_multimap< std::string, value_type > secondary_type
static const profptr_t & get_profiler()
std::shared_ptr< cupti::profiler > profptr_t
cupti_counters(cupti_counters &&) noexcept=default
static metric_func_t & get_metric_initializer()
void serialize(Archive &ar, const unsigned int)
std::function< strvec_t()> event_func_t
secondary_type get_secondary() const
std::vector< string_t > strvec_t
static const strvec_t & get_labels()
cupti::profiler::results_t results_t
cupti::profiler::kernel_results_t kernel_results_t
static array_t< string_t > label_array()
this_type & operator-=(const this_type &rhs)
static array_t< string_t > display_unit_array()