timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
gpu_roofline.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25#pragma once
26
30#include "timemory/components/roofline/backends.hpp"
38
39#include <array>
40#include <cassert>
41#include <memory>
42#include <numeric>
43#include <utility>
44
45//======================================================================================//
46
47namespace tim
48{
49namespace component
50{
51//--------------------------------------------------------------------------------------//
52// this computes the numerator of the roofline for a given set of CUPTI counters.
53// e.g. for FLOPS roofline (floating point operations / second):
54//
55// single precision:
56// gpu_roofline<float>
57//
58// double precision:
59// gpu_roofline<double>
60//
61//
62/// \struct tim::component::gpu_roofline
63/// \tparam Types Variadic list of data types for roofline analysis
64///
65/// \brief Combines hardware counters and timers and executes the empirical roofline
66/// toolkit during application termination to estimate the peak possible performance for
67/// the machine
68///
69template <typename... Types>
71: public base<gpu_roofline<Types...>, std::tuple<typename cupti_activity::value_type,
72 typename cupti_counters::value_type>>
73{
74 using value_type = std::tuple<typename cupti_activity::value_type,
79
80 using size_type = std::size_t;
83 using device_t = device::gpu;
84 using result_type = std::vector<double>;
85 using label_type = std::vector<std::string>;
87 using types_tuple = std::tuple<Types...>;
88
89 friend struct operation::record<this_type>;
90 friend struct operation::start<this_type>;
91 friend struct operation::stop<this_type>;
92 friend struct operation::set_started<this_type>;
93 friend struct operation::set_stopped<this_type>;
94
96 using ert_data_ptr_t = std::shared_ptr<ert_data_t>;
97
98 // short-hand for variadic expansion
99 template <typename Tp>
101 template <typename Tp>
103 template <typename Tp>
105 template <typename Tp>
107
108 // variadic expansion for ERT types
109 using ert_config_t = std::tuple<ert_config_type<Types>...>;
110 using ert_counter_t = std::tuple<ert_counter_type<Types>...>;
111 using ert_executor_t = std::tuple<ert_executor_type<Types>...>;
112 using ert_callback_t = std::tuple<ert_callback_type<Types>...>;
113
114 static_assert(std::tuple_size<ert_config_t>::value ==
115 std::tuple_size<types_tuple>::value,
116 "Error! ert_config_t size does not match types_tuple size!");
117
118 static const short precision = 3;
119 static const short width = 8;
120
121 //----------------------------------------------------------------------------------//
122 // collection mode, COUNTERS is the HW counting, ACTIVITY is the runtime measurements
123 enum class MODE
124 {
125 COUNTERS,
127 };
128
129 //----------------------------------------------------------------------------------//
130
131 using strvec_t = std::vector<std::string>;
132 using events_callback_t = std::function<strvec_t()>;
134
135 //----------------------------------------------------------------------------------//
136
138 {
139 static events_callback_t _instance = []() { return strvec_t{}; };
140 return _instance;
141 }
142
144 {
145 static metrics_callback_t _instance = []() { return strvec_t{}; };
146 return _instance;
147 }
148
149public:
150 //----------------------------------------------------------------------------------//
151
152 static MODE& event_mode()
153 {
154 auto&& _get = [=]() {
155 auto&& aslc = [](std::string str) {
156 for(auto& itr : str)
157 itr = tolower(itr);
158 return str;
159 };
160
161 // check the standard variable
163 if(_env.empty())
164 _env = aslc(settings::roofline_mode());
165 return (_env == "op" || _env == "hw" || _env == "counters")
167 : ((_env == "ai" || _env == "ac" || _env == "activity")
170 };
171
172 static MODE _instance = _get();
173 if(!is_configured())
174 _instance = _get();
175 return _instance;
176 }
177
178 //----------------------------------------------------------------------------------//
179
180 static void configure(const MODE& _mode, int _device = 0)
181 {
182 if(is_configured())
183 return;
184 is_configured() = true;
185
186 event_mode() = _mode;
187
189 {
190 get_labels() = { std::string("runtime") };
191 }
192 else
193 {
194 strvec_t events = { "global_load", "global_store" };
195 strvec_t metrics = { "ldst_executed" };
196#if defined(TIMEMORY_USE_CUDA_HALF)
198 {
199 metrics.push_back("flop_count_hp");
200 }
201#endif
202
204 {
205 metrics.push_back("flop_count_sp");
206 }
207
209 {
210 metrics.push_back("flop_count_dp");
211 }
212
213 // integer
216 {
217 for(const auto* itr :
218 { "ipc", "inst_executed", "inst_integer", "inst_fp_64", "inst_fp_32",
219 "inst_fp_16", "local_load_transactions_per_request",
220 "local_store_transactions_per_request",
221 "shared_load_transactions_per_request",
222 "shared_store_transactions_per_request",
223 "gld_transactions_per_request", "gst_transactions_per_request",
224 "inst_executed_global_reductions", "inst_executed_global_stores",
225 "inst_executed_global_loads", "inst_executed_local_loads",
226 "inst_executed_local_stores", "inst_executed_shared_loads",
227 "inst_executed_shared_stores" })
228 metrics.emplace_back(itr);
229 }
230
231 // add in extra events
232 auto _extra_events = get_events_callback()();
233 for(const auto& itr : _extra_events)
234 events.push_back(itr);
235
236 // add in extra metrics
237 auto _extra_metrics = get_metrics_callback()();
238 for(const auto& itr : _extra_metrics)
239 metrics.push_back(itr);
240
241 auto _get_unique = [](const strvec_t& _vec) {
242 std::set<std::string> _set;
243 for(const auto& itr : _vec)
244 _set.insert(itr);
246 for(const auto& itr : _set)
247 _ret.push_back(itr);
248 return _ret;
249 };
250
251 metrics = _get_unique(metrics);
252 events = _get_unique(events);
253
254 counters_type::configure(_device, events, metrics);
255 get_labels() = counters_type::label_array();
256 }
257 }
258
259 //----------------------------------------------------------------------------------//
260
261 static void configure()
262 {
263 if(!is_configured())
265 }
266
267 //----------------------------------------------------------------------------------//
268
270 {
271 return (event_mode() == MODE::COUNTERS) ? "counters" : "activity";
272 }
273
274 //----------------------------------------------------------------------------------//
275
277 {
278 return mpl::apply<std::string>::join('_', demangle(typeid(Types).name())...);
279 }
280
281 //----------------------------------------------------------------------------------//
282
284 {
285 static ert_config_t _instance;
286 return _instance;
287 }
288
289 //----------------------------------------------------------------------------------//
290
292 {
293 static ert_data_ptr_t _instance = std::make_shared<ert_data_t>();
294 return _instance;
295 }
296
297 //----------------------------------------------------------------------------------//
298
299 static void global_init()
300 {
302 {
305 }
306 else
307 {
310 }
311 }
312
313 //----------------------------------------------------------------------------------//
314
315 template <typename Tp, typename FuncT>
316 static void set_executor_callback(FuncT&& f)
317 {
318 ert_executor_type<Tp>::get_callback() = std::forward<FuncT>(f);
319 }
320
321 //----------------------------------------------------------------------------------//
322
323 static void global_finalize(storage_type* _store)
324 {
325 // disable the activity/counters before running ERT
327 {
330 }
331 else
332 {
335 }
336
337 // run ERT
338 if(_store && _store->size() > 0)
339 {
340 assert(_store->is_finalizing());
341 // run roofline peak generation
342 auto ert_config = get_finalizer();
343 auto ert_data = get_ert_data();
344 mpl::apply<void>::access<ert_executor_t>(ert_config, ert_data);
345 if(ert_data && (settings::verbose() > 0 || settings::debug()))
346 std::cout << *(ert_data) << std::endl;
347 }
348 }
349
350 //----------------------------------------------------------------------------------//
351
/// Per-thread initialization hook; this component has nothing to do here.
static void thread_init()
{}
/// Per-thread finalization hook; this component has nothing to do here.
static void thread_finalize()
{}
354
355 //----------------------------------------------------------------------------------//
356
357 template <typename Archive>
358 static void extra_serialization(Archive& ar)
359 {
360 auto& _ert_data = get_ert_data();
361 if(!_ert_data) // for input
362 _ert_data = std::make_shared<ert_data_t>();
363 ar(cereal::make_nvp("roofline", *_ert_data));
364 }
365
366 //----------------------------------------------------------------------------------//
367
368 static int64_t unit()
369 {
371 return activity_type::unit();
372 return counters_type::unit();
373 }
374
376 {
378 {
379 auto ret = std::string("gpu_roofline_") + get_type_string() + "_" +
381 // erase consecutive underscores
382 while(ret.find("__") != std::string::npos)
383 ret.erase(ret.find("__"), 1);
384 return ret;
385 }
386
387 return std::string("gpu_roofline_") + get_mode_string();
388 }
389
391 {
392 return "Model used to provide performance relative to the peak possible "
393 "performance on a GPU architecture.";
394 }
395
397 {
401 }
402
403 //----------------------------------------------------------------------------------//
404
406 {
407 value_type tmp;
408 switch(event_mode())
409 {
410 case MODE::ACTIVITY: std::get<0>(tmp) = activity_type::record(); break;
411 case MODE::COUNTERS: std::get<1>(tmp) = counters_type::record(); break;
412 default: break;
413 }
414 return tmp;
415 }
416
417private:
418 //----------------------------------------------------------------------------------//
419
/// Mutable flag recording whether configure() has already been applied.
/// \return reference to a function-local static, initially false
static bool& is_configured()
{
    static bool _configured{ false };
    return _configured;
}
425
426public:
428 ~gpu_roofline() = default;
429
431 : base_type(rhs)
432 , m_data(rhs.m_data)
433 {}
434
436 {
437 if(this != &rhs)
438 {
439 base_type::operator=(rhs);
440 m_data = rhs.m_data;
441 }
442 return *this;
443 }
444
445 gpu_roofline(gpu_roofline&&) noexcept = default;
446 gpu_roofline& operator=(gpu_roofline&&) noexcept = default;
447
448 //----------------------------------------------------------------------------------//
449
450 TIMEMORY_NODISCARD result_type get() const
451 {
452 switch(event_mode())
453 {
454 case MODE::ACTIVITY: return result_type({ m_data.activity->get() });
455 case MODE::COUNTERS: return m_data.counters->get();
456 default: break;
457 }
458 return result_type{};
459 }
460
461 //----------------------------------------------------------------------------------//
462
463 void start()
464 {
465 switch(event_mode())
466 {
467 case MODE::ACTIVITY:
468 {
469 m_data.activity->start();
470 std::get<0>(value) = m_data.activity->get_value();
471 break;
472 }
473 case MODE::COUNTERS:
474 {
475 m_data.counters->start();
476 std::get<1>(value) = m_data.counters->get_value();
477 break;
478 }
479 }
480 }
481
482 //----------------------------------------------------------------------------------//
483
484 void stop()
485 {
486 using namespace tim::component::operators;
487 switch(event_mode())
488 {
489 case MODE::ACTIVITY:
490 {
491 m_data.activity->stop();
492 std::get<0>(accum) = m_data.activity->get_accum();
493 std::get<0>(value) = m_data.activity->get_value();
494 break;
495 }
496 case MODE::COUNTERS:
497 {
498 m_data.counters->stop();
499 std::get<1>(accum) = m_data.counters->get_accum();
500 std::get<1>(value) = m_data.counters->get_value();
501 break;
502 }
503 }
504 }
505
506 //----------------------------------------------------------------------------------//
507
509 {
510 switch(event_mode())
511 {
512 case MODE::ACTIVITY:
513 {
514 *m_data.activity += *rhs.m_data.activity;
515 std::get<0>(accum) = m_data.activity->get_accum();
516 std::get<0>(value) = m_data.activity->get_value();
517 break;
518 }
519 case MODE::COUNTERS:
520 {
521 *m_data.counters += *rhs.m_data.counters;
522 std::get<1>(accum) = m_data.counters->get_accum();
523 std::get<1>(value) = m_data.counters->get_value();
524 break;
525 }
526 }
527 return *this;
528 }
529
530 //----------------------------------------------------------------------------------//
531
533 {
534 switch(event_mode())
535 {
536 case MODE::ACTIVITY:
537 {
538 *m_data.activity -= *rhs.m_data.activity;
539 std::get<0>(accum) = m_data.activity->get_accum();
540 std::get<0>(value) = m_data.activity->get_value();
541 break;
542 }
543 case MODE::COUNTERS:
544 {
545 *m_data.counters -= *rhs.m_data.counters;
546 std::get<1>(accum) = m_data.counters->get_accum();
547 std::get<1>(value) = m_data.counters->get_value();
548 break;
549 }
550 }
551 return *this;
552 }
553
554 //----------------------------------------------------------------------------------//
555
557 {
558 switch(event_mode())
559 {
560 case MODE::ACTIVITY:
561 {
562 *m_data.activity += std::get<0>(rhs);
563 std::get<0>(accum) = m_data.activity->get_accum();
564 std::get<0>(value) = m_data.activity->get_value();
565 break;
566 }
567 case MODE::COUNTERS:
568 {
569 *m_data.counters += std::get<1>(rhs);
570 std::get<1>(accum) = m_data.counters->get_accum();
571 std::get<1>(value) = m_data.counters->get_value();
572 break;
573 }
574 }
575 return *this;
576 }
577
578 //----------------------------------------------------------------------------------//
579
582 using secondary_type = std::unordered_multimap<std::string, value_type>;
583
584 //----------------------------------------------------------------------------------//
585
586 TIMEMORY_NODISCARD secondary_type get_secondary() const
587 {
588 secondary_type ret;
589 switch(event_mode())
590 {
591 case MODE::ACTIVITY:
592 {
593 auto&& _tmp = m_data.activity->get_secondary();
594 for(auto&& itr : _tmp)
595 {
596 ret.insert(
597 { itr.first, value_type{ itr.second, counters_value_type{} } });
598 }
599 break;
600 }
601 case MODE::COUNTERS:
602 {
603 auto&& _tmp = m_data.counters->get_secondary();
604 for(auto&& itr : _tmp)
605 {
606 ret.insert(
607 { itr.first, value_type{ activity_value_type{}, itr.second } });
608 }
609 break;
610 }
611 }
612 return ret;
613 }
614
615 //----------------------------------------------------------------------------------//
616
617protected:
618 using base_type::accum;
619 using base_type::laps;
622 using base_type::value;
623
624 friend struct base<this_type, value_type>;
625 friend class impl::storage<this_type,
626 trait::uses_value_storage<this_type, value_type>::value>;
627
628public:
629 //==================================================================================//
630 //
631 // representation as a string
632 //
633 //==================================================================================//
634 //
635 TIMEMORY_NODISCARD string_t get_display() const
636 {
637 std::stringstream ss;
639 {
640 return m_data.counters->get_display();
641 }
642 {
643 ss << m_data.activity->get_display();
644 }
645 return ss.str();
646 }
647
648 //----------------------------------------------------------------------------------//
649 //
650 friend std::ostream& operator<<(std::ostream& os, const this_type& obj)
651 {
652 os << as_string(obj.get_display());
653 return os;
654 }
655
656 //----------------------------------------------------------------------------------//
657 //
658 static label_type label_array() { return this_type::get_labels(); }
659
660 //----------------------------------------------------------------------------------//
661 //
663 {
664 const auto& _labels = get_labels();
665 return label_type(_labels.size(), this_type::display_unit());
666 }
667
668private:
669 //----------------------------------------------------------------------------------//
670 //
671 static string_t as_string(const string_t& _value)
672 {
673 auto _label = this_type::get_label();
674 auto _disp = this_type::get_display_unit();
675 auto _prec = this_type::get_precision();
676 auto _width = this_type::get_width();
677 auto _flags = this_type::get_format_flags();
678
679 std::stringstream ss_value;
680 std::stringstream ss_extra;
681 ss_value.setf(_flags);
682 ss_value << std::setw(_width) << std::setprecision(_prec) << _value;
684 ss_extra << " " << _disp;
686 ss_extra << " " << _label;
687
688 std::stringstream ss;
689 ss << ss_value.str() << ss_extra.str();
690 return ss.str();
691 }
692
693private:
694 static label_type& get_labels() { return *_get_labels(); }
695
696 static label_type* _get_labels()
697 {
698 static auto _instance = std::make_unique<label_type>();
699 return _instance.get();
700 }
701
702private:
703 union cupti_data
704 {
705 cupti_activity* activity = nullptr;
706 cupti_counters* counters;
707
708 cupti_data()
709 {
710 switch(event_mode())
711 {
712 case MODE::ACTIVITY: activity = new cupti_activity(); break;
713 case MODE::COUNTERS: counters = new cupti_counters(); break;
714 }
715 }
716
717 ~cupti_data()
718 {
719 switch(event_mode())
720 {
721 case MODE::ACTIVITY: delete activity; break;
722 case MODE::COUNTERS: delete counters; break;
723 }
724 }
725
726 cupti_data(const cupti_data& rhs)
727 {
728 switch(event_mode())
729 {
730 case MODE::ACTIVITY: activity = new cupti_activity(*rhs.activity); break;
731 case MODE::COUNTERS: counters = new cupti_counters(*rhs.counters); break;
732 }
733 }
734
735 cupti_data(cupti_data&& rhs) noexcept
736 {
737 switch(event_mode())
738 {
739 case MODE::ACTIVITY:
740 activity = nullptr;
741 std::swap(activity, rhs.activity);
742 break;
743 case MODE::COUNTERS:
744 counters = nullptr;
745 std::swap(counters, rhs.counters);
746 break;
747 }
748 }
749
750 cupti_data& operator=(const cupti_data& rhs)
751 {
752 if(this == &rhs)
753 return *this;
754 switch(event_mode())
755 {
756 case MODE::ACTIVITY:
757 delete activity;
758 activity = new cupti_activity(*rhs.activity);
759 break;
760 case MODE::COUNTERS:
761 delete counters;
762 counters = new cupti_counters(*rhs.counters);
763 break;
764 }
765 return *this;
766 }
767
768 cupti_data& operator=(cupti_data&& rhs) noexcept
769 {
770 if(this == &rhs)
771 return *this;
772 switch(event_mode())
773 {
774 case MODE::ACTIVITY:
775 delete activity;
776 activity = nullptr;
777 std::swap(activity, rhs.activity);
778 break;
779 case MODE::COUNTERS:
780 delete counters;
781 counters = nullptr;
782 std::swap(counters, rhs.counters);
783 break;
784 }
785 return *this;
786 }
787 };
788
789 cupti_data m_data;
790
791public:
792 //----------------------------------------------------------------------------------//
793
794 template <typename Archive>
795 void save(Archive& ar, const unsigned int) const
796 {
797 auto _disp = get_display();
798 auto _data = get();
799 auto _labels = get_labels();
800
801 ar(cereal::make_nvp("laps", laps), cereal::make_nvp("display", _disp),
802 cereal::make_nvp("mode", get_mode_string()),
803 cereal::make_nvp("type", get_type_string()),
804 cereal::make_nvp("labels", _labels));
805
806 ar.setNextName("repr_data");
807 ar.startNode();
808 auto litr = _labels.begin();
809 auto ditr = _data.begin();
810 for(; litr != _labels.end() && ditr != _data.end(); ++litr, ++ditr)
811 ar(cereal::make_nvp(*litr, *ditr));
812 ar.finishNode();
813
814 ar.setNextName("value");
815 ar.startNode();
816 ar.makeArray();
818 {
819 ar(std::get<0>(value));
820 }
821 else
822 {
823 ar(std::get<1>(value));
824 }
825 ar.finishNode();
826
827 ar.setNextName("accum");
828 ar.startNode();
829 ar.makeArray();
831 {
832 ar(std::get<0>(accum));
833 }
834 else
835 {
836 ar(std::get<1>(accum));
837 }
838 ar.finishNode();
839 }
840
841 //----------------------------------------------------------------------------------//
842
843 template <typename Archive>
844 void load(Archive& ar, const unsigned int)
845 {
846 std::string _disp;
847 result_type _data;
848 std::vector<std::string> _labels;
849 std::string _mode_str;
850 std::string _type_str;
851
852 ar(cereal::make_nvp("laps", laps), cereal::make_nvp("display", _disp),
853 cereal::make_nvp("mode", _mode_str), cereal::make_nvp("type", _type_str),
854 cereal::make_nvp("labels", _labels));
855
856 if(_mode_str == "counters")
857 {
859 }
860 else if(_mode_str == "activity")
861 {
863 }
864
865 _data.resize(_labels.size());
866
867 ar.setNextName("repr_data");
868 ar.startNode();
869 auto litr = _labels.begin();
870 auto ditr = _data.begin();
871 for(; litr != _labels.end() && ditr != _data.end(); ++litr, ++ditr)
872 ar(cereal::make_nvp(*litr, *ditr));
873 ar.finishNode();
874
875 ar.setNextName("value");
876 ar.startNode();
878 {
879 ar(std::get<0>(value));
880 }
881 else
882 {
883 ar(std::get<1>(value));
884 }
885 ar.finishNode();
886
887 ar.setNextName("accum");
888 ar.startNode();
890 {
891 ar(std::get<0>(accum));
892 }
893 else
894 {
895 ar(std::get<1>(accum));
896 }
897 ar.finishNode();
898 }
899};
900
901} // namespace component
902} // namespace tim
Definition for global and thread-local finalization functions for a component.
Definition for global and thread-local initialization functions for a component.
const hash_alias_ptr_t hash_value_t std::string *& _ret
Definition: definition.hpp:300
typename impl::is_one_of_integral< Types > is_one_of_integral
check if type is in expansion
Definition: filters.hpp:417
std::integral_constant< int, ModeV > mode_constant
Definition: types.hpp:240
Definition: kokkosp.cpp:39
roofline_mode
Definition: settings.cpp:1732
std::string string_t
Definition: utility.hpp:98
instruction_roofline
Definition: settings.cpp:1748
std::string demangle(const char *_mangled_name, int *_status=nullptr)
Definition: demangle.hpp:47
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
roofline_type_labels_gpu
Definition: settings.cpp:1746
const std::string std::ostream * os
roofline_type_labels
Definition: settings.cpp:1742
typename impl::is_one_of< Tp, Types > is_one_of
check if type is in expansion
Definition: types.hpp:777
gpu_roofline_mode
Definition: settings.cpp:1736
void set_stopped()
store that stop has been called
storage< Tp, Value > storage_type
void set_started()
store that start has been called
CUPTI activity tracing component for high-precision kernel timing. For low-precision kernel timing,...
NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a h...
cupti::profiler::results_t value_type
static array_t< string_t > label_array()
A very lightweight storage class which provides nothing.
Definition: declaration.hpp:51
constexpr size_t size() const
Definition: declaration.hpp:54
Combines hardware counters and timers and executes the empirical roofline toolkit during application ...
this_type & operator+=(const this_type &rhs)
static std::string description()
std::shared_ptr< ert_data_t > ert_data_ptr_t
secondary_type get_secondary() const
typename cupti_activity::value_type activity_value_type
std::tuple< ert_counter_type< Types >... > ert_counter_t
std::unordered_multimap< std::string, value_type > secondary_type
std::vector< std::string > label_type
static void configure(const MODE &_mode, int _device=0)
static std::string get_type_string()
friend std::ostream & operator<<(std::ostream &os, const this_type &obj)
static std::string get_mode_string()
static std::string display_unit()
gpu_roofline< Types... > this_type
static void set_executor_callback(FuncT &&f)
std::vector< std::string > strvec_t
void load(Archive &ar, const unsigned int)
std::function< strvec_t()> events_callback_t
typename cupti_counters::value_type counters_value_type
this_type & operator+=(const value_type &rhs)
static label_type label_array()
static events_callback_t & get_events_callback()
gpu_roofline(gpu_roofline &&) noexcept=default
static void global_finalize(storage_type *_store)
void save(Archive &ar, const unsigned int) const
std::tuple< ert_callback_type< Types >... > ert_callback_t
events_callback_t metrics_callback_t
this_type & operator-=(const this_type &rhs)
static std::string label()
std::tuple< typename cupti_activity::value_type, typename cupti_counters::value_type > value_type
gpu_roofline & operator=(const gpu_roofline &rhs)
std::tuple< Types... > types_tuple
std::tuple< ert_config_type< Types >... > ert_config_t
static void extra_serialization(Archive &ar)
gpu_roofline(const gpu_roofline &rhs)
std::tuple< ert_executor_type< Types >... > ert_executor_t
std::vector< double > result_type
static ert_config_t & get_finalizer()
static value_type record()
static ert_data_ptr_t & get_ert_data()
static metrics_callback_t & get_metrics_callback()
static const short precision
static label_type display_unit_array()
for variadic expansion to set the callback
static callback_type & get_callback()
static string_t join(SepT &&separator, Tuple &&__tup, index_sequence< Idx... >) noexcept
Definition: apply.hpp:408
This operation class is used for invoking the static initializer and thread-local initializer of a co...
Definition: fini.hpp:53
This operation class is used for invoking the static initializer and thread-local initializer of a co...
Definition: init.hpp:51
This operation attempts to call a member function which the component provides to internally store wh...
Definition: types.hpp:469
This operation attempts to call a member function which the component provides to internally store wh...
Definition: types.hpp:502
trait that signifies that a component will handle printing the label(s)
trait that signifies that a component will handle printing the units(s)