timemory  3.2.1
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
cupti_counters.hpp
Go to the documentation of this file.
1 // MIT License
2 //
3 // Copyright (c) 2020, The Regents of the University of California,
4 // through Lawrence Berkeley National Laboratory (subject to receipt of any
5 // required approvals from the U.S. Dept. of Energy). All rights reserved.
6 //
7 // Permission is hereby granted, free of charge, to any person obtaining a copy
8 // of this software and associated documentation files (the "Software"), to deal
9 // in the Software without restriction, including without limitation the rights
10 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 // copies of the Software, and to permit persons to whom the Software is
12 // furnished to do so, subject to the following conditions:
13 //
14 // The above copyright notice and this permission notice shall be included in all
15 // copies or substantial portions of the Software.
16 //
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 // SOFTWARE.
24 
25 /** \file cupti_counters.hpp
26  * \headerfile cupti_counters.hpp "timemory/cupti_counters.hpp"
27  * Provides implementation of CUPTI routines.
28  *
29  */
30 
31 #pragma once
32 
34 #include "timemory/components/cupti/backends.hpp"
38 
39 #include <algorithm>
40 #include <iterator>
41 #include <memory>
42 #include <numeric>
43 #include <set>
44 #include <string>
45 #include <vector>
46 
47 //======================================================================================//
48 
49 namespace tim
50 {
51 namespace component
52 {
53 //--------------------------------------------------------------------------------------//
54 //
55 // CUPTI hardware counters component
56 //
57 //--------------------------------------------------------------------------------------//
58 /// \struct tim::component::cupti_counters
59 /// \brief NVprof-style hardware counters via the CUpti callback API. Collecting these
60 /// hardware counters has a higher overhead than the new CUpti Profiling API (\ref
61 /// tim::component::cupti_profiler). However, there are currently some issues with nesting
62 /// the Profiling API and it is currently recommended to use this component for NVIDIA
63 /// hardware counters in timemory. The callback API / NVprof is quite specific about
64 /// the distinction between an "event" and a "metric". For your convenience, timemory
65 /// removes this distinction and events can be specified arbitrarily as metrics and
66 /// vice-versa and this component will sort them into their appropriate category.
67 /// For the full list of the available events/metrics, use `timemory-avail -H` from the
68 /// command-line.
69 ///
70 struct cupti_counters : public base<cupti_counters, cupti::profiler::results_t>
71 {
72  // required aliases
73  using value_type = cupti::profiler::results_t;
76 
77  // custom aliases
78  using size_type = std::size_t;
80  using kernel_data_t = cupti::result;
81  using entry_type = typename value_type::value_type;
82  using results_t = cupti::profiler::results_t;
83  using kernel_results_t = cupti::profiler::kernel_results_t;
84 
85  // short-hand for vectors
86  using strvec_t = std::vector<string_t>;
87  using profptr_t = std::shared_ptr<cupti::profiler>;
88  // a tuple of <device, events, metrics>
89  using tuple_type = std::tuple<int, strvec_t, strvec_t>;
90  // function for setting device, metrics, and events to record
91  using event_func_t = std::function<strvec_t()>;
92  using metric_func_t = std::function<strvec_t()>;
93  using device_func_t = std::function<int()>;
94  // function for setting all of device, metrics, and events
95  using get_initializer_t = std::function<tuple_type()>;
96 
97  static const short precision = 3;
98  static const short width = 8;
99 
101  {
102  static event_func_t _instance = []() {
104  };
105  return _instance;
106  }
107 
109  {
110  static metric_func_t _instance = []() {
112  };
113  return _instance;
114  }
115 
117  {
118  static device_func_t _instance = []() {
119  if(cuda::device_count() < 1)
120  return -1;
121  return settings::cupti_device();
122  };
123  return _instance;
124  }
125 
127  {
128  static get_initializer_t _instance = []() -> tuple_type {
131  };
132  return _instance;
133  }
134 
135  static void configure()
136  {
137  if(_get_profiler().get() == nullptr)
138  init();
139  }
140 
141  /// explicitly configure for a device and set of events/metrics.
142  static void configure(int device, const strvec_t& events,
143  const strvec_t& metrics = {})
144  {
145  get_initializer() = [=]() -> tuple_type {
146  return tuple_type(device, events, metrics);
147  };
148  if(_get_profiler().get() == nullptr)
149  init();
150  }
151 
152  static void global_init() { configure(); }
153  static void global_finalize() { clear(); }
154 
155  static const profptr_t& get_profiler() { return _get_profiler(); }
156  static const strvec_t& get_events() { return *_get_events(); }
157  static const strvec_t& get_metrics() { return *_get_metrics(); }
158  static int get_device() { return *_get_device(); }
159  static const strvec_t& get_labels() { return *_get_labels(); }
160 
162  {
163  configure();
164  auto* _labels = _get_labels();
165  if(_labels)
166  {
167  value.resize(_labels->size());
168  accum.resize(_labels->size());
169  for(size_type i = 0; i < _labels->size(); ++i)
170  {
171  value[i].name = (*_labels)[i];
172  accum[i].name = (*_labels)[i];
173  }
174  }
175  }
176 
177  ~cupti_counters() = default;
178  cupti_counters(const cupti_counters&) = default;
179  cupti_counters(cupti_counters&&) noexcept = default;
180  cupti_counters& operator =(const cupti_counters& rhs)
181  {
182  if(this != &rhs)
183  {
184  base_type::operator=(rhs);
185  m_kernel_value = rhs.m_kernel_value;
186  m_kernel_accum = rhs.m_kernel_accum;
187  }
188  return *this;
189  }
190  cupti_counters& operator=(cupti_counters&&) noexcept = default;
191 
192  static int64_t unit() { return 1; }
193  // leave these empty
194  static string_t label() { return "cupti_counters"; }
195  static string_t description() { return "Hardware counters for the CUDA API"; }
196  static string_t display_unit() { return ""; }
197 
199  {
200  configure();
201  value_type tmp;
202  auto& _profiler = _get_profiler();
203  if(!_profiler || !_get_labels())
204  return tmp;
205  auto& _labels = *_get_labels();
206  _profiler->stop();
207  if(tmp.empty())
208  {
209  tmp = _profiler->get_events_and_metrics(_labels);
210  }
211  else if(tmp.size() == _labels.size())
212  {
213  auto ret = _profiler->get_events_and_metrics(_labels);
214  for(size_t j = 0; j < _labels.size(); ++j)
215  tmp[j] += ret[j];
216  }
217  else
218  {
219  fprintf(stderr, "Warning! mis-matched size in cupti_event::%s @ %s:%i\n",
220  TIMEMORY_ERROR_FUNCTION_MACRO, __FILE__, __LINE__);
221  }
222 
223  return tmp;
224  }
225 
226  //----------------------------------------------------------------------------------//
227  // start
228  //
229  void start()
230  {
231  value = record();
232  auto& _profiler = _get_profiler();
233  if(_profiler)
234  {
235  m_kernel_value = _profiler->get_kernel_events_and_metrics(*_get_labels());
236  _profiler->start();
237  }
238  }
239 
240  void stop()
241  {
242  using namespace stl;
243  using namespace tim::component::operators;
244 
245  value_type tmp = record();
246  auto& _profiler = _get_profiler();
247  if(!_profiler)
248  return;
249 
250  kernel_results_t kernel_data =
251  _profiler->get_kernel_events_and_metrics(*_get_labels());
252  kernel_results_t kernel_tmp = kernel_data;
253 
254  if(accum.empty())
255  {
256  accum = tmp;
257  for(size_type i = 0; i < tmp.size(); ++i)
258  accum[i] -= value[i];
259  }
260  else
261  {
262  for(size_type i = 0; i < tmp.size(); ++i)
263  accum[i] += (tmp[i] - value[i]);
264  }
265 
266  for(size_t i = 0; i < m_kernel_value.size(); ++i)
267  kernel_tmp[i].second -= m_kernel_value[i].second;
268  for(size_t i = 0; i < kernel_tmp.size(); ++i)
269  {
270  if(i >= m_kernel_accum.size())
271  {
272  m_kernel_accum.resize(i + 1, kernel_tmp[i]);
273  }
274  else
275  {
276  m_kernel_accum[i].second += kernel_tmp[i].second;
277  }
278  }
279 
280  value = std::move(tmp);
281  m_kernel_value = std::move(kernel_data);
282  }
283 
284  TIMEMORY_NODISCARD string_t get_display() const
285  {
286  auto _get_display = [&](std::ostream& os, const cupti::result& obj) {
287  auto _label = obj.name;
288  auto _prec = base_type::get_precision();
289  auto _width = base_type::get_width();
290  auto _flags = base_type::get_format_flags();
291 
292  std::stringstream ss;
293  std::stringstream ssv;
294  std::stringstream ssi;
295  ssv.setf(_flags);
296  ssv << std::setw(_width) << std::setprecision(_prec);
297  cupti::print(ssv, obj.data);
298  if(!_label.empty())
299  ssi << " " << _label;
300  ss << ssv.str() << ssi.str();
301  os << ss.str();
302  };
303 
304  const auto& _data = load();
305  std::stringstream ss;
306  for(size_type i = 0; i < _data.size(); ++i)
307  {
308  _get_display(ss, _data[i]);
309  if(i + 1 < _data.size())
310  ss << ", ";
311  }
312  return ss.str();
313  }
314 
315  TIMEMORY_NODISCARD std::vector<double> get() const
316  {
317  std::vector<double> values;
318  const auto& _data = load();
319  values.reserve(_data.size());
320  for(const auto& itr : _data)
321  values.push_back(cupti::get<double>(itr.data));
322  return values;
323  }
324 
325  using secondary_type = std::unordered_multimap<std::string, value_type>;
326 
327  TIMEMORY_NODISCARD secondary_type get_secondary() const
328  {
329  secondary_type _data;
330  for(const auto& itr : m_kernel_accum)
331  _data.insert({ itr.first, itr.second });
332  return _data;
333  }
334 
335  template <typename Tp>
336  using array_t = std::vector<Tp>;
337 
338  //----------------------------------------------------------------------------------//
339  // array of descriptions
340  //
342  {
343  array_t<string_t> arr;
344  auto contains = [&](const string_t& entry) {
345  return std::find(arr.begin(), arr.end(), entry) != arr.end();
346  };
347  auto insert = [&](const string_t& entry) {
348  if(!contains(entry))
349  arr.push_back(entry);
350  };
351  auto* _labels = _get_labels();
352  if(_labels)
353  {
354  for(const auto& itr : *_labels)
355  insert(itr);
356  }
357  // auto profiler = get_profiler();
358  // for(const auto& itr : profiler->get_event_names())
359  // insert(itr);
360  // for(const auto& itr : profiler->get_metric_names())
361  // insert(itr);
362  return arr;
363  }
364 
365  //----------------------------------------------------------------------------------//
366  // array of labels
367  //
369 
370  //----------------------------------------------------------------------------------//
371  // array of unit
372  //
374  {
375  return array_t<string_t>(get_labels().size(), "");
376  }
377 
378  //----------------------------------------------------------------------------------//
379  // array of unit values
380  //
382  {
383  return array_t<int64_t>(get_labels().size(), 1);
384  }
385 
387  {
388  auto _combine = [](value_type& _data, const value_type& _other) {
389  auto& _labels = *_get_labels();
390  if(_data.empty())
391  {
392  _data = _other;
393  }
394  else
395  {
396  for(size_type i = 0; i < _labels.size(); ++i)
397  _data[i] += _other[i];
398  }
399  };
400 
401  _combine(value, rhs.value);
402  _combine(accum, rhs.accum);
403  return *this;
404  }
405 
407  {
408  auto _combine = [](value_type& _data, const value_type& _other) {
409  auto& _labels = *_get_labels();
410  // set to other
411  if(_data.empty())
412  _data = _other;
413  // subtract other (if data was empty, will contain zero data)
414  for(size_type i = 0; i < _labels.size(); ++i)
415  _data[i] -= _other[i];
416  };
417 
418  _combine(value, rhs.value);
419  _combine(accum, rhs.accum);
420  return *this;
421  }
422 
424  {
425  auto _combine = [](value_type& _data, const value_type& _other) {
426  if(_data.empty())
427  {
428  _data = _other;
429  }
430  else
431  {
432  for(size_type i = 0; i < _other.size(); ++i)
433  _data[i] += _other[i];
434  }
435  };
436 
437  _combine(value, rhs);
438  _combine(accum, rhs);
439 
440  return *this;
441  }
442 
443  //----------------------------------------------------------------------------------//
444  // serialization
445  //
446  template <typename Archive>
447  void serialize(Archive& ar, const unsigned int)
448  {
449  auto _get = [&](const value_type& _data) {
450  std::vector<double> values;
451  for(const auto& itr : _data)
452  values.push_back(cupti::get<double>(itr.data));
453  return values;
454  };
455  array_t<double> _disp = _get(accum);
456  array_t<double> _value = _get(value);
457  array_t<double> _accum = _get(accum);
458  ar(cereal::make_nvp("laps", laps), cereal::make_nvp("repr_data", _disp),
459  cereal::make_nvp("value", _value), cereal::make_nvp("accum", _accum),
460  cereal::make_nvp("display", _disp));
461  // ar(cereal::make_nvp("units", unit_array()),
462  // cereal::make_nvp("display_units", display_unit_array()));
463  }
464 
465  //----------------------------------------------------------------------------------//
466  //
467  template <typename Archive>
468  static void extra_serialization(Archive& ar)
469  {
470  auto& _devices = *_get_device();
471  auto& _events = *_get_events();
472  auto& _metrics = *_get_metrics();
473  auto& _labels = *_get_labels();
474 
475  ar(cereal::make_nvp("devices", _devices), cereal::make_nvp("events", _events),
476  cereal::make_nvp("metrics", _metrics), cereal::make_nvp("labels", _labels));
477  }
478 
479  //----------------------------------------------------------------------------------//
480 
481 private:
/// Lightweight stream adaptor that prints the elements of a container separated
/// by ", ". Only a reference to the container is stored, so the container must
/// outlive the writer.
template <typename Tp>
struct writer
{
    using const_iterator = typename Tp::const_iterator;
    Tp& obj;
    writer(Tp& _obj)
    : obj(_obj)
    {}

    TIMEMORY_NODISCARD const_iterator begin() const { return obj.begin(); }
    TIMEMORY_NODISCARD const_iterator end() const { return obj.end(); }

    /// Writes every element in iteration order with ", " between neighbours.
    /// A single pass is used: the separator is emitted in front of every element
    /// but the first, which avoids recomputing an iterator distance per element
    /// (linear per call for non-random-access containers such as std::set).
    friend std::ostream& operator<<(std::ostream& os, const writer<Tp>& _obj)
    {
        bool       _first = true;
        const auto _last  = _obj.end();
        for(auto itr = _obj.begin(); itr != _last; ++itr)
        {
            if(!_first)
                os << ", ";
            os << (*itr);
            _first = false;
        }
        return os;
    }
};
507 
508  static profptr_t& _get_profiler()
509  {
510  static profptr_t _instance = profptr_t(nullptr);
511  return _instance;
512  }
513 
514  static strvec_t*& _get_events()
515  {
516  static strvec_t* _instance = new strvec_t();
517  return _instance;
518  }
519 
520  static strvec_t*& _get_metrics()
521  {
522  static strvec_t* _instance = new strvec_t();
523  return _instance;
524  }
525 
526  static int*& _get_device()
527  {
528  static int* _instance = new int(0);
529  return _instance;
530  }
531 
532  static strvec_t*& _get_labels()
533  {
534  static strvec_t* _instance = new strvec_t();
535  return _instance;
536  }
537 
538  static strvec_t generate_labels()
539  {
540  array_t<string_t> arr;
541  auto contains = [&](const string_t& entry) {
542  return std::find(arr.begin(), arr.end(), entry) != arr.end();
543  };
544  auto insert = [&](const string_t& entry) {
545  if(!contains(entry))
546  arr.push_back(entry);
547  };
548  auto profiler = get_profiler();
549  if(profiler)
550  {
551  for(const auto& itr : profiler->get_event_names())
552  insert(itr);
553  for(const auto& itr : profiler->get_metric_names())
554  insert(itr);
555  }
556  return arr;
557  }
558 
559  static strvec_t get_available_events(int devid)
560  {
561  return cupti::available_events(cupti::get_device(devid));
562  }
563 
564  static strvec_t get_available_metrics(int devid)
565  {
566  return cupti::available_metrics(cupti::get_device(devid));
567  }
568 
569  static tuple_type get_available(const tuple_type&, int);
570 
571  static void init()
572  {
573  auto _manager = manager::instance();
574  if(!_manager || _manager->is_finalized() || _manager->is_finalizing())
575  return;
576 
577  auto _init_cb = tim::get_env<bool>("TIMEMORY_CUPTI_INIT_CB", true);
578  cupti::init_driver();
579  if(_init_cb)
580  cuda::device_sync();
581  clear();
582 
583  auto& _profiler = _get_profiler();
584  auto& _events = *_get_events();
585  auto& _metrics = *_get_metrics();
586  auto& _device = *_get_device();
587  auto& _labels = *_get_labels();
588 
589  auto _init = get_initializer()();
590 
591  _device = std::get<0>(_init);
592  _events = std::get<1>(_init);
593  _metrics = std::get<2>(_init);
594 
595  using intset_t = std::set<int>;
596  using strset_t = std::set<string_t>;
597 
598  intset_t _used_devs;
599  strset_t _used_evts;
600  strset_t _used_mets;
601 
602  auto _dev_init = get_available(_init, _device);
603  auto& _dev = std::get<0>(_dev_init);
604 
605  // if < 0, no metrics or events available/specified
606  if(_dev >= 0)
607  {
608  if(settings::debug())
609  printf("Creating CUPTI hardware profiler for device %i...\n", _device);
610 
611  auto& _evt = std::get<1>(_dev_init);
612  auto& _met = std::get<2>(_dev_init);
613 
614  if(!_evt.empty() || !_met.empty())
615  {
616  _profiler = std::make_shared<cupti::profiler>(_evt, _met, _dev, _init_cb);
617  _used_devs.insert(_dev);
618  for(const auto& itr : _evt)
619  _used_evts.insert(itr);
620  for(const auto& itr : _met)
621  _used_mets.insert(itr);
622  _labels = generate_labels();
623  }
624  else
625  {
626  static int _pass = 0;
627  if(_pass++ > 0)
628  fprintf(stderr, "[cupti_counters]> Warning! No events or metrics!\n");
629  }
630  }
631  else
632  {
633  fprintf(stderr, "[cupti_counters]> Warning! No devices available!\n");
634  }
635 
636  if(!_used_devs.empty())
637  {
638  // if(settings::verbose() > 0 || settings::debug())
639  {
640  std::cout << "Devices : " << writer<intset_t>(_used_devs) << std::endl;
641  std::cout << "Event : " << writer<strset_t>(_used_evts) << std::endl;
642  std::cout << "Metrics : " << writer<strset_t>(_used_mets) << std::endl;
643  std::cout << "Labels : " << writer<strvec_t>(_labels) << std::endl;
644  }
645  }
646  }
647 
648  static void clear()
649  {
650  if(_get_metrics())
651  _get_metrics()->clear();
652  if(_get_events())
653  _get_events()->clear();
654  _get_profiler().reset();
655  }
656 
657 public:
658  static void cleanup()
659  {
660  clear();
661  delete _get_device();
662  delete _get_events();
663  delete _get_labels();
664  delete _get_metrics();
665  _get_device() = nullptr;
666  _get_events() = nullptr;
667  _get_labels() = nullptr;
668  _get_metrics() = nullptr;
669  }
670 
671 private:
672  kernel_results_t m_kernel_value;
673  kernel_results_t m_kernel_accum;
674 };
675 
676 //--------------------------------------------------------------------------------------//
677 
679 cupti_counters::get_available(const tuple_type& _init, int devid)
680 {
681  if(devid < 0 || devid >= cuda::device_count())
682  {
683  int ndev = cuda::device_count();
684  fprintf(stderr, "[cupti_counters]> Invalid device id: %i. # devices: %i...\n",
685  devid, ndev);
686  return tuple_type(-1, strvec_t(), strvec_t());
687  }
688 
689  strvec_t _events = std::get<1>(_init);
690  strvec_t _metrics = std::get<2>(_init);
691 
692  auto _tmp_init = get_initializer()();
693 
694  if(_events.empty())
695  _events = std::get<1>(_tmp_init);
696 
697  // provide default events
698  if(_events.empty())
699  {
700  // _events = { "active_warps", "active_cycles", "global_load", "global_store" };
701  }
702 
703  if(_metrics.empty())
704  _metrics = std::get<2>(_tmp_init);
705 
706  // provide default metrics
707  if(_metrics.empty())
708  {
709  //_metrics = { "inst_per_warp", "branch_efficiency", "gld_efficiency",
710  // "gst_efficiency", "warp_execution_efficiency" };
711  }
712 
713  const auto& _avail_events = get_available_events(devid);
714  const auto& _avail_metric = get_available_metrics(devid);
715 
716  std::set<std::string> _discarded_events{};
717  std::set<std::string> _discarded_metrics{};
718 
719  bool _discard = true;
720 
721  // handle events
722  auto _not_event = [&_avail_events, &_discarded_events,
723  &_discard](const string_t& evt) {
724  bool nf = (std::find(std::begin(_avail_events), std::end(_avail_events), evt) ==
725  std::end(_avail_events));
726  if(nf && _discard)
727  _discarded_events.insert(evt);
728  return nf;
729  };
730 
731  // handle metrics
732  auto _not_metric = [&_avail_metric, &_discarded_metrics,
733  &_discard](const string_t& met) {
734  bool nf = (std::find(std::begin(_avail_metric), std::end(_avail_metric), met) ==
735  std::end(_avail_metric));
736  if(nf && _discard)
737  _discarded_metrics.insert(met);
738  return nf;
739  };
740 
741  // do the removals
742  _events.erase(std::remove_if(std::begin(_events), std::end(_events), _not_event),
743  std::end(_events));
744 
745  _metrics.erase(std::remove_if(std::begin(_metrics), std::end(_metrics), _not_metric),
746  std::end(_metrics));
747 
748  // turn off discarding
749  _discard = false;
750 
751  // check to see if any requested events are actually metrics
752  for(const auto& itr : _discarded_events)
753  {
754  bool is_metric = !(_not_metric(itr));
755  if(is_metric)
756  {
757  _metrics.push_back(itr);
758  }
759  else
760  {
761  fprintf(stderr,
762  "[cupti_counters]> Removing unavailable event '%s' on device %i...\n",
763  itr.c_str(), devid);
764  }
765  }
766 
767  // check to see if any requested metrics are actually events
768  for(const auto& itr : _discarded_metrics)
769  {
770  bool is_event = !(_not_event(itr));
771  if(is_event)
772  {
773  _events.push_back(itr);
774  }
775  else
776  {
777  fprintf(
778  stderr,
779  "[cupti_counters]> Removing unavailable metric '%s' on device %i...\n",
780  itr.c_str(), devid);
781  }
782  }
783 
784  // determine total
785  return tuple_type(devid, _events, _metrics);
786 }
787 
788 //--------------------------------------------------------------------------------------//
789 
791 
792 //--------------------------------------------------------------------------------------//
793 
794 } // namespace component
795 
796 } // namespace tim
static pointer_t instance()
Get a shared pointer to the instance for the current thread.
std::string string_t
Definition: library.cpp:56
The declaration for the types for manager without definitions.
void print(std::ostream &os, Args &&... args)
Definition: functional.cpp:159
data::entry entry
Definition: stream.hpp:980
Definition: kokkosp.cpp:38
cupti_events
Definition: settings.cpp:1421
void insert(Bundle< Idx, Type > &obj, std::initializer_list< EnumT > components)
Definition: insert.hpp:47
void init(Args &&... args)
Definition: types.hpp:111
cupti_metrics
Definition: settings.cpp:1423
tim::mpl::apply< std::string > string
Definition: macros.hpp:52
cupti_device
Definition: settings.cpp:1425
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: utility.hpp:666
The declaration for the types for settings without definitions.
#define TIMEMORY_ERROR_FUNCTION_MACRO
Definition: macros.hpp:229
static short get_precision()
friend std::ostream & operator<<(std::ostream &os, const base_type &obj)
static short get_width()
static fmtflags get_format_flags()
NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a h...
static array_t< string_t > description_array()
static array_t< int64_t > unit_array()
this_type & operator-=(const this_type &rhs)
std::tuple< int, strvec_t, strvec_t > tuple_type
static device_func_t & get_device_initializer()
static const strvec_t & get_labels()
typename value_type::value_type entry_type
cupti::profiler::results_t value_type
static void extra_serialization(Archive &ar)
cupti_counters(const cupti_counters &)=default
static const strvec_t & get_events()
cupti_counters & operator=(cupti_counters &&) noexcept=default
static array_t< string_t > label_array()
static get_initializer_t & get_initializer()
static array_t< string_t > display_unit_array()
std::function< int()> device_func_t
std::function< tuple_type()> get_initializer_t
static void configure(int device, const strvec_t &events, const strvec_t &metrics={})
explicitly configure for a device and set of events/metrics.
this_type & operator+=(const results_t &rhs)
std::function< strvec_t()> metric_func_t
std::unordered_multimap< std::string, value_type > secondary_type
std::shared_ptr< cupti::profiler > profptr_t
cupti_counters(cupti_counters &&) noexcept=default
static const strvec_t & get_metrics()
static const profptr_t & get_profiler()
void serialize(Archive &ar, const unsigned int)
std::function< strvec_t()> event_func_t
secondary_type get_secondary() const
std::vector< double > get() const
std::vector< string_t > strvec_t
static event_func_t & get_event_initializer()
cupti::profiler::results_t results_t
this_type & operator+=(const this_type &rhs)
cupti::profiler::kernel_results_t kernel_results_t
static metric_func_t & get_metric_initializer()