timemory  3.2.1
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::component::cupti_counters Struct Reference

NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a higher overhead than the new CUpti Profiling API (tim::component::cupti_profiler). However, there are currently some issues with nesting the Profiling API, so it is currently recommended to use this component for NVIDIA hardware counters in timemory. The callback API / NVprof is quite specific about the distinction between an "event" and a "metric". For your convenience, timemory removes this distinction: events can be specified arbitrarily as metrics and vice versa, and this component will sort them into their appropriate category. For the full list of the available events/metrics, use timemory-avail -H from the command-line. More...

#include "timemory/components/cupti/cupti_counters.hpp"

+ Collaboration diagram for tim::component::cupti_counters:

Public Types

using value_type = cupti::profiler::results_t
 
using this_type = cupti_counters
 
using base_type = base< cupti_counters, value_type >
 
using size_type = std::size_t
 
using string_t = std::string
 
using kernel_data_t = cupti::result
 
using entry_type = typename value_type::value_type
 
using results_t = cupti::profiler::results_t
 
using kernel_results_t = cupti::profiler::kernel_results_t
 
using strvec_t = std::vector< string_t >
 
using profptr_t = std::shared_ptr< cupti::profiler >
 
using tuple_type = std::tuple< int, strvec_t, strvec_t >
 
using event_func_t = std::function< strvec_t()>
 
using metric_func_t = std::function< strvec_t()>
 
using device_func_t = std::function< int()>
 
using get_initializer_t = std::function< tuple_type()>
 
using secondary_type = std::unordered_multimap< std::string, value_type >
 
template<typename Tp >
using array_t = std::vector< Tp >
 

Public Member Functions

 cupti_counters ()
 
 ~cupti_counters ()=default
 
 cupti_counters (const cupti_counters &)=default
 
 cupti_counters (cupti_counters &&) noexcept=default
 
cupti_counters & operator= (const cupti_counters &rhs)
 
cupti_counters & operator= (cupti_counters &&) noexcept=default
 
void start ()
 
void stop ()
 
string_t get_display () const
 
std::vector< double > get () const
 
secondary_type get_secondary () const
 
this_type & operator+= (const this_type &rhs)
 
this_type & operator-= (const this_type &rhs)
 
this_type & operator+= (const results_t &rhs)
 
template<typename Archive >
void serialize (Archive &ar, const unsigned int)
 

Static Public Member Functions

static event_func_t & get_event_initializer ()
 
static metric_func_t & get_metric_initializer ()
 
static device_func_t & get_device_initializer ()
 
static get_initializer_t & get_initializer ()
 
static void configure ()
 
static void configure (int device, const strvec_t &events, const strvec_t &metrics={})
 explicitly configure for a device and set of events/metrics. More...
 
static void global_init ()
 
static void global_finalize ()
 
static const profptr_t & get_profiler ()
 
static const strvec_t & get_events ()
 
static const strvec_t & get_metrics ()
 
static int get_device ()
 
static const strvec_t & get_labels ()
 
static int64_t unit ()
 
static string_t label ()
 
static string_t description ()
 
static string_t display_unit ()
 
static value_type record ()
 
static array_t< string_t > label_array ()
 
static array_t< string_t > description_array ()
 
static array_t< string_t > display_unit_array ()
 
static array_t< int64_t > unit_array ()
 
template<typename Archive >
static void extra_serialization (Archive &ar)
 
static void cleanup ()
 

Static Public Attributes

static const short precision = 3
 
static const short width = 8
 

Detailed Description

NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a higher overhead than the new CUpti Profiling API (tim::component::cupti_profiler). However, there are currently some issues with nesting the Profiling API, so it is currently recommended to use this component for NVIDIA hardware counters in timemory. The callback API / NVprof is quite specific about the distinction between an "event" and a "metric". For your convenience, timemory removes this distinction: events can be specified arbitrarily as metrics and vice versa, and this component will sort them into their appropriate category. For the full list of the available events/metrics, use timemory-avail -H from the command-line.

Definition at line 70 of file cupti_counters.hpp.

Member Typedef Documentation

◆ array_t

template<typename Tp >
using tim::component::cupti_counters::array_t = std::vector<Tp>

Definition at line 336 of file cupti_counters.hpp.

◆ base_type

◆ device_func_t

using tim::component::cupti_counters::device_func_t = std::function<int()>

Definition at line 93 of file cupti_counters.hpp.

◆ entry_type

using tim::component::cupti_counters::entry_type = typename value_type::value_type

Definition at line 81 of file cupti_counters.hpp.

◆ event_func_t

Definition at line 91 of file cupti_counters.hpp.

◆ get_initializer_t

Definition at line 95 of file cupti_counters.hpp.

◆ kernel_data_t

Definition at line 80 of file cupti_counters.hpp.

◆ kernel_results_t

using tim::component::cupti_counters::kernel_results_t = cupti::profiler::kernel_results_t

Definition at line 83 of file cupti_counters.hpp.

◆ metric_func_t

Definition at line 92 of file cupti_counters.hpp.

◆ profptr_t

using tim::component::cupti_counters::profptr_t = std::shared_ptr<cupti::profiler>

Definition at line 87 of file cupti_counters.hpp.

◆ results_t

using tim::component::cupti_counters::results_t = cupti::profiler::results_t

Definition at line 82 of file cupti_counters.hpp.

◆ secondary_type

using tim::component::cupti_counters::secondary_type = std::unordered_multimap<std::string, value_type>

Definition at line 325 of file cupti_counters.hpp.

◆ size_type

Definition at line 78 of file cupti_counters.hpp.

◆ string_t

Definition at line 79 of file cupti_counters.hpp.

◆ strvec_t

Definition at line 86 of file cupti_counters.hpp.

◆ this_type

◆ tuple_type

Definition at line 89 of file cupti_counters.hpp.

◆ value_type

using tim::component::cupti_counters::value_type = cupti::profiler::results_t

Definition at line 73 of file cupti_counters.hpp.

Constructor & Destructor Documentation

◆ cupti_counters() [1/3]

tim::component::cupti_counters::cupti_counters ( )
inline

Definition at line 161 of file cupti_counters.hpp.

162  {
163  configure();
164  auto* _labels = _get_labels();
165  if(_labels)
166  {
167  value.resize(_labels->size());
168  accum.resize(_labels->size());
169  for(size_type i = 0; i < _labels->size(); ++i)
170  {
171  value[i].name = (*_labels)[i];
172  accum[i].name = (*_labels)[i];
173  }
174  }
175  }

References configure().

◆ ~cupti_counters()

tim::component::cupti_counters::~cupti_counters ( )
default

◆ cupti_counters() [2/3]

tim::component::cupti_counters::cupti_counters ( const cupti_counters & )
default

◆ cupti_counters() [3/3]

tim::component::cupti_counters::cupti_counters ( cupti_counters &&  )
default noexcept

Member Function Documentation

◆ cleanup()

static void tim::component::cupti_counters::cleanup ( )
inline static

Definition at line 658 of file cupti_counters.hpp.

659  {
660  clear();
661  delete _get_device();
662  delete _get_events();
663  delete _get_labels();
664  delete _get_metrics();
665  _get_device() = nullptr;
666  _get_events() = nullptr;
667  _get_labels() = nullptr;
668  _get_metrics() = nullptr;
669  }

◆ configure() [1/2]

static void tim::component::cupti_counters::configure ( )
inline static

Definition at line 135 of file cupti_counters.hpp.

136  {
137  if(_get_profiler().get() == nullptr)
138  init();
139  }
std::vector< double > get() const

References get().

Referenced by cupti_counters(), tim::component::gpu_roofline< Types >::configure(), global_init(), and record().

◆ configure() [2/2]

static void tim::component::cupti_counters::configure ( int  device,
const strvec_t & events,
const strvec_t & metrics = {} 
)
inline static

explicitly configure for a device and set of events/metrics.

Definition at line 142 of file cupti_counters.hpp.

143  {})
144  {
145  get_initializer() = [=]() -> tuple_type {
146  return tuple_type(device, events, metrics);
147  };
148  if(_get_profiler().get() == nullptr)
149  init();
150  }
std::tuple< int, strvec_t, strvec_t > tuple_type
static get_initializer_t & get_initializer()

◆ description()

static string_t tim::component::cupti_counters::description ( )
inline static

Definition at line 195 of file cupti_counters.hpp.

195 { return "Hardware counters for the CUDA API"; }

◆ description_array()

static array_t<string_t> tim::component::cupti_counters::description_array ( )
inline static

Definition at line 368 of file cupti_counters.hpp.

368 { return label_array(); }
static array_t< string_t > label_array()

References label_array().

◆ display_unit()

static string_t tim::component::cupti_counters::display_unit ( )
inline static

Definition at line 196 of file cupti_counters.hpp.

196 { return ""; }

Referenced by tim::component::gpu_roofline< Types >::display_unit().

◆ display_unit_array()

static array_t<string_t> tim::component::cupti_counters::display_unit_array ( )
inline static

Definition at line 373 of file cupti_counters.hpp.

374  {
375  return array_t<string_t>(get_labels().size(), "");
376  }
static const strvec_t & get_labels()

References get_labels().

◆ extra_serialization()

template<typename Archive >
static void tim::component::cupti_counters::extra_serialization ( Archive &  ar)
inline static

Definition at line 468 of file cupti_counters.hpp.

469  {
470  auto& _devices = *_get_device();
471  auto& _events = *_get_events();
472  auto& _metrics = *_get_metrics();
473  auto& _labels = *_get_labels();
474 
475  ar(cereal::make_nvp("devices", _devices), cereal::make_nvp("events", _events),
476  cereal::make_nvp("metrics", _metrics), cereal::make_nvp("labels", _labels));
477  }

◆ get()

std::vector<double> tim::component::cupti_counters::get ( ) const
inline

Definition at line 315 of file cupti_counters.hpp.

316  {
317  std::vector<double> values;
318  const auto& _data = load();
319  values.reserve(_data.size());
320  for(const auto& itr : _data)
321  values.push_back(cupti::get<double>(itr.data));
322  return values;
323  }

References tim::component::base< cupti_counters, cupti::profiler::results_t >::load().

Referenced by configure().

◆ get_device()

static int tim::component::cupti_counters::get_device ( )
inline static

Definition at line 158 of file cupti_counters.hpp.

158 { return *_get_device(); }

◆ get_device_initializer()

static device_func_t& tim::component::cupti_counters::get_device_initializer ( )
inline static

Definition at line 116 of file cupti_counters.hpp.

117  {
118  static device_func_t _instance = []() {
119  if(cuda::device_count() < 1)
120  return -1;
121  return settings::cupti_device();
122  };
123  return _instance;
124  }
cupti_device
Definition: settings.cpp:1425
std::function< int()> device_func_t

References tim::cupti_device.

Referenced by get_initializer().

◆ get_display()

string_t tim::component::cupti_counters::get_display ( ) const
inline

Definition at line 284 of file cupti_counters.hpp.

285  {
286  auto _get_display = [&](std::ostream& os, const cupti::result& obj) {
287  auto _label = obj.name;
288  auto _prec = base_type::get_precision();
289  auto _width = base_type::get_width();
290  auto _flags = base_type::get_format_flags();
291 
292  std::stringstream ss;
293  std::stringstream ssv;
294  std::stringstream ssi;
295  ssv.setf(_flags);
296  ssv << std::setw(_width) << std::setprecision(_prec);
297  cupti::print(ssv, obj.data);
298  if(!_label.empty())
299  ssi << " " << _label;
300  ss << ssv.str() << ssi.str();
301  os << ss.str();
302  };
303 
304  const auto& _data = load();
305  std::stringstream ss;
306  for(size_type i = 0; i < _data.size(); ++i)
307  {
308  _get_display(ss, _data[i]);
309  if(i + 1 < _data.size())
310  ss << ", ";
311  }
312  return ss.str();
313  }
void print(std::ostream &os, Args &&... args)
Definition: functional.cpp:159
static short get_precision()
static short get_width()
static fmtflags get_format_flags()

References tim::component::base< Tp, Value >::get_format_flags(), tim::component::base< Tp, Value >::get_precision(), tim::component::base< Tp, Value >::get_width(), tim::component::base< cupti_counters, cupti::profiler::results_t >::load(), and tim::invoke::print().

◆ get_event_initializer()

static event_func_t& tim::component::cupti_counters::get_event_initializer ( )
inline static

Definition at line 100 of file cupti_counters.hpp.

101  {
102  static event_func_t _instance = []() {
104  };
105  return _instance;
106  }
cupti_events
Definition: settings.cpp:1421
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: utility.hpp:666
std::function< strvec_t()> event_func_t

References tim::cupti_events, and tim::delimit().

Referenced by get_initializer().

◆ get_events()

static const strvec_t& tim::component::cupti_counters::get_events ( )
inline static

Definition at line 156 of file cupti_counters.hpp.

156 { return *_get_events(); }

◆ get_initializer()

static get_initializer_t& tim::component::cupti_counters::get_initializer ( )
inline static

Definition at line 126 of file cupti_counters.hpp.

127  {
128  static get_initializer_t _instance = []() -> tuple_type {
131  };
132  return _instance;
133  }
static device_func_t & get_device_initializer()
std::function< tuple_type()> get_initializer_t
static event_func_t & get_event_initializer()
static metric_func_t & get_metric_initializer()

References get_device_initializer(), get_event_initializer(), and get_metric_initializer().

◆ get_labels()

static const strvec_t& tim::component::cupti_counters::get_labels ( )
inline static

Definition at line 159 of file cupti_counters.hpp.

159 { return *_get_labels(); }

Referenced by display_unit_array(), and unit_array().

◆ get_metric_initializer()

static metric_func_t& tim::component::cupti_counters::get_metric_initializer ( )
inline static

Definition at line 108 of file cupti_counters.hpp.

109  {
110  static metric_func_t _instance = []() {
112  };
113  return _instance;
114  }
cupti_metrics
Definition: settings.cpp:1423
std::function< strvec_t()> metric_func_t

References tim::cupti_metrics, and tim::delimit().

Referenced by get_initializer().

◆ get_metrics()

static const strvec_t& tim::component::cupti_counters::get_metrics ( )
inline static

Definition at line 157 of file cupti_counters.hpp.

157 { return *_get_metrics(); }

◆ get_profiler()

static const profptr_t& tim::component::cupti_counters::get_profiler ( )
inline static

Definition at line 155 of file cupti_counters.hpp.

155 { return _get_profiler(); }

◆ get_secondary()

secondary_type tim::component::cupti_counters::get_secondary ( ) const
inline

Definition at line 327 of file cupti_counters.hpp.

328  {
329  secondary_type _data;
330  for(const auto& itr : m_kernel_accum)
331  _data.insert({ itr.first, itr.second });
332  return _data;
333  }
std::unordered_multimap< std::string, value_type > secondary_type

◆ global_finalize()

static void tim::component::cupti_counters::global_finalize ( )
inline static

Definition at line 153 of file cupti_counters.hpp.

153 { clear(); }

◆ global_init()

static void tim::component::cupti_counters::global_init ( )
inline static

Definition at line 152 of file cupti_counters.hpp.

152 { configure(); }

References configure().

◆ label()

static string_t tim::component::cupti_counters::label ( )
inline static

Definition at line 194 of file cupti_counters.hpp.

194 { return "cupti_counters"; }

◆ label_array()

static array_t<string_t> tim::component::cupti_counters::label_array ( )
inline static

Definition at line 341 of file cupti_counters.hpp.

342  {
343  array_t<string_t> arr;
344  auto contains = [&](const string_t& entry) {
345  return std::find(arr.begin(), arr.end(), entry) != arr.end();
346  };
347  auto insert = [&](const string_t& entry) {
348  if(!contains(entry))
349  arr.push_back(entry);
350  };
351  auto* _labels = _get_labels();
352  if(_labels)
353  {
354  for(const auto& itr : *_labels)
355  insert(itr);
356  }
357  // auto profiler = get_profiler();
358  // for(const auto& itr : profiler->get_event_names())
359  // insert(itr);
360  // for(const auto& itr : profiler->get_metric_names())
361  // insert(itr);
362  return arr;
363  }
std::string string_t
Definition: library.cpp:56
data::entry entry
Definition: stream.hpp:980
void insert(Bundle< Idx, Type > &obj, std::initializer_list< EnumT > components)
Definition: insert.hpp:47

References tim::insert().

Referenced by tim::component::gpu_roofline< Types >::configure(), and description_array().

◆ operator+=() [1/2]

this_type& tim::component::cupti_counters::operator+= ( const results_t & rhs)
inline

Definition at line 423 of file cupti_counters.hpp.

424  {
425  auto _combine = [](value_type& _data, const value_type& _other) {
426  if(_data.empty())
427  {
428  _data = _other;
429  }
430  else
431  {
432  for(size_type i = 0; i < _other.size(); ++i)
433  _data[i] += _other[i];
434  }
435  };
436 
437  _combine(value, rhs);
438  _combine(accum, rhs);
439 
440  return *this;
441  }
cupti::profiler::results_t value_type

◆ operator+=() [2/2]

this_type& tim::component::cupti_counters::operator+= ( const this_type & rhs)
inline

Definition at line 386 of file cupti_counters.hpp.

387  {
388  auto _combine = [](value_type& _data, const value_type& _other) {
389  auto& _labels = *_get_labels();
390  if(_data.empty())
391  {
392  _data = _other;
393  }
394  else
395  {
396  for(size_type i = 0; i < _labels.size(); ++i)
397  _data[i] += _other[i];
398  }
399  };
400 
401  _combine(value, rhs.value);
402  _combine(accum, rhs.accum);
403  return *this;
404  }

◆ operator-=()

this_type& tim::component::cupti_counters::operator-= ( const this_type & rhs)
inline

Definition at line 406 of file cupti_counters.hpp.

407  {
408  auto _combine = [](value_type& _data, const value_type& _other) {
409  auto& _labels = *_get_labels();
410  // set to other
411  if(_data.empty())
412  _data = _other;
413  // subtract other (if data was empty, will contain zero data)
414  for(size_type i = 0; i < _labels.size(); ++i)
415  _data[i] -= _other[i];
416  };
417 
418  _combine(value, rhs.value);
419  _combine(accum, rhs.accum);
420  return *this;
421  }

◆ operator=() [1/2]

cupti_counters& tim::component::cupti_counters::operator= ( const cupti_counters & rhs)
inline

Definition at line 180 of file cupti_counters.hpp.

181  {
182  if(this != &rhs)
183  {
184  base_type::operator=(rhs);
185  m_kernel_value = rhs.m_kernel_value;
186  m_kernel_accum = rhs.m_kernel_accum;
187  }
188  return *this;
189  }

◆ operator=() [2/2]

cupti_counters& tim::component::cupti_counters::operator= ( cupti_counters &&  )
default noexcept

◆ record()

static value_type tim::component::cupti_counters::record ( )
inline static

Definition at line 198 of file cupti_counters.hpp.

199  {
200  configure();
201  value_type tmp;
202  auto& _profiler = _get_profiler();
203  if(!_profiler || !_get_labels())
204  return tmp;
205  auto& _labels = *_get_labels();
206  _profiler->stop();
207  if(tmp.empty())
208  {
209  tmp = _profiler->get_events_and_metrics(_labels);
210  }
211  else if(tmp.size() == _labels.size())
212  {
213  auto ret = _profiler->get_events_and_metrics(_labels);
214  for(size_t j = 0; j < _labels.size(); ++j)
215  tmp[j] += ret[j];
216  }
217  else
218  {
219  fprintf(stderr, "Warning! mis-matched size in cupti_event::%s @ %s:%i\n",
220  TIMEMORY_ERROR_FUNCTION_MACRO, __FILE__, __LINE__);
221  }
222 
223  return tmp;
224  }
#define TIMEMORY_ERROR_FUNCTION_MACRO
Definition: macros.hpp:229

References configure(), and TIMEMORY_ERROR_FUNCTION_MACRO.

Referenced by tim::component::gpu_roofline< Types >::record(), start(), and stop().

◆ serialize()

template<typename Archive >
void tim::component::cupti_counters::serialize ( Archive &  ar,
const unsigned int   
)
inline

Definition at line 447 of file cupti_counters.hpp.

448  {
449  auto _get = [&](const value_type& _data) {
450  std::vector<double> values;
451  for(const auto& itr : _data)
452  values.push_back(cupti::get<double>(itr.data));
453  return values;
454  };
455  array_t<double> _disp = _get(accum);
456  array_t<double> _value = _get(value);
457  array_t<double> _accum = _get(accum);
458  ar(cereal::make_nvp("laps", laps), cereal::make_nvp("repr_data", _disp),
459  cereal::make_nvp("value", _value), cereal::make_nvp("accum", _accum),
460  cereal::make_nvp("display", _disp));
461  // ar(cereal::make_nvp("units", unit_array()),
462  // cereal::make_nvp("display_units", display_unit_array()));
463  }

References tim::component::base< cupti_counters, cupti::profiler::results_t >::laps.

◆ start()

void tim::component::cupti_counters::start ( )
inline

Definition at line 229 of file cupti_counters.hpp.

230  {
231  value = record();
232  auto& _profiler = _get_profiler();
233  if(_profiler)
234  {
235  m_kernel_value = _profiler->get_kernel_events_and_metrics(*_get_labels());
236  _profiler->start();
237  }
238  }

References record().

◆ stop()

void tim::component::cupti_counters::stop ( )
inline

Definition at line 240 of file cupti_counters.hpp.

241  {
242  using namespace stl;
243  using namespace tim::component::operators;
244 
245  value_type tmp = record();
246  auto& _profiler = _get_profiler();
247  if(!_profiler)
248  return;
249 
250  kernel_results_t kernel_data =
251  _profiler->get_kernel_events_and_metrics(*_get_labels());
252  kernel_results_t kernel_tmp = kernel_data;
253 
254  if(accum.empty())
255  {
256  accum = tmp;
257  for(size_type i = 0; i < tmp.size(); ++i)
258  accum[i] -= value[i];
259  }
260  else
261  {
262  for(size_type i = 0; i < tmp.size(); ++i)
263  accum[i] += (tmp[i] - value[i]);
264  }
265 
266  for(size_t i = 0; i < m_kernel_value.size(); ++i)
267  kernel_tmp[i].second -= m_kernel_value[i].second;
268  for(size_t i = 0; i < kernel_tmp.size(); ++i)
269  {
270  if(i >= m_kernel_accum.size())
271  {
272  m_kernel_accum.resize(i + 1, kernel_tmp[i]);
273  }
274  else
275  {
276  m_kernel_accum[i].second += kernel_tmp[i].second;
277  }
278  }
279 
280  value = std::move(tmp);
281  m_kernel_value = std::move(kernel_data);
282  }
cupti::profiler::kernel_results_t kernel_results_t

References record().

◆ unit()

static int64_t tim::component::cupti_counters::unit ( )
inline static

Definition at line 192 of file cupti_counters.hpp.

192 { return 1; }

Referenced by tim::component::gpu_roofline< Types >::unit().

◆ unit_array()

static array_t<int64_t> tim::component::cupti_counters::unit_array ( )
inline static

Definition at line 381 of file cupti_counters.hpp.

382  {
383  return array_t<int64_t>(get_labels().size(), 1);
384  }

References get_labels().

Member Data Documentation

◆ precision

const short tim::component::cupti_counters::precision = 3
static

Definition at line 97 of file cupti_counters.hpp.

◆ width

const short tim::component::cupti_counters::width = 8
static

Definition at line 98 of file cupti_counters.hpp.


The documentation for this struct was generated from the following file: