timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::component::cupti_counters Struct Reference

NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a higher overhead than the new CUpti Profiling API (tim::component::cupti_profiler). However, there are currently some issues with nesting the Profiling API and it is currently recommended to use this component for NVIDIA hardware counters in timemory. The callback API / NVprof is quite specific about the distinction between an "event" and a "metric". For your convenience, timemory removes this distinction and events can be specified arbitrarily as metrics and vice-versa and this component will sort them into their appropriate category. For the full list of the available events/metrics, use timemory-avail -H from the command-line. More...

#include "timemory/components/cupti/cupti_counters.hpp"

+ Collaboration diagram for tim::component::cupti_counters:

Public Types

using value_type = cupti::profiler::results_t
 
using this_type = cupti_counters
 
using base_type = base< cupti_counters, value_type >
 
using size_type = std::size_t
 
using string_t = std::string
 
using kernel_data_t = cupti::result
 
using entry_type = typename value_type::value_type
 
using results_t = cupti::profiler::results_t
 
using kernel_results_t = cupti::profiler::kernel_results_t
 
using strvec_t = std::vector< string_t >
 
using profptr_t = std::shared_ptr< cupti::profiler >
 
using tuple_type = std::tuple< int, strvec_t, strvec_t >
 
using event_func_t = std::function< strvec_t()>
 
using metric_func_t = std::function< strvec_t()>
 
using device_func_t = std::function< int()>
 
using get_initializer_t = std::function< tuple_type()>
 
using secondary_type = std::unordered_multimap< std::string, value_type >
 
template<typename Tp >
using array_t = std::vector< Tp >
 
using storage_type = empty_storage
 

Public Member Functions

 cupti_counters ()
 
 ~cupti_counters ()=default
 
 cupti_counters (const cupti_counters &)=default
 
 cupti_counters (cupti_counters &&) noexcept=default
 
cupti_counters & operator= (const cupti_counters &rhs)
 
cupti_counters & operator= (cupti_counters &&) noexcept=default
 
void start ()
 
void stop ()
 
string_t get_display () const
 
std::vector< double > get () const
 
secondary_type get_secondary () const
 
this_type & operator+= (const this_type &rhs)
 
this_type & operator-= (const this_type &rhs)
 
this_type & operator+= (const results_t &rhs)
 
template<typename Archive >
void serialize (Archive &ar, const unsigned int)
 

Static Public Member Functions

static event_func_t & get_event_initializer ()
 
static metric_func_t & get_metric_initializer ()
 
static device_func_t & get_device_initializer ()
 
static get_initializer_t & get_initializer ()
 
static void configure ()
 
static void configure (int device, const strvec_t &events, const strvec_t &metrics={})
 explicitly configure for a device and set of events/metrics. More...
 
static void global_init ()
 
static void global_finalize ()
 
static const profptr_t & get_profiler ()
 
static const strvec_t & get_events ()
 
static const strvec_t & get_metrics ()
 
static int get_device ()
 
static const strvec_t & get_labels ()
 
static int64_t unit ()
 
static string_t label ()
 
static string_t description ()
 
static string_t display_unit ()
 
static value_type record ()
 
static array_t< string_t > label_array ()
 
static array_t< string_t > description_array ()
 
static array_t< string_t > display_unit_array ()
 
static array_t< int64_t > unit_array ()
 
template<typename Archive >
static void extra_serialization (Archive &ar)
 
static void cleanup ()
 
template<typename... Args>
static opaque get_opaque (Args &&...)
 

Static Public Attributes

static const short precision = 3
 
static const short width = 8
 

Detailed Description

NVprof-style hardware counters via the CUpti callback API. Collecting these hardware counters has a higher overhead than the new CUpti Profiling API (tim::component::cupti_profiler). However, there are currently some issues with nesting the Profiling API and it is currently recommended to use this component for NVIDIA hardware counters in timemory. The callback API / NVprof is quite specific about the distinction between an "event" and a "metric". For your convenience, timemory removes this distinction and events can be specified arbitrarily as metrics and vice-versa and this component will sort them into their appropriate category. For the full list of the available events/metrics, use timemory-avail -H from the command-line.

Definition at line 70 of file cupti_counters.hpp.

Member Typedef Documentation

◆ array_t

template<typename Tp >
using tim::component::cupti_counters::array_t = std::vector<Tp>

Definition at line 336 of file cupti_counters.hpp.

◆ base_type

◆ device_func_t

using tim::component::cupti_counters::device_func_t = std::function<int()>

Definition at line 93 of file cupti_counters.hpp.

◆ entry_type

using tim::component::cupti_counters::entry_type = typename value_type::value_type

Definition at line 81 of file cupti_counters.hpp.

◆ event_func_t

Definition at line 91 of file cupti_counters.hpp.

◆ get_initializer_t

Definition at line 95 of file cupti_counters.hpp.

◆ kernel_data_t

Definition at line 80 of file cupti_counters.hpp.

◆ kernel_results_t

using tim::component::cupti_counters::kernel_results_t = cupti::profiler::kernel_results_t

Definition at line 83 of file cupti_counters.hpp.

◆ metric_func_t

Definition at line 92 of file cupti_counters.hpp.

◆ profptr_t

using tim::component::cupti_counters::profptr_t = std::shared_ptr<cupti::profiler>

Definition at line 87 of file cupti_counters.hpp.

◆ results_t

using tim::component::cupti_counters::results_t = cupti::profiler::results_t

Definition at line 82 of file cupti_counters.hpp.

◆ secondary_type

using tim::component::cupti_counters::secondary_type = std::unordered_multimap<std::string, value_type>

Definition at line 325 of file cupti_counters.hpp.

◆ size_type

Definition at line 78 of file cupti_counters.hpp.

◆ storage_type

Definition at line 66 of file declaration.hpp.

◆ string_t

Definition at line 79 of file cupti_counters.hpp.

◆ strvec_t

Definition at line 86 of file cupti_counters.hpp.

◆ this_type

◆ tuple_type

Definition at line 89 of file cupti_counters.hpp.

◆ value_type

using tim::component::cupti_counters::value_type = cupti::profiler::results_t

Definition at line 73 of file cupti_counters.hpp.

Constructor & Destructor Documentation

◆ cupti_counters() [1/3]

tim::component::cupti_counters::cupti_counters ( )
inline

Definition at line 161 of file cupti_counters.hpp.

162 {
163 configure();
164 auto* _labels = _get_labels();
165 if(_labels)
166 {
167 value.resize(_labels->size());
168 accum.resize(_labels->size());
169 for(size_type i = 0; i < _labels->size(); ++i)
170 {
171 value[i].name = (*_labels)[i];
172 accum[i].name = (*_labels)[i];
173 }
174 }
175 }

References configure().

◆ ~cupti_counters()

tim::component::cupti_counters::~cupti_counters ( )
default

◆ cupti_counters() [2/3]

tim::component::cupti_counters::cupti_counters ( const cupti_counters & )
default

◆ cupti_counters() [3/3]

tim::component::cupti_counters::cupti_counters ( cupti_counters &&  )
defaultnoexcept

Member Function Documentation

◆ cleanup()

static void tim::component::cupti_counters::cleanup ( )
inlinestatic

Definition at line 658 of file cupti_counters.hpp.

659 {
660 clear();
661 delete _get_device();
662 delete _get_events();
663 delete _get_labels();
664 delete _get_metrics();
665 _get_device() = nullptr;
666 _get_events() = nullptr;
667 _get_labels() = nullptr;
668 _get_metrics() = nullptr;
669 }

◆ configure() [1/2]

static void tim::component::cupti_counters::configure ( )
inlinestatic

Definition at line 135 of file cupti_counters.hpp.

136 {
137 if(_get_profiler().get() == nullptr)
138 init();
139 }
std::vector< double > get() const

References get().

Referenced by cupti_counters(), tim::component::gpu_roofline< Types >::configure(), global_init(), and record().

◆ configure() [2/2]

static void tim::component::cupti_counters::configure ( int  device,
const strvec_t & events,
const strvec_t & metrics = {}
)
inlinestatic

explicitly configure for a device and set of events/metrics.

Definition at line 142 of file cupti_counters.hpp.

143 {})
144 {
145 get_initializer() = [=]() -> tuple_type {
146 return tuple_type(device, events, metrics);
147 };
148 if(_get_profiler().get() == nullptr)
149 init();
150 }
std::tuple< int, strvec_t, strvec_t > tuple_type
static get_initializer_t & get_initializer()

◆ description()

static string_t tim::component::cupti_counters::description ( )
inlinestatic

Definition at line 195 of file cupti_counters.hpp.

195{ return "Hardware counters for the CUDA API"; }

◆ description_array()

static array_t< string_t > tim::component::cupti_counters::description_array ( )
inlinestatic

Definition at line 368 of file cupti_counters.hpp.

368{ return label_array(); }
static array_t< string_t > label_array()

References label_array().

◆ display_unit()

static string_t tim::component::cupti_counters::display_unit ( )
inlinestatic

Definition at line 196 of file cupti_counters.hpp.

196{ return ""; }

Referenced by tim::component::gpu_roofline< Types >::display_unit().

◆ display_unit_array()

static array_t< string_t > tim::component::cupti_counters::display_unit_array ( )
inlinestatic

Definition at line 373 of file cupti_counters.hpp.

374 {
375 return array_t<string_t>(get_labels().size(), "");
376 }
static const strvec_t & get_labels()

References get_labels().

◆ extra_serialization()

template<typename Archive >
static void tim::component::cupti_counters::extra_serialization ( Archive &  ar)
inlinestatic

Definition at line 468 of file cupti_counters.hpp.

469 {
470 auto& _devices = *_get_device();
471 auto& _events = *_get_events();
472 auto& _metrics = *_get_metrics();
473 auto& _labels = *_get_labels();
474
475 ar(cereal::make_nvp("devices", _devices), cereal::make_nvp("events", _events),
476 cereal::make_nvp("metrics", _metrics), cereal::make_nvp("labels", _labels));
477 }

◆ get()

std::vector< double > tim::component::cupti_counters::get ( ) const
inline

Definition at line 315 of file cupti_counters.hpp.

316 {
317 std::vector<double> values;
318 const auto& _data = load();
319 values.reserve(_data.size());
320 for(const auto& itr : _data)
321 values.push_back(cupti::get<double>(itr.data));
322 return values;
323 }

References tim::component::base< cupti_counters, cupti::profiler::results_t >::load().

Referenced by configure().

◆ get_device()

static int tim::component::cupti_counters::get_device ( )
inlinestatic

Definition at line 158 of file cupti_counters.hpp.

158{ return *_get_device(); }

◆ get_device_initializer()

static device_func_t & tim::component::cupti_counters::get_device_initializer ( )
inlinestatic

Definition at line 116 of file cupti_counters.hpp.

117 {
118 static device_func_t _instance = []() {
119 if(cuda::device_count() < 1)
120 return -1;
121 return settings::cupti_device();
122 };
123 return _instance;
124 }
cupti_device
Definition: settings.cpp:1731
std::function< int()> device_func_t

References tim::cupti_device.

Referenced by get_initializer().

◆ get_display()

string_t tim::component::cupti_counters::get_display ( ) const
inline

Definition at line 284 of file cupti_counters.hpp.

285 {
286 auto _get_display = [&](std::ostream& os, const cupti::result& obj) {
287 auto _label = obj.name;
288 auto _prec = base_type::get_precision();
289 auto _width = base_type::get_width();
290 auto _flags = base_type::get_format_flags();
291
292 std::stringstream ss;
293 std::stringstream ssv;
294 std::stringstream ssi;
295 ssv.setf(_flags);
296 ssv << std::setw(_width) << std::setprecision(_prec);
297 cupti::print(ssv, obj.data);
298 if(!_label.empty())
299 ssi << " " << _label;
300 ss << ssv.str() << ssi.str();
301 os << ss.str();
302 };
303
304 const auto& _data = load();
305 std::stringstream ss;
306 for(size_type i = 0; i < _data.size(); ++i)
307 {
308 _get_display(ss, _data[i]);
309 if(i + 1 < _data.size())
310 ss << ", ";
311 }
312 return ss.str();
313 }
void print(std::ostream &os, Args &&... args)
Definition: functional.cpp:159
const std::string std::ostream * os
static short get_precision()
static short get_width()
static fmtflags get_format_flags()

References tim::component::base< Tp, Value >::get_format_flags(), tim::component::base< Tp, Value >::get_precision(), tim::component::base< Tp, Value >::get_width(), tim::component::base< cupti_counters, cupti::profiler::results_t >::load(), tim::os, and tim::invoke::print().

◆ get_event_initializer()

static event_func_t & tim::component::cupti_counters::get_event_initializer ( )
inlinestatic

Definition at line 100 of file cupti_counters.hpp.

101 {
102 static event_func_t _instance = []() {
104 };
105 return _instance;
106 }
cupti_events
Definition: settings.cpp:1727
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: delimit.hpp:68
std::function< strvec_t()> event_func_t

References tim::cupti_events, and tim::delimit().

Referenced by get_initializer().

◆ get_events()

static const strvec_t & tim::component::cupti_counters::get_events ( )
inlinestatic

Definition at line 156 of file cupti_counters.hpp.

156{ return *_get_events(); }

◆ get_initializer()

static get_initializer_t & tim::component::cupti_counters::get_initializer ( )
inlinestatic

Definition at line 126 of file cupti_counters.hpp.

127 {
128 static get_initializer_t _instance = []() -> tuple_type {
131 };
132 return _instance;
133 }
static device_func_t & get_device_initializer()
static event_func_t & get_event_initializer()
std::function< tuple_type()> get_initializer_t
static metric_func_t & get_metric_initializer()

References get_device_initializer(), get_event_initializer(), and get_metric_initializer().

◆ get_labels()

static const strvec_t & tim::component::cupti_counters::get_labels ( )
inlinestatic

Definition at line 159 of file cupti_counters.hpp.

159{ return *_get_labels(); }

Referenced by display_unit_array(), and unit_array().

◆ get_metric_initializer()

static metric_func_t & tim::component::cupti_counters::get_metric_initializer ( )
inlinestatic

Definition at line 108 of file cupti_counters.hpp.

109 {
110 static metric_func_t _instance = []() {
112 };
113 return _instance;
114 }
cupti_metrics
Definition: settings.cpp:1729
std::function< strvec_t()> metric_func_t

References tim::cupti_metrics, and tim::delimit().

Referenced by get_initializer().

◆ get_metrics()

static const strvec_t & tim::component::cupti_counters::get_metrics ( )
inlinestatic

Definition at line 157 of file cupti_counters.hpp.

157{ return *_get_metrics(); }

◆ get_opaque()

template<typename... Args>
static opaque tim::component::empty_base::get_opaque ( Args &&  ...)
inlinestaticinherited

Definition at line 72 of file declaration.hpp.

73 {
74 return opaque{};
75 }

◆ get_profiler()

static const profptr_t & tim::component::cupti_counters::get_profiler ( )
inlinestatic

Definition at line 155 of file cupti_counters.hpp.

155{ return _get_profiler(); }

◆ get_secondary()

secondary_type tim::component::cupti_counters::get_secondary ( ) const
inline

Definition at line 327 of file cupti_counters.hpp.

328 {
329 secondary_type _data;
330 for(const auto& itr : m_kernel_accum)
331 _data.insert({ itr.first, itr.second });
332 return _data;
333 }
std::unordered_multimap< std::string, value_type > secondary_type

◆ global_finalize()

static void tim::component::cupti_counters::global_finalize ( )
inlinestatic

Definition at line 153 of file cupti_counters.hpp.

153{ clear(); }

◆ global_init()

static void tim::component::cupti_counters::global_init ( )
inlinestatic

Definition at line 152 of file cupti_counters.hpp.

152{ configure(); }

References configure().

◆ label()

static string_t tim::component::cupti_counters::label ( )
inlinestatic

Definition at line 194 of file cupti_counters.hpp.

194{ return "cupti_counters"; }

◆ label_array()

static array_t< string_t > tim::component::cupti_counters::label_array ( )
inlinestatic

Definition at line 341 of file cupti_counters.hpp.

342 {
343 array_t<string_t> arr;
344 auto contains = [&](const string_t& entry) {
345 return std::find(arr.begin(), arr.end(), entry) != arr.end();
346 };
347 auto insert = [&](const string_t& entry) {
348 if(!contains(entry))
349 arr.push_back(entry);
350 };
351 auto* _labels = _get_labels();
352 if(_labels)
353 {
354 for(const auto& itr : *_labels)
355 insert(itr);
356 }
357 // auto profiler = get_profiler();
358 // for(const auto& itr : profiler->get_event_names())
359 // insert(itr);
360 // for(const auto& itr : profiler->get_metric_names())
361 // insert(itr);
362 return arr;
363 }
std::string string_t
Definition: library.cpp:57
_reported insert(_hash_id)
data::entry entry
Definition: stream.hpp:980

Referenced by tim::component::gpu_roofline< Types >::configure(), and description_array().

◆ operator+=() [1/2]

this_type & tim::component::cupti_counters::operator+= ( const results_t & rhs)
inline

Definition at line 423 of file cupti_counters.hpp.

424 {
425 auto _combine = [](value_type& _data, const value_type& _other) {
426 if(_data.empty())
427 {
428 _data = _other;
429 }
430 else
431 {
432 for(size_type i = 0; i < _other.size(); ++i)
433 _data[i] += _other[i];
434 }
435 };
436
437 _combine(value, rhs);
438 _combine(accum, rhs);
439
440 return *this;
441 }
cupti::profiler::results_t value_type

◆ operator+=() [2/2]

this_type & tim::component::cupti_counters::operator+= ( const this_type & rhs)
inline

Definition at line 386 of file cupti_counters.hpp.

387 {
388 auto _combine = [](value_type& _data, const value_type& _other) {
389 auto& _labels = *_get_labels();
390 if(_data.empty())
391 {
392 _data = _other;
393 }
394 else
395 {
396 for(size_type i = 0; i < _labels.size(); ++i)
397 _data[i] += _other[i];
398 }
399 };
400
401 _combine(value, rhs.value);
402 _combine(accum, rhs.accum);
403 return *this;
404 }

◆ operator-=()

this_type & tim::component::cupti_counters::operator-= ( const this_type & rhs)
inline

Definition at line 406 of file cupti_counters.hpp.

407 {
408 auto _combine = [](value_type& _data, const value_type& _other) {
409 auto& _labels = *_get_labels();
410 // set to other
411 if(_data.empty())
412 _data = _other;
413 // subtract other (if data was empty, will contain zero data)
414 for(size_type i = 0; i < _labels.size(); ++i)
415 _data[i] -= _other[i];
416 };
417
418 _combine(value, rhs.value);
419 _combine(accum, rhs.accum);
420 return *this;
421 }

◆ operator=() [1/2]

cupti_counters & tim::component::cupti_counters::operator= ( const cupti_counters & rhs)
inline

Definition at line 180 of file cupti_counters.hpp.

181 {
182 if(this != &rhs)
183 {
184 base_type::operator=(rhs);
185 m_kernel_value = rhs.m_kernel_value;
186 m_kernel_accum = rhs.m_kernel_accum;
187 }
188 return *this;
189 }

◆ operator=() [2/2]

cupti_counters & tim::component::cupti_counters::operator= ( cupti_counters &&  )
defaultnoexcept

◆ record()

static value_type tim::component::cupti_counters::record ( )
inlinestatic

Definition at line 198 of file cupti_counters.hpp.

199 {
200 configure();
201 value_type tmp;
202 auto& _profiler = _get_profiler();
203 if(!_profiler || !_get_labels())
204 return tmp;
205 auto& _labels = *_get_labels();
206 _profiler->stop();
207 if(tmp.empty())
208 {
209 tmp = _profiler->get_events_and_metrics(_labels);
210 }
211 else if(tmp.size() == _labels.size())
212 {
213 auto ret = _profiler->get_events_and_metrics(_labels);
214 for(size_t j = 0; j < _labels.size(); ++j)
215 tmp[j] += ret[j];
216 }
217 else
218 {
219 fprintf(stderr, "Warning! mis-matched size in cupti_event::%s @ %s:%i\n",
220 TIMEMORY_ERROR_FUNCTION_MACRO, __FILE__, __LINE__);
221 }
222
223 return tmp;
224 }
#define TIMEMORY_ERROR_FUNCTION_MACRO
Definition: macros.hpp:229

References configure(), and TIMEMORY_ERROR_FUNCTION_MACRO.

Referenced by tim::component::gpu_roofline< Types >::record(), start(), and stop().

◆ serialize()

template<typename Archive >
void tim::component::cupti_counters::serialize ( Archive &  ar,
const unsigned int   
)
inline

Definition at line 447 of file cupti_counters.hpp.

448 {
449 auto _get = [&](const value_type& _data) {
450 std::vector<double> values;
451 for(const auto& itr : _data)
452 values.push_back(cupti::get<double>(itr.data));
453 return values;
454 };
455 array_t<double> _disp = _get(accum);
456 array_t<double> _value = _get(value);
457 array_t<double> _accum = _get(accum);
458 ar(cereal::make_nvp("laps", laps), cereal::make_nvp("repr_data", _disp),
459 cereal::make_nvp("value", _value), cereal::make_nvp("accum", _accum),
460 cereal::make_nvp("display", _disp));
461 // ar(cereal::make_nvp("units", unit_array()),
462 // cereal::make_nvp("display_units", display_unit_array()));
463 }

References tim::component::base< cupti_counters, cupti::profiler::results_t >::laps.

◆ start()

void tim::component::cupti_counters::start ( )
inline

Definition at line 229 of file cupti_counters.hpp.

230 {
231 value = record();
232 auto& _profiler = _get_profiler();
233 if(_profiler)
234 {
235 m_kernel_value = _profiler->get_kernel_events_and_metrics(*_get_labels());
236 _profiler->start();
237 }
238 }

References record().

◆ stop()

void tim::component::cupti_counters::stop ( )
inline

Definition at line 240 of file cupti_counters.hpp.

241 {
242 using namespace stl;
243 using namespace tim::component::operators;
244
245 value_type tmp = record();
246 auto& _profiler = _get_profiler();
247 if(!_profiler)
248 return;
249
250 kernel_results_t kernel_data =
251 _profiler->get_kernel_events_and_metrics(*_get_labels());
252 kernel_results_t kernel_tmp = kernel_data;
253
254 if(accum.empty())
255 {
256 accum = tmp;
257 for(size_type i = 0; i < tmp.size(); ++i)
258 accum[i] -= value[i];
259 }
260 else
261 {
262 for(size_type i = 0; i < tmp.size(); ++i)
263 accum[i] += (tmp[i] - value[i]);
264 }
265
266 for(size_t i = 0; i < m_kernel_value.size(); ++i)
267 kernel_tmp[i].second -= m_kernel_value[i].second;
268 for(size_t i = 0; i < kernel_tmp.size(); ++i)
269 {
270 if(i >= m_kernel_accum.size())
271 {
272 m_kernel_accum.resize(i + 1, kernel_tmp[i]);
273 }
274 else
275 {
276 m_kernel_accum[i].second += kernel_tmp[i].second;
277 }
278 }
279
280 value = std::move(tmp);
281 m_kernel_value = std::move(kernel_data);
282 }
cupti::profiler::kernel_results_t kernel_results_t

References record().

◆ unit()

static int64_t tim::component::cupti_counters::unit ( )
inlinestatic

Definition at line 192 of file cupti_counters.hpp.

192{ return 1; }

Referenced by tim::component::gpu_roofline< Types >::unit().

◆ unit_array()

static array_t< int64_t > tim::component::cupti_counters::unit_array ( )
inlinestatic

Definition at line 381 of file cupti_counters.hpp.

382 {
383 return array_t<int64_t>(get_labels().size(), 1);
384 }

References get_labels().

Member Data Documentation

◆ precision

const short tim::component::cupti_counters::precision = 3
static

Definition at line 97 of file cupti_counters.hpp.

◆ width

const short tim::component::cupti_counters::width = 8
static

Definition at line 98 of file cupti_counters.hpp.


The documentation for this struct was generated from the following file: