timemory  3.2.1
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::component::gpu_roofline< Types > Struct Template Reference

Combines hardware counters and timers and executes the empirical roofline toolkit during application termination to estimate the peak possible performance for the machine. More...

#include "timemory/components/roofline/gpu_roofline.hpp"

+ Collaboration diagram for tim::component::gpu_roofline< Types >:

Public Types

enum class  MODE {
  COUNTERS ,
  ACTIVITY
}
 
using value_type = std::tuple< typename cupti_activity::value_type, typename cupti_counters::value_type >
 
using this_type = gpu_roofline< Types... >
 
using base_type = base< this_type, value_type >
 
using storage_type = typename base_type::storage_type
 
using size_type = std::size_t
 
using counters_type = cupti_counters
 
using activity_type = cupti_activity
 
using device_t = device::gpu
 
using result_type = std::vector< double >
 
using label_type = std::vector< std::string >
 
using count_type = wall_clock
 
using types_tuple = std::tuple< Types... >
 
using ert_data_t = ert::exec_data< count_type >
 
using ert_data_ptr_t = std::shared_ptr< ert_data_t >
 
template<typename Tp >
using ert_config_type = ert::configuration< device_t, Tp, count_type >
 
template<typename Tp >
using ert_counter_type = ert::counter< device_t, Tp, count_type >
 
template<typename Tp >
using ert_executor_type = ert::executor< device_t, Tp, count_type >
 
template<typename Tp >
using ert_callback_type = ert::callback< ert_executor_type< Tp > >
 
using ert_config_t = std::tuple< ert_config_type< Types >... >
 
using ert_counter_t = std::tuple< ert_counter_type< Types >... >
 
using ert_executor_t = std::tuple< ert_executor_type< Types >... >
 
using ert_callback_t = std::tuple< ert_callback_type< Types >... >
 
using strvec_t = std::vector< std::string >
 
using events_callback_t = std::function< strvec_t()>
 
using metrics_callback_t = events_callback_t
 
using activity_value_type = typename cupti_activity::value_type
 
using counters_value_type = typename cupti_counters::value_type
 
using secondary_type = std::unordered_multimap< std::string, value_type >
 

Public Member Functions

 gpu_roofline ()
 
 ~gpu_roofline ()=default
 
 gpu_roofline (const gpu_roofline &rhs)
 
gpu_roofline & operator= (const gpu_roofline &rhs)
 
 gpu_roofline (gpu_roofline &&) noexcept=default
 
gpu_roofline & operator= (gpu_roofline &&) noexcept=default
 
result_type get () const
 
void start ()
 
void stop ()
 
this_type & operator+= (const this_type &rhs)
 
this_type & operator-= (const this_type &rhs)
 
this_type & operator+= (const value_type &rhs)
 
secondary_type get_secondary () const
 
string_t get_display () const
 
template<typename Archive >
void save (Archive &ar, const unsigned int) const
 
template<typename Archive >
void load (Archive &ar, const unsigned int)
 

Static Public Member Functions

static events_callback_t & get_events_callback ()
 
static metrics_callback_t & get_metrics_callback ()
 
static MODE & event_mode ()
 
static void configure (const MODE &_mode, int _device=0)
 
static void configure ()
 
static std::string get_mode_string ()
 
static std::string get_type_string ()
 
static ert_config_t & get_finalizer ()
 
static ert_data_ptr_t & get_ert_data ()
 
static void global_init ()
 
template<typename Tp , typename FuncT >
static void set_executor_callback (FuncT &&f)
 
static void global_finalize (storage_type *_store)
 
static void thread_init ()
 
static void thread_finalize ()
 
template<typename Archive >
static void extra_serialization (Archive &ar)
 
static int64_t unit ()
 
static std::string label ()
 
static std::string description ()
 
static std::string display_unit ()
 
static value_type record ()
 
static label_type label_array ()
 
static label_type display_unit_array ()
 

Static Public Attributes

static const short precision = 3
 
static const short width = 8
 

Friends

struct operation::record< this_type >
 
struct operation::start< this_type >
 
struct operation::stop< this_type >
 
struct operation::set_started< this_type >
 
struct operation::set_stopped< this_type >
 
class impl::storage< this_type, trait::uses_value_storage< this_type, value_type >::value >
 
std::ostream & operator<< (std::ostream &os, const this_type &obj)
 

Detailed Description

template<typename... Types>
struct tim::component::gpu_roofline< Types >

Combines hardware counters and timers and executes the empirical roofline toolkit during application termination to estimate the peak possible performance for the machine.

Template Parameters
Types: Variadic list of data types for roofline analysis

Definition at line 71 of file gpu_roofline.hpp.

Member Typedef Documentation

◆ activity_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::activity_type = cupti_activity

Definition at line 83 of file gpu_roofline.hpp.

◆ activity_value_type

Definition at line 581 of file gpu_roofline.hpp.

◆ base_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::base_type = base<this_type, value_type>

Definition at line 78 of file gpu_roofline.hpp.

◆ count_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::count_type = wall_clock

Definition at line 87 of file gpu_roofline.hpp.

◆ counters_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::counters_type = cupti_counters

Definition at line 82 of file gpu_roofline.hpp.

◆ counters_value_type

Definition at line 582 of file gpu_roofline.hpp.

◆ device_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::device_t = device::gpu

Definition at line 84 of file gpu_roofline.hpp.

◆ ert_callback_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::ert_callback_t = std::tuple<ert_callback_type<Types>...>

Definition at line 113 of file gpu_roofline.hpp.

◆ ert_callback_type

template<typename... Types>
template<typename Tp >
using tim::component::gpu_roofline< Types >::ert_callback_type = ert::callback<ert_executor_type<Tp> >

Definition at line 107 of file gpu_roofline.hpp.

◆ ert_config_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::ert_config_t = std::tuple<ert_config_type<Types>...>

Definition at line 110 of file gpu_roofline.hpp.

◆ ert_config_type

template<typename... Types>
template<typename Tp >
using tim::component::gpu_roofline< Types >::ert_config_type = ert::configuration<device_t, Tp, count_type>

Definition at line 101 of file gpu_roofline.hpp.

◆ ert_counter_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::ert_counter_t = std::tuple<ert_counter_type<Types>...>

Definition at line 111 of file gpu_roofline.hpp.

◆ ert_counter_type

template<typename... Types>
template<typename Tp >
using tim::component::gpu_roofline< Types >::ert_counter_type = ert::counter<device_t, Tp, count_type>

Definition at line 103 of file gpu_roofline.hpp.

◆ ert_data_ptr_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::ert_data_ptr_t = std::shared_ptr<ert_data_t>

Definition at line 97 of file gpu_roofline.hpp.

◆ ert_data_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::ert_data_t = ert::exec_data<count_type>

Definition at line 96 of file gpu_roofline.hpp.

◆ ert_executor_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::ert_executor_t = std::tuple<ert_executor_type<Types>...>

Definition at line 112 of file gpu_roofline.hpp.

◆ ert_executor_type

template<typename... Types>
template<typename Tp >
using tim::component::gpu_roofline< Types >::ert_executor_type = ert::executor<device_t, Tp, count_type>

Definition at line 105 of file gpu_roofline.hpp.

◆ events_callback_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::events_callback_t = std::function<strvec_t()>

Definition at line 133 of file gpu_roofline.hpp.

◆ label_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::label_type = std::vector<std::string>

Definition at line 86 of file gpu_roofline.hpp.

◆ metrics_callback_t

Definition at line 134 of file gpu_roofline.hpp.

◆ result_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::result_type = std::vector<double>

Definition at line 85 of file gpu_roofline.hpp.

◆ secondary_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::secondary_type = std::unordered_multimap<std::string, value_type>

Definition at line 583 of file gpu_roofline.hpp.

◆ size_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::size_type = std::size_t

Definition at line 81 of file gpu_roofline.hpp.

◆ storage_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::storage_type = typename base_type::storage_type

Definition at line 79 of file gpu_roofline.hpp.

◆ strvec_t

template<typename... Types>
using tim::component::gpu_roofline< Types >::strvec_t = std::vector<std::string>

Definition at line 132 of file gpu_roofline.hpp.

◆ this_type

template<typename... Types>
using tim::component::gpu_roofline< Types >::this_type = gpu_roofline<Types...>

Definition at line 77 of file gpu_roofline.hpp.

◆ types_tuple

template<typename... Types>
using tim::component::gpu_roofline< Types >::types_tuple = std::tuple<Types...>

Definition at line 88 of file gpu_roofline.hpp.

◆ value_type

Definition at line 75 of file gpu_roofline.hpp.

Member Enumeration Documentation

◆ MODE

template<typename... Types>
enum tim::component::gpu_roofline::MODE
strong
Enumerator
COUNTERS 
ACTIVITY 

Definition at line 124 of file gpu_roofline.hpp.

125  {
126  COUNTERS,
127  ACTIVITY
128  };

Constructor & Destructor Documentation

◆ gpu_roofline() [1/3]

template<typename... Types>
tim::component::gpu_roofline< Types >::gpu_roofline ( )
inline

Definition at line 428 of file gpu_roofline.hpp.

428 { configure(); }

References tim::component::gpu_roofline< Types >::configure().

◆ ~gpu_roofline()

template<typename... Types>
tim::component::gpu_roofline< Types >::~gpu_roofline ( )
default

◆ gpu_roofline() [2/3]

template<typename... Types>
tim::component::gpu_roofline< Types >::gpu_roofline ( const gpu_roofline< Types > &  rhs)
inline

Definition at line 431 of file gpu_roofline.hpp.

432  : base_type(rhs)
433  , m_data(rhs.m_data)
434  {}
base< this_type, value_type > base_type

◆ gpu_roofline() [3/3]

template<typename... Types>
tim::component::gpu_roofline< Types >::gpu_roofline ( gpu_roofline< Types > &&  )
default noexcept

Member Function Documentation

◆ configure() [1/2]

template<typename... Types>
static void tim::component::gpu_roofline< Types >::configure ( )
inline static

Definition at line 262 of file gpu_roofline.hpp.

263  {
264  if(!is_configured())
265  configure(event_mode());
266  }

References tim::component::gpu_roofline< Types >::event_mode().

Referenced by tim::component::gpu_roofline< Types >::gpu_roofline().

◆ configure() [2/2]

template<typename... Types>
static void tim::component::gpu_roofline< Types >::configure ( const MODE &  _mode,
int  _device = 0 
)
inline static

Definition at line 181 of file gpu_roofline.hpp.

182  {
183  if(is_configured())
184  return;
185  is_configured() = true;
186 
187  event_mode() = _mode;
188 
189  if(event_mode() == MODE::ACTIVITY)
190  {
191  get_labels() = { std::string("runtime") };
192  }
193  else
194  {
195  strvec_t events = { "global_load", "global_store" };
196  strvec_t metrics = { "ldst_executed" };
197 #if defined(TIMEMORY_USE_CUDA_HALF)
198  if(is_one_of<cuda::fp16_t, types_tuple>::value)
199  {
200  metrics.push_back("flop_count_hp");
201  }
202 #endif
203 
204  if(is_one_of<float, types_tuple>::value)
205  {
206  metrics.push_back("flop_count_sp");
207  }
208 
209  if(is_one_of<double, types_tuple>::value)
210  {
211  metrics.push_back("flop_count_dp");
212  }
213 
214  // integer
215  if(mpl::is_one_of_integral<types_tuple>::value ||
216  settings::instruction_roofline())
217  {
218  for(const string_t& itr :
219  { "ipc", "inst_executed", "inst_integer", "inst_fp_64", "inst_fp_32",
220  "inst_fp_16", "local_load_transactions_per_request",
221  "local_store_transactions_per_request",
222  "shared_load_transactions_per_request",
223  "shared_store_transactions_per_request",
224  "gld_transactions_per_request", "gst_transactions_per_request",
225  "inst_executed_global_reductions", "inst_executed_global_stores",
226  "inst_executed_global_loads", "inst_executed_local_loads",
227  "inst_executed_local_stores", "inst_executed_shared_loads",
228  "inst_executed_shared_stores" })
229  metrics.push_back(itr);
230  }
231 
232  // add in extra events
233  auto _extra_events = get_events_callback()();
234  for(const auto& itr : _extra_events)
235  events.push_back(itr);
236 
237  // add in extra metrics
238  auto _extra_metrics = get_metrics_callback()();
239  for(const auto& itr : _extra_metrics)
240  metrics.push_back(itr);
241 
242  auto _get_unique = [](const strvec_t& _vec) {
243  std::set<std::string> _set;
244  for(const auto& itr : _vec)
245  _set.insert(itr);
246  strvec_t _ret;
247  for(const auto& itr : _set)
248  _ret.push_back(itr);
249  return _ret;
250  };
251 
252  metrics = _get_unique(metrics);
253  events = _get_unique(events);
254 
255  counters_type::configure(_device, events, metrics);
256  get_labels() = counters_type::label_array();
257  }
258  }
std::string string_t
Definition: library.cpp:56
instruction_roofline
Definition: settings.cpp:1442
tim::mpl::apply< std::string > string
Definition: macros.hpp:52
static array_t< string_t > label_array()
std::vector< std::string > strvec_t
static metrics_callback_t & get_metrics_callback()
static events_callback_t & get_events_callback()

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::cupti_counters::configure(), tim::component::gpu_roofline< Types >::event_mode(), tim::component::gpu_roofline< Types >::get_events_callback(), tim::component::gpu_roofline< Types >::get_metrics_callback(), tim::instruction_roofline, and tim::component::cupti_counters::label_array().

◆ description()

template<typename... Types>
static std::string tim::component::gpu_roofline< Types >::description ( )
inline static

Definition at line 391 of file gpu_roofline.hpp.

392  {
393  return "Model used to provide performance relative to the peak possible "
394  "performance on a GPU architecture.";
395  }

◆ display_unit()

◆ display_unit_array()

template<typename... Types>
static label_type tim::component::gpu_roofline< Types >::display_unit_array ( )
inline static

Definition at line 663 of file gpu_roofline.hpp.

664  {
665  const auto& _labels = get_labels();
666  return label_type(_labels.size(), this_type::display_unit());
667  }
std::vector< std::string > label_type
static std::string display_unit()

References tim::component::gpu_roofline< Types >::display_unit().

◆ event_mode()

template<typename... Types>
static MODE& tim::component::gpu_roofline< Types >::event_mode ( )
inline static

Definition at line 153 of file gpu_roofline.hpp.

154  {
155  auto&& _get = [=]() {
156  auto&& aslc = [](std::string str) {
157  for(auto& itr : str)
158  itr = tolower(itr);
159  return str;
160  };
161 
162  // check the standard variable
163  std::string _env = aslc(settings::gpu_roofline_mode());
164  if(_env.empty())
165  _env = aslc(settings::roofline_mode());
166  return (_env == "op" || _env == "hw" || _env == "counters")
167  ? MODE::COUNTERS
168  : ((_env == "ai" || _env == "ac" || _env == "activity")
169  ? MODE::ACTIVITY
170  : MODE::COUNTERS);
171  };
172 
173  static MODE _instance = _get();
174  if(!is_configured())
175  _instance = _get();
176  return _instance;
177  }
roofline_mode
Definition: settings.cpp:1426
gpu_roofline_mode
Definition: settings.cpp:1430

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, tim::gpu_roofline_mode, and tim::roofline_mode.

Referenced by tim::component::gpu_roofline< Types >::configure(), tim::component::gpu_roofline< Types >::display_unit(), tim::component::gpu_roofline< Types >::get(), tim::component::gpu_roofline< Types >::get_display(), tim::component::gpu_roofline< Types >::get_mode_string(), tim::component::gpu_roofline< Types >::get_secondary(), tim::component::gpu_roofline< Types >::global_finalize(), tim::component::gpu_roofline< Types >::global_init(), tim::component::gpu_roofline< Types >::load(), tim::component::gpu_roofline< Types >::operator+=(), tim::component::gpu_roofline< Types >::operator-=(), tim::component::gpu_roofline< Types >::record(), tim::component::gpu_roofline< Types >::save(), tim::component::gpu_roofline< Types >::start(), tim::component::gpu_roofline< Types >::stop(), and tim::component::gpu_roofline< Types >::unit().

◆ extra_serialization()

template<typename... Types>
template<typename Archive >
static void tim::component::gpu_roofline< Types >::extra_serialization ( Archive &  ar)
inline static

Definition at line 359 of file gpu_roofline.hpp.

360  {
361  auto& _ert_data = get_ert_data();
362  if(!_ert_data) // for input
363  _ert_data = std::make_shared<ert_data_t>();
364  ar(cereal::make_nvp("roofline", *_ert_data));
365  }
static ert_data_ptr_t & get_ert_data()

References tim::component::gpu_roofline< Types >::get_ert_data().

◆ get()

template<typename... Types>
result_type tim::component::gpu_roofline< Types >::get ( ) const
inline

Definition at line 451 of file gpu_roofline.hpp.

452  {
453  switch(event_mode())
454  {
455  case MODE::ACTIVITY: return result_type({ m_data.activity->get() });
456  case MODE::COUNTERS: return m_data.counters->get();
457  default: break;
458  }
459  return result_type{};
460  }
std::vector< double > result_type

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

Referenced by tim::component::gpu_roofline< Types >::save().

◆ get_display()

template<typename... Types>
string_t tim::component::gpu_roofline< Types >::get_display ( ) const
inline

Definition at line 636 of file gpu_roofline.hpp.

637  {
638  std::stringstream ss;
639  if(event_mode() == MODE::COUNTERS)
640  {
641  return m_data.counters->get_display();
642  }
643  {
644  ss << m_data.activity->get_display();
645  }
646  return ss.str();
647  }

References tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

Referenced by tim::component::gpu_roofline< Types >::save().

◆ get_ert_data()

template<typename... Types>
static ert_data_ptr_t& tim::component::gpu_roofline< Types >::get_ert_data ( )
inline static

Definition at line 292 of file gpu_roofline.hpp.

293  {
294  static ert_data_ptr_t _instance = std::make_shared<ert_data_t>();
295  return _instance;
296  }
std::shared_ptr< ert_data_t > ert_data_ptr_t

Referenced by tim::component::gpu_roofline< Types >::extra_serialization(), and tim::component::gpu_roofline< Types >::global_finalize().

◆ get_events_callback()

template<typename... Types>
static events_callback_t& tim::component::gpu_roofline< Types >::get_events_callback ( )
inline static

Definition at line 138 of file gpu_roofline.hpp.

139  {
140  static events_callback_t _instance = []() { return strvec_t{}; };
141  return _instance;
142  }
std::function< strvec_t()> events_callback_t

Referenced by tim::component::gpu_roofline< Types >::configure().

◆ get_finalizer()

template<typename... Types>
static ert_config_t& tim::component::gpu_roofline< Types >::get_finalizer ( )
inline static

Definition at line 284 of file gpu_roofline.hpp.

285  {
286  static ert_config_t _instance;
287  return _instance;
288  }
std::tuple< ert_config_type< Types >... > ert_config_t

Referenced by tim::component::gpu_roofline< Types >::global_finalize().

◆ get_metrics_callback()

template<typename... Types>
static metrics_callback_t& tim::component::gpu_roofline< Types >::get_metrics_callback ( )
inline static

Definition at line 144 of file gpu_roofline.hpp.

145  {
146  static metrics_callback_t _instance = []() { return strvec_t{}; };
147  return _instance;
148  }
events_callback_t metrics_callback_t

Referenced by tim::component::gpu_roofline< Types >::configure().

◆ get_mode_string()

template<typename... Types>
static std::string tim::component::gpu_roofline< Types >::get_mode_string ( )
inline static

◆ get_secondary()

template<typename... Types>
secondary_type tim::component::gpu_roofline< Types >::get_secondary ( ) const
inline

Definition at line 587 of file gpu_roofline.hpp.

588  {
589  secondary_type ret;
590  switch(event_mode())
591  {
592  case MODE::ACTIVITY:
593  {
594  auto&& _tmp = m_data.activity->get_secondary();
595  for(auto&& itr : _tmp)
596  {
597  ret.insert(
598  { itr.first, value_type{ itr.second, counters_value_type{} } });
599  }
600  break;
601  }
602  case MODE::COUNTERS:
603  {
604  auto&& _tmp = m_data.counters->get_secondary();
605  for(auto&& itr : _tmp)
606  {
607  ret.insert(
608  { itr.first, value_type{ activity_value_type{}, itr.second } });
609  }
610  break;
611  }
612  }
613  return ret;
614  }
typename cupti_activity::value_type activity_value_type
std::unordered_multimap< std::string, value_type > secondary_type
typename cupti_counters::value_type counters_value_type
std::tuple< typename cupti_activity::value_type, typename cupti_counters::value_type > value_type

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

◆ get_type_string()

template<typename... Types>
static std::string tim::component::gpu_roofline< Types >::get_type_string ( )
inline static

Definition at line 277 of file gpu_roofline.hpp.

278  {
279  return mpl::apply<std::string>::join('_', demangle(typeid(Types).name())...);
280  }
std::string demangle(const char *_mangled_name, int *_status=nullptr)
Definition: utility.hpp:166
static string_t join(SepT &&separator, Tuple &&__tup, index_sequence< Idx... >) noexcept
Definition: apply.hpp:409

References tim::demangle(), and tim::mpl::apply< Ret >::join().

Referenced by tim::component::gpu_roofline< Types >::label(), and tim::component::gpu_roofline< Types >::save().

◆ global_finalize()

template<typename... Types>
static void tim::component::gpu_roofline< Types >::global_finalize ( storage_type *  _store)
inline static

Definition at line 324 of file gpu_roofline.hpp.

325  {
326  // disable the activity/counters before running ERT
327  if(event_mode() == MODE::ACTIVITY)
328  {
329  operation::fini<activity_type>{}(
330  operation::mode_constant<operation::fini_mode::global>{});
331  }
332  else
333  {
334  operation::fini<counters_type>{}(
335  operation::mode_constant<operation::fini_mode::global>{});
336  }
337 
338  // run ERT
339  if(_store && _store->size() > 0)
340  {
341  assert(_store->is_finalizing());
342  // run roofline peak generation
343  auto ert_config = get_finalizer();
344  auto ert_data = get_ert_data();
345  mpl::apply<void>::access<ert_executor_t>(ert_config, ert_data);
346  if(ert_data && (settings::verbose() > 0 || settings::debug()))
347  std::cout << *(ert_data) << std::endl;
348  }
349  }
static ert_config_t & get_finalizer()

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::debug, tim::component::gpu_roofline< Types >::event_mode(), tim::component::gpu_roofline< Types >::get_ert_data(), tim::component::gpu_roofline< Types >::get_finalizer(), and tim::verbose.

◆ global_init()

template<typename... Types>
static void tim::component::gpu_roofline< Types >::global_init ( )
inline static

Definition at line 300 of file gpu_roofline.hpp.

301  {
302  if(event_mode() == MODE::ACTIVITY)
303  {
304  operation::init<activity_type>{}(
305  operation::mode_constant<operation::init_mode::global>{});
306  }
307  else
308  {
309  operation::init<counters_type>{}(
310  operation::mode_constant<operation::init_mode::global>{});
311  }
312  }

References tim::component::gpu_roofline< Types >::ACTIVITY, and tim::component::gpu_roofline< Types >::event_mode().

◆ label()

template<typename... Types>
static std::string tim::component::gpu_roofline< Types >::label ( )
inline static

Definition at line 376 of file gpu_roofline.hpp.

377  {
378  if(settings::roofline_type_labels_gpu() || settings::roofline_type_labels())
379  {
380  auto ret = std::string("gpu_roofline_") + get_type_string() + "_" +
381  get_mode_string();
382  // erase consecutive underscores
383  while(ret.find("__") != std::string::npos)
384  ret.erase(ret.find("__"), 1);
385  return ret;
386  }
387 
388  return std::string("gpu_roofline_") + get_mode_string();
389  }
roofline_type_labels_gpu
Definition: settings.cpp:1440
roofline_type_labels
Definition: settings.cpp:1436
static std::string get_type_string()
static std::string get_mode_string()

References tim::component::gpu_roofline< Types >::get_mode_string(), tim::component::gpu_roofline< Types >::get_type_string(), tim::roofline_type_labels, and tim::roofline_type_labels_gpu.

◆ label_array()

template<typename... Types>
static label_type tim::component::gpu_roofline< Types >::label_array ( )
inline static

Definition at line 659 of file gpu_roofline.hpp.

659 { return this_type::get_labels(); }

◆ load()

template<typename... Types>
template<typename Archive >
void tim::component::gpu_roofline< Types >::load ( Archive &  ar,
const unsigned int   
)
inline

Definition at line 845 of file gpu_roofline.hpp.

846  {
847  std::string _disp;
848  result_type _data;
849  std::vector<std::string> _labels;
850  std::string _mode_str;
851  std::string _type_str;
852 
853  ar(cereal::make_nvp("laps", laps), cereal::make_nvp("display", _disp),
854  cereal::make_nvp("mode", _mode_str), cereal::make_nvp("type", _type_str),
855  cereal::make_nvp("labels", _labels));
856 
857  if(_mode_str == "counters")
858  {
859  event_mode() = MODE::COUNTERS;
860  }
861  else if(_mode_str == "activity")
862  {
863  event_mode() = MODE::ACTIVITY;
864  }
865 
866  _data.resize(_labels.size());
867 
868  ar.setNextName("repr_data");
869  ar.startNode();
870  auto litr = _labels.begin();
871  auto ditr = _data.begin();
872  for(; litr != _labels.end() && ditr != _data.end(); ++litr, ++ditr)
873  ar(cereal::make_nvp(*litr, *ditr));
874  ar.finishNode();
875 
876  ar.setNextName("value");
877  ar.startNode();
878  if(event_mode() == MODE::ACTIVITY)
879  {
880  ar(std::get<0>(value));
881  }
882  else
883  {
884  ar(std::get<1>(value));
885  }
886  ar.finishNode();
887 
888  ar.setNextName("accum");
889  ar.startNode();
890  if(event_mode() == MODE::ACTIVITY)
891  {
892  ar(std::get<0>(accum));
893  }
894  else
895  {
896  ar(std::get<1>(accum));
897  }
898  ar.finishNode();
899  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, tim::component::gpu_roofline< Types >::event_mode(), and tim::component::base< gpu_roofline< Types... >, std::tuple< cupti_activity::value_type, cupti_counters::value_type > >::laps.

◆ operator+=() [1/2]

template<typename... Types>
this_type& tim::component::gpu_roofline< Types >::operator+= ( const this_type &  rhs)
inline

Definition at line 509 of file gpu_roofline.hpp.

510  {
511  switch(event_mode())
512  {
513  case MODE::ACTIVITY:
514  {
515  *m_data.activity += *rhs.m_data.activity;
516  std::get<0>(accum) = m_data.activity->get_accum();
517  std::get<0>(value) = m_data.activity->get_value();
518  break;
519  }
520  case MODE::COUNTERS:
521  {
522  *m_data.counters += *rhs.m_data.counters;
523  std::get<1>(accum) = m_data.counters->get_accum();
524  std::get<1>(value) = m_data.counters->get_value();
525  break;
526  }
527  }
528  return *this;
529  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

◆ operator+=() [2/2]

template<typename... Types>
this_type& tim::component::gpu_roofline< Types >::operator+= ( const value_type &  rhs)
inline

Definition at line 557 of file gpu_roofline.hpp.

558  {
559  switch(event_mode())
560  {
561  case MODE::ACTIVITY:
562  {
563  *m_data.activity += std::get<0>(rhs);
564  std::get<0>(accum) = m_data.activity->get_accum();
565  std::get<0>(value) = m_data.activity->get_value();
566  break;
567  }
568  case MODE::COUNTERS:
569  {
570  *m_data.counters += std::get<1>(rhs);
571  std::get<1>(accum) = m_data.counters->get_accum();
572  std::get<1>(value) = m_data.counters->get_value();
573  break;
574  }
575  }
576  return *this;
577  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

◆ operator-=()

template<typename... Types>
this_type& tim::component::gpu_roofline< Types >::operator-= ( const this_type &  rhs)
inline

Definition at line 533 of file gpu_roofline.hpp.

534  {
535  switch(event_mode())
536  {
537  case MODE::ACTIVITY:
538  {
539  *m_data.activity -= *rhs.m_data.activity;
540  std::get<0>(accum) = m_data.activity->get_accum();
541  std::get<0>(value) = m_data.activity->get_value();
542  break;
543  }
544  case MODE::COUNTERS:
545  {
546  *m_data.counters -= *rhs.m_data.counters;
547  std::get<1>(accum) = m_data.counters->get_accum();
548  std::get<1>(value) = m_data.counters->get_value();
549  break;
550  }
551  }
552  return *this;
553  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

◆ operator=() [1/2]

template<typename... Types>
gpu_roofline& tim::component::gpu_roofline< Types >::operator= ( const gpu_roofline< Types > &  rhs)
inline

Definition at line 436 of file gpu_roofline.hpp.

437  {
438  if(this != &rhs)
439  {
440  base_type::operator=(rhs);
441  m_data = rhs.m_data;
442  }
443  return *this;
444  }

◆ operator=() [2/2]

template<typename... Types>
gpu_roofline& tim::component::gpu_roofline< Types >::operator= ( gpu_roofline< Types > &&  )
default noexcept

◆ record()

template<typename... Types>
static value_type tim::component::gpu_roofline< Types >::record ( )
inline static

Definition at line 406 of file gpu_roofline.hpp.

407  {
408  value_type tmp;
409  switch(event_mode())
410  {
411  case MODE::ACTIVITY: std::get<0>(tmp) = activity_type::record(); break;
412  case MODE::COUNTERS: std::get<1>(tmp) = counters_type::record(); break;
413  default: break;
414  }
415  return tmp;
416  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, tim::component::gpu_roofline< Types >::event_mode(), tim::component::cupti_activity::record(), and tim::component::cupti_counters::record().

◆ save()

template<typename... Types>
template<typename Archive >
void tim::component::gpu_roofline< Types >::save ( Archive &  ar,
const unsigned int   
) const
inline

Definition at line 796 of file gpu_roofline.hpp.

797  {
798  auto _disp = get_display();
799  auto _data = get();
800  auto _labels = get_labels();
801 
802  ar(cereal::make_nvp("laps", laps), cereal::make_nvp("display", _disp),
803  cereal::make_nvp("mode", get_mode_string()),
804  cereal::make_nvp("type", get_type_string()),
805  cereal::make_nvp("labels", _labels));
806 
807  ar.setNextName("repr_data");
808  ar.startNode();
809  auto litr = _labels.begin();
810  auto ditr = _data.begin();
811  for(; litr != _labels.end() && ditr != _data.end(); ++litr, ++ditr)
812  ar(cereal::make_nvp(*litr, *ditr));
813  ar.finishNode();
814 
815  ar.setNextName("value");
816  ar.startNode();
817  ar.makeArray();
818  if(event_mode() == MODE::ACTIVITY)
819  {
820  ar(std::get<0>(value));
821  }
822  else
823  {
824  ar(std::get<1>(value));
825  }
826  ar.finishNode();
827 
828  ar.setNextName("accum");
829  ar.startNode();
830  ar.makeArray();
831  if(event_mode() == MODE::ACTIVITY)
832  {
833  ar(std::get<0>(accum));
834  }
835  else
836  {
837  ar(std::get<1>(accum));
838  }
839  ar.finishNode();
840  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::event_mode(), tim::component::gpu_roofline< Types >::get(), tim::component::gpu_roofline< Types >::get_display(), tim::component::gpu_roofline< Types >::get_mode_string(), tim::component::gpu_roofline< Types >::get_type_string(), and tim::component::base< gpu_roofline< Types... >, std::tuple< cupti_activity::value_type, cupti_counters::value_type > >::laps.

◆ set_executor_callback()

template<typename... Types>
template<typename Tp , typename FuncT >
static void tim::component::gpu_roofline< Types >::set_executor_callback ( FuncT &&  f)
inline static

Definition at line 317 of file gpu_roofline.hpp.

318  {
319  ert_executor_type<Tp>::get_callback() = std::forward<FuncT>(f);
320  }
static callback_type & get_callback()

References tim::ert::executor< DeviceT, Tp, CounterT >::get_callback().

◆ start()

template<typename... Types>
void tim::component::gpu_roofline< Types >::start ( )
inline

Definition at line 464 of file gpu_roofline.hpp.

465  {
466  switch(event_mode())
467  {
468  case MODE::ACTIVITY:
469  {
470  m_data.activity->start();
471  std::get<0>(value) = m_data.activity->get_value();
472  break;
473  }
474  case MODE::COUNTERS:
475  {
476  m_data.counters->start();
477  std::get<1>(value) = m_data.counters->get_value();
478  break;
479  }
480  }
481  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

◆ stop()

template<typename... Types>
void tim::component::gpu_roofline< Types >::stop ( )
inline

Definition at line 485 of file gpu_roofline.hpp.

486  {
487  using namespace tim::component::operators;
488  switch(event_mode())
489  {
490  case MODE::ACTIVITY:
491  {
492  m_data.activity->stop();
493  std::get<0>(accum) = m_data.activity->get_accum();
494  std::get<0>(value) = m_data.activity->get_value();
495  break;
496  }
497  case MODE::COUNTERS:
498  {
499  m_data.counters->stop();
500  std::get<1>(accum) = m_data.counters->get_accum();
501  std::get<1>(value) = m_data.counters->get_value();
502  break;
503  }
504  }
505  }

References tim::component::gpu_roofline< Types >::ACTIVITY, tim::component::gpu_roofline< Types >::COUNTERS, and tim::component::gpu_roofline< Types >::event_mode().

◆ thread_finalize()

template<typename... Types>
static void tim::component::gpu_roofline< Types >::thread_finalize ( )
inline static

Definition at line 354 of file gpu_roofline.hpp.

354 {}

◆ thread_init()

template<typename... Types>
static void tim::component::gpu_roofline< Types >::thread_init ( )
inline static

Definition at line 353 of file gpu_roofline.hpp.

353 {}

◆ unit()

Friends And Related Function Documentation

◆ impl::storage< this_type, trait::uses_value_storage< this_type, value_type >::value >

template<typename... Types>
friend class impl::storage< this_type, trait::uses_value_storage< this_type, value_type >::value >
friend

Definition at line 587 of file gpu_roofline.hpp.

◆ operation::record< this_type >

template<typename... Types>
friend struct operation::record< this_type >
friend

Definition at line 1 of file gpu_roofline.hpp.

◆ operation::set_started< this_type >

template<typename... Types>
friend struct operation::set_started< this_type >
friend

Definition at line 1 of file gpu_roofline.hpp.

◆ operation::set_stopped< this_type >

template<typename... Types>
friend struct operation::set_stopped< this_type >
friend

Definition at line 1 of file gpu_roofline.hpp.

◆ operation::start< this_type >

template<typename... Types>
friend struct operation::start< this_type >
friend

Definition at line 1 of file gpu_roofline.hpp.

◆ operation::stop< this_type >

template<typename... Types>
friend struct operation::stop< this_type >
friend

Definition at line 1 of file gpu_roofline.hpp.

◆ operator<<

template<typename... Types>
std::ostream& operator<< ( std::ostream &  os,
const this_type obj 
)
friend

Definition at line 651 of file gpu_roofline.hpp.

652  {
653  os << as_string(obj.get_display());
654  return os;
655  }

Member Data Documentation

◆ precision

template<typename... Types>
const short tim::component::gpu_roofline< Types >::precision = 3
static

Definition at line 119 of file gpu_roofline.hpp.

◆ width

template<typename... Types>
const short tim::component::gpu_roofline< Types >::width = 8
static

Definition at line 120 of file gpu_roofline.hpp.


The documentation for this struct was generated from the following file: