timemory  3.2.1
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::component::cuda_event Struct Reference

Records the time interval between two points in a CUDA stream. Less accurate than 'cupti_activity' for kernel timing but does not require linking to the CUDA driver. More...

#include "timemory/components/cuda/components.hpp"

+ Collaboration diagram for tim::component::cuda_event:

Classes

struct  marker
 

Public Types

using ratio_t = std::milli
 
using value_type = float
 
using base_type = base< cuda_event, value_type >
 
using marker_list_t = std::vector< marker >
 

Public Member Functions

 cuda_event (cuda::stream_t _stream)
 
float get_display () const
 
float get () const
 
void start ()
 
void stop ()
 
void sync ()
 
void set_stream (cuda::stream_t _stream)
 
auto get_stream ()
 
void mark_begin ()
 
void mark_end ()
 
void mark_begin (cuda::stream_t _stream)
 
void mark_end (cuda::stream_t _stream)
 

Static Public Member Functions

static std::string label ()
 
static std::string description ()
 
static value_type record ()
 
static uint64_t & get_batched_marker_size ()
 

Protected Member Functions

void append_marker_list (const uint64_t nsize)
 

Detailed Description

Records the time interval between two points in a CUDA stream. Less accurate than 'cupti_activity' for kernel timing but does not require linking to the CUDA driver.

Definition at line 64 of file components.hpp.

Member Typedef Documentation

◆ base_type

◆ marker_list_t

Definition at line 108 of file components.hpp.

◆ ratio_t

Definition at line 105 of file components.hpp.

◆ value_type

Definition at line 106 of file components.hpp.

Constructor & Destructor Documentation

◆ cuda_event()

tim::component::cuda_event::cuda_event ( cuda::stream_t  _stream)
inline explicit

Definition at line 127 of file components.hpp.

128  : m_stream(_stream)
129 
130  {}

Member Function Documentation

◆ append_marker_list()

void tim::component::cuda_event::append_marker_list ( const uint64_t  nsize)
inline protected

Definition at line 219 of file components.hpp.

220  {
221  for(uint64_t i = 0; i < nsize; ++i)
222  m_markers.emplace_back(marker());
223  }

Referenced by mark_begin().

◆ description()

static std::string tim::component::cuda_event::description ( )
inline static

Definition at line 111 of file components.hpp.

112  {
113  return "Records the time interval between two points in a CUDA stream. Less "
114  "accurate than 'cupti_activity' for kernel timing";
115  }

◆ get()

float tim::component::cuda_event::get ( ) const
inline

Definition at line 144 of file components.hpp.

145  {
146  return static_cast<float>(load() / static_cast<float>(ratio_t::den) *
147  base_type::get_unit());
148  }
static int64_t get_unit()

References tim::component::base< Tp, Value >::get_unit(), and tim::component::base< cuda_event, float >::load().

◆ get_batched_marker_size()

static uint64_t& tim::component::cuda_event::get_batched_marker_size ( )
inlinestatic

Definition at line 118 of file components.hpp.

119  {
120  static uint64_t _instance = settings::cuda_event_batch_size();
121  return _instance;
122  }
cuda_event_batch_size
Definition: settings.cpp:1413

References tim::cuda_event_batch_size.

◆ get_display()

float tim::component::cuda_event::get_display ( ) const
inline

Definition at line 138 of file components.hpp.

139  {
140  return static_cast<float>(load() / static_cast<float>(ratio_t::den) *
141  base_type::get_unit());
142  }

References tim::component::base< Tp, Value >::get_unit(), and tim::component::base< cuda_event, float >::load().

◆ get_stream()

auto tim::component::cuda_event::get_stream ( )
inline

Definition at line 189 of file components.hpp.

189 { return m_stream; }

◆ label()

static std::string tim::component::cuda_event::label ( )
inline static

Definition at line 110 of file components.hpp.

110 { return "cuda_event"; }

◆ mark_begin() [1/2]

void tim::component::cuda_event::mark_begin ( )
inline

Definition at line 191 of file components.hpp.

192  {
193  m_markers_synced = false;
194  m_current_marker = m_num_markers++;
195  if(m_current_marker >= m_markers.size())
196  append_marker_list(std::max<uint64_t>(m_marker_batch_size, 1));
197  m_markers[m_current_marker].start(m_stream);
198  }
void append_marker_list(const uint64_t nsize)
Definition: components.hpp:219

References append_marker_list().

◆ mark_begin() [2/2]

void tim::component::cuda_event::mark_begin ( cuda::stream_t  _stream)
inline

Definition at line 202 of file components.hpp.

203  {
204  m_markers_synced = false;
205  m_current_marker = m_num_markers++;
206  if(m_current_marker >= m_markers.size())
207  append_marker_list(std::max<uint64_t>(m_marker_batch_size, 1));
208  m_markers[m_current_marker].start(_stream);
209  }

References append_marker_list().

◆ mark_end() [1/2]

void tim::component::cuda_event::mark_end ( )
inline

Definition at line 200 of file components.hpp.

200 { m_markers[m_current_marker].stop(m_stream); }

◆ mark_end() [2/2]

void tim::component::cuda_event::mark_end ( cuda::stream_t  _stream)
inline

Definition at line 211 of file components.hpp.

211 { m_markers[m_current_marker].stop(_stream); }

◆ record()

static value_type tim::component::cuda_event::record ( )
inline static

Definition at line 116 of file components.hpp.

116 { return 0.0f; }

◆ set_stream()

void tim::component::cuda_event::set_stream ( cuda::stream_t  _stream)
inline

Definition at line 188 of file components.hpp.

188 { m_stream = _stream; }

◆ start()

void tim::component::cuda_event::start ( )
inline

Definition at line 150 of file components.hpp.

151  {
152  m_global_synced = false;
153  m_global.start(m_stream);
154  }
void start(cuda::stream_t &stream)
Definition: components.hpp:77

References tim::component::cuda_event::marker::start().

◆ stop()

void tim::component::cuda_event::stop ( )
inline

Definition at line 156 of file components.hpp.

157  {
158  for(uint64_t i = 0; i < m_num_markers; ++i)
159  m_markers[i].stop(m_stream);
160  if(m_current_marker == 0 && m_num_markers == 0)
161  m_global.stop(m_stream);
162  sync();
163  }
void stop(cuda::stream_t &stream)
Definition: components.hpp:86

References tim::component::cuda_event::marker::stop(), and sync().

◆ sync()

void tim::component::cuda_event::sync ( )
inline

Definition at line 165 of file components.hpp.

166  {
167  if(m_current_marker == 0 && m_num_markers == 0)
168  {
169  if(!m_global_synced)
170  {
171  float tmp = m_global.sync();
172  m_global_synced = true;
173  accum += tmp;
174  value = tmp;
175  }
176  }
177  else if(m_current_marker > m_synced_markers)
178  {
179  float tmp = 0.0;
180  for(uint64_t i = m_synced_markers; i < m_num_markers; ++i, ++m_synced_markers)
181  tmp += m_markers[i].sync();
182  m_markers_synced = true;
183  accum += tmp;
184  value = tmp;
185  }
186  }

References tim::component::cuda_event::marker::sync().

Referenced by stop().


The documentation for this struct was generated from the following file: