timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::component::cuda_event Struct Reference

Records the time interval between two points in a CUDA stream. Less accurate than 'cupti_activity' for kernel timing but does not require linking to the CUDA driver. More...

#include "timemory/components/cuda/components.hpp"

+ Collaboration diagram for tim::component::cuda_event:

Classes

struct  explicit_streams_only
 
struct  marker
 

Public Types

using ratio_t = std::milli
 
using value_type = float
 
using base_type = base< cuda_event, value_type >
 
using marker_list_t = std::vector< marker >
 
using storage_type = empty_storage
 

Public Member Functions

 cuda_event (cuda::stream_t _stream)
 
float get () const noexcept
 
float get_display () const noexcept
 
void store (explicit_streams_only, bool _v)
 
void start ()
 
void stop ()
 
void sync ()
 
void set_stream (cuda::stream_t _stream)
 
auto get_stream ()
 
void mark_begin ()
 
void mark_end ()
 
void mark_begin (cuda::stream_t _stream)
 
void mark_end (cuda::stream_t _stream)
 

Static Public Member Functions

static std::string label ()
 
static std::string description ()
 
static value_type record ()
 
static uint64_t & get_batched_marker_size ()
 
template<typename... Args>
static opaque get_opaque (Args &&...)
 

Protected Member Functions

void append_marker_list (const uint64_t nsize)
 

Detailed Description

Records the time interval between two points in a CUDA stream. Less accurate than 'cupti_activity' for kernel timing but does not require linking to the CUDA driver.

Definition at line 64 of file components.hpp.


Class Documentation

◆ tim::component::cuda_event::explicit_streams_only

struct tim::component::cuda_event::explicit_streams_only

Definition at line 124 of file components.hpp.

+ Collaboration diagram for tim::component::cuda_event::explicit_streams_only:

Member Typedef Documentation

◆ base_type

◆ marker_list_t

Definition at line 108 of file components.hpp.

◆ ratio_t

Definition at line 105 of file components.hpp.

◆ storage_type

Definition at line 66 of file declaration.hpp.

◆ value_type

Definition at line 106 of file components.hpp.

Constructor & Destructor Documentation

◆ cuda_event()

tim::component::cuda_event::cuda_event ( cuda::stream_t  _stream)
inline explicit

Definition at line 130 of file components.hpp.

131 : m_stream(_stream)
132 {}

Member Function Documentation

◆ append_marker_list()

void tim::component::cuda_event::append_marker_list ( const uint64_t  nsize)
inline protected

Definition at line 209 of file components.hpp.

210 {
211 m_markers.reserve(m_markers.size() + nsize);
212 for(uint64_t i = 0; i < nsize; ++i)
213 m_markers.emplace_back(marker{});
214 }

Referenced by mark_begin().

◆ description()

static std::string tim::component::cuda_event::description ( )
inline static

Definition at line 111 of file components.hpp.

112 {
113 return "Records the time interval between two points in a CUDA stream. Less "
114 "accurate than 'cupti_activity' for kernel timing";
115 }

◆ get()

float tim::component::cuda_event::get ( ) const
inline noexcept

Definition at line 134 of file components.hpp.

135 {
136 return load() / static_cast<float>(base_type::get_unit());
137 }
static int64_t get_unit()

References tim::component::base< Tp, Value >::get_unit(), and tim::component::base< cuda_event, float >::load().

Referenced by get_display().

◆ get_batched_marker_size()

static uint64_t & tim::component::cuda_event::get_batched_marker_size ( )
inline static

Definition at line 118 of file components.hpp.

119 {
120 static uint64_t _instance = settings::cuda_event_batch_size();
121 return _instance;
122 }
cuda_event_batch_size
Definition: settings.cpp:1719

References tim::cuda_event_batch_size.

◆ get_display()

float tim::component::cuda_event::get_display ( ) const
inline noexcept

Definition at line 138 of file components.hpp.

138{ return get(); }
float get() const noexcept
Definition: components.hpp:134

References get().

◆ get_opaque()

template<typename... Args>
static opaque tim::component::empty_base::get_opaque ( Args &&  ...)
inline static inherited

Definition at line 72 of file declaration.hpp.

73 {
74 return opaque{};
75 }

◆ get_stream()

auto tim::component::cuda_event::get_stream ( )
inline

Definition at line 184 of file components.hpp.

184{ return m_stream; }

◆ label()

static std::string tim::component::cuda_event::label ( )
inline static

Definition at line 110 of file components.hpp.

110{ return "cuda_event"; }

◆ mark_begin() [1/2]

void tim::component::cuda_event::mark_begin ( )
inline

Definition at line 186 of file components.hpp.

187 {
188 m_markers_synced = false;
189 m_current_marker = m_num_markers++;
190 if(m_current_marker >= m_markers.size())
191 append_marker_list(std::max<uint64_t>(m_marker_batch_size, 1));
192 m_markers[m_current_marker].start(m_stream);
193 }
void append_marker_list(const uint64_t nsize)
Definition: components.hpp:209

References append_marker_list().

◆ mark_begin() [2/2]

void tim::component::cuda_event::mark_begin ( cuda::stream_t  _stream)
inline

Definition at line 197 of file components.hpp.

198 {
199 m_markers_synced = false;
200 m_current_marker = m_num_markers++;
201 if(m_current_marker >= m_markers.size())
202 append_marker_list(std::max<uint64_t>(m_marker_batch_size, 1));
203 m_markers[m_current_marker].start(_stream);
204 }

References append_marker_list().

◆ mark_end() [1/2]

void tim::component::cuda_event::mark_end ( )
inline

Definition at line 195 of file components.hpp.

195{ m_markers[m_current_marker].stop(m_stream); }

◆ mark_end() [2/2]

void tim::component::cuda_event::mark_end ( cuda::stream_t  _stream)
inline

Definition at line 206 of file components.hpp.

206{ m_markers[m_current_marker].stop(_stream); }

◆ record()

static value_type tim::component::cuda_event::record ( )
inline static

Definition at line 116 of file components.hpp.

116{ return 0.0f; }

◆ set_stream()

void tim::component::cuda_event::set_stream ( cuda::stream_t  _stream)
inline

Definition at line 183 of file components.hpp.

183{ m_stream = _stream; }

◆ start()

void tim::component::cuda_event::start ( )
inline

Definition at line 142 of file components.hpp.

143 {
144 if(!m_explicit_only || m_stream != cuda::default_stream_v)
145 {
146 m_global_synced = false;
147 m_global.start(m_stream);
148 }
149 }
void start(cuda::stream_t &stream)
Definition: components.hpp:77

References tim::component::cuda_event::marker::start().

◆ stop()

void tim::component::cuda_event::stop ( )
inline

Definition at line 151 of file components.hpp.

152 {
153 for(uint64_t i = 0; i < m_num_markers; ++i)
154 m_markers[i].stop(m_stream);
155 if(m_current_marker == 0 && m_num_markers == 0)
156 m_global.stop(m_stream);
157 sync();
158 }
void stop(cuda::stream_t &stream)
Definition: components.hpp:86

References stop(), tim::component::cuda_event::marker::stop(), and sync().

Referenced by stop().

◆ store()

void tim::component::cuda_event::store ( explicit_streams_only  ,
bool  _v 
)
inline

Definition at line 140 of file components.hpp.

140{ m_explicit_only = _v; }

◆ sync()

void tim::component::cuda_event::sync ( )
inline

Definition at line 160 of file components.hpp.

161 {
162 if(m_current_marker == 0 && m_num_markers == 0)
163 {
164 if(!m_global_synced)
165 {
166 float tmp = m_global.sync();
167 m_global_synced = true;
168 accum += tmp;
169 value = tmp;
170 }
171 }
172 else if(m_current_marker > m_synced_markers)
173 {
174 float tmp = 0.0;
175 for(uint64_t i = m_synced_markers; i < m_num_markers; ++i, ++m_synced_markers)
176 tmp += m_markers[i].sync();
177 m_markers_synced = true;
178 accum += tmp;
179 value = tmp;
180 }
181 }

References tim::component::cuda_event::marker::sync(), and sync().

Referenced by stop(), and sync().


The documentation for this struct was generated from the following file: