Records the time interval between two points in a CUDA stream. Less accurate than 'cupti_activity' for kernel timing but does not require linking to the CUDA driver.
More...
#include "timemory/components/cuda/components.hpp"
Records the time interval between two points in a CUDA stream. Less accurate than 'cupti_activity' for kernel timing but does not require linking to the CUDA driver.
Definition at line 64 of file components.hpp.
◆ tim::component::cuda_event::explicit_streams_only
struct tim::component::cuda_event::explicit_streams_only |
◆ base_type
◆ marker_list_t
◆ ratio_t
◆ storage_type
◆ value_type
◆ cuda_event()
tim::component::cuda_event::cuda_event |
( |
cuda::stream_t |
_stream | ) |
|
|
inline explicit |
◆ append_marker_list()
void tim::component::cuda_event::append_marker_list |
( |
const uint64_t |
nsize | ) |
|
|
inline protected |
Definition at line 209 of file components.hpp.
211 m_markers.reserve(m_markers.size() + nsize);
212 for(uint64_t i = 0; i < nsize; ++i)
213 m_markers.emplace_back(marker{});
Referenced by mark_begin().
◆ description()
static std::string tim::component::cuda_event::description |
( |
| ) |
|
|
inline static |
Definition at line 111 of file components.hpp.
113 return "Records the time interval between two points in a CUDA stream. Less "
114 "accurate than 'cupti_activity' for kernel timing";
◆ get()
float tim::component::cuda_event::get |
( |
| ) |
const |
|
inline noexcept |
◆ get_batched_marker_size()
static uint64_t & tim::component::cuda_event::get_batched_marker_size |
( |
| ) |
|
|
inline static |
◆ get_display()
float tim::component::cuda_event::get_display |
( |
| ) |
const |
|
inline noexcept |
◆ get_opaque()
template<typename... Args>
static opaque tim::component::empty_base::get_opaque |
( |
Args && |
... | ) |
|
|
inline static inherited |
◆ get_stream()
auto tim::component::cuda_event::get_stream |
( |
| ) |
|
|
inline |
◆ label()
static std::string tim::component::cuda_event::label |
( |
| ) |
|
|
inline static |
◆ mark_begin() [1/2]
void tim::component::cuda_event::mark_begin |
( |
| ) |
|
|
inline |
Definition at line 186 of file components.hpp.
188 m_markers_synced = false;
189 m_current_marker = m_num_markers++;
190 if(m_current_marker >= m_markers.size())
192 m_markers[m_current_marker].start(m_stream);
void append_marker_list(const uint64_t nsize)
References append_marker_list().
◆ mark_begin() [2/2]
void tim::component::cuda_event::mark_begin |
( |
cuda::stream_t |
_stream | ) |
|
|
inline |
Definition at line 197 of file components.hpp.
199 m_markers_synced = false;
200 m_current_marker = m_num_markers++;
201 if(m_current_marker >= m_markers.size())
203 m_markers[m_current_marker].start(_stream);
References append_marker_list().
◆ mark_end() [1/2]
void tim::component::cuda_event::mark_end |
( |
| ) |
|
|
inline |
Definition at line 195 of file components.hpp.
195 { m_markers[m_current_marker].stop(m_stream); }
◆ mark_end() [2/2]
void tim::component::cuda_event::mark_end |
( |
cuda::stream_t |
_stream | ) |
|
|
inline |
Definition at line 206 of file components.hpp.
206 { m_markers[m_current_marker].stop(_stream); }
◆ record()
static value_type tim::component::cuda_event::record |
( |
| ) |
|
|
inline static |
◆ set_stream()
void tim::component::cuda_event::set_stream |
( |
cuda::stream_t |
_stream | ) |
|
|
inline |
◆ start()
void tim::component::cuda_event::start |
( |
| ) |
|
|
inline |
◆ stop()
void tim::component::cuda_event::stop |
( |
| ) |
|
|
inline |
◆ store()
◆ sync()
void tim::component::cuda_event::sync |
( |
| ) |
|
|
inline |
Definition at line 160 of file components.hpp.
162 if(m_current_marker == 0 && m_num_markers == 0)
166 float tmp = m_global.sync();
167 m_global_synced = true;
172 else if(m_current_marker > m_synced_markers)
175 for(uint64_t i = m_synced_markers; i < m_num_markers; ++i, ++m_synced_markers)
176 tmp += m_markers[i].sync();
177 m_markers_synced = true;
References tim::component::cuda_event::marker::sync(), and sync().
Referenced by stop(), and sync().
The documentation for this struct was generated from the following file: