32#include "timemory/backends/device.hpp"
34#include "timemory/components/hip/backends.hpp"
43#if defined(TIMEMORY_PYBIND11_SOURCE)
44# include "pybind11/cast.h"
45# include "pybind11/pybind11.h"
46# include "pybind11/stl.h"
71 hip::event_t
first = hip::event_t{};
72 hip::event_t
second = hip::event_t{};
101 return hip::event_elapsed_time(
first,
second) * units::msec;
112 return "Records the time interval between two points in a HIP stream. Less "
113 "accurate than 'roctracer' for kernel timing";
141 if(!m_explicit_only || m_stream != hip::default_stream_v)
143 m_global_synced =
false;
144 m_global.
start(m_stream);
150 for(uint64_t i = 0; i < m_num_markers; ++i)
151 m_markers[i].
stop(m_stream);
152 if(m_current_marker == 0 && m_num_markers == 0)
153 m_global.
stop(m_stream);
159 if(m_current_marker == 0 && m_num_markers == 0)
163 float tmp = m_global.
sync();
164 m_global_synced =
true;
169 else if(m_current_marker > m_synced_markers)
172 for(uint64_t i = m_synced_markers; i < m_num_markers; ++i, ++m_synced_markers)
173 tmp += m_markers[i].
sync();
174 m_markers_synced =
true;
/// Store \p _stream in m_stream. The member is what the no-argument
/// start/stop/mark calls in this component pass to their markers.
180 void set_stream(hip::stream_t _stream) { m_stream = _stream; }
185 m_markers_synced =
false;
186 m_current_marker = m_num_markers++;
187 if(m_current_marker >= m_markers.size())
189 m_markers[m_current_marker].start(m_stream);
/// Stop the marker at index m_current_marker, passing the stored stream
/// (m_stream) to its stop().
192 void mark_end() { m_markers[m_current_marker].stop(m_stream); }
196 m_markers_synced =
false;
197 m_current_marker = m_num_markers++;
198 if(m_current_marker >= m_markers.size())
200 m_markers[m_current_marker].start(_stream);
/// Stop the marker at index m_current_marker on the caller-supplied
/// \p _stream rather than the stored m_stream.
203 void mark_end(hip::stream_t _stream) { m_markers[m_current_marker].stop(_stream); }
208 m_markers.reserve(m_markers.size() + nsize);
209 for(uint64_t i = 0; i < nsize; ++i)
210 m_markers.emplace_back(
marker{});
214 bool m_global_synced =
false;
215 bool m_markers_synced =
false;
216 bool m_explicit_only =
false;
217 uint64_t m_synced_markers = 0;
218 uint64_t m_current_marker = 0;
219 uint64_t m_num_markers = 0;
221 hip::stream_t m_stream = hip::default_stream_v;
222 marker m_global = {};
226#if defined(TIMEMORY_PYBIND11_SOURCE)
235 template <
template <
typename...>
class BundleT>
236 static void configure(project::python, pybind11::class_<BundleT<hip_event>>& _pyclass)
238 auto _sync = [](BundleT<hip_event>* obj) {
239 obj->template get<hip_event>()->sync();
241 _pyclass.def(
"sync", _sync,
"Synchronize the event (blocking)");
261 return "Generates high-level region markers for HIP profilers";
/// Begin a roctx range by calling roctx::range_start(m_prefix) and keep the
/// returned range id in m_range_id.
279 void start() { m_range_id = roctx::range_start(m_prefix); }
293 hip::stream_sync(m_stream);
295 roctx::range_stop(m_range_id);
314 get_stream_id(_stream)));
321 get_stream_id(_stream)));
/// Store \p _stream in m_stream.
325 void set_stream(hip::stream_t _stream) { m_stream = _stream; }
/// Map a HIP stream to an integer id.
///
/// Ids live in a thread_local map, so each thread numbers the streams it has
/// looked up independently. A stream seen for the first time receives the
/// current map size as its id (0 for the first stream, 1 for the next, ...);
/// later lookups of the same stream on that thread return the same id.
///
/// \param _stream stream to look up
/// \return the id associated with \p _stream on the calling thread
333 static int32_t get_stream_id(hip::stream_t _stream)
335 using pair_t = std::pair<hip::stream_t, int32_t>;
336 using map_t = std::map<hip::stream_t, int32_t>;
337 using map_ptr_t = std::unique_ptr<map_t>;
// one map per thread, created on the thread's first call
339 static thread_local map_ptr_t _instance = std::make_unique<map_t>();
// unseen stream: register it with the next id (the map size before insertion)
340 if(_instance->find(_stream) == _instance->end())
341 _instance->insert(pair_t(_stream, _instance->size()));
// the entry is guaranteed to exist at this point
342 return _instance->find(_stream)->second;
346 roctx::range_id_t m_range_id = 0;
347 hip::stream_t m_stream = 0;
348 const char* m_prefix =
nullptr;
351#if defined(TIMEMORY_PYBIND11_SOURCE)
360 template <
template <
typename...>
class BundleT>
362 pybind11::class_<BundleT<roctx_marker>>& _pyclass)
364 _pyclass.def_property_static(
367 "Configure CudaEvent to use hipSynchronize() vs. hipStreamSychronize(...)");
Declare the hip component types.
void mark(TupleT< Tp... > &obj, Args &&... args)
char const std::string & _prefix
tim::mpl::apply< std::string > string
static int64_t get_unit()
static void configure(Args &&...)
void stop(hip::stream_t &stream)
void start(hip::stream_t &stream)
Records the time interval between two points in a HIP stream. Less accurate than 'roctracer' for kernel timing
void set_stream(hip::stream_t _stream)
std::vector< marker > marker_list_t
static uint64_t & get_batched_marker_size()
static std::string description()
void mark_end(hip::stream_t _stream)
void append_marker_list(const uint64_t nsize)
void mark_begin(hip::stream_t _stream)
static std::string label()
static value_type record()
float get_display() const
void store(explicit_streams_only, bool _v)
Inserts ROCTX markers with the current timemory prefix.
void mark_begin(hip::stream_t _stream)
asynchronously add a marker for a specific stream. Equivalent to roctxMarkA
void mark_begin()
asynchronously add a marker. Equivalent to roctxMarkA
void stop()
stop the roctx range. Equivalent to roctxRangeEnd. Depending on settings::roctx_marker_device_sync() ...
void mark_end()
asynchronously add a marker. Equivalent to roctxMarkA
void set_stream(hip::stream_t _stream)
set the current HIP stream
void set_prefix(const char *_prefix)
set the label
roctx_marker(hip::stream_t _stream)
construct with a specific HIP stream
void start()
start an roctx range. Equivalent to roctxRangeStartEx
static bool & use_device_sync()
static std::string description()
static std::string label()
void mark_end(hip::stream_t _stream)
asynchronously add a marker for a specific stream. Equivalent to roctxMarkA
static value_type record()
#define TIMEMORY_JOIN(delim,...)