34#include "timemory/components/cupti/backends.hpp"
78 return "Wall-clock execution timing for the CUDA API";
86 std::vector<cupti::activity_kind_t> _kinds;
91 for(
const auto& itr : vec)
93 int iactivity = atoi(itr.c_str());
94 if(iactivity >
static_cast<int>(CUPTI_ACTIVITY_KIND_INVALID) &&
95 iactivity <
static_cast<int>(CUPTI_ACTIVITY_KIND_COUNT))
97 _kinds.push_back(
static_cast<cupti::activity_kind_t
>(iactivity));
109 _kinds = { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL };
114 _kinds = { { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
115 CUPTI_ACTIVITY_KIND_MEMCPY, CUPTI_ACTIVITY_KIND_MEMSET } };
120 _kinds = { { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
121 CUPTI_ACTIVITY_KIND_MEMCPY, CUPTI_ACTIVITY_KIND_MEMSET,
122 CUPTI_ACTIVITY_KIND_RUNTIME, CUPTI_ACTIVITY_KIND_DEVICE,
123 CUPTI_ACTIVITY_KIND_DRIVER, CUPTI_ACTIVITY_KIND_OVERHEAD } };
129 _kinds = { { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
130 CUPTI_ACTIVITY_KIND_MEMCPY, CUPTI_ACTIVITY_KIND_MEMSET,
131 CUPTI_ACTIVITY_KIND_RUNTIME, CUPTI_ACTIVITY_KIND_DEVICE,
132 CUPTI_ACTIVITY_KIND_DRIVER, CUPTI_ACTIVITY_KIND_OVERHEAD,
133 CUPTI_ACTIVITY_KIND_MARKER, CUPTI_ACTIVITY_KIND_STREAM,
134 CUPTI_ACTIVITY_KIND_CDP_KERNEL } };
153 static std::atomic<short> _once(0);
157 cupti::init_driver();
181 cupti::activity::start_trace(
this, m_depth_change);
182 value = cupti::activity::get_receiver().get();
183 m_kernels_index = cupti::activity::get_receiver().get_named_index();
191 cupti::activity::stop_trace(
this);
192 auto tmp = cupti::activity::get_receiver().get();
193 auto kernels = cupti::activity::get_receiver().get_named(m_kernels_index,
true);
195 accum += (tmp - value);
197 for(
const auto& itr : kernels)
198 m_kernels_accum[itr.first] += itr.second;
199 m_kernels_value = std::move(kernels);
215 bool m_depth_change =
false;
216 uint64_t m_kernels_index = 0;
tim::mpl::apply< std::string > string
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
static int64_t get_unit()
CUPTI activity tracing component for high-precision kernel timing. For low-precision kernel timing,...
static void global_init()
static get_initializer_t & get_initializer()
void set_depth_change(bool v)
static value_type record()
std::vector< cupti::activity_kind_t > kind_vector_type
static std::string description()
typename cupti::activity::receiver::named_elapsed_t kernel_elapsed_t
double get_display() const
kernel_elapsed_t get_secondary() const
static kind_vector_type get_kind_types()
std::function< kind_vector_type()> get_initializer_t
cupti::activity::receiver receiver_type
std::unordered_set< std::string > kernel_names_t
static void global_finalize()
static std::string label()