timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
cupti_activity.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/** \file cupti_activity.hpp
26 * \headerfile cupti_activity.hpp "timemory/cupti_activity.hpp"
27 * Provides implementation of CUPTI routines.
28 *
29 */
30
31#pragma once
32
34#include "timemory/components/cupti/backends.hpp"
37
38#include <functional>
39#include <numeric>
40#include <string>
41#include <vector>
42
43//======================================================================================//
44
45namespace tim
46{
47namespace component
48{
49//--------------------------------------------------------------------------------------//
50//
51// CUPTI activity tracing component (high-precision kernel timers)
52//
53//--------------------------------------------------------------------------------------//
54/// \struct tim::component::cupti_activity
55/// \brief CUPTI activity tracing component for high-precision kernel timing. For
56/// low-precision kernel timing, use \ref tim::component::cuda_event component.
57///
58struct cupti_activity : public base<cupti_activity, intmax_t>
59{
60 // required aliases
61 using value_type = intmax_t;
64
65 // component-specific aliases
66 using ratio_t = std::nano;
67 using size_type = std::size_t;
69 using receiver_type = cupti::activity::receiver;
70 using kind_vector_type = std::vector<cupti::activity_kind_t>;
71 using get_initializer_t = std::function<kind_vector_type()>;
72 using kernel_elapsed_t = typename cupti::activity::receiver::named_elapsed_t;
73 using kernel_names_t = std::unordered_set<std::string>;
74
/// Identifier string for this component.
static std::string label()
{
    return std::string("cupti_activity");
}
77 {
78 return "Wall-clock execution timing for the CUDA API";
79 }
80
81 //----------------------------------------------------------------------------------//
82
// Body of the initializer accessor: a replaceable std::function is kept in a
// function-local static and handed back to the caller. The default function
// builds the list of CUPTI activity kinds to trace.
// NOTE(review): `vec` and `lvl` are defined on lines that are not visible in this
// listing; presumably they come from the cupti_activity_kinds and
// cupti_activity_level settings -- confirm against the full header.
84 {
85 static get_initializer_t _instance = []() -> kind_vector_type {
86 std::vector<cupti::activity_kind_t> _kinds;
88
89 /// look up integer codes in <timemory/backends/types/cupti.hpp>
// each entry of `vec` is parsed as an integer code; only codes strictly between
// CUPTI_ACTIVITY_KIND_INVALID and CUPTI_ACTIVITY_KIND_COUNT are kept
// NOTE(review): atoi() yields 0 for non-numeric text, so such entries are dropped
// only if CUPTI_ACTIVITY_KIND_INVALID is >= 0 -- presumably it is 0; confirm
91 for(const auto& itr : vec)
92 {
93 int iactivity = atoi(itr.c_str());
94 if(iactivity > static_cast<int>(CUPTI_ACTIVITY_KIND_INVALID) &&
95 iactivity < static_cast<int>(CUPTI_ACTIVITY_KIND_COUNT))
96 {
97 _kinds.push_back(static_cast<cupti::activity_kind_t>(iactivity));
98 }
99 }
100
101 // explicitly listed kinds parsed above take precedence over the level presets
102 if(!_kinds.empty())
103 {
104 return _kinds;
105 }
106 if(lvl == 0)
107 {
108 // level 0: concurrent kernels only
109 _kinds = { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL };
110 }
111 else if(lvl == 1)
112 {
113 // level 1: concurrent kernels plus memcpy and memset
114 _kinds = { { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
115 CUPTI_ACTIVITY_KIND_MEMCPY, CUPTI_ACTIVITY_KIND_MEMSET } };
116 }
117 else if(lvl == 2)
118 {
119 // level 2: level 1 plus runtime, device, driver, and overhead
120 _kinds = { { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
121 CUPTI_ACTIVITY_KIND_MEMCPY, CUPTI_ACTIVITY_KIND_MEMSET,
122 CUPTI_ACTIVITY_KIND_RUNTIME, CUPTI_ACTIVITY_KIND_DEVICE,
123 CUPTI_ACTIVITY_KIND_DRIVER, CUPTI_ACTIVITY_KIND_OVERHEAD } };
124 }
125 else if(lvl > 2)
126 {
127 // level 3 and above: level 2 plus marker, stream,
128 // and CDP kernels
129 _kinds = { { CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL,
130 CUPTI_ACTIVITY_KIND_MEMCPY, CUPTI_ACTIVITY_KIND_MEMSET,
131 CUPTI_ACTIVITY_KIND_RUNTIME, CUPTI_ACTIVITY_KIND_DEVICE,
132 CUPTI_ACTIVITY_KIND_DRIVER, CUPTI_ACTIVITY_KIND_OVERHEAD,
133 CUPTI_ACTIVITY_KIND_MARKER, CUPTI_ACTIVITY_KIND_STREAM,
134 CUPTI_ACTIVITY_KIND_CDP_KERNEL } };
135 }
// a negative level matches none of the branches above, so the list stays empty
136 return _kinds;
137 };
138 return _instance;
139 }
140
141 //----------------------------------------------------------------------------------//
142
// Body of the kind-list accessor: the initializer obtained from get_initializer()
// is invoked the first time this code runs and its result is cached in a
// function-local static, so later changes to the initializer do not affect the
// value returned here.
144 {
145 static kind_vector_type _instance = get_initializer()();
146 return _instance;
147 }
148
149 //----------------------------------------------------------------------------------//
150
151 static void global_init()
152 {
153 static std::atomic<short> _once(0);
154 if(_once++ > 0)
155 return;
156 cupti::activity::initialize_trace(get_kind_types());
157 cupti::init_driver();
158 }
159
160 //----------------------------------------------------------------------------------//
161
162 static void global_finalize() { cupti::activity::finalize_trace(get_kind_types()); }
163
164 //----------------------------------------------------------------------------------//
165
166 static value_type record() { return cupti::activity::get_receiver().get(); }
167
168 //----------------------------------------------------------------------------------//
169
170public:
171 TIMEMORY_DEFAULT_OBJECT(cupti_activity)
172
173 // make sure it is removed
174 ~cupti_activity() { cupti::activity::get_receiver().remove(this); }
175
176 //----------------------------------------------------------------------------------//
177 // start
178 //
179 void start()
180 {
181 cupti::activity::start_trace(this, m_depth_change);
182 value = cupti::activity::get_receiver().get();
183 m_kernels_index = cupti::activity::get_receiver().get_named_index();
184 }
185
186 //----------------------------------------------------------------------------------//
187
188 void stop()
189 {
190 using namespace tim::component::operators;
191 cupti::activity::stop_trace(this);
192 auto tmp = cupti::activity::get_receiver().get();
193 auto kernels = cupti::activity::get_receiver().get_named(m_kernels_index, true);
194
195 accum += (tmp - value);
196 value = tmp;
197 for(const auto& itr : kernels)
198 m_kernels_accum[itr.first] += itr.second;
199 m_kernels_value = std::move(kernels);
200 }
201
202 //----------------------------------------------------------------------------------//
203
204 double get() const { return load() / static_cast<double>(base_type::get_unit()); }
205
206 double get_display() const { return get(); }
207
208 //----------------------------------------------------------------------------------//
209
210 kernel_elapsed_t get_secondary() const { return m_kernels_accum; }
211
212 void set_depth_change(bool v) { m_depth_change = v; }
213
214private:
// flag forwarded to cupti::activity::start_trace() by start(); set via set_depth_change()
215 bool m_depth_change = false;
// receiver's named-entry index captured in start() and passed to get_named() in stop()
216 uint64_t m_kernels_index = 0;
// named entries returned by the receiver during the most recent stop()
217 kernel_elapsed_t m_kernels_value;
// per-name totals summed over every stop(); returned by get_secondary()
218 kernel_elapsed_t m_kernels_accum;
219};
219};
220
221} // namespace component
222} // namespace tim
Definition: kokkosp.cpp:39
cupti_activity_kinds
Definition: settings.cpp:1725
cupti_activity_level
Definition: settings.cpp:1723
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: delimit.hpp:68
static int64_t get_unit()
CUPTI activity tracing component for high-precision kernel timing. For low-precision kernel timing,...
static get_initializer_t & get_initializer()
std::vector< cupti::activity_kind_t > kind_vector_type
static std::string description()
typename cupti::activity::receiver::named_elapsed_t kernel_elapsed_t
kernel_elapsed_t get_secondary() const
static kind_vector_type get_kind_types()
std::function< kind_vector_type()> get_initializer_t
cupti::activity::receiver receiver_type
std::unordered_set< std::string > kernel_names_t