timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
backends.cpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
26
27#if !defined(TIMEMORY_CUPTI_HEADER_MODE)
28# include "timemory/components/cupti/backends.hpp"
29#endif
30
31#if defined(TIMEMORY_USE_CUPTI)
32# include "timemory/backends/cupti.hpp"
33
34# include <cuda.h>
35# include <cupti.h>
36# if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
37# include <cupti_pcsampling.h>
38# endif
39#endif
40
41namespace tim
42{
43namespace cupti
44{
45//
47pcdata::operator+=(const pcdata& rhs)
48{
49 assert(rangeId == rhs.rangeId);
50 totalNumPcs = std::max(totalNumPcs, rhs.totalNumPcs);
51 remainingNumPcs = std::min(remainingNumPcs, rhs.remainingNumPcs);
52 for(const auto& itr : rhs.samples)
53 append(itr);
54 return *this;
55}
56//
58pcdata::operator+=(pcdata&& rhs)
59{
60 assert(rangeId == rhs.rangeId);
61 totalNumPcs = std::max(totalNumPcs, rhs.totalNumPcs);
62 remainingNumPcs = std::min(remainingNumPcs, rhs.remainingNumPcs);
63 for(auto&& itr : rhs.samples)
64 append(std::move(itr));
65 return *this;
66}
67//
69pcdata::operator-=(const pcdata& rhs)
70{
71 assert(rangeId == rhs.rangeId);
72 for(const auto& ritr : rhs.samples)
73 {
74 for(auto& itr : samples)
75 {
76 if(ritr == itr) // match found
77 {
78 itr -= ritr;
79 break;
80 }
81 }
82 }
83 return *this;
84}
85//
87pcdata::append(const pcsample& _sample)
88{
89 for(auto& itr : samples)
90 {
91 if(_sample == itr) // match found
92 {
93 itr += _sample;
94 return false;
95 }
96 }
97 samples.insert(_sample);
98 return true;
99}
100//
102pcdata::append(pcsample&& _sample)
103{
104 for(auto& itr : samples)
105 {
106 if(_sample == itr) // match found
107 {
108 itr += _sample;
109 return false;
110 }
111 }
112 samples.insert(std::move(_sample));
113 return true;
114}
115//
116//--------------------------------------------------------------------------------------//
117//
119pcsample::pcsample()
120{
121 for(size_t i = 0; i < stalls.size(); ++i)
122 stalls[i].index = i;
123}
124//---------------------------------------//
125#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
126//---------------------------------------//
128pcsample::pcsample(const CUpti_PCSamplingPCData_t& _pcdata)
129: cubinCrc(_pcdata.cubinCrc)
130, pcOffset(_pcdata.pcOffset)
131, functionIndex(_pcdata.functionIndex)
132, functionName(_pcdata.functionName)
133{
134 for(size_t i = 0; i < stalls.size(); ++i)
135 stalls[i].index = i;
136 for(size_t i = 0; i < _pcdata.stallReasonCount; ++i)
137 {
138 const auto& _stall = _pcdata.stallReason[i];
139 auto ridx = _stall.pcSamplingStallReasonIndex;
140 stalls[ridx] = std::move(pcstall{ _stall });
141 }
142
143 for(auto& itr : stalls)
144 totalSamples += itr.samples;
145}
146//---------------------------------------//
147#else
148//---------------------------------------//
150pcsample::pcsample(const CUpti_PCSamplingPCData_t&) {}
151//---------------------------------------//
152#endif
153//
154TIMEMORY_CUPTI_INLINE const pcsample&
155pcsample::operator+=(const pcsample& rhs) const
156{
157 for(int32_t i = 0; i < stall_reasons_size; ++i)
158 stalls[i] += rhs.stalls[i];
159 for(int32_t i = 0; i < stall_reasons_size; ++i)
160 totalSamples += rhs.stalls[i].samples;
161 return *this;
162}
163//
164TIMEMORY_CUPTI_INLINE const pcsample&
165pcsample::operator-=(const pcsample& rhs) const
166{
167 for(int32_t i = 0; i < stall_reasons_size; ++i)
168 stalls[i] -= rhs.stalls[i];
169 for(int32_t i = 0; i < stall_reasons_size; ++i)
170 totalSamples -= rhs.stalls[i].samples;
171 return *this;
172}
173//
175pcsample::operator==(const pcsample& rhs) const
176{
177 return std::tie(cubinCrc, pcOffset, functionIndex) ==
178 std::tie(rhs.cubinCrc, rhs.pcOffset, rhs.functionIndex);
179}
180//
182pcsample::operator<(const pcsample& rhs) const
183{
184 return (cubinCrc < rhs.cubinCrc) || (pcOffset < rhs.pcOffset) ||
185 (functionIndex < rhs.functionIndex);
186}
187//
189pcsample::operator<=(const pcsample& rhs) const
190{
191 return (*this == rhs) || (*this < rhs);
192}
193//
195 pcsample::name() const
196{
197#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
198 // involves a look-up so cache this result
199 static auto _per_line = settings::instance()->get<bool>("cupti_pcsampling_per_line");
200
201 if(_per_line)
202 {
203 static uomap_t<uint32_t, uomap_t<uint32_t, uomap_t<uint64_t, std::string>>>
204 _sass2src{};
205 auto itr = _sass2src[functionIndex][pcOffset].find(cubinCrc);
206 if(itr == _sass2src[functionIndex][pcOffset].end())
207 {
208 CUpti_GetSassToSourceCorrelationParams sassToSourceParams = {};
209 sassToSourceParams.size = sizeof(CUpti_GetSassToSourceCorrelationParams);
210 sassToSourceParams.cubin = std::get<0>(get_cubin_map().at(cubinCrc));
211 sassToSourceParams.cubinSize = std::get<1>(get_cubin_map().at(cubinCrc));
212 sassToSourceParams.functionName = functionName;
213 sassToSourceParams.pcOffset = pcOffset;
214 TIMEMORY_CUPTI_API_CALL(cuptiGetSassToSourceCorrelation(&sassToSourceParams));
215 if(sassToSourceParams.fileName)
216 {
217 auto _fname = string_view_t{ sassToSourceParams.fileName };
218 auto _line = sassToSourceParams.lineNumber;
219 _sass2src[functionIndex][pcOffset][cubinCrc] =
220 TIMEMORY_JOIN("", demangle(functionName), '/', _fname, ':', _line);
221 free(sassToSourceParams.fileName);
222 free(sassToSourceParams.dirName);
223 }
224 else
225 {
226 _sass2src[functionIndex][pcOffset][cubinCrc] = functionName;
227 }
228 itr = _sass2src[functionIndex][pcOffset].find(cubinCrc);
229 }
230 return itr->second;
231 }
232#endif
233
234 return functionName;
235}
236//
237//--------------------------------------------------------------------------------------//
238//
239//---------------------------------------//
240#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
241//---------------------------------------//
243pcstall::pcstall(const CUpti_PCSamplingStallReason_t& _obj)
244: index(_obj.pcSamplingStallReasonIndex)
245, samples(_obj.samples)
246{}
247//---------------------------------------//
248#else
249//---------------------------------------//
251pcstall::pcstall(const CUpti_PCSamplingStallReason_t&) {}
252//---------------------------------------//
253#endif
254//
256pcstall::pcstall(uint32_t _index, uint32_t _samples)
257: index(_index)
258, samples(_samples)
259{}
260//
262pcstall::operator+=(const pcstall& rhs)
263{
264 samples += rhs.samples;
265 return *this;
266}
267//
269pcstall::operator-=(const pcstall& rhs)
270{
271 samples -= rhs.samples;
272 return *this;
273}
274//
275TIMEMORY_CUPTI_INLINE const char*
276pcstall::name(uint32_t _index)
277{
278 if(_index >= get_size())
279 return "<unknown>";
280 for(uint32_t i = 0; i < get_size(); ++i)
281 {
282 auto _idx = get_index_array()[i];
283 if(_idx == _index)
284 return get_name_array()[i];
285 }
286 return "<unknown>";
287}
288//
290pcstall::enabled(uint32_t _index)
291{
292 if(_index >= get_size())
293 return false;
294 for(uint32_t i = 0; i < get_size(); ++i)
295 {
296 auto _idx = get_index_array()[i];
297 if(_idx == _index)
298 return get_bool_array()[i];
299 }
300 return false;
301}
302
303} // namespace cupti
304} // namespace tim
#define TIMEMORY_CUPTI_INLINE
Definition: types.hpp:34
::tim::statistics< Tp > max(::tim::statistics< Tp > lhs, const Tp &rhs)
Definition: statistics.hpp:320
::tim::statistics< Tp > min(::tim::statistics< Tp > lhs, const Tp &rhs)
Definition: statistics.hpp:329
::tim::statistics< tuple<> > & operator+=(::tim::statistics< tuple<> > &_lhs, const Tp &)
Definition: statistics.hpp:338
return _hash_map end()
constexpr auto get_size(const Tp &, std::tuple<>) -> size_t
Definition: types.hpp:877
std::array< Tp, N > & operator-=(std::array< Tp, N > &, const std::array< Tp, N > &)
Definition: stl.hpp:96
Definition: kokkosp.cpp:39
std::string string_view_t
Definition: language.hpp:102
std::string demangle(const char *_mangled_name, int *_status=nullptr)
Definition: demangle.hpp:47
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
_args at(0)
Tp get(Sp &&_key, bool _exact=true)
Definition: settings.hpp:674
static settings * instance()
Definition: settings.hpp:536
#define TIMEMORY_JOIN(delim,...)
Definition: macros.hpp:90