timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
cupti_pcsampling.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25#pragma once
26
28#include "timemory/components/cupti/backends.hpp"
30#include "timemory/macros.hpp"

#include <mutex>
#include <string>
#include <unordered_set>
#include <utility>
33
34#if defined(TIMEMORY_USE_CUPTI)
35# include "timemory/backends/cupti.hpp"
36
37# include <cuda.h>
38# include <cupti.h>
39# if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
40# include <cupti_pcsampling.h>
41# endif
42#endif
43
44namespace tim
45{
46namespace component
47{
48//--------------------------------------------------------------------------------------//
49//
50// CUPTI Program Counter (PC) sampling component
51//
52//--------------------------------------------------------------------------------------//
53/// \struct tim::component::cupti_pcsampling
54/// \brief The PC Sampling gives the number of samples for each source and assembly
55/// line with various stall reasons. Using this information, you can pinpoint portions
56/// of your kernel that are introducing latencies and the reason for the latency.
57///
59: public base<cupti_pcsampling, cupti::pcsample>
60, private policy::instance_tracker<cupti_pcsampling, false>
61{
62 // component-specific aliases
63 using data_type = cupti::pcdata;
64 using size_type = std::size_t;
66#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
67 using config_type =
68 std::tuple<CUcontext, CUpti_PCSamplingEnableParams,
69 CUpti_PCSamplingGetNumStallReasonsParams,
70 CUpti_PCSamplingGetStallReasonsParams, CUpti_PCSamplingData,
71 std::vector<CUpti_PCSamplingConfigurationInfo>,
72 CUpti_PCSamplingConfigurationInfoParams, CUpti_PCSamplingStartParams,
73 CUpti_PCSamplingStopParams, size_t, size_t>;
74#else
76 std::vector<null_type>, null_type, null_type,
77 null_type, size_t, size_t>;
78#endif
79
80 // required aliases
81 using value_type = cupti::pcsample;
84
/// \brief Short identifier string for this component.
/// \return The literal "cupti_pcsampling".
static std::string label()
{
    return std::string("cupti_pcsampling");
}
/// \brief Human-readable summary of what this component wraps.
/// \return The literal "CUpti Program Counter (PC) Sampling API".
static std::string description()
{
    return std::string("CUpti Program Counter (PC) Sampling API");
}
87 static void global_init() { initialize(); }
88 static void global_finalize() { finalize(); }
89 static data_type record();
90 static void sample();
91
92 TIMEMORY_DEFAULT_OBJECT(cupti_pcsampling)
93
94 void store(const value_type& _data);
95 void store(value_type&& _data);
96 void start();
97 void stop();
100 std::string get_display() const;
101
102 auto get_laps() const { return value.totalSamples; }
103
104 std::vector<int64_t> get() const;
105 static std::vector<std::string> label_array();
106
107 static void cleanup() { cupti::pcstall::allocate_arrays(0); }
108
109#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
110 static CUpti_PCSamplingData get_pcsampling_data(size_t numStallReasons,
111 size_t numPcsToCollect);
112 static void free_pcsampling_data(CUpti_PCSamplingData);
113#endif
114
115protected:
116 const char* m_prefix = nullptr;
117
118public:
120 static void initialize();
121 static void finalize();
123 {
124 static persistent_data _instance{};
125 return _instance;
126 }
127
128private:
129 static std::unordered_set<cupti_pcsampling*>& get_stack()
130 {
131 static thread_local std::unordered_set<cupti_pcsampling*> _instance{};
132 return _instance;
133 }
134
135private:
136 struct persistent_data
137 {
138 bool enabled = false;
139 bool region_totals = true;
140 config_type data = {};
141
142 // create ref and cr member functions e.g.:
143 // auto& context() { return get<0>(data); }
144 // const auto& context() const { return get<0>(data); }
145 TIMEMORY_TUPLE_ACCESSOR(0, data, context)
146 TIMEMORY_TUPLE_ACCESSOR(1, data, enable_params)
147 TIMEMORY_TUPLE_ACCESSOR(2, data, num_stall_reasons_params)
148 TIMEMORY_TUPLE_ACCESSOR(3, data, stall_reasons_params)
149 TIMEMORY_TUPLE_ACCESSOR(4, data, sampling_data)
150 TIMEMORY_TUPLE_ACCESSOR(5, data, info)
151 TIMEMORY_TUPLE_ACCESSOR(6, data, info_params)
152 TIMEMORY_TUPLE_ACCESSOR(7, data, start_params)
153 TIMEMORY_TUPLE_ACCESSOR(8, data, stop_params)
154 TIMEMORY_TUPLE_ACCESSOR(9, data, num_stall_reasons)
155 TIMEMORY_TUPLE_ACCESSOR(10, data, num_collect_pcs)
156 };
157};
158//
159} // namespace component
160//
161//--------------------------------------------------------------------------------------//
162//
163namespace cupti
164{
165//
166template <typename Archive>
167void
168pcsample::save(Archive& ar, const unsigned int) const
169{
170 std::string _fname = functionName;
171 ar(cereal::make_nvp("samples", totalSamples), cereal::make_nvp("cubin_id", cubinCrc),
172 cereal::make_nvp("pc_offset", pcOffset),
173 cereal::make_nvp("func_index", functionIndex),
174 cereal::make_nvp("func_name", _fname), cereal::make_nvp("stalls", stalls));
175}
176//
177template <typename Archive>
178void
179pcsample::load(Archive& ar, const unsigned int)
180{
181 // memory leak
182 auto* _fname = new std::string{};
183 ar(cereal::make_nvp("samples", totalSamples), cereal::make_nvp("cubin_id", cubinCrc),
184 cereal::make_nvp("pc_offset", pcOffset),
185 cereal::make_nvp("func_index", functionIndex),
186 cereal::make_nvp("func_name", *_fname), cereal::make_nvp("stalls", stalls));
187 functionName = _fname->c_str();
188}
189//
190template <typename Archive>
191inline void
192pcstall::save(Archive& ar, const unsigned int) const
193{
194 auto _idx = index;
195 auto _samples = samples;
196 std::string _name = name();
197 ar(cereal::make_nvp("index", _idx), cereal::make_nvp("name", _name),
198 cereal::make_nvp("samples", _samples));
199}
200//
201template <typename Archive>
202inline void
203pcstall::load(Archive& ar, const unsigned int)
204{
205 auto _idx = index;
206 auto _samples = samples;
207 ar(cereal::make_nvp("index", _idx), cereal::make_nvp("samples", _samples));
208}
209//
210} // namespace cupti
211} // namespace tim
212
213// #endif // TIMEMORY_USE_CUPTI_PCSAMPLING
214
215#if defined(TIMEMORY_CUPTI_HEADER_MODE)
217#endif
STL namespace.
void load(Archive &ar, tim::node::graph< Tp > &d)
Definition: node.hpp:520
void save(Archive &ar, std::shared_ptr< tim::tsettings< Tp, Tp & > > obj)
Definition: tsettings.hpp:471
Inherit from this policy to add reference counting support. Useful if you want to turn a global setti...
Definition: types.hpp:406
Definition: kokkosp.cpp:39
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
The PC Sampling gives the number of samples for each source and assembly line with various stall reas...
static config_type configure()
static std::vector< std::string > label_array()
std::tuple< null_type, null_type, null_type, null_type, null_type, std::vector< null_type >, null_type, null_type, null_type, size_t, size_t > config_type
std::vector< int64_t > get() const
void store(const value_type &_data)
std::string get_display() const
this is a placeholder type for optional type-traits. It is used as the default type for the type-trai...
Definition: types.hpp:225
#define TIMEMORY_TUPLE_ACCESSOR(INDEX, TUPLE, NAME)
Definition: types.hpp:163