28#include "timemory/components/cupti/backends.hpp"
34#if defined(TIMEMORY_USE_CUPTI)
35# include "timemory/backends/cupti.hpp"
39# if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
40# include <cupti_pcsampling.h>
59:
public base<cupti_pcsampling, cupti::pcsample>
66#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
68 std::tuple<CUcontext, CUpti_PCSamplingEnableParams,
69 CUpti_PCSamplingGetNumStallReasonsParams,
70 CUpti_PCSamplingGetStallReasonsParams, CUpti_PCSamplingData,
71 std::vector<CUpti_PCSamplingConfigurationInfo>,
72 CUpti_PCSamplingConfigurationInfoParams, CUpti_PCSamplingStartParams,
73 CUpti_PCSamplingStopParams, size_t,
size_t>;
102 auto
get_laps()
const {
return value.totalSamples; }
104 std::vector<int64_t>
get()
const;
107 static void cleanup() { cupti::pcstall::allocate_arrays(0); }
109#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
110 static CUpti_PCSamplingData get_pcsampling_data(
size_t numStallReasons,
111 size_t numPcsToCollect);
112 static void free_pcsampling_data(CUpti_PCSamplingData);
124 static persistent_data _instance{};
129 static std::unordered_set<cupti_pcsampling*>& get_stack()
131 static thread_local std::unordered_set<cupti_pcsampling*> _instance{};
136 struct persistent_data
139 bool region_totals =
true;
166template <
typename Archive>
171 ar(cereal::make_nvp(
"samples", totalSamples), cereal::make_nvp(
"cubin_id", cubinCrc),
172 cereal::make_nvp(
"pc_offset", pcOffset),
173 cereal::make_nvp(
"func_index", functionIndex),
174 cereal::make_nvp(
"func_name", _fname), cereal::make_nvp(
"stalls", stalls));
177template <
typename Archive>
183 ar(cereal::make_nvp(
"samples", totalSamples), cereal::make_nvp(
"cubin_id", cubinCrc),
184 cereal::make_nvp(
"pc_offset", pcOffset),
185 cereal::make_nvp(
"func_index", functionIndex),
186 cereal::make_nvp(
"func_name", *_fname), cereal::make_nvp(
"stalls", stalls));
187 functionName = _fname->c_str();
190template <
typename Archive>
195 auto _samples = samples;
197 ar(cereal::make_nvp(
"index", _idx), cereal::make_nvp(
"name", _name),
198 cereal::make_nvp(
"samples", _samples));
201template <
typename Archive>
206 auto _samples = samples;
207 ar(cereal::make_nvp(
"index", _idx), cereal::make_nvp(
"samples", _samples));
215#if defined(TIMEMORY_CUPTI_HEADER_MODE)
void load(Archive &ar, tim::node::graph< Tp > &d)
void save(Archive &ar, std::shared_ptr< tim::tsettings< Tp, Tp & > > obj)
Inherit from this policy to add reference counting support. Useful if you want to turn a global setti...
tim::mpl::apply< std::string > string
The PC Sampling gives the number of samples for each source and assembly line with various stall reas...
cupti::pcsample value_type
static std::string description()
static config_type configure()
static void global_init()
static std::vector< std::string > label_array()
std::tuple< null_type, null_type, null_type, null_type, null_type, std::vector< null_type >, null_type, null_type, null_type, size_t, size_t > config_type
static auto & get_configuration_data()
static void global_finalize()
std::vector< int64_t > get() const
void store(const value_type &_data)
static std::string label()
static data_type record()
std::string get_display() const
this is a placeholder type for optional type-traits. It is used as the default type for the type-trai...
#define TIMEMORY_TUPLE_ACCESSOR(INDEX, TUPLE, NAME)