27#if !defined(TIMEMORY_CUPTI_HEADER_MODE)
28# include "timemory/components/cupti/backends.hpp"
31#if defined(TIMEMORY_USE_CUPTI)
32# include "timemory/backends/cupti.hpp"
36# if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
37# include <cupti_pcsampling.h>
49 assert(rangeId == rhs.rangeId);
50 totalNumPcs =
std::max(totalNumPcs, rhs.totalNumPcs);
51 remainingNumPcs =
std::min(remainingNumPcs, rhs.remainingNumPcs);
52 for(
const auto& itr : rhs.samples)
60 assert(rangeId == rhs.rangeId);
61 totalNumPcs =
std::max(totalNumPcs, rhs.totalNumPcs);
62 remainingNumPcs =
std::min(remainingNumPcs, rhs.remainingNumPcs);
63 for(
auto&& itr : rhs.samples)
64 append(std::move(itr));
71 assert(rangeId == rhs.rangeId);
72 for(
const auto& ritr : rhs.samples)
74 for(
auto& itr : samples)
87pcdata::append(
const pcsample& _sample)
89 for(
auto& itr : samples)
97 samples.insert(_sample);
102pcdata::append(pcsample&& _sample)
104 for(
auto& itr : samples)
112 samples.insert(std::move(_sample));
121 for(
size_t i = 0; i < stalls.size(); ++i)
125#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
128pcsample::pcsample(
const CUpti_PCSamplingPCData_t& _pcdata)
129: cubinCrc(_pcdata.cubinCrc)
130, pcOffset(_pcdata.pcOffset)
131, functionIndex(_pcdata.functionIndex)
132, functionName(_pcdata.functionName)
134 for(
size_t i = 0; i < stalls.size(); ++i)
136 for(
size_t i = 0; i < _pcdata.stallReasonCount; ++i)
138 const auto& _stall = _pcdata.stallReason[i];
139 auto ridx = _stall.pcSamplingStallReasonIndex;
140 stalls[ridx] = std::move(pcstall{ _stall });
143 for(
auto& itr : stalls)
144 totalSamples += itr.samples;
// Stub constructor: accepts a CUpti_PCSamplingPCData_t, ignores it, and does
// nothing in its (empty) body.
// NOTE(review): presumably the fallback used when TIMEMORY_USE_CUPTI_PCSAMPLING
// is not defined -- the enclosing preprocessor branch is not visible here; confirm.
150pcsample::pcsample(
const CUpti_PCSamplingPCData_t&) {}
157 for(int32_t i = 0; i < stall_reasons_size; ++i)
158 stalls[i] += rhs.stalls[i];
159 for(int32_t i = 0; i < stall_reasons_size; ++i)
160 totalSamples += rhs.stalls[i].samples;
167 for(int32_t i = 0; i < stall_reasons_size; ++i)
168 stalls[i] -= rhs.stalls[i];
169 for(int32_t i = 0; i < stall_reasons_size; ++i)
170 totalSamples -= rhs.stalls[i].samples;
175pcsample::operator==(
const pcsample& rhs)
const
177 return std::tie(cubinCrc, pcOffset, functionIndex) ==
178 std::tie(rhs.cubinCrc, rhs.pcOffset, rhs.functionIndex);
182pcsample::operator<(
const pcsample& rhs)
const
184 return (cubinCrc < rhs.cubinCrc) || (pcOffset < rhs.pcOffset) ||
185 (functionIndex < rhs.functionIndex);
189pcsample::operator<=(
const pcsample& rhs)
const
191 return (*
this == rhs) || (*
this < rhs);
195 pcsample::name()
const
197#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
203 static uomap_t<uint32_t, uomap_t<uint32_t, uomap_t<uint64_t, std::string>>>
205 auto itr = _sass2src[functionIndex][pcOffset].find(cubinCrc);
206 if(itr == _sass2src[functionIndex][pcOffset].
end())
208 CUpti_GetSassToSourceCorrelationParams sassToSourceParams = {};
209 sassToSourceParams.size =
sizeof(CUpti_GetSassToSourceCorrelationParams);
210 sassToSourceParams.cubin = std::get<0>(get_cubin_map().
at(cubinCrc));
211 sassToSourceParams.cubinSize = std::get<1>(get_cubin_map().
at(cubinCrc));
212 sassToSourceParams.functionName = functionName;
213 sassToSourceParams.pcOffset = pcOffset;
214 TIMEMORY_CUPTI_API_CALL(cuptiGetSassToSourceCorrelation(&sassToSourceParams));
215 if(sassToSourceParams.fileName)
218 auto _line = sassToSourceParams.lineNumber;
219 _sass2src[functionIndex][pcOffset][cubinCrc] =
221 free(sassToSourceParams.fileName);
222 free(sassToSourceParams.dirName);
226 _sass2src[functionIndex][pcOffset][cubinCrc] = functionName;
228 itr = _sass2src[functionIndex][pcOffset].find(cubinCrc);
240#if defined(TIMEMORY_USE_CUPTI_PCSAMPLING)
243pcstall::pcstall(
const CUpti_PCSamplingStallReason_t& _obj)
244: index(_obj.pcSamplingStallReasonIndex)
245, samples(_obj.samples)
// Stub constructor: accepts a CUpti_PCSamplingStallReason_t, ignores it, and
// does nothing in its (empty) body.
// NOTE(review): presumably the fallback used when TIMEMORY_USE_CUPTI_PCSAMPLING
// is not defined -- the enclosing preprocessor branch is not visible here; confirm.
251pcstall::pcstall(
const CUpti_PCSamplingStallReason_t&) {}
256pcstall::pcstall(uint32_t _index, uint32_t _samples)
264 samples += rhs.samples;
271 samples -= rhs.samples;
276pcstall::name(uint32_t _index)
280 for(uint32_t i = 0; i <
get_size(); ++i)
282 auto _idx = get_index_array()[i];
284 return get_name_array()[i];
294 for(uint32_t i = 0; i <
get_size(); ++i)
296 auto _idx = get_index_array()[i];
298 return get_bool_array()[i];
#define TIMEMORY_CUPTI_INLINE
::tim::statistics< Tp > max(::tim::statistics< Tp > lhs, const Tp &rhs)
::tim::statistics< Tp > min(::tim::statistics< Tp > lhs, const Tp &rhs)
::tim::statistics< tuple<> > & operator+=(::tim::statistics< tuple<> > &_lhs, const Tp &)
constexpr auto get_size(const Tp &, std::tuple<>) -> size_t
std::array< Tp, N > & operator-=(std::array< Tp, N > &, const std::array< Tp, N > &)
std::string string_view_t
std::string demangle(const char *_mangled_name, int *_status=nullptr)
tim::mpl::apply< std::string > string
Tp get(Sp &&_key, bool _exact=true)
static settings * instance()
#define TIMEMORY_JOIN(delim,...)