33#if defined(TIMEMORY_USE_CUPTI_NVPERF)
36# include "timemory/components/cupti/backends.hpp"
41# include <nvperf_cuda_host.h>
42# include <nvperf_host.h>
43# include <nvperf_target.h>
45# include <cupti_profiler_target.h>
46# include <cupti_target.h>
// Fallback definition, used only when TIMEMORY_CUPTI_API_CALL is not already
// defined: forwards all of its arguments unchanged to TIMEMORY_CUPTI_CALL.
57# if !defined(TIMEMORY_CUPTI_API_CALL)
58# define TIMEMORY_CUPTI_API_CALL(...) TIMEMORY_CUPTI_CALL(__VA_ARGS__)
// TIMEMORY_NVPW_API_CALL(apiFuncCall) -- defined only if not already defined.
// Evaluates `apiFuncCall`, stores the result in a local NVPA_Status and, when
// it differs from NVPA_STATUS_SUCCESS, prints file, line, the stringified call
// and the numeric status to stderr.
//
// TIMEMORY_RETURN_IF_NVPW_ERROR(retval, actual): compares `actual` against
// NVPA_STATUS_SUCCESS and prints "FAILED: <stringified actual>" to stderr on
// mismatch.
// NOTE(review): the name suggests `retval` is returned after the message, but
// that part of the macro is not visible in this excerpt -- confirm.
63# if !defined(TIMEMORY_NVPW_API_CALL)
64# define TIMEMORY_NVPW_API_CALL(apiFuncCall) \
67 NVPA_Status _status = apiFuncCall; \
68 if(_status != NVPA_STATUS_SUCCESS) \
70 fprintf(stderr, "%s:%d: error: function %s failed with error %d.\n", \
71 __FILE__, __LINE__, #apiFuncCall, _status); \
79# define TIMEMORY_RETURN_IF_NVPW_ERROR(retval, actual) \
82 if(NVPA_STATUS_SUCCESS != actual) \
84 fprintf(stderr, "FAILED: %s\n", #actual); \
108 return ScopeExit<T>(t);
// Helper macros for declaring a uniquely named scope guard.
//  - NV_ANONYMOUS_VARIABLE_DIRECT pastes `name` and `line` into one identifier.
//  - NV_ANONYMOUS_VARIABLE_INDIRECT adds one level of expansion so that an
//    argument such as __LINE__ is expanded before the token paste.
//  - SCOPE_EXIT(func) declares a const variable named EXIT<line>, initialized
//    from MoveScopeExit with a lambda that captures by copy and executes `func`.
// NOTE(review): presumably the lambda runs when that variable is destroyed;
// ScopeExit/MoveScopeExit are defined outside this excerpt -- confirm.
113# define NV_ANONYMOUS_VARIABLE_DIRECT(name, line) name##line
114# define NV_ANONYMOUS_VARIABLE_INDIRECT(name, line) \
115 NV_ANONYMOUS_VARIABLE_DIRECT(name, line)
116# define SCOPE_EXIT(func) \
117 const auto NV_ANONYMOUS_VARIABLE_INDIRECT(EXIT, __LINE__) = \
118 MoveScopeExit([=]() { func; })
// Component whose value type is a map from a name (std::string) to a double.
// It derives from `base`, passing itself as the first template argument and
// the map type as the second.
136struct cupti_profiler :
public base<cupti_profiler, std::map<std::string, double>>
// Forward declaration; the definition appears later inside this struct.
139 struct MetricNameValue;
// Storage type shared with the `base` template (name -> value).
143 using value_type = std::map<std::string, double>;
144 using this_type = cupti_profiler;
145 using base_type = base<this_type, value_type>;
// Container type filled by GetMetricGpuValue (one entry per metric).
146 using data_type = std::vector<MetricNameValue>;
149 using size_type = std::size_t;
// Returns the fixed label string "cupti_profiler".
152 static std::string label() {
return "cupti_profiler"; }
// Global initialization hook: calls configure() with no arguments.
158 static void global_init() {
configure(); }
// Global finalization hook: calls finalize().
160 static void global_finalize() {
finalize(); }
162 TIMEMORY_DEFAULT_OBJECT(cupti_profiler)
// NOTE(review): the enclosing function's signature and the declarations of
// `_data` and `_tmp` are not visible in this excerpt.
//
// Reads the chip name, counter-data image and metric names from the shared
// persistent data, asks GetMetricGpuValue to fill `_data`, and then flattens
// the result into `_tmp` using keys of the form "<metricName>.<rangeName>".
166 auto& chipName = get_persistent_data().chipName;
167 auto& counterDataImage = get_persistent_data().counterDataImage;
168 auto& metricNames = get_persistent_data().metricNames;
// The boolean result of GetMetricGpuValue is not checked here.
171 GetMetricGpuValue(chipName, counterDataImage, metricNames, _data);
173 PRINT_HERE(
"METRIC_GPU_VALUE size: %li", (
long int) _data.size());
174 for(
const auto& itr : _data)
// Every key for this metric starts with "<metricName>.".
176 auto _prefix = itr.metricName +
".";
178 PRINT_HERE(
" METRIC[%s] size: %li", itr.metricName.c_str(),
179 (
long int) itr.rangeNameMetricValueMap.size());
// vitr.first is the range name, vitr.second the value evaluated for it.
180 for(
const auto& vitr : itr.rangeNameMetricValueMap)
181 _tmp[
_prefix + vitr.first] = vitr.second;
// NOTE(review): the enclosing function header and any condition guarding the
// begin-pass/enable calls are not visible in this excerpt.
// Post-increments the shared instance counter; `_count` holds the value from
// before the increment.
188 auto _count = get_counter()++;
// Begin a profiler pass and enable profiling using the member parameter structs.
192 TIMEMORY_CUPTI_API_CALL(cuptiProfilerBeginPass(&beginPassParams));
193 TIMEMORY_CUPTI_API_CALL(cuptiProfilerEnableProfiling(&enableProfilingParams));
// Open the range described by pushRangeParams.
197 TIMEMORY_CUPTI_API_CALL(cuptiProfilerPushRange(&pushRangeParams));
// NOTE(review): the enclosing function header, the origin of `_tmp`, and any
// condition guarding the disable/end-pass calls are not visible here.
// Close the range that was pushed earlier.
203 TIMEMORY_CUPTI_API_CALL(cuptiProfilerPopRange(&popRangeParams));
// Pre-decrement: `_count` holds the counter value after the decrement.
204 auto _count = --get_counter();
207 TIMEMORY_CUPTI_API_CALL(
208 cuptiProfilerDisableProfiling(&disableProfilingParams));
209 TIMEMORY_CUPTI_API_CALL(cuptiProfilerEndPass(&endPassParams));
// Synchronize stream 0 before flushing the counter data.
213 cuda::stream_sync(0);
216 TIMEMORY_CUPTI_API_CALL(cuptiProfilerFlushCounterData(&flushCounterDataParams));
// For each (name, measurement) in `_tmp`: replace value[name] with
// (measurement - previous value[name]) and add that difference to accum[name].
219 for(
auto& itr : _tmp)
// NOTE(review): assuming `value` is a std::map, operator[] inserts a
// zero-initialized entry when the key is absent.
221 auto& vitr = value[itr.first];
222 vitr = (itr.second - vitr);
223 accum[itr.first] += vitr;
// Stores a pointer to `_prefix`'s character buffer as the range name.
// NOTE(review): the pointer is only valid while `_prefix` is alive and
// unmodified; `_prefix`'s lifetime is not visible in this excerpt -- confirm it
// outlives the cuptiProfilerPushRange call.
229 pushRangeParams.pRangeName =
_prefix.c_str();
// Builds a vector holding every mapped value of `accum`, in `accum`'s
// iteration order. The names (keys) are dropped.
232 std::vector<double>
get()
const
234 std::vector<double> data;
235 for(
const auto& itr : accum)
236 data.emplace_back(itr.second);
// NOTE(review): the enclosing function header, the separator written between
// entries and the final return are not visible in this excerpt.
//
// `_get_display` formats one (label, value) entry: the value is written with
// the component's width and precision, followed by a space and the label.
242 auto _get_display = [&](std::ostream&
os,
const auto& obj) {
243 auto _label = obj.first;
244 auto _prec = base_type::get_precision();
245 auto _width = base_type::get_width();
// NOTE(review): `_flags` is fetched here, but no use of it is visible below.
246 auto _flags = base_type::get_format_flags();
248 std::stringstream ssv, ssi;
250 ssv << std::setw(_width) << std::setprecision(_prec) << obj.second;
252 ssi <<
" " << _label;
253 os << ssv.str() << ssi.str();
// Format every entry returned by load() into `ss`.
256 const auto& _data =
load();
257 std::stringstream ss;
258 for(size_type i = 0; i < _data.size(); ++i)
// The iterator is re-advanced from begin() on every iteration; for a
// container without random-access iterators this re-walks `i` elements
// each time.
260 auto itr = _data.begin();
261 std::advance(itr, i);
262 _get_display(ss, *itr);
// True for every entry except the last one.
263 if(i + 1 < _data.size())
// Copies the configured metric names out of the persistent data and sorts the
// copy in ascending order. The stored list itself is not modified.
269 static std::vector<string_t> label_array()
271 auto ret = get_persistent_data().metricNames;
272 std::sort(ret.begin(), ret.end());
// Descriptions are identical to the labels: returns label_array().
276 static std::vector<string_t> description_array() {
return label_array(); }
// Returns one empty display-unit string per configured metric name.
278 static std::vector<string_t> display_unit_array()
280 return std::vector<string_t>(get_persistent_data().metricNames.size(),
"");
// Returns a unit of 1 for each configured metric name.
283 static std::vector<int64_t> unit_array()
285 return std::vector<int64_t>(get_persistent_data().metricNames.size(), 1);
// Serializes this component into a cereal-style archive. The maps are written
// as plain vectors of their mapped values, so the names are not part of the
// output. The unnamed `unsigned int` parameter is unused.
288 template <
typename Archive>
289 void serialize(Archive& ar,
const unsigned int)
// Helper: collects the mapped values of a value_type into a vector.
291 auto _get = [&](
const value_type& _data) {
292 std::vector<double> values;
// `auto itr` copies each (key, value) pair while iterating.
293 for(
auto itr : _data)
294 values.push_back(itr.second);
// `_disp` and `_accum` are both derived from `accum`; `_value` from `value`.
297 std::vector<double> _disp = _get(accum);
298 std::vector<double> _value = _get(value);
299 std::vector<double> _accum = _get(accum);
// Written fields: "laps", "repr_data", "value", "accum", "display".
// "repr_data" and "display" both carry `_disp`.
300 ar(cereal::make_nvp(
"laps", laps), cereal::make_nvp(
"repr_data", _disp),
301 cereal::make_nvp(
"value", _value), cereal::make_nvp(
"accum", _accum),
302 cereal::make_nvp(
"display", _disp));
// NOTE(review): the enclosing operator's header is not visible in this excerpt;
// the body reads like an addition of `rhs` into this object.
// Adds every entry of rhs.value into value and every entry of rhs.accum into
// accum, key by key.
// NOTE(review): assuming both are std::map, operator[] creates a
// zero-initialized entry for keys that exist only in `rhs`.
310 for(
const auto& itr : rhs.value)
312 value[itr.first] += itr.second;
315 for(
const auto& itr : rhs.accum)
317 accum[itr.first] += itr.second;
// NOTE(review): the enclosing operator's header is not visible in this excerpt;
// the body reads like a subtraction of `rhs` from this object.
// Subtracts rhs.value / rhs.accum entries key by key. Unlike the addition
// code, a key is only touched when it already exists on the left-hand side;
// keys present only in `rhs` are ignored.
325 for(
const auto& itr : rhs.value)
327 if(value.find(itr.first) != value.end())
328 value[itr.first] -= itr.second;
331 for(
const auto& itr : rhs.accum)
333 if(accum.find(itr.first) != accum.end())
334 accum[itr.first] -= itr.second;
// Declarations of the static helpers of this component.
// NOTE(review): several helpers below take std::string / std::vector
// parameters by value, so each call copies the argument (including the whole
// counter-data image).

// Binary file helpers (definitions are not visible in this excerpt).
341 static bool WriteBinaryFile(
const char* pFileName,
const std::vector<uint8_t>& data);
342 static bool ReadBinaryFile(
const char* pFileName, std::vector<uint8_t>& image);
// Query helpers returning sets of chip names / metric names.
344 static std::set<std::string> ListSupportedChips();
345 static std::set<std::string> ListMetrics(
const char* chipName,
bool listSubMetrics);
// Evaluates `metricNames` for every range stored in `counterDataImage` and
// writes the results into `metricNameValueMap`.
348 static bool GetMetricGpuValue(
std::string chipName,
349 std::vector<uint8_t> counterDataImage,
350 std::vector<std::string> metricNames,
351 std::vector<MetricNameValue>& metricNameValueMap);
// Same evaluation as above, but the values are printed to std::cout.
353 static bool PrintMetricValues(
std::string chipName,
354 std::vector<uint8_t> counterDataImage,
355 std::vector<std::string> metricNames);
// Sizes and initializes the counter-data image and its scratch buffer from the
// given prefix image.
358 static bool create_counter_data_image(std::vector<uint8_t>& counterDataImage,
359 std::vector<uint8_t>& counterDataScratchBuffer,
360 std::vector<uint8_t>& counterDataImagePrefix);
362 static bool enable();
363 static bool disable();
// Builders for the config image and the counter-data prefix image.
365 static bool GetConfigImage(
std::string chipName, std::vector<std::string> metricNames,
366 std::vector<uint8_t>& configImage);
368 static bool GetCounterDataPrefixImage(
std::string chipName,
369 std::vector<std::string> metricNames,
370 std::vector<uint8_t>& counterDataImagePrefix);
// Fills `rawMetricRequests`; the request entries point at strings stored in
// `temp`, so `temp` must stay alive while the requests are in use.
372 static bool GetRawMetricRequests(
373 NVPA_MetricsContext* pMetricsContext, std::vector<std::string> metricNames,
374 std::vector<NVPA_RawMetricRequest>& rawMetricRequests,
375 std::vector<std::string>& temp);
// NOTE(review): tail of a declaration whose first line is not visible here
// (the parameters match the ParseMetricNameString calls made elsewhere).
378 bool* isolated,
bool* keepInstances);
// Per-instance CUPTI parameter structs, one for each profiler call issued by
// this component. Each initializer starts with the matching *_STRUCT_SIZE
// constant.
// NOTE(review): the remainder of each initializer (closing brace and any
// further fields) is not visible in this excerpt.
383 CUpti_Profiler_PushRange_Params pushRangeParams = {
384 CUpti_Profiler_PushRange_Params_STRUCT_SIZE
386 CUpti_Profiler_PopRange_Params popRangeParams = {
387 CUpti_Profiler_PopRange_Params_STRUCT_SIZE
389 CUpti_Profiler_BeginPass_Params beginPassParams = {
390 CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
392 CUpti_Profiler_EndPass_Params endPassParams = {
393 CUpti_Profiler_EndPass_Params_STRUCT_SIZE
395 CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
396 CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
398 CUpti_Profiler_EnableProfiling_Params enableProfilingParams = {
399 CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
401 CUpti_Profiler_DisableProfiling_Params disableProfilingParams = {
402 CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
// Result record for a single metric.
// NOTE(review): other code in this file also reads/writes `metricName` and
// `numRanges` on this type; those member declarations are not visible here.
406 struct MetricNameValue
// One (range name, evaluated value) pair per profiled range.
410 std::vector<std::pair<std::string, double>> rangeNameMetricValueMap;
// State shared by all cupti_profiler instances; accessed through
// get_persistent_data().
// NOTE(review): other members used elsewhere in this file (e.g. enabled,
// numRanges, cuContext, cuDevice, deviceCount, deviceNum, chipName) are
// declared on lines not visible in this excerpt.
413 struct persistent_data
// Counter returned by get_counter(); incremented and decremented atomically.
415 std::atomic<int64_t> instCounter;
422 int computeCapabilityMajor = 0;
423 int computeCapabilityMinor = 0;
// Defaults: user-controlled replay and user-defined ranges.
424 CUpti_ProfilerReplayMode profilerReplayMode = CUPTI_UserReplay;
425 CUpti_ProfilerRange profilerRange = CUPTI_UserRange;
// Default file names; passed to WriteBinaryFile by the finalization code.
427 std::string CounterDataFileName =
"SimpleCupti.counterdata";
428 std::string CounterDataSBFileName =
"SimpleCupti.counterdataSB";
// Binary images and buffers handed to the CUPTI / NVPW calls.
429 std::vector<uint8_t> counterDataImagePrefix;
430 std::vector<uint8_t> configImage;
431 std::vector<uint8_t> counterDataImage;
432 std::vector<uint8_t> counterDataScratchBuffer;
// Names of the metrics to collect.
433 std::vector<std::string> metricNames;
// Accessor for the shared state: holds one function-local static
// persistent_data object and hands out a reference to it.
// (The return statement is on a line not visible in this excerpt.)
436 static persistent_data& get_persistent_data()
438 static persistent_data _instance;
// Returns a reference to the shared atomic instance counter.
442 static std::atomic<int64_t>& get_counter()
444 return get_persistent_data().instCounter;
// Sizes and initializes the counter-data image and its scratch buffer.
//
// counterDataImage         [out] resized to the size CUPTI reports, then
//                                initialized.
// counterDataScratchBuffer [out] resized to the scratch size CUPTI reports,
//                                then initialized.
// counterDataImagePrefix   [in]  prefix image used as the sizing input.
//
// NOTE(review): the return type and return statements are not visible in this
// excerpt; the in-class declaration says `static bool`.
451cupti_profiler::create_counter_data_image(std::vector<uint8_t>& counterDataImage,
452 std::vector<uint8_t>& counterDataScratchBuffer,
453 std::vector<uint8_t>& counterDataImagePrefix)
455 auto& numRanges = get_persistent_data().numRanges;
// NOTE(review): declared without an initializer; only the five fields assigned
// below are set explicitly.
457 CUpti_Profiler_CounterDataImageOptions counterDataImageOptions;
// NOTE(review): `&counterDataImagePrefix[0]` requires a non-empty prefix.
458 counterDataImageOptions.pCounterDataPrefix = &counterDataImagePrefix[0];
459 counterDataImageOptions.counterDataPrefixSize = counterDataImagePrefix.size();
// The same limit is used for the number of ranges and of range-tree nodes.
460 counterDataImageOptions.maxNumRanges = numRanges;
461 counterDataImageOptions.maxNumRangeTreeNodes = numRanges;
// Hard-coded maximum range-name length.
462 counterDataImageOptions.maxRangeNameLength = 64;
// Step 1: ask CUPTI how large the counter-data image must be.
464 CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = {
465 CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE
468 calculateSizeParams.pOptions = &counterDataImageOptions;
469 calculateSizeParams.sizeofCounterDataImageOptions =
470 CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE;
472 TIMEMORY_CUPTI_API_CALL(
473 cuptiProfilerCounterDataImageCalculateSize(&calculateSizeParams));
// Step 2: allocate the image with that size and initialize it.
475 CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams = {
476 CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE
479 initializeParams.sizeofCounterDataImageOptions =
480 CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE;
481 initializeParams.pOptions = &counterDataImageOptions;
482 initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
484 counterDataImage.resize(calculateSizeParams.counterDataImageSize);
485 initializeParams.pCounterDataImage = &counterDataImage[0];
487 TIMEMORY_CUPTI_API_CALL(cuptiProfilerCounterDataImageInitialize(&initializeParams));
// Step 3: ask CUPTI for the scratch-buffer size for the initialized image.
489 CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params
490 scratchBufferSizeParams = {
491 CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE
494 scratchBufferSizeParams.counterDataImageSize =
495 calculateSizeParams.counterDataImageSize;
496 scratchBufferSizeParams.pCounterDataImage = initializeParams.pCounterDataImage;
498 TIMEMORY_CUPTI_API_CALL(cuptiProfilerCounterDataImageCalculateScratchBufferSize(
499 &scratchBufferSizeParams));
// Step 4: allocate the scratch buffer and initialize it.
501 counterDataScratchBuffer.resize(scratchBufferSizeParams.counterDataScratchBufferSize);
503 CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params
504 initScratchBufferParams = {
505 CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE
508 initScratchBufferParams.counterDataImageSize =
509 calculateSizeParams.counterDataImageSize;
510 initScratchBufferParams.pCounterDataImage = initializeParams.pCounterDataImage;
511 initScratchBufferParams.counterDataScratchBufferSize =
512 scratchBufferSizeParams.counterDataScratchBufferSize;
513 initScratchBufferParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0];
515 TIMEMORY_CUPTI_API_CALL(
516 cuptiProfilerCounterDataImageInitializeScratchBuffer(&initScratchBufferParams));
// One-time setup of the profiler: queries the device, initializes CUPTI and
// the NVPW host library, builds the config / counter-data images for the
// configured metric names, then begins a profiler session and sets its config.
//
// NOTE(review): many lines of this function (device/context calls, branch
// bodies, return statements) are not visible in this excerpt; how `device` is
// used cannot be determined from here.
524cupti_profiler::configure(
int device)
// Bind references to the shared state that this function reads and fills in.
526 auto& cuDevice = get_persistent_data().cuDevice;
527 auto& metricNames = get_persistent_data().metricNames;
528 auto& counterDataImagePrefix = get_persistent_data().counterDataImagePrefix;
529 auto& configImage = get_persistent_data().configImage;
530 auto& counterDataImage = get_persistent_data().counterDataImage;
531 auto& counterDataScratchBuffer = get_persistent_data().counterDataScratchBuffer;
532 auto& profilerReplayMode = get_persistent_data().profilerReplayMode;
533 auto& profilerRange = get_persistent_data().profilerRange;
534 auto& deviceCount = get_persistent_data().deviceCount;
535 auto& deviceNum = get_persistent_data().deviceNum;
536 auto& computeCapabilityMajor = get_persistent_data().computeCapabilityMajor;
537 auto& computeCapabilityMinor = get_persistent_data().computeCapabilityMinor;
538 auto& chipName = get_persistent_data().chipName;
// Diagnostic for the no-device case (the guarding condition is not visible).
547 fprintf(stderr,
"There is no device supporting CUDA.\n");
552 printf(
"CUDA Device Number: %d\n", deviceNum);
// Query the compute-capability major and minor attributes of `cuDevice`.
556 &computeCapabilityMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevice));
558 &computeCapabilityMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevice));
560 printf(
"Compute Capability of Device: %d.%d\n", computeCapabilityMajor,
561 computeCapabilityMinor);
// Devices with a major compute capability below 7 are reported as unsupported.
563 if(computeCapabilityMajor < 7)
565 printf(
"Sample unsupported on Device with compute capability < 7.0\n");
// Initialize the CUPTI profiler.
573 CUpti_Profiler_Initialize_Params profilerInitializeParams = {
574 CUpti_Profiler_Initialize_Params_STRUCT_SIZE
576 TIMEMORY_CUPTI_API_CALL(cuptiProfilerInitialize(&profilerInitializeParams));
// Look up the chip name of device `deviceNum` and store it in shared state.
579 CUpti_Device_GetChipName_Params getChipNameParams = {
580 CUpti_Device_GetChipName_Params_STRUCT_SIZE
583 getChipNameParams.deviceIndex = deviceNum;
585 TIMEMORY_CUPTI_API_CALL(cuptiDeviceGetChipName(&getChipNameParams));
586 chipName = getChipNameParams.pChipName;
// Initialize the NVPW host library.
589 NVPW_InitializeHost_Params initializeHostParams = {
590 NVPW_InitializeHost_Params_STRUCT_SIZE
593 TIMEMORY_NVPW_API_CALL(NVPW_InitializeHost(&initializeHostParams));
// With at least one metric configured, build the config image and the
// counter-data prefix image; each failure is reported on std::cerr.
595 if(metricNames.size())
597 if(!GetConfigImage(chipName, metricNames, configImage))
599 std::cerr <<
"Failed to create configImage" << std::endl;
602 if(!GetCounterDataPrefixImage(chipName, metricNames, counterDataImagePrefix))
604 std::cerr <<
"Failed to create counterDataImagePrefix" << std::endl;
// NOTE(review): presumably the else-branch of the metricNames check above.
610 std::cerr <<
"No metrics provided to profile" << std::endl;
// Allocate and initialize the counter-data image and scratch buffer.
614 if(!create_counter_data_image(counterDataImage, counterDataScratchBuffer,
615 counterDataImagePrefix))
617 std::cerr <<
"Failed to create counterDataImage" << std::endl;
621 auto&
enabled = get_persistent_data().enabled;
622 auto& numRanges = get_persistent_data().numRanges;
623 auto& cuContext = get_persistent_data().cuContext;
625 CUpti_Profiler_BeginSession_Params beginSessionParams = {
626 CUpti_Profiler_BeginSession_Params_STRUCT_SIZE
629 CUpti_Profiler_SetConfig_Params setConfigParams = {
630 CUpti_Profiler_SetConfig_Params_STRUCT_SIZE
// Session parameters: context, the image and scratch buffers created above,
// the range/replay modes, and `numRanges` as both per-pass limits.
635 beginSessionParams.ctx = cuContext;
636 beginSessionParams.counterDataImageSize = counterDataImage.size();
637 beginSessionParams.pCounterDataImage = &counterDataImage[0];
638 beginSessionParams.counterDataScratchBufferSize = counterDataScratchBuffer.size();
639 beginSessionParams.pCounterDataScratchBuffer = &counterDataScratchBuffer[0];
640 beginSessionParams.range = profilerRange;
641 beginSessionParams.replayMode = profilerReplayMode;
642 beginSessionParams.maxRangesPerPass = numRanges;
643 beginSessionParams.maxLaunchesPerPass = numRanges;
// Config parameters: the generated config image, pass index 0 and a single
// nesting level starting at level 1.
// NOTE(review): `&configImage[0]` requires a non-empty config image.
645 setConfigParams.pConfig = &configImage[0];
646 setConfigParams.configSize = configImage.size();
647 setConfigParams.passIndex = 0;
648 setConfigParams.minNestingLevel = 1;
649 setConfigParams.numNestingLevels = 1;
651 TIMEMORY_CUPTI_API_CALL(cuptiProfilerBeginSession(&beginSessionParams));
652 TIMEMORY_CUPTI_API_CALL(cuptiProfilerSetConfig(&setConfigParams));
// Begins a profiler pass and enables profiling, using locally constructed
// parameter structs (this function is declared static in the class).
// NOTE(review): how `enabled` is checked or updated and what is returned are
// not visible in this excerpt. Most of the references bound below are not
// used in the lines that are visible.
660cupti_profiler::enable()
662 auto&
enabled = get_persistent_data().enabled;
666 auto& cuContext = get_persistent_data().cuContext;
667 auto& numRanges = get_persistent_data().numRanges;
668 auto& cuDevice = get_persistent_data().cuDevice;
669 auto& metricNames = get_persistent_data().metricNames;
670 auto& counterDataImagePrefix = get_persistent_data().counterDataImagePrefix;
671 auto& configImage = get_persistent_data().configImage;
672 auto& counterDataImage = get_persistent_data().counterDataImage;
673 auto& counterDataScratchBuffer = get_persistent_data().counterDataScratchBuffer;
674 auto& profilerReplayMode = get_persistent_data().profilerReplayMode;
675 auto& profilerRange = get_persistent_data().profilerRange;
676 auto& deviceCount = get_persistent_data().deviceCount;
677 auto& deviceNum = get_persistent_data().deviceNum;
678 auto& computeCapabilityMajor = get_persistent_data().computeCapabilityMajor;
679 auto& computeCapabilityMinor = get_persistent_data().computeCapabilityMinor;
680 auto& chipName = get_persistent_data().chipName;
682 CUpti_Profiler_EnableProfiling_Params enableProfilingParams = {
683 CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE
686 CUpti_Profiler_BeginPass_Params beginPassParams = {
687 CUpti_Profiler_BeginPass_Params_STRUCT_SIZE
// Order: begin the pass first, then enable profiling.
690 TIMEMORY_CUPTI_API_CALL(cuptiProfilerBeginPass(&beginPassParams));
691 TIMEMORY_CUPTI_API_CALL(cuptiProfilerEnableProfiling(&enableProfilingParams));
// NOTE(review): the enclosing function header is not visible in this excerpt;
// this looks like the teardown counterpart of configure().
// Unsets the profiler config, ends the session and de-initializes the
// profiler, then writes the counter-data image and scratch buffer to files and
// prints the evaluated metric values.
701 auto& chipName = get_persistent_data().chipName;
702 auto& counterDataImage = get_persistent_data().counterDataImage;
703 auto& counterDataScratchBuffer = get_persistent_data().counterDataScratchBuffer;
704 auto& CounterDataFileName = get_persistent_data().CounterDataFileName;
705 auto& CounterDataSBFileName = get_persistent_data().CounterDataSBFileName;
706 auto& metricNames = get_persistent_data().metricNames;
707 auto& cuContext = get_persistent_data().cuContext;
709 CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {
710 CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE
713 CUpti_Profiler_EndSession_Params endSessionParams = {
714 CUpti_Profiler_EndSession_Params_STRUCT_SIZE
717 CUpti_Profiler_DeInitialize_Params profilerDeInitializeParams = {
718 CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE
// Teardown order: unset config, end session, de-initialize.
721 TIMEMORY_CUPTI_API_CALL(cuptiProfilerUnsetConfig(&unsetConfigParams));
722 TIMEMORY_CUPTI_API_CALL(cuptiProfilerEndSession(&endSessionParams));
724 TIMEMORY_CUPTI_API_CALL(cuptiProfilerDeInitialize(&profilerDeInitializeParams));
// Dump both buffers to the configured file names; the boolean results of
// WriteBinaryFile are not checked.
727 WriteBinaryFile(CounterDataFileName.c_str(), counterDataImage);
728 WriteBinaryFile(CounterDataSBFileName.c_str(), counterDataScratchBuffer);
// Print every metric value; the boolean result is not checked.
732 PrintMetricValues(chipName, counterDataImage, metricNames);
// Counterpart of enable(): disables profiling, ends the current pass and
// flushes the counter data, using locally constructed parameter structs.
// NOTE(review): how `enabled` is checked or updated and what is returned are
// not visible in this excerpt.
738cupti_profiler::disable()
740 auto&
enabled = get_persistent_data().enabled;
741 auto& cuContext = get_persistent_data().cuContext;
746 CUpti_Profiler_DisableProfiling_Params disableProfilingParams = {
747 CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE
750 CUpti_Profiler_EndPass_Params endPassParams = {
751 CUpti_Profiler_EndPass_Params_STRUCT_SIZE
754 CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
755 CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE
// Order: disable profiling, end the pass, then flush the counter data.
758 TIMEMORY_CUPTI_API_CALL(cuptiProfilerDisableProfiling(&disableProfilingParams));
759 TIMEMORY_CUPTI_API_CALL(cuptiProfilerEndPass(&endPassParams));
760 TIMEMORY_CUPTI_API_CALL(cuptiProfilerFlushCounterData(&flushCounterDataParams));
// Returns the part of `metricName` that precedes the first "__".
// When "__" does not occur, find() yields npos and substr(0, npos) returns the
// whole string unchanged.
768cupti_profiler::GetHwUnit(
const std::string& metricName)
770 return metricName.substr(0, metricName.find(
"__", 0));
// Evaluates every metric in `metricNames` for every range stored in
// `counterDataImage`.
//
// chipName           chip identifier used to create the NVPW metrics context.
// counterDataImage   collected counter data (taken by value, i.e. copied).
// metricNames        metric names to evaluate (taken by value).
// metricNameValueMap [out] resized to metricNames.size(); entry i receives the
//                    metric name, the number of ranges and one
//                    (rangeName, value) pair per range.
//
// NOTE(review): braces, return statements and the lines that wrap the context
// destruction are not visible in this excerpt.
776cupti_profiler::GetMetricGpuValue(
std::string chipName,
777 std::vector<uint8_t> counterDataImage,
778 std::vector<std::string> metricNames,
779 std::vector<MetricNameValue>& metricNameValueMap)
// Nothing to evaluate without counter data.
781 if(!counterDataImage.size())
783 std::cout <<
"Counter Data Image is empty!\n";
// Create a metrics context for the given chip.
787 NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = {
788 NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE
791 metricsContextCreateParams.pChipName = chipName.c_str();
792 TIMEMORY_RETURN_IF_NVPW_ERROR(
793 false, NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams));
// Destruction of that context.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
795 NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = {
796 NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE
799 metricsContextDestroyParams.pMetricsContext =
800 metricsContextCreateParams.pMetricsContext;
803 NVPW_MetricsContext_Destroy(
804 (NVPW_MetricsContext_Destroy_Params*) &metricsContextDestroyParams);
// Number of ranges recorded in the counter-data image.
807 NVPW_CounterData_GetNumRanges_Params getNumRangesParams = {
808 NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE
811 getNumRangesParams.pCounterDataImage = &counterDataImage[0];
812 TIMEMORY_RETURN_IF_NVPW_ERROR(
false,
813 NVPW_CounterData_GetNumRanges(&getNumRangesParams));
// `reqName` is sized up front and not resized afterwards in the visible code,
// so the c_str() pointers stored in `metricNamePtrs` stay valid.
815 std::vector<std::string> reqName;
816 reqName.resize(metricNames.size());
818 bool isolated =
true;
819 bool keepInstances =
true;
820 std::vector<const char*> metricNamePtrs;
821 metricNameValueMap.resize(metricNames.size());
// Parse each requested name and prepare the output entry for that metric.
// `isolated` / `keepInstances` are passed by pointer to every parse call.
823 for(
size_t metricIndex = 0; metricIndex < metricNames.size(); ++metricIndex)
825 ParseMetricNameString(metricNames[metricIndex], &reqName[metricIndex], &isolated,
827 metricNamePtrs.push_back(reqName[metricIndex].c_str());
828 metricNameValueMap[metricIndex].metricName = metricNames[metricIndex];
829 metricNameValueMap[metricIndex].numRanges = getNumRangesParams.numRanges;
// Evaluate all metrics once per recorded range.
832 for(
size_t rangeIndex = 0; rangeIndex < getNumRangesParams.numRanges; ++rangeIndex)
834 std::vector<const char*> descriptionPtrs;
// Two-call pattern: the first call reports the number of descriptions, the
// second fills `descriptionPtrs`.
836 NVPW_Profiler_CounterData_GetRangeDescriptions_Params getRangeDescParams = {
837 NVPW_Profiler_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE
839 getRangeDescParams.pCounterDataImage = &counterDataImage[0];
840 getRangeDescParams.rangeIndex = rangeIndex;
841 TIMEMORY_RETURN_IF_NVPW_ERROR(
842 false, NVPW_Profiler_CounterData_GetRangeDescriptions(&getRangeDescParams));
843 descriptionPtrs.resize(getRangeDescParams.numDescriptions);
845 getRangeDescParams.ppDescriptions = &descriptionPtrs[0];
846 TIMEMORY_RETURN_IF_NVPW_ERROR(
847 false, NVPW_Profiler_CounterData_GetRangeDescriptions(&getRangeDescParams));
// Build the range name by appending the descriptions (the declaration of
// `rangeName` and any separator logic are not visible here).
850 for(
size_t descriptionIndex = 0;
851 descriptionIndex < getRangeDescParams.numDescriptions; ++descriptionIndex)
857 rangeName += descriptionPtrs[descriptionIndex];
860 std::vector<double> gpuValues;
861 gpuValues.resize(metricNames.size());
// Select this range's counter data. `isolated` is hard-coded to true here.
862 NVPW_MetricsContext_SetCounterData_Params setCounterDataParams = {
863 NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE
865 setCounterDataParams.pMetricsContext = metricsContextCreateParams.pMetricsContext;
866 setCounterDataParams.pCounterDataImage = &counterDataImage[0];
867 setCounterDataParams.isolated =
true;
868 setCounterDataParams.rangeIndex = rangeIndex;
// NOTE(review): the status returned by this call is not checked.
869 NVPW_MetricsContext_SetCounterData(&setCounterDataParams);
871 NVPW_MetricsContext_EvaluateToGpuValues_Params evalToGpuParams = {
872 NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE
874 evalToGpuParams.pMetricsContext = metricsContextCreateParams.pMetricsContext;
875 evalToGpuParams.numMetrics = metricNamePtrs.size();
876 evalToGpuParams.ppMetricNames = &metricNamePtrs[0];
877 evalToGpuParams.pMetricValues = &gpuValues[0];
// NOTE(review): the status returned by this call is not checked either.
878 NVPW_MetricsContext_EvaluateToGpuValues(&evalToGpuParams);
// Record (rangeName, value) for each metric.
879 for(
size_t metricIndex = 0; metricIndex < metricNames.size(); ++metricIndex)
881 metricNameValueMap[metricIndex].rangeNameMetricValueMap.push_back(
882 std::make_pair(rangeName, gpuValues[metricIndex]));
// Evaluates every metric in `metricNames` for every range stored in
// `counterDataImage` and prints one line per (range, metric) to std::cout.
// The steps mirror GetMetricGpuValue, except that values are printed instead
// of stored.
//
// All three parameters are taken by value (copied on each call).
// NOTE(review): braces and return statements are not visible in this excerpt.
892cupti_profiler::PrintMetricValues(
std::string chipName,
893 std::vector<uint8_t> counterDataImage,
894 std::vector<std::string> metricNames)
// Nothing to print without counter data.
896 if(!counterDataImage.size())
898 std::cout <<
"Counter Data Image is empty!\n";
// Create a metrics context for the given chip.
902 NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = {
903 NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE
905 metricsContextCreateParams.pChipName = chipName.c_str();
906 TIMEMORY_RETURN_IF_NVPW_ERROR(
907 false, NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams));
// Destruction of that context.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
909 NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = {
910 NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE
912 metricsContextDestroyParams.pMetricsContext =
913 metricsContextCreateParams.pMetricsContext;
915 NVPW_MetricsContext_Destroy(
916 (NVPW_MetricsContext_Destroy_Params*) &metricsContextDestroyParams);
// Number of ranges recorded in the counter-data image.
919 NVPW_CounterData_GetNumRanges_Params getNumRangesParams = {
920 NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE
922 getNumRangesParams.pCounterDataImage = &counterDataImage[0];
923 TIMEMORY_RETURN_IF_NVPW_ERROR(
false,
924 NVPW_CounterData_GetNumRanges(&getNumRangesParams));
// `reqName` is sized up front so the c_str() pointers collected below stay
// valid.
926 std::vector<std::string> reqName;
927 reqName.resize(metricNames.size());
928 bool isolated =
true;
929 bool keepInstances =
true;
930 std::vector<const char*> metricNamePtrs;
931 for(
size_t metricIndex = 0; metricIndex < metricNames.size(); ++metricIndex)
933 ParseMetricNameString(metricNames[metricIndex], &reqName[metricIndex], &isolated,
935 metricNamePtrs.push_back(reqName[metricIndex].c_str());
// Evaluate and print all metrics once per recorded range.
938 for(
size_t rangeIndex = 0; rangeIndex < getNumRangesParams.numRanges; ++rangeIndex)
940 std::vector<const char*> descriptionPtrs;
// Two-call pattern: the first call reports the number of descriptions, the
// second fills `descriptionPtrs`.
942 NVPW_Profiler_CounterData_GetRangeDescriptions_Params getRangeDescParams = {
943 NVPW_Profiler_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE
945 getRangeDescParams.pCounterDataImage = &counterDataImage[0];
946 getRangeDescParams.rangeIndex = rangeIndex;
947 TIMEMORY_RETURN_IF_NVPW_ERROR(
948 false, NVPW_Profiler_CounterData_GetRangeDescriptions(&getRangeDescParams));
950 descriptionPtrs.resize(getRangeDescParams.numDescriptions);
952 getRangeDescParams.ppDescriptions = &descriptionPtrs[0];
953 TIMEMORY_RETURN_IF_NVPW_ERROR(
954 false, NVPW_Profiler_CounterData_GetRangeDescriptions(&getRangeDescParams));
// Build the range name by appending the descriptions (the declaration of
// `rangeName` and any separator logic are not visible here).
957 for(
size_t descriptionIndex = 0;
958 descriptionIndex < getRangeDescParams.numDescriptions; ++descriptionIndex)
964 rangeName += descriptionPtrs[descriptionIndex];
967 std::vector<double> gpuValues;
968 gpuValues.resize(metricNames.size());
// Select this range's counter data. `isolated` is hard-coded to true here.
970 NVPW_MetricsContext_SetCounterData_Params setCounterDataParams = {
971 NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE
973 setCounterDataParams.pMetricsContext = metricsContextCreateParams.pMetricsContext;
974 setCounterDataParams.pCounterDataImage = &counterDataImage[0];
975 setCounterDataParams.isolated =
true;
976 setCounterDataParams.rangeIndex = rangeIndex;
// NOTE(review): the status returned by this call is not checked.
977 NVPW_MetricsContext_SetCounterData(&setCounterDataParams);
979 NVPW_MetricsContext_EvaluateToGpuValues_Params evalToGpuParams = {
980 NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE
982 evalToGpuParams.pMetricsContext = metricsContextCreateParams.pMetricsContext;
983 evalToGpuParams.numMetrics = metricNamePtrs.size();
984 evalToGpuParams.ppMetricNames = &metricNamePtrs[0];
985 evalToGpuParams.pMetricValues = &gpuValues[0];
// NOTE(review): the status returned by this call is not checked either.
986 NVPW_MetricsContext_EvaluateToGpuValues(&evalToGpuParams);
// One output line per metric; std::endl flushes after each line.
988 for(
size_t metricIndex = 0; metricIndex < metricNames.size(); ++metricIndex)
990 std::cout <<
"rangeName: " << rangeName
991 <<
"\tmetricName: " << metricNames[metricIndex]
992 <<
"\tgpuValue: " << gpuValues[metricIndex] << std::endl;
// Expands the requested metrics into raw-metric requests.
//
// pMetricsContext   metrics context used to look up metric properties.
// metricNames       metric names to expand (taken by value).
// rawMetricRequests [out] receives one request per raw metric name in `temp`.
// temp              [out] receives the raw dependency names. The requests
//                   store c_str() pointers into these strings, so `temp` must
//                   stay alive and unmodified while the requests are used.
//
// NOTE(review): braces, the declaration of `reqName` and the return statements
// are not visible in this excerpt.
1001cupti_profiler::GetRawMetricRequests(
1002 NVPA_MetricsContext* pMetricsContext, std::vector<std::string> metricNames,
1003 std::vector<NVPA_RawMetricRequest>& rawMetricRequests, std::vector<std::string>& temp)
1006 bool isolated =
true;
1007 bool keepInstances =
true;
1009 for(
auto& metricName : metricNames)
1011 ParseMetricNameString(metricName, &reqName, &isolated, &keepInstances);
// `keepInstances` is forced back to true regardless of what the parse set.
1014 keepInstances =
true;
// Begin/End bracket the property lookup for this metric.
1015 NVPW_MetricsContext_GetMetricProperties_Begin_Params
1016 getMetricPropertiesBeginParams = {
1017 NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE
1019 getMetricPropertiesBeginParams.pMetricsContext = pMetricsContext;
1020 getMetricPropertiesBeginParams.pMetricName = reqName.c_str();
1022 TIMEMORY_RETURN_IF_NVPW_ERROR(
false,
1023 NVPW_MetricsContext_GetMetricProperties_Begin(
1024 &getMetricPropertiesBeginParams));
// ppRawMetricDependencies is walked until a null entry; each name is copied
// into `temp`.
1026 for(
const char** ppMetricDependencies =
1027 getMetricPropertiesBeginParams.ppRawMetricDependencies;
1028 *ppMetricDependencies; ++ppMetricDependencies)
1030 temp.push_back(*ppMetricDependencies);
1032 NVPW_MetricsContext_GetMetricProperties_End_Params
1033 getMetricPropertiesEndParams = {
1034 NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE
1036 getMetricPropertiesEndParams.pMetricsContext = pMetricsContext;
1037 TIMEMORY_RETURN_IF_NVPW_ERROR(
false, NVPW_MetricsContext_GetMetricProperties_End(
1038 &getMetricPropertiesEndParams));
// Create one request per collected raw metric name, using the current values
// of `isolated` and `keepInstances`.
1043 for(
auto& rawMetricName : temp)
1045 NVPA_RawMetricRequest metricRequest = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE };
1046 metricRequest.pMetricName = rawMetricName.c_str();
1047 metricRequest.isolated = isolated;
1048 metricRequest.keepInstances = keepInstances;
1049 rawMetricRequests.push_back(metricRequest);
// Builds the profiler config image for `metricNames` on chip `chipName`.
//
// chipName    chip identifier (taken by value).
// metricNames metric names to schedule (taken by value).
// configImage [out] resized and filled with the generated config image.
//
// NOTE(review): braces, return statements and the lines that wrap the two
// Destroy calls are not visible in this excerpt.
1058cupti_profiler::GetConfigImage(
std::string chipName, std::vector<std::string> metricNames,
1059 std::vector<uint8_t>& configImage)
// Create a metrics context for the chip.
1061 NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = {
1062 NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE
1064 metricsContextCreateParams.pChipName = chipName.c_str();
1065 TIMEMORY_RETURN_IF_NVPW_ERROR(
1066 false, NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams));
// Destruction of that context.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
1068 NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = {
1069 NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE
1071 metricsContextDestroyParams.pMetricsContext =
1072 metricsContextCreateParams.pMetricsContext;
1074 NVPW_MetricsContext_Destroy(
1075 (NVPW_MetricsContext_Destroy_Params*) &metricsContextDestroyParams);
// Expand the metric names into raw-metric requests. `temp` owns the strings
// the requests point to, so it must live until the requests are consumed.
// The boolean result of GetRawMetricRequests is not checked.
1078 std::vector<NVPA_RawMetricRequest> rawMetricRequests;
1079 std::vector<std::string> temp;
1080 GetRawMetricRequests(metricsContextCreateParams.pMetricsContext, metricNames,
1081 rawMetricRequests, temp);
// Create the raw-metrics config object for profiler activity on this chip.
1083 NVPA_RawMetricsConfigOptions metricsConfigOptions = {
1084 NVPA_RAW_METRICS_CONFIG_OPTIONS_STRUCT_SIZE
1086 metricsConfigOptions.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
1087 metricsConfigOptions.pChipName = chipName.c_str();
1088 NVPA_RawMetricsConfig* pRawMetricsConfig;
1089 TIMEMORY_RETURN_IF_NVPW_ERROR(
1090 false, NVPA_RawMetricsConfig_Create(&metricsConfigOptions, &pRawMetricsConfig));
// Destruction of the config object.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
1092 NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
1093 NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE
1095 rawMetricsConfigDestroyParams.pRawMetricsConfig = pRawMetricsConfig;
1097 NVPW_RawMetricsConfig_Destroy(
1098 (NVPW_RawMetricsConfig_Destroy_Params*) &rawMetricsConfigDestroyParams);
// Pass group: begin, add all requests, end.
1101 NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {
1102 NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE
1104 beginPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
1105 TIMEMORY_RETURN_IF_NVPW_ERROR(
1106 false, NVPW_RawMetricsConfig_BeginPassGroup(&beginPassGroupParams));
1108 NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {
1109 NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE
1111 addMetricsParams.pRawMetricsConfig = pRawMetricsConfig;
// NOTE(review): `&rawMetricRequests[0]` requires at least one request.
1112 addMetricsParams.pRawMetricRequests = &rawMetricRequests[0];
1113 addMetricsParams.numMetricRequests = rawMetricRequests.size();
1114 TIMEMORY_RETURN_IF_NVPW_ERROR(
false,
1115 NVPW_RawMetricsConfig_AddMetrics(&addMetricsParams));
1117 NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = {
1118 NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE
1120 endPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
1121 TIMEMORY_RETURN_IF_NVPW_ERROR(
1122 false, NVPW_RawMetricsConfig_EndPassGroup(&endPassGroupParams));
// Generate the config image inside the config object.
1124 NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParams = {
1125 NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE
1127 generateConfigImageParams.pRawMetricsConfig = pRawMetricsConfig;
1128 TIMEMORY_RETURN_IF_NVPW_ERROR(
1129 false, NVPW_RawMetricsConfig_GenerateConfigImage(&generateConfigImageParams));
// Two-call pattern: the first call (no buffer) yields the size in bytesCopied,
// which sizes `configImage`; the second call copies the image into it.
1131 NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParams = {
1132 NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE
1134 getConfigImageParams.pRawMetricsConfig = pRawMetricsConfig;
1135 getConfigImageParams.bytesAllocated = 0;
1136 getConfigImageParams.pBuffer = NULL;
1137 TIMEMORY_RETURN_IF_NVPW_ERROR(
1138 false, NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParams));
1140 configImage.resize(getConfigImageParams.bytesCopied);
1142 getConfigImageParams.bytesAllocated = configImage.size();
1143 getConfigImageParams.pBuffer = &configImage[0];
1144 TIMEMORY_RETURN_IF_NVPW_ERROR(
1145 false, NVPW_RawMetricsConfig_GetConfigImage(&getConfigImageParams));
// Builds the counter-data prefix image for `metricNames` on chip `chipName`.
//
// chipName               chip identifier (taken by value).
// metricNames            metric names to include (taken by value).
// counterDataImagePrefix [out] resized and filled with the prefix image.
//
// NOTE(review): braces, return statements and the lines that wrap the two
// Destroy calls are not visible in this excerpt.
1153cupti_profiler::GetCounterDataPrefixImage(
std::string chipName,
1154 std::vector<std::string> metricNames,
1155 std::vector<uint8_t>& counterDataImagePrefix)
// Create a metrics context for the chip.
1157 NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = {
1158 NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE
1161 metricsContextCreateParams.pChipName = chipName.c_str();
1163 TIMEMORY_RETURN_IF_NVPW_ERROR(
1164 false, NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams));
// Destruction of that context.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
1166 NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = {
1167 NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE
1170 metricsContextDestroyParams.pMetricsContext =
1171 metricsContextCreateParams.pMetricsContext;
1174 NVPW_MetricsContext_Destroy(
1175 (NVPW_MetricsContext_Destroy_Params*) &metricsContextDestroyParams);
// Expand the metric names into raw-metric requests. `temp` owns the strings
// the requests point to. The boolean result is not checked.
1178 std::vector<NVPA_RawMetricRequest> rawMetricRequests;
1179 std::vector<std::string> temp;
1180 GetRawMetricRequests(metricsContextCreateParams.pMetricsContext, metricNames,
1181 rawMetricRequests, temp);
// Create a counter-data builder for the chip.
1183 NVPW_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = {
1184 NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE
1187 counterDataBuilderCreateParams.pChipName = chipName.c_str();
1189 TIMEMORY_RETURN_IF_NVPW_ERROR(
1190 false, NVPW_CounterDataBuilder_Create(&counterDataBuilderCreateParams));
// Destruction of the builder.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
1192 NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderDestroyParams = {
1193 NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE
1196 counterDataBuilderDestroyParams.pCounterDataBuilder =
1197 counterDataBuilderCreateParams.pCounterDataBuilder;
1200 NVPW_CounterDataBuilder_Destroy(
1201 (NVPW_CounterDataBuilder_Destroy_Params*) &counterDataBuilderDestroyParams);
// Register all raw-metric requests with the builder.
1204 NVPW_CounterDataBuilder_AddMetrics_Params addMetricsParams = {
1205 NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE
1208 addMetricsParams.pCounterDataBuilder =
1209 counterDataBuilderCreateParams.pCounterDataBuilder;
// NOTE(review): `&rawMetricRequests[0]` requires at least one request.
1210 addMetricsParams.pRawMetricRequests = &rawMetricRequests[0];
1211 addMetricsParams.numMetricRequests = rawMetricRequests.size();
1212 TIMEMORY_RETURN_IF_NVPW_ERROR(
false,
1213 NVPW_CounterDataBuilder_AddMetrics(&addMetricsParams));
// Two-call pattern: the first call (no buffer) yields the size in bytesCopied,
// which sizes `counterDataImagePrefix`; the second call copies the prefix.
1216 NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = {
1217 NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE
1220 getCounterDataPrefixParams.pCounterDataBuilder =
1221 counterDataBuilderCreateParams.pCounterDataBuilder;
1223 getCounterDataPrefixParams.bytesAllocated = 0;
1224 getCounterDataPrefixParams.pBuffer = NULL;
1225 TIMEMORY_RETURN_IF_NVPW_ERROR(
1226 false, NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams));
1228 counterDataImagePrefix.resize(getCounterDataPrefixParams.bytesCopied);
1230 getCounterDataPrefixParams.bytesAllocated = counterDataImagePrefix.size();
1231 getCounterDataPrefixParams.pBuffer = &counterDataImagePrefix[0];
1232 TIMEMORY_RETURN_IF_NVPW_ERROR(
1233 false, NVPW_CounterDataBuilder_GetCounterDataPrefix(&getCounterDataPrefixParams));
// Queries NVPW for the supported chip names and collects them into a set.
// If the query fails, the macro is given the still-empty `_ret` as its return
// value argument.
// NOTE(review): any condition guarding the std::cout output, and the final
// return statement, are not visible in this excerpt.
1240inline std::set<std::string>
1241cupti_profiler::ListSupportedChips()
1243 std::set<std::string>
_ret;
1245 NVPW_GetSupportedChipNames_Params getSupportedChipNames = {
1246 NVPW_GetSupportedChipNames_Params_STRUCT_SIZE
1248 TIMEMORY_RETURN_IF_NVPW_ERROR(
_ret,
1249 NVPW_GetSupportedChipNames(&getSupportedChipNames));
// Header lines for the printed listing.
1253 std::cout <<
"\n Number of supported chips : "
1254 << getSupportedChipNames.numChipNames;
1255 std::cout <<
"\n List of supported chips : \n";
// Insert every reported chip name into the result and print it.
1258 for(
size_t i = 0; i < getSupportedChipNames.numChipNames; i++)
1260 _ret.insert(getSupportedChipNames.ppChipNames[i]);
1262 std::cout <<
" " << getSupportedChipNames.ppChipNames[i] <<
"\n";
// Collects the metric names available on chip `chip` into a set.
//
// chip           chip name used to create the metrics context.
// listSubMetrics when false, the three hide*SubMetrics flags are set so that
//                peak, per-cycle and pct-of-peak sub-metrics are hidden.
//
// On an NVPW error the macro is given the current `_ret` as its return value
// argument.
// NOTE(review): any condition guarding the std::cout output, the wrappers
// around the Destroy/End calls and the final return are not visible here.
1270inline std::set<std::string>
1271cupti_profiler::ListMetrics(
const char* chip,
bool listSubMetrics)
1273 std::set<std::string>
_ret;
// Create a metrics context for the chip.
1275 NVPW_CUDA_MetricsContext_Create_Params metricsContextCreateParams = {
1276 NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE
1279 metricsContextCreateParams.pChipName = chip;
1281 TIMEMORY_RETURN_IF_NVPW_ERROR(
1282 _ret, NVPW_CUDA_MetricsContext_Create(&metricsContextCreateParams));
// Destruction of that context.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
1284 NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams = {
1285 NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE
1288 metricsContextDestroyParams.pMetricsContext =
1289 metricsContextCreateParams.pMetricsContext;
1292 NVPW_MetricsContext_Destroy(
1293 (NVPW_MetricsContext_Destroy_Params*) &metricsContextDestroyParams);
// Begin the metric-name enumeration with the sub-metric visibility flags.
1296 NVPW_MetricsContext_GetMetricNames_Begin_Params getMetricNameBeginParams = {
1297 NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE
1300 getMetricNameBeginParams.pMetricsContext = metricsContextCreateParams.pMetricsContext;
1301 getMetricNameBeginParams.hidePeakSubMetrics = !listSubMetrics;
1302 getMetricNameBeginParams.hidePerCycleSubMetrics = !listSubMetrics;
1303 getMetricNameBeginParams.hidePctOfPeakSubMetrics = !listSubMetrics;
1305 TIMEMORY_RETURN_IF_NVPW_ERROR(
1306 _ret, NVPW_MetricsContext_GetMetricNames_Begin(&getMetricNameBeginParams));
// Matching End call for the enumeration.
// NOTE(review): presumably wrapped in SCOPE_EXIT on a line not visible here.
1308 NVPW_MetricsContext_GetMetricNames_End_Params getMetricNameEndParams = {
1309 NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE
1312 getMetricNameEndParams.pMetricsContext = metricsContextCreateParams.pMetricsContext;
1315 NVPW_MetricsContext_GetMetricNames_End(
1316 (NVPW_MetricsContext_GetMetricNames_End_Params*) &getMetricNameEndParams);
// Print the total, then insert and print every metric name.
1320 std::cout << getMetricNameBeginParams.numMetrics
1321 <<
" metrics in total on the chip\n Metrics List : \n";
1323 for(
size_t i = 0; i < getMetricNameBeginParams.numMetrics; i++)
1325 _ret.insert(getMetricNameBeginParams.ppMetricNames[i]);
1327 std::cout << getMetricNameBeginParams.ppMetricNames[i] <<
"\n";
1337 bool* isolated,
bool* keepInstances)
1348 size_t pos = name.find(
'\n');
1349 if(
pos != std::string::npos)
1355 while(name.back() ==
' ')
1364 *keepInstances =
false;
1365 if(name.back() ==
'+')
1367 *keepInstances =
true;
1376 if(name.back() ==
'$')
1384 else if(name.back() ==
'&')
// Writes the bytes of `data` to the file named `pFileName`, opened in binary write
// mode ("wb"), and reports an open failure on std::cout.
//   pFileName - path passed straight to fopen.
//   data      - byte buffer; data.size() single-byte items are written.
// NOTE(review): the return type line and most of the body (embedded lines 1403-1406,
// 1408-1412, 1414+) are not visible in this listing, so the branch structure, any
// fclose, and the return values cannot be confirmed from here.
1400cupti_profiler::WriteBinaryFile(
const char* pFileName,
const std::vector<uint8_t>& data)
// "wb" creates or truncates the file for binary output.
1402 FILE*
fp = fopen(pFileName,
"wb");
// Element size 1, count data.size(): the whole buffer is written in one call.
// NOTE(review): the fwrite result is not captured on this line, and &data[0] requires
// a non-empty vector -- presumably guarded by an elided check; confirm.
1407 fwrite(&data[0], 1, data.size(),
fp);
// Diagnostic naming the file; presumably on the branch where fopen returned null -- confirm.
1413 std::cout <<
"Failed to open " << pFileName <<
"\n";
// Reads the entire file named `pFileName` into `image`.
//   pFileName - path passed straight to fopen (binary read mode, "rb").
//   image     - output buffer; resized to the file length and then filled by fread.
// The visible return yields true only when fread delivered exactly image.size() bytes.
// NOTE(review): the return type line, the conditions around the two diagnostics, and
// any fclose are on lines elided from this listing (e.g. 1426-1427, 1435-1436,
// 1438-1441) -- confirm against the complete source.
1423cupti_profiler::ReadBinaryFile(
const char* pFileName, std::vector<uint8_t>& image)
1425 FILE*
fp = fopen(pFileName,
"rb");
// Diagnostic naming the file; presumably on the branch where fopen returned null -- confirm.
1428 std::cout <<
"Failed to open " << pFileName <<
"\n";
// Determine the file length: seek to the end, read the offset, rewind to the start.
1432 fseek(
fp, 0, SEEK_END);
1433 const long fileLength = ftell(
fp);
1434 fseek(
fp, 0, SEEK_SET);
// Diagnostic for an empty file; presumably guarded by a check on fileLength -- confirm.
1437 std::cout << pFileName <<
" has zero length\n";
// Size the output buffer to the file length.
// NOTE(review): ftell returns a long and can report failure as -1; whether that case
// is excluded before this cast is not visible here -- confirm.
1442 image.resize((
size_t) fileLength);
// Element size 1, so `ret` is the number of bytes actually read.
1443 auto ret = fread(&image[0], 1, image.size(),
fp);
// A short read (ret != image.size()) is reported as false, a full read as true.
1445 return (ret != image.size()) ?
false :
true;
#define TIMEMORY_CUDA_DRIVER_API_CALL(...)
void load(Archive &ar, tim::node::graph< Tp > &d)
void serialize(std::string fname, exec_data< Counter > &obj)
const hash_alias_ptr_t hash_value_t std::string *& _ret
void set_prefix(TupleT< Tp... > &obj, Args &&... args)
void record(TupleT< Tp... > &obj, Args &&... args)
void stop(TupleT< Tp... > &obj, Args &&... args)
void start(TupleT< Tp... > &obj, Args &&... args)
std::bitset< scope_count > data_type
std::array< Tp, N > & operator+=(std::array< Tp, N > &, Other &&)
std::array< Tp, N > & operator-=(std::array< Tp, N > &, const std::array< Tp, N > &)
char const std::string & _prefix
void configure(std::initializer_list< EnumT > components, Args &&... args)
tim::mpl::apply< std::string > string
tim::popen::TIMEMORY_PIPE * fp
const std::string std::ostream * os
auto get(const auto_bundle< Tag, Types... > &_obj)
description("A generic option for any setting. Each argument MUST be passed in " "form: 'NAME=VALUE'. E.g. --timemory-args " "\"papi_events=PAPI_TOT_INS,PAPI_TOT_CYC\" text_output=off") .action([&](parser_t &p)
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
void consume_parameters(ArgsT &&...)