27#include "timemory/components/cuda/backends.hpp"
83#if defined(TIMEMORY_USE_CUDA_HALF)
99 tpls::papi, category::external,
100 category::hardware_counter, category::timing,
105 tpls::nvidia, category::external,
106 category::hardware_counter, category::timing,
126#if !defined(TIMEMORY_USE_PAPI)
137#if !defined(TIMEMORY_USE_CUPTI) || !defined(TIMEMORY_USE_CUDA)
144#elif !defined(TIMEMORY_USE_CUDA_HALF)
260template <
typename... Types>
261struct units<component::cpu_roofline<Types...>>
264 using display_type = std::vector<std::string>;
274 "cpu_roofline_dp_flops",
"cpu_roofline_dp",
275 "cpu_roofline_double")
282 "cpu_roofline_single")
286 "gpu_roofline_double")
297 "gpu_roofline_single")
#define TIMEMORY_PROPERTY_SPECIALIZATION(TYPE, ENUM, ID,...)
Specialization of the property specialization.
TIMEMORY_DECLARE_TEMPLATE_COMPONENT(user_bundle, size_t Idx, typename Tag=TIMEMORY_API) TIMEMORY_BUNDLE_INDEX(ompt_bundle_idx
int EventTypes int EventTypes papi_array< 16 > papi_array< 8 > TIMEMORY_ESC(component::papi_array< MaxNumEvents >)
int EventTypes int EventTypes papi_array< 16 > papi_array< 8 > os::supports_linux TIMEMORY_SET_TEMPLATE_COMPONENT_API(TIMEMORY_ESC(int... Evts), TIMEMORY_ESC(component::papi_tuple< Evts... >), tpls::papi, category::external, category::hardware_counter, os::supports_linux) TIMEMORY_SET_TEMPLATE_COMPONENT_API(TIMEMORY_ESC(typename RateT
false_type TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::perfetto_trace, false_type) TIMEMORY_PROPERTY_SPECIALIZATION(perfetto_trace
#define TIMEMORY_CPU_ROOFLINE_DP_FLOPS
#define TIMEMORY_GPU_ROOFLINE_SP_FLOPS
#define TIMEMORY_GPU_ROOFLINE_DP_FLOPS
#define TIMEMORY_CPU_ROOFLINE_FLOPS
#define TIMEMORY_CPU_ROOFLINE_SP_FLOPS
#define TIMEMORY_GPU_ROOFLINE_HP_FLOPS
#define TIMEMORY_GPU_ROOFLINE_FLOPS
#define TIMEMORY_DEFINE_VARIADIC_TRAIT(TRAIT, COMPONENT, VALUE, TYPE)
#define TIMEMORY_VARIADIC_STATISTICS_TYPE(COMPONENT, TYPE, TEMPLATE_TYPE)
gpu_roofline< float, double > gpu_roofline_flops
gpu_roofline< cuda::fp16_t > gpu_roofline_hp_flops
A specialization of tim::component::gpu_roofline for 16-bit floating point operations (depending on a...
gpu_roofline< double > gpu_roofline_dp_flops
A specialization of tim::component::gpu_roofline for 64-bit floating point operations.
gpu_roofline< float > gpu_roofline_sp_flops
A specialization of tim::component::gpu_roofline for 32-bit floating point operations.
cpu_roofline< double > cpu_roofline_dp_flops
A specialization of tim::component::cpu_roofline for 64-bit floating point operations.
cpu_roofline< float > cpu_roofline_sp_flops
A specialization of tim::component::cpu_roofline for 32-bit floating point operations.
cpu_roofline< float, double > cpu_roofline_flops
std::integral_constant< int, N > priority_constant
Combines hardware counters and timers and executes the empirical roofline toolkit during application ...
Combines hardware counters and timers and executes the empirical roofline toolkit during application ...
typename typename typename