timemory  3.2.1
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
types.hpp
Go to the documentation of this file.
1 // MIT License
2 //
3 // Copyright (c) 2020, The Regents of the University of California,
4 // through Lawrence Berkeley National Laboratory (subject to receipt of any
5 // required approvals from the U.S. Dept. of Energy). All rights reserved.
6 //
7 // Permission is hereby granted, free of charge, to any person obtaining a copy
8 // of this software and associated documentation files (the "Software"), to deal
9 // in the Software without restriction, including without limitation the rights
10 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 // copies of the Software, and to permit persons to whom the Software is
12 // furnished to do so, subject to the following conditions:
13 //
14 // The above copyright notice and this permission notice shall be included in all
15 // copies or substantial portions of the Software.
16 //
17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 // SOFTWARE.
24 
25 #pragma once
26 
27 #include "timemory/components/cuda/backends.hpp"
29 #include "timemory/enum.h"
31 #include "timemory/mpl/types.hpp"
32 
33 TIMEMORY_DECLARE_TEMPLATE_COMPONENT(cpu_roofline, typename... Types)
34 
35 TIMEMORY_DECLARE_TEMPLATE_COMPONENT(gpu_roofline, typename... Types)
36 
37 namespace tim
38 {
39 namespace component
40 {
41 /// \typedef tim::component::cpu_roofline<float> tim::component::cpu_roofline_sp_flops
42 ///
43 /// \brief A specialization of \ref tim::component::cpu_roofline for 32-bit floating point
44 /// operations
46 
47 /// \typedef tim::component::cpu_roofline<double> tim::component::cpu_roofline_dp_flops
48 ///
49 /// \brief A specialization of \ref tim::component::cpu_roofline for 64-bit floating point
50 /// operations
52 
53 /// \typedef tim::component::cpu_roofline<float, double>
54 /// tim::component::cpu_roofline_flops
55 ///
56 /// \brief A specialization of \ref tim::component::cpu_roofline for 32-bit and 64-bit
57 /// floating point operations
59 
60 /// \typedef tim::component::gpu_roofline<cuda::fp16_t> tim::component::gpu_roofline_hp_flops
61 ///
62 /// \brief A specialization of \ref tim::component::gpu_roofline for 16-bit floating point
63 /// operations (depending on availability).
65 
66 /// \typedef tim::component::gpu_roofline<float> tim::component::gpu_roofline_sp_flops
67 ///
68 /// \brief A specialization of \ref tim::component::gpu_roofline for 32-bit floating point
69 /// operations
71 
72 /// \typedef tim::component::gpu_roofline<double> tim::component::gpu_roofline_dp_flops
73 ///
74 /// \brief A specialization of \ref tim::component::gpu_roofline for 64-bit floating point
75 /// operations
77 
78 /// \typedef tim::component::gpu_roofline<float, double>
79 /// tim::component::gpu_roofline_flops
80 ///
81 /// \brief A specialization of \ref tim::component::gpu_roofline for 16-bit (possibly),
82 /// 32-bit, and 64-bit floating point operations
83 #if defined(TIMEMORY_USE_CUDA_HALF)
85 #else
87 #endif
88 } // namespace component
89 } // namespace tim
90 
91 //--------------------------------------------------------------------------------------//
92 //
93 // APIs
94 //
95 //--------------------------------------------------------------------------------------//
96 
98  TIMEMORY_ESC(component::cpu_roofline<Types...>),
99  tpls::papi, category::external,
100  category::hardware_counter, category::timing,
101  os::supports_linux)
102 
104  TIMEMORY_ESC(component::gpu_roofline<Types...>),
105  tpls::nvidia, category::external,
106  category::hardware_counter, category::timing,
107  os::agnostic)
108 
109 //--------------------------------------------------------------------------------------//
110 //
111 // STATISTICS
112 //
113 //--------------------------------------------------------------------------------------//
114 
115 TIMEMORY_VARIADIC_STATISTICS_TYPE(component::cpu_roofline, std::vector<double>, typename)
116 TIMEMORY_VARIADIC_STATISTICS_TYPE(component::gpu_roofline, std::vector<double>, typename)
117 
118 //--------------------------------------------------------------------------------------//
119 //
120 // IS AVAILABLE
121 //
122 //--------------------------------------------------------------------------------------//
123 //
124 // PAPI
125 //
126 #if !defined(TIMEMORY_USE_PAPI)
127 TIMEMORY_DEFINE_VARIADIC_TRAIT(is_available, component::cpu_roofline, false_type,
128  typename)
129 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::cpu_roofline_flops, false_type)
130 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::cpu_roofline_sp_flops, false_type)
131 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::cpu_roofline_dp_flops, false_type)
132 #endif
133 
134 //
135 // CUDA and CUPTI
136 //
137 #if !defined(TIMEMORY_USE_CUPTI) || !defined(TIMEMORY_USE_CUDA)
138 TIMEMORY_DEFINE_VARIADIC_TRAIT(is_available, component::gpu_roofline, false_type,
139  typename)
140 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::gpu_roofline_flops, false_type)
141 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::gpu_roofline_hp_flops, false_type)
142 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::gpu_roofline_sp_flops, false_type)
143 TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, component::gpu_roofline_dp_flops, false_type)
144 #elif !defined(TIMEMORY_USE_CUDA_HALF)
146 #endif
147 
148 //--------------------------------------------------------------------------------------//
149 //
150 // CUSTOM UNIT PRINTING
151 //
152 //--------------------------------------------------------------------------------------//
153 
154 TIMEMORY_DEFINE_VARIADIC_TRAIT(custom_unit_printing, component::cpu_roofline, true_type,
155  typename)
156 TIMEMORY_DEFINE_VARIADIC_TRAIT(custom_unit_printing, component::gpu_roofline, true_type,
157  typename)
158 
159 //--------------------------------------------------------------------------------------//
160 //
161 // CUSTOM LABEL PRINTING
162 //
163 //--------------------------------------------------------------------------------------//
164 
165 TIMEMORY_DEFINE_VARIADIC_TRAIT(custom_label_printing, component::cpu_roofline, true_type,
166  typename)
167 TIMEMORY_DEFINE_VARIADIC_TRAIT(custom_label_printing, component::gpu_roofline, true_type,
168  typename)
169 
170 //--------------------------------------------------------------------------------------//
171 //
172 // ARRAY SERIALIZATION
173 //
174 //--------------------------------------------------------------------------------------//
175 
176 TIMEMORY_DEFINE_VARIADIC_TRAIT(array_serialization, component::cpu_roofline, true_type,
177  typename)
178 
179 //--------------------------------------------------------------------------------------//
180 //
181 // REQUIRES JSON
182 //
183 //--------------------------------------------------------------------------------------//
184 
185 TIMEMORY_DEFINE_VARIADIC_TRAIT(requires_json, component::cpu_roofline, true_type,
186  typename)
187 
188 TIMEMORY_DEFINE_VARIADIC_TRAIT(requires_json, component::gpu_roofline, true_type,
189  typename)
190 
191 //--------------------------------------------------------------------------------------//
192 //
193 // SUPPORTS CUSTOM RECORD
194 //
195 //--------------------------------------------------------------------------------------//
196 
197 TIMEMORY_DEFINE_VARIADIC_TRAIT(supports_custom_record, component::cpu_roofline, true_type,
198  typename)
199 
200 TIMEMORY_DEFINE_VARIADIC_TRAIT(supports_custom_record, component::gpu_roofline, true_type,
201  typename)
202 
203 //--------------------------------------------------------------------------------------//
204 //
205 // ITERABLE MEASUREMENT
206 //
207 //--------------------------------------------------------------------------------------//
208 
209 TIMEMORY_DEFINE_VARIADIC_TRAIT(iterable_measurement, component::cpu_roofline, true_type,
210  typename)
211 
212 TIMEMORY_DEFINE_VARIADIC_TRAIT(iterable_measurement, component::gpu_roofline, true_type,
213  typename)
214 
215 //--------------------------------------------------------------------------------------//
216 //
217 // CUSTOM SERIALIZATION
218 //
219 //--------------------------------------------------------------------------------------//
220 
221 TIMEMORY_DEFINE_VARIADIC_TRAIT(custom_serialization, component::cpu_roofline, true_type,
222  typename)
223 
224 TIMEMORY_DEFINE_VARIADIC_TRAIT(custom_serialization, component::gpu_roofline, true_type,
225  typename)
226 
227 //--------------------------------------------------------------------------------------//
228 //
229 // SECONDARY DATA
230 //
231 //--------------------------------------------------------------------------------------//
232 
233 TIMEMORY_DEFINE_VARIADIC_TRAIT(secondary_data, component::gpu_roofline, true_type,
234  typename)
235 
236 //--------------------------------------------------------------------------------------//
237 //
238 // FINALIZATION PRIORITY
239 //
240 //--------------------------------------------------------------------------------------//
241 
242 // finalize early
243 TIMEMORY_DEFINE_VARIADIC_TRAIT(fini_priority, component::cpu_roofline,
245 
246 TIMEMORY_DEFINE_VARIADIC_TRAIT(fini_priority, component::gpu_roofline,
248 
249 //--------------------------------------------------------------------------------------//
250 //
251 // UNITS SPECIALIZATIONS
252 //
253 //--------------------------------------------------------------------------------------//
254 
255 namespace tim
256 {
257 namespace trait
258 {
259 // units trait for any cpu_roofline<Types...>: `type` is double, `display_type` is a vector of strings
260 template <typename... Types>
261 struct units<component::cpu_roofline<Types...>>
262 {
263  using type = double;
264  using display_type = std::vector<std::string>;
265 };
266 //
267 } // namespace trait
268 } // namespace tim
269 
270 //
271 //======================================================================================//
272 //
274  "cpu_roofline_dp_flops", "cpu_roofline_dp",
275  "cpu_roofline_double")
276 
278  "cpu_roofline_flops", "cpu_roofline")
279 
281  "cpu_roofline_sp_flops", "cpu_roofline_sp",
282  "cpu_roofline_single")
283 
285  "gpu_roofline_dp_flops", "gpu_roofline_dp",
286  "gpu_roofline_double")
287 
289  "gpu_roofline_flops", "gpu_roofline")
290 
292  "gpu_roofline_hp_flops", "gpu_roofline_hp",
293  "gpu_roofline_half")
294 
296  "gpu_roofline_sp_flops", "gpu_roofline_sp",
297  "gpu_roofline_single")
298 //
299 //======================================================================================//
300 //
#define TIMEMORY_PROPERTY_SPECIALIZATION(TYPE, ENUM, ID,...)
Specialization of the property specialization.
Definition: macros.hpp:223
TIMEMORY_DECLARE_TEMPLATE_COMPONENT(user_bundle, size_t Idx, typename Tag=TIMEMORY_API) TIMEMORY_BUNDLE_INDEX(ompt_bundle_idx
int EventTypes int EventTypes papi_array< 16 > papi_array< 8 > TIMEMORY_ESC(component::papi_array< MaxNumEvents >)
int EventTypes int EventTypes papi_array< 16 > papi_array< 8 > os::supports_linux TIMEMORY_SET_TEMPLATE_COMPONENT_API(TIMEMORY_ESC(int... Evts), TIMEMORY_ESC(component::papi_tuple< Evts... >), tpls::papi, category::external, category::hardware_counter, os::supports_linux) TIMEMORY_SET_TEMPLATE_COMPONENT_API(TIMEMORY_ESC(typename RateT
TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available, quirk::explicit_start, false_type) TIMEMORY_DEFINE_CONCRETE_TRAIT(is_available
#define TIMEMORY_CPU_ROOFLINE_DP_FLOPS
Definition: enum.h:402
#define TIMEMORY_GPU_ROOFLINE_SP_FLOPS
Definition: enum.h:471
#define TIMEMORY_GPU_ROOFLINE_DP_FLOPS
Definition: enum.h:462
#define TIMEMORY_CPU_ROOFLINE_FLOPS
Definition: enum.h:405
#define TIMEMORY_CPU_ROOFLINE_SP_FLOPS
Definition: enum.h:408
#define TIMEMORY_GPU_ROOFLINE_HP_FLOPS
Definition: enum.h:468
#define TIMEMORY_GPU_ROOFLINE_FLOPS
Definition: enum.h:465
#define TIMEMORY_DEFINE_VARIADIC_TRAIT(TRAIT, COMPONENT, VALUE, TYPE)
Definition: macros.hpp:117
#define TIMEMORY_VARIADIC_STATISTICS_TYPE(COMPONENT, TYPE, TEMPLATE_TYPE)
Definition: macros.hpp:200
gpu_roofline< float, double > gpu_roofline_flops
Definition: types.hpp:86
gpu_roofline< cuda::fp16_t > gpu_roofline_hp_flops
A specialization of tim::component::gpu_roofline for 16-bit floating point operations (depending on availability).
Definition: types.hpp:64
gpu_roofline< double > gpu_roofline_dp_flops
A specialization of tim::component::gpu_roofline for 64-bit floating point operations.
Definition: types.hpp:76
gpu_roofline< float > gpu_roofline_sp_flops
A specialization of tim::component::gpu_roofline for 32-bit floating point operations.
Definition: types.hpp:70
cpu_roofline< double > cpu_roofline_dp_flops
A specialization of tim::component::cpu_roofline for 64-bit floating point operations.
Definition: types.hpp:51
cpu_roofline< float > cpu_roofline_sp_flops
A specialization of tim::component::cpu_roofline for 32-bit floating point operations.
Definition: types.hpp:45
cpu_roofline< float, double > cpu_roofline_flops
Definition: types.hpp:58
Definition: kokkosp.cpp:38
std::integral_constant< int, N > priority_constant
Definition: types.hpp:206
Combines hardware counters and timers and executes the empirical roofline toolkit during application ...
Combines hardware counters and timers and executes the empirical roofline toolkit during application ...
typename typename typename
Definition: types.hpp:226