timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
kokkosp.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25#pragma once
26
27#include "timemory/api.hpp"
32
33#include <cstdint>
34#include <functional>
35#include <map>
36#include <mutex>
37#include <string>
38#include <tuple>
39#include <unordered_map>
40#include <vector>
41
// Prefix placed in front of every kokkosp_* callback declaration.
// Expands to TIMEMORY_WEAK_PREFIX when TIMEMORY_LIBRARY_SOURCE is defined and to
// nothing otherwise. A definition supplied before this point takes precedence.
#if !defined(TIMEMORY_KOKKOSP_PREFIX)
#    if defined(TIMEMORY_LIBRARY_SOURCE)
#        define TIMEMORY_KOKKOSP_PREFIX TIMEMORY_WEAK_PREFIX
#    else
#        define TIMEMORY_KOKKOSP_PREFIX
#    endif
#endif
49
// Suffix appended to every kokkosp_* callback declaration.
// Always carries TIMEMORY_VISIBILITY("default"); when TIMEMORY_LIBRARY_SOURCE is
// defined it is preceded by TIMEMORY_WEAK_POSTFIX. A definition supplied before
// this point takes precedence.
#if !defined(TIMEMORY_KOKKOSP_POSTFIX)
#    if defined(TIMEMORY_LIBRARY_SOURCE)
#        define TIMEMORY_KOKKOSP_POSTFIX                                                 \
            TIMEMORY_WEAK_POSTFIX TIMEMORY_VISIBILITY("default")
#    else
#        define TIMEMORY_KOKKOSP_POSTFIX TIMEMORY_VISIBILITY("default")
#    endif
#endif
58
/// Names a memory space through a fixed 64-character buffer.
/// Passed by value to the allocation / deep-copy callbacks declared in this file.
struct SpaceHandle
{
    char name[64];
};
63
/// Device record consisting of a single 32-bit device identifier.
struct KokkosPDeviceInfo
{
    uint32_t deviceID;
};
68
69//--------------------------------------------------------------------------------------//
70
71namespace tim
72{
73namespace kokkosp
74{
75//--------------------------------------------------------------------------------------//
76
/// Memory spaces recognized by get_space() and get_space_name().
/// NOTE(review): the enumerator order (HOST first) is assumed -- confirm against
/// the upstream header before relying on the numeric values.
enum Space
{
    SPACE_HOST,
    SPACE_CUDA
};
82
83//--------------------------------------------------------------------------------------//
84
/// Number of memory spaces handled by this header.
enum
{
    NSPACES = 2
};
89
90//--------------------------------------------------------------------------------------//
91
92inline Space
93get_space(const SpaceHandle& handle)
94{
95 switch(handle.name[0])
96 {
97 case 'H': return SPACE_HOST;
98 case 'C': return SPACE_CUDA;
99 }
100 abort();
101 return SPACE_HOST;
102}
103
104//--------------------------------------------------------------------------------------//
105
106inline const char*
108{
109 switch(space)
110 {
111 case SPACE_HOST: return "HOST";
112 case SPACE_CUDA: return "CUDA";
113 }
114 abort();
115 return nullptr;
116}
117
118//--------------------------------------------------------------------------------------//
119
/// Hand out consecutive identifiers, starting at zero.
/// The counter is thread_local, so values are unique only within the calling
/// thread.
inline uint64_t
get_unique_id()
{
    static thread_local uint64_t _instance = 0;
    return _instance++;
}
126
127//--------------------------------------------------------------------------------------//
128
/// Access the single mutex that serializes use of the get_cleanup() list.
inline std::mutex&
get_cleanup_mutex()
{
    static std::mutex _instance;
    return _instance;
}
135
136//--------------------------------------------------------------------------------------//
137
/// Access the list of callbacks that cleanup() invokes.
/// The list itself is not synchronized; callers hold get_cleanup_mutex() while
/// touching it.
inline auto&
get_cleanup()
{
    static std::vector<std::function<void()>> _instance{};
    return _instance;
}
144
145//--------------------------------------------------------------------------------------//
146
147template <typename Tp>
148inline Tp&
150{
151 // create a thread-local instance
152 static thread_local Tp _instance{};
153 // on first pass, add to cleanup
154 static thread_local bool _init = [&]() {
155 get_cleanup_mutex().lock();
156 get_cleanup().push_back([&]() { _instance.clear(); });
157 get_cleanup_mutex().unlock();
158 return true;
159 }();
160 consume_parameters(_init);
161
162 return _instance;
163}
164
165//--------------------------------------------------------------------------------------//
166
167template <typename Tp>
168inline Tp&
170{
171 // create a thread-local instance
172 static Tp _instance{};
173 // on first pass, add to cleanup
174 static bool _init = [&]() {
175 get_cleanup_mutex().lock();
176 get_cleanup().push_back([&]() { _instance.clear(); });
177 get_cleanup_mutex().unlock();
178 }();
179 consume_parameters(_init);
180
181 return _instance;
182}
183
184//--------------------------------------------------------------------------------------//
185
186inline void
188{
189 get_cleanup_mutex().lock();
190 for(auto& itr : get_cleanup())
191 itr();
192 get_cleanup_mutex().unlock();
193}
194
195//--------------------------------------------------------------------------------------//
196
197struct kernel_logger : component::base<kernel_logger, void>
198{
199public:
200 TIMEMORY_DEFAULT_OBJECT(kernel_logger)
201
202 template <typename... Args>
203 void mark(int64_t _inc_depth, Args&&... _args)
204 {
205 if(_inc_depth < 0)
206 get_depth() += _inc_depth;
207 {
208 auto _msg = TIMEMORY_JOIN('/', std::forward<Args>(_args)...);
209 auto_lock_t _lk{ type_mutex<decltype(std::cerr)>() };
210 std::cerr << get_indent() << get_message(_msg) << std::endl;
211 }
212 if(_inc_depth > 0)
213 get_depth() += _inc_depth;
214 }
215
216public:
218 {
219 return std::string{ "[kokkos_kernel_logger]> " } + std::string{ _msg };
220 }
221
222 static int64_t& get_depth()
223 {
224 static int64_t _value = 0;
225 return _value;
226 }
227
229 {
230 auto _depth = get_depth();
231 if(_depth < 1)
232 return "";
233 std::stringstream ss;
234 ss << std::right << std::setw(_depth * 2) << "";
235 return ss.str();
236 }
237};
238
239//--------------------------------------------------------------------------------------//
240
242using kokkos_bundle = tim::component::user_kokkosp_bundle;
243
245
246template <typename... Tail>
248 tim::component_bundle_t<project::kokkosp, kokkosp::memory_tracker, Tail...>;
249
250template <typename... Tail>
251using profiler_section_t = std::tuple<std::string, profiler_t<Tail...>>;
252
253template <typename... Tail>
255
// various data structures used
257template <typename... Tail>
258using profiler_stack_t = std::vector<profiler_t<Tail...>>;
259
260template <typename... Tail>
262 std::unordered_map<string_view_t,
263 std::unordered_map<string_view_t, profiler_t<Tail...>>>;
264
265template <typename... Tail>
266using profiler_index_map_t = std::unordered_map<uint64_t, profiler_t<Tail...>>;
267
268template <typename... Tail>
269using profiler_section_map_t = std::unordered_map<uint64_t, profiler_section_t<Tail...>>;
270
271//--------------------------------------------------------------------------------------//
272
273template <typename... Tail>
274inline profiler_index_map_t<Tail...>&
276{
277 return get_tl_static<profiler_index_map_t<Tail...>>();
278}
279
280//--------------------------------------------------------------------------------------//
281
282template <typename... Tail>
283inline profiler_section_map_t<Tail...>&
285{
286 return get_tl_static<profiler_section_map_t<Tail...>>();
287}
288
289//--------------------------------------------------------------------------------------//
290
291template <typename... Tail>
292inline profiler_memory_map_t<Tail...>&
294{
295 return get_tl_static<profiler_memory_map_t<Tail...>>();
296}
297
298//--------------------------------------------------------------------------------------//
299
300template <typename... Tail>
301inline auto&
302get_profiler_memory_map(SpaceHandle _space)
303{
304 return get_profiler_memory_map<Tail...>()[tim::string_view_t{ _space.name }];
305}
306
307//--------------------------------------------------------------------------------------//
308
309template <typename... Tail>
310inline profiler_stack_t<Tail...>&
312{
313 return get_tl_static<profiler_stack_t<Tail...>>();
314}
315
316//--------------------------------------------------------------------------------------//
317
318template <typename... Tail>
319inline void
320create_profiler(const std::string& pname, uint64_t kernid)
321{
322 get_profiler_index_map<Tail...>().insert({ kernid, profiler_t<Tail...>(pname) });
323}
324
325//--------------------------------------------------------------------------------------//
326
327template <typename... Tail>
328inline void
329destroy_profiler(uint64_t kernid)
330{
331 if(get_profiler_index_map<Tail...>().find(kernid) !=
332 get_profiler_index_map<Tail...>().end())
333 get_profiler_index_map<Tail...>().erase(kernid);
334}
335
336//--------------------------------------------------------------------------------------//
337
338template <typename... Tail>
339inline void
340start_profiler(uint64_t kernid)
341{
342 if(get_profiler_index_map<Tail...>().find(kernid) !=
343 get_profiler_index_map<Tail...>().end())
344 get_profiler_index_map<Tail...>().at(kernid).start();
345}
346
347//--------------------------------------------------------------------------------------//
348
349template <typename... Tail>
350inline void
351stop_profiler(uint64_t kernid)
352{
353 if(get_profiler_index_map<Tail...>().find(kernid) !=
354 get_profiler_index_map<Tail...>().end())
355 get_profiler_index_map<Tail...>().at(kernid).stop();
356}
357
358//--------------------------------------------------------------------------------------//
359
360} // namespace kokkosp
361} // namespace tim
362
363//--------------------------------------------------------------------------------------//
364
367 std::true_type)
368
369//--------------------------------------------------------------------------------------//
370
371extern "C"
372{
374
377
379 const char* key, const char* value) TIMEMORY_KOKKOSP_POSTFIX;
380
382 const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount,
383 void* deviceInfo) TIMEMORY_KOKKOSP_POSTFIX;
384
386
388 const char* name, uint32_t devid, uint64_t* kernid) TIMEMORY_KOKKOSP_POSTFIX;
389
392
394 const char* name, uint32_t devid, uint64_t* kernid) TIMEMORY_KOKKOSP_POSTFIX;
395
398
400 const char* name, uint32_t devid, uint64_t* kernid) TIMEMORY_KOKKOSP_POSTFIX;
401
404
406 const char* name, uint32_t devid, uint64_t* kernid) TIMEMORY_KOKKOSP_POSTFIX;
407
408 TIMEMORY_KOKKOSP_PREFIX void kokkosp_end_fence(uint64_t kernid)
410
413
415
417 const char* name, uint32_t* secid) TIMEMORY_KOKKOSP_POSTFIX;
418
421
424
427
429 const SpaceHandle space, const char* label, const void* const ptr,
430 const uint64_t size) TIMEMORY_KOKKOSP_POSTFIX;
431
433 const SpaceHandle space, const char* label, const void* const ptr,
434 const uint64_t size) TIMEMORY_KOKKOSP_POSTFIX;
435
437 SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
438 SpaceHandle src_handle, const char* src_name, const void* src_ptr,
439 uint64_t size) TIMEMORY_KOKKOSP_POSTFIX;
440
442
445}
This is a variadic component wrapper where all components are allocated on the stack and cannot be di...
Definition: auto_tuple.hpp:65
Implementation of the data_tracker component(s)
Forward declaration of user_bundle components. User-bundles are similar to the classical profiling in...
void kokkosp_end_deep_copy()
Definition: kokkosp.cpp:351
void kokkosp_end_fence(uint64_t kernid)
Definition: kokkosp.cpp:253
void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t devInfoCount, void *deviceInfo)
Definition: kokkosp.cpp:128
#define TIMEMORY_KOKKOSP_POSTFIX
Definition: kokkosp.hpp:55
void kokkosp_push_profile_region(const char *name)
Definition: kokkosp.cpp:262
void kokkosp_start_profile_section(uint32_t secid)
Definition: kokkosp.cpp:295
TIMEMORY_DEFINE_CONCRETE_TRAIT(uses_memory_units, kokkosp::memory_tracker, std::true_type) TIMEMORY_DEFINE_CONCRETE_TRAIT(is_memory_category
void kokkosp_end_parallel_scan(uint64_t kernid)
Definition: kokkosp.cpp:232
std::true_type void kokkosp_print_help(char *argv0)
Definition: kokkosp.cpp:100
void kokkosp_parse_args(int argc, char **argv)
Definition: kokkosp.cpp:107
void kokkosp_begin_parallel_reduce(const char *name, uint32_t devid, uint64_t *kernid)
Definition: kokkosp.cpp:199
void kokkosp_end_parallel_reduce(uint64_t kernid)
Definition: kokkosp.cpp:211
void kokkosp_begin_parallel_scan(const char *name, uint32_t devid, uint64_t *kernid)
Definition: kokkosp.cpp:220
void kokkosp_stop_profile_section(uint32_t secid)
Definition: kokkosp.cpp:301
void kokkosp_begin_deep_copy(SpaceHandle dst_handle, const char *dst_name, const void *dst_ptr, SpaceHandle src_handle, const char *src_name, const void *src_ptr, uint64_t size)
Definition: kokkosp.cpp:331
void kokkosp_profile_event(const char *name)
Definition: kokkosp.cpp:364
void kokkosp_pop_profile_region()
Definition: kokkosp.cpp:270
void kokkosp_finalize_library()
Definition: kokkosp.cpp:165
void kokkosp_begin_fence(const char *name, uint32_t devid, uint64_t *kernid)
Definition: kokkosp.cpp:241
void kokkosp_begin_parallel_for(const char *name, uint32_t devid, uint64_t *kernid)
Definition: kokkosp.cpp:178
void kokkosp_declare_metadata(const char *key, const char *value)
Definition: kokkosp.cpp:123
void kokkosp_create_profile_section(const char *name, uint32_t *secid)
Definition: kokkosp.cpp:281
void kokkosp_deallocate_data(const SpaceHandle space, const char *label, const void *const ptr, const uint64_t size)
Definition: kokkosp.cpp:319
void kokkosp_allocate_data(const SpaceHandle space, const char *label, const void *const ptr, const uint64_t size)
Definition: kokkosp.cpp:309
void kokkosp_destroy_profile_section(uint32_t secid)
Definition: kokkosp.cpp:288
#define TIMEMORY_KOKKOSP_PREFIX
Definition: kokkosp.hpp:46
void kokkosp_end_parallel_for(uint64_t kernid)
Definition: kokkosp.cpp:190
return _hash_map end()
_reported insert(_hash_id)
auto & get_cleanup()
Definition: kokkosp.hpp:139
Tp & get_tl_static()
Definition: kokkosp.hpp:149
std::mutex & get_cleanup_mutex()
Definition: kokkosp.hpp:130
tim::component::data_tracker< int64_t, tim::project::kokkosp > memory_tracker
Definition: kokkosp.hpp:241
tim::component::user_kokkosp_bundle kokkos_bundle
Definition: kokkosp.hpp:242
profiler_index_map_t< Tail... > & get_profiler_index_map()
Definition: kokkosp.hpp:275
profiler_stack_t< Tail... > & get_profiler_stack()
Definition: kokkosp.hpp:311
Space get_space(const SpaceHandle &handle)
Definition: kokkosp.hpp:93
void cleanup()
Definition: kokkosp.hpp:187
void create_profiler(const std::string &pname, uint64_t kernid)
Definition: kokkosp.hpp:320
void stop_profiler(uint64_t kernid)
Definition: kokkosp.hpp:351
std::unordered_map< uint64_t, profiler_t< Tail... > > profiler_index_map_t
Definition: kokkosp.hpp:266
std::unordered_map< string_view_t, std::unordered_map< string_view_t, profiler_t< Tail... > > > profiler_memory_map_t
Definition: kokkosp.hpp:263
tim::component_bundle_t< project::kokkosp, kokkosp::kernel_logger * > logger_t
Definition: kokkosp.hpp:244
const char * get_space_name(int space)
Definition: kokkosp.hpp:107
profiler_section_map_t< Tail... > & get_profiler_section_map()
Definition: kokkosp.hpp:284
tim::component_bundle_t< project::kokkosp, kokkosp::memory_tracker, Tail... > profiler_t
Definition: kokkosp.hpp:248
std::vector< profiler_t< Tail... > > profiler_stack_t
Definition: kokkosp.hpp:258
profiler_memory_map_t< Tail... > & get_profiler_memory_map()
Definition: kokkosp.hpp:293
void destroy_profiler(uint64_t kernid)
Definition: kokkosp.hpp:329
uint64_t get_unique_id()
Definition: kokkosp.hpp:121
std::tuple< std::string, profiler_t< Tail... > > profiler_section_t
Definition: kokkosp.hpp:251
void start_profiler(uint64_t kernid)
Definition: kokkosp.hpp:340
Tp & get_static()
Definition: kokkosp.hpp:169
std::unordered_map< uint64_t, profiler_section_t< Tail... > > profiler_section_map_t
Definition: kokkosp.hpp:269
Definition: kokkosp.cpp:39
std::array< char *, 4 > _args
std::string string_view_t
Definition: language.hpp:102
std::unique_lock< mutex_t > auto_lock_t
Unique lock type around mutex_t.
Definition: locking.hpp:42
convert_t< mpl::available_t< type_list< T... > >, component_bundle< Tag > > component_bundle_t
Definition: available.hpp:337
const std::string & string_view_cref_t
Definition: language.hpp:103
char ** argv
Definition: config.cpp:55
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
_args at(0)
void consume_parameters(ArgsT &&...)
Definition: types.hpp:285
This component is provided to facilitate data tracking. The first template parameter is the type of d...
Definition: components.hpp:148
static std::string get_indent()
Definition: kokkosp.hpp:228
void mark(int64_t _inc_depth, Args &&... _args)
Definition: kokkosp.hpp:203
static int64_t & get_depth()
Definition: kokkosp.hpp:222
static std::string get_message(string_view_cref_t _msg)
Definition: kokkosp.hpp:217
#define TIMEMORY_JOIN(delim,...)
Definition: macros.hpp:90