timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tool.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25#pragma once
26
32//
33#include "timemory/components/ompt/backends.hpp"
35//
36#include <deque>
37
38namespace tim
39{
40//
41//--------------------------------------------------------------------------------------//
42//
43namespace openmp
44{
45//
46//--------------------------------------------------------------------------------------//
47//
// Labels for ompt_thread_t, indexed by the enumerator's integer value.
// Index 0 is not a valid thread type and is a nullptr placeholder.
// ompt_thread_unknown (value 4 in the OpenMP 5.0 tools interface) is included so
// that indexing with it does not read past the end of the table.
static const char* ompt_thread_type_labels[] = { nullptr, "ompt_thread_initial",
                                                 "ompt_thread_worker",
                                                 "ompt_thread_other",
                                                 "ompt_thread_unknown" };
51//
52//--------------------------------------------------------------------------------------//
53//
// Labels for ompt_dispatch_t, indexed by the enumerator's integer value.
// Index 0 is a nullptr placeholder. Values 3..5 are the chunk dispatch kinds added
// in OpenMP 5.2; listing them keeps lookups for those values inside the table.
static const char* ompt_dispatch_type_labels[] = { nullptr, "ompt_dispatch_iteration",
                                                   "ompt_dispatch_section",
                                                   "ompt_dispatch_ws_loop_chunk",
                                                   "ompt_dispatch_taskloop_chunk",
                                                   "ompt_dispatch_distribute_chunk" };
56//
57//--------------------------------------------------------------------------------------//
58//
// Labels for ompt_sync_region_t, indexed by the enumerator's integer value.
// Index 0 is a nullptr placeholder. Values 8..10 were added in OpenMP 5.1; listing
// them keeps lookups for those values inside the table.
static const char* ompt_sync_region_type_labels[] = {
    nullptr,
    "ompt_sync_region_barrier",
    "ompt_sync_region_barrier_implicit",
    "ompt_sync_region_barrier_explicit",
    "ompt_sync_region_barrier_implementation",
    "ompt_sync_region_taskwait",
    "ompt_sync_region_taskgroup",
    "ompt_sync_region_reduction",
    "ompt_sync_region_barrier_implicit_workshare",
    "ompt_sync_region_barrier_implicit_parallel",
    "ompt_sync_region_barrier_teams"
};
69//
70//--------------------------------------------------------------------------------------//
71//
// Labels for ompt_target_t, indexed by the enumerator's integer value.
// Index 0 and indices 5..8 do not correspond to an enumerator and are nullptr
// placeholders. Values 9..12 are the "nowait" kinds added in OpenMP 5.1; listing
// them keeps lookups for those values inside the table.
static const char* ompt_target_type_labels[] = { nullptr,
                                                 "ompt_target",
                                                 "ompt_target_enter_data",
                                                 "ompt_target_exit_data",
                                                 "ompt_target_update",
                                                 nullptr,
                                                 nullptr,
                                                 nullptr,
                                                 nullptr,
                                                 "ompt_target_nowait",
                                                 "ompt_target_enter_data_nowait",
                                                 "ompt_target_exit_data_nowait",
                                                 "ompt_target_update_nowait" };
76//
77//--------------------------------------------------------------------------------------//
78//
// Labels for ompt_work_t, indexed by the enumerator's integer value.
// Index 0 and index 9 do not correspond to an enumerator and are nullptr
// placeholders. ompt_work_scope (8, OpenMP 5.1) and the loop schedule kinds
// (10..13, OpenMP 5.2) are listed so lookups for those values stay inside the table.
static const char* ompt_work_labels[] = { nullptr,
                                          "ompt_work_loop",
                                          "ompt_work_sections",
                                          "ompt_work_single_executor",
                                          "ompt_work_single_other",
                                          "ompt_work_workshare",
                                          "ompt_work_distribute",
                                          "ompt_work_taskloop",
                                          "ompt_work_scope",
                                          nullptr,
                                          "ompt_work_loop_static",
                                          "ompt_work_loop_dynamic",
                                          "ompt_work_loop_guided",
                                          "ompt_work_loop_other" };
87//
88//--------------------------------------------------------------------------------------//
89//
// Labels for ompt_target_data_op_t, indexed by the enumerator's integer value.
// Index 0 is a nullptr placeholder. The associate/disassociate operations (5 and 6
// in the OpenMP 5.0 tools interface) are listed so that indexing with them does not
// read past the end of the table. Existing label strings are left unchanged.
static const char* ompt_target_data_op_labels[] = { nullptr, "ompt_target_data_alloc",
                                                    "ompt_target_data_transfer_to_dev",
                                                    "ompt_target_data_transfer_from_dev",
                                                    "ompt_target_data_delete",
                                                    "ompt_target_data_associate",
                                                    "ompt_target_data_disassociate" };
94//
95//--------------------------------------------------------------------------------------//
96//
// Labels for ompt_task_status_t, indexed by the enumerator's integer value.
// Index 0 is a nullptr placeholder. ompt_taskwait_complete (8, OpenMP 5.1) is
// listed so that indexing with it stays inside the table.
static const char* ompt_task_status_labels[] = { nullptr,
                                                 "ompt_task_complete",
                                                 "ompt_task_yield",
                                                 "ompt_task_cancel",
                                                 "ompt_task_detach",
                                                 "ompt_task_early_fulfill",
                                                 "ompt_task_late_fulfill",
                                                 "ompt_task_switch",
                                                 "ompt_taskwait_complete" };
105//
106//--------------------------------------------------------------------------------------//
107//
// Maps each listed ompt_mutex_t enumerator to its spelled-out name.
// NOTE(review): the map is mutable and the mutex constructors in this file read it
// with operator[], which inserts a nullptr entry for any key not listed here.
108static std::map<ompt_mutex_t, const char*> ompt_mutex_type_labels = {
109 { ompt_mutex_lock, "ompt_mutex_lock" },
110 { ompt_mutex_test_lock, "ompt_mutex_test_lock" },
111 { ompt_mutex_nest_lock, "ompt_mutex_nest_lock" },
112 { ompt_mutex_test_nest_lock, "ompt_mutex_test_nest_lock" },
113 { ompt_mutex_critical, "ompt_mutex_critical" },
114 { ompt_mutex_atomic, "ompt_mutex_atomic" },
115 { ompt_mutex_ordered, "ompt_mutex_ordered" }
116};
117//
118//--------------------------------------------------------------------------------------//
119//
// Maps each listed ompt_task_flag_t enumerator to its spelled-out name.
// Lookups are by exact key; a value that is not one of the keys below has no entry.
120static std::map<ompt_task_flag_t, const char*> ompt_task_type_labels = {
121 { ompt_task_initial, "ompt_task_initial" },
122 { ompt_task_implicit, "ompt_task_implicit" },
123 { ompt_task_explicit, "ompt_task_explicit" },
124 { ompt_task_target, "ompt_task_target" },
125 { ompt_task_undeferred, "ompt_task_undeferred" },
126 { ompt_task_untied, "ompt_task_untied" },
127 { ompt_task_final, "ompt_task_final" },
128 { ompt_task_mergeable, "ompt_task_mergeable" },
129 { ompt_task_merged, "ompt_task_merged" }
130};
131//
132//--------------------------------------------------------------------------------------//
133//
// Maps each listed ompt_target_map_flag_t enumerator to its spelled-out name.
// Lookups are by exact key; a value that is not one of the keys below has no entry.
134static std::map<ompt_target_map_flag_t, const char*> ompt_target_map_labels = {
135 { ompt_target_map_flag_to, "ompt_target_map_flag_to" },
136 { ompt_target_map_flag_from, "ompt_target_map_flag_from" },
137 { ompt_target_map_flag_alloc, "ompt_target_map_flag_alloc" },
138 { ompt_target_map_flag_release, "ompt_target_map_flag_release" },
139 { ompt_target_map_flag_delete, "ompt_target_map_flag_delete" },
140 { ompt_target_map_flag_implicit, "ompt_target_map_flag_implicit" }
141};
142//
143//--------------------------------------------------------------------------------------//
144//
// Expands to the braced pair `{ TYPE, "TYPE" }`: the enumerator followed by its own
// name as a string literal, so label tables need not spell each name twice.
#define TIMEMORY_OMPT_ENUM_LABEL(TYPE) { TYPE, #TYPE }
149//
150//--------------------------------------------------------------------------------------//
151//
// Maps each listed ompt_dependence_type_t enumerator to its spelled-out name.
// Each entry is produced by TIMEMORY_OMPT_ENUM_LABEL, so the string is exactly the
// enumerator's identifier.
152static std::map<ompt_dependence_type_t, const char*> ompt_dependence_type_labels = {
153 TIMEMORY_OMPT_ENUM_LABEL(ompt_dependence_type_in),
154 TIMEMORY_OMPT_ENUM_LABEL(ompt_dependence_type_out),
155 TIMEMORY_OMPT_ENUM_LABEL(ompt_dependence_type_inout),
156 TIMEMORY_OMPT_ENUM_LABEL(ompt_dependence_type_mutexinoutset),
157 TIMEMORY_OMPT_ENUM_LABEL(ompt_dependence_type_source),
158 TIMEMORY_OMPT_ENUM_LABEL(ompt_dependence_type_sink)
159};
160//
161//--------------------------------------------------------------------------------------//
162//
// Maps each listed ompt_cancel_flag_t enumerator to its spelled-out name.
// Each entry is produced by TIMEMORY_OMPT_ENUM_LABEL, so the string is exactly the
// enumerator's identifier.
163static std::map<ompt_cancel_flag_t, const char*> ompt_cancel_type_labels = {
164 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_parallel),
165 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_sections),
166 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_loop),
167 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_taskgroup),
168 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_activated),
169 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_detected),
170 TIMEMORY_OMPT_ENUM_LABEL(ompt_cancel_discarded_task)
171};
172//
173//--------------------------------------------------------------------------------------//
174//
// Maps each listed ompt_callbacks_t enumerator to its spelled-out name.
// Each entry is produced by TIMEMORY_OMPT_ENUM_LABEL, so the string is exactly the
// enumerator's identifier. std::map orders entries by key, so the listing order
// below has no effect on iteration order.
175static std::map<ompt_callbacks_t, const char*> ompt_callback_labels = {
176 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_thread_begin),
177 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_thread_end),
178 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_parallel_begin),
179 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_parallel_end),
180 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_task_create),
181 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_task_schedule),
182 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_implicit_task),
183 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_target),
184 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_target_data_op),
185 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_target_submit),
186 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_control_tool),
187 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_device_initialize),
188 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_device_finalize),
189 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_device_load),
190 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_device_unload),
191 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_sync_region_wait),
192 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_mutex_released),
193 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_dependences),
194 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_task_dependence),
195 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_work),
196 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_master),
197 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_target_map),
198 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_sync_region),
199 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_lock_init),
200 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_lock_destroy),
201 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_mutex_acquire),
202 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_mutex_acquired),
203 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_nest_lock),
204 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_flush),
205 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_cancel),
206 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_reduction),
207 TIMEMORY_OMPT_ENUM_LABEL(ompt_callback_dispatch)
208};
209//
210//--------------------------------------------------------------------------------------//
211//
212inline void
214{
216 ompt_task_status_labels, ompt_target_data_op_labels, ompt_work_labels,
217 ompt_target_type_labels, ompt_sync_region_type_labels, ompt_dispatch_type_labels,
218 ompt_thread_type_labels, ompt_cancel_type_labels, ompt_dependence_type_labels,
219 ompt_target_map_labels, ompt_task_type_labels, ompt_mutex_type_labels);
220}
221//
222//--------------------------------------------------------------------------------------//
223//
224template <typename Api>
226{
227 using api_type = Api;
228
229public:
230 template <typename KeyT, typename MappedT, typename HashT = std::hash<KeyT>>
231 using uomap_t = std::unordered_map<KeyT, MappedT, HashT>;
232
233 template <typename Tag, typename KeyT = uint64_t, typename MappedT = ompt_data_t*,
234 typename MapT = uomap_t<KeyT, MappedT>>
235 static auto& get_data()
236 {
237 static thread_local MapT _instance;
238 return _instance;
239 }
240
241 // tags for above
243 {};
245 {};
246 struct task_tag
247 {};
249 {};
251 {};
252
254
255public:
256 //----------------------------------------------------------------------------------//
257 // callback thread begin
258 //----------------------------------------------------------------------------------//
259 context_handler(ompt_thread_t thread_type, ompt_data_t* thread_data)
260 : m_key(ompt_thread_type_labels[thread_type])
261 , m_data({ { thread_data, nullptr } })
262 {}
263
264 //----------------------------------------------------------------------------------//
265 // callback thread end
266 //----------------------------------------------------------------------------------//
267 context_handler(ompt_data_t* thread_data)
268 : m_data({ { thread_data, nullptr } })
269 {}
270
271 //----------------------------------------------------------------------------------//
272 // parallel begin
273 //----------------------------------------------------------------------------------//
274 context_handler(ompt_data_t* task_data, const ompt_frame_t* task_frame,
275 ompt_data_t* parallel_data, unsigned int requested_parallelism,
276 int flags, const void* codeptr)
277 : m_key("ompt_parallel")
278 , m_data({ { nullptr, parallel_data } })
279 {
280 consume_parameters(task_data, task_frame, requested_parallelism, flags, codeptr);
281 }
282
283 //----------------------------------------------------------------------------------//
284 // parallel end
285 //----------------------------------------------------------------------------------//
286 context_handler(ompt_data_t* parallel_data, ompt_data_t* task_data, int flags,
287 const void* codeptr)
288 : m_key("ompt_parallel")
289 , m_data({ { nullptr, parallel_data } })
290 {
291 consume_parameters(task_data, flags, codeptr);
292 }
293
294 //----------------------------------------------------------------------------------//
295 // callback master
296 //----------------------------------------------------------------------------------//
297 context_handler(ompt_scope_endpoint_t endpoint, ompt_data_t* parallel_data,
298 ompt_data_t* task_data, const void* codeptr)
299 : m_key("ompt_master")
300 , m_data(
301 { { (endpoint == ompt_scope_begin) ? construct_data() : task_data, nullptr } })
302 {
303 consume_parameters(endpoint, parallel_data, task_data, codeptr);
304 }
305
306 //----------------------------------------------------------------------------------//
307 // callback implicit task
308 //----------------------------------------------------------------------------------//
309 context_handler(ompt_scope_endpoint_t endpoint, ompt_data_t* parallel_data,
310 ompt_data_t* task_data, unsigned int team_size,
311 unsigned int thread_num)
312 : m_key("ompt_implicit_task")
313 , m_data(
314 { { (endpoint == ompt_scope_begin) ? construct_data() : task_data, nullptr } })
315 {
316 consume_parameters(endpoint, parallel_data, task_data, team_size, thread_num);
317 }
318
319 //----------------------------------------------------------------------------------//
320 // callback sync region
321 //----------------------------------------------------------------------------------//
322 context_handler(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint,
323 ompt_data_t* parallel_data, ompt_data_t* task_data,
324 const void* codeptr)
325 : m_key(ompt_sync_region_type_labels[kind])
326 , m_data(
327 { { (endpoint == ompt_scope_begin) ? construct_data() : task_data, nullptr } })
328 {
329 consume_parameters(endpoint, parallel_data, task_data, codeptr);
330 }
331
332 //----------------------------------------------------------------------------------//
333 // callback mutex acquire
334 //----------------------------------------------------------------------------------//
335 context_handler(ompt_mutex_t kind, unsigned int hint, unsigned int impl,
336 ompt_wait_id_t wait_id, const void* codeptr)
337 : m_key(ompt_mutex_type_labels[kind])
338 , m_data({ { construct_data(), nullptr } })
339 {
340 get_data<mutex_tag>().insert({ wait_id, m_data[0] });
341 consume_parameters(hint, impl, wait_id, codeptr);
342 }
343
344 //----------------------------------------------------------------------------------//
345 // callback mutex acquired
346 // callback mutex released
347 //----------------------------------------------------------------------------------//
348 context_handler(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void* codeptr)
349 : m_key(ompt_mutex_type_labels[kind])
350 , m_data({ { nullptr, nullptr } })
351 {
352 if(get_data<mutex_tag>().find(wait_id) != get_data<mutex_tag>().end())
353 {
354 m_data[0] = get_data<mutex_tag>()[wait_id];
355 m_cleanup = [=]() {
356 auto& itr = get_data<mutex_tag>()[wait_id];
357 delete itr;
358 itr = nullptr;
359 get_data<mutex_tag>().erase(wait_id);
360 };
361 }
362 consume_parameters(codeptr);
363 }
364
365 //----------------------------------------------------------------------------------//
366 // callback nest lock
367 //----------------------------------------------------------------------------------//
368 context_handler(ompt_scope_endpoint_t endpoint, ompt_wait_id_t wait_id,
369 const void* codeptr)
370 : m_key("ompt_nested_lock")
371 , m_data({ { nullptr, nullptr } })
372 {
373 if(endpoint == ompt_scope_end &&
374 get_data<nest_lock_tag>().find(wait_id) != get_data<nest_lock_tag>().end())
375 {
376 m_data[0] = get_data<nest_lock_tag>()[wait_id];
377 m_cleanup = [=]() {
378 auto& itr = get_data<nest_lock_tag>()[wait_id];
379 delete itr;
380 itr = nullptr;
381 get_data<nest_lock_tag>().erase(wait_id);
382 };
383 }
384 else if(endpoint == ompt_scope_begin)
385 {
386 m_data[0] = construct_data();
387 get_data<nest_lock_tag>()[wait_id] = m_data[0];
388 }
389
390 consume_parameters(endpoint, wait_id, codeptr);
391 }
392
393 //----------------------------------------------------------------------------------//
394 // callback task create
395 //----------------------------------------------------------------------------------//
396 context_handler(ompt_data_t* task_data, const ompt_frame_t* task_frame,
397 ompt_data_t* new_task_data, int flags, int has_dependences,
398 const void* codeptr)
399 : m_key("ompt_task_create")
400 , m_data({ { task_data, nullptr } })
401 {
402 consume_parameters(task_frame, new_task_data, flags, has_dependences, codeptr);
403 }
404
405 //----------------------------------------------------------------------------------//
406 // callback task scheduler
407 //----------------------------------------------------------------------------------//
408 context_handler(ompt_data_t* prior_task_data, ompt_task_status_t prior_task_status,
409 ompt_data_t* next_task_data)
410 : m_key("ompt_task_schedule")
411 , m_data({ { nullptr, next_task_data } })
412 {
413 consume_parameters(prior_task_data, prior_task_status, next_task_data);
414 }
415
416 //----------------------------------------------------------------------------------//
417 // callback dispatch
418 //----------------------------------------------------------------------------------//
419 context_handler(ompt_data_t* parallel_data, ompt_data_t* task_data,
420 ompt_dispatch_t kind, ompt_data_t instance)
421 : m_key(ompt_dispatch_type_labels[kind])
422 , m_data({ { task_data, nullptr } })
423 {
424 consume_parameters(parallel_data, task_data, kind, instance);
425 }
426
427 //----------------------------------------------------------------------------------//
428 // callback work
429 //----------------------------------------------------------------------------------//
430 context_handler(ompt_work_t wstype, ompt_scope_endpoint_t endpoint,
431 ompt_data_t* parallel_data, ompt_data_t* task_data, uint64_t count,
432 const void* codeptr)
433 : m_key(ompt_work_labels[wstype])
434 , m_data(
435 { { (endpoint == ompt_scope_begin) ? construct_data() : task_data, nullptr } })
436 {
437 consume_parameters(endpoint, parallel_data, task_data, count, codeptr);
438 }
439
440 //----------------------------------------------------------------------------------//
441 // callback flush
442 //----------------------------------------------------------------------------------//
443 context_handler(ompt_data_t* thread_data, const void* codeptr)
444 : m_key("ompt_flush")
445 , m_data({ { thread_data, nullptr } })
446 {
447 consume_parameters(thread_data, codeptr);
448 }
449
450 //----------------------------------------------------------------------------------//
451 // callback cancel
452 //----------------------------------------------------------------------------------//
453 context_handler(ompt_data_t* thread_data, int flags, const void* codeptr)
454 : m_key("ompt_cancel")
455 , m_data({ { thread_data, nullptr } })
456 {
457 consume_parameters(thread_data, flags, codeptr);
458 }
459
460 //----------------------------------------------------------------------------------//
461 // callback target
462 //----------------------------------------------------------------------------------//
463 context_handler(ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num,
464 ompt_data_t* task_data, ompt_id_t target_id, const void* codeptr)
465 : m_key(mpl::apply<std::string>::join('_', ompt_target_type_labels[kind], "dev",
466 device_num))
467 , m_data(
468 { { (endpoint == ompt_scope_begin) ? construct_data() : task_data, nullptr } })
469 {
470 consume_parameters(kind, endpoint, target_id, codeptr);
471 }
472
473 //----------------------------------------------------------------------------------//
474 // callback target data op
475 //----------------------------------------------------------------------------------//
476 context_handler(ompt_id_t target_id, ompt_id_t host_op_id,
477 ompt_target_data_op_t optype, void* src_addr, int src_device_num,
478 void* dest_addr, int dest_device_num, size_t bytes,
479 const void* codeptr)
480 : m_key(mpl::apply<std::string>::join('_', ompt_target_data_op_labels[optype], "src",
481 src_device_num, "dest", dest_device_num))
482 , m_data({ { construct_data(true), nullptr } })
483 {
484 consume_parameters(target_id, host_op_id, src_addr, dest_addr, bytes, codeptr);
485 }
486
487 //----------------------------------------------------------------------------------//
488 // callback target submit
489 //----------------------------------------------------------------------------------//
490 context_handler(ompt_id_t target_id, ompt_id_t host_op_id,
491 unsigned int requested_num_teams)
492 : m_key("ompt_target_submit")
493 , m_data({ { nullptr, nullptr } })
494 {
495 consume_parameters(target_id, host_op_id, requested_num_teams);
496 }
497
498 //----------------------------------------------------------------------------------//
499 // callback target mapping
500 //----------------------------------------------------------------------------------//
501 context_handler(ompt_id_t target_id, unsigned int nitems, void** host_addr,
502 void** device_addr, size_t* bytes, unsigned int* mapping_flags)
503 : m_key(mpl::apply<std::string>::join('_', "ompt_target_mapping", target_id))
504 , m_data({ { nullptr, nullptr } })
505 {
506 consume_parameters(nitems, host_addr, device_addr, bytes, mapping_flags);
507 }
508
509 //----------------------------------------------------------------------------------//
510 // callback target device initialize
511 //----------------------------------------------------------------------------------//
512 context_handler(uint64_t device_num, const char* type, ompt_device_t* device,
513 ompt_function_lookup_t lookup, const char* documentation)
514 : m_key(mpl::apply<std::string>::join('_', "ompt_device", device_num, type))
515 , m_data({ { construct_data(), nullptr } })
516 {
517 get_data<device_state_tag>().insert({ device_num, m_data[0] });
518 consume_parameters(device, lookup, documentation);
519 }
520
521 //----------------------------------------------------------------------------------//
522 // callback target device finalize
523 //----------------------------------------------------------------------------------//
524 context_handler(uint64_t device_num)
525 : m_data({ { get_data<device_state_tag>()[device_num], nullptr } })
526 , m_cleanup([=]() {
527 auto& itr = get_data<device_state_tag>()[device_num];
528 delete itr;
529 itr = nullptr;
530 get_data<device_state_tag>().erase(device_num);
531 })
532 {}
533
534 //----------------------------------------------------------------------------------//
535 // callback target device load
536 //----------------------------------------------------------------------------------//
537 context_handler(uint64_t device_num, const char* filename, int64_t offset_in_file,
538 void* vma_in_file, size_t bytes, void* host_addr, void* device_addr,
539 uint64_t module_id)
540 : m_key(mpl::apply<std::string>::join('_', "ompt_target_load", device_num, filename))
541 , m_data({ { construct_data(), nullptr } })
542 {
543 get_data<device_load_tag, uint64_t, data_map_t>()[device_num].insert(
544 { module_id, m_data[0] });
545 consume_parameters(offset_in_file, vma_in_file, bytes, host_addr, device_addr);
546 }
547
548 //----------------------------------------------------------------------------------//
549 // callback target device unload
550 //----------------------------------------------------------------------------------//
551 context_handler(uint64_t device_num, uint64_t module_id)
552 : m_data({ { get_data<device_load_tag, uint64_t, data_map_t>()[device_num][module_id],
553 nullptr } })
554 , m_cleanup([=]() {
555 auto& itr =
556 get_data<device_load_tag, uint64_t, data_map_t>()[device_num][module_id];
557 delete itr;
558 itr = nullptr;
559 get_data<device_load_tag, uint64_t, data_map_t>()[device_num].erase(module_id);
560 })
561 {}
562
564
565public:
566 static constexpr size_t size = 2;
567
568 TIMEMORY_NODISCARD bool empty() const
569 {
570 return (m_key.empty() || (m_data[0] == nullptr && m_data[1] == nullptr));
571 }
572
573 TIMEMORY_NODISCARD const std::string& key() const { return m_key; }
574
575 TIMEMORY_NODISCARD ompt_data_t* data(size_t idx = 0) const
576 {
577 return m_data[idx % size];
578 }
579
580 template <size_t Idx, typename Tp, typename Func = std::function<void(Tp*)>>
581 auto construct(Func&& f = [](Tp*) {})
582 -> decltype(new Tp(std::declval<std::string>()), void())
583 {
584 auto& itr = std::get<Idx>(m_data);
585 if(itr && itr->ptr == nullptr)
586 {
587 auto obj = new Tp(m_key);
588 std::forward<Func>(f)(obj);
589 itr->ptr = (void*) obj;
590 }
591 }
592
// Applies construct<Idx, Tp> to both data slots (0, then 1) with the same callable.
// f is passed as an lvalue to the first call and forwarded only on the last one, so
// the callable is never used again after it has been forwarded.
template <typename Tp, typename Func = std::function<void(Tp*)>>
auto construct(Func&& f = [](Tp*) {})
{
    construct<0, Tp>(f);
    construct<1, Tp>(std::forward<Func>(f));
}
599
600 template <size_t Idx, typename Tp, typename Func = std::function<void(Tp*)>>
601 auto destroy(Func&& f = [](Tp*) {})
602 {
603 auto& itr = std::get<Idx>(m_data);
604 if(itr && itr->ptr != nullptr)
605 {
606 auto obj = static_cast<Tp*>(itr->ptr);
607 std::forward<Func>(f)(obj);
608 delete obj;
609 itr->ptr = nullptr;
610 }
611 }
612
// Applies destroy<Idx, Tp> to both data slots (0, then 1) with the same callable.
// f is passed as an lvalue to the first call and forwarded only on the last one, so
// the callable is never used again after it has been forwarded.
template <typename Tp, typename Func = std::function<void(Tp*)>>
auto destroy(Func&& f = [](Tp*) {})
{
    destroy<0, Tp>(f);
    destroy<1, Tp>(std::forward<Func>(f));
}
619
620 auto construct_data(bool _cleanup = false)
621 {
622 auto _obj = new ompt_data_t{};
623 if(_cleanup)
624 m_cleanup = [=]() { delete _obj; };
625 return _obj;
626 }
627
628protected:
630 std::array<ompt_data_t*, size> m_data;
631 std::function<void()> m_cleanup = [] {};
632
633 template <typename Ct, typename At>
634 friend struct callback_connector;
635
// Returns a reference to a function-local, thread_local counter that starts at 0.
static uint64_t& get_counter()
{
    static thread_local uint64_t _count = 0;
    return _count;
}
641};
642//
643//--------------------------------------------------------------------------------------//
644//
645template <typename Components, typename Api>
647{
648 using api_type = Api;
649 using type = Components;
650 using result_type = std::shared_ptr<type>;
651 using array_type = std::deque<result_type>;
652 using map_type = std::unordered_map<size_t, array_type>;
654
655 static bool is_enabled()
656 {
657 if(!manager::instance() ||
658 (manager::instance() && manager::instance()->is_finalizing()))
659 {
662 return false;
663 }
664
665 DEBUG_PRINT_HERE("[timemory-ompt]> %s :: handle enabled = %s",
666 demangle<type>().c_str(),
668
670 }
671
672 template <typename T, typename... Args,
674 callback_connector(T, Args... args);
675
676 template <typename T, typename... Args,
678 callback_connector(T, Args... args);
679
680 template <typename T, typename... Args,
682 callback_connector(T, Args... args);
683
684 template <typename T, typename... Args,
686 callback_connector(T, ompt_scope_endpoint_t endp, Args... args);
687
688 template <typename T, typename... Args,
690 callback_connector(T, ompt_work_t workv, ompt_scope_endpoint_t endp, Args... args)
691 {
692 if(!is_enabled())
693 return;
694 generic_endpoint_connector(T{}, workv, endp, args...);
695 }
696
697 template <typename T, typename... Args,
699 callback_connector(T, ompt_sync_region_t syncv, ompt_scope_endpoint_t endp,
700 Args... args)
701 {
702 if(!is_enabled())
703 return;
704 generic_endpoint_connector(T{}, syncv, endp, args...);
705 }
706
707 template <typename T, typename... Args,
709 callback_connector(T, ompt_target_t targv, ompt_scope_endpoint_t endp, Args... args)
710 {
711 if(!is_enabled())
712 return;
713 generic_endpoint_connector(T{}, targv, endp, args...);
714 }
715
716protected:
717 template <typename T, typename Arg, typename... Args,
719 void generic_endpoint_connector(T, Arg arg, ompt_scope_endpoint_t endp, Args... args);
720
721private:
722 static map_type& get_key_map()
723 {
724 static thread_local map_type _instance;
725 return _instance;
726 }
727};
728//
729//--------------------------------------------------------------------------------------//
730//
731template <typename Components, typename Api>
732template <typename T, typename... Args,
733 enable_if_t<std::is_same<T, mode::begin_callback>::value, int>>
735{
736 if(!is_enabled())
737 return;
738
739 context_handler<api_type> ctx(args...);
740 user_context_callback(ctx, ctx.m_key, args...);
741
742 // don't provide empty entries
743 if(ctx.empty())
744 return;
745
746 user_context_callback<type>(ctx, T{}, std::forward<Args>(args)...);
747}
748//
749//--------------------------------------------------------------------------------------//
750//
751template <typename Components, typename Api>
752template <typename T, typename... Args,
755{
756 if(!is_enabled())
757 return;
758
759 context_handler<api_type> ctx(args...);
760 user_context_callback(ctx, ctx.m_key, args...);
761
762 // don't provide empty entries
763 if(ctx.empty())
764 return;
765
766 user_context_callback<type>(ctx, T{}, std::forward<Args>(args)...);
767}
768//
769//--------------------------------------------------------------------------------------//
770//
771template <typename Components, typename Api>
772template <typename T, typename... Args,
773 enable_if_t<std::is_same<T, mode::store_callback>::value, int>>
775{
776 if(!is_enabled())
777 return;
778
779 context_handler<api_type> ctx(args...);
780 user_context_callback(ctx, ctx.m_key, args...);
781
782 // don't provide empty entries
783 if(ctx.empty())
784 return;
785
786 user_context_callback<type>(ctx, T{}, std::forward<Args>(args)...);
787}
788//
789//--------------------------------------------------------------------------------------//
790//
791template <typename Components, typename Api>
792template <typename T, typename... Args,
793 enable_if_t<std::is_same<T, mode::endpoint_callback>::value, int>>
795 Args... args)
796{
797 if(!is_enabled())
798 return;
799
800 context_handler<api_type> ctx(endp, args...);
801 user_context_callback(ctx, ctx.m_key, endp, args...);
802
803 // don't provide empty entries
804 if(ctx.empty())
805 return;
806
807 user_context_callback<type>(ctx, T{}, endp, std::forward<Args>(args)...);
808}
809//
810//--------------------------------------------------------------------------------------//
811//
812template <typename Components, typename Api>
813template <typename T, typename Arg, typename... Args,
815void
817 T, Arg arg, ompt_scope_endpoint_t endp, Args... args)
818{
819 context_handler<api_type> ctx(arg, endp, args...);
820 user_context_callback(ctx, ctx.m_key, arg, endp, args...);
821
822 // don't provide empty entries
823 if(ctx.empty())
824 return;
825
826 user_context_callback<type>(ctx, T{}, std::forward<Arg>(arg), endp,
827 std::forward<Args>(args)...);
828}
829//
830//--------------------------------------------------------------------------------------//
831//
832} // namespace openmp
833//
834//--------------------------------------------------------------------------------------//
835//
836namespace ompt
837{
// Resolves OMPT runtime entry points through `lookup` and registers timemory's
// ompt_wrapper callbacks for thread, parallel, master, tasking, target, and
// synchronization/mutex events via ompt_set_callback. The two unnamed parameters are
// not used. When TIMEMORY_USE_OMPT is not defined, the function only passes `lookup`
// to consume_parameters.
// NOTE(review): this listing skips original source lines 849, 902, 906 and 966, so the
// declarations of `connector_type` and `manager`, and whatever condition guards the
// early `return;`, are not visible here -- confirm against the upstream header.
838template <typename ApiT>
839static void
840configure(ompt_function_lookup_t lookup, int, ompt_data_t*)
841{
842#if defined(TIMEMORY_USE_OMPT)
843 //
844 //----------------------------------------------------------------------------------//
845 //
846 using api_type = ApiT;
847 using handle_type = component::ompt_handle<ApiT>;
848 using toolset_type = typename trait::ompt_handle<api_type>::type;
850 //
851 //----------------------------------------------------------------------------------//
852 //
    // TIMEMORY_OMPT_LOOKUP: resolve the entry point named NAME through `lookup` into a
    // function-local static OMPT_<NAME> and hand it to consume_parameters
853# define TIMEMORY_OMPT_LOOKUP(TYPE, NAME) \
854 static TYPE OMPT_##NAME = (TYPE) lookup(#NAME); \
855 consume_parameters(OMPT_##NAME)
856 //
857 //----------------------------------------------------------------------------------//
858 //
    // static so the capture-less register_callback lambda below can use it
859 static auto ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
860 //
861 TIMEMORY_OMPT_LOOKUP(ompt_get_proc_id_t, ompt_get_proc_id);
862 TIMEMORY_OMPT_LOOKUP(ompt_get_num_places_t, ompt_get_num_places);
863 TIMEMORY_OMPT_LOOKUP(ompt_get_num_devices_t, ompt_get_num_devices);
864 TIMEMORY_OMPT_LOOKUP(ompt_get_unique_id_t, ompt_get_unique_id);
865 TIMEMORY_OMPT_LOOKUP(ompt_get_place_num_t, ompt_get_place_num);
866 TIMEMORY_OMPT_LOOKUP(ompt_get_place_proc_ids_t, ompt_get_place_proc_ids);
867 TIMEMORY_OMPT_LOOKUP(ompt_get_target_info_t, ompt_get_target_info);
868 TIMEMORY_OMPT_LOOKUP(ompt_get_thread_data_t, ompt_get_thread_data);
869 TIMEMORY_OMPT_LOOKUP(ompt_get_record_type_t, ompt_get_record_type);
870 TIMEMORY_OMPT_LOOKUP(ompt_get_record_ompt_t, ompt_get_record_ompt);
871 TIMEMORY_OMPT_LOOKUP(ompt_get_parallel_info_t, ompt_get_parallel_info);
872 TIMEMORY_OMPT_LOOKUP(ompt_get_device_num_procs_t, ompt_get_device_num_procs);
873 TIMEMORY_OMPT_LOOKUP(ompt_get_partition_place_nums_t, ompt_get_partition_place_nums);
874 //
875 // TIMEMORY_OMPT_LOOKUP(ompt_get_device_time_t, ompt_get_device_time);
876 // TIMEMORY_OMPT_LOOKUP(ompt_translate_time_t, ompt_translate_time);
877 //
878 TIMEMORY_OMPT_LOOKUP(ompt_get_task_info_t, ompt_get_task_info);
879 TIMEMORY_OMPT_LOOKUP(ompt_get_task_memory_t, ompt_get_task_memory);
880 //
881 // TIMEMORY_OMPT_LOOKUP(ompt_set_trace_ompt_t, ompt_set_trace_ompt);
882 // TIMEMORY_OMPT_LOOKUP(ompt_start_trace_t, ompt_start_trace);
883 // TIMEMORY_OMPT_LOOKUP(ompt_pause_trace_t, ompt_pause_trace);
884 //
885 TIMEMORY_OMPT_LOOKUP(ompt_enumerate_states_t, ompt_enumerate_states);
886 TIMEMORY_OMPT_LOOKUP(ompt_enumerate_mutex_impls_t, ompt_enumerate_mutex_impls);
887 //
888 TIMEMORY_OMPT_LOOKUP(ompt_callback_mutex_t, ompt_callback_mutex);
889 TIMEMORY_OMPT_LOOKUP(ompt_callback_nest_lock_t, ompt_callback_nest_lock);
890 TIMEMORY_OMPT_LOOKUP(ompt_callback_flush_t, ompt_callback_flush);
891 TIMEMORY_OMPT_LOOKUP(ompt_callback_cancel_t, ompt_callback_cancel);
892 TIMEMORY_OMPT_LOOKUP(ompt_callback_dispatch_t, ompt_callback_dispatch);
893 TIMEMORY_OMPT_LOOKUP(ompt_callback_buffer_request_t, ompt_callback_buffer_request);
894 TIMEMORY_OMPT_LOOKUP(ompt_callback_buffer_complete_t, ompt_callback_buffer_complete);
895 TIMEMORY_OMPT_LOOKUP(ompt_callback_dependences_t, ompt_callback_dependences);
896 TIMEMORY_OMPT_LOOKUP(ompt_callback_task_dependence_t, ompt_callback_task_dependence);
897 //
898 TIMEMORY_OMPT_LOOKUP(ompt_finalize_tool_t, ompt_finalize_tool);
899 //
900 //------------------------------------------------------------------------------//
901 //
    // NOTE(review): original line 902 is missing from this listing; the return below is
    // presumably guarded by a condition on that line -- confirm against upstream
903 return;
904
905 handle_type::configure();
    // NOTE(review): `manager` is presumably declared on the skipped line 906
907 if(manager)
908 {
    // on cleanup, switch the toolset off at runtime
909 auto cleanup_label = demangle<handle_type>();
910 auto cleanup_func = []() { trait::runtime_enabled<toolset_type>::set(false); };
911 manager->add_cleanup(cleanup_label, cleanup_func);
912 }
913
    // registers `cb` for event `cbidx` and returns the ompt_set_callback result; the
    // outcome is reported on stderr only when verbose >= 1 or debug is enabled
914 auto register_callback = [](ompt_callbacks_t cbidx, ompt_callback_t cb) {
915 int ret = ompt_set_callback(cbidx, cb);
916 if(settings::verbose() < 1 && !settings::debug())
917 return ret;
918 auto name = openmp::ompt_callback_labels[cbidx];
919 switch(ret)
920 {
921 case ompt_set_error:
922 fprintf(stderr,
923 "[timemory]> WARNING: OMPT Callback for event '%s' could not "
924 "be registered: '%s'\n",
925 name, "ompt_set_error");
926 break;
927 case ompt_set_never:
928 fprintf(stderr,
929 "[timemory]> WARNING: OMPT Callback for event '%s' could not "
930 "be registered: '%s'\n",
931 name, "ompt_set_never");
932 break;
933 case ompt_set_impossible:
934 fprintf(stderr,
935 "[timemory]> WARNING: OMPT Callback for event '%s' could not "
936 "be registered: '%s'\n",
937 name, "ompt_set_impossible");
938 break;
939 case ompt_set_sometimes:
940 fprintf(stderr,
941 "[timemory]> OMPT Callback for event '%s' registered with "
942 "return value: '%s'\n",
943 name, "ompt_set_sometimes");
944 break;
945 case ompt_set_sometimes_paired:
946 fprintf(stderr,
947 "[timemory]> OMPT Callback for event '%s' registered with "
948 "return value: '%s'\n",
949 name, "ompt_set_sometimes_paired");
950 break;
951 case ompt_set_always:
952 fprintf(stderr,
953 "[timemory]> OMPT Callback for event '%s' registered with "
954 "return value: '%s'\n",
955 name, "ompt_set_always");
956 break;
957 }
958 return ret;
959 };
960 //
961 //----------------------------------------------------------------------------------//
962 //
    // wrapper used for every registration below; it does not return the result
    // NOTE(review): original line 966 is missing from this listing, so what is done
    // with `ret` is not visible here
963 auto timemory_ompt_register_callback = [&](ompt_callbacks_t name,
964 ompt_callback_t cb) {
965 int ret = register_callback(name, cb);
967 };
968 //
969 //----------------------------------------------------------------------------------//
970 //
971 // General thread
972 //
973 //----------------------------------------------------------------------------------//
974
975 using thread_begin_cb_t =
976 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::begin_callback,
977 ompt_thread_t, ompt_data_t*>;
978
979 using thread_end_cb_t =
980 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::end_callback,
981 ompt_data_t*>;
982
983 timemory_ompt_register_callback(ompt_callback_thread_begin,
984 TIMEMORY_OMPT_CBDECL(thread_begin_cb_t::callback));
985 timemory_ompt_register_callback(ompt_callback_thread_end,
986 TIMEMORY_OMPT_CBDECL(thread_end_cb_t::callback));
987
988 using parallel_begin_cb_t =
989 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::begin_callback,
990 ompt_data_t*, const ompt_frame_t*, ompt_data_t*,
991 unsigned int, int, const void*>;
992
993 using parallel_end_cb_t =
994 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::end_callback,
995 ompt_data_t*, ompt_data_t*, int, const void*>;
996
997 timemory_ompt_register_callback(ompt_callback_parallel_begin,
998 TIMEMORY_OMPT_CBDECL(parallel_begin_cb_t::callback));
999 timemory_ompt_register_callback(ompt_callback_parallel_end,
1000 TIMEMORY_OMPT_CBDECL(parallel_end_cb_t::callback));
1001
1002 using master_cb_t =
1003 openmp::ompt_wrapper<toolset_type, connector_type,
1004 openmp::mode::endpoint_callback, ompt_scope_endpoint_t,
1005 ompt_data_t*, ompt_data_t*, const void*>;
1006
1007 timemory_ompt_register_callback(ompt_callback_master,
1008 TIMEMORY_OMPT_CBDECL(master_cb_t::callback));
1009
1010 //----------------------------------------------------------------------------------//
1011 //
1012 // Tasking section
1013 //
1014 //----------------------------------------------------------------------------------//
1015
1016 using task_create_cb_t =
1017 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1018 ompt_data_t*, const ompt_frame_t*, ompt_data_t*, int, int,
1019 const void*>;
1020
1021 using task_schedule_cb_t =
1022 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1023 ompt_data_t*, ompt_task_status_t, ompt_data_t*>;
1024
1025 using work_cb_t = openmp::ompt_wrapper<
1026 toolset_type, connector_type, openmp::mode::endpoint_callback, ompt_work_t,
1027 ompt_scope_endpoint_t, ompt_data_t*, ompt_data_t*, uint64_t, const void*>;
1028
1029 using implicit_task_cb_t =
1030 openmp::ompt_wrapper<toolset_type, connector_type,
1031 openmp::mode::endpoint_callback, ompt_scope_endpoint_t,
1032 ompt_data_t*, ompt_data_t*, unsigned int, unsigned int>;
1033
1034 using dispatch_cb_t =
1035 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::end_callback,
1036 ompt_data_t*, ompt_data_t*, ompt_dispatch_t, ompt_data_t>;
1037
1038 timemory_ompt_register_callback(ompt_callback_task_create,
1039 TIMEMORY_OMPT_CBDECL(task_create_cb_t::callback));
1040 timemory_ompt_register_callback(ompt_callback_task_schedule,
1041 TIMEMORY_OMPT_CBDECL(task_schedule_cb_t::callback));
1042 timemory_ompt_register_callback(ompt_callback_work,
1043 TIMEMORY_OMPT_CBDECL(work_cb_t::callback));
1044 timemory_ompt_register_callback(ompt_callback_implicit_task,
1045 TIMEMORY_OMPT_CBDECL(implicit_task_cb_t::callback));
1046 timemory_ompt_register_callback(ompt_callback_dispatch,
1047 TIMEMORY_OMPT_CBDECL(dispatch_cb_t::callback));
1048
1049 /*using task_dependences_cb_t =
1050 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1051 ompt_data_t*, const ompt_dependence_t*, int>;
1052
1053 using task_dependence_cb_t =
1054 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1055 ompt_data_t*, ompt_data_t*>;
1056
1057 timemory_ompt_register_callback(
1058 ompt_callback_dependences, TIMEMORY_OMPT_CBDECL(task_dependences_cb_t::callback));
1059 timemory_ompt_register_callback(ompt_callback_task_dependence,
1060 TIMEMORY_OMPT_CBDECL(task_dependence_cb_t::callback));
1061 */
1062 //----------------------------------------------------------------------------------//
1063 //
1064 // Target section
1065 //
1066 //----------------------------------------------------------------------------------//
1067
1068 using target_cb_t = openmp::ompt_wrapper<
1069 toolset_type, connector_type, openmp::mode::endpoint_callback, ompt_target_t,
1070 ompt_scope_endpoint_t, int, ompt_data_t*, ompt_id_t, const void*>;
1071
1072 timemory_ompt_register_callback(ompt_callback_target,
1073 TIMEMORY_OMPT_CBDECL(target_cb_t::callback));
1074
1075 using target_init_cb_t =
1076 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::begin_callback,
1077 uint64_t, const char*, ompt_device_t*,
1078 ompt_function_lookup_t, const char*>;
1079
1080 using target_finalize_cb_t =
1081 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::end_callback,
1082 uint64_t>;
1083
1084 timemory_ompt_register_callback(ompt_callback_device_initialize,
1085 TIMEMORY_OMPT_CBDECL(target_init_cb_t::callback));
1086 timemory_ompt_register_callback(ompt_callback_device_finalize,
1087 TIMEMORY_OMPT_CBDECL(target_finalize_cb_t::callback));
1088
1089 using target_load_cb_t =
1090 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::begin_callback,
1091 uint64_t, const char*, int64_t, void*, size_t, void*, void*,
1092 uint64_t>;
1093
1094 using target_unload_cb_t =
1095 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::end_callback,
1096 uint64_t, uint64_t>;
1097
1098 timemory_ompt_register_callback(ompt_callback_device_load,
1099 TIMEMORY_OMPT_CBDECL(target_load_cb_t::callback));
1100 timemory_ompt_register_callback(ompt_callback_device_unload,
1101 TIMEMORY_OMPT_CBDECL(target_unload_cb_t::callback));
1102
1103 using target_data_op_cb_t =
1104 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1105 ompt_id_t, ompt_id_t, ompt_target_data_op_t, void*, int,
1106 void*, int, size_t, const void*>;
1107
1108 using target_submit_cb_t =
1109 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1110 ompt_id_t, ompt_id_t, unsigned int>;
1111
1112 using target_mapping_cb_t =
1113 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1114 ompt_id_t, unsigned int, void**, void**, size_t*,
1115 unsigned int*>;
1116
1117 timemory_ompt_register_callback(ompt_callback_target_data_op,
1118 TIMEMORY_OMPT_CBDECL(target_data_op_cb_t::callback));
1119 timemory_ompt_register_callback(ompt_callback_target_submit,
1120 TIMEMORY_OMPT_CBDECL(target_submit_cb_t::callback));
1121 timemory_ompt_register_callback(ompt_callback_target_map,
1122 TIMEMORY_OMPT_CBDECL(target_mapping_cb_t::callback));
1123
1124 //----------------------------------------------------------------------------------//
1125 //
1126 // Sync/work section
1127 //
1128 //----------------------------------------------------------------------------------//
1129
1130 using sync_region_cb_t = openmp::ompt_wrapper<
1131 toolset_type, connector_type, openmp::mode::endpoint_callback, ompt_sync_region_t,
1132 ompt_scope_endpoint_t, ompt_data_t*, ompt_data_t*, const void*>;
1133
1134 timemory_ompt_register_callback(ompt_callback_sync_region,
1135 TIMEMORY_OMPT_CBDECL(sync_region_cb_t::callback));
1136
1137 using mutex_nest_lock_cb_t =
1138 openmp::ompt_wrapper<toolset_type, connector_type,
1139 openmp::mode::endpoint_callback, ompt_scope_endpoint_t,
1140 ompt_wait_id_t, const void*>;
1141
1142 timemory_ompt_register_callback(ompt_callback_nest_lock,
1143 TIMEMORY_OMPT_CBDECL(mutex_nest_lock_cb_t::callback));
1144
1145 using mutex_acquire_cb_t =
1146 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::begin_callback,
1147 ompt_mutex_t, unsigned int, unsigned int, ompt_wait_id_t,
1148 const void*>;
1149
1150 timemory_ompt_register_callback(ompt_callback_mutex_acquire,
1151 TIMEMORY_OMPT_CBDECL(mutex_acquire_cb_t::callback));
1152 // timemory_ompt_register_callback(ompt_callback_reduction,
1153 // TIMEMORY_OMPT_CBDECL(sync_region_cb_t::callback));
1154
1155 using mutex_cb_t =
1156 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::end_callback,
1157 ompt_mutex_t, ompt_wait_id_t, const void*>;
1158
    // the same wrapper type serves both the acquired and the released events
1159 timemory_ompt_register_callback(ompt_callback_mutex_acquired,
1160 TIMEMORY_OMPT_CBDECL(mutex_cb_t::callback));
1161 timemory_ompt_register_callback(ompt_callback_mutex_released,
1162 TIMEMORY_OMPT_CBDECL(mutex_cb_t::callback));
1163
1164 // timemory_ompt_register_callback(ompt_callback_lock_init,
1165 // TIMEMORY_OMPT_CBDECL(mutex_acquire_cb_t::callback));
1166 // timemory_ompt_register_callback(ompt_callback_lock_destroy,
1167 // TIMEMORY_OMPT_CBDECL(mutex_cb_t::callback));
1168
1169 //----------------------------------------------------------------------------------//
1170 //
1171 // Miscellaneous section
1172 //
1173 //----------------------------------------------------------------------------------//
1174 /*
1175 using flush_cb_t =
1176 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1177 ompt_data_t*, const void*>;
1178
1179 using cancel_cb_t =
1180 openmp::ompt_wrapper<toolset_type, connector_type, openmp::mode::store_callback,
1181 ompt_data_t*, int, const void*>;
1182
1183 timemory_ompt_register_callback(ompt_callback_flush,
1184 TIMEMORY_OMPT_CBDECL(flush_cb_t::callback));
1185 timemory_ompt_register_callback(ompt_callback_cancel,
1186 TIMEMORY_OMPT_CBDECL(cancel_cb_t::callback));
1187 */
    // blank line after the registration report; note it is written to stdout whereas
    // the report itself is written to stderr
1188 if(settings::verbose() > 0 || settings::debug())
1189 printf("\n");
1190#else
    // OMPT support not compiled in: only hand `lookup` to consume_parameters
1191 consume_parameters(lookup);
1192#endif
1193}
1194} // namespace ompt
1195//
1196//--------------------------------------------------------------------------------------//
1197//
1198} // namespace tim
static pointer_t instance()
Get a shared pointer to the instance for the current thread.
void add_cleanup(void *, Func &&)
add functors to destroy instances based on a pointer
Definition: manager.hpp:421
Implementation of the ompt component(s)
#define TIMEMORY_OMPT_CBDECL(...)
Definition: macros.hpp:782
The declaration for the types for manager without definitions.
STL namespace.
void ompt_suppress_unused_variable_warnings()
Definition: tool.hpp:213
auto join(const char *sep, Arg &&arg, Args &&... args)
Definition: declaration.hpp:74
Definition: kokkosp.cpp:39
typename std::enable_if< B, T >::type enable_if_t
Alias template for enable_if.
Definition: types.hpp:190
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
void consume_parameters(ArgsT &&...)
Definition: types.hpp:285
std::shared_ptr< type > result_type
Definition: tool.hpp:650
callback_connector(T, ompt_target_t targv, ompt_scope_endpoint_t endp, Args... args)
Definition: tool.hpp:709
std::deque< result_type > array_type
Definition: tool.hpp:651
callback_connector(T, Args... args)
Definition: tool.hpp:734
void generic_endpoint_connector(T, Arg arg, ompt_scope_endpoint_t endp, Args... args)
Definition: tool.hpp:816
callback_connector(T, ompt_sync_region_t syncv, ompt_scope_endpoint_t endp, Args... args)
Definition: tool.hpp:699
std::unordered_map< size_t, array_type > map_type
Definition: tool.hpp:652
callback_connector(T, ompt_work_t workv, ompt_scope_endpoint_t endp, Args... args)
Definition: tool.hpp:690
context_handler(ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype, void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, const void *codeptr)
Definition: tool.hpp:476
context_handler(ompt_id_t target_id, ompt_id_t host_op_id, unsigned int requested_num_teams)
Definition: tool.hpp:490
auto destroy(Func &&f=[](Tp *) {})
Definition: tool.hpp:601
context_handler(ompt_work_t wstype, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, uint64_t count, const void *codeptr)
Definition: tool.hpp:430
context_handler(ompt_data_t *thread_data, int flags, const void *codeptr)
Definition: tool.hpp:453
std::array< ompt_data_t *, size > m_data
Definition: tool.hpp:630
context_handler(ompt_data_t *prior_task_data, ompt_task_status_t prior_task_status, ompt_data_t *next_task_data)
Definition: tool.hpp:408
static auto & get_data()
Definition: tool.hpp:235
context_handler(ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num, ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr)
Definition: tool.hpp:463
context_handler(ompt_data_t *thread_data, const void *codeptr)
Definition: tool.hpp:443
context_handler(ompt_data_t *parallel_data, ompt_data_t *task_data, ompt_dispatch_t kind, ompt_data_t instance)
Definition: tool.hpp:419
static constexpr size_t size
Definition: tool.hpp:566
context_handler(ompt_thread_t thread_type, ompt_data_t *thread_data)
Definition: tool.hpp:259
auto destroy(Func &&f=[](Tp *) {})
Definition: tool.hpp:614
const std::string & key() const
Definition: tool.hpp:573
context_handler(uint64_t device_num)
Definition: tool.hpp:524
uomap_t< uint64_t, ompt_data_t * > data_map_t
Definition: tool.hpp:253
context_handler(ompt_id_t target_id, unsigned int nitems, void **host_addr, void **device_addr, size_t *bytes, unsigned int *mapping_flags)
Definition: tool.hpp:501
context_handler(ompt_mutex_t kind, ompt_wait_id_t wait_id, const void *codeptr)
Definition: tool.hpp:348
context_handler(ompt_data_t *task_data, const ompt_frame_t *task_frame, ompt_data_t *new_task_data, int flags, int has_dependences, const void *codeptr)
Definition: tool.hpp:396
context_handler(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr)
Definition: tool.hpp:297
static uint64_t & get_counter()
Definition: tool.hpp:636
context_handler(uint64_t device_num, uint64_t module_id)
Definition: tool.hpp:551
ompt_data_t * data(size_t idx=0) const
Definition: tool.hpp:575
context_handler(ompt_scope_endpoint_t endpoint, ompt_wait_id_t wait_id, const void *codeptr)
Definition: tool.hpp:368
context_handler(ompt_data_t *thread_data)
Definition: tool.hpp:267
context_handler(uint64_t device_num, const char *filename, int64_t offset_in_file, void *vma_in_file, size_t bytes, void *host_addr, void *device_addr, uint64_t module_id)
Definition: tool.hpp:537
auto construct(Func &&f=[](Tp *) {}) -> decltype(new Tp(std::declval< std::string >()), void())
Definition: tool.hpp:581
context_handler(ompt_data_t *parallel_data, ompt_data_t *task_data, int flags, const void *codeptr)
Definition: tool.hpp:286
context_handler(uint64_t device_num, const char *type, ompt_device_t *device, ompt_function_lookup_t lookup, const char *documentation)
Definition: tool.hpp:512
std::unordered_map< KeyT, MappedT, HashT > uomap_t
Definition: tool.hpp:231
std::function< void()> m_cleanup
Definition: tool.hpp:631
context_handler(ompt_data_t *task_data, const ompt_frame_t *task_frame, ompt_data_t *parallel_data, unsigned int requested_parallelism, int flags, const void *codeptr)
Definition: tool.hpp:274
context_handler(ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num)
Definition: tool.hpp:309
auto construct(Func &&f=[](Tp *) {})
Definition: tool.hpp:594
context_handler(ompt_mutex_t kind, unsigned int hint, unsigned int impl, ompt_wait_id_t wait_id, const void *codeptr)
Definition: tool.hpp:335
auto construct_data(bool _cleanup=false)
Definition: tool.hpp:620
context_handler(ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, ompt_data_t *task_data, const void *codeptr)
Definition: tool.hpp:322
trait that signifies that an implementation for the component is available. When this is set to false...
Definition: types.hpp:355
trait that signifies that an implementation is enabled at runtime. The value returned from get() is f...
static bool set(bool val, enable_if_t< is_available< U >::value &&get_value< U >(), int >=0)
SET specialization if component is available.
#define TIMEMORY_OMPT_ENUM_LABEL(TYPE)
Definition: tool.hpp:145
#define DEBUG_PRINT_HERE(...)
Definition: macros.hpp:168