timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
barrier.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/** \file timemory/ert/barrier.hpp
26 * \headerfile timemory/ert/barrier.hpp "timemory/ert/barrier.hpp"
27 * Provides multi-threading barriers
28 *
29 */
30
31#pragma once
32
33#include <atomic>
34#include <condition_variable>
35#include <cstdint>
36#include <future>
37#include <mutex>
38#include <stdexcept>
39#include <thread>
40
41namespace tim
42{
43namespace ert
44{
45using std::size_t;
46
47//--------------------------------------------------------------------------------------//
48// creates a multithreading barrier
49//
51{
52public:
53 using size_type = int64_t;
54 using mutex_t = std::mutex;
55 using condvar_t = std::condition_variable;
56 using atomic_t = std::atomic<size_type>;
57 using lock_t = std::unique_lock<mutex_t>;
58
59public:
60 explicit thread_barrier(size_t nthreads)
61 : m_master(std::this_thread::get_id())
62 , m_num_threads(nthreads)
63 , m_notify(0)
64 , m_future(m_promise.get_future().share())
65 {}
66
69
72
73 size_type size() const { return m_num_threads; }
74
75 // call from worker thread -- spin wait (fast)
76 void spin_wait()
77 {
78 if(is_master())
79 {
80#if defined(TIMEMORY_INTERNAL_TESTING)
81 TIMEMORY_EXCEPTION("master thread calling worker wait function\n");
82#else
83 return;
84#endif
85 }
86
87 {
88 lock_t lk(m_mutex);
89 ++m_counter;
90 ++m_waiting;
91 }
92
93 while(m_counter < m_num_threads)
94 {
95 while(spin_lock.test_and_set(std::memory_order_acquire)) // acquire lock
96 ; // spin
97 spin_lock.clear(std::memory_order_release);
98 }
99
100 {
101 lock_t lk(m_mutex);
102 --m_waiting;
103 if(m_waiting == 0)
104 m_counter = 0; // reset barrier
105 }
106 }
107
108 // call from worker thread -- condition variable wait (slower)
109 void cv_wait()
110 {
111 if(is_master())
112 {
113#if defined(TIMEMORY_INTERNAL_TESTING)
114 TIMEMORY_EXCEPTION("master thread calling worker wait function\n");
115#else
116 return;
117#endif
118 }
119
120 lock_t lk(m_mutex);
121 ++m_counter;
122 ++m_waiting;
123 m_cv.wait(lk, [&] { return m_counter >= m_num_threads; });
124 m_cv.notify_one();
125 --m_waiting;
126 if(m_waiting == 0)
127 m_counter = 0; // reset barrier
128 }
129
130 // workers increment an atomic counter and then wait on the shared future until
131 // the master sets the promise, which it does once all workers have checked in
133 {
134 if(is_master())
135 {
136 lock_t lk(m_mutex);
137 while(m_notify.load() < m_num_threads)
138 m_cv.wait(lk);
139 m_promise.set_value();
140 while(m_notify.load() > 0)
141 {
142 }
143 std::promise<void> _ptmp;
144 std::shared_future<void> _ftmp = _ptmp.get_future().share();
145 std::swap(m_promise, _ptmp);
146 std::swap(m_future, _ftmp);
147 }
148 else
149 {
150 {
151 lock_t lk(m_mutex);
152 ++m_notify;
153 m_cv.notify_one();
154 }
155 m_future.wait();
156 --m_notify;
157 }
158 }
159
160 // check if this is the thread the created barrier
161 bool is_master() const { return std::this_thread::get_id() == m_master; }
162
163private:
164 // the constructing thread will be set to master
165 std::thread::id m_master = std::this_thread::get_id();
166 size_type m_num_threads = 0; // number of threads that will wait on barrier
167 size_type m_waiting = 0; // number of threads waiting on lock
168 size_type m_counter = 0; // number of threads that have entered wait func
169 std::atomic_flag spin_lock = ATOMIC_FLAG_INIT; // for spin lock
170 mutex_t m_mutex; // locked around updates of m_counter and m_waiting; also paired with m_cv
171 condvar_t m_cv; // condition variable used together with m_mutex
172 std::atomic<size_type> m_notify; // number of worker threads that have checked in with the master
173 std::promise<void> m_promise; // fulfilled by the master thread to release waiting workers
174 std::shared_future<void> m_future; // shared future of m_promise that the workers wait on
175};
176
177} // namespace ert
178} // namespace tim
std::condition_variable condvar_t
Definition: barrier.hpp:55
std::atomic< size_type > atomic_t
Definition: barrier.hpp:56
thread_barrier & operator=(const thread_barrier &)=delete
thread_barrier(const thread_barrier &)=delete
thread_barrier(size_t nthreads)
Definition: barrier.hpp:60
bool is_master() const
Definition: barrier.hpp:161
thread_barrier & operator=(thread_barrier &&)=delete
size_type size() const
Definition: barrier.hpp:73
std::unique_lock< mutex_t > lock_t
Definition: barrier.hpp:57
thread_barrier(thread_barrier &&)=delete
STL namespace.
Definition: kokkosp.cpp:39
std::recursive_mutex mutex_t
Recursive mutex is used for convenience since the performance penalty vs. a regular mutex is not real...
Definition: locking.hpp:38
#define TIMEMORY_EXCEPTION(...)
Definition: types.hpp:138