timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
ncclp.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25#pragma once
26
27#include "timemory/api.hpp"
29#include "timemory/components/gotcha/backends.hpp"
34#include "timemory/units.hpp"
36
37#include <memory>
38#include <set>
39#include <string>
40#include <unordered_map>
41
42#if defined(TIMEMORY_USE_NCCL)
43# include <nccl.h>
44#endif
45
46#if !defined(NUM_TIMEMORY_NCCLP_WRAPPERS)
47# define NUM_TIMEMORY_NCCLP_WRAPPERS 15
48#endif
49
50namespace tim
51{
52namespace component
53{
54//
55//--------------------------------------------------------------------------------------//
56//
/// \fn void tim::component::configure_ncclp(std::set<std::string>, std::set<std::string>)
/// \brief Configure the NCCL wrappers for a Toolset/Tag pair.
///
/// The definition in this header merges \p permit and \p reject with the
/// TIMEMORY_NCCLP_PERMIT_LIST / TIMEMORY_NCCLP_REJECT_LIST environment variables
/// when both TIMEMORY_USE_GOTCHA and TIMEMORY_USE_NCCL are defined; otherwise the
/// definition is an empty stub.
///
/// \param permit names added to the wrapper permit list (defaults to empty)
/// \param reject names added to the wrapper reject list (defaults to empty)
template <typename Toolset, typename Tag>
void
configure_ncclp(std::set<std::string> permit = {}, std::set<std::string> reject = {});
60//
61//--------------------------------------------------------------------------------------//
62//
/// \brief Create and start the ncclp handle for a Toolset/Tag pair if none exists.
/// \return 1 when this call created the handle, 0 when a handle already existed
template <typename Toolset, typename Tag>
static uint64_t
activate_ncclp();
66//
67//--------------------------------------------------------------------------------------//
68//
/// \brief Run the cleanup registered by activate_ncclp() when given a non-zero id.
/// \param id presumably the value returned by activate_ncclp() -- confirm with callers
/// \return 0 when the cleanup was requested (id > 0), 1 otherwise
template <typename Toolset, typename Tag>
static uint64_t deactivate_ncclp(uint64_t);
71//
72//--------------------------------------------------------------------------------------//
73//
74template <typename Toolset, typename Tag>
75struct ncclp_handle : base<ncclp_handle<Toolset, Tag>, void>
76{
78
79 using value_type = void;
82
88 using toolset_ptr_t = std::shared_ptr<ncclp_tuple_t>;
89
90 static string_t label() { return "ncclp_handle"; }
91 static string_t description() { return "Handle for activating NCCL wrappers"; }
92
93 void get() {}
94
95 void start()
96 {
97 if(get_tool_count()++ == 0)
98 {
99 get_tool_instance() = std::make_shared<ncclp_tuple_t>("timemory_ncclp");
100 get_tool_instance()->start();
101 }
102 }
103
104 void stop()
105 {
106 auto idx = --get_tool_count();
107 if(get_tool_instance().get())
108 {
109 get_tool_instance()->stop();
110 if(idx == 0)
111 get_tool_instance().reset();
112 }
113 }
114
115 int get_count() { return get_tool_count().load(); }
116
117private:
118 struct persistent_data
119 {
120 std::atomic<short> m_configured;
121 std::atomic<int64_t> m_count;
122 toolset_ptr_t m_tool;
123 };
124
125 static persistent_data& get_persistent_data()
126 {
127 static persistent_data _instance;
128 return _instance;
129 }
130
131 static std::atomic<short>& get_configured()
132 {
133 return get_persistent_data().m_configured;
134 }
135
136 static toolset_ptr_t& get_tool_instance() { return get_persistent_data().m_tool; }
137
138 static std::atomic<int64_t>& get_tool_count()
139 {
140 return get_persistent_data().m_count;
141 }
142};
143//
144//======================================================================================//
145//
146} // namespace component
147} // namespace tim
148//
149//======================================================================================//
150//
151#include "timemory/timemory.hpp"
152//
153//======================================================================================//
154//
155/// \fn uint64_t tim::component::activate_ncclp()
156/// \brief The thread that first activates ncclp will be the thread that turns it off.
157/// Function returns the number of new ncclp handles
158///
159template <typename Toolset, typename Tag>
160static uint64_t
161tim::component::activate_ncclp()
162{
164
165 static std::shared_ptr<handle_t> _handle;
166
167 if(!_handle.get())
168 {
169 _handle = std::make_shared<handle_t>();
170 _handle->start();
171
172 auto cleanup_functor = [=]() {
173 if(_handle)
174 {
175 _handle->stop();
176 _handle.reset();
177 }
178 };
179
180 std::stringstream ss;
181 ss << "timemory-ncclp-" << typeid(Toolset).name() << "-" << typeid(Tag).name();
182 tim::manager::instance()->add_cleanup(ss.str(), cleanup_functor);
183 return 1;
184 }
185 return 0;
186}
187//
188//======================================================================================//
189//
190/// \fn uint64_t tim::component::deactivate_ncclp(uint64_t id)
191/// \brief The thread that created the initial ncclp handle will turn off. Returns
192/// the number of handles active
193///
194template <typename Toolset, typename Tag>
195static uint64_t
196tim::component::deactivate_ncclp(uint64_t id)
197{
198 if(id > 0)
199 {
200 std::stringstream ss;
201 ss << "timemory-ncclp-" << typeid(Toolset).name() << "-" << typeid(Tag).name();
202 tim::manager::instance()->cleanup(ss.str());
203 return 0;
204 }
205 return 1;
206}
207//
208//======================================================================================//
209//
210#if !defined(TIMEMORY_USE_GOTCHA) || !defined(TIMEMORY_USE_NCCL)
211//
/// No-op definition used when GOTCHA or NCCL support is not compiled in.
/// It is placed inside tim::component so that it defines the function declared
/// there; a definition at global scope would declare an unrelated
/// ::configure_ncclp and leave tim::component::configure_ncclp undefined.
namespace tim
{
namespace component
{
template <typename Toolset, typename Tag>
void configure_ncclp(std::set<std::string>, std::set<std::string>)
{}
}  // namespace component
}  // namespace tim
215//
216#else
217//
218template <typename Toolset, typename Tag>
219void
220tim::component::configure_ncclp(std::set<std::string> permit,
221 std::set<std::string> reject)
222{
223 static constexpr size_t ncclp_wrapper_count = NUM_TIMEMORY_NCCLP_WRAPPERS;
224
225 using string_t = std::string;
227
228 static bool is_initialized = false;
229 if(!is_initialized)
230 {
231 // generate the gotcha wrappers
232 ncclp_gotcha_t::get_initializer() = []() {
233 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 0, ncclReduce);
234 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 1, ncclBcast);
235 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 2, ncclBroadcast);
236 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 3, ncclAllReduce);
237 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 4, ncclReduceScatter);
238 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 5, ncclAllGather);
239 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 6, ncclCommCuDevice);
240 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 7, ncclCommUserRank);
241 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 8, ncclGroupStart);
242 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 9, ncclGroupEnd);
243 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 10, ncclSend);
244 TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 11, ncclRecv);
245 // TIMEMORY_C_GOTCHA(ncclp_gotcha_t, 12, ncclCommCount);
246 };
247
248 // provide environment variable for suppressing wrappers
249 ncclp_gotcha_t::get_reject_list() = [reject]() {
250 auto _reject = reject;
251 // check environment
252 auto reject_list = tim::get_env<string_t>("TIMEMORY_NCCLP_REJECT_LIST", "");
253 // add environment setting
254 for(const auto& itr : tim::delimit(reject_list))
255 _reject.insert(itr);
256 return _reject;
257 };
258
259 // provide environment variable for selecting wrappers
260 ncclp_gotcha_t::get_permit_list() = [permit]() {
261 auto _permit = permit;
262 // check environment
263 auto permit_list = tim::get_env<string_t>("TIMEMORY_NCCLP_PERMIT_LIST", "");
264 // add environment setting
265 for(const auto& itr : tim::delimit(permit_list))
266 _permit.insert(itr);
267 return _permit;
268 };
269
270 is_initialized = true;
271 }
272}
273//
274#endif
275//
276//======================================================================================//
277//
This is a variadic component wrapper where all components are allocated on the stack and cannot be di...
static pointer_t instance()
Get a shared pointer to the instance for the current thread.
Toolset
Definition: types.hpp:40
std::string string_t
Definition: library.cpp:57
#define TIMEMORY_C_GOTCHA(...)
Definition: macros.hpp:323
The declaration for the types for manager without definitions.
void configure_ncclp(std::set< std::string > permit={}, std::set< std::string > reject={})
Definition: kokkosp.cpp:39
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: delimit.hpp:68
void configure_ncclp(std::set< std::string >, std::set< std::string >)
Definition: ncclp.hpp:213
#define NUM_TIMEMORY_NCCLP_WRAPPERS
Definition: ncclp.hpp:47
The gotcha component rewrites the global offset table such that calling the wrapped function actually...
Definition: components.hpp:179
static constexpr size_t ncclp_wrapper_count
Definition: ncclp.hpp:77
std::shared_ptr< ncclp_tuple_t > toolset_ptr_t
Definition: ncclp.hpp:88
static string_t description()
Definition: ncclp.hpp:91
static string_t label()
Definition: ncclp.hpp:90