timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
data.hpp
Go to the documentation of this file.
1// MIT License
2//
3// Copyright (c) 2020, The Regents of the University of California,
4// through Lawrence Berkeley National Laboratory (subject to receipt of any
5// required approvals from the U.S. Dept. of Energy). All rights reserved.
6//
7// Permission is hereby granted, free of charge, to any person obtaining a copy
8// of this software and associated documentation files (the "Software"), to deal
9// in the Software without restriction, including without limitation the rights
10// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11// copies of the Software, and to permit persons to whom the Software is
12// furnished to do so, subject to the following conditions:
13//
14// The above copyright notice and this permission notice shall be included in all
15// copies or substantial portions of the Software.
16//
17// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23// SOFTWARE.
24
25/** \file timemory/ert/data.hpp
26 * \headerfile timemory/ert/data.hpp "timemory/ert/data.hpp"
27 * Provides execution data for ERT
28 *
29 */
30
31#pragma once
32
33#include "timemory/backends/device.hpp"
34#include "timemory/backends/dmp.hpp"
35#include "timemory/components/cuda/backends.hpp"
42#include "timemory/tpls/cereal/cereal.hpp"
44
45#include <array>
46#include <atomic>
47#include <cstddef>
48#include <cstdint>
49#include <functional>
50#include <numeric>
51#include <sstream>
52#include <stdexcept>
53#include <string>
54#include <vector>
55
56namespace tim
57{
58namespace ert
59{
60using std::size_t;
61using namespace std::placeholders;
62
63//--------------------------------------------------------------------------------------//
64// execution params
65//
67{
68 explicit exec_params(uint64_t _work_set = 16,
69 uint64_t mem_max = 8 * cache_size::get_max(),
70 uint64_t _nthread = 1, uint64_t _nstream = 1,
71 uint64_t _grid_size = 0, uint64_t _block_size = 32)
72 : working_set_min(_work_set)
73 , memory_max(mem_max)
74 , nthreads(_nthread)
75 , nstreams(_nstream)
76 , grid_size(_grid_size)
77 , block_size(_block_size)
78 {}
79
80 ~exec_params() = default;
81 exec_params(const exec_params&) = default;
82 exec_params(exec_params&&) noexcept = default;
83 exec_params& operator=(const exec_params&) = default;
84 exec_params& operator=(exec_params&&) noexcept = default;
85
86 template <typename Archive>
87 void serialize(Archive& ar, const unsigned int)
88 {
89 ar(cereal::make_nvp("working_set_min", working_set_min),
90 cereal::make_nvp("memory_max", memory_max),
91 cereal::make_nvp("nthreads", nthreads), cereal::make_nvp("nrank", nrank),
92 cereal::make_nvp("nproc", nproc), cereal::make_nvp("nstreams", nstreams),
93 cereal::make_nvp("grid_size", grid_size),
94 cereal::make_nvp("block_size", block_size),
95 cereal::make_nvp("shmem_size", shmem_size));
96 }
97
98 friend std::ostream& operator<<(std::ostream& os, const exec_params& obj)
99 {
100 std::stringstream ss;
101 ss << "working_set_min = " << obj.working_set_min << ", "
102 << "memory_max = " << obj.memory_max << ", "
103 << "nthreads = " << obj.nthreads << ", "
104 << "nrank = " << obj.nrank << ", "
105 << "nproc = " << obj.nproc << ", "
106 << "nstreams = " << obj.nstreams << ", "
107 << "grid_size = " << obj.grid_size << ", "
108 << "block_size = " << obj.block_size << ", "
109 << "shmem_size = " << obj.shmem_size;
110 os << ss.str();
111 return os;
112 }
113
114 uint64_t working_set_min = 16; // NOLINT NOLINTNEXTLINE
115 uint64_t memory_max = 8 * cache_size::get_max(); // default is 8 * L3 cache size
116 uint64_t nthreads = 1; // NOLINT
117 uint64_t nrank = tim::dmp::rank(); // NOLINT
118 uint64_t nproc = tim::dmp::size(); // NOLINT
119 uint64_t nstreams = 1; // NOLINT
120 uint64_t grid_size = 0; // NOLINT
121 uint64_t block_size = 32; // NOLINT
122 uint64_t shmem_size = 0; // NOLINT
123};
124
125//--------------------------------------------------------------------------------------//
126// execution data -- reuse this for multiple types
127//
128template <typename Tp>
130{
131public:
132 using value_type = std::tuple<std::string, uint64_t, uint64_t, uint64_t, uint64_t,
133 uint64_t, Tp, std::string, std::string, exec_params>;
134 using labels_type = std::array<string_t, std::tuple_size<value_type>::value>;
135 using value_array = std::vector<value_type>;
136 using size_type = typename value_array::size_type;
137 using iterator = typename value_array::iterator;
138 using const_iterator = typename value_array::const_iterator;
140
141 //----------------------------------------------------------------------------------//
142 //
143 exec_data() = default;
144 ~exec_data() = default;
145 exec_data(const exec_data&) = delete;
146 exec_data(exec_data&&) noexcept = default;
147
148 exec_data& operator=(const exec_data&) = delete;
149 exec_data& operator=(exec_data&&) noexcept = default;
150
151public:
152 //----------------------------------------------------------------------------------//
153 //
154 void set_labels(const labels_type& _labels) { m_labels = _labels; }
155 TIMEMORY_NODISCARD labels_type get_labels() const { return m_labels; }
156 size_type size() { return m_values.size(); }
157 iterator begin() { return m_values.begin(); }
158 TIMEMORY_NODISCARD const_iterator begin() const { return m_values.begin(); }
159 iterator end() { return m_values.end(); }
160 TIMEMORY_NODISCARD const_iterator end() const { return m_values.end(); }
161
162public:
163 //----------------------------------------------------------------------------------//
164 //
166 {
167 // static std::mutex _mutex;
168 // std::unique_lock<std::mutex> _lock(_mutex);
169 m_values.resize(m_values.size() + 1);
170 m_values.back() = entry;
171 // m_values.push_back(entry);
172 return *this;
173 }
174
175 //----------------------------------------------------------------------------------//
176 //
178 {
179 // static std::mutex _mutex;
180 // std::unique_lock<std::mutex> _lock(_mutex);
181
182 for(const auto& itr : rhs.m_values)
183 m_values.push_back(itr);
184 return *this;
185 }
186
187public:
188 //----------------------------------------------------------------------------------//
189 //
190 friend std::ostream& operator<<(std::ostream& os, const exec_data& obj)
191 {
192 std::stringstream ss;
193 for(const auto& itr : obj.m_values)
194 {
195 ss << std::setw(24) << std::get<0>(itr) << " (device: " << std::get<7>(itr)
196 << ", dtype = " << std::get<8>(itr) << "): ";
197 obj.write<1>(ss, itr, ", ", 10);
198 obj.write<2>(ss, itr, ", ", 6);
199 obj.write<3>(ss, itr, ", ", 12);
200 obj.write<4>(ss, itr, ", ", 12);
201 obj.write<5>(ss, itr, ", ", 12);
202 obj.write<6>(ss, itr, "\n", 12);
203 }
204 os << ss.str();
205 return os;
206 }
207
208 //----------------------------------------------------------------------------------//
209 //
210 template <typename Archive>
211 void save(Archive& ar, const unsigned int) const
212 {
213 constexpr auto sz = std::tuple_size<value_type>::value;
214 ar(cereal::make_nvp("entries", m_values.size()));
215
216 ar.setNextName("ert");
217 ar.startNode();
218 ar.makeArray();
219 for(const auto& itr : m_values)
220 {
221 ar.startNode();
222 _save(ar, itr, make_index_sequence<sz>{});
223 ar.finishNode();
224 }
225 ar.finishNode();
226 }
227
228 //----------------------------------------------------------------------------------//
229 //
230 template <typename Archive>
231 void load(Archive& ar, const unsigned int)
232 {
233 constexpr auto sz = std::tuple_size<value_type>::value;
234 auto _size = 0;
235 ar(cereal::make_nvp("entries", _size));
236 m_values.resize(_size);
237
238 ar.setNextName("ert");
239 ar.startNode();
240 for(auto& itr : m_values)
241 {
242 ar.startNode();
243 _load(ar, itr, make_index_sequence<sz>{});
244 ar.finishNode();
245 }
246 ar.finishNode();
247 }
248
249private:
250 labels_type m_labels = { { "label", "working-set", "trials", "total-bytes",
251 "total-ops", "ops-per-set", "counter", "device", "dtype",
252 "exec-params" } };
253 value_array m_values = {};
254
255private:
256 //----------------------------------------------------------------------------------//
257 //
258 template <size_t N>
259 void write(std::ostream& os, const value_type& ret, const string_t& _trailing,
260 int32_t _width) const
261 {
262 os << std::setw(10) << std::get<N>(m_labels) << " = " << std::setw(_width)
263 << std::get<N>(ret) << _trailing;
264 }
265
266 //----------------------------------------------------------------------------------//
267 //
268 template <typename Archive, size_t... Idx>
269 void _save(Archive& ar, const value_type& _tuple, index_sequence<Idx...>) const
270 {
271 ar(cereal::make_nvp(std::get<Idx>(m_labels), std::get<Idx>(_tuple))...);
272 }
273
274 //----------------------------------------------------------------------------------//
275 //
276 template <typename Archive, size_t... Idx>
277 void _load(Archive& ar, value_type& _tuple, index_sequence<Idx...>)
278 {
279 ar(cereal::make_nvp(std::get<Idx>(m_labels), std::get<Idx>(_tuple))...);
280 }
281};
282
283//--------------------------------------------------------------------------------------//
284//
285// CPU -- initialize buffer
286//
287//--------------------------------------------------------------------------------------//
288
289template <typename DeviceT, typename Tp, typename Intp = int32_t,
290 device::enable_if_cpu_t<DeviceT> = 0>
291void
292initialize_buffer(Tp* A, const Tp& value, const Intp& nsize)
293{
294 auto range = device::grid_strided_range<DeviceT, 0, Intp>(nsize);
295 for(auto i = range.begin(); i < range.end(); i += range.stride())
296 A[i] = value;
297}
298
299//--------------------------------------------------------------------------------------//
300//
301// GPU -- initialize buffer
302//
303//--------------------------------------------------------------------------------------//
304
305template <typename DeviceT, typename Tp, typename Intp = int32_t,
306 device::enable_if_gpu_t<DeviceT> = 0>
308initialize_buffer(Tp* A, Tp value, Intp nsize)
309{
310 auto range = device::grid_strided_range<DeviceT, 0, Intp>(nsize);
311 for(auto i = range.begin(); i < range.end(); i += range.stride())
312 A[i] = value;
313}
314
315//--------------------------------------------------------------------------------------//
316
317} // namespace ert
318} // namespace tim
#define TIMEMORY_GLOBAL_FUNCTION
Definition: attributes.hpp:182
const_iterator end() const
Definition: data.hpp:160
iterator end()
Definition: data.hpp:159
std::vector< value_type > value_array
Definition: data.hpp:135
void load(Archive &ar, const unsigned int)
Definition: data.hpp:231
exec_data(exec_data &&) noexcept=default
std::tuple< std::string, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, Tp, std::string, std::string, exec_params > value_type
Definition: data.hpp:133
friend std::ostream & operator<<(std::ostream &os, const exec_data &obj)
Definition: data.hpp:190
void set_labels(const labels_type &_labels)
Definition: data.hpp:154
iterator begin()
Definition: data.hpp:157
typename value_array::iterator iterator
Definition: data.hpp:137
void save(Archive &ar, const unsigned int) const
Definition: data.hpp:211
exec_data & operator+=(const value_type &entry)
Definition: data.hpp:165
const_iterator begin() const
Definition: data.hpp:158
size_type size()
Definition: data.hpp:156
labels_type get_labels() const
Definition: data.hpp:155
exec_data & operator+=(const exec_data &rhs)
Definition: data.hpp:177
exec_data(const exec_data &)=delete
typename value_array::size_type size_type
Definition: data.hpp:136
typename value_array::const_iterator const_iterator
Definition: data.hpp:138
std::array< string_t, std::tuple_size< value_type >::value > labels_type
Definition: data.hpp:134
std::string string_t
Definition: library.cpp:57
void initialize_buffer(Tp *A, const Tp &value, const Intp &nsize)
Definition: data.hpp:292
data::entry entry
Definition: stream.hpp:980
Definition: kokkosp.cpp:39
std::make_integer_sequence< size_t, Num > make_index_sequence
Alias template make_index_sequence.
Definition: types.hpp:182
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
const std::string std::ostream * os
uint64_t working_set_min
Definition: data.hpp:114
uint64_t nstreams
Definition: data.hpp:119
uint64_t memory_max
Definition: data.hpp:115
uint64_t nthreads
Definition: data.hpp:116
exec_params(exec_params &&) noexcept=default
void serialize(Archive &ar, const unsigned int)
Definition: data.hpp:87
uint64_t grid_size
Definition: data.hpp:120
exec_params(const exec_params &)=default
uint64_t shmem_size
Definition: data.hpp:122
uint64_t block_size
Definition: data.hpp:121
exec_params(uint64_t _work_set=16, uint64_t mem_max=8 *cache_size::get_max(), uint64_t _nthread=1, uint64_t _nstream=1, uint64_t _grid_size=0, uint64_t _block_size=32)
Definition: data.hpp:68
friend std::ostream & operator<<(std::ostream &os, const exec_params &obj)
Definition: data.hpp:98
typename typename typename
Definition: types.hpp:226