timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::ert::configuration< DeviceT, Tp, CounterT > Struct Template Reference

#include "timemory/ert/configuration.hpp"

+ Collaboration diagram for tim::ert::configuration< DeviceT, Tp, CounterT >:

Public Types

using this_type = configuration< DeviceT, Tp, CounterT >
 
using ert_data_t = exec_data< CounterT >
 
using device_t = DeviceT
 
using counter_t = CounterT
 
using ert_counter_t = counter< device_t, Tp, counter_t >
 
using ert_data_ptr_t = std::shared_ptr< ert_data_t >
 
using executor_func_t = std::function< ert_counter_t(ert_data_ptr_t)>
 
using get_uint64_t = std::function< uint64_t()>
 
using skip_ops_t = std::unordered_set< size_t >
 
using get_skip_ops_t = std::function< skip_ops_t()>
 

Static Public Member Functions

static get_uint64_t & get_num_threads ()
 
static get_uint64_t & get_num_streams ()
 
static get_uint64_t & get_grid_size ()
 
static get_uint64_t & get_block_size ()
 
static get_uint64_t & get_alignment ()
 
static get_uint64_t & get_min_working_size ()
 
static get_uint64_t & get_max_data_size ()
 
static get_skip_ops_t & get_skip_ops ()
 
template<typename Dev = DeviceT, enable_if_t< std::is_same< Dev, device::cpu >::value, int > = 0>
static void configure (uint64_t nthreads, uint64_t alignment=sizeof(Tp), uint64_t nstreams=0, uint64_t block_size=0, uint64_t grid_size=0)
 configure the number of threads, number of streams, block size, grid size, and alignment More...
 
template<typename Dev = DeviceT, enable_if_t< std::is_same< Dev, device::gpu >::value, int > = 0>
static void configure (uint64_t nthreads, uint64_t alignment=sizeof(Tp), uint64_t nstreams=1, uint64_t block_size=1024, uint64_t grid_size=0)
 configure the number of threads, number of streams, block size, grid size, and alignment More...
 
static executor_func_t & get_executor ()
 

Public Attributes

bool verbose = false
 
get_uint64_t num_threads = this_type::get_num_threads()
 
get_uint64_t num_streams = this_type::get_num_streams()
 
get_uint64_t min_working_size = this_type::get_min_working_size()
 
get_uint64_t max_data_size = this_type::get_max_data_size()
 
get_uint64_t alignment = this_type::get_alignment()
 
get_uint64_t grid_size = this_type::get_grid_size()
 
get_uint64_t block_size = this_type::get_block_size()
 
executor_func_t executor = this_type::get_executor()
 

Detailed Description

template<typename DeviceT, typename Tp, typename CounterT>
struct tim::ert::configuration< DeviceT, Tp, CounterT >

Definition at line 62 of file configuration.hpp.

Member Typedef Documentation

◆ counter_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::counter_t = CounterT

Definition at line 67 of file configuration.hpp.

◆ device_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::device_t = DeviceT

Definition at line 66 of file configuration.hpp.

◆ ert_counter_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::ert_counter_t = counter<device_t, Tp, counter_t>

Definition at line 68 of file configuration.hpp.

◆ ert_data_ptr_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::ert_data_ptr_t = std::shared_ptr<ert_data_t>

Definition at line 69 of file configuration.hpp.

◆ ert_data_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::ert_data_t = exec_data<CounterT>

Definition at line 65 of file configuration.hpp.

◆ executor_func_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::executor_func_t = std::function<ert_counter_t(ert_data_ptr_t)>

Definition at line 70 of file configuration.hpp.

◆ get_skip_ops_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::get_skip_ops_t = std::function<skip_ops_t()>

Definition at line 73 of file configuration.hpp.

◆ get_uint64_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::get_uint64_t = std::function<uint64_t()>

Definition at line 71 of file configuration.hpp.

◆ skip_ops_t

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::skip_ops_t = std::unordered_set<size_t>

Definition at line 72 of file configuration.hpp.

◆ this_type

template<typename DeviceT , typename Tp , typename CounterT >
using tim::ert::configuration< DeviceT, Tp, CounterT >::this_type = configuration<DeviceT, Tp, CounterT>

Definition at line 64 of file configuration.hpp.

Member Function Documentation

◆ configure() [1/2]

template<typename DeviceT , typename Tp , typename CounterT >
template<typename Dev = DeviceT, enable_if_t< std::is_same< Dev, device::cpu >::value, int > = 0>
static void tim::ert::configuration< DeviceT, Tp, CounterT >::configure ( uint64_t  nthreads,
uint64_t  alignment = sizeof(Tp),
uint64_t  nstreams = 0,
uint64_t  block_size = 0,
uint64_t  grid_size = 0 
)
inline static

configure the number of threads, number of streams, block size, grid size, and alignment

Definition at line 180 of file configuration.hpp.

183 {
184 get_num_threads() = [=]() -> uint64_t { return nthreads; };
185 get_num_streams() = [=]() -> uint64_t { return nstreams; };
186 get_grid_size() = [=]() -> uint64_t { return grid_size; };
187 get_block_size() = [=]() -> uint64_t { return block_size; };
188 get_alignment() = [=]() -> uint64_t { return alignment; };
189 }
static get_uint64_t & get_alignment()
static get_uint64_t & get_block_size()
static get_uint64_t & get_grid_size()
static get_uint64_t & get_num_streams()
static get_uint64_t & get_num_threads()

References tim::ert::configuration< DeviceT, Tp, CounterT >::alignment, tim::ert::configuration< DeviceT, Tp, CounterT >::block_size, tim::ert::configuration< DeviceT, Tp, CounterT >::get_alignment(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_block_size(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_grid_size(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_num_streams(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_num_threads(), and tim::ert::configuration< DeviceT, Tp, CounterT >::grid_size.

◆ configure() [2/2]

template<typename DeviceT , typename Tp , typename CounterT >
template<typename Dev = DeviceT, enable_if_t< std::is_same< Dev, device::gpu >::value, int > = 0>
static void tim::ert::configuration< DeviceT, Tp, CounterT >::configure ( uint64_t  nthreads,
uint64_t  alignment = sizeof(Tp),
uint64_t  nstreams = 1,
uint64_t  block_size = 1024,
uint64_t  grid_size = 0 
)
inline static

◆ get_alignment()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_alignment ( )
inline static

Definition at line 116 of file configuration.hpp.

117 {
118 static get_uint64_t _instance = []() {
119 return std::max<uint64_t>(settings::ert_alignment(), 8 * sizeof(Tp));
120 };
121 return _instance;
122 }
ert_alignment
Definition: settings.cpp:1762
std::function< uint64_t()> get_uint64_t

References tim::ert_alignment.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::configure(), and tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_block_size()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_block_size ( )
inline static

Definition at line 108 of file configuration.hpp.

109 {
110 static get_uint64_t _instance = []() { return settings::ert_block_size(); };
111 return _instance;
112 }
ert_block_size
Definition: settings.cpp:1760

References tim::ert_block_size.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::configure(), and tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_executor()

template<typename DeviceT , typename Tp , typename CounterT >
static executor_func_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor ( )
inline static

Definition at line 209 of file configuration.hpp.

210 {
211 static executor_func_t _instance = [](ert_data_ptr_t data) {
212 using lli = long long int;
213 // configuration sizes
214 auto _mws_size = get_min_working_size()();
215 auto _max_size = get_max_data_size()();
216 auto _num_thread = get_num_threads()();
217 auto _num_stream = get_num_streams()();
218 auto _grid_size = get_grid_size()();
219 auto _block_size = get_block_size()();
220 auto _align_size = get_alignment()();
221 auto _skip_ops = get_skip_ops()();
222
223 // execution parameters
224 exec_params params(_mws_size, _max_size, _num_thread, _num_stream, _grid_size,
225 _block_size);
226 // operation _counter instance
227 ert_counter_t _counter(params, data, _align_size);
228
229 // set bytes per element
230 _counter.bytes_per_element = sizeof(Tp);
231 // set number of memory accesses per element from two functions
232 _counter.memory_accesses_per_element = 2;
233
234 for(const auto& itr : _skip_ops)
235 _counter.add_skip_ops(itr);
236
237 auto dtype = demangle(typeid(Tp).name());
238
239 std::string _dev_name{};
240 if(std::is_same<DeviceT, device::cpu>::value)
241 _dev_name = "[device::cpu]";
242 else if(std::is_same<DeviceT, device::gpu>::value)
243 _dev_name = "[device::gpu]";
244
246 {
247 printf("[ert::executor]%s> "
248 "working-set = %lli, max-size = %lli, num-thread = %lli, "
249 "num-stream = "
250 "%lli, grid-size = %lli, block-size = %lli, align-size = %lli, "
251 "data-type "
252 "= %s\n",
253 _dev_name.c_str(), (lli) _mws_size, (lli) _max_size,
254 (lli) _num_thread, (lli) _num_stream, (lli) _grid_size,
255 (lli) _block_size, (lli) _align_size, dtype.c_str());
256 }
257
258 return _counter;
259 };
260 return _instance;
261 }
std::string demangle(const char *_mangled_name, int *_status=nullptr)
Definition: demangle.hpp:47
tim::mpl::apply< std::string > string
Definition: macros.hpp:53
static get_skip_ops_t & get_skip_ops()
static get_uint64_t & get_min_working_size()
static get_uint64_t & get_max_data_size()
counter< device_t, Tp, counter_t > ert_counter_t
std::function< ert_counter_t(ert_data_ptr_t)> executor_func_t
std::shared_ptr< ert_data_t > ert_data_ptr_t

References tim::ert::counter< DeviceT, Tp, Counter >::add_skip_ops(), tim::ert::counter< DeviceT, Tp, Counter >::bytes_per_element, tim::debug, tim::demangle(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_alignment(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_block_size(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_grid_size(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_max_data_size(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_min_working_size(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_num_streams(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_num_threads(), tim::ert::configuration< DeviceT, Tp, CounterT >::get_skip_ops(), tim::ert::counter< DeviceT, Tp, Counter >::memory_accesses_per_element, and tim::verbose.

◆ get_grid_size()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_grid_size ( )
inline static

Definition at line 100 of file configuration.hpp.

101 {
102 static get_uint64_t _instance = []() { return settings::ert_grid_size(); };
103 return _instance;
104 }
ert_grid_size
Definition: settings.cpp:1758

References tim::ert_grid_size.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::configure(), and tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_max_data_size()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_max_data_size ( )
inline static

Definition at line 140 of file configuration.hpp.

141 {
142 static get_uint64_t _instance = []() -> uint64_t {
145 static constexpr bool is_gpu = device::is_gpu<DeviceT>::value;
146 if(is_gpu)
147 {
149 }
150 {
151 return 2 * ert::cache_size::get_max();
152 }
153 };
154 return _instance;
155 }
ert_max_data_size
Definition: settings.cpp:1770
ert_max_data_size_gpu
Definition: settings.cpp:1774

References tim::ert_max_data_size, and tim::ert_max_data_size_gpu.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_min_working_size()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_min_working_size ( )
inline static

Definition at line 126 of file configuration.hpp.

127 {
128 static get_uint64_t _instance = []() {
131 static constexpr bool is_gpu = device::is_gpu<DeviceT>::value;
132 return (is_gpu) ? settings::ert_min_working_size_gpu()
133 : settings::ert_min_working_size_cpu();
134 };
135 return _instance;
136 }
ert_min_working_size
Definition: settings.cpp:1764
ert_min_working_size_cpu
Definition: settings.cpp:1766
ert_min_working_size_gpu
Definition: settings.cpp:1768

References tim::ert_min_working_size, tim::ert_min_working_size_cpu, and tim::ert_min_working_size_gpu.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_num_streams()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_num_streams ( )
inline static

Definition at line 92 of file configuration.hpp.

93 {
94 static get_uint64_t _instance = []() { return settings::ert_num_streams(); };
95 return _instance;
96 }
ert_num_streams
Definition: settings.cpp:1756

References tim::ert_num_streams.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::configure(), and tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_num_threads()

template<typename DeviceT , typename Tp , typename CounterT >
static get_uint64_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_num_threads ( )
inline static

Definition at line 77 of file configuration.hpp.

78 {
79 static get_uint64_t _instance = []() {
82 // for checking if gpu
83 static constexpr bool is_gpu = device::is_gpu<DeviceT>::value;
84 return (is_gpu) ? settings::ert_num_threads_gpu()
85 : settings::ert_num_threads_cpu();
86 };
87 return _instance;
88 }
ert_num_threads_gpu
Definition: settings.cpp:1754
ert_num_threads_cpu
Definition: settings.cpp:1752
ert_num_threads
Definition: settings.cpp:1750

References tim::ert_num_threads, tim::ert_num_threads_cpu, and tim::ert_num_threads_gpu.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::configure(), and tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

◆ get_skip_ops()

template<typename DeviceT , typename Tp , typename CounterT >
static get_skip_ops_t & tim::ert::configuration< DeviceT, Tp, CounterT >::get_skip_ops ( )
inline static

Definition at line 159 of file configuration.hpp.

160 {
161 static get_skip_ops_t _instance = []() {
162 auto _skipstr = settings::ert_skip_ops();
163 auto _skipstrvec = delimit(_skipstr, ",; \t");
164 skip_ops_t _result;
165 for(const auto& itr : _skipstrvec)
166 {
167 if(itr.find_first_not_of("0123456789") == std::string::npos)
168 _result.insert(atol(itr.c_str()));
169 }
170 return _result;
171 };
172 return _instance;
173 }
ert_skip_ops
Definition: settings.cpp:1776
ContainerT delimit(const std::string &line, const std::string &delimiters="\"',;: ", PredicateT &&predicate=[](const std::string &s) -> std::string { return s;})
Definition: delimit.hpp:68
std::function< skip_ops_t()> get_skip_ops_t
std::unordered_set< size_t > skip_ops_t

References tim::delimit(), and tim::ert_skip_ops.

Referenced by tim::ert::configuration< DeviceT, Tp, CounterT >::get_executor().

Member Data Documentation

◆ alignment

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::alignment = this_type::get_alignment()

◆ block_size

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::block_size = this_type::get_block_size()

◆ executor

template<typename DeviceT , typename Tp , typename CounterT >
executor_func_t tim::ert::configuration< DeviceT, Tp, CounterT >::executor = this_type::get_executor()

Definition at line 272 of file configuration.hpp.

◆ grid_size

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::grid_size = this_type::get_grid_size()

◆ max_data_size

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::max_data_size = this_type::get_max_data_size()

Definition at line 268 of file configuration.hpp.

◆ min_working_size

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::min_working_size = this_type::get_min_working_size()

Definition at line 267 of file configuration.hpp.

◆ num_streams

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::num_streams = this_type::get_num_streams()

Definition at line 266 of file configuration.hpp.

◆ num_threads

template<typename DeviceT , typename Tp , typename CounterT >
get_uint64_t tim::ert::configuration< DeviceT, Tp, CounterT >::num_threads = this_type::get_num_threads()

Definition at line 265 of file configuration.hpp.

◆ verbose

template<typename DeviceT , typename Tp , typename CounterT >
bool tim::ert::configuration< DeviceT, Tp, CounterT >::verbose = false

Definition at line 264 of file configuration.hpp.


The documentation for this struct was generated from the following file: