timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::ert::counter< DeviceT, Tp, Counter > Class Template Reference

#include "timemory/ert/counter.hpp"

+ Collaboration diagram for tim::ert::counter< DeviceT, Tp, Counter >:

Public Types

using mutex_t = std::recursive_mutex
 
using lock_t = std::unique_lock< mutex_t >
 
using counter_type = Counter
 
using ert_data_t = exec_data< Counter >
 
using this_type = counter< DeviceT, Tp, Counter >
 
using callback_type = std::function< void(uint64_t, this_type &)>
 
using data_type = typename ert_data_t::value_type
 
using data_ptr_t = std::shared_ptr< ert_data_t >
 
using ull = unsigned long long
 
using skip_ops_t = std::unordered_set< size_t >
 

Public Member Functions

 counter ()=default
 
 ~counter ()=default
 
 counter (const counter &)=default
 
 counter (counter &&) noexcept=default
 
counter & operator= (const counter &)=default
 
counter & operator= (counter &&) noexcept=default
 
 counter (const exec_params &_params, data_ptr_t _exec_data, uint64_t _align=8 *sizeof(Tp))
 
 counter (const exec_params &_params, callback_type _func, data_ptr_t _exec_data, uint64_t _align=8 *sizeof(Tp))
 
template<typename Up = Tp, typename Dev = DeviceT, typename std::enable_if<(std::is_same< Dev, device::cpu >::value||(std::is_same< Dev, device::gpu >::value &&!std::is_same< Up, gpu::fp16_t >::value)), int >::type = 0>
Up * get_buffer ()
 Allocate a buffer for the ERT calculation. This overload is used if the device is CPU, or if the device is GPU and the type is not half2. More...
 
template<typename Up = Tp, typename Dev = DeviceT, typename std::enable_if<(std::is_same< Up, gpu::fp16_t >::value &&std::is_same< Dev, device::gpu >::value), int >::type = 0>
Up * get_buffer ()
 Allocate a buffer for the ERT calculation. This overload is used if the device is GPU and the type is half2. More...
 
void destroy_buffer (Tp *buffer)
 
void configure (uint64_t tid)
 
counter_type get_counter () const
 
void record (counter_type &_counter, int n, int trials, uint64_t nops, const exec_params &_itrp)
 
template<typename FuncT >
void set_callback (FuncT &&_f)
 
template<typename Archive >
void serialize (Archive &ar, const unsigned int)
 
data_ptr_t & get_data ()
 
const data_ptr_t & get_data () const
 
void add_skip_ops (size_t _Nops)
 
void add_skip_ops (std::initializer_list< size_t > _args)
 
bool skip (size_t _Nops)
 

Public Attributes

exec_params params = exec_params()
 
int bytes_per_element = 0
 
int memory_accesses_per_element = 0
 
uint64_t align = sizeof(Tp)
 
uint64_t nsize = 0
 
data_ptr_t data = std::make_shared<ert_data_t>()
 
std::string label = ""
 
skip_ops_t skip_ops = skip_ops_t()
 

Friends

std::ostream & operator<< (std::ostream &os, const counter &obj)
 

Detailed Description

template<typename DeviceT, typename Tp, typename Counter>
class tim::ert::counter< DeviceT, Tp, Counter >

Definition at line 73 of file counter.hpp.

Member Typedef Documentation

◆ callback_type

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::callback_type = std::function<void(uint64_t, this_type&)>

Definition at line 81 of file counter.hpp.

◆ counter_type

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::counter_type = Counter

Definition at line 78 of file counter.hpp.

◆ data_ptr_t

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::data_ptr_t = std::shared_ptr<ert_data_t>

Definition at line 83 of file counter.hpp.

◆ data_type

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::data_type = typename ert_data_t::value_type

Definition at line 82 of file counter.hpp.

◆ ert_data_t

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::ert_data_t = exec_data<Counter>

Definition at line 79 of file counter.hpp.

◆ lock_t

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::lock_t = std::unique_lock<mutex_t>

Definition at line 77 of file counter.hpp.

◆ mutex_t

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::mutex_t = std::recursive_mutex

Definition at line 76 of file counter.hpp.

◆ skip_ops_t

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::skip_ops_t = std::unordered_set<size_t>

Definition at line 85 of file counter.hpp.

◆ this_type

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::this_type = counter<DeviceT, Tp, Counter>

Definition at line 80 of file counter.hpp.

◆ ull

template<typename DeviceT , typename Tp , typename Counter >
using tim::ert::counter< DeviceT, Tp, Counter >::ull = unsigned long long

Definition at line 84 of file counter.hpp.

Constructor & Destructor Documentation

◆ counter() [1/5]

template<typename DeviceT , typename Tp , typename Counter >
tim::ert::counter< DeviceT, Tp, Counter >::counter ( )
default

◆ ~counter()

template<typename DeviceT , typename Tp , typename Counter >
tim::ert::counter< DeviceT, Tp, Counter >::~counter ( )
default

◆ counter() [2/5]

template<typename DeviceT , typename Tp , typename Counter >
tim::ert::counter< DeviceT, Tp, Counter >::counter ( const counter< DeviceT, Tp, Counter > &  )
default

◆ counter() [3/5]

template<typename DeviceT , typename Tp , typename Counter >
tim::ert::counter< DeviceT, Tp, Counter >::counter ( counter< DeviceT, Tp, Counter > &&  )
default noexcept

◆ counter() [4/5]

template<typename DeviceT , typename Tp , typename Counter >
tim::ert::counter< DeviceT, Tp, Counter >::counter ( const exec_params &  _params,
data_ptr_t  _exec_data,
uint64_t  _align = 8 * sizeof(Tp) 
)
inline explicit

Definition at line 101 of file counter.hpp.

103 : params(_params)
104 , align(_align)
105 , data(std::move(_exec_data))
106 {
107 compute_internal();
108 }
data_ptr_t data
Definition: counter.hpp:299
uint64_t align
Definition: counter.hpp:297
exec_params params
Definition: counter.hpp:294

◆ counter() [5/5]

template<typename DeviceT , typename Tp , typename Counter >
tim::ert::counter< DeviceT, Tp, Counter >::counter ( const exec_params &  _params,
callback_type  _func,
data_ptr_t  _exec_data,
uint64_t  _align = 8 * sizeof(Tp) 
)
inline

Definition at line 113 of file counter.hpp.

115 : params(_params)
116 , align(_align)
117 , data(std::move(_exec_data))
118 , configure_callback(std::move(_func))
119 {
120 compute_internal();
121 }

Member Function Documentation

◆ add_skip_ops() [1/2]

template<typename DeviceT , typename Tp , typename Counter >
void tim::ert::counter< DeviceT, Tp, Counter >::add_skip_ops ( size_t  _Nops)
inline

◆ add_skip_ops() [2/2]

template<typename DeviceT , typename Tp , typename Counter >
void tim::ert::counter< DeviceT, Tp, Counter >::add_skip_ops ( std::initializer_list< size_t >  _args)
inline

Definition at line 282 of file counter.hpp.

283 {
284 for(const auto& itr : _args)
285 skip_ops.insert(itr);
286 }
std::array< char *, 4 > _args

References tim::_args, and tim::ert::counter< DeviceT, Tp, Counter >::skip_ops.

◆ configure()

template<typename DeviceT , typename Tp , typename Counter >
void tim::ert::counter< DeviceT, Tp, Counter >::configure ( uint64_t  tid)
inline

Definition at line 184 of file counter.hpp.

184 { configure_callback(tid, *this); }

Referenced by tim::ert::ops_main().

◆ destroy_buffer()

template<typename DeviceT , typename Tp , typename Counter >
void tim::ert::counter< DeviceT, Tp, Counter >::destroy_buffer ( Tp *  buffer)
inline

Definition at line 178 of file counter.hpp.

178 { free_aligned<Tp, DeviceT>(buffer); }

Referenced by tim::ert::ops_main().

◆ get_buffer() [1/2]

template<typename DeviceT , typename Tp , typename Counter >
template<typename Up = Tp, typename Dev = DeviceT, typename std::enable_if<(std::is_same< Dev, device::cpu >::value||(std::is_same< Dev, device::gpu >::value &&!std::is_same< Up, gpu::fp16_t >::value)), int >::type = 0>
Up * tim::ert::counter< DeviceT, Tp, Counter >::get_buffer ( )
inline

Allocate a buffer for the ERT calculation. This overload is used if the device is CPU, or if the device is GPU and the type is not half2.

Definition at line 133 of file counter.hpp.

134 {
135 // check alignment and
136 align = std::max<uint64_t>(align, 8 * sizeof(Up));
137 compute_internal();
138
139 if(settings::debug())
140 printf("[%s]> nsize = %llu\n", __FUNCTION__, (ull) nsize);
141 Up* buffer = allocate_aligned<Up, DeviceT>(nsize, align);
142 if(settings::debug())
143 printf("[%s]> buffer = %p\n", __FUNCTION__, (void*) buffer);
144 device::params<DeviceT> _params(0, 512, 0, 0);
145 device::launch(nsize, _params, initialize_buffer<DeviceT, Up, uint64_t>, buffer,
146 Up{ 1 }, nsize);
147 return buffer;
148 }
unsigned long long ull
Definition: counter.hpp:84
uint64_t nsize
Definition: counter.hpp:298

References tim::ert::counter< DeviceT, Tp, Counter >::align, tim::debug, and tim::ert::counter< DeviceT, Tp, Counter >::nsize.

Referenced by tim::ert::ops_main().

◆ get_buffer() [2/2]

template<typename DeviceT , typename Tp , typename Counter >
template<typename Up = Tp, typename Dev = DeviceT, typename std::enable_if<(std::is_same< Up, gpu::fp16_t >::value &&std::is_same< Dev, device::gpu >::value), int >::type = 0>
Up * tim::ert::counter< DeviceT, Tp, Counter >::get_buffer ( )
inline

Allocate a buffer for the ERT calculation. This overload is used if the device is GPU and the type is half2.

Definition at line 158 of file counter.hpp.

159 {
160 // check alignment and
161 align = std::max<uint64_t>(align, 8 * sizeof(Up));
162 compute_internal();
163
164 if(settings::debug())
165 printf("[%s]> nsize = %llu\n", __FUNCTION__, (ull) nsize);
166 Up* buffer = allocate_aligned<Up, DeviceT>(nsize, align);
167 if(settings::debug())
168 printf("[%s]> buffer = %p\n", __FUNCTION__, (void*) buffer);
169 device::params<DeviceT> _params(0, 512, 0, 0);
170 device::launch(nsize, _params, initialize_buffer<DeviceT, Up, uint32_t>, buffer,
171 Up{ 1, 1 }, nsize);
172 return buffer;
173 }

References tim::ert::counter< DeviceT, Tp, Counter >::align, tim::debug, and tim::ert::counter< DeviceT, Tp, Counter >::nsize.

◆ get_counter()

template<typename DeviceT , typename Tp , typename Counter >
counter_type tim::ert::counter< DeviceT, Tp, Counter >::get_counter ( ) const
inline

Definition at line 190 of file counter.hpp.

190 { return counter_type(); }
Counter counter_type
Definition: counter.hpp:78

Referenced by tim::ert::ops_main().

◆ get_data() [1/2]

template<typename DeviceT , typename Tp , typename Counter >
data_ptr_t & tim::ert::counter< DeviceT, Tp, Counter >::get_data ( )
inline

Definition at line 274 of file counter.hpp.

274 { return data; }

References tim::ert::counter< DeviceT, Tp, Counter >::data.

◆ get_data() [2/2]

template<typename DeviceT , typename Tp , typename Counter >
const data_ptr_t & tim::ert::counter< DeviceT, Tp, Counter >::get_data ( ) const
inline

Definition at line 275 of file counter.hpp.

275 { return data; }

References tim::ert::counter< DeviceT, Tp, Counter >::data.

◆ operator=() [1/2]

template<typename DeviceT , typename Tp , typename Counter >
counter & tim::ert::counter< DeviceT, Tp, Counter >::operator= ( const counter< DeviceT, Tp, Counter > &  )
default

◆ operator=() [2/2]

template<typename DeviceT , typename Tp , typename Counter >
counter & tim::ert::counter< DeviceT, Tp, Counter >::operator= ( counter< DeviceT, Tp, Counter > &&  )
default noexcept

◆ record()

template<typename DeviceT , typename Tp , typename Counter >
void tim::ert::counter< DeviceT, Tp, Counter >::record ( counter_type &  _counter,
int  n,
int  trials,
uint64_t  nops,
const exec_params &  _itrp 
)
inline

Definition at line 196 of file counter.hpp.

198 {
199 uint64_t working_set_size = n * params.nthreads * params.nproc;
200 uint64_t working_set = working_set_size * bytes_per_element;
201 uint64_t total_bytes = trials * working_set * memory_accesses_per_element;
202 uint64_t total_ops = trials * working_set_size * nops;
203
204 std::stringstream ss;
205 ss << label;
206 if(label.length() == 0)
207 {
208 if(nops > 1)
209 {
210 ss << "vector_op";
211 }
212 else
213 {
214 ss << "scalar_op";
215 }
216 }
217
218 auto _label = tim::demangle<Tp>();
219 data_type _data(ss.str(), working_set, trials, total_bytes, total_ops, nops,
220 _counter, DeviceT::name(), _label, _itrp);
221
222 #if !defined(TIMEMORY_WINDOWS)
223 // using namespace tim::stl::ostream;
224 // if(settings::verbose() > 1 || settings::debug())
225 // std::cout << "[RECORD]> " << _data << std::endl;
226 #endif
227
228 static std::mutex _mutex;
229 // std::unique_lock<std::mutex> _lock(_mutex);
230 _mutex.lock();
231 *data += _data;
232 _mutex.unlock();
233 }
typename ert_data_t::value_type data_type
Definition: counter.hpp:82
std::string label
Definition: counter.hpp:300
int memory_accesses_per_element
Definition: counter.hpp:296
uint64_t nthreads
Definition: data.hpp:116

References tim::ert::counter< DeviceT, Tp, Counter >::bytes_per_element, tim::ert::counter< DeviceT, Tp, Counter >::data, tim::ert::counter< DeviceT, Tp, Counter >::label, tim::ert::counter< DeviceT, Tp, Counter >::memory_accesses_per_element, tim::ert::exec_params::nproc, tim::ert::exec_params::nthreads, and tim::ert::counter< DeviceT, Tp, Counter >::params.

Referenced by tim::ert::ops_main().

◆ serialize()

template<typename DeviceT , typename Tp , typename Counter >
template<typename Archive >
void tim::ert::counter< DeviceT, Tp, Counter >::serialize ( Archive &  ar,
const unsigned int   
)
inline

Definition at line 247 of file counter.hpp.

248 {
249 if(!data.get()) // for input
250 data = std::make_shared<ert_data_t>();
251 ar(cereal::make_nvp("params", params), cereal::make_nvp("data", *data));
252 }

References tim::ert::counter< DeviceT, Tp, Counter >::data, and tim::ert::counter< DeviceT, Tp, Counter >::params.

◆ set_callback()

template<typename DeviceT , typename Tp , typename Counter >
template<typename FuncT >
void tim::ert::counter< DeviceT, Tp, Counter >::set_callback ( FuncT &&  _f)
inline

Definition at line 238 of file counter.hpp.

239 {
240 configure_callback = std::forward<FuncT>(_f);
241 }

◆ skip()

template<typename DeviceT , typename Tp , typename Counter >
bool tim::ert::counter< DeviceT, Tp, Counter >::skip ( size_t  _Nops)
inline

Definition at line 288 of file counter.hpp.

288 { return (skip_ops.count(_Nops) > 0); }

References tim::ert::counter< DeviceT, Tp, Counter >::skip_ops.

Referenced by tim::ert::ops_main().

Friends And Related Function Documentation

◆ operator<<

template<typename DeviceT , typename Tp , typename Counter >
std::ostream & operator<< ( std::ostream &  os,
const counter< DeviceT, Tp, Counter > &  obj 
)
friend

Definition at line 257 of file counter.hpp.

258 {
259 std::stringstream ss;
260 ss << obj.params << ", "
261 << "bytes_per_element = " << obj.bytes_per_element << ", "
262 << "memory_accesses_per_element = " << obj.memory_accesses_per_element << ", "
263 << "alignment = " << obj.align << ", "
264 << "nsize = " << obj.nsize << ", "
265 << "label = " << obj.label << ", "
266 << "data entries = " << ((obj.data.get()) ? obj.data->size() : 0);
267 os << ss.str();
268 return os;
269 }
const std::string std::ostream * os

Member Data Documentation

◆ align

template<typename DeviceT , typename Tp , typename Counter >
uint64_t tim::ert::counter< DeviceT, Tp, Counter >::align = sizeof(Tp)

Definition at line 297 of file counter.hpp.

Referenced by tim::ert::counter< DeviceT, Tp, Counter >::get_buffer().

◆ bytes_per_element

◆ data

◆ label

◆ memory_accesses_per_element

◆ nsize

template<typename DeviceT , typename Tp , typename Counter >
uint64_t tim::ert::counter< DeviceT, Tp, Counter >::nsize = 0

◆ params

◆ skip_ops

template<typename DeviceT , typename Tp , typename Counter >
skip_ops_t tim::ert::counter< DeviceT, Tp, Counter >::skip_ops = skip_ops_t()

The documentation for this class was generated from the following file: