timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
|
Classes | |
class | aligned_allocator |
struct | callback |
for variadic expansion to set the callback More... | |
struct | configuration |
class | counter |
class | exec_data |
struct | exec_params |
struct | executor |
struct | executor< device::gpu, Tp, CounterT > |
class | thread_barrier |
Functions | |
template<typename DeviceT , typename CounterT , typename Tp , typename... Types, typename DataType = exec_data<CounterT>, typename DataPtr = std::shared_ptr<DataType>, typename std::enable_if<(sizeof...(Types)==0), int >::type = 0> | |
std::shared_ptr< DataType > | execute (std::shared_ptr< DataType > _data=std::make_shared< DataType >()) |
template<typename Counter > | |
void | serialize (std::string fname, exec_data< Counter > &obj) |
template<typename DeviceT , typename Tp , typename Intp = int32_t, device::enable_if_cpu_t< DeviceT > = 0> | |
void | initialize_buffer (Tp *A, const Tp &value, const Intp &nsize) |
template<typename DeviceT , typename Tp , typename Intp = int32_t, device::enable_if_gpu_t< DeviceT > = 0> | |
void | initialize_buffer (Tp *A, Tp value, Intp nsize) |
template<size_t Nrep, typename DeviceT , typename Intp , typename Tp , typename OpsFuncT , typename StoreFuncT , device::enable_if_cpu_t< DeviceT > = 0> | |
void | ops_kernel (Intp ntrials, Intp nsize, Tp *A, OpsFuncT &&ops_func, StoreFuncT &&store_func) |
template<size_t Nrep, typename DeviceT , typename Intp , typename Tp , typename OpsFuncT , typename StoreFuncT , device::enable_if_gpu_t< DeviceT > = 0, enable_if_t<!std::is_same< Tp, gpu::fp16_t >::value > = 0> | |
void | ops_kernel (Intp ntrials, Intp nsize, Tp *A, OpsFuncT &&ops_func, StoreFuncT &&store_func) |
template<size_t Nops, size_t... Nextra, typename DeviceT , typename Tp , typename CounterT , typename OpsFuncT , typename StoreFuncT , enable_if_t< sizeof...(Nextra)==0, int > = 0> | |
bool | ops_main (counter< DeviceT, Tp, CounterT > &_counter, OpsFuncT &&ops_func, StoreFuncT &&store_func) |
This is the "main" function for ERT. More... | |
template<size_t... Nops, typename DeviceT , typename Tp , typename CounterT , typename OpsFuncT , typename StoreFuncT , enable_if_t< sizeof...(Nops)==0, int > = 0> | |
bool | ops_main (counter< DeviceT, Tp, CounterT > &, OpsFuncT &&, StoreFuncT &&) |
This is invoked when TIMEMORY_USER_ERT_FLOPS is empty. More... | |
std::shared_ptr< DataType > tim::ert::execute | ( | std::shared_ptr< DataType > | _data = std::make_shared<DataType>() | ) |
Definition at line 650 of file configuration.hpp.
Referenced by tim::sampling::sampler< CompT< Types... >, N, SigIds... >::configure().
void tim::ert::ops_kernel | ( | Intp | ntrials, |
Intp | nsize, | ||
Tp * | A, | ||
OpsFuncT && | ops_func, | ||
StoreFuncT && | store_func | ||
) |
Definition at line 68 of file kernels.hpp.
void tim::ert::ops_kernel | ( | Intp | ntrials, |
Intp | nsize, | ||
Tp * | A, | ||
OpsFuncT && | ops_func, | ||
StoreFuncT && | store_func | ||
) |
Definition at line 99 of file kernels.hpp.
bool tim::ert::ops_main | ( | counter< DeviceT, Tp, CounterT > & | , |
OpsFuncT && | , | ||
StoreFuncT && | |||
) |
This is invoked when TIMEMORY_USER_ERT_FLOPS is empty.
Definition at line 419 of file kernels.hpp.
bool tim::ert::ops_main | ( | counter< DeviceT, Tp, CounterT > & | _counter, |
OpsFuncT && | ops_func, | ||
StoreFuncT && | store_func | ||
) |
This is the "main" function for ERT.
This invokes the "main" function for ERT for all the desired "FLOPs" that are unrolled in the kernel.
Definition at line 159 of file kernels.hpp.
References tim::ert::exec_params::block_size, tim::ert::counter< DeviceT, Tp, Counter >::bytes_per_element, tim::ert::counter< DeviceT, Tp, Counter >::configure(), tim::debug, tim::ert::counter< DeviceT, Tp, Counter >::destroy_buffer(), tim::ert::counter< DeviceT, Tp, Counter >::get_buffer(), tim::ert::counter< DeviceT, Tp, Counter >::get_counter(), tim::ert::exec_params::grid_size, tim::ert::counter< DeviceT, Tp, Counter >::memory_accesses_per_element, tim::ert::counter< DeviceT, Tp, Counter >::nsize, tim::ert::exec_params::nstreams, tim::ert::exec_params::nthreads, tim::ert::counter< DeviceT, Tp, Counter >::params, tim::ert::counter< DeviceT, Tp, Counter >::record(), tim::ert::exec_params::shmem_size, tim::ert::counter< DeviceT, Tp, Counter >::skip(), TIMEMORY_ERROR_FUNCTION_MACRO, tim::verbose, and tim::ert::exec_params::working_set_min.
Referenced by tim::ert::executor< DeviceT, Tp, CounterT >::execute().
|
inline |
Definition at line 325 of file counter.hpp.
References tim::settings::compose_output_filename(), tim::get(), and tim::filepath::open().