timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
tim::ert::executor< device::gpu, Tp, CounterT > Struct Template Reference

#include "timemory/ert/configuration.hpp"

+ Collaboration diagram for tim::ert::executor< device::gpu, Tp, CounterT >:

Public Types

using DeviceT = device::gpu
 
using device_type = device::gpu
 
using value_type = Tp
 
using configuration_type = configuration< device_type, value_type, CounterT >
 
using counter_type = counter< device_type, value_type, CounterT >
 
using this_type = executor< device_type, value_type, CounterT >
 
using callback_type = std::function< void(counter_type &)>
 
using ert_data_t = exec_data< CounterT >
 

Public Member Functions

 executor (configuration_type &config, std::shared_ptr< ert_data_t > _data)
 
template<typename FuncT >
 executor (configuration_type &config, std::shared_ptr< ert_data_t > _data, FuncT &&_counter_callback)
 
template<typename FuncT = std::function<void(uint64_t, counter_type&)>>
auto operator() (configuration_type &config, std::shared_ptr< ert_data_t > _data={}, FuncT &&_counter_callback=FuncT{})
 

Static Public Member Functions

static callback_type & get_callback ()
 
static void execute (counter_type &_counter)
 
template<size_t... Flops, enable_if_t<(sizeof...(Flops) > 0)> = 0>
static bool execute (counter_type &_counter, std::array< std::string, sizeof...(Flops)> _labels)
 
template<size_t Flops>
static enable_if_t< Flops==1, bool > execute_impl (counter_type &_counter, const std::string &_label)
 
template<size_t Flops>
static enable_if_t<(Flops > 1), bool > execute_impl (counter_type &_counter, const std::string &_label)
 

Public Attributes

callback_type callback = get_callback()
 

Detailed Description

template<typename Tp, typename CounterT>
struct tim::ert::executor< device::gpu, Tp, CounterT >

Definition at line 447 of file configuration.hpp.

Member Typedef Documentation

◆ callback_type

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::callback_type = std::function<void(counter_type&)>

Definition at line 461 of file configuration.hpp.

◆ configuration_type

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::configuration_type = configuration<device_type, value_type, CounterT>

Definition at line 458 of file configuration.hpp.

◆ counter_type

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::counter_type = counter<device_type, value_type, CounterT>

Definition at line 459 of file configuration.hpp.

◆ device_type

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::device_type = device::gpu

Definition at line 456 of file configuration.hpp.

◆ DeviceT

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::DeviceT = device::gpu

Definition at line 449 of file configuration.hpp.

◆ ert_data_t

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::ert_data_t = exec_data<CounterT>

Definition at line 462 of file configuration.hpp.

◆ this_type

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::this_type = executor<device_type, value_type, CounterT>

Definition at line 460 of file configuration.hpp.

◆ value_type

template<typename Tp , typename CounterT >
using tim::ert::executor< device::gpu, Tp, CounterT >::value_type = Tp

Definition at line 457 of file configuration.hpp.

Constructor & Destructor Documentation

◆ executor() [1/2]

template<typename Tp , typename CounterT >
tim::ert::executor< device::gpu, Tp, CounterT >::executor ( configuration_type & config,
std::shared_ptr< ert_data_t > _data 
)
inline

Definition at line 470 of file configuration.hpp.

471 {
472 (*this)(config, _data);
473 }

◆ executor() [2/2]

template<typename Tp , typename CounterT >
template<typename FuncT >
tim::ert::executor< device::gpu, Tp, CounterT >::executor ( configuration_type & config,
std::shared_ptr< ert_data_t > _data,
FuncT &&  _counter_callback 
)
inline

Definition at line 479 of file configuration.hpp.

481 {
482 (*this)(config, _data, std::forward<FuncT>(_counter_callback));
483 }

Member Function Documentation

◆ execute() [1/2]

template<typename Tp , typename CounterT >
static void tim::ert::executor< device::gpu, Tp, CounterT >::execute ( counter_type & _counter)
inline static

Definition at line 529 of file configuration.hpp.

530 {
531 // functions
532 auto store_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b) { a = b; };
533 auto add_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b, const Tp& c) {
534 a = b + c;
535 };
536 // auto mult_func = [] TIMEMORY_LAMBDA(Tp & a, const Tp& b, const Tp& c) {
537 // a = b * c;
538 //};
539 auto fma_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b, const Tp& c) {
540 a = a * b + c;
541 };
542
543 // set bytes per element
544 _counter.bytes_per_element = sizeof(Tp);
545 // set number of memory accesses per element from two functions
546 _counter.memory_accesses_per_element = 2;
547
548 // set the label
549 _counter.label = "scalar_add";
550 // run the kernels
551 ops_main<1>(_counter, add_func, store_func);
552
553 // set the label
554 // _counter.label = "vector_mult";
555 // run the kernels
556 // ops_main<4, 16, 64, 128, 256, 512>(_counter, mult_func, store_func);
557
558 // set the label
559 _counter.label = "vector_fma";
560 // run the kernels
561 if(!ops_main<TIMEMORY_USER_ERT_FLOPS>(_counter, fma_func, store_func))
562 ops_main<4, 16, 64, 128, 256, 512>(_counter, fma_func, store_func);
563 }
#define TIMEMORY_DEVICE_LAMBDA
Definition: attributes.hpp:180

References tim::ert::counter< DeviceT, Tp, Counter >::bytes_per_element, tim::ert::counter< DeviceT, Tp, Counter >::label, tim::ert::counter< DeviceT, Tp, Counter >::memory_accesses_per_element, and TIMEMORY_DEVICE_LAMBDA.

◆ execute() [2/2]

template<typename Tp , typename CounterT >
template<size_t... Flops, enable_if_t<(sizeof...(Flops) > 0)> = 0>
static bool tim::ert::executor< device::gpu, Tp, CounterT >::execute ( counter_type & _counter,
std::array< std::string, sizeof...(Flops)>  _labels 
)
inline static

Definition at line 568 of file configuration.hpp.

570 {
571 bool _executed = false;
572 auto itr = _labels.begin();
573 TIMEMORY_FOLD_EXPRESSION(_executed |= execute_impl<Flops>(_counter, *(itr++)));
574 return _executed;
575 }
#define TIMEMORY_FOLD_EXPRESSION(...)
Definition: types.hpp:56

References TIMEMORY_FOLD_EXPRESSION.

◆ execute_impl() [1/2]

template<typename Tp , typename CounterT >
template<size_t Flops>
static enable_if_t< Flops==1, bool > tim::ert::executor< device::gpu, Tp, CounterT >::execute_impl ( counter_type & _counter,
const std::string &  _label 
)
inline static

Definition at line 581 of file configuration.hpp.

583 {
584 // functions
585 auto store_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b) { a = b; };
586 auto add_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b, const Tp& c) {
587 a = b + c;
588 };
589
590 // set bytes per element
591 _counter.bytes_per_element = sizeof(Tp);
592 // set number of memory accesses per element from two functions
593 _counter.memory_accesses_per_element = 2;
594
595 // set the label
596 _counter.label = _label;
597 // run the kernels
598 return ops_main<Flops>(_counter, add_func, store_func);
599 }

References tim::ert::counter< DeviceT, Tp, Counter >::bytes_per_element, tim::ert::counter< DeviceT, Tp, Counter >::label, tim::ert::counter< DeviceT, Tp, Counter >::memory_accesses_per_element, and TIMEMORY_DEVICE_LAMBDA.

◆ execute_impl() [2/2]

template<typename Tp , typename CounterT >
template<size_t Flops>
static enable_if_t<(Flops > 1), bool > tim::ert::executor< device::gpu, Tp, CounterT >::execute_impl ( counter_type & _counter,
const std::string &  _label 
)
inline static

Definition at line 604 of file configuration.hpp.

606 {
607 // functions
608 auto store_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b) { a = b; };
609 auto fma_func = [] TIMEMORY_DEVICE_LAMBDA(Tp & a, const Tp& b, const Tp& c) {
610 a = a * b + c;
611 };
612
613 // set bytes per element
614 _counter.bytes_per_element = sizeof(Tp);
615 // set number of memory accesses per element from two functions
616 _counter.memory_accesses_per_element = 2;
617
618 // set the label
619 _counter.label = _label;
620 return ops_main<Flops>(_counter, fma_func, store_func);
621 }

References tim::ert::counter< DeviceT, Tp, Counter >::bytes_per_element, tim::ert::counter< DeviceT, Tp, Counter >::label, tim::ert::counter< DeviceT, Tp, Counter >::memory_accesses_per_element, and TIMEMORY_DEVICE_LAMBDA.

◆ get_callback()

template<typename Tp , typename CounterT >
static callback_type & tim::ert::executor< device::gpu, Tp, CounterT >::get_callback ( )
inline static

Definition at line 518 of file configuration.hpp.

519 {
520 static callback_type _instance = [](counter_type& _counter) {
521 this_type::execute(_counter);
522 };
523 return _instance;
524 }
std::function< void(counter_type &)> callback_type
counter< device_type, value_type, CounterT > counter_type
static void execute(counter_type &_counter)

References tim::ert::executor< DeviceT, Tp, CounterT >::execute().

◆ operator()()

template<typename Tp , typename CounterT >
template<typename FuncT = std::function<void(uint64_t, counter_type&)>>
auto tim::ert::executor< device::gpu, Tp, CounterT >::operator() ( configuration_type & config,
std::shared_ptr< ert_data_t > _data = {},
FuncT &&  _counter_callback = FuncT{} 
)
inline

Definition at line 489 of file configuration.hpp.

489 {},
490 FuncT&& _counter_callback = FuncT{})
491 {
492 std::function<void(uint64_t, counter_type&)> _cb =
493 std::forward<FuncT>(_counter_callback);
494 try
495 {
496 if(!_data)
497 _data.reset(new ert_data_t);
498 auto _counter = config.executor(_data);
499 if(_cb)
500 _counter.set_callback(std::move(_cb));
501 callback(_counter);
502 } catch(std::exception& e)
503 {
504 std::cerr << "\n\nEXCEPTION:\n";
505 std::cerr << "\t" << e.what() << "\n\n" << std::endl;
506 }
507 return _data;
508 }

Member Data Documentation

◆ callback

template<typename Tp , typename CounterT >
callback_type tim::ert::executor< device::gpu, Tp, CounterT >::callback = get_callback()

Definition at line 513 of file configuration.hpp.


The documentation for this struct was generated from the following file: