timemory 3.3.0
Modular C++ Toolkit for Performance Analysis and Logging. Profiling API and Tools for C, C++, CUDA, Fortran, and Python. The C++ template API is essentially a framework for creating tools: it is designed to provide a unifying interface for recording various performance measurements alongside data logging and interfaces to other tools.
kernels.hpp File Reference
#include "timemory/backends/device.hpp"
#include "timemory/backends/dmp.hpp"
#include "timemory/backends/gpu.hpp"
#include "timemory/backends/threading.hpp"
#include "timemory/components/cuda/backends.hpp"
#include "timemory/ert/counter.hpp"
#include "timemory/ert/data.hpp"
#include "timemory/mpl/apply.hpp"
#include "timemory/settings/declaration.hpp"
#include "timemory/utility/macros.hpp"
#include "timemory/utility/utility.hpp"
#include <cstdint>
#include <functional>
#include <future>
#include <iomanip>
#include <sstream>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
+ Include dependency graph for kernels.hpp:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces

namespace  tim
 
namespace  tim::ert
 

Functions

template<size_t Nrep, typename DeviceT , typename Intp , typename Tp , typename OpsFuncT , typename StoreFuncT , device::enable_if_cpu_t< DeviceT > = 0>
void tim::ert::ops_kernel (Intp ntrials, Intp nsize, Tp *A, OpsFuncT &&ops_func, StoreFuncT &&store_func)
 
template<size_t Nrep, typename DeviceT , typename Intp , typename Tp , typename OpsFuncT , typename StoreFuncT , device::enable_if_gpu_t< DeviceT > = 0, enable_if_t<!std::is_same< Tp, gpu::fp16_t >::value > = 0>
void tim::ert::ops_kernel (Intp ntrials, Intp nsize, Tp *A, OpsFuncT &&ops_func, StoreFuncT &&store_func)
 
template<size_t Nops, size_t... Nextra, typename DeviceT , typename Tp , typename CounterT , typename OpsFuncT , typename StoreFuncT , enable_if_t< sizeof...(Nextra)==0, int > = 0>
bool tim::ert::ops_main (counter< DeviceT, Tp, CounterT > &_counter, OpsFuncT &&ops_func, StoreFuncT &&store_func)
 This is the "main" function for ERT. More...
 
template<size_t... Nops, typename DeviceT , typename Tp , typename CounterT , typename OpsFuncT , typename StoreFuncT , enable_if_t< sizeof...(Nops)==0, int > = 0>
bool tim::ert::ops_main (counter< DeviceT, Tp, CounterT > &, OpsFuncT &&, StoreFuncT &&)
 This is invoked when TIMEMORY_USER_ERT_FLOPS is empty. More...
 

Detailed Description

Provides the kernel functions used by ERT, along with the "main" function for ERT.

Definition in file kernels.hpp.