aboutsummaryrefslogtreecommitdiffstats
path: root/benchpress.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'benchpress.hpp')
-rw-r--r--benchpress.hpp450
1 files changed, 0 insertions, 450 deletions
diff --git a/benchpress.hpp b/benchpress.hpp
deleted file mode 100644
index cb1bff4..0000000
--- a/benchpress.hpp
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
-* Copyright (C) 2015 Christopher Gilbert.
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to deal
-* in the Software without restriction, including without limitation the rights
-* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-#ifndef BENCHPRESS_HPP
-#define BENCHPRESS_HPP
-
-#include <algorithm> // max, min
-#include <atomic> // atomic_intmax_t
-#include <chrono> // high_resolution_timer, duration
-#include <functional> // function
-#include <iomanip> // setw
-#include <iostream> // cout
-#include <regex> // regex, regex_match
-#include <sstream> // stringstream
-#include <string> // string
-#include <thread> // thread
-#include <vector> // vector
-
-namespace benchpress {
-
-/*
- * The options class encapsulates all options for running benchmarks.
- *
- * When including benchpress, a main function can be emitted which includes a command-line parser for building an
- * options object. However from time-to-time it may be necessary for the developer to have to build their own main
- * stub and construct the options object manually.
- *
- * options opts;
- * opts
- * .bench(".*")
- * .benchtime(1)
- * .cpu(4);
- */
-class options {
- std::string d_bench;
- size_t d_benchtime;
- size_t d_cpu;
-public:
- options()
- : d_bench(".*")
- , d_benchtime(1)
- , d_cpu(std::thread::hardware_concurrency())
- {}
- options& bench(const std::string& bench) {
- d_bench = bench;
- return *this;
- }
- options& benchtime(size_t benchtime) {
- d_benchtime = benchtime;
- return *this;
- }
- options& cpu(size_t cpu) {
- d_cpu = cpu;
- return *this;
- }
- std::string get_bench() const {
- return d_bench;
- }
- size_t get_benchtime() const {
- return d_benchtime;
- }
- size_t get_cpu() const {
- return d_cpu;
- }
-};
-
-class context;
-
-/*
- * The benchmark_info class is used to store a function name / pointer pair.
- *
- * benchmark_info bi("example", [](benchpress::context* b) {
- * // benchmark function
- * });
- */
-class benchmark_info {
- std::string d_name;
- std::function<void(context*)> d_func;
-
-public:
- benchmark_info(std::string name, std::function<void(context*)> func)
- : d_name(name)
- , d_func(func)
- {}
-
- std::string get_name() const { return d_name; }
- std::function<void(context*)> get_func() const { return d_func; }
-};
-
-/*
- * The registration class is responsible for providing a single global point of reference for registering
- * benchmark functions.
- *
- * registration::get_ptr()->register_benchmark(info);
- */
-class registration {
- static registration* d_this;
- std::vector<benchmark_info> d_benchmarks;
-
-public:
- static registration* get_ptr() {
- if (nullptr == d_this) {
- d_this = new registration();
- }
- return d_this;
- }
-
- void register_benchmark(benchmark_info& info) {
- d_benchmarks.push_back(info);
- }
-
- std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; }
-};
-
-/*
- * The auto_register class is a helper used to register benchmarks.
- */
-class auto_register {
-public:
- auto_register(const std::string& name, std::function<void(context*)> func) {
- benchmark_info info(name, func);
- registration::get_ptr()->register_benchmark(info);
- }
-};
-
-#define CONCAT(x, y) x ## y
-#define CONCAT2(x, y) CONCAT(x, y)
-
-// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
-// registration class.
-#define BENCHMARK(x, f) benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));
-
-/*
- * This function can be used to keep variables on the stack that would normally be optimised away
- * by the compiler, without introducing any additional instructions or changing the behaviour of
- * the program.
- *
- * This function uses the Extended Asm syntax of GCC. The volatile keyword indicates that the
- * following instructions have some unknowable side-effect, and ensures that the code will neither
- * be moved, nor optimised away.
- *
- * AssemblerTemplate: No operands.
- *
- * OutputOperands: None.
- *
- * InputOperands: The "g" is a wildcard constraint which tells the compiler that it may choose what
- * to use for p (eg. a register OR a memory reference).
- *
- * Clobbers: The "memory" clobber tells the compiler that the assembly code performs reads or writes
- * to the memory pointed to by one of the input parameters.
- *
- * Example usage:
- * std::vector<int> v;
- * v.reserve(10);
- * escape(v.data());
- */
-void escape(void *p) {
- asm volatile("" : : "g"(p) : "memory");
-}
-
-/*
- * This function can be used to disable the optimiser. It has the effect of creating a read / write
- * memory barrier for the compiler, meaning it does not assume that any values read from memory before
- * the asm remain unchanged after that asm; it reloads them as needed.
- *
- * Example usage:
- * std::vector<int> v;
- * v.reserve(10);
- * escape(v.data());
- * v.push_back(42);
- * clobber(); // Ensure the integer pushed is read
- */
-void clobber() {
- asm volatile("" : : : "memory");
-}
-
-/*
- * The result class is responsible for producing a printable string representation of a benchmark run.
- */
-class result {
- size_t d_num_iterations;
- std::chrono::nanoseconds d_duration;
- size_t d_num_bytes;
-
-public:
- result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
- : d_num_iterations(num_iterations)
- , d_duration(duration)
- , d_num_bytes(num_bytes)
- {}
-
- size_t get_ns_per_op() const {
- if (d_num_iterations <= 0) {
- return 0;
- }
- return d_duration.count() / d_num_iterations;
- }
-
- double get_mb_per_s() const {
- if (d_num_iterations <= 0 || d_duration.count() <= 0 || d_num_bytes <= 0) {
- return 0;
- }
- return ((double(d_num_bytes) * double(d_num_iterations) / double(1e6)) /
- double(std::chrono::duration_cast<std::chrono::seconds>(d_duration).count()));
- }
-
- std::string to_string() const {
- std::stringstream tmp;
- tmp << std::setw(12) << std::right << d_num_iterations;
- size_t npo = get_ns_per_op();
- tmp << std::setw(12) << std::right << npo << std::setw(0) << " ns/op";
- double mbs = get_mb_per_s();
- if (mbs > 0.0) {
- tmp << std::setw(12) << std::right << mbs << std::setw(0) << " MB/s";
- }
- return std::string(tmp.str());
- }
-};
-
-/*
- * The parallel_context class is responsible for providing a thread-safe context for parallel benchmark code.
- */
-class parallel_context {
- std::atomic_intmax_t d_num_iterations;
-public:
- parallel_context(size_t num_iterations)
- : d_num_iterations(num_iterations)
- {}
-
- bool next() {
- return (d_num_iterations.fetch_sub(1) > 0);
- }
-};
-
-/*
- * The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
- */
-class context {
- bool d_timer_on;
- std::chrono::high_resolution_clock::time_point d_start;
- std::chrono::nanoseconds d_duration;
- std::chrono::seconds d_benchtime;
- size_t d_num_iterations;
- size_t d_num_threads;
- size_t d_num_bytes;
- benchmark_info d_benchmark;
-
-public:
- context(const benchmark_info& info, const options& opts)
- : d_timer_on(false)
- , d_start()
- , d_duration()
- , d_benchtime(std::chrono::seconds(opts.get_benchtime()))
- , d_num_iterations(1)
- , d_num_threads(opts.get_cpu())
- , d_num_bytes(0)
- , d_benchmark(info)
- {}
-
- size_t num_iterations() const { return d_num_iterations; }
-
- void set_num_threads(size_t n) { d_num_threads = n; }
- size_t num_threads() const { return d_num_threads; }
-
- void start_timer() {
- if (!d_timer_on) {
- d_start = std::chrono::high_resolution_clock::now();
- d_timer_on = true;
- }
- }
- void stop_timer() {
- if (d_timer_on) {
- d_duration += std::chrono::high_resolution_clock::now() - d_start;
- d_timer_on = false;
- }
- }
- void reset_timer() {
- if (d_timer_on) {
- d_start = std::chrono::high_resolution_clock::now();
- }
- d_duration = std::chrono::nanoseconds::zero();
- }
-
- void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
-
- size_t get_ns_per_op() {
- if (d_num_iterations <= 0) {
- return 0;
- }
- return d_duration.count() / d_num_iterations;
- }
-
- void run_n(size_t n) {
- d_num_iterations = n;
- reset_timer();
- start_timer();
- d_benchmark.get_func()(this);
- stop_timer();
- }
-
- void run_parallel(std::function<void(parallel_context*)> f) {
- parallel_context pc(d_num_iterations);
- std::vector<std::thread> threads;
- for (size_t i = 0; i < d_num_threads; ++i) {
- threads.push_back(std::thread([&pc,&f]() -> void {
- f(&pc);
- }));
- }
- for(auto& thread : threads){
- thread.join();
- }
- }
-
- result run() {
- size_t n = 1;
- run_n(n);
- while (d_duration < d_benchtime && n < 1e9) {
- size_t last = n;
- if (get_ns_per_op() == 0) {
- n = 1e9;
- } else {
- n = d_duration.count() / get_ns_per_op();
- }
- n = std::max(std::min(n+n/2, 100*last), last+1);
- n = round_up(n);
- run_n(n);
- }
- return result(n, d_duration, d_num_bytes);
- }
-
-private:
- template<typename T>
- T round_down_10(T n) {
- int tens = 0;
- while (n > 10) {
- n /= 10;
- tens++;
- }
- int result = 1;
- for (int i = 0; i < tens; ++i) {
- result *= 10;
- }
- return result;
- }
-
- template<typename T>
- T round_up(T n) {
- T base = round_down_10(n);
- if (n < (2 * base)) {
- return 2 * base;
- }
- if (n < (5 * base)) {
- return 5 * base;
- }
- return 10 * base;
- }
-};
-
-/*
- * The run_benchmarks function will run the registered benchmarks.
- */
-void run_benchmarks(const options& opts) {
- std::regex match_r(opts.get_bench());
- auto benchmarks = registration::get_ptr()->get_benchmarks();
- for (auto& info : benchmarks) {
- if (std::regex_match(info.get_name(), match_r)) {
- context c(info, opts);
- auto r = c.run();
- std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
- }
- }
-}
-
-} // namespace benchpress
-
-/*
- * If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
- * command-line parser and then executes run_benchmarks.
- */
-#ifdef BENCHPRESS_CONFIG_MAIN
-#include "cxxopts.hpp"
-benchpress::registration* benchpress::registration::d_this;
-int main(int argc, char** argv) {
- std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
- benchpress::options bench_opts;
- try {
- cxxopts::Options cmd_opts(argv[0], " - command line options");
- cmd_opts.add_options()
- ("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
- ->default_value(".*"))
- ("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
- ->default_value("1"))
- ("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
- ->default_value(std::to_string(std::thread::hardware_concurrency())))
- ("list", "list all available benchmarks")
- ("help", "print help")
- ;
- cmd_opts.parse(argc, argv);
- if (cmd_opts.count("help")) {
- std::cout << cmd_opts.help({""}) << std::endl;
- exit(0);
- }
- if (cmd_opts.count("bench")) {
- bench_opts.bench(cmd_opts["bench"].as<std::string>());
- }
- if (cmd_opts.count("benchtime")) {
- bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
- }
- if (cmd_opts.count("cpu")) {
- bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
- }
- if (cmd_opts.count("list")) {
- auto benchmarks = benchpress::registration::get_ptr()->get_benchmarks();
- for (auto& info : benchmarks) {
- std::cout << info.get_name() << std::endl;
- }
- exit(EXIT_SUCCESS);
- }
- } catch (const cxxopts::OptionException& e) {
- std::cout << "error parsing options: " << e.what() << std::endl;
- exit(1);
- }
- benchpress::run_benchmarks(bench_opts);
- float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
- std::chrono::high_resolution_clock::now() - bp_start
- ).count() / 1000.f;
- std::cout << argv[0] << " " << duration << "s" << std::endl;
- return 0;
-}
-#endif
-
-#endif // BENCHPRESS_HPP