aboutsummaryrefslogtreecommitdiffstats
path: root/tests/benchpress.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/benchpress.hpp')
-rw-r--r--tests/benchpress.hpp450
1 files changed, 450 insertions, 0 deletions
diff --git a/tests/benchpress.hpp b/tests/benchpress.hpp
new file mode 100644
index 0000000..cb1bff4
--- /dev/null
+++ b/tests/benchpress.hpp
@@ -0,0 +1,450 @@
+/*
+* Copyright (C) 2015 Christopher Gilbert.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in all
+* copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+#ifndef BENCHPRESS_HPP
+#define BENCHPRESS_HPP
+
+#include <algorithm> // max, min
+#include <atomic> // atomic_intmax_t
+#include <chrono> // high_resolution_timer, duration
+#include <cstddef> // size_t
+#include <cstdint> // int64_t
+#include <functional> // function
+#include <iomanip> // setw
+#include <iostream> // cout
+#include <regex> // regex, regex_match
+#include <sstream> // stringstream
+#include <string> // string
+#include <thread> // thread
+#include <utility> // move
+#include <vector> // vector
+
+namespace benchpress {
+
+/*
+ * The options class encapsulates all options for running benchmarks.
+ *
+ * When including benchpress, a main function can be emitted which includes a command-line parser for building an
+ * options object. However, from time to time a developer may need to write their own main stub and construct the
+ * options object manually.
+ *
+ * options opts;
+ * opts
+ * .bench(".*")
+ * .benchtime(1)
+ * .cpu(4);
+ */
+class options {
+    std::string d_bench;     // regular expression selecting which benchmarks to run
+    size_t      d_benchtime; // target running time per benchmark, in seconds
+    size_t      d_cpu;       // number of threads used by parallel benchmarks
+public:
+    /*
+     * Default options: match every benchmark (".*"), run each for one second, and use one thread per
+     * hardware thread. std::thread::hardware_concurrency() is allowed to return 0 when the value cannot
+     * be determined, so the default is clamped to at least one thread.
+     */
+    options()
+        : d_bench(".*")
+        , d_benchtime(1)
+        , d_cpu(std::max<size_t>(1, std::thread::hardware_concurrency()))
+    {}
+
+    // Fluent setters: each stores the new value and returns *this so calls can be chained.
+    options& bench(const std::string& bench) {
+        d_bench = bench;
+        return *this;
+    }
+    options& benchtime(size_t benchtime) {
+        d_benchtime = benchtime;
+        return *this;
+    }
+    options& cpu(size_t cpu) {
+        d_cpu = cpu;
+        return *this;
+    }
+
+    // Accessors for the stored values.
+    std::string get_bench() const {
+        return d_bench;
+    }
+    size_t get_benchtime() const {
+        return d_benchtime;
+    }
+    size_t get_cpu() const {
+        return d_cpu;
+    }
+};
+
+class context;
+
+/*
+ * The benchmark_info class is used to store a function name / pointer pair.
+ *
+ * benchmark_info bi("example", [](benchpress::context* b) {
+ * // benchmark function
+ * });
+ */
+class benchmark_info {
+    std::string d_name;                   // name the benchmark is listed and matched under
+    std::function<void(context*)> d_func; // the benchmark body
+
+public:
+    // Both arguments are taken by value and moved into place, so passing temporaries costs no extra copy.
+    benchmark_info(std::string name, std::function<void(context*)> func)
+        : d_name(std::move(name))
+        , d_func(std::move(func))
+    {}
+
+    // Returns a copy of the benchmark name.
+    std::string get_name() const { return d_name; }
+    // Returns a copy of the benchmark function.
+    std::function<void(context*)> get_func() const { return d_func; }
+};
+
+/*
+ * The registration class is responsible for providing a single global point of reference for registering
+ * benchmark functions.
+ *
+ * registration::get_ptr()->register_benchmark(info);
+ */
+class registration {
+    // Pointer to the single shared instance. It is only declared here; this header emits the definition
+    // when BENCHPRESS_CONFIG_MAIN is defined.
+    static registration* d_this;
+    std::vector<benchmark_info> d_benchmarks;
+
+public:
+    // Returns the shared instance, allocating it on first use. The instance is never deleted and the
+    // check-then-create sequence is not synchronised.
+    static registration* get_ptr() {
+        if (nullptr == d_this) {
+            d_this = new registration();
+        }
+        return d_this;
+    }
+
+    // Stores a copy of info. The parameter is a const reference because it is only copied, which also
+    // allows temporaries and const objects to be registered.
+    void register_benchmark(const benchmark_info& info) {
+        d_benchmarks.push_back(info);
+    }
+
+    // Returns a copy of the registered benchmarks, in registration order.
+    std::vector<benchmark_info> get_benchmarks() const { return d_benchmarks; }
+};
+
+/*
+ * The auto_register class is a helper used to register benchmarks.
+ */
+class auto_register {
+public:
+    // Bundles the name / function pair into a benchmark_info and adds it to the global registration.
+    auto_register(const std::string& name, std::function<void(context*)> func) {
+        registration* const registry = registration::get_ptr();
+        benchmark_info entry(name, func);
+        registry->register_benchmark(entry);
+    }
+};
+
+// Token-pasting helpers. CONCAT2 goes through a second macro so that its arguments (such as __LINE__) are
+// expanded before CONCAT pastes them together.
+#define CONCAT(x, y) x ## y
+#define CONCAT2(x, y) CONCAT(x, y)
+
+// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
+// registration class. It declares a benchpress::auto_register object named register_<line number>, passing x as
+// the benchmark name and f as the benchmark function. The expansion already ends with a semicolon.
+#define BENCHMARK(x, f) benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));
+
+/*
+ * This function can be used to keep variables on the stack that would normally be optimised away
+ * by the compiler, without introducing any additional instructions or changing the behaviour of
+ * the program.
+ *
+ * This function uses the Extended Asm syntax of GCC. The volatile keyword indicates that the
+ * following instructions have some unknowable side-effect, and ensures that the code will neither
+ * be moved, nor optimised away.
+ *
+ * AssemblerTemplate: No operands.
+ *
+ * OutputOperands: None.
+ *
+ * InputOperands: The "g" is a wildcard constraint which tells the compiler that it may choose what
+ * to use for p (eg. a register OR a memory reference).
+ *
+ * Clobbers: The "memory" clobber tells the compiler that the assembly code performs reads or writes
+ * to the memory pointed to by one of the input parameters.
+ *
+ * The function is declared inline because it is defined in a header: without inline, including this
+ * header from more than one translation unit produces multiple-definition errors at link time.
+ *
+ * Example usage:
+ * std::vector<int> v;
+ * v.reserve(10);
+ * escape(v.data());
+ */
+inline void escape(void *p) {
+    asm volatile("" : : "g"(p) : "memory");
+}
+
+/*
+ * This function can be used to disable the optimiser. It has the effect of creating a read / write
+ * memory barrier for the compiler, meaning it does not assume that any values read from memory before
+ * the asm remain unchanged after that asm; it reloads them as needed.
+ *
+ * The function is declared inline because it is defined in a header: without inline, including this
+ * header from more than one translation unit produces multiple-definition errors at link time.
+ *
+ * Example usage:
+ * std::vector<int> v;
+ * v.reserve(10);
+ * escape(v.data());
+ * v.push_back(42);
+ * clobber(); // Ensure the integer pushed is read
+ */
+inline void clobber() {
+    asm volatile("" : : : "memory");
+}
+
+/*
+ * The result class is responsible for producing a printable string representation of a benchmark run.
+ */
+class result {
+    size_t d_num_iterations;             // iterations executed in the measured run
+    std::chrono::nanoseconds d_duration; // time measured for those iterations
+    size_t d_num_bytes;                  // bytes processed per iteration (0 disables MB/s reporting)
+
+public:
+    result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
+        : d_num_iterations(num_iterations)
+        , d_duration(duration)
+        , d_num_bytes(num_bytes)
+    {}
+
+    // Average time per iteration in whole nanoseconds; 0 when no iterations were run.
+    size_t get_ns_per_op() const {
+        if (d_num_iterations == 0) {
+            return 0;
+        }
+        return d_duration.count() / d_num_iterations;
+    }
+
+    // Throughput in megabytes (1e6 bytes) per second; 0 when iterations, duration or byte count is
+    // missing. The duration is converted to fractional seconds: truncating it to whole seconds would
+    // divide by zero for any run shorter than one second.
+    double get_mb_per_s() const {
+        if (d_num_iterations == 0 || d_duration.count() <= 0 || d_num_bytes == 0) {
+            return 0;
+        }
+        const double megabytes = static_cast<double>(d_num_bytes) * static_cast<double>(d_num_iterations) / 1e6;
+        const double seconds = std::chrono::duration<double>(d_duration).count();
+        return megabytes / seconds;
+    }
+
+    // Formats the iteration count and ns/op in right-aligned 12-character columns, followed by a MB/s
+    // column when a throughput is available.
+    std::string to_string() const {
+        std::stringstream tmp;
+        tmp << std::setw(12) << std::right << d_num_iterations;
+        tmp << std::setw(12) << std::right << get_ns_per_op() << " ns/op";
+        const double mbs = get_mb_per_s();
+        if (mbs > 0.0) {
+            tmp << std::setw(12) << std::right << mbs << " MB/s";
+        }
+        return tmp.str();
+    }
+};
+
+/*
+ * The parallel_context class is responsible for providing a thread-safe context for parallel benchmark code.
+ */
+class parallel_context {
+    // Shared countdown of iterations still to be handed out; decremented atomically by next().
+    std::atomic_intmax_t d_remaining;
+public:
+    parallel_context(size_t num_iterations)
+        : d_remaining(num_iterations)
+    {}
+
+    // Claims one iteration. Returns true while the counter was still positive before this call's
+    // decrement, and false once all iterations have been claimed.
+    bool next() {
+        const auto before = d_remaining.fetch_sub(1);
+        return 0 < before;
+    }
+};
+
+/*
+ * The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
+ */
+class context {
+    bool d_timer_on;                                        // true while the timer is running
+    std::chrono::high_resolution_clock::time_point d_start; // when the timer was last (re)started
+    std::chrono::nanoseconds d_duration;                    // time accumulated by finished timer intervals
+    std::chrono::seconds d_benchtime;                       // measured time run() tries to reach
+    size_t d_num_iterations;                                // iteration count of the current run
+    size_t d_num_threads;                                   // threads requested for run_parallel()
+    size_t d_num_bytes;                                     // value given to set_bytes(), passed on to result
+    benchmark_info d_benchmark;                             // the benchmark being measured
+
+public:
+    // Prepares a context for one benchmark, taking the target time and thread count from opts.
+    context(const benchmark_info& info, const options& opts)
+        : d_timer_on(false)
+        , d_start()
+        , d_duration()
+        , d_benchtime(std::chrono::seconds(opts.get_benchtime()))
+        , d_num_iterations(1)
+        , d_num_threads(opts.get_cpu())
+        , d_num_bytes(0)
+        , d_benchmark(info)
+    {}
+
+    // Number of iterations the benchmark function is expected to perform in the current run.
+    size_t num_iterations() const { return d_num_iterations; }
+
+    void set_num_threads(size_t n) { d_num_threads = n; }
+    size_t num_threads() const { return d_num_threads; }
+
+    // Starts timing; has no effect if the timer is already running.
+    void start_timer() {
+        if (!d_timer_on) {
+            d_start = std::chrono::high_resolution_clock::now();
+            d_timer_on = true;
+        }
+    }
+    // Stops timing and adds the elapsed interval to the total; has no effect if the timer is stopped.
+    void stop_timer() {
+        if (d_timer_on) {
+            d_duration += std::chrono::high_resolution_clock::now() - d_start;
+            d_timer_on = false;
+        }
+    }
+    // Discards the accumulated time. A running timer keeps running, restarted from now.
+    void reset_timer() {
+        if (d_timer_on) {
+            d_start = std::chrono::high_resolution_clock::now();
+        }
+        d_duration = std::chrono::nanoseconds::zero();
+    }
+
+    // Records the byte count that is handed to result for its MB/s figure.
+    void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
+
+    // Average time per iteration of the last run in whole nanoseconds; 0 when there were no iterations.
+    size_t get_ns_per_op() const {
+        if (d_num_iterations == 0) {
+            return 0;
+        }
+        return d_duration.count() / d_num_iterations;
+    }
+
+    // Runs the benchmark function once with the iteration count set to n, timing the whole call.
+    void run_n(size_t n) {
+        d_num_iterations = n;
+        reset_timer();
+        start_timer();
+        d_benchmark.get_func()(this);
+        stop_timer();
+    }
+
+    // Calls f concurrently from the configured number of threads, all sharing one parallel_context that
+    // counts down the current iteration count, and returns once every thread has finished. At least one
+    // thread is always used, so a thread count of 0 cannot turn the benchmark into a no-op.
+    void run_parallel(std::function<void(parallel_context*)> f) {
+        parallel_context pc(d_num_iterations);
+        const size_t thread_count = std::max<size_t>(d_num_threads, 1);
+        std::vector<std::thread> threads;
+        threads.reserve(thread_count);
+        for (size_t i = 0; i < thread_count; ++i) {
+            threads.push_back(std::thread([&pc, &f]() -> void {
+                f(&pc);
+            }));
+        }
+        for (auto& thread : threads) {
+            thread.join();
+        }
+    }
+
+    // Runs the benchmark with increasing iteration counts until one run lasts at least the target time
+    // (or the iteration ceiling is reached) and returns the measurements of that final run.
+    result run() {
+        const size_t max_iterations = 1000000000;
+        const unsigned long long target_ns =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(d_benchtime).count();
+        size_t n = 1;
+        run_n(n);
+        while (d_duration < d_benchtime && n < max_iterations) {
+            const size_t last = n;
+            const size_t ns_per_op = get_ns_per_op();
+            // Predict how many iterations are needed to fill the target time. The prediction has to be
+            // based on the target, not on the time just measured: elapsed / ns_per_op merely reproduces
+            // the previous iteration count.
+            if (ns_per_op == 0) {
+                n = max_iterations;
+            } else {
+                n = static_cast<size_t>(std::min<unsigned long long>(target_ns / ns_per_op, max_iterations));
+            }
+            // Run more iterations than predicted (1.5x), but grow by at most 100x per round in case the
+            // previous timing was off, and always by at least one iteration.
+            n = std::max(std::min(n + n / 2, 100 * last), last + 1);
+            // Round up to something easy to read.
+            n = round_up(n);
+            run_n(n);
+        }
+        return result(n, d_duration, d_num_bytes);
+    }
+
+private:
+    // Returns the largest power of ten that is <= n (1 when n < 10). The loop must use >= so that the
+    // digit count is exact: with a strict comparison 101..109 would yield 10, and round_up would then
+    // return 100, which is smaller than its input.
+    template<typename T>
+    T round_down_10(T n) {
+        int tens = 0;
+        while (n >= 10) {
+            n /= 10;
+            tens++;
+        }
+        T result = 1;
+        for (int i = 0; i < tens; ++i) {
+            result *= 10;
+        }
+        return result;
+    }
+
+    // Rounds n up to the next value of the form 2, 5 or 10 times a power of ten; the returned value is
+    // always greater than n.
+    template<typename T>
+    T round_up(T n) {
+        T base = round_down_10(n);
+        if (n < (2 * base)) {
+            return 2 * base;
+        }
+        if (n < (5 * base)) {
+            return 5 * base;
+        }
+        return 10 * base;
+    }
+};
+
+/*
+ * The run_benchmarks function will run the registered benchmarks.
+ */
+// Runs every registered benchmark whose full name matches the regular expression in opts.get_bench()
+// and prints one line per benchmark: the name in a left-aligned 35-character column followed by the
+// formatted result. Declared inline because it is defined in a header that may be included from more
+// than one translation unit.
+inline void run_benchmarks(const options& opts) {
+    const std::regex match_r(opts.get_bench());
+    const auto benchmarks = registration::get_ptr()->get_benchmarks();
+    for (const auto& info : benchmarks) {
+        if (std::regex_match(info.get_name(), match_r)) {
+            context c(info, opts);
+            const auto r = c.run();
+            std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
+        }
+    }
+}
+
+} // namespace benchpress
+
+/*
+ * If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
+ * command-line parser and then executes run_benchmarks.
+ */
+#ifdef BENCHPRESS_CONFIG_MAIN
+#include "cxxopts.hpp"
+// Definition of the registration singleton pointer declared in benchpress::registration.
+benchpress::registration* benchpress::registration::d_this;
+
+// Entry point emitted for BENCHPRESS_CONFIG_MAIN: parses the command line into a benchpress::options
+// object, handles --help and --list, runs the selected benchmarks and prints the total elapsed time.
+int main(int argc, char** argv) {
+    const auto started_at = std::chrono::high_resolution_clock::now();
+    benchpress::options run_opts;
+    try {
+        cxxopts::Options parser(argv[0], " - command line options");
+        parser.add_options()
+            ("bench", "run benchmarks matching the regular expression",
+                cxxopts::value<std::string>()->default_value(".*"))
+            ("benchtime", "run enough iterations of each benchmark to take t seconds",
+                cxxopts::value<size_t>()->default_value("1"))
+            ("cpu", "specify the number of threads to use for parallel benchmarks",
+                cxxopts::value<size_t>()->default_value(std::to_string(std::thread::hardware_concurrency())))
+            ("list", "list all available benchmarks")
+            ("help", "print help")
+        ;
+        parser.parse(argc, argv);
+
+        // --help prints the usage text and stops.
+        if (parser.count("help")) {
+            std::cout << parser.help({""}) << std::endl;
+            exit(0);
+        }
+
+        // Copy each option that was given on the command line into the benchmark options.
+        if (parser.count("bench")) {
+            run_opts.bench(parser["bench"].as<std::string>());
+        }
+        if (parser.count("benchtime")) {
+            run_opts.benchtime(parser["benchtime"].as<size_t>());
+        }
+        if (parser.count("cpu")) {
+            run_opts.cpu(parser["cpu"].as<size_t>());
+        }
+
+        // --list prints the registered benchmark names, one per line, and stops.
+        if (parser.count("list")) {
+            const auto registered = benchpress::registration::get_ptr()->get_benchmarks();
+            for (const auto& entry : registered) {
+                std::cout << entry.get_name() << std::endl;
+            }
+            exit(EXIT_SUCCESS);
+        }
+    } catch (const cxxopts::OptionException& e) {
+        std::cout << "error parsing options: " << e.what() << std::endl;
+        exit(1);
+    }
+
+    benchpress::run_benchmarks(run_opts);
+
+    // Report the wall-clock time of the whole program in seconds, at millisecond resolution.
+    const auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
+        std::chrono::high_resolution_clock::now() - started_at
+    ).count();
+    const float seconds = elapsed_ms / 1000.f;
+    std::cout << argv[0] << " " << seconds << "s" << std::endl;
+    return 0;
+}
+#endif
+
+#endif // BENCHPRESS_HPP