aboutsummaryrefslogtreecommitdiffstats
path: root/consteval_huffman.hpp
diff options
context:
space:
mode:
authorfriendlyanon <friendlyanon@users.noreply.github.com>2020-12-30 22:00:19 +0000
committerfriendlyanon <friendlyanon@users.noreply.github.com>2020-12-30 22:05:12 +0000
commit02bc4b465453bb80d36d8dbbbf8c6f7548dd1175 (patch)
tree6f7fa4d8596b7225e273e7524b021987d518cdff /consteval_huffman.hpp
parent7a63ba6dc707e190f84d73a92ebc308c5da555bd (diff)
Move the library header to the include folder
Diffstat (limited to 'consteval_huffman.hpp')
-rw-r--r--consteval_huffman.hpp379
1 files changed, 0 insertions, 379 deletions
diff --git a/consteval_huffman.hpp b/consteval_huffman.hpp
deleted file mode 100644
index b7ea57f..0000000
--- a/consteval_huffman.hpp
+++ /dev/null
@@ -1,379 +0,0 @@
-/**
- * consteval_huffman.hpp - Provides compile-time text compression.
- * Written by Clyne Sullivan.
- * https://github.com/tcsullivan/consteval-huffman
- */
-
-#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_
-#define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_
-
-#include <algorithm>
-#include <concepts>
-#include <span>
-#include <type_traits>
-
-namespace detail
-{
- // Provides a string container for the huffman compressor.
- // Using this allows for automatic string data length measurement, as
- // well as implementation of the _huffman suffix.
- template<unsigned long int N>
- struct huffman_string_container {
- char data[N];
- consteval huffman_string_container(const char (&s)[N]) noexcept {
- std::copy(s, s + N, data);
- }
- consteval operator const char *() const noexcept {
- return data;
- }
- consteval auto size() const noexcept {
- return N;
- }
- };
-}
-
-/**
- * Compresses the given data string using Huffman coding, providing a
- * minimal run-time interface for decompressing the data.
- * @tparam data The string of data to be compressed.
- */
-template<auto raw_data>
- requires(
- std::same_as<std::remove_cvref_t<decltype(raw_data)>,
- detail::huffman_string_container<raw_data.size()>> &&
- raw_data.size() > 0)
-class huffman_compressor
-{
- using size_t = long int;
- using usize_t = unsigned long int;
-
- // Note: class internals need to be defined before the public interface.
- // See the bottom of the class definition for usage.
-private:
- // Node structure used to build a tree for calculating Huffman codes.
- struct node {
- int value = 0;
- size_t freq = 0;
-
- // Below values are indices into the node list
- int parent = -1;
- int left = -1;
- int right = -1;
- };
-
- /**
- * Builds a list of nodes for every character that appears in the given data.
- * This list is sorted by increasing frequency.
- * @return Compile-time allocated array of nodes
- */
- consteval static auto build_node_list() noexcept {
- // Build a list for counting every occuring value
- auto list = std::span(new node[256] {}, 256);
- for (int i = 0; i < 256; i++)
- list[i].value = i;
- for (usize_t i = 0; i < raw_data.size(); i++)
- list[raw_data[i]].freq++;
-
- std::sort(list.begin(), list.end(),
- [](const auto& a, const auto& b) { return a.freq < b.freq; });
-
- // Filter out the non-occuring values, and build a compact list to return
- auto first_valid_node = std::find_if(list.begin(), list.end(),
- [](const auto& n) { return n.freq != 0; });
- auto fit_size = std::distance(first_valid_node, list.end());
- if (fit_size < 2)
- fit_size = 2;
- auto fit_list = std::span(new node[fit_size] {}, fit_size);
- std::copy(first_valid_node, list.end(), fit_list.begin());
- delete[] list.data();
- return fit_list;
- }
-
- /**
- * Returns the count of how many nodes are in the node tree.
- */
- consteval static auto tree_count() noexcept {
- auto list = build_node_list();
- auto count = list.size() * 2 - 1;
- delete[] list.data();
- return count;
- }
-
- /**
- * Builds a tree out of the node list, allowing for the calculation of
- * Huffman codes.
- * @return Compile-time allocated tree of nodes, root node at index zero.
- */
- consteval static auto build_node_tree() noexcept {
- auto list = build_node_list();
- auto tree = std::span(new node[tree_count()] {}, tree_count());
-
- auto list_end = list.end(); // Track end of list as it shrinks
- auto tree_begin = tree.end(); // Build tree from bottom
- int next_parent_node_value = 0x100; // Give parent nodes unique ids
- while (1) {
- // Create parent node for two least-occuring values
- node new_node {
- next_parent_node_value++,
- list[0].freq + list[1].freq,
- -1,
- list[0].value,
- list[1].value
- };
-
- // Move the two nodes into the tree and remove them from the list
- *--tree_begin = list[0];
- *--tree_begin = list[1];
- std::copy(list.begin() + 2, list_end--, list.begin());
- if (std::distance(list.begin(), list_end) == 1) {
- list.front() = new_node;
- break;
- }
-
- // Insert the parent node back into the list
- auto insertion_point = std::find_if(list.begin(), list_end - 1,
- [&new_node](const auto& n) { return n.freq >= new_node.freq; });
- if (insertion_point != list_end - 1) {
- *(list_end - 1) = node();
- std::copy_backward(insertion_point, list_end - 1, list_end);
- }
-
- *insertion_point = new_node;
- }
-
- // Connect child nodes to their parents
- tree[0] = list[0];
- for (auto iter = tree.begin(); ++iter != tree.end();) {
- if (iter->parent == -1) {
- auto parent = std::find_if(tree.begin(), iter,
- [&iter](const auto& n) {
- return n.left == iter->value || n.right == iter->value;
- });
- if (parent != iter)
- iter->parent = std::distance(tree.begin(), parent);
- }
- }
-
- delete[] list.data();
- return tree;
- }
-
- /**
- * Determines the size of the compressed data.
- * @return A pair of total bytes used, and bits used in last byte.
- */
- consteval static auto compressed_size_info() noexcept {
- auto tree = build_node_tree();
- size_t bytes = 1, bits = 0;
-
- for (usize_t i = 0; i < raw_data.size(); i++) {
- auto leaf = std::find_if(tree.begin(), tree.end(),
- [c = raw_data[i]](const auto& n) { return n.value == c; });
-
- while (leaf->parent != -1) {
- if (++bits == 8)
- bits = 0, bytes++;
- leaf = tree.begin() + leaf->parent;
- }
- }
-
- delete[] tree.data();
- return std::make_pair(bytes, bits);
- }
-
- /**
- * Compresses the input data, storing the result in the object instance.
- */
- consteval void compress() noexcept {
- auto tree = build_node_tree();
-
- // Set up byte and bit count (note, we're compressing the data backwards)
- auto [bytes, bits] = compressed_size_info();
- if (bits > 0)
- bits = 8 - bits;
- else
- bits = 0, bytes--;
-
- // Compress data backwards, because we obtain the Huffman codes backwards
- // as we traverse towards the parent node.
- for (auto i = raw_data.size(); i > 0; i--) {
- auto leaf = std::find_if(tree.begin(), tree.end(),
- [c = raw_data[i - 1]](auto& n) { return n.value == c; });
-
- while (leaf->parent != -1) {
- auto parent = tree.begin() + leaf->parent;
- if (parent->right == leaf->value)
- compressed_data[bytes - 1] |= (1 << bits);
- if (++bits == 8)
- bits = 0, --bytes;
- leaf = parent;
- }
- }
-
- delete[] tree.data();
- }
-
- /**
- * Builds the decode tree, used to decompress the data.
- * Format: three bytes per node.
- * 1. Node value, 2. Distance to left child, 3. Distance to right child.
- */
- consteval void build_decode_tree() noexcept {
- auto tree = build_node_tree();
- auto decode_tree = compressed_data + compressed_size_info().first;
-
- for (usize_t i = 0; i < tree_count(); i++) {
- // Only store node value if it represents a data value
- decode_tree[i * 3] = tree[i].value <= 0xFF ? tree[i].value : 0;
-
- usize_t j;
- // Find the left child of this node
- for (j = i + 1; j < tree_count(); j++) {
- if (tree[i].left == tree[j].value)
- break;
- }
- decode_tree[i * 3 + 1] = j < tree_count() ? j - i : 0;
- // Find the right child of this node
- for (j = i + 1; j < tree_count(); j++) {
- if (tree[i].right == tree[j].value)
- break;
- }
- decode_tree[i * 3 + 2] = j < tree_count() ? j - i : 0;
- }
-
- delete[] tree.data();
- }
-
-public:
- consteval static auto compressed_size() noexcept {
- return compressed_size_info().first + 3 * tree_count();
- }
- consteval static auto uncompressed_size() noexcept {
- return raw_data.size();
- }
- consteval static size_t bytes_saved() noexcept {
- size_t diff = uncompressed_size() - compressed_size();
- return diff > 0 ? diff : 0;
- }
-
- // Utility for decoding compressed data.
- class decoder {
- public:
- using difference_type = std::ptrdiff_t;
- using value_type = int;
-
- decoder(const unsigned char *comp_data) noexcept
- : m_data(comp_data),
- m_table(comp_data + compressed_size_info().first) { get_next(); }
- decoder() = default;
-
- constexpr static decoder end(const unsigned char *comp_data) noexcept {
- decoder ender;
- ender.m_data = comp_data;
- if constexpr (bytes_saved() > 0) {
- const auto [size_bytes, last_bits] = compressed_size_info();
- ender.m_data += size_bytes - 1;
- ender.m_bit = 1 << (7 - last_bits);
- } else {
- ender.m_data += raw_data.size() + 1;
- }
-
- return ender;
- }
-
- bool operator==(const decoder& other) const noexcept {
- return m_data == other.m_data && m_bit == other.m_bit;
- }
- auto operator*() const noexcept {
- return m_current;
- }
- decoder& operator++() noexcept {
- get_next();
- return *this;
- }
- decoder operator++(int) noexcept {
- auto old = *this;
- get_next();
- return old;
- }
-
- private:
- void get_next() noexcept {
- if (*this == end(m_data))
- return;
- if constexpr (bytes_saved() > 0) {
- auto *node = m_table;
- int data = *m_data;
- auto bit = m_bit;
- do {
- node += (data & bit) ? node[2] * 3u : node[1] * 3u;
- bit >>= 1;
- if (!bit)
- bit = 0x80, data = *++m_data;
- } while (node[1] != 0);
- m_bit = bit;
- m_current = *node;
- } else {
- m_current = *m_data++;
- }
- }
-
- const unsigned char *m_data = nullptr;
- const unsigned char *m_table = nullptr;
- unsigned char m_bit = 0x80;
- int m_current = -1;
-
- friend class huffman_compressor;
- };
-
- // Stick the forward_iterator check here just so it's run
- consteval huffman_compressor() noexcept
- requires (std::forward_iterator<decoder>)
- {
- if constexpr (bytes_saved() > 0) {
- build_decode_tree();
- compress();
- } else {
- std::copy(raw_data.data, raw_data.data + raw_data.size(),
- compressed_data);
- }
- }
-
- auto begin() const noexcept {
- return decoder(compressed_data);
- }
- auto end() const noexcept {
- return decoder::end(compressed_data);
- }
- auto cbegin() const noexcept { return begin(); }
- auto cend() const noexcept { return end(); }
-
- // For accessing the compressed data
- auto data() const noexcept {
- if constexpr (bytes_saved() > 0)
- return compressed_data;
- else
- return raw_data;
- }
-
- auto size() const noexcept {
- if constexpr (bytes_saved() > 0)
- return compressed_size();
- else
- return uncompressed_size();
- }
-
-private:
- // Contains the compressed data, followed by the decoding tree.
- unsigned char compressed_data[
- bytes_saved() > 0 ? compressed_size_info().first + 3 * tree_count()
- : raw_data.size()] = {0};
-};
-
-template <detail::huffman_string_container hsc>
-constexpr auto operator ""_huffman()
-{
- return huffman_compressor<hsc>();
-}
-
-#endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_