diff options
author | friendlyanon <friendlyanon@users.noreply.github.com> | 2020-12-30 22:00:19 +0000 |
---|---|---|
committer | friendlyanon <friendlyanon@users.noreply.github.com> | 2020-12-30 22:05:12 +0000 |
commit | 02bc4b465453bb80d36d8dbbbf8c6f7548dd1175 (patch) | |
tree | 6f7fa4d8596b7225e273e7524b021987d518cdff /consteval_huffman.hpp | |
parent | 7a63ba6dc707e190f84d73a92ebc308c5da555bd (diff) |
Move the library header to the include folder
Diffstat (limited to 'consteval_huffman.hpp')
-rw-r--r-- | consteval_huffman.hpp | 379 |
1 files changed, 0 insertions, 379 deletions
diff --git a/consteval_huffman.hpp b/consteval_huffman.hpp deleted file mode 100644 index b7ea57f..0000000 --- a/consteval_huffman.hpp +++ /dev/null @@ -1,379 +0,0 @@ -/** - * consteval_huffman.hpp - Provides compile-time text compression. - * Written by Clyne Sullivan. - * https://github.com/tcsullivan/consteval-huffman - */ - -#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ -#define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ - -#include <algorithm> -#include <concepts> -#include <span> -#include <type_traits> - -namespace detail -{ - // Provides a string container for the huffman compressor. - // Using this allows for automatic string data length measurement, as - // well as implementation of the _huffman suffix. - template<unsigned long int N> - struct huffman_string_container { - char data[N]; - consteval huffman_string_container(const char (&s)[N]) noexcept { - std::copy(s, s + N, data); - } - consteval operator const char *() const noexcept { - return data; - } - consteval auto size() const noexcept { - return N; - } - }; -} - -/** - * Compresses the given data string using Huffman coding, providing a - * minimal run-time interface for decompressing the data. - * @tparam data The string of data to be compressed. - */ -template<auto raw_data> - requires( - std::same_as<std::remove_cvref_t<decltype(raw_data)>, - detail::huffman_string_container<raw_data.size()>> && - raw_data.size() > 0) -class huffman_compressor -{ - using size_t = long int; - using usize_t = unsigned long int; - - // Note: class internals need to be defined before the public interface. - // See the bottom of the class definition for usage. -private: - // Node structure used to build a tree for calculating Huffman codes. - struct node { - int value = 0; - size_t freq = 0; - - // Below values are indices into the node list - int parent = -1; - int left = -1; - int right = -1; - }; - - /** - * Builds a list of nodes for every character that appears in the given data. - * This list is sorted by increasing frequency. - * @return Compile-time allocated array of nodes - */ - consteval static auto build_node_list() noexcept { - // Build a list for counting every occuring value - auto list = std::span(new node[256] {}, 256); - for (int i = 0; i < 256; i++) - list[i].value = i; - for (usize_t i = 0; i < raw_data.size(); i++) - list[raw_data[i]].freq++; - - std::sort(list.begin(), list.end(), - [](const auto& a, const auto& b) { return a.freq < b.freq; }); - - // Filter out the non-occuring values, and build a compact list to return - auto first_valid_node = std::find_if(list.begin(), list.end(), - [](const auto& n) { return n.freq != 0; }); - auto fit_size = std::distance(first_valid_node, list.end()); - if (fit_size < 2) - fit_size = 2; - auto fit_list = std::span(new node[fit_size] {}, fit_size); - std::copy(first_valid_node, list.end(), fit_list.begin()); - delete[] list.data(); - return fit_list; - } - - /** - * Returns the count of how many nodes are in the node tree. - */ - consteval static auto tree_count() noexcept { - auto list = build_node_list(); - auto count = list.size() * 2 - 1; - delete[] list.data(); - return count; - } - - /** - * Builds a tree out of the node list, allowing for the calculation of - * Huffman codes. - * @return Compile-time allocated tree of nodes, root node at index zero. - */ - consteval static auto build_node_tree() noexcept { - auto list = build_node_list(); - auto tree = std::span(new node[tree_count()] {}, tree_count()); - - auto list_end = list.end(); // Track end of list as it shrinks - auto tree_begin = tree.end(); // Build tree from bottom - int next_parent_node_value = 0x100; // Give parent nodes unique ids - while (1) { - // Create parent node for two least-occuring values - node new_node { - next_parent_node_value++, - list[0].freq + list[1].freq, - -1, - list[0].value, - list[1].value - }; - - // Move the two nodes into the tree and remove them from the list - *--tree_begin = list[0]; - *--tree_begin = list[1]; - std::copy(list.begin() + 2, list_end--, list.begin()); - if (std::distance(list.begin(), list_end) == 1) { - list.front() = new_node; - break; - } - - // Insert the parent node back into the list - auto insertion_point = std::find_if(list.begin(), list_end - 1, - [&new_node](const auto& n) { return n.freq >= new_node.freq; }); - if (insertion_point != list_end - 1) { - *(list_end - 1) = node(); - std::copy_backward(insertion_point, list_end - 1, list_end); - } - - *insertion_point = new_node; - } - - // Connect child nodes to their parents - tree[0] = list[0]; - for (auto iter = tree.begin(); ++iter != tree.end();) { - if (iter->parent == -1) { - auto parent = std::find_if(tree.begin(), iter, - [&iter](const auto& n) { - return n.left == iter->value || n.right == iter->value; - }); - if (parent != iter) - iter->parent = std::distance(tree.begin(), parent); - } - } - - delete[] list.data(); - return tree; - } - - /** - * Determines the size of the compressed data. - * @return A pair of total bytes used, and bits used in last byte. - */ - consteval static auto compressed_size_info() noexcept { - auto tree = build_node_tree(); - size_t bytes = 1, bits = 0; - - for (usize_t i = 0; i < raw_data.size(); i++) { - auto leaf = std::find_if(tree.begin(), tree.end(), - [c = raw_data[i]](const auto& n) { return n.value == c; }); - - while (leaf->parent != -1) { - if (++bits == 8) - bits = 0, bytes++; - leaf = tree.begin() + leaf->parent; - } - } - - delete[] tree.data(); - return std::make_pair(bytes, bits); - } - - /** - * Compresses the input data, storing the result in the object instance. - */ - consteval void compress() noexcept { - auto tree = build_node_tree(); - - // Set up byte and bit count (note, we're compressing the data backwards) - auto [bytes, bits] = compressed_size_info(); - if (bits > 0) - bits = 8 - bits; - else - bits = 0, bytes--; - - // Compress data backwards, because we obtain the Huffman codes backwards - // as we traverse towards the parent node. - for (auto i = raw_data.size(); i > 0; i--) { - auto leaf = std::find_if(tree.begin(), tree.end(), - [c = raw_data[i - 1]](auto& n) { return n.value == c; }); - - while (leaf->parent != -1) { - auto parent = tree.begin() + leaf->parent; - if (parent->right == leaf->value) - compressed_data[bytes - 1] |= (1 << bits); - if (++bits == 8) - bits = 0, --bytes; - leaf = parent; - } - } - - delete[] tree.data(); - } - - /** - * Builds the decode tree, used to decompress the data. - * Format: three bytes per node. - * 1. Node value, 2. Distance to left child, 3. Distance to right child. - */ - consteval void build_decode_tree() noexcept { - auto tree = build_node_tree(); - auto decode_tree = compressed_data + compressed_size_info().first; - - for (usize_t i = 0; i < tree_count(); i++) { - // Only store node value if it represents a data value - decode_tree[i * 3] = tree[i].value <= 0xFF ? tree[i].value : 0; - - usize_t j; - // Find the left child of this node - for (j = i + 1; j < tree_count(); j++) { - if (tree[i].left == tree[j].value) - break; - } - decode_tree[i * 3 + 1] = j < tree_count() ? j - i : 0; - // Find the right child of this node - for (j = i + 1; j < tree_count(); j++) { - if (tree[i].right == tree[j].value) - break; - } - decode_tree[i * 3 + 2] = j < tree_count() ? j - i : 0; - } - - delete[] tree.data(); - } - -public: - consteval static auto compressed_size() noexcept { - return compressed_size_info().first + 3 * tree_count(); - } - consteval static auto uncompressed_size() noexcept { - return raw_data.size(); - } - consteval static size_t bytes_saved() noexcept { - size_t diff = uncompressed_size() - compressed_size(); - return diff > 0 ? diff : 0; - } - - // Utility for decoding compressed data. - class decoder { - public: - using difference_type = std::ptrdiff_t; - using value_type = int; - - decoder(const unsigned char *comp_data) noexcept - : m_data(comp_data), - m_table(comp_data + compressed_size_info().first) { get_next(); } - decoder() = default; - - constexpr static decoder end(const unsigned char *comp_data) noexcept { - decoder ender; - ender.m_data = comp_data; - if constexpr (bytes_saved() > 0) { - const auto [size_bytes, last_bits] = compressed_size_info(); - ender.m_data += size_bytes - 1; - ender.m_bit = 1 << (7 - last_bits); - } else { - ender.m_data += raw_data.size() + 1; - } - - return ender; - } - - bool operator==(const decoder& other) const noexcept { - return m_data == other.m_data && m_bit == other.m_bit; - } - auto operator*() const noexcept { - return m_current; - } - decoder& operator++() noexcept { - get_next(); - return *this; - } - decoder operator++(int) noexcept { - auto old = *this; - get_next(); - return old; - } - - private: - void get_next() noexcept { - if (*this == end(m_data)) - return; - if constexpr (bytes_saved() > 0) { - auto *node = m_table; - int data = *m_data; - auto bit = m_bit; - do { - node += (data & bit) ? node[2] * 3u : node[1] * 3u; - bit >>= 1; - if (!bit) - bit = 0x80, data = *++m_data; - } while (node[1] != 0); - m_bit = bit; - m_current = *node; - } else { - m_current = *m_data++; - } - } - - const unsigned char *m_data = nullptr; - const unsigned char *m_table = nullptr; - unsigned char m_bit = 0x80; - int m_current = -1; - - friend class huffman_compressor; - }; - - // Stick the forward_iterator check here just so it's run - consteval huffman_compressor() noexcept - requires (std::forward_iterator<decoder>) - { - if constexpr (bytes_saved() > 0) { - build_decode_tree(); - compress(); - } else { - std::copy(raw_data.data, raw_data.data + raw_data.size(), - compressed_data); - } - } - - auto begin() const noexcept { - return decoder(compressed_data); - } - auto end() const noexcept { - return decoder::end(compressed_data); - } - auto cbegin() const noexcept { return begin(); } - auto cend() const noexcept { return end(); } - - // For accessing the compressed data - auto data() const noexcept { - if constexpr (bytes_saved() > 0) - return compressed_data; - else - return raw_data; - } - - auto size() const noexcept { - if constexpr (bytes_saved() > 0) - return compressed_size(); - else - return uncompressed_size(); - } - -private: - // Contains the compressed data, followed by the decoding tree. - unsigned char compressed_data[ - bytes_saved() > 0 ? compressed_size_info().first + 3 * tree_count() - : raw_data.size()] = {0}; -}; - -template <detail::huffman_string_container hsc> -constexpr auto operator ""_huffman() -{ - return huffman_compressor<hsc>(); -} - -#endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ |