From 02bc4b465453bb80d36d8dbbbf8c6f7548dd1175 Mon Sep 17 00:00:00 2001 From: friendlyanon Date: Wed, 30 Dec 2020 22:00:19 +0000 Subject: Move the library header to the include folder --- consteval_huffman.hpp | 379 ------------------------ include/consteval_huffman/consteval_huffman.hpp | 379 ++++++++++++++++++++++++ 2 files changed, 379 insertions(+), 379 deletions(-) delete mode 100644 consteval_huffman.hpp create mode 100644 include/consteval_huffman/consteval_huffman.hpp diff --git a/consteval_huffman.hpp b/consteval_huffman.hpp deleted file mode 100644 index b7ea57f..0000000 --- a/consteval_huffman.hpp +++ /dev/null @@ -1,379 +0,0 @@ -/** - * consteval_huffman.hpp - Provides compile-time text compression. - * Written by Clyne Sullivan. - * https://github.com/tcsullivan/consteval-huffman - */ - -#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ -#define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ - -#include -#include -#include -#include - -namespace detail -{ - // Provides a string container for the huffman compressor. - // Using this allows for automatic string data length measurement, as - // well as implementation of the _huffman suffix. - template - struct huffman_string_container { - char data[N]; - consteval huffman_string_container(const char (&s)[N]) noexcept { - std::copy(s, s + N, data); - } - consteval operator const char *() const noexcept { - return data; - } - consteval auto size() const noexcept { - return N; - } - }; -} - -/** - * Compresses the given data string using Huffman coding, providing a - * minimal run-time interface for decompressing the data. - * @tparam data The string of data to be compressed. - */ -template - requires( - std::same_as, - detail::huffman_string_container> && - raw_data.size() > 0) -class huffman_compressor -{ - using size_t = long int; - using usize_t = unsigned long int; - - // Note: class internals need to be defined before the public interface. - // See the bottom of the class definition for usage. -private: - // Node structure used to build a tree for calculating Huffman codes. - struct node { - int value = 0; - size_t freq = 0; - - // Below values are indices into the node list - int parent = -1; - int left = -1; - int right = -1; - }; - - /** - * Builds a list of nodes for every character that appears in the given data. - * This list is sorted by increasing frequency. - * @return Compile-time allocated array of nodes - */ - consteval static auto build_node_list() noexcept { - // Build a list for counting every occuring value - auto list = std::span(new node[256] {}, 256); - for (int i = 0; i < 256; i++) - list[i].value = i; - for (usize_t i = 0; i < raw_data.size(); i++) - list[raw_data[i]].freq++; - - std::sort(list.begin(), list.end(), - [](const auto& a, const auto& b) { return a.freq < b.freq; }); - - // Filter out the non-occuring values, and build a compact list to return - auto first_valid_node = std::find_if(list.begin(), list.end(), - [](const auto& n) { return n.freq != 0; }); - auto fit_size = std::distance(first_valid_node, list.end()); - if (fit_size < 2) - fit_size = 2; - auto fit_list = std::span(new node[fit_size] {}, fit_size); - std::copy(first_valid_node, list.end(), fit_list.begin()); - delete[] list.data(); - return fit_list; - } - - /** - * Returns the count of how many nodes are in the node tree. - */ - consteval static auto tree_count() noexcept { - auto list = build_node_list(); - auto count = list.size() * 2 - 1; - delete[] list.data(); - return count; - } - - /** - * Builds a tree out of the node list, allowing for the calculation of - * Huffman codes. - * @return Compile-time allocated tree of nodes, root node at index zero. - */ - consteval static auto build_node_tree() noexcept { - auto list = build_node_list(); - auto tree = std::span(new node[tree_count()] {}, tree_count()); - - auto list_end = list.end(); // Track end of list as it shrinks - auto tree_begin = tree.end(); // Build tree from bottom - int next_parent_node_value = 0x100; // Give parent nodes unique ids - while (1) { - // Create parent node for two least-occuring values - node new_node { - next_parent_node_value++, - list[0].freq + list[1].freq, - -1, - list[0].value, - list[1].value - }; - - // Move the two nodes into the tree and remove them from the list - *--tree_begin = list[0]; - *--tree_begin = list[1]; - std::copy(list.begin() + 2, list_end--, list.begin()); - if (std::distance(list.begin(), list_end) == 1) { - list.front() = new_node; - break; - } - - // Insert the parent node back into the list - auto insertion_point = std::find_if(list.begin(), list_end - 1, - [&new_node](const auto& n) { return n.freq >= new_node.freq; }); - if (insertion_point != list_end - 1) { - *(list_end - 1) = node(); - std::copy_backward(insertion_point, list_end - 1, list_end); - } - - *insertion_point = new_node; - } - - // Connect child nodes to their parents - tree[0] = list[0]; - for (auto iter = tree.begin(); ++iter != tree.end();) { - if (iter->parent == -1) { - auto parent = std::find_if(tree.begin(), iter, - [&iter](const auto& n) { - return n.left == iter->value || n.right == iter->value; - }); - if (parent != iter) - iter->parent = std::distance(tree.begin(), parent); - } - } - - delete[] list.data(); - return tree; - } - - /** - * Determines the size of the compressed data. - * @return A pair of total bytes used, and bits used in last byte. - */ - consteval static auto compressed_size_info() noexcept { - auto tree = build_node_tree(); - size_t bytes = 1, bits = 0; - - for (usize_t i = 0; i < raw_data.size(); i++) { - auto leaf = std::find_if(tree.begin(), tree.end(), - [c = raw_data[i]](const auto& n) { return n.value == c; }); - - while (leaf->parent != -1) { - if (++bits == 8) - bits = 0, bytes++; - leaf = tree.begin() + leaf->parent; - } - } - - delete[] tree.data(); - return std::make_pair(bytes, bits); - } - - /** - * Compresses the input data, storing the result in the object instance. - */ - consteval void compress() noexcept { - auto tree = build_node_tree(); - - // Set up byte and bit count (note, we're compressing the data backwards) - auto [bytes, bits] = compressed_size_info(); - if (bits > 0) - bits = 8 - bits; - else - bits = 0, bytes--; - - // Compress data backwards, because we obtain the Huffman codes backwards - // as we traverse towards the parent node. - for (auto i = raw_data.size(); i > 0; i--) { - auto leaf = std::find_if(tree.begin(), tree.end(), - [c = raw_data[i - 1]](auto& n) { return n.value == c; }); - - while (leaf->parent != -1) { - auto parent = tree.begin() + leaf->parent; - if (parent->right == leaf->value) - compressed_data[bytes - 1] |= (1 << bits); - if (++bits == 8) - bits = 0, --bytes; - leaf = parent; - } - } - - delete[] tree.data(); - } - - /** - * Builds the decode tree, used to decompress the data. - * Format: three bytes per node. - * 1. Node value, 2. Distance to left child, 3. Distance to right child. - */ - consteval void build_decode_tree() noexcept { - auto tree = build_node_tree(); - auto decode_tree = compressed_data + compressed_size_info().first; - - for (usize_t i = 0; i < tree_count(); i++) { - // Only store node value if it represents a data value - decode_tree[i * 3] = tree[i].value <= 0xFF ? tree[i].value : 0; - - usize_t j; - // Find the left child of this node - for (j = i + 1; j < tree_count(); j++) { - if (tree[i].left == tree[j].value) - break; - } - decode_tree[i * 3 + 1] = j < tree_count() ? j - i : 0; - // Find the right child of this node - for (j = i + 1; j < tree_count(); j++) { - if (tree[i].right == tree[j].value) - break; - } - decode_tree[i * 3 + 2] = j < tree_count() ? j - i : 0; - } - - delete[] tree.data(); - } - -public: - consteval static auto compressed_size() noexcept { - return compressed_size_info().first + 3 * tree_count(); - } - consteval static auto uncompressed_size() noexcept { - return raw_data.size(); - } - consteval static size_t bytes_saved() noexcept { - size_t diff = uncompressed_size() - compressed_size(); - return diff > 0 ? diff : 0; - } - - // Utility for decoding compressed data. - class decoder { - public: - using difference_type = std::ptrdiff_t; - using value_type = int; - - decoder(const unsigned char *comp_data) noexcept - : m_data(comp_data), - m_table(comp_data + compressed_size_info().first) { get_next(); } - decoder() = default; - - constexpr static decoder end(const unsigned char *comp_data) noexcept { - decoder ender; - ender.m_data = comp_data; - if constexpr (bytes_saved() > 0) { - const auto [size_bytes, last_bits] = compressed_size_info(); - ender.m_data += size_bytes - 1; - ender.m_bit = 1 << (7 - last_bits); - } else { - ender.m_data += raw_data.size() + 1; - } - - return ender; - } - - bool operator==(const decoder& other) const noexcept { - return m_data == other.m_data && m_bit == other.m_bit; - } - auto operator*() const noexcept { - return m_current; - } - decoder& operator++() noexcept { - get_next(); - return *this; - } - decoder operator++(int) noexcept { - auto old = *this; - get_next(); - return old; - } - - private: - void get_next() noexcept { - if (*this == end(m_data)) - return; - if constexpr (bytes_saved() > 0) { - auto *node = m_table; - int data = *m_data; - auto bit = m_bit; - do { - node += (data & bit) ? node[2] * 3u : node[1] * 3u; - bit >>= 1; - if (!bit) - bit = 0x80, data = *++m_data; - } while (node[1] != 0); - m_bit = bit; - m_current = *node; - } else { - m_current = *m_data++; - } - } - - const unsigned char *m_data = nullptr; - const unsigned char *m_table = nullptr; - unsigned char m_bit = 0x80; - int m_current = -1; - - friend class huffman_compressor; - }; - - // Stick the forward_iterator check here just so it's run - consteval huffman_compressor() noexcept - requires (std::forward_iterator) - { - if constexpr (bytes_saved() > 0) { - build_decode_tree(); - compress(); - } else { - std::copy(raw_data.data, raw_data.data + raw_data.size(), - compressed_data); - } - } - - auto begin() const noexcept { - return decoder(compressed_data); - } - auto end() const noexcept { - return decoder::end(compressed_data); - } - auto cbegin() const noexcept { return begin(); } - auto cend() const noexcept { return end(); } - - // For accessing the compressed data - auto data() const noexcept { - if constexpr (bytes_saved() > 0) - return compressed_data; - else - return raw_data; - } - - auto size() const noexcept { - if constexpr (bytes_saved() > 0) - return compressed_size(); - else - return uncompressed_size(); - } - -private: - // Contains the compressed data, followed by the decoding tree. - unsigned char compressed_data[ - bytes_saved() > 0 ? compressed_size_info().first + 3 * tree_count() - : raw_data.size()] = {0}; -}; - -template -constexpr auto operator ""_huffman() -{ - return huffman_compressor(); -} - -#endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ diff --git a/include/consteval_huffman/consteval_huffman.hpp b/include/consteval_huffman/consteval_huffman.hpp new file mode 100644 index 0000000..b7ea57f --- /dev/null +++ b/include/consteval_huffman/consteval_huffman.hpp @@ -0,0 +1,379 @@ +/** + * consteval_huffman.hpp - Provides compile-time text compression. + * Written by Clyne Sullivan. + * https://github.com/tcsullivan/consteval-huffman + */ + +#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ +#define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ + +#include +#include +#include +#include + +namespace detail +{ + // Provides a string container for the huffman compressor. + // Using this allows for automatic string data length measurement, as + // well as implementation of the _huffman suffix. + template + struct huffman_string_container { + char data[N]; + consteval huffman_string_container(const char (&s)[N]) noexcept { + std::copy(s, s + N, data); + } + consteval operator const char *() const noexcept { + return data; + } + consteval auto size() const noexcept { + return N; + } + }; +} + +/** + * Compresses the given data string using Huffman coding, providing a + * minimal run-time interface for decompressing the data. + * @tparam data The string of data to be compressed. + */ +template + requires( + std::same_as, + detail::huffman_string_container> && + raw_data.size() > 0) +class huffman_compressor +{ + using size_t = long int; + using usize_t = unsigned long int; + + // Note: class internals need to be defined before the public interface. + // See the bottom of the class definition for usage. +private: + // Node structure used to build a tree for calculating Huffman codes. + struct node { + int value = 0; + size_t freq = 0; + + // Below values are indices into the node list + int parent = -1; + int left = -1; + int right = -1; + }; + + /** + * Builds a list of nodes for every character that appears in the given data. + * This list is sorted by increasing frequency. + * @return Compile-time allocated array of nodes + */ + consteval static auto build_node_list() noexcept { + // Build a list for counting every occuring value + auto list = std::span(new node[256] {}, 256); + for (int i = 0; i < 256; i++) + list[i].value = i; + for (usize_t i = 0; i < raw_data.size(); i++) + list[raw_data[i]].freq++; + + std::sort(list.begin(), list.end(), + [](const auto& a, const auto& b) { return a.freq < b.freq; }); + + // Filter out the non-occuring values, and build a compact list to return + auto first_valid_node = std::find_if(list.begin(), list.end(), + [](const auto& n) { return n.freq != 0; }); + auto fit_size = std::distance(first_valid_node, list.end()); + if (fit_size < 2) + fit_size = 2; + auto fit_list = std::span(new node[fit_size] {}, fit_size); + std::copy(first_valid_node, list.end(), fit_list.begin()); + delete[] list.data(); + return fit_list; + } + + /** + * Returns the count of how many nodes are in the node tree. + */ + consteval static auto tree_count() noexcept { + auto list = build_node_list(); + auto count = list.size() * 2 - 1; + delete[] list.data(); + return count; + } + + /** + * Builds a tree out of the node list, allowing for the calculation of + * Huffman codes. + * @return Compile-time allocated tree of nodes, root node at index zero. + */ + consteval static auto build_node_tree() noexcept { + auto list = build_node_list(); + auto tree = std::span(new node[tree_count()] {}, tree_count()); + + auto list_end = list.end(); // Track end of list as it shrinks + auto tree_begin = tree.end(); // Build tree from bottom + int next_parent_node_value = 0x100; // Give parent nodes unique ids + while (1) { + // Create parent node for two least-occuring values + node new_node { + next_parent_node_value++, + list[0].freq + list[1].freq, + -1, + list[0].value, + list[1].value + }; + + // Move the two nodes into the tree and remove them from the list + *--tree_begin = list[0]; + *--tree_begin = list[1]; + std::copy(list.begin() + 2, list_end--, list.begin()); + if (std::distance(list.begin(), list_end) == 1) { + list.front() = new_node; + break; + } + + // Insert the parent node back into the list + auto insertion_point = std::find_if(list.begin(), list_end - 1, + [&new_node](const auto& n) { return n.freq >= new_node.freq; }); + if (insertion_point != list_end - 1) { + *(list_end - 1) = node(); + std::copy_backward(insertion_point, list_end - 1, list_end); + } + + *insertion_point = new_node; + } + + // Connect child nodes to their parents + tree[0] = list[0]; + for (auto iter = tree.begin(); ++iter != tree.end();) { + if (iter->parent == -1) { + auto parent = std::find_if(tree.begin(), iter, + [&iter](const auto& n) { + return n.left == iter->value || n.right == iter->value; + }); + if (parent != iter) + iter->parent = std::distance(tree.begin(), parent); + } + } + + delete[] list.data(); + return tree; + } + + /** + * Determines the size of the compressed data. + * @return A pair of total bytes used, and bits used in last byte. + */ + consteval static auto compressed_size_info() noexcept { + auto tree = build_node_tree(); + size_t bytes = 1, bits = 0; + + for (usize_t i = 0; i < raw_data.size(); i++) { + auto leaf = std::find_if(tree.begin(), tree.end(), + [c = raw_data[i]](const auto& n) { return n.value == c; }); + + while (leaf->parent != -1) { + if (++bits == 8) + bits = 0, bytes++; + leaf = tree.begin() + leaf->parent; + } + } + + delete[] tree.data(); + return std::make_pair(bytes, bits); + } + + /** + * Compresses the input data, storing the result in the object instance. + */ + consteval void compress() noexcept { + auto tree = build_node_tree(); + + // Set up byte and bit count (note, we're compressing the data backwards) + auto [bytes, bits] = compressed_size_info(); + if (bits > 0) + bits = 8 - bits; + else + bits = 0, bytes--; + + // Compress data backwards, because we obtain the Huffman codes backwards + // as we traverse towards the parent node. + for (auto i = raw_data.size(); i > 0; i--) { + auto leaf = std::find_if(tree.begin(), tree.end(), + [c = raw_data[i - 1]](auto& n) { return n.value == c; }); + + while (leaf->parent != -1) { + auto parent = tree.begin() + leaf->parent; + if (parent->right == leaf->value) + compressed_data[bytes - 1] |= (1 << bits); + if (++bits == 8) + bits = 0, --bytes; + leaf = parent; + } + } + + delete[] tree.data(); + } + + /** + * Builds the decode tree, used to decompress the data. + * Format: three bytes per node. + * 1. Node value, 2. Distance to left child, 3. Distance to right child. + */ + consteval void build_decode_tree() noexcept { + auto tree = build_node_tree(); + auto decode_tree = compressed_data + compressed_size_info().first; + + for (usize_t i = 0; i < tree_count(); i++) { + // Only store node value if it represents a data value + decode_tree[i * 3] = tree[i].value <= 0xFF ? tree[i].value : 0; + + usize_t j; + // Find the left child of this node + for (j = i + 1; j < tree_count(); j++) { + if (tree[i].left == tree[j].value) + break; + } + decode_tree[i * 3 + 1] = j < tree_count() ? j - i : 0; + // Find the right child of this node + for (j = i + 1; j < tree_count(); j++) { + if (tree[i].right == tree[j].value) + break; + } + decode_tree[i * 3 + 2] = j < tree_count() ? j - i : 0; + } + + delete[] tree.data(); + } + +public: + consteval static auto compressed_size() noexcept { + return compressed_size_info().first + 3 * tree_count(); + } + consteval static auto uncompressed_size() noexcept { + return raw_data.size(); + } + consteval static size_t bytes_saved() noexcept { + size_t diff = uncompressed_size() - compressed_size(); + return diff > 0 ? diff : 0; + } + + // Utility for decoding compressed data. + class decoder { + public: + using difference_type = std::ptrdiff_t; + using value_type = int; + + decoder(const unsigned char *comp_data) noexcept + : m_data(comp_data), + m_table(comp_data + compressed_size_info().first) { get_next(); } + decoder() = default; + + constexpr static decoder end(const unsigned char *comp_data) noexcept { + decoder ender; + ender.m_data = comp_data; + if constexpr (bytes_saved() > 0) { + const auto [size_bytes, last_bits] = compressed_size_info(); + ender.m_data += size_bytes - 1; + ender.m_bit = 1 << (7 - last_bits); + } else { + ender.m_data += raw_data.size() + 1; + } + + return ender; + } + + bool operator==(const decoder& other) const noexcept { + return m_data == other.m_data && m_bit == other.m_bit; + } + auto operator*() const noexcept { + return m_current; + } + decoder& operator++() noexcept { + get_next(); + return *this; + } + decoder operator++(int) noexcept { + auto old = *this; + get_next(); + return old; + } + + private: + void get_next() noexcept { + if (*this == end(m_data)) + return; + if constexpr (bytes_saved() > 0) { + auto *node = m_table; + int data = *m_data; + auto bit = m_bit; + do { + node += (data & bit) ? node[2] * 3u : node[1] * 3u; + bit >>= 1; + if (!bit) + bit = 0x80, data = *++m_data; + } while (node[1] != 0); + m_bit = bit; + m_current = *node; + } else { + m_current = *m_data++; + } + } + + const unsigned char *m_data = nullptr; + const unsigned char *m_table = nullptr; + unsigned char m_bit = 0x80; + int m_current = -1; + + friend class huffman_compressor; + }; + + // Stick the forward_iterator check here just so it's run + consteval huffman_compressor() noexcept + requires (std::forward_iterator) + { + if constexpr (bytes_saved() > 0) { + build_decode_tree(); + compress(); + } else { + std::copy(raw_data.data, raw_data.data + raw_data.size(), + compressed_data); + } + } + + auto begin() const noexcept { + return decoder(compressed_data); + } + auto end() const noexcept { + return decoder::end(compressed_data); + } + auto cbegin() const noexcept { return begin(); } + auto cend() const noexcept { return end(); } + + // For accessing the compressed data + auto data() const noexcept { + if constexpr (bytes_saved() > 0) + return compressed_data; + else + return raw_data; + } + + auto size() const noexcept { + if constexpr (bytes_saved() > 0) + return compressed_size(); + else + return uncompressed_size(); + } + +private: + // Contains the compressed data, followed by the decoding tree. + unsigned char compressed_data[ + bytes_saved() > 0 ? compressed_size_info().first + 3 * tree_count() + : raw_data.size()] = {0}; +}; + +template +constexpr auto operator ""_huffman() +{ + return huffman_compressor(); +} + +#endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ -- cgit v1.2.3