From ca567e69d8560cfebfb776c80e16fbad44b9dfe2 Mon Sep 17 00:00:00 2001 From: clyne Date: Wed, 1 Jul 2020 16:45:51 -0400 Subject: [PATCH] Create consteval_huffman.hpp --- consteval_huffman.hpp | 167 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 consteval_huffman.hpp diff --git a/consteval_huffman.hpp b/consteval_huffman.hpp new file mode 100644 index 0000000..f2204dd --- /dev/null +++ b/consteval_huffman.hpp @@ -0,0 +1,167 @@ +#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ +#define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ + +#include +#include + +/** + * Compresses given data at compile-time, while also providing utilities for decoding. + * @tparam data Expected to be a null-terminated `char` of data to be compressed. + */ +template +class huffman_compress +{ +public: + using size_t = unsigned long int; + + // Contains the compressed data. + unsigned char output[size()] = {}; + // Contains a 'tree' that can be used to decompress the data. + unsigned char decode_tree[3 * tree_count()] = {}; + + // Returns the size of the compressed data, in bytes. + consteval static auto size() { return output_size().first; } + // Returns how many of the bits in the last byte of `output` are actually part of the data. + consteval static auto lastbitscount() { return output_size().second; } + + consteval huffman_compress() { + build_decode_tree(); + compress(); + } + +private: + // Node structure used for tree-building. + struct node { + int value = 0; + size_t freq = 0; + + int parent = -1; + int left = -1; + int right = -1; + }; + + // Builds a list of nodes for every character that appears in the data. + // This list is sorted by increasing frequency. + consteval static auto build_node_list() { + auto table = std::span(new node[256] {}, 256); + for (int i = 0; i < 256; i++) + table[i].value = i; + for (size_t i = 0; data[i]; i++) + table[data[i]].freq++; + std::sort(table.begin(), table.end(), [](auto& a, auto& b) { return a.freq < b.freq; }); + int empty_count; + for (empty_count = 0; table[empty_count].freq == 0; empty_count++); + auto iter = std::copy(table.begin() + empty_count, table.end(), table.begin()); + std::fill(iter, table.end(), node()); + return table; + } + // Returns the count of how many nodes in build_node_list() are valid nodes. + consteval static auto node_count() { + auto table = build_node_list(); + size_t i; + for (i = 0; table[i].value != 0; i++); + delete[] table.data(); + return i; + } + // Builds a tree out of the node list, allowing for compression and decompression. + consteval static auto build_node_tree() { + auto table = build_node_list(); + + auto end = node_count(); + size_t endend = 255; + unsigned char endv = 0xFF; + while (table[1].freq != 0) { + node n { endv--, + table[0].freq + table[1].freq, + -1, + table[0].value, + table[1].value }; + table[endend--] = table[0]; + table[endend--] = table[1]; + size_t insert; + for (insert = 0; + table[insert].freq != 0 && table[insert].freq < n.freq; + insert++); + std::copy_backward(table.begin() + insert, + table.begin() + end, + table.begin() + end + 1); + table[insert] = n; + std::copy(table.begin() + 2, table.begin() + end + 1, table.begin()); + table[end - 1] = node(); + table[end--] = node(); + } + std::copy(table.begin() + endend + 1, table.end(), table.begin() + 1); + + for (size_t i = 1; i < 256 - endend; i++) { + if (table[i].parent == -1) { + for (size_t j = 0; j < i; j++) { + if (table[j].left == table[i].value || table[j].right == table[i].value) { + table[i].parent = j; + break; + } + } + } + } + + return table; + } + // Returns the count of how many nodes are in the node tree. + consteval static auto tree_count() { + auto table = build_node_tree(); + size_t i; + for (i = 0; i < 256 && table[i].value != 0; i++); + delete[] table.data(); + return i; + } + // Determines the size of the compressed data. + // Returns a pair: [total byte size, bits used in last byte]. + consteval static auto output_size() { + auto tree = build_node_tree(); + size_t bytes = 0, bits = 0; + for (size_t i = 0; i < std::char_traits::length(data); i++) { + auto leaf = std::find_if(tree.begin(), tree.end(), [c = data[i]](auto& n) { return n.value == c; }); + while (leaf->parent != -1) { + if (++bits == 8) + bits = 0, bytes++; + leaf = tree.begin() + leaf->parent; + } + } + + delete[] tree.data(); + return std::make_pair(bytes + 1, bits); + } + // Compresses the input data, placing the result in `output`. + consteval void compress() + { + auto tree = build_node_tree(); + size_t bytes = size(); + int bits = 5; + for (size_t i = std::char_traits::length(data); i > 0; i--) { + auto leaf = std::find_if(tree.begin(), tree.begin() + tree_count(), [c = data[i - 1]](auto& n) { return n.value == c; }); + while (leaf->parent != -1) { + auto parent = tree.begin() + leaf->parent; + if (parent->right == leaf->value) + output[bytes - 1] |= (1 << bits); + if (++bits == 8) { + bits = 0; + if (--bytes == 0) + return; + } + leaf = parent; + } + } + delete[] tree.data(); + } + // Builds the tree that can be used for decompression, stored in `decode_tree`. + consteval void build_decode_tree() { + auto tree = build_node_tree(); + for (size_t i = 0; i < tree_count(); i++) { + decode_tree[i] = tree[i].value; + decode_tree[i + 1] = std::max(tree[i].left, 0); + decode_tree[i + 1] = std::max(tree[i].right, 0); + } + delete[] tree.data(); + } +}; + +#endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_