aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--consteval_huffman.hpp167
1 files changed, 167 insertions, 0 deletions
diff --git a/consteval_huffman.hpp b/consteval_huffman.hpp
new file mode 100644
index 0000000..f2204dd
--- /dev/null
+++ b/consteval_huffman.hpp
@@ -0,0 +1,167 @@
+#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_
+#define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_
+
+#include <algorithm>
+#include <span>
+
+/**
+ * Compresses given data at compile-time, while also providing utilities for decoding.
+ * @tparam data Expected to be a null-terminated `char` of data to be compressed.
+ */
+template<auto data>
+class huffman_compress
+{
+public:
+ using size_t = unsigned long int;
+
+ // Contains the compressed data.
+ unsigned char output[size()] = {};
+ // Contains a 'tree' that can be used to decompress the data.
+ unsigned char decode_tree[3 * tree_count()] = {};
+
+ // Returns the size of the compressed data, in bytes.
+ consteval static auto size() { return output_size().first; }
+ // Returns how many of the bits in the last byte of `output` are actually part of the data.
+ consteval static auto lastbitscount() { return output_size().second; }
+
+ consteval huffman_compress() {
+ build_decode_tree();
+ compress();
+ }
+
+private:
+ // Node structure used for tree-building.
+ struct node {
+ int value = 0;
+ size_t freq = 0;
+
+ int parent = -1;
+ int left = -1;
+ int right = -1;
+ };
+
+ // Builds a list of nodes for every character that appears in the data.
+ // This list is sorted by increasing frequency.
+ consteval static auto build_node_list() {
+ auto table = std::span(new node[256] {}, 256);
+ for (int i = 0; i < 256; i++)
+ table[i].value = i;
+ for (size_t i = 0; data[i]; i++)
+ table[data[i]].freq++;
+ std::sort(table.begin(), table.end(), [](auto& a, auto& b) { return a.freq < b.freq; });
+ int empty_count;
+ for (empty_count = 0; table[empty_count].freq == 0; empty_count++);
+ auto iter = std::copy(table.begin() + empty_count, table.end(), table.begin());
+ std::fill(iter, table.end(), node());
+ return table;
+ }
+ // Returns the count of how many nodes in build_node_list() are valid nodes.
+ consteval static auto node_count() {
+ auto table = build_node_list();
+ size_t i;
+ for (i = 0; table[i].value != 0; i++);
+ delete[] table.data();
+ return i;
+ }
+ // Builds a tree out of the node list, allowing for compression and decompression.
+ consteval static auto build_node_tree() {
+ auto table = build_node_list();
+
+ auto end = node_count();
+ size_t endend = 255;
+ unsigned char endv = 0xFF;
+ while (table[1].freq != 0) {
+ node n { endv--,
+ table[0].freq + table[1].freq,
+ -1,
+ table[0].value,
+ table[1].value };
+ table[endend--] = table[0];
+ table[endend--] = table[1];
+ size_t insert;
+ for (insert = 0;
+ table[insert].freq != 0 && table[insert].freq < n.freq;
+ insert++);
+ std::copy_backward(table.begin() + insert,
+ table.begin() + end,
+ table.begin() + end + 1);
+ table[insert] = n;
+ std::copy(table.begin() + 2, table.begin() + end + 1, table.begin());
+ table[end - 1] = node();
+ table[end--] = node();
+ }
+ std::copy(table.begin() + endend + 1, table.end(), table.begin() + 1);
+
+ for (size_t i = 1; i < 256 - endend; i++) {
+ if (table[i].parent == -1) {
+ for (size_t j = 0; j < i; j++) {
+ if (table[j].left == table[i].value || table[j].right == table[i].value) {
+ table[i].parent = j;
+ break;
+ }
+ }
+ }
+ }
+
+ return table;
+ }
+ // Returns the count of how many nodes are in the node tree.
+ consteval static auto tree_count() {
+ auto table = build_node_tree();
+ size_t i;
+ for (i = 0; i < 256 && table[i].value != 0; i++);
+ delete[] table.data();
+ return i;
+ }
+ // Determines the size of the compressed data.
+ // Returns a pair: [total byte size, bits used in last byte].
+ consteval static auto output_size() {
+ auto tree = build_node_tree();
+ size_t bytes = 0, bits = 0;
+ for (size_t i = 0; i < std::char_traits<char>::length(data); i++) {
+ auto leaf = std::find_if(tree.begin(), tree.end(), [c = data[i]](auto& n) { return n.value == c; });
+ while (leaf->parent != -1) {
+ if (++bits == 8)
+ bits = 0, bytes++;
+ leaf = tree.begin() + leaf->parent;
+ }
+ }
+
+ delete[] tree.data();
+ return std::make_pair(bytes + 1, bits);
+ }
+ // Compresses the input data, placing the result in `output`.
+ consteval void compress()
+ {
+ auto tree = build_node_tree();
+ size_t bytes = size();
+ int bits = 5;
+ for (size_t i = std::char_traits<char>::length(data); i > 0; i--) {
+ auto leaf = std::find_if(tree.begin(), tree.begin() + tree_count(), [c = data[i - 1]](auto& n) { return n.value == c; });
+ while (leaf->parent != -1) {
+ auto parent = tree.begin() + leaf->parent;
+ if (parent->right == leaf->value)
+ output[bytes - 1] |= (1 << bits);
+ if (++bits == 8) {
+ bits = 0;
+ if (--bytes == 0)
+ return;
+ }
+ leaf = parent;
+ }
+ }
+ delete[] tree.data();
+ }
+ // Builds the tree that can be used for decompression, stored in `decode_tree`.
+ consteval void build_decode_tree() {
+ auto tree = build_node_tree();
+ for (size_t i = 0; i < tree_count(); i++) {
+ decode_tree[i] = tree[i].value;
+ decode_tree[i + 1] = std::max(tree[i].left, 0);
+ decode_tree[i + 1] = std::max(tree[i].right, 0);
+ }
+ delete[] tree.data();
+ }
+};
+
+#endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_