#ifndef TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ #define TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_ #include #include /** * Compresses given data at compile-time, while also providing utilities for decoding. * @tparam data Expected to be a null-terminated `char` of data to be compressed. */ template class huffman_compress { using size_t = unsigned long int; // The internals for this class needed to be defined before they're used in // the public interface. Scroll to the next `public` section for usable variables/functions. private: // Node structure used for tree-building. struct node { int value = 0; size_t freq = 0; int parent = -1; int left = -1; int right = -1; }; // Builds a list of nodes for every character that appears in the data. // This list is sorted by increasing frequency. consteval static auto build_node_list() { auto table = std::span(new node[256] {}, 256); for (int i = 0; i < 256; i++) table[i].value = i; for (size_t i = 0; data[i]; i++) table[data[i]].freq++; std::sort(table.begin(), table.end(), [](auto& a, auto& b) { return a.freq < b.freq; }); int empty_count; for (empty_count = 0; table[empty_count].freq == 0; empty_count++); auto iter = std::copy(table.begin() + empty_count, table.end(), table.begin()); std::fill(iter, table.end(), node()); return table; } // Returns the count of how many nodes in build_node_list() are valid nodes. consteval static auto node_count() { auto table = build_node_list(); size_t i; for (i = 0; table[i].value != 0; i++); delete[] table.data(); return i; } // Builds a tree out of the node list, allowing for compression and decompression. consteval static auto build_node_tree() { auto table = build_node_list(); auto end = node_count(); size_t endend = 255; unsigned char endv = 0xFE; while (table[1].freq != 0) { node n { endv--, table[0].freq + table[1].freq, -1, table[0].value, table[1].value }; table[endend--] = table[0]; table[endend--] = table[1]; size_t insert; for (insert = 0; table[insert].freq != 0 && table[insert].freq < n.freq; insert++); std::copy_backward(table.begin() + insert, table.begin() + end, table.begin() + end + 1); table[insert] = n; std::copy(table.begin() + 2, table.begin() + end + 1, table.begin()); table[end - 1] = node(); table[end--] = node(); } std::copy(table.begin() + endend + 1, table.end(), table.begin() + 1); for (size_t i = 1; i < 256 - endend; i++) { if (table[i].parent == -1) { for (size_t j = 0; j < i; j++) { if (table[j].left == table[i].value || table[j].right == table[i].value) { table[i].parent = j; break; } } } } return table; } // Returns the count of how many nodes are in the node tree. consteval static auto tree_count() { auto table = build_node_tree(); size_t i; for (i = 0; i < 256 && table[i].value != 0; i++); delete[] table.data(); return i; } // Determines the size of the compressed data. // Returns a pair: [total byte size, bits used in last byte]. consteval static auto output_size() { auto tree = build_node_tree(); size_t bytes = 1, bits = 0; for (size_t i = 0; i < std::char_traits::length(data); i++) { auto leaf = std::find_if(tree.begin(), tree.end(), [c = data[i]](auto& n) { return n.value == c; }); while (leaf->parent != -1) { if (++bits == 8) bits = 0, bytes++; leaf = tree.begin() + leaf->parent; } } delete[] tree.data(); return std::make_pair(bytes, bits); } // Compresses the input data, placing the result in `output`. consteval void compress() { auto tree = build_node_tree(); size_t bytes = size(); int bits = 8 - output_size().second; for (size_t i = std::char_traits::length(data); i > 0; i--) { auto leaf = std::find_if(tree.begin(), tree.begin() + tree_count(), [c = data[i - 1]](auto& n) { return n.value == c; }); while (leaf->parent != -1) { auto parent = tree.begin() + leaf->parent; if (parent->right == leaf->value) output[bytes - 1] |= (1 << bits); if (++bits == 8) bits = 0, --bytes; leaf = parent; } } delete[] tree.data(); } // Builds the tree that can be used for decompression, stored in `decode_tree`. consteval void build_decode_tree() { auto tree = build_node_tree(); for (size_t i = 0; i < tree_count(); i++) { decode_tree[i * 3] = tree[i].value; size_t j; for (j = i + 1; j < tree_count(); j++) { if (tree[i].left == tree[j].value) break; } decode_tree[i * 3 + 1] = j < tree_count() ? j : 0; for (j = i + 1; j < tree_count(); j++) { if (tree[i].right == tree[j].value) break; } decode_tree[i * 3 + 2] = j < tree_count() ? j : 0; } delete[] tree.data(); } public: // Returns the size of the compressed data, in bytes. consteval static auto size() { return output_size().first; } // Returns how many of the bits in the last byte of `output` are actually part of the data. consteval static auto lastbitscount() { return output_size().second; } // Contains the compressed data. unsigned char output[size()] = {}; // Contains a 'tree' that can be used to decompress the data. unsigned char decode_tree[3 * tree_count()] = {}; consteval huffman_compress() { build_decode_tree(); compress(); } // Utility for decoding compressed data. class decode_info { public: decode_info(const huffman_compress& data_) : m_data(data_) { get_next(); } // Checks if another byte is available operator bool() const { return m_pos < (m_data.size() - 1) || m_bit >= (8 - m_data.lastbitscount()); } // Gets the current byte int operator*() const { return m_current; } // Moves to the next byte int operator++() { get_next(); return m_current; } private: // Internal: moves to next byte void get_next() { auto node = m_data.decode_tree; do { bool bit = m_data.output[m_pos] & (1 << (m_bit - 1)); if (--m_bit == 0) m_bit = 8, m_pos++; node = m_data.decode_tree + 3 * node[bit ? 2 : 1]; } while (node[1] != 0); m_current = *node; } const huffman_compress& m_data; size_t m_pos = 0; unsigned char m_bit = 8; int m_current = -1; }; // Creates a decoder object for iteratively decompressing the data. auto get_decoder() const { return decode_info(*this); } }; #endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_