decoder conforms to std::forward_iterator

author Clyne Sullivan <clyne@bitgloo.com>

Sun, 27 Dec 2020 22:12:14 +0000 (17:12 -0500)

committer Clyne Sullivan <clyne@bitgloo.com>

Sun, 27 Dec 2020 22:12:14 +0000 (17:12 -0500)
author Clyne Sullivan <clyne@bitgloo.com>
Sun, 27 Dec 2020 22:12:14 +0000 (17:12 -0500)
committer Clyne Sullivan <clyne@bitgloo.com>
Sun, 27 Dec 2020 22:12:14 +0000 (17:12 -0500)
diff --git a/README.md b/README.md

index cd598919e42ce45b940d0d550eeb3a9bebba1c7b..b834d1c6ed5285748ff4f2b81c2ec0dd5cde449a 100644 (file)
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
  
  Allows for long string or data constants to be compressed at compile-time, with a small decoder routine for decompression at run-time. 
  
-Compression is achieved using Huffman coding, which works by creating codes for frequently-occuring characters.
+Compression is achieved using [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding), which works by creating short (measured in bits) codes for frequently-occuring characters.
  
  ## Use cases
  
@@ -14,7 +14,7 @@ A ~3.5kB string of JSON can be compressed down ~2.5kB ([see it on Godbolt](https
  
  A ~40 line commented sample of Lisp can be reduced from 1,662 bytes to 1,244 (418 bytes saved) ([on Godbolt](https://godbolt.org/z/c64Pzz)).
  
-Compression will work best on not-small blocks of text or data. This is because a decoding tree must be stored with the compressed data, requiring three bytes per value.
+Compression will work best on larger blocks of text or data, as a decoding tree must be stored with the compressed data that requires three bytes per unique data byte.
  
  ## How to Use
  
diff --git a/consteval_huffman.hpp b/consteval_huffman.hpp

index 7bb0ce5f94c225c618d719063d51033cf9b2f6ca..5978e75508c89e92a1cb689f0fd9c878f8772828 100644 (file)
--- a/consteval_huffman.hpp
+++ b/consteval_huffman.hpp
@@ -11,19 +11,18 @@
  #include <span>
  
  /**
- * Compresses the given character string using Huffman coding, providing a
+ * Compresses the given data string using Huffman coding, providing a
   * minimal run-time interface for decompressing the data.
   * @tparam data The string of data to be compressed.
- * @tparam data_length The size in bytes of the data, defaults to using strlen().
+ * @tparam data_length The size in bytes of the data, defaulting to strlen() behavior.
   */
  template<const char *data, auto data_length = std::char_traits<char>::length(data)>
+    requires(data_length > 0)
  class huffman_compress
  {
      using size_t = long int;
  
-    // Jump to the bottom of this header for the public-facing features of this
-    // class.
-    // The internals needed to be defined before they were used.
+    // Note: class internals need to be defined before the public interface.
  private:
      // Node structure used to build a tree for calculating Huffman codes.
      struct node {
@@ -56,6 +55,8 @@ private:
          auto first_valid_node = std::find_if(list.begin(), list.end(),
              [](const auto& n) { return n.freq != 0; });
          auto fit_size = std::distance(first_valid_node, list.end());
+        if (fit_size < 2)
+            fit_size = 2;
          auto fit_list = std::span(new node[fit_size] {}, fit_size);
          std::copy(first_valid_node, list.end(), fit_list.begin());
          delete[] list.data();
@@ -119,7 +120,9 @@ private:
          for (auto iter = tree.begin(); ++iter != tree.end();) {
              if (iter->parent == -1) {
                  auto parent = std::find_if(tree.begin(), iter,
-                    [&iter](const auto& n) { return n.left == iter->value || n.right == iter->value; });
+                    [&iter](const auto& n) {
+                        return n.left == iter->value || n.right == iter->value;
+                    });
                  if (parent != iter)
                      iter->parent = std::distance(tree.begin(), parent);
              }
@@ -230,34 +233,47 @@ public:
      // Utility for decoding compressed data.
      class decode_info {
      public:
-        decode_info(const huffman_compress<data, data_length>& comp_data) :
+        using difference_type = std::ptrdiff_t;
+        using value_type = int;
+
+        decode_info(const huffman_compress<data, data_length>* comp_data) :
              m_data(comp_data) { get_next(); }
+        decode_info() = default;
  
-        // Checks if another byte is available
-        operator bool() const {
+        decode_info& end() {
              if constexpr (bytes_saved() > 0) {
-                const auto [size_bytes, last_bits] = m_data.compressed_size_info();
-                return m_pos < (size_bytes - 1) || m_bit > (8 - last_bits);
+                const auto [size_bytes, last_bits] = m_data->compressed_size_info();
+                m_pos = size_bytes - 1;
+                m_bit = 8 - last_bits;
              } else {
-                return m_pos < data_length;
+                m_pos = data_length + 1;
              }
+
+            return *this;
          }
  
-        // Gets the current byte
-        int operator*() const { return m_current; }
-        // Moves to the next byte
-        int operator++() {
-            get_next();
+        bool operator==(const decode_info& other) const {
+            return m_data == other.m_data && m_bit == other.m_bit && m_pos == other.m_pos;
+        }
+        auto operator*() const {
              return m_current;
          }
+        decode_info& operator++() {
+            get_next();
+            return *this;
+        }
+        decode_info operator++(int) {
+            auto old = *this;
+            get_next();
+            return old;
+        }
  
      private:
-        // Internal: moves to next byte
          void get_next() {
              if constexpr (bytes_saved() > 0) {
-                auto *node = m_data.decode_tree;
+                auto *node = m_data->decode_tree;
                  do {
-                    bool bit = m_data.compressed_data[m_pos] & (1 << (m_bit - 1));
+                    bool bit = m_data->compressed_data[m_pos] & (1 << (m_bit - 1));
                      if (--m_bit == 0)
                          m_bit = 8, m_pos++;
                      node += 3 * node[bit ? 2 : 1];
@@ -268,7 +284,7 @@ public:
              }
          }
  
-        const huffman_compress<data>& m_data;
+        const huffman_compress<data> *m_data = nullptr;
          size_t m_pos = 0;
          unsigned char m_bit = 8;
          int m_current = -1;
@@ -276,7 +292,19 @@ public:
          friend class huffman_compress;
      };
  
-    consteval huffman_compress() {
+    auto begin() const {
+        return decode_info(this);
+    }
+    auto end() const {
+        return decode_info(this).end();
+    }
+    auto cbegin() const { begin(); }
+    auto cend() const { end(); }
+
+    // Stick the requires clause here just so it's run
+    consteval huffman_compress()
+        requires (std::forward_iterator<decode_info>)
+    {
          if constexpr (bytes_saved() > 0) {
              build_decode_tree();
              compress();
@@ -285,16 +313,12 @@ public:
          }
      }
  
-    // Creates a decoder object for iteratively decompressing the data.
-    auto get_decoder() const {
-        return decode_info(*this);
-    }
-
  private:
      // Contains the compressed data.
-    unsigned char compressed_data[bytes_saved() > 0 ? compressed_size_info().first : data_length] = {};
+    unsigned char compressed_data[bytes_saved() > 0 ? compressed_size_info().first : data_length] = {0};
      // Contains a 'tree' that can be used to decompress the data.
-    unsigned char decode_tree[3 * tree_count()] = {};
+    unsigned char decode_tree[3 * tree_count()] = {0};
  };
  
  #endif // TCSULLIVAN_CONSTEVAL_HUFFMAN_HPP_
+
author	Clyne Sullivan <clyne@bitgloo.com>
	Sun, 27 Dec 2020 22:12:14 +0000 (17:12 -0500)
committer	Clyne Sullivan <clyne@bitgloo.com>
	Sun, 27 Dec 2020 22:12:14 +0000 (17:12 -0500)
README.md		patch \| blob \| history
consteval_huffman.hpp		patch \| blob \| history