tudocomp
– The TU Dortmund Compression Framework
BinaryTrie.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <vector>
5 #include <tudocomp/Algorithm.hpp>
7 
8 namespace tdc {
9 namespace lz78 {
10 
11 class BinaryTrie : public Algorithm, public LZ78Trie<> {
12 
13  /*
14  * The trie is not stored in standard form. Each node stores the pointer to its first child and a pointer to its next sibling (first as first come first served)
15  */
16  std::vector<factorid_t> m_first_child;
17  std::vector<factorid_t> m_next_sibling;
18  std::vector<uliteral_t> m_literal;
19 
20  IF_STATS(
21  size_t m_resizes = 0;
22  size_t m_specialresizes = 0;
23  )
24 public:
25  inline static Meta meta() {
26  Meta m("lz78trie", "binary", "Lempel-Ziv 78 Binary Trie");
27  return m;
28  }
29 
30  inline BinaryTrie(Env&& env, size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
31  : Algorithm(std::move(env))
32  , LZ78Trie(n,remaining_characters)
33  {
34  if(reserve > 0) {
35  m_first_child.reserve(reserve);
36  m_next_sibling.reserve(reserve);
37  m_literal.reserve(reserve);
38  }
39  }
40 
41  IF_STATS(
42  MoveGuard m_guard;
43  inline ~BinaryTrie() {
44  if (m_guard) {
45  StatPhase::log("resizes", m_resizes);
46  StatPhase::log("special resizes", m_specialresizes);
47  StatPhase::log("table size", m_first_child.capacity());
48  StatPhase::log("load ratio", m_first_child.size()*100/m_first_child.capacity());
49  }
50  }
51  )
52  BinaryTrie(BinaryTrie&& other) = default;
53  BinaryTrie& operator=(BinaryTrie&& other) = default;
54 
55  inline node_t add_rootnode(uliteral_t c) {
56  DCHECK_EQ(c, size());
57  m_first_child.push_back(undef_id);
58  m_next_sibling.push_back(undef_id);
59  m_literal.push_back(c);
60  return node_t(c, true);
61  }
62 
63  inline node_t get_rootnode(uliteral_t c) const {
64  return node_t(c, false);
65  }
66 
67  inline void clear() {
68  m_first_child.clear();
69  m_next_sibling.clear();
70  m_literal.clear();
71  }
72 
73  inline node_t find_or_insert(const node_t& parent_w, uliteral_t c) {
74  auto parent = parent_w.id();
75  const factorid_t newleaf_id = size();
76 
77  DCHECK_LT(parent, size());
78 
79 
80  if(m_first_child[parent] == undef_id) {
81  m_first_child[parent] = newleaf_id;
82  } else {
83  factorid_t node = m_first_child[parent];
84  while(true) { // search the binary tree stored in parent (following left/right siblings)
85  if(c == m_literal[node]) return node_t(node, false);
86  if(m_next_sibling[node] == undef_id) {
87  m_next_sibling[node] = newleaf_id;
88  break;
89  }
90  node = m_next_sibling[node];
91  }
92  }
93  if(m_first_child.capacity() == m_first_child.size()) {
94  const size_t newbound = m_first_child.size()+expected_number_of_remaining_elements(size());
95  if(newbound < m_first_child.size()*2 ) {
96  m_first_child.reserve (newbound);
97  m_next_sibling.reserve (newbound);
98  m_literal.reserve (newbound);
99  IF_STATS(++m_specialresizes);
100  }
101  IF_STATS(++m_resizes);
102  }
103  m_first_child.push_back(undef_id);
104  m_next_sibling.push_back(undef_id);
105  m_literal.push_back(c);
106  return node_t(size() - 1, true);
107  }
108 
109  inline size_t size() const {
110  return m_first_child.size();
111  }
112 };
113 
114 }} //ns
115 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
Algorithm(Algorithm const &)=default
LZ78Trie(const size_t n, const size_t &remaining_characters)
Definition: LZ78Trie.hpp:48
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
factorid_t id() const
Definition: LZ78Trie.hpp:32
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
constexpr factorid_t undef_id
Id that can be used for a non-existing factor.
Definition: LZ78Trie.hpp:14
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
Definition: Algorithm.hpp:15
size_t expected_number_of_remaining_elements(const size_t z) const
Definition: LZ78Trie.hpp:51