tudocomp
– The TU Dortmund Compression Framework
BinarySortedTrie.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <vector>
5 #include <tudocomp/Algorithm.hpp>
6 
7 namespace tdc {
8 namespace lz78 {
9 
10 class BinarySortedTrie : public Algorithm, public LZ78Trie<> {
11  /*
12  * The trie is not stored in standard form. Each node stores the pointer to its first child and a pointer to its next sibling (first as first come first served)
13  */
14  std::vector<factorid_t> m_first_child;
15  std::vector<factorid_t> m_next_sibling;
16  std::vector<uliteral_t> m_literal;
17 
18  IF_STATS(
19  size_t m_resizes = 0;
20  size_t m_specialresizes = 0;
21  )
22 public:
23  inline static Meta meta() {
24  Meta m("lz78trie", "binarysorted", "Lempel-Ziv 78 Sorted Binary Trie");
25  return m;
26  }
27  inline BinarySortedTrie(Env&& env, size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
28  : Algorithm(std::move(env))
29  , LZ78Trie(n,remaining_characters)
30  {
31  if(reserve > 0) {
32  m_first_child.reserve(reserve);
33  m_next_sibling.reserve(reserve);
34  m_literal.reserve(reserve);
35  }
36  }
37 
38  inline node_t add_rootnode(uliteral_t c) {
39  DCHECK_EQ(c, size());
40  m_first_child.push_back(undef_id);
41  m_next_sibling.push_back(undef_id);
42  m_literal.push_back(c);
43  return node_t(c, true);
44  }
45 
46  inline node_t get_rootnode(uliteral_t c) const {
47  return node_t(c, false);
48  }
49 
50  inline void clear() {
51  m_first_child.clear();
52  m_next_sibling.clear();
53  m_literal.clear();
54 
55  }
56 
57  inline node_t new_node(uliteral_t c, const factorid_t m_first_child_id, const factorid_t m_next_sibling_id) {
58  m_first_child.push_back(m_first_child_id);
59  m_next_sibling.push_back(m_next_sibling_id);
60  m_literal.push_back(c);
61  return node_t(size() - 1, true);
62  }
63 
64  inline node_t find_or_insert(const node_t& parent_w, uliteral_t c) {
65  auto parent = parent_w.id();
66  const factorid_t newleaf_id = size();
67 
68  DCHECK_LT(parent, size());
69 
70 
71  if(m_first_child[parent] == undef_id) {
72  m_first_child[parent] = newleaf_id;
73  return new_node(c, undef_id, undef_id);
74  } else {
75  factorid_t node = m_first_child[parent];
76  if(m_literal[node] > c) {
77  m_first_child[parent] = newleaf_id;
78  return new_node(c, undef_id, node);
79  }
80  while(true) { // search the binary tree stored in parent (following left/right siblings)
81  if(c == m_literal[node]) return node_t(node, false);
82  if(m_next_sibling[node] == undef_id) {
83  m_next_sibling[node] = newleaf_id;
84  return new_node(c, undef_id, undef_id);
85  }
86  const factorid_t nextnode = m_next_sibling[node];
87  if(m_literal[nextnode] > c) {
88  m_next_sibling[node] = newleaf_id;
89  return new_node(c, undef_id, nextnode);
90  }
91  node = m_next_sibling[node];
92  if(m_first_child.capacity() == m_first_child.size()) {
93  const size_t newbound = m_first_child.size()+expected_number_of_remaining_elements(size());
94  if(newbound < m_first_child.size()*2 ) {
95  m_first_child.reserve (newbound);
96  m_next_sibling.reserve (newbound);
97  m_literal.reserve (newbound);
98  IF_STATS(++m_specialresizes);
99  }
100  IF_STATS(++m_resizes);
101  }
102  }
103  }
104  }
105 
106  inline size_t size() const {
107  return m_first_child.size();
108  }
109 
110 };
111 
112 }} //ns
113 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
Algorithm(Algorithm const &)=default
LZ78Trie(const size_t n, const size_t &remaining_characters)
Definition: LZ78Trie.hpp:48
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
factorid_t id() const
Definition: LZ78Trie.hpp:32
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
constexpr factorid_t undef_id
Id that can be used for a non-existing factor.
Definition: LZ78Trie.hpp:14
Local environment for a compression/encoding/decompression call.
Interface for algorithms.
Definition: Algorithm.hpp:15
size_t expected_number_of_remaining_elements(const size_t z) const
Definition: LZ78Trie.hpp:51