tudocomp
– The TU Dortmund Compression Framework
CompactSparseHashTrie.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
7 
9 
10 namespace tdc {
11 namespace lz78 {
12 
13 
14 class CompactSparseHashTrie : public Algorithm, public LZ78Trie<> {
16  //std::unordered_map<uint64_t, factorid_t> m_table;
17  size_t m_key_width = 0;
18 
19  inline size_t key_width(uint64_t key) {
20  m_key_width = std::max(m_key_width, size_t(bits_for(key)));
21  return m_key_width;
22  }
23 
24 public:
25  inline static Meta meta() {
26  Meta m("lz78trie", "compact_sparse_hash", "Compact Sparse Hash Trie");
27  //m.option("load_factor").dynamic(30);
28  return m;
29  }
30 
31  inline CompactSparseHashTrie(Env&& env, const size_t n, const size_t& remaining_characters, factorid_t reserve = 0)
32  : Algorithm(std::move(env))
33  , LZ78Trie(n,remaining_characters)
34  , m_table(zero_or_next_power_of_two(reserve), 0)
35  {
36  //m_table.max_load_factor(this->env().option("load_factor").as_integer()/100.0f );
37  }
38 
39  IF_STATS(
40  MoveGuard m_guard;
41  inline ~CompactSparseHashTrie() {
42  if (m_guard) {
43  //m_table.collect_stats(env());
44  }
45  }
46  )
49 
51  auto key = create_node(0, c);
52  auto value = size();
53 
54  auto& entry = m_table.index(key, key_width(key));
55 
56  //std::cout << "find_or_insert(" << key << ", " << entry << ", " << value << ");\n";
57 
58  entry = value;
59  return node_t(value, true);
60  }
61 
62  inline node_t get_rootnode(uliteral_t c) const {
63  return node_t(c, false);
64  }
65 
66  inline void clear() {
67  // m_table.clear();
68  }
69 
70  inline node_t find_or_insert(const node_t& parent_w, uliteral_t c) {
71  auto parent = parent_w.id();
72 
73  // if we add a new node, its index will be equal to the current size of the dictionary
74  const factorid_t newleaf_id = size();
75 
76  // Use 0 as special value for map lookup
77  // if val == 0, then it got default constructed, which means
78  // it is new
79  // could also do it by the handler mechanism if this turns out to be a problem
80  DCHECK_NE(newleaf_id, 0);
81 
82  auto key = create_node(parent,c);
83  auto& val = m_table.index(key, key_width(key));
84  if (val == 0) {
85  val = newleaf_id;
86  DCHECK_EQ(val, newleaf_id);
87  //std::cout << "find_or_insert(" << key << ", " << val << ", " << newleaf_id << ");\n";
88  return node_t(val, true);
89  } else {
90  //std::cout << "find_or_insert(" << key << ", " << val << ", " << val << ");\n";
91  return node_t(val, false);
92  }
93  }
94 
95  inline size_t size() const {
96  return m_table.size();
97  }
98 };
99 
100 }} //ns
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
LZ78TrieNode node_t
Definition: LZ78Trie.hpp:43
CompactSparseHashTrie & operator=(CompactSparseHashTrie &&other)=default
uint8_t uliteral_t
Type to represent signed single literals.
Definition: def.hpp:131
val_t & index(uint64_t key, size_t key_width)
uint64_t zero_or_next_power_of_two(uint64_t x)
node_t find_or_insert(const node_t &parent_w, uliteral_t c)
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
factorid_t id() const
Definition: LZ78Trie.hpp:32
Default return type of find_or_insert.
Definition: LZ78Trie.hpp:22
CompactSparseHashTrie(Env &&env, const size_t n, const size_t &remaining_characters, factorid_t reserve=0)
IF_STATS(MoveGuard m_guard;inline ~CompactSparseHashTrie() { if(m_guard) { } }) CompactSparseHashTrie(CompactSparseHashTrie &&other)=default
uint32_t factorid_t
Type for the factor indices, bounded by the number of LZ78 trie nodes.
Definition: LZ78Trie.hpp:11
squeeze_node_t create_node(factorid_t id, uliteral_t c)
Local environment for a compression/encoding/decompression call.
node_t get_rootnode(uliteral_t c) const
Interface for algorithms.
Definition: Algorithm.hpp:15