tudocomp
– The TU Dortmund Compression Framework
GrammarRules.hpp
Go to the documentation of this file.
1 #pragma once
2 
4 
5 namespace tdc {namespace esp {
6  struct IPDStats {
7  size_t ext_size2_total = 0;
8 
9  size_t ext_size3_total = 0;
10  size_t ext_size3_unique = 0;
11 
12  size_t int_size2_total = 0;
13  size_t int_size2_unique = 0;
14  };
15 
16  template<typename ipd_t>
17  class GrammarRules {
18  public:
19  using Stats = IPDStats;
20  private:
21  static constexpr std::array<size_t, 2> default_key() {
22  return {{ size_t(-1), size_t(-1) }};
23  }
24 
25  using a2_t = typename ipd_t::template IPDMap<2, size_t, size_t>;
26 
27  a2_t n2;
28 
29  size_t counter = 1;
30 
31  size_t m_initial_counter = 1;
32 
33  Stats m_stats;
34  public:
35  GrammarRules(size_t counter_start):
36  n2(0, Array<2>(default_key())),
37  counter(counter_start + 1),
38  m_initial_counter(counter_start + 1) {}
39 
40  inline size_t add(in_t v) {
41  const size_t vs = v.size();
42 
43  DCHECK(vs == 2 || vs == 3);
44 
45  auto updater = [&](size_t& v) {
46  if (v == 0) {
47  v = counter++;
48  }
49  };
50 
51  if (vs == 2) {
52  Array<2> va;
53  va.m_data[0] = v[0];
54  va.m_data[1] = v[1];
55 
56  auto old_counter = counter;
57  auto r = n2.access(va, updater) - 1;
58  if (counter > old_counter) {
59  m_stats.int_size2_unique++;
60  }
61  m_stats.int_size2_total++;
62  m_stats.ext_size2_total++;
63  return r;
64  } else {
65  Array<2> between;
66  between.m_data[0] = add(v.slice(0, 2));
67  between.m_data[1] = v[2];
68 
69  auto old_counter = counter;
70  auto r = n2.access(between, updater) - 1;
71  if (counter > old_counter) {
72  m_stats.ext_size3_unique++;
73  m_stats.int_size2_unique++;
74  }
75  m_stats.ext_size2_total--; // compensate for internal 2 use
76  m_stats.ext_size3_total++;
77  m_stats.int_size2_total++;
78  return r;
79  }
80  }
81 
82  inline size_t rules_count() const {
83  return counter - m_initial_counter;
84  }
85 
86  inline size_t initial_counter() const {
87  return m_initial_counter;
88  }
89 
90  template<typename F>
91  void for_all(F f) const {
92  n2.for_all(f);
93  }
94 
95  inline void clear() {
96  auto discard = std::move(n2);
97  }
98 
99  inline const Stats& stats() {
100  return m_stats;
101  }
102  };
103 }}
BitPackingVectorSlice< dynamic_t > in_t
Definition: HashArray.hpp:9
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
size_t rules_count() const
void for_all(F f) const
size_t initial_counter() const
const Stats & stats()
std::array< T, N > m_data
Definition: HashArray.hpp:15
GrammarRules(size_t counter_start)