tudocomp
– The TU Dortmund Compression Framework
utils.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <unordered_set>
4 
5 #include <tudocomp/util.hpp>
6 #include <tudocomp/Env.hpp>
9 
11 
12 namespace tdc {
13 namespace esp {
14  //[debugging]///////////////////////////////////////////////////////////////
15 
16  std::ostream& nice_block_lengths(GenericView<TypedBlock> tbs, std::ostream& o) {
17  for (auto& tb: tbs) {
18  if (tb.len == 1) {
19  o << "[" << int(tb.type) << "]";
20  } else if (tb.len == 2) {
21  o << "[ " << int(tb.type) << " ]";
22  } else if (tb.len == 3) {
23  o << "[ " << int(tb.type) << " ]";
24  } else {
25  o << "<Err: " << tb << ">";
26  }
27  }
28  return o;
29  }
30  template<typename T>
31  class DebugPrint {
32  ConstGenericView<T> m_view;
33  size_t m_alpha;
34  public:
35  inline DebugPrint(ConstGenericView<T> v, size_t alpha):
36  m_view(v), m_alpha(alpha) {}
37  template<typename U>
38  friend std::ostream& operator<<(std::ostream&, const DebugPrint<U>&);
39  size_t char_mult() {
40  if (m_alpha == 256) {
41  return 1;
42  } else {
43  return 4;
44  }
45  }
46  };
47  template<typename T>
48  inline std::ostream& operator<<(std::ostream& o, const DebugPrint<T>& d) {
49  if (d.m_alpha == 256) {
50  o << "[";
51  for (auto c: d.m_view) {
52  o << char(uint8_t(c));
53  }
54  o << "]";
55  return o;
56  } else {
57  return o << vec_to_debug_string(d.m_view, 2);
58  }
59  }
60  template<typename T>
62  return DebugPrint<T>(v, alpha);
63  }
64 
65  //[end debugging]///////////////////////////////////////////////////////////
66 
/// Counts the distinct values occurring in `t`.
///
/// Each element is converted to `size_t` and collected in a hash set; the
/// number of entries in that set is the result.
///
/// @param t  any range usable in a range-based `for` loop
/// @return   number of distinct element values (0 for an empty range)
template<class T>
uint64_t calc_alphabet_size(const T& t) {
    // TODO
    // Optimize for alphabets with large repeating numbers
    std::unordered_set<size_t> distinct_symbols;

    for (const auto& symbol : t) {
        distinct_symbols.insert(symbol);
    }

    const uint64_t alphabet_size = distinct_symbols.size();
    return alphabet_size;
}
77 
/// Checks that no two consecutive elements of `t` compare equal.
///
/// @param t  container providing `size()` and `operator[]`
/// @return   `true` if every element differs from its predecessor
///           (trivially `true` for fewer than two elements), otherwise `false`
template<class T>
bool no_adjacent_identical(const T& t) {
    // Walk over each (predecessor, successor) pair.
    for (size_t prev = 0; prev + 1 < t.size(); ++prev) {
        const size_t curr = prev + 1;
        if (t[curr] == t[prev]) {
            return false;
        }
    }
    return true;
}
85 
86  template<class T, class F>
87  void do_for_neighbors(T& t, F f) {
88  for (size_t i = 0; i < t.size(); i++) {
89  std::array<typename T::value_type, 2> neighbors;
90  uint8_t neighbor_len = 0;
91 
92  if (i == 0 && i == t.size() - 1) {
93  neighbor_len = 0;
94  } else if (i == 0) {
95  neighbor_len = 1;
96  neighbors[0] = t[i + 1];
97  } else if (i == t.size() - 1) {
98  neighbor_len = 1;
99  neighbors[0] = t[i - 1];
100  } else {
101  neighbor_len = 2;
102  neighbors[0] = t[i - 1];
103  neighbors[1] = t[i + 1];
104  }
105 
106  f(i, ConstGenericView<typename T::value_type>(neighbors.data(), neighbor_len));
107  }
108  }
109 }
110 }
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
std::string vec_to_debug_string(const T &s, size_t indent=0)
Builds the string representation of a vector of byte values, surrounded by square brackets ([ and ])...
A view into a slice of memory.
A const view into a slice of memory.
DebugPrint< T > debug_p(ConstGenericView< T > v, size_t alpha)
Definition: utils.hpp:61
DebugPrint(ConstGenericView< T > v, size_t alpha)
Definition: utils.hpp:35
bool no_adjacent_identical(const T &t)
Definition: utils.hpp:79
size_t char_mult()
Definition: utils.hpp:39
std::ostream & nice_block_lengths(GenericView< TypedBlock > tbs, std::ostream &o)
Definition: utils.hpp:16
void do_for_neighbors(T &t, F f)
Definition: utils.hpp:87
uint64_t calc_alphabet_size(const T &t)
Definition: utils.hpp:68