tudocomp
– The TU Dortmund Compression Framework
MaxHeapStrategy.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/Algorithm.hpp>
4 #include <tudocomp/ds/TextDS.hpp>
6 
8 
10 
11 namespace tdc {
12 namespace lcpcomp {
13 
22 class MaxHeapStrategy : public Algorithm {
23 public:
24  inline static Meta meta() {
25  Meta m("lcpcomp_comp", "heap");
26  return m;
27  }
28 
29  inline static ds::dsflags_t textds_flags() {
30  return ds::SA | ds::ISA | ds::LCP;
31  }
32 
33  using Algorithm::Algorithm; //import constructor
34 
35  template<typename text_t>
36  inline void factorize(text_t& text,
37  const size_t threshold,
38  lzss::FactorBuffer& factors) {
39 
40  // Construct SA, ISA and LCP
41  StatPhase::wrap("Construct text ds", [&]{
42  text.require(text_t::SA | text_t::ISA | text_t::LCP);
43  });
44 
45  auto& sa = text.require_sa();
46  auto& isa = text.require_isa();
47  auto lcp = text.release_lcp();
48 
49  auto heap = StatPhase::wrap("Construct MaxLCPHeap", [&]{
50  // Count relevant LCP entries
51  size_t heap_size = 0;
52  for(size_t i = 1; i < lcp.size(); i++) {
53  if(lcp[i] >= threshold) ++heap_size;
54  }
55 
56  // Construct heap
57  ArrayMaxHeap<typename text_t::lcp_type::data_type> heap(lcp, lcp.size(), heap_size);
58  for(size_t i = 1; i < lcp.size(); i++) {
59  if(lcp[i] >= threshold) heap.insert(i);
60  }
61 
62  StatPhase::log("entries", heap.size());
63  return heap;
64  });
65 
66  //Factorize
67  StatPhase::wrap("Process MaxLCPHeap", [&]{
68  while(heap.size() > 0) {
69  //get suffix with longest LCP
70  size_t m = heap.get_max();
71 
72  //generate factor
73  size_t fpos = sa[m];
74  size_t fsrc = sa[m-1];
75  size_t flen = lcp[m];
76 
77  factors.emplace_back(fpos, fsrc, flen);
78 
79  //remove overlapped entries
80  for(size_t k = 0; k < flen; k++) {
81  heap.remove(isa[fpos + k]);
82  }
83 
84  //correct intersecting entries
85  for(size_t k = 0; k < flen && fpos > k; k++) {
86  size_t s = fpos - k - 1;
87  size_t i = isa[s];
88  if(heap.contains(i)) {
89  if(s + lcp[i] > fpos) {
90  size_t l = fpos - s;
91  if(l >= threshold) {
92  heap.decrease_key(i, l);
93  } else {
94  heap.remove(i);
95  }
96  }
97  }
98  }
99  }
100  });
101  }
102 };
103 
104 }}
105 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
Implements the original "Max LCP" selection strategy for LCPComp.
unsigned int dsflags_t
Definition: TextDSFlags.hpp:8
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
Algorithm(Algorithm const &)=default
void insert(len_t i)
Inserts an item into the heap.
void factorize(text_t &text, const size_t threshold, lzss::FactorBuffer &factors)
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
constexpr dsflags_t LCP
Definition: TextDSFlags.hpp:13
Represents a binary max heap backed by an external array of keys.
Interface for algorithms.
Definition: Algorithm.hpp:15
static ds::dsflags_t textds_flags()