tudocomp
– The TU Dortmund Compression Framework
PLCPPeaksStrategy.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <vector>
4 
6 
7 #include <tudocomp/Algorithm.hpp>
8 #include <tudocomp/ds/TextDS.hpp>
9 
11 
13 
14 namespace tdc {
15 namespace lcpcomp {
16 
20 class PLCPPeaksStrategy : public Algorithm {
21 public:
23 
24  inline static Meta meta() {
25  Meta m("lcpcomp_comp", "plcppeaks", "using peaks of PLCP array");
26  return m;
27  }
28 
29  inline static ds::dsflags_t textds_flags() {
30  return ds::SA | ds::ISA | ds::PLCP;
31  }
32 
33  template<typename text_t>
34  inline void factorize(text_t& text,
35  size_t threshold,
36  lzss::FactorBuffer& factors) {
37 
38  // Construct SA, ISA and LCP
39  StatPhase::wrap("Construct text ds", [&]{
40  text.require(text_t::SA | text_t::ISA | text_t::PLCP);
41  });
42 
43  StatPhase::wrap("Search Peaks", [&]{
44  const auto& sa = text.require_sa();
45  const auto& isa = text.require_isa();
46 
47  auto plcp = text.release_plcp();
48 
49  //
50  const size_t n = sa.size();
51 
52  len_t last_replacement_pos = 0;
53  for(len_t i = 0; i+1 < n; ) {
54  if( (i == last_replacement_pos || plcp[i] > plcp[i-1]) && plcp[i] > plcp[i+1] && plcp[i] >= threshold) {
55  DCHECK_NE(isa[i], 0u);
56  const len_t& target_position = i;
57  const len_t factor_length = plcp[target_position];
58  DCHECK_LT(target_position+factor_length,n);
59  const len_t source_position = sa[isa[target_position]-1];
60  factors.emplace_back(i, source_position, factor_length);
61  // for(size_t k = 0; k < factor_length; ++k) {
62  // plcp[target_position + k] = 0;
63  // }
64  // const len_t affected_length = std::min(factor_length, target_position);
65  // for(size_t k = 0; k < affected_length; ++k) {
66  // const len_t affected_position = target_position - k - 1;
67  // if(target_position < affected_position + plcp[affected_position]) {
68  // const len_t affected_length = target_position - affected_position;
69  // plcp[affected_position] = affected_length;
70  // }
71  // }
72  i+= factor_length;
73  last_replacement_pos = i-1;
74  }
75  else {
76  ++i;
77  }
78  }
79  });
80  }
81 };
82 
83 }}//ns
84 
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
unsigned int dsflags_t
Definition: TextDSFlags.hpp:8
constexpr dsflags_t ISA
Definition: TextDSFlags.hpp:12
constexpr dsflags_t PLCP
Definition: TextDSFlags.hpp:15
Algorithm(Algorithm const &)=default
static ds::dsflags_t textds_flags()
fast_t< len_compact_t > len_t
Type to represent an length value.
Definition: def.hpp:114
A very naive selection strategy for LCPComp.
void emplace_back(len_t fpos, len_t fsrc, len_t flen)
Definition: LZSSFactors.hpp:41
constexpr dsflags_t SA
Definition: TextDSFlags.hpp:11
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
void factorize(text_t &text, size_t threshold, lzss::FactorBuffer &factors)
Interface for algorithms.
Definition: Algorithm.hpp:15