tudocomp
– The TU Dortmund Compression Framework
LCPCompressor.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <tudocomp/util.hpp>
4 
9 
10 #include <tudocomp/ds/TextDS.hpp>
11 
13 
14 // For default params
17 
18 namespace tdc {
19 namespace lcpcomp {
20 class MaxLCPStrategy;
21 class CompactDec;
22 
23 template<typename coder_t, typename decode_buffer_t>
24 inline void decode_text_internal(Env&& env, coder_t& decoder, std::ostream& outs) {
25 
26  StatPhase decode_phase("Decoding");
27 
28  // decode text range
29  auto text_len = decoder.template decode<len_t>(len_r);
30 
31  // init decode buffer
32  decode_buffer_t buffer(std::move(env), text_len);
33 
34  StatPhase::wrap("Starting Decoding", [&]{
35  Range text_r(text_len);
36 
37  // decode shortest and longest factor
38  auto flen_min = decoder.template decode<len_t>(text_r);
39  auto flen_max = decoder.template decode<len_t>(text_r);
40  MinDistributedRange flen_r(flen_min, flen_max);
41 
42  // decode longest distance between factors
43  auto fdist_max = decoder.template decode<len_t>(text_r);
44  Range fdist_r(fdist_max);
45 
46  // decode
47  while(!decoder.eof()) {
48  len_t num;
49 
50  auto b = decoder.template decode<bool>(bit_r);
51  if(b) num = decoder.template decode<len_t>(fdist_r);
52  else num = 0;
53 
54  // decode characters
55  while(num--) {
56  auto c = decoder.template decode<uliteral_t>(literal_r);
57  buffer.decode_literal(c);
58  }
59 
60  if(!decoder.eof()) {
61  //decode factor
62  auto src = decoder.template decode<len_t>(text_r);
63  auto len = decoder.template decode<len_t>(flen_r);
64 
65  buffer.decode_factor(src, len);
66  }
67  }
68  });
69 
70  StatPhase::wrap("Scan Decoding", [&]{ buffer.decode_lazy(); });
71  StatPhase::wrap("Eager Decoding", [&]{
72  buffer.decode_eagerly();
73  IF_STATS(StatPhase::log("longest_chain", buffer.longest_chain()));
74  });
75  StatPhase::wrap("Output Text", [&]{ buffer.write_to(outs); });
76 }
77 
78 }//ns
79 
82 template<typename coder_t, typename strategy_t, typename dec_t, typename text_t = TextDS<>>
83 class LCPCompressor : public Compressor {
84 public:
85  inline static Meta meta() {
86  Meta m("compressor", "lcpcomp");
87  m.option("coder").templated<coder_t>("coder");
88  m.option("comp").templated<strategy_t, lcpcomp::ArraysComp>("lcpcomp_comp");
89  m.option("dec").templated<dec_t, lcpcomp::ScanDec>("lcpcomp_dec");
90  m.option("textds").templated<text_t, TextDS<>>("textds");
91  m.option("threshold").dynamic(5);
92  m.option("flatten").dynamic(1); // 0 or 1
93  m.uses_textds<text_t>(strategy_t::textds_flags());
94  return m;
95  }
96 
98  inline LCPCompressor(Env&& env) : Compressor(std::move(env)) {}
99 
100  inline virtual void compress(Input& input, Output& output) override {
101  auto in = input.as_view();
102  DCHECK(in.ends_with(uint8_t(0)));
103 
104  auto text = StatPhase::wrap("Construct Text DS", [&]{
105  return text_t(env().env_for_option("textds"), in, strategy_t::textds_flags());
106  });
107 
108  // read options
109  const len_t threshold = env().option("threshold").as_integer(); //factor threshold
110  lzss::FactorBuffer factors;
111 
112  StatPhase::wrap("Factorize", [&]{
113  // Factorize
114  strategy_t strategy(env().env_for_option("comp"));
115  strategy.factorize(text, threshold, factors);
116 
117  StatPhase::log("threshold", threshold);
118  StatPhase::log("factors", factors.size());
119  });
120 
121  // sort factors
122  StatPhase::wrap("Sort Factors", [&]{ factors.sort(); });
123 
124  if(env().option("flatten").as_integer()) {
125  // flatten factors
126  StatPhase::wrap("Flatten Factors", [&]{ factors.flatten(); });
127  }
128 
129  // encode
130  StatPhase::wrap("Encode Factors", [&]{
131  typename coder_t::Encoder coder(
132  env().env_for_option("coder"),
133  output,
134  lzss::TextLiterals<text_t>(text, factors));
135 
136  lzss::encode_text(coder, text, factors); //TODO is this correct?
137  });
138  }
139 
140  inline virtual void decompress(Input& input, Output& output) override {
141  //TODO: tell that forward-factors are allowed
142  typename coder_t::Decoder decoder(env().env_for_option("coder"), input);
143  auto outs = output.as_stream();
144 
145  //lzss::decode_text_internal<coder_t, dec_t>(decoder, outs);
146  // if(lazy == 0)
147  // lzss::decode_text_internal<coder_t, dec_t>(decoder, outs);
148  // else
149  lcpcomp::decode_text_internal<typename coder_t::Decoder, dec_t>(env().env_for_option("dec"), decoder, outs);
150  }
151 };
152 
155 namespace lcpcomp {
156 }
157 
158 }
159 
Represents a generic range of positive integers.
Definition: Range.hpp:16
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
constexpr auto bit_r
Global predefined range for bits (0 or 1).
Definition: Range.hpp:108
Factorizes the input by finding redundant phrases in a re-ordered version of the LCP table...
Base for data compressors.
Definition: Compressor.hpp:19
void encode_text(coder_t &coder, const text_t &text, const factor_t &factors)
Definition: LZSSCoding.hpp:19
len_compact_t len
Definition: LZSSFactors.hpp:38
Provides access to runtime and memory measurement in statistics phases.
Definition: StatPhase.hpp:44
Represents a range of positive integers that tend to be distributed towards the minimum.
Definition: Range.hpp:56
virtual void compress(Input &input, Output &output) override
Compress the given input to the given output.
static Meta meta()
void uses_textds(ds::dsflags_t flags)
Indicates that this Algorithm uses the TextDS class, and how it does.
Definition: Meta.hpp:277
Runs a number of scans of the factors.
Definition: ScanDec.hpp:146
OutputStream as_stream() const
Creates a stream that allows for character-wise output.
Creates arrays instead of an LCP-heap. Each array corresponds to one LCP value. We do not eagerly invok...
Definition: ArraysComp.hpp:22
void decode_text_internal(Env &&env, coder_t &decoder, std::ostream &outs)
constexpr auto literal_r
Global predefined range for literals.
Definition: Range.hpp:111
An abstraction layer for algorithm output.
Definition: Output.hpp:23
len_compact_t src
Definition: LZSSFactors.hpp:38
fast_t< len_compact_t > len_t
Type to represent a length value.
Definition: def.hpp:114
#define IF_STATS(x)
x is compiled only when the STATS_DISABLED macro is undefined.
Definition: def.hpp:59
static void log(const char *key, const T &value)
Logs a user statistic for the current phase.
Definition: StatPhase.hpp:218
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
constexpr auto len_r
Global predefined range for len_t.
Definition: Range.hpp:115
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
LCPCompressor(Env &&env)
Construct the class with an environment.
virtual void decompress(Input &input, Output &output) override
Decompress the given input to the given output.
InputView as_view() const
Provides a view on the input that allows for random access.
Definition: Input.hpp:260
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
Local environment for a compression/encoding/decompression call.
Manages text related data structures.
Definition: TextDS.hpp:30
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
An abstraction layer for algorithm input.
Definition: Input.hpp:37