tudocomp
– The TU Dortmund Compression Framework
SparseISA.hpp
Go to the documentation of this file.
1 #pragma once
2 
3 #include <assert.h>
4 #include <type_traits>
5 
8 #include <tudocomp/ds/Rank.hpp>
9 
11 
12 namespace tdc {
13 
15 template<typename sa_t>
16 class SparseISA : public Algorithm {
17 public:
19  using data_type = iv_t;
20 
21 private:
22  const sa_t* m_sa;
23 
24  BitVector m_has_shortcut;
25  Rank m_rank;
26 
27  iv_t m_shortcuts;
28 
29 public:
30  inline static Meta meta() {
31  Meta m("isa", "sparse_isa");
32  m.option("sa").templated<sa_t>("sa");
33  m.option("t").dynamic(3);
34  return m;
35  }
36 
38  return ds::InputRestrictions {};
39  }
40 
41  template<typename textds_t>
42  inline SparseISA(Env&& env, textds_t& tds, CompressMode cm)
43  : Algorithm(std::move(env)) {
44 
45  // Suffix Array types must match
46  static_assert(std::is_same<sa_t, typename textds_t::sa_type>(),
47  "Suffix Array type mismatch!");
48 
49  // Require Suffix Array
50  m_sa = &tds.require_sa(cm);
51 
52  const size_t n = m_sa->size();
53  m_has_shortcut = BitVector(n);
54 
55  const size_t t = this->env().option("t").as_integer();
56 
57  // Construct
58  StatPhase::wrap("Construct sparse ISA", [&]{
59  auto v = BitVector(n);
60  for(size_t i = 0; i < n; i++) {
61  if(!v[i]) {
62  // new cycle
63  v[i] = 1;
64  size_t j = (*m_sa)[i];
65  size_t k = 1;
66 
67  while(j != i) {
68  if((k % t) == 0) {
69  m_has_shortcut[j] = 1;
70  }
71 
72  v[j] = 1;
73  j = (*m_sa)[j];
74  ++k;
75  }
76 
77  if(k > t) m_has_shortcut[i] = 1;
78  }
79  }
80 
81  m_rank = Rank(m_has_shortcut);
82  m_shortcuts = DynamicIntVector(m_rank(n-1), 0, bits_for(n));
83 
84  for(size_t i = 0; i < n; i++) {
85  if(v[i]) {
86  v[i] = 0;
87  size_t j = (*m_sa)[i];
88  while(v[j]) {
89  if(m_has_shortcut[j]) {
90  m_shortcuts[m_rank(j)-1] = i;
91  i = j;
92  }
93  v[j] = 0;
94  j = (*m_sa)[j];
95  }
96 
97  if(m_has_shortcut[j]) {
98  m_shortcuts[m_rank(j)-1] = i;
99  }
100 
101  i = j;
102  }
103  }
104  });
105  }
106 
107 public:
108  inline size_t operator[](size_t i) const {
109  size_t j = i;
110  bool s = true;
111 
112  while((*m_sa)[j] != i) {
113  if(s && m_has_shortcut[j]) {
114  j = m_shortcuts[m_rank(j)-1];
115  s = false;
116  } else {
117  j = (*m_sa)[j];
118  }
119  }
120  return j;
121  }
122 
123  inline void compress() {
124  // nothing to do, already sparse :-)
125  }
126 
127  inline size_t size() const {
128  return m_has_shortcut.size();
129  }
130 
136  inline iv_t relinquish() {
137  throw std::runtime_error("not supported"); //TODO: what to do?
138  }
139 
141  inline iv_t copy() const {
142  throw std::runtime_error("not supported"); //TODO: what to do?
143  }
144 };
145 
146 } //ns
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
Provides meta information about an Algorithm.
Definition: Meta.hpp:34
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
const OptionValue & option(const std::string &option) const
Get an option of this algorithm.
Definition: Env.hpp:44
Implements a rank data structure for a BitVector.
Definition: Rank.hpp:16
IntVector< uint_t< 1 > > BitVector
Represents a bit vector, alias for IntVector with a fixed bit width of 1.
Definition: IntVector.hpp:545
Describes a set of restrictions placed on input data.
uint64_t as_integer() const
SparseISA(Env &&env, textds_t &tds, CompressMode cm)
Definition: SparseISA.hpp:42
size_t operator[](size_t i) const
Definition: SparseISA.hpp:108
CompressMode
Defines when data structures are bit-compressed.
Definition: CompressMode.hpp:6
Env & env()
Provides access to the environment that the algorithm works in.
Definition: Algorithm.hpp:51
static Meta meta()
Definition: SparseISA.hpp:30
static ds::InputRestrictions restrictions()
Definition: SparseISA.hpp:37
void templated(const std::string &accepted_type)
Declares that this option accepts values of the specified Algorithm type T.
Definition: Meta.hpp:93
static auto wrap(const char *title, F func) -> typename std::result_of< F(StatPhase &)>::type
Executes a lambda as a single statistics phase.
Definition: StatPhase.hpp:143
IntVector< dynamic_t > DynamicIntVector
Represents an integer vector with unspecified (dynamic) bit width.
Definition: IntVector.hpp:553
OptionBuilder option(const std::string &name)
Declares an accepted option for this algorithm.
Definition: Meta.hpp:216
size_t size() const
Definition: SparseISA.hpp:127
iv_t copy() const
Creates a copy of the data structure&#39;s storage.
Definition: SparseISA.hpp:141
Local environment for a compression/encoding/decompression call.
size_type size() const
Definition: IntVector.hpp:284
Interface for algorithms.
Definition: Algorithm.hpp:15
void dynamic()
Declares that this option accepts values of a simple type that can be parsed from a string (e...
Definition: Meta.hpp:150
Constructs the inverse suffix array using the suffix array.
Definition: SparseISA.hpp:16
iv_t relinquish()
Forces the data structure to relinquish its data storage.
Definition: SparseISA.hpp:136
DynamicIntVector iv_t
Definition: SparseISA.hpp:18