tudocomp
– The TU Dortmund Compression Framework
Select.hpp
Go to the documentation of this file.
1 #pragma once
2 
5 #include <tudocomp/ds/Rank.hpp>
6 
7 namespace tdc {
8 
19 template<bool m_bit>
20 class Select {
21 private:
22  using data_t = BitVector::internal_data_type;
23  static constexpr size_t data_w = 8 * sizeof(data_t);
24 
25  static_assert(data_w <= 64,
26  "bit vectors backing must have width of at most 64 bits");
27 
28  // template variants
29  static constexpr uint8_t basic_rank(data_t v);
30  static constexpr uint8_t basic_rank(data_t v, uint8_t l, uint8_t m);
31  static constexpr uint8_t basic_select(data_t, uint8_t k);
32  static constexpr uint8_t basic_select(data_t, uint8_t l, uint8_t k);
33 
34  const BitVector* m_bv;
35 
36  size_t m_max;
37  size_t m_block_size;
38  size_t m_supblock_size;
39 
40  DynamicIntVector m_blocks;
41  DynamicIntVector m_supblocks;
42 
43 public:
45  inline Select()
46  : m_bv(nullptr),
47  m_max(0),
48  m_block_size(0),
49  m_supblock_size(0) {
50  }
51 
53  inline Select(const Select& other)
54  : m_bv(other.m_bv),
55  m_max(other.m_max),
56  m_block_size(other.m_block_size),
57  m_supblock_size(other.m_supblock_size),
58  m_blocks(other.m_blocks),
59  m_supblocks(other.m_supblocks) {
60  }
61 
63  inline Select(Select&& other)
64  : m_bv(other.m_bv),
65  m_max(other.m_max),
66  m_block_size(other.m_block_size),
67  m_supblock_size(other.m_supblock_size),
68  m_blocks(std::move(other.m_blocks)),
69  m_supblocks(std::move(other.m_supblocks)) {
70  }
71 
73  inline Select& operator=(const Select& other) {
74  m_bv = other.m_bv;
75  m_max = other.m_max;
76  m_block_size = other.m_block_size;
77  m_supblock_size = other.m_supblock_size;
78  m_blocks = other.m_blocks;
79  m_supblocks = other.m_supblocks;
80  return *this;
81  }
82 
84  inline Select& operator=(Select&& other) {
85  m_bv = other.m_bv;
86  m_max = other.m_max;
87  m_block_size = other.m_block_size;
88  m_supblock_size = other.m_supblock_size;
89  m_blocks = std::move(other.m_blocks);
90  m_supblocks = std::move(other.m_supblocks);
91  return *this;
92  }
93 
101  inline Select(BitVector& bv) : m_bv(&bv) {
102  const size_t n = bv.size();
103 
104  const size_t log_n = bits_for(n);
105 
106  // construct
107  m_supblock_size = log_n * log_n;
108  m_block_size = log_n;
109 
110  m_supblocks = DynamicIntVector(idiv_ceil(n, m_supblock_size), 0, log_n);
111  m_blocks = DynamicIntVector(idiv_ceil(n, m_block_size), 0, log_n);
112 
113  m_max = 0;
114  size_t r_sb = 0; // current bit count in superblock
115  size_t r_b = 0; // current bit count in block
116 
117  size_t cur_sb = 0; // current superblock
118  size_t cur_sb_offset = 0; // starting position of current superblock
119  size_t longest_sb = 0; // length of longest superblock
120 
121  size_t cur_b = 0; // current block
122 
123  auto data = bv.data();
124  for(size_t i = 0; i < idiv_ceil(n, data_w); i++) {
125  const auto v = data[i];
126  const uint8_t r = basic_rank(v);
127  m_max += r;
128 
129  if(r_b + r >= m_block_size) {
130  // entered new block
131 
132  // amount of bits needed to fill current block
133  size_t distance_b = m_block_size - r_b;
134 
135  // stores the offset of the last bit in the current block
136  uint8_t offs = 0;
137 
138  r_b += r;
139 
140  size_t distance_sum = 0;
141  while(r_b >= m_block_size) {
142  // find exact position of the bit in question
143  offs = basic_select(v, offs, distance_b);
144  DCHECK_NE(SELECT_FAIL, offs);
145 
146  const size_t pos = i * data_w + offs;
147 
148  distance_sum += distance_b;
149  r_sb += distance_b;
150  if(r_sb >= m_supblock_size) {
151  // entered new superblock
152  longest_sb = std::max(longest_sb, pos - cur_sb_offset);
153  cur_sb_offset = pos;
154 
155  m_supblocks[cur_sb++] = pos;
156 
157  r_sb -= m_supblock_size;
158  }
159 
160  m_blocks[cur_b++] = pos - cur_sb_offset;
161  r_b -= m_block_size;
162  distance_b = m_block_size;
163 
164  ++offs;
165  }
166 
167  DCHECK_GE(size_t(r), distance_sum);
168  r_sb += r - distance_sum;
169  } else {
170  r_b += r;
171  r_sb += r;
172  }
173  }
174 
175  longest_sb = std::max(longest_sb, n - cur_sb_offset);
176  const size_t w_block = bits_for(longest_sb);
177 
178  m_blocks.resize(cur_b);
179  m_blocks.width(w_block);
180  m_blocks.shrink_to_fit();
181 
182  m_supblocks.resize(cur_sb);
183  m_supblocks.shrink_to_fit();
184  }
185 
191  inline size_t select(size_t x) const {
192  DCHECK_GT(x, 0) << "order must be at least one";
193  if(x > m_max) return m_bv->size();
194 
195  size_t pos = 0;
196 
197  //narrow down to block
198  {
199  const size_t i = x / m_supblock_size;
200  const size_t j = x / m_block_size;
201 
202  if(i > 0) {
203  pos += m_supblocks[i-1];
204  x -= i * m_supblock_size;
205  }
206  if(x == 0) return pos;
207 
208  // block j is the k-th block within the i-th superblock
209  size_t k = j - i * (m_supblock_size / m_block_size);
210  if(k > 0) {
211  pos += m_blocks[j-1];
212  x -= k * m_block_size;
213  }
214  if(x == 0) return pos;
215 
216  if(i > 0 || k > 0) ++pos; // offset from block boundary
217  }
218 
219  // from this point forward, search directly in the bit vector
220  auto data = m_bv->data();
221 
222  size_t i = pos / data_w;
223  size_t offs = pos % data_w;
224 
225  uint8_t s = basic_select(data[i], offs, x);
226  if(s != SELECT_FAIL) {
227  // found in first data segment
228  return pos + s - offs;
229  } else {
230  // linearly search in the next segments
231  size_t passes = 1;
232 
233  x -= basic_rank(data[i], offs, data_w-1);
234  do {
235  ++passes;
236  pos = (++i) * data_w;
237  s = basic_select(data[i], x);
238  if(s == SELECT_FAIL) x -= basic_rank(data[i]);
239  } while(s == SELECT_FAIL);
240 
241  return pos + s;
242  }
243  }
244 
246  inline size_t operator()(size_t k) const {
247  return select(k);
248  }
249 };
250 
252 
254 template<>
255 inline constexpr uint8_t Select1::basic_rank(data_t v) {
256  return tdc::rank1(v);
257 }
258 
259 template<>
260 inline constexpr uint8_t Select1::basic_rank(data_t v, uint8_t l, uint8_t m) {
261  return tdc::rank1(v, l, m);
262 }
263 
264 template<>
265 inline constexpr uint8_t Select1::basic_select(data_t v, uint8_t k) {
266  return tdc::select1(v, k);
267 }
268 
269 template<>
270 inline constexpr uint8_t Select1::basic_select(data_t v, uint8_t l, uint8_t k) {
271  return tdc::select1(v, l, k);
272 }
274 
276 
278 template<>
279 inline constexpr uint8_t Select0::basic_rank(data_t v) {
280  return tdc::rank0(v);
281 }
282 
283 template<>
284 inline constexpr uint8_t Select0::basic_rank(data_t v, uint8_t l, uint8_t m) {
285  return tdc::rank0(v, l, m);
286 }
287 
288 template<>
289 inline constexpr uint8_t Select0::basic_select(data_t v, uint8_t k) {
290  return tdc::select0(v, k);
291 }
292 
293 template<>
294 inline constexpr uint8_t Select0::basic_select(data_t v, uint8_t l, uint8_t k) {
295  return tdc::select0(v, l, k);
296 }
298 
299 }
Contains the text compression and encoding framework.
Definition: namespaces.hpp:11
constexpr uint_fast8_t bits_for(size_t n)
Computes the number of bits required to store the given integer value.
constexpr size_t idiv_ceil(size_t a, size_t b)
Performs an integer division with the result rounded up to the next integer.
A vector over arbitrary unsigned integer types.
Definition: IntVector.hpp:175
Select & operator=(const Select &other)
Copy assignment.
Definition: Select.hpp:73
Select(BitVector &bv)
Constructs the select data structure for the given bit vector.
Definition: Select.hpp:101
constexpr uint8_t select1(uint_t v, uint8_t k)
Finds the position of the k-th 1-bit in the binary representation of the given value.
constexpr uint8_t rank0(uint_t v)
Definition: rank_64bit.hpp:122
constexpr uint8_t SELECT_FAIL
Returned by select0 and select1 in case the searched bit does not exist in the given input value...
Implements a select data structure for a BitVector.
Definition: Select.hpp:20
constexpr uint8_t select0(uint_t v, uint8_t k)
Finds the position of the k-th 0-bit in the binary representation of the given value.
constexpr uint8_t rank1(uint8_t v)
Computes the amount of 1-bits in the binary representation of the given 8-bit value.
Definition: rank_64bit.hpp:51
void resize(size_type n)
Definition: IntVector.hpp:327
Select(Select &&other)
Move constructor.
Definition: Select.hpp:63
size_t select(size_t x) const
Finds the position of the x-th flagged bit in the bit vector.
Definition: Select.hpp:191
Select()
Default constructor.
Definition: Select.hpp:45
IntVectorTrait< T >::internal_data_type internal_data_type
The element type of the internal data buffer accessed with data()
Definition: IntVector.hpp:191
internal_data_type * data() noexcept
Definition: IntVector.hpp:405
len_compact_t pos
Definition: LZSSFactors.hpp:38
size_t operator()(size_t k) const
Definition: Select.hpp:246
Select & operator=(Select &&other)
Move assignment.
Definition: Select.hpp:84
Select(const Select &other)
Copy constructor.
Definition: Select.hpp:53
IntVector< dynamic_t > DynamicIntVector
Represents an integer vector with unspecified (dynamic) bit width.
Definition: IntVector.hpp:553
size_type size() const
Definition: IntVector.hpp:284