tudocomp
– The TU Dortmund Compression Framework
cedar.hpp
Go to the documentation of this file.
1 // cedar -- C++ implementation of Efficiently-updatable Double ARray trie
2 // $Id: cedar.h 1814 2014-05-07 03:42:04Z ynaga $
3 // Copyright (c) 2009-2014 Naoki Yoshinaga <ynaga@tkl.iis.u-tokyo.ac.jp>
4 #pragma once
5 
6 #include <cstdio>
7 #include <cstdlib>
8 #include <cstring>
9 #include <cassert>
10 
11 #ifdef HAVE_CONFIG_H
12 #include "config.h"
13 #endif
14 
15 #define STATIC_ASSERT(e, msg) typedef char msg[(e) ? 1 : -1]
16 
18 
19 namespace cedar {
20  // typedefs
21  typedef unsigned char uchar;
22  template <typename T> struct NaN { enum { N1 = -1, N2 = -2 }; };
23  template <> struct NaN <float> { enum { N1 = 0x7f800001, N2 = 0x7f800002 }; };
24  static const int MAX_ALLOC_SIZE = 1 << 16; // must be divisible by 256
25  // dynamic double array
26  template <typename value_type,
27  const int NO_VALUE = NaN <value_type>::N1,
28  const int NO_PATH = NaN <value_type>::N2,
29  const bool ORDERED = true,
30  const int MAX_TRIAL = 1,
31  const size_t NUM_TRACKING_NODES = 0>
32  class da {
33  static_assert(sizeof (value_type) <= sizeof (int), "value_type size is larger than an int");
34  public:
35  enum error_code { CEDAR_NO_VALUE = NO_VALUE, CEDAR_NO_PATH = NO_PATH, CEDAR_VALUE_LIMIT = 2147483647 };
36  typedef value_type result_type;
37  struct result_pair_type {
38  value_type value;
39  size_t length; // prefix length
40  };
41  struct result_triple_type { // for predict ()
42  value_type value;
43  size_t length; // suffix length
44  size_t id; // node id of value
45  };
46  struct node {
47  union { int base_; value_type value; }; // negative means prev empty index
48  int check; // negative means next empty index
49  node (const int base__ = 0, const int check_ = 0)
50  : base_ (base__), check (check_) {}
51 #ifdef USE_REDUCED_TRIE
52  int base () const { return - (base_ + 1); } // ~ in two's complement system
53 #else
54  int base () const { return base_; }
55 #endif
56  };
57  struct ninfo { // x1.5 update speed; +.25 % memory (8n -> 10n)
58  uchar sibling; // right sibling (= 0 if not exist)
59  uchar child; // first child
60  ninfo () : sibling (0), child (0) {}
61  };
62  struct block { // a block w/ 256 elements
63  int prev; // prev block; 3 bytes
64  int next; // next block; 3 bytes
65  short num; // # empty elements; 0 - 256
66  short reject; // minimum # branching failed to locate; soft limit
67  int trial; // # trial
68  int ehead; // first empty item
69  block () : prev (0), next (0), num (256), reject (257), trial (0), ehead (0) {}
70  };
71  da () : tracking_node (), _array (0), _ninfo (0), _block (0), _bheadF (0), _bheadC (0), _bheadO (0), _capacity (0), _size (0), _no_delete (false), _reject () {
72  _initialize ();
73  }
74  ~da () { clear (false); }
75  size_t capacity () const { return static_cast <size_t> (_capacity); }
76  size_t size () const { return static_cast <size_t> (_size); }
77  size_t total_size () const { return sizeof (node) * _size; }
78  size_t unit_size () const { return sizeof (node); }
79  size_t nonzero_size () const {
80  size_t i = 0;
81  for (int to = 0; to < _size; ++to)
82  if (_array[to].check >= 0) ++i;
83  return i;
84  }
85  size_t num_keys () const {
86  size_t i = 0;
87  for (int to = 0; to < _size; ++to)
88 #ifdef USE_REDUCED_TRIE
89  if (_array[to].check >= 0 && _array[to].value >= 0) ++i;
90 #else
91  if (_array[to].check >= 0 && _array[_array[to].check].base () == to) ++i;
92 #endif
93  return i;
94  }
95  // interfance
96  template <typename T>
97  T exactMatchSearch (const char* key) const
98  { return exactMatchSearch <T> (key, std::strlen (key)); }
99  template <typename T>
100  T exactMatchSearch (const char* key, size_t len, size_t from = 0) const {
101  union { int i; value_type x; } b;
102  size_t pos = 0;
103  b.i = _find (key, from, pos, len);
104  if (b.i == CEDAR_NO_PATH) b.i = CEDAR_NO_VALUE;
105  T result;
106  _set_result (&result, b.x, len, from);
107  return result;
108  }
109  template <typename T>
110  size_t commonPrefixSearch (const char* key, T* result, size_t result_len) const
111  { return commonPrefixSearch (key, result, result_len, std::strlen (key)); }
112  template <typename T>
113  size_t commonPrefixSearch (const char* key, T* result, size_t result_len, size_t len, size_t from = 0) const {
114  size_t num = 0;
115  for (size_t pos = 0; pos < len; ) {
116  union { int i; value_type x; } b;
117  b.i = _find (key, from, pos, pos + 1);
118  if (b.i == CEDAR_NO_VALUE) continue;
119  if (b.i == CEDAR_NO_PATH) return num;
120  if (num < result_len) _set_result (&result[num], b.x, pos, from);
121  ++num;
122  }
123  return num;
124  }
125  // predict key from double array
126  template <typename T>
127  size_t commonPrefixPredict (const char* key, T* result, size_t result_len)
128  { return commonPrefixPredict (key, result, result_len, std::strlen (key)); }
129  template <typename T>
130  size_t commonPrefixPredict (const char* key, T* result, size_t result_len, size_t len, size_t from = 0) {
131  size_t num (0), pos (0), p (0);
132  if (_find (key, from, pos, len) == CEDAR_NO_PATH) return 0;
133  union { int i; value_type x; } b;
134  size_t root = from;
135  for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p, root)) {
136  if (num < result_len) _set_result (&result[num], b.x, p, from);
137  ++num;
138  }
139  return num;
140  }
141  void suffix (char* key, size_t len, size_t to) const {
142  key[len] = '\0';
143  while (len--) {
144  const int from = _array[to].check;
145  key[len]
146  = static_cast <char> (_array[from].base () ^ static_cast <int> (to));
147  to = static_cast <size_t> (from);
148  }
149  }
150  value_type traverse (const char* key, size_t& from, size_t& pos) const
151  { return traverse (key, from, pos, std::strlen (key)); }
152  value_type traverse (const char* key, size_t& from, size_t& pos, size_t len) const {
153  union { int i; value_type x; } b;
154  b.i = _find (key, from, pos, len);
155  return b.x;
156  }
157  struct empty_callback { void operator () (const int, const int) {} }; // dummy empty function
158  value_type& update (const char* key)
159  { return update (key, std::strlen (key)); }
160  value_type& update (const char* key, size_t len, value_type val = value_type (0))
161  { size_t from (0), pos (0); return update (key, from, pos, len, val); }
162  value_type& update (const char* key, size_t& from, size_t& pos, size_t len, value_type val = value_type (0))
163  { empty_callback cf; return update (key, from, pos, len, val, cf); }
164  template <typename T>
165  value_type& update (const char* key, size_t& from, size_t& pos, size_t len, value_type val, T& cf) {
166  if (! len && ! from)
167  _err (__FILE__, __LINE__, "failed to insert zero-length key\n");
168 #ifndef USE_FAST_LOAD
169  if (! _ninfo || ! _block) restore ();
170 #endif
171  for (const uchar* const key_ = reinterpret_cast <const uchar*> (key);
172  pos < len; ++pos) {
173 #ifdef USE_REDUCED_TRIE
174  const value_type val_ = _array[from].value;
175  if (val_ >= 0 && val_ != CEDAR_VALUE_LIMIT) // always new; correct this!
176  { const int to = _follow (from, 0, cf); _array[to].value = val_; }
177 #endif
178  from = static_cast <size_t> (_follow (from, key_[pos], cf));
179  }
180 #ifdef USE_REDUCED_TRIE
181  const int to = _array[from].value >= 0 ? static_cast <int> (from) : _follow (from, 0, cf);
182  if (_array[to].value == CEDAR_VALUE_LIMIT) _array[to].value = 0;
183 #else
184  const int to = _follow (from, 0, cf);
185 #endif
186  return _array[to].value += val;
187  }
188  // easy-going erase () without compression
189  int erase (const char* key) { return erase (key, std::strlen (key)); }
190  int erase (const char* key, size_t len, size_t from = 0) {
191  size_t pos = 0;
192  const int i = _find (key, from, pos, len);
193  if (i == CEDAR_NO_PATH || i == CEDAR_NO_VALUE) return -1;
194  erase (from);
195  return 0;
196  }
197  void erase (size_t from) {
198  // _test ();
199 #ifdef USE_REDUCED_TRIE
200  int e = _array[from].value >= 0 ? static_cast <int> (from) : _array[from].base () ^ 0;
201  from = static_cast <size_t> (_array[e].check);
202 #else
203  int e = _array[from].base () ^ 0;
204 #endif
205  bool flag = false; // have sibling
206  do {
207  const node& n = _array[from];
208  flag = _ninfo[n.base () ^ _ninfo[from].child].sibling;
209  if (flag) _pop_sibling (from, n.base (), static_cast <uchar> (n.base () ^ e));
210  _push_enode (e);
211  e = static_cast <int> (from);
212  from = static_cast <size_t> (_array[from].check);
213  } while (! flag);
214  }
215  int build (size_t num, const char** key, const size_t* len = 0, const value_type* val = 0) {
216  for (size_t i = 0; i < num; ++i)
217  update (key[i], len ? len[i] : std::strlen (key[i]), val ? val[i] : value_type (i));
218  return 0;
219  }
220  template <typename T>
221  void dump (T* result, const size_t result_len) {
222  union { int i; value_type x; } b;
223  size_t num (0), from (0), p (0);
224  for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p))
225  if (num < result_len)
226  _set_result (&result[num++], b.x, p, from);
227  else
228  _err (__FILE__, __LINE__, "dump() needs array of length = num_keys()\n");
229  }
230  int save (const char* fn, const char* mode = "wb") const {
231  // _test ();
232  FILE* fp = std::fopen (fn, mode);
233  if (! fp) return -1;
234  std::fwrite (_array, sizeof (node), static_cast <size_t> (_size), fp);
235  std::fclose (fp);
236 #ifdef USE_FAST_LOAD
237  const char* const info
238  = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl");
239  fp = std::fopen (info, mode);
240  delete [] info; // resolve memory leak
241  if (! fp) return -1;
242  std::fwrite (&_bheadF, sizeof (int), 1, fp);
243  std::fwrite (&_bheadC, sizeof (int), 1, fp);
244  std::fwrite (&_bheadO, sizeof (int), 1, fp);
245  std::fwrite (_ninfo, sizeof (ninfo), static_cast <size_t> (_size), fp);
246  std::fwrite (_block, sizeof (block), static_cast <size_t> (_size >> 8), fp);
247  std::fclose (fp);
248 #endif
249  return 0;
250  }
251  int open (const char* fn, const char* mode = "rb",
252  const size_t offset = 0, size_t size_ = 0) {
253  FILE* fp = std::fopen (fn, mode);
254  if (! fp) return -1;
255  // get size
256  if (! size_) {
257  if (std::fseek (fp, 0, SEEK_END) != 0) return -1;
258  size_ = static_cast <size_t> (std::ftell (fp));
259  if (std::fseek (fp, 0, SEEK_SET) != 0) return -1;
260  }
261  if (size_ <= offset) return -1;
262  // set array
263  clear (false);
264  size_ = (size_ - offset) / sizeof (node);
265  if (std::fseek (fp, static_cast <long> (offset), SEEK_SET) != 0) return -1;
266  _array = static_cast <node*> (std::malloc (sizeof (node) * size_));
267 #ifdef USE_FAST_LOAD
268  _ninfo = static_cast <ninfo*> (std::malloc (sizeof (ninfo) * size_));
269  _block = static_cast <block*> (std::malloc (sizeof (block) * size_));
270  if (! _array || ! _ninfo || ! _block)
271 #else
272  if (! _array)
273 #endif
274  _err (__FILE__, __LINE__, "memory allocation failed\n");
275  if (size_ != std::fread (_array, sizeof (node), size_, fp)) return -1;
276  std::fclose (fp);
277  _size = static_cast <int> (size_);
278 #ifdef USE_FAST_LOAD
279  const char* const info
280  = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl");
281  fp = std::fopen (info, mode);
282  delete [] info; // resolve memory leak
283  if (! fp) return -1;
284  std::fread (&_bheadF, sizeof (int), 1, fp);
285  std::fread (&_bheadC, sizeof (int), 1, fp);
286  std::fread (&_bheadO, sizeof (int), 1, fp);
287  if (size_ != std::fread (_ninfo, sizeof (ninfo), size_, fp) ||
288  size_ != std::fread (_block, sizeof (block), size_ >> 8, fp) << 8)
289  return -1;
290  std::fclose (fp);
291  _capacity = _size;
292 #endif
293  return 0;
294  }
295 #ifndef USE_FAST_LOAD
296  void restore () { // restore information to update
297  if (! _block) _restore_block ();
298  if (! _ninfo) _restore_ninfo ();
299  _capacity = _size;
300  }
301 #endif
302  void set_array (void* p, size_t size_ = 0) { // ad-hoc
303  clear (false);
304  _array = static_cast <node*> (p);
305  _size = static_cast <int> (size_);
306  _no_delete = true;
307  }
308  const void* array () const { return _array; }
309  void clear (const bool reuse = true) {
310  if (_array && ! _no_delete) { std::free (_array); _array = 0; }
311  if (_ninfo) { std::free (_ninfo); _ninfo = 0; }
312  if (_block) { std::free (_block); _block = 0; }
313  _bheadF = _bheadC = _bheadO = _capacity = _size = 0; // *
314  if (reuse) _initialize ();
315  _no_delete = false;
316  }
317  // return the first child for a tree rooted by a given node
318  int begin (size_t& from, size_t& len) {
319 #ifndef USE_FAST_LOAD
320  if (! _ninfo) _restore_ninfo ();
321 #endif
322  int base = _array[from].base ();
323  uchar c = _ninfo[from].child;
324  if (! from && ! (c = _ninfo[base ^ c].sibling)) // bug fix
325  return CEDAR_NO_PATH; // no entry
326  for (; c; ++len) {
327  from = static_cast <size_t> (_array[from].base ()) ^ c;
328  c = _ninfo[from].child;
329  }
330 #ifdef USE_REDUCED_TRIE
331  if (_array[from].value >= 0) return _array[from].value;
332 #endif
333  return _array[_array[from].base () ^ c].base_;
334  }
335  // return the next child if any
336  int next (size_t& from, size_t& len, const size_t root = 0) {
337  uchar c = 0;
338 #ifdef USE_REDUCED_TRIE
339  if (_array[from].value < 0)
340 #endif
341  c = _ninfo[_array[from].base () ^ 0].sibling;
342  for (; ! c && from != root; --len) {
343  c = _ninfo[from].sibling;
344  from = static_cast <size_t> (_array[from].check);
345  }
346  return c ?
347  begin (from = static_cast <size_t> (_array[from].base ()) ^ c, ++len) :
348  CEDAR_NO_PATH;
349  }
350  // test the validity of double array for debug
351  void test (const size_t from = 0) const {
352  const int base = _array[from].base ();
353  uchar c = _ninfo[from].child;
354  do {
355  if (from) assert (_array[base ^ c].check == static_cast <int> (from));
356  if (c && _array[base ^ c].value < 0) // correct this
357  test (static_cast <size_t> (base ^ c));
358  } while ((c = _ninfo[base ^ c].sibling));
359  }
360  size_t tracking_node[NUM_TRACKING_NODES + 1];
361  private:
362  // currently disabled; implement these if you need
363  da (const da&);
364  da& operator= (const da&);
365  node* _array;
366  ninfo* _ninfo;
367  block* _block;
368  int _bheadF; // first block of Full; 0
369  int _bheadC; // first block of Closed; 0 if no Closed
370  int _bheadO; // first block of Open; 0 if no Open
371  int _capacity;
372  int _size;
373  int _no_delete;
374  short _reject[257];
375  //
376  static void _err (const char* fn, const int ln, const char* msg)
377  { std::fprintf (stderr, "cedar: %s [%d]: %s", fn, ln, msg); std::exit (1); }
378  template <typename T>
379  static void _realloc_array (T*& p, const int size_n, const int size_p = 0) {
380  void* tmp = std::realloc (p, sizeof (T) * static_cast <size_t> (size_n));
381  if (! tmp)
382  std::free (p), _err (__FILE__, __LINE__, "memory reallocation failed\n");
383  p = static_cast <T*> (tmp);
384  static const T T0 = T ();
385  for (T* q (p + size_p), * const r (p + size_n); q != r; ++q) *q = T0;
386  }
387  void _initialize () { // initilize the first special block
388  _realloc_array (_array, 256, 256);
389  _realloc_array (_ninfo, 256);
390  _realloc_array (_block, 1);
391 #ifdef USE_REDUCED_TRIE
392  _array[0] = node (-1, -1);
393 #else
394  _array[0] = node (0, -1);
395 #endif
396  for (int i = 1; i < 256; ++i)
397  _array[i] = node (i == 1 ? -255 : - (i - 1), i == 255 ? -1 : - (i + 1));
398  _block[0].ehead = 1; // bug fix for erase
399  _capacity = _size = 256;
400  for (size_t i = 0 ; i <= NUM_TRACKING_NODES; ++i) tracking_node[i] = 0;
401  for (short i = 0; i <= 256; ++i) _reject[i] = i + 1;
402  }
403  // follow/create edge
404  template <typename T>
405  int _follow (size_t& from, const uchar& label, T& cf) {
406  int to = 0;
407  const int base = _array[from].base ();
408  if (base < 0 || _array[to = base ^ label].check < 0) {
409  to = _pop_enode (base, label, static_cast <int> (from));
410  _push_sibling (from, to ^ label, label, base >= 0);
411  } else if (_array[to].check != static_cast <int> (from))
412  to = _resolve (from, base, label, cf);
413  return to;
414  }
415  // find key from double array
416  int _find (const char* key, size_t& from, size_t& pos, const size_t len) const {
417  for (const uchar* const key_ = reinterpret_cast <const uchar*> (key);
418  pos < len; ) { // follow link
419 #ifdef USE_REDUCED_TRIE
420  if (_array[from].value >= 0) break;
421 #endif
422  size_t to = static_cast <size_t> (_array[from].base ()); to ^= key_[pos];
423  if (_array[to].check != static_cast <int> (from)) return CEDAR_NO_PATH;
424  ++pos;
425  from = to;
426  }
427 #ifdef USE_REDUCED_TRIE
428  if (_array[from].value >= 0) // get value from leaf
429  return pos == len ? _array[from].value : CEDAR_NO_PATH; // only allow integer key
430 #endif
431  const node n = _array[_array[from].base () ^ 0];
432  if (n.check != static_cast <int> (from)) return CEDAR_NO_VALUE;
433  return n.base_;
434  }
435 #ifndef USE_FAST_LOAD
436  void _restore_ninfo () {
437  _realloc_array (_ninfo, _size);
438  for (int to = 0; to < _size; ++to) {
439  const int from = _array[to].check;
440  if (from < 0) continue; // skip empty node
441  const int base = _array[from].base ();
442  if (const uchar label = static_cast <uchar> (base ^ to)) // skip leaf
443  _push_sibling (static_cast <size_t> (from), base, label,
444  ! from || _ninfo[from].child || _array[base ^ 0].check == from);
445  }
446  }
447  void _restore_block () {
448  _realloc_array (_block, _size >> 8);
449  _bheadF = _bheadC = _bheadO = 0;
450  for (int bi (0), e (0); e < _size; ++bi) { // register blocks to full
451  block& b = _block[bi];
452  b.num = 0;
453  for (; e < (bi << 8) + 256; ++e)
454  if (_array[e].check < 0 && ++b.num == 1) b.ehead = e;
455  int& head_out = b.num == 1 ? _bheadC : (b.num == 0 ? _bheadF : _bheadO);
456  _push_block (bi, head_out, ! head_out && b.num);
457  }
458  }
459 #endif
460  void _set_result (result_type* x, value_type r, size_t = 0, size_t = 0) const
461  { *x = r; }
462  void _set_result (result_pair_type* x, value_type r, size_t l, size_t = 0) const
463  { x->value = r; x->length = l; }
464  void _set_result (result_triple_type* x, value_type r, size_t l, size_t from) const
465  { x->value = r; x->length = l; x->id = from; }
466  void _pop_block (const int bi, int& head_in, const bool last) {
467  if (last) { // last one poped; Closed or Open
468  head_in = 0;
469  } else {
470  const block& b = _block[bi];
471  _block[b.prev].next = b.next;
472  _block[b.next].prev = b.prev;
473  if (bi == head_in) head_in = b.next;
474  }
475  }
476  void _push_block (const int bi, int& head_out, const bool empty) {
477  block& b = _block[bi];
478  if (empty) { // the destination is empty
479  head_out = b.prev = b.next = bi;
480  } else { // use most recently pushed
481  int& tail_out = _block[head_out].prev;
482  b.prev = tail_out;
483  b.next = head_out;
484  head_out = tail_out = _block[tail_out].next = bi;
485  }
486  }
487  int _add_block () {
488  if (_size == _capacity) { // allocate memory if needed
489 #ifdef USE_EXACT_FIT
490  _capacity += _size >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : _size;
491 #else
492  _capacity += _capacity;
493 #endif
494  _realloc_array (_array, _capacity, _capacity);
495  _realloc_array (_ninfo, _capacity, _size);
496  _realloc_array (_block, _capacity >> 8, _size >> 8);
497  }
498  _block[_size >> 8].ehead = _size;
499  _array[_size] = node (- (_size + 255), - (_size + 1));
500  for (int i = _size + 1; i < _size + 255; ++i)
501  _array[i] = node (-(i - 1), -(i + 1));
502  _array[_size + 255] = node (- (_size + 254), -_size);
503  _push_block (_size >> 8, _bheadO, ! _bheadO); // append to block Open
504  _size += 256;
505  return (_size >> 8) - 1;
506  }
507  // transfer block from one start w/ head_in to one start w/ head_out
508  void _transfer_block (const int bi, int& head_in, int& head_out) {
509  _pop_block (bi, head_in, bi == _block[bi].next);
510  _push_block (bi, head_out, ! head_out && _block[bi].num);
511  }
512  // pop empty node from block; never transfer the special block (bi = 0)
513  int _pop_enode (const int base, const uchar label, const int from) {
514  const int e = base < 0 ? _find_place () : base ^ label;
515  const int bi = e >> 8;
516  node& n = _array[e];
517  block& b = _block[bi];
518  if (--b.num == 0) {
519  if (bi) _transfer_block (bi, _bheadC, _bheadF); // Closed to Full
520  } else { // release empty node from empty ring
521  _array[-n.base_].check = n.check;
522  _array[-n.check].base_ = n.base_;
523  if (e == b.ehead) b.ehead = -n.check; // set ehead
524  if (bi && b.num == 1 && b.trial != MAX_TRIAL) // Open to Closed
525  _transfer_block (bi, _bheadO, _bheadC);
526  }
527  // initialize the released node
528 #ifdef USE_REDUCED_TRIE
529  n.value = CEDAR_VALUE_LIMIT; n.check = from;
530  if (base < 0) _array[from].base_ = - (e ^ label) - 1;
531 #else
532  if (label) n.base_ = -1; else n.value = value_type (0); n.check = from;
533  if (base < 0) _array[from].base_ = e ^ label;
534 #endif
535  return e;
536  }
537  // push empty node into empty ring
538  void _push_enode (const int e) {
539  const int bi = e >> 8;
540  block& b = _block[bi];
541  if (++b.num == 1) { // Full to Closed
542  b.ehead = e;
543  _array[e] = node (-e, -e);
544  if (bi) _transfer_block (bi, _bheadF, _bheadC); // Full to Closed
545  } else {
546  const int prev = b.ehead;
547  const int next = -_array[prev].check;
548  _array[e] = node (-prev, -next);
549  _array[prev].check = _array[next].base_ = -e;
550  if (b.num == 2 || b.trial == MAX_TRIAL) // Closed to Open
551  if (bi) _transfer_block (bi, _bheadC, _bheadO);
552  b.trial = 0;
553  }
554  if (b.reject < _reject[b.num]) b.reject = _reject[b.num];
555  _ninfo[e] = ninfo (); // reset ninfo; no child, no sibling
556  }
557  // push label to from's child
558  void _push_sibling (const size_t from, const int base, const uchar label, const bool flag = true) {
559  uchar* c = &_ninfo[from].child;
560  if (flag && (ORDERED ? label > *c : ! *c))
561  do c = &_ninfo[base ^ *c].sibling; while (ORDERED && *c && *c < label);
562  _ninfo[base ^ label].sibling = *c, *c = label;
563  }
564  // pop label from from's child
565  void _pop_sibling (const size_t from, const int base, const uchar label) {
566  uchar* c = &_ninfo[from].child;
567  while (*c != label) c = &_ninfo[base ^ *c].sibling;
568  *c = _ninfo[base ^ label].sibling;
569  }
570  // check whether to replace branching w/ the newly added node
571  bool _consult (const int base_n, const int base_p, uchar c_n, uchar c_p) const {
572  do c_n = _ninfo[base_n ^ c_n].sibling, c_p = _ninfo[base_p ^ c_p].sibling;
573  while (c_n && c_p);
574  return c_p;
575  }
576  // enumerate (equal to or more than one) child nodes
577  uchar* _set_child (uchar* p, const int base, uchar c, const int label = -1) {
578  --p;
579  if (! c) { *++p = c; c = _ninfo[base ^ c].sibling; } // 0: terminal
580  if (ORDERED)
581  while (c && c < label) { *++p = c; c = _ninfo[base ^ c].sibling; }
582  if (label != -1) *++p = static_cast <uchar> (label);
583  while (c) { *++p = c; c = _ninfo[base ^ c].sibling; }
584  return p;
585  }
586  // explore new block to settle down
587  int _find_place () {
588  if (_bheadC) return _block[_bheadC].ehead;
589  if (_bheadO) return _block[_bheadO].ehead;
590  return _add_block () << 8;
591  }
592  int _find_place (const uchar* const first, const uchar* const last) {
593  if (int bi = _bheadO) {
594  const int bz = _block[_bheadO].prev;
595  const short nc = static_cast <short> (last - first + 1);
596  while (1) { // set candidate block
597  block& b = _block[bi];
598  if (b.num >= nc && nc < b.reject) // explore configuration
599  for (int e = b.ehead;;) {
600  const int base = e ^ *first;
601  for (const uchar* p = first; _array[base ^ *++p].check < 0; )
602  if (p == last) return b.ehead = e; // no conflict
603  if ((e = -_array[e].check) == b.ehead) break;
604  }
605  b.reject = nc;
606  if (b.reject < _reject[b.num]) _reject[b.num] = b.reject;
607  const int bi_ = b.next;
608  if (++b.trial == MAX_TRIAL) _transfer_block (bi, _bheadO, _bheadC);
609  if (bi == bz) break;
610  bi = bi_;
611  };
612  }
613  return _add_block () << 8;
614  }
615  // resolve conflict on base_n ^ label_n = base_p ^ label_p
616  template <typename T>
617  int _resolve (size_t& from_n, const int base_n, const uchar label_n, T& cf) {
618  // examine siblings of conflicted nodes
619  const int to_pn = base_n ^ label_n;
620  const int from_p = _array[to_pn].check;
621  const int base_p = _array[from_p].base ();
622  const bool flag // whether to replace siblings of newly added
623  = _consult (base_n, base_p, _ninfo[from_n].child, _ninfo[from_p].child);
624  uchar child[256];
625  uchar* const first = &child[0];
626  uchar* const last =
627  flag ? _set_child (first, base_n, _ninfo[from_n].child, label_n)
628  : _set_child (first, base_p, _ninfo[from_p].child);
629  const int base =
630  (first == last ? _find_place () : _find_place (first, last)) ^ *first;
631  // replace & modify empty list
632  const int from = flag ? static_cast <int> (from_n) : from_p;
633  const int base_ = flag ? base_n : base_p;
634  if (flag && *first == label_n) _ninfo[from].child = label_n; // new child
635 #ifdef USE_REDUCED_TRIE
636  _array[from].base_ = -base - 1; // new base
637 #else
638  _array[from].base_ = base; // new base
639 #endif
640  for (const uchar* p = first; p <= last; ++p) { // to_ => to
641  const int to = _pop_enode (base, *p, from);
642  const int to_ = base_ ^ *p;
643  _ninfo[to].sibling = (p == last ? 0 : *(p + 1));
644  if (flag && to_ == to_pn) continue; // skip newcomer (no child)
645  cf (to_, to); // user-defined callback function to handle moved nodes
646  node& n = _array[to];
647  node& n_ = _array[to_];
648 #ifdef USE_REDUCED_TRIE
649  if ((n.base_ = n_.base_) < 0 && *p) // copy base; bug fix
650 #else
651  if ((n.base_ = n_.base_) > 0 && *p) // copy base; bug fix
652 #endif
653  {
654  uchar c = _ninfo[to].child = _ninfo[to_].child;
655  do _array[n.base () ^ c].check = to; // adjust grand son's check
656  while ((c = _ninfo[n.base () ^ c].sibling));
657  }
658  if (! flag && to_ == static_cast <int> (from_n)) // parent node moved
659  from_n = static_cast <size_t> (to); // bug fix
660  if (! flag && to_ == to_pn) { // the address is immediately used
661  _push_sibling (from_n, to_pn ^ label_n, label_n);
662  _ninfo[to_].child = 0; // remember to reset child
663 #ifdef USE_REDUCED_TRIE
664  n_.value = CEDAR_VALUE_LIMIT;
665 #else
666  if (label_n) n_.base_ = -1; else n_.value = value_type (0);
667 #endif
668  n_.check = static_cast <int> (from_n);
669  } else
670  _push_enode (to_);
671  if (NUM_TRACKING_NODES) // keep the traversed node updated
672  for (size_t j = 0; tracking_node[j] != 0; ++j)
673  if (tracking_node[j] == static_cast <size_t> (to_))
674  { tracking_node[j] = static_cast <size_t> (to); break; }
675  }
676  return flag ? base ^ label_n : to_pn;
677  }
678  };
679 }
680 
682 
uint_impl_t & operator=(const uint_impl_t &b)
Definition: uint_t.hpp:43
uint64_t label(uint64_t left, uint64_t right)
Definition: esp_math.hpp:16
len_compact_t len
Definition: LZSSFactors.hpp:38
len_compact_t pos
Definition: LZSSFactors.hpp:38