tudocomp
– The TU Dortmund Compression Framework
HashMap.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2017 Erik Rigtorp <erik@rigtorp.se>
3 
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 SOFTWARE.
21  */
22 
23 /*
24 HashMap
25 
26 A high performance hash map. Uses open addressing with linear
27 probing.
28 
29 Advantages:
30  - Predictable performance. Doesn't use the allocator unless load factor
31  grows beyond 50%. Linear probing ensures cash efficency.
32  - Deletes items by rearranging items and marking slots as empty instead of
33  marking items as deleted. This is keeps performance high when there
34  is a high rate of churn (many paired inserts and deletes) since otherwise
35  most slots would be marked deleted and probing would end up scanning
36  most of the table.
37 
38 Disadvantages:
39  - Significant performance degradation at high load factors.
40  - Maximum load factor hard coded to 50%, memory inefficient.
41  - Memory is not reclaimed on erase.
42  */
43 
44 #pragma once
45 
46 #include <cassert>
47 #include <cstddef>
48 #include <cstdint>
49 #include <limits>
50 #include <stdexcept>
51 #include <vector>
52 
54 namespace rigtorp {
55 
56 template <typename Key, typename T, typename Hash = std::hash<Key>,
57  typename KeyEqual = std::equal_to<void>>
58 class HashMap {
59 public:
60  using key_type = Key;
61  using mapped_type = T;
62  using value_type = std::pair<Key, T>;
63  using size_type = std::size_t;
64  using hasher = Hash;
65  using key_equal = KeyEqual;
66  using reference = value_type &;
67  using const_reference = const value_type &;
68  using buckets = std::vector<value_type>;
69 
70  template <typename ContT, typename IterVal> struct hm_iterator {
71  using value_type = IterVal;
72  using pointer = value_type *;
73  using reference = value_type &;
74  using iterator_category = std::forward_iterator_tag;
75 
76  bool operator==(const hm_iterator &other) const {
77  return other.hm_ == hm_ && other.idx_ == idx_;
78  }
79  bool operator!=(const hm_iterator &other) { return !(other == *this); }
80 
81  hm_iterator &operator++() {
82  ++idx_;
83  advance_past_empty();
84  return *this;
85  }
86 
87  reference operator*() const { return hm_->buckets_[idx_]; }
88  pointer operator->() const { return &hm_->buckets_[idx_]; }
89 
90  private:
91  explicit hm_iterator(ContT *hm) : hm_(hm) { advance_past_empty(); }
92  explicit hm_iterator(ContT *hm, size_type idx) : hm_(hm), idx_(idx) {}
93  template <typename OtherContT, typename OtherIterVal>
94  hm_iterator(const hm_iterator<OtherContT, OtherIterVal> &other)
95  : hm_(other.hm_), idx_(other.idx_) {}
96 
97  void advance_past_empty() {
98  while (idx_ < hm_->buckets_.size() &&
99  key_equal()(hm_->buckets_[idx_].first, hm_->empty_key_)) {
100  ++idx_;
101  }
102  }
103 
104  ContT *hm_ = nullptr;
105  typename ContT::size_type idx_ = 0;
106  friend ContT;
107  };
108 
109  using iterator = hm_iterator<HashMap, value_type>;
110  using const_iterator = hm_iterator<const HashMap, const value_type>;
111 
112 public:
113  HashMap(size_type bucket_count, key_type empty_key) : empty_key_(empty_key) {
114  size_t pow2 = 1;
115  while (pow2 < bucket_count) {
116  pow2 <<= 1;
117  }
118  buckets_.resize(pow2, std::make_pair(empty_key_, T()));
119  }
120 
121  HashMap(const HashMap &other, size_type bucket_count)
122  : HashMap(bucket_count, other.empty_key_) {
123  for (auto it = other.begin(); it != other.end(); ++it) {
124  insert(*it);
125  }
126  }
127 
128  // Iterators
129  iterator begin() { return iterator(this); }
130 
131  const_iterator begin() const { return const_iterator(this); }
132 
133  const_iterator cbegin() const { return const_iterator(this); }
134 
135  iterator end() { return iterator(this, buckets_.size()); }
136 
137  const_iterator end() const { return const_iterator(this, buckets_.size()); }
138 
139  const_iterator cend() const { return const_iterator(this, buckets_.size()); }
140 
141  // Capacity
142  bool empty() const { return size() == 0; }
143 
144  size_type size() const { return size_; }
145 
146  size_type max_size() const { return std::numeric_limits<size_type>::max(); }
147 
148  // Modifiers
149  void clear() {
150  HashMap other(bucket_count(), empty_key_);
151  swap(other);
152  }
153 
154  std::pair<iterator, bool> insert(const value_type &value) {
155  return emplace_impl(value.first, value.second);
156  }
157 
158  std::pair<iterator, bool> insert(value_type &&value) {
159  return emplace_impl(value.first, std::move(value.second));
160  }
161 
162  template <typename... Args>
163  std::pair<iterator, bool> emplace(Args &&... args) {
164  return emplace_impl(std::forward<Args>(args)...);
165  }
166 
167  void erase(iterator it) { erase_impl(it); }
168 
169  size_type erase(const key_type &key) { return erase_impl(key); }
170 
171  template <typename K> size_type erase(const K &x) { return erase_impl(x); }
172 
173  void swap(HashMap &other) {
174  std::swap(buckets_, other.buckets_);
175  std::swap(size_, other.size_);
176  std::swap(empty_key_, other.empty_key_);
177  }
178 
179  // Lookup
180  mapped_type &at(const key_type &key) { return at_impl(key); }
181 
182  template <typename K> mapped_type &at(const K &x) { return at_impl(x); }
183 
184  const mapped_type &at(const key_type &key) const { return at_impl(key); }
185 
186  template <typename K> const mapped_type &at(const K &x) const {
187  return at_impl(x);
188  }
189 
190  mapped_type &operator[](const key_type &key) {
191  return emplace_impl(key).first->second;
192  }
193 
194  size_type count(const key_type &key) const { return count_impl(key); }
195 
196  template <typename K> size_type count(const K &x) const {
197  return count_impl(x);
198  }
199 
200  iterator find(const key_type &key) { return find_impl(key); }
201 
202  template <typename K> iterator find(const K &x) { return find_impl(x); }
203 
204  const_iterator find(const key_type &key) const { return find_impl(key); }
205 
206  template <typename K> const_iterator find(const K &x) const {
207  return find_impl(x);
208  }
209 
210  // Bucket interface
211  size_type bucket_count() const noexcept { return buckets_.size(); }
212 
213  size_type max_bucket_count() const noexcept {
214  return std::numeric_limits<size_type>::max();
215  }
216 
217  // Hash policy
218  void rehash(size_type count) {
219  count = std::max(count, size() * 2);
220  HashMap other(*this, count);
221  swap(other);
222  }
223 
224  void reserve(size_type count) {
225  if (count * 2 > buckets_.size()) {
226  rehash(count * 2);
227  }
228  }
229 
230  // Observers
231  hasher hash_function() const { return hasher(); }
232 
233  key_equal key_eq() const { return key_equal(); }
234 
235 private:
236  template <typename K, typename... Args>
237  std::pair<iterator, bool> emplace_impl(const K &key, Args &&... args) {
238  assert(!key_equal()(empty_key_, key) && "empty key shouldn't be used");
239  reserve(size_ + 1);
240  for (size_t idx = key_to_idx(key);; idx = probe_next(idx)) {
241  if (key_equal()(buckets_[idx].first, empty_key_)) {
242  buckets_[idx].second = mapped_type(std::forward<Args>(args)...);
243  buckets_[idx].first = key;
244  size_++;
245  return {iterator(this, idx), true};
246  } else if (key_equal()(buckets_[idx].first, key)) {
247  return {iterator(this, idx), false};
248  }
249  }
250  }
251 
252  void erase_impl(iterator it) {
253  size_t bucket = it.idx_;
254  for (size_t idx = probe_next(bucket);; idx = probe_next(idx)) {
255  if (key_equal()(buckets_[idx].first, empty_key_)) {
256  buckets_[bucket].first = empty_key_;
257  size_--;
258  return;
259  }
260  size_t ideal = key_to_idx(buckets_[idx].first);
261  if (diff(bucket, ideal) < diff(idx, ideal)) {
262  // swap, bucket is closer to ideal than idx
263  buckets_[bucket] = buckets_[idx];
264  bucket = idx;
265  }
266  }
267  }
268 
269  template <typename K> size_type erase_impl(const K &key) {
270  auto it = find_impl(key);
271  if (it != end()) {
272  erase_impl(it);
273  return 1;
274  }
275  return 0;
276  }
277 
278  template <typename K> mapped_type &at_impl(const K &key) {
279  iterator it = find_impl(key);
280  if (it != end()) {
281  return it->second;
282  }
283  throw std::out_of_range("HashMap::at");
284  }
285 
286  template <typename K> const mapped_type &at_impl(const K &key) const {
287  return const_cast<HashMap *>(this)->at_impl(key);
288  }
289 
290  template <typename K> size_t count_impl(const K &key) const {
291  return find_impl(key) == end() ? 0 : 1;
292  }
293 
294  template <typename K> iterator find_impl(const K &key) {
295  assert(!key_equal()(empty_key_, key) && "empty key shouldn't be used");
296  for (size_t idx = key_to_idx(key);; idx = probe_next(idx)) {
297  if (key_equal()(buckets_[idx].first, key)) {
298  return iterator(this, idx);
299  }
300  if (key_equal()(buckets_[idx].first, empty_key_)) {
301  return end();
302  }
303  }
304  }
305 
306  template <typename K> const_iterator find_impl(const K &key) const {
307  return const_cast<HashMap *>(this)->find_impl(key);
308  }
309 
310  template <typename K> size_t key_to_idx(const K &key) const {
311  const size_t mask = buckets_.size() - 1;
312  return hasher()(key) & mask;
313  }
314 
315  size_t probe_next(size_t idx) const {
316  const size_t mask = buckets_.size() - 1;
317  return (idx + 1) & mask;
318  }
319 
320  size_t diff(size_t a, size_t b) const {
321  const size_t mask = buckets_.size() - 1;
322  return (buckets_.size() + (a - b)) & mask;
323  }
324 
325 private:
326  key_type empty_key_;
327  buckets buckets_;
328  size_t size_ = 0;
329 };
330 }
bool operator==(const Array< N, T > &lhs, const Array< N, T > &rhs)
Definition: HashArray.hpp:35
void swap(IntVector< T > &lhs, IntVector< T > &rhs)
Definition: IntVector.hpp:532
bool operator!=(const IntVector< T > &lhs, const IntVector< T > &rhs)
Definition: IntVector.hpp:507