Libosmium  2.20.0
Fast and flexible C++ library for working with OpenStreetMap data
id_set.hpp
Go to the documentation of this file.
1 #ifndef OSMIUM_INDEX_ID_SET_HPP
2 #define OSMIUM_INDEX_ID_SET_HPP
3 
4 /*
5 
6 This file is part of Osmium (https://osmcode.org/libosmium).
7 
8 Copyright 2013-2023 Jochen Topf <jochen@topf.org> and others (see README).
9 
10 Boost Software License - Version 1.0 - August 17th, 2003
11 
12 Permission is hereby granted, free of charge, to any person or organization
13 obtaining a copy of the software and accompanying documentation covered by
14 this license (the "Software") to use, reproduce, display, distribute,
15 execute, and transmit the Software, and to prepare derivative works of the
16 Software, and to permit third-parties to whom the Software is furnished to
17 do so, all subject to the following:
18 
19 The copyright notices in the Software and this entire statement, including
20 the above license grant, this restriction and the following disclaimer,
21 must be included in all copies of the Software, in whole or in part, and
22 all derivative works of the Software, unless such copies or derivative
23 works are solely in the form of machine-executable object code generated by
24 a source language processor.
25 
26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
29 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
30 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
31 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32 DEALINGS IN THE SOFTWARE.
33 
34 */
35 
36 #include <osmium/osm/item_type.hpp>
37 #include <osmium/osm/types.hpp>
38 
39 #include <algorithm>
40 #include <array>
41 #include <cassert>
42 #include <cstddef>
43 #include <cstring>
44 #include <iterator>
45 #include <memory>
46 #include <type_traits>
47 #include <vector>
48 
49 namespace osmium {
50 
51  namespace index {
52 
57  template <typename T>
58  class IdSet {
59 
60  public:
61 
62  IdSet() = default;
63 
64  IdSet(const IdSet&) = default;
65  IdSet& operator=(const IdSet&) = default;
66 
67  IdSet(IdSet&&) noexcept = default;
68  IdSet& operator=(IdSet&&) noexcept = default;
69 
70  virtual ~IdSet() noexcept = default;
71 
75  virtual void set(T id) = 0;
76 
80  virtual bool get(T id) const noexcept = 0;
81 
85  virtual bool empty() const = 0;
86 
90  virtual void clear() = 0;
91 
95  virtual std::size_t used_memory() const noexcept = 0;
96 
97  }; // class IdSet
98 
99  namespace detail {
100 
101  // This value is a compromise. For node Ids it could be bigger
102  // which would mean less (but larger) memory allocations. For
103  // relations Ids it could be smaller, because they would all fit
104  // into a smaller allocation.
105  enum : std::size_t {
106  default_chunk_bits = 22U
107  };
108 
109  } // namespace detail
110 
111  template <typename T, std::size_t chunk_bits = detail::default_chunk_bits>
112  class IdSetDense;
113 
117  template <typename T, std::size_t chunk_bits>
119 
120 
122 
123  const id_set* m_set;
126 
127  void next() noexcept {
128  while (m_value != m_last && !m_set->get(m_value)) {
129  const T cid = id_set::chunk_id(m_value);
130  assert(cid < m_set->m_data.size());
131  if (!m_set->m_data[cid]) {
132  m_value = (cid + 1) << (chunk_bits + 3);
133  } else {
134  const auto slot = m_set->m_data[cid][id_set::offset(m_value)];
135  if (slot == 0) {
136  m_value += 8;
137  m_value &= ~0x7ULL;
138  } else {
139  ++m_value;
140  }
141  }
142  }
143  }
144 
145  public:
146 
147  using iterator_category = std::forward_iterator_tag;
148  using value_type = T;
149  using pointer = value_type*;
151 
152  IdSetDenseIterator(const id_set* set, T value, T last) noexcept :
153  m_set(set),
154  m_value(value),
155  m_last(last) {
156  next();
157  }
158 
160  if (m_value != m_last) {
161  ++m_value;
162  next();
163  }
164  return *this;
165  }
166 
168  IdSetDenseIterator tmp{*this};
169  operator++();
170  return tmp;
171  }
172 
173  bool operator==(const IdSetDenseIterator& rhs) const noexcept {
174  return m_set == rhs.m_set && m_value == rhs.m_value;
175  }
176 
177  bool operator!=(const IdSetDenseIterator& rhs) const noexcept {
178  return !(*this == rhs);
179  }
180 
181  T operator*() const noexcept {
182  assert(m_value < m_last);
183  return m_value;
184  }
185 
186  }; // class IdSetDenseIterator
187 
195  template <typename T, std::size_t chunk_bits>
196  class IdSetDense : public IdSet<T> {
197 
198 
199  friend class IdSetDenseIterator<T, chunk_bits>;
200 
201  enum : std::size_t {
202  chunk_size = 1U << chunk_bits
203  };
204 
205  std::vector<std::unique_ptr<unsigned char[]>> m_data;
206  T m_size = 0;
207 
208  static std::size_t chunk_id(T id) noexcept {
209  return id >> (chunk_bits + 3U);
210  }
211 
212  static std::size_t offset(T id) noexcept {
213  return (id >> 3U) & ((1U << chunk_bits) - 1U);
214  }
215 
216  static unsigned int bitmask(T id) noexcept {
217  return 1U << (id & 0x7U);
218  }
219 
220  T last() const noexcept {
221  return static_cast<T>(m_data.size()) * chunk_size * 8;
222  }
223 
224  unsigned char& get_element(T id) {
225  const auto cid = chunk_id(id);
226  if (cid >= m_data.size()) {
227  m_data.resize(cid + 1);
228  }
229 
230  auto& chunk = m_data[cid];
231  if (!chunk) {
232  chunk.reset(new unsigned char[chunk_size]);
233  ::memset(chunk.get(), 0, chunk_size);
234  }
235 
236  return chunk[offset(id)];
237  }
238 
239  public:
240 
242 
243  friend void swap(IdSetDense& first, IdSetDense& second) noexcept {
244  using std::swap;
245  swap(first.m_data, second.m_data);
246  swap(first.m_size, second.m_size);
247  }
248 
249  IdSetDense() = default;
250 
251  IdSetDense(const IdSetDense& other) :
252  IdSet<T>(other),
253  m_size(other.m_size) {
254  m_data.reserve(other.m_data.size());
255  for (const auto& ptr : other.m_data) {
256  if (ptr) {
257  m_data.emplace_back(new unsigned char[chunk_size]);
258  ::memcpy(m_data.back().get(), ptr.get(), chunk_size);
259  } else {
260  m_data.emplace_back();
261  }
262  }
263  }
264 
266  swap(*this, other);
267  return *this;
268  }
269 
270  IdSetDense(IdSetDense&&) noexcept = default;
271 
272  // This should really be noexcept, but GCC 4.8 doesn't like it.
273  // NOLINTNEXTLINE(hicpp-noexcept-move, performance-noexcept-move-constructor)
274  IdSetDense& operator=(IdSetDense&&) = default;
275 
276  ~IdSetDense() noexcept override = default;
277 
284  bool check_and_set(T id) {
285  auto& element = get_element(id);
286 
287  if ((element & bitmask(id)) == 0) {
288  element |= bitmask(id);
289  ++m_size;
290  return true;
291  }
292 
293  return false;
294  }
295 
301  void set(T id) final {
302  (void)check_and_set(id);
303  }
304 
310  void unset(T id) {
311  auto& element = get_element(id);
312 
313  if ((element & bitmask(id)) != 0) {
314  element &= ~bitmask(id);
315  --m_size;
316  }
317  }
318 
324  bool get(T id) const noexcept final {
325  if (chunk_id(id) >= m_data.size()) {
326  return false;
327  }
328  const auto* r = m_data[chunk_id(id)].get();
329  if (!r) {
330  return false;
331  }
332  return (r[offset(id)] & bitmask(id)) != 0;
333  }
334 
338  bool empty() const noexcept final {
339  return m_size == 0;
340  }
341 
345  T size() const noexcept {
346  return m_size;
347  }
348 
352  void clear() final {
353  m_data.clear();
354  m_size = 0;
355  }
356 
357  std::size_t used_memory() const noexcept final {
358  return m_data.size() * chunk_size;
359  }
360 
362  return {this, 0, last()};
363  }
364 
365  const_iterator end() const {
366  return {this, last(), last()};
367  }
368 
369  }; // class IdSetDense
370 
375  template <typename T>
376  class IdSetSmall : public IdSet<T> {
377 
378  std::vector<T> m_data;
379 
380  public:
381 
382  IdSetSmall() = default;
383 
384  IdSetSmall(const IdSetSmall&) = default;
385  IdSetSmall& operator=(const IdSetSmall&) = default;
386 
387  IdSetSmall(IdSetSmall&&) noexcept = default;
388  IdSetSmall& operator=(IdSetSmall&&) noexcept = default;
389 
390  ~IdSetSmall() noexcept override = default;
391 
395  void set(T id) final {
396  if (m_data.empty() || m_data.back() != id) {
397  m_data.push_back(id);
398  }
399  }
400 
406  bool get(T id) const noexcept final {
407  const auto it = std::find(m_data.cbegin(), m_data.cend(), id);
408  return it != m_data.cend();
409  }
410 
421  bool get_binary_search(T id) const noexcept {
422  return std::binary_search(m_data.cbegin(), m_data.cend(), id);
423  }
424 
428  bool empty() const noexcept final {
429  return m_data.empty();
430  }
431 
435  void clear() final {
436  m_data.clear();
437  }
438 
444  void sort_unique() {
445  std::sort(m_data.begin(), m_data.end());
446  const auto last = std::unique(m_data.begin(), m_data.end());
447  m_data.erase(last, m_data.end());
448 
449  }
450 
457  std::size_t size() const noexcept {
458  return m_data.size();
459  }
460 
461  std::size_t used_memory() const noexcept final {
462  return m_data.capacity() * sizeof(T);
463  }
464 
471  void merge_sorted(const IdSetSmall<T>& other) {
472  std::vector<T> new_data;
473  new_data.reserve(m_data.size() + other.m_data.size());
474  std::set_union(m_data.cbegin(), m_data.cend(),
475  other.m_data.cbegin(), other.m_data.cend(),
476  std::back_inserter(new_data));
477  using std::swap;
478  swap(new_data, m_data);
479  }
480 
482  using const_iterator = typename std::vector<T>::const_iterator;
483 
484  const_iterator begin() const noexcept {
485  return m_data.cbegin();
486  }
487 
488  const_iterator end() const noexcept {
489  return m_data.cend();
490  }
491 
492  const_iterator cbegin() const noexcept {
493  return m_data.cbegin();
494  }
495 
496  const_iterator cend() const noexcept {
497  return m_data.cend();
498  }
499 
500  }; // class IdSetSmall
501 
502  } // namespace index
503 
504 } // namespace osmium
505 
506 #endif // OSMIUM_INDEX_ID_SET_HPP
Definition: id_set.hpp:118
value_type & reference
Definition: id_set.hpp:150
T m_last
Definition: id_set.hpp:125
void next() noexcept
Definition: id_set.hpp:127
IdSetDenseIterator operator++(int) noexcept
Definition: id_set.hpp:167
T m_value
Definition: id_set.hpp:124
IdSetDenseIterator(const id_set *set, T value, T last) noexcept
Definition: id_set.hpp:152
const id_set * m_set
Definition: id_set.hpp:123
bool operator!=(const IdSetDenseIterator &rhs) const noexcept
Definition: id_set.hpp:177
bool operator==(const IdSetDenseIterator &rhs) const noexcept
Definition: id_set.hpp:173
IdSetDenseIterator & operator++() noexcept
Definition: id_set.hpp:159
T value_type
Definition: id_set.hpp:148
std::forward_iterator_tag iterator_category
Definition: id_set.hpp:147
value_type * pointer
Definition: id_set.hpp:149
Definition: id_set.hpp:196
const_iterator begin() const
Definition: id_set.hpp:361
IdSetDense(const IdSetDense &other)
Definition: id_set.hpp:251
void set(T id) final
Definition: id_set.hpp:301
std::vector< std::unique_ptr< unsigned char[]> > m_data
Definition: id_set.hpp:205
IdSetDense(IdSetDense &&) noexcept=default
static std::size_t chunk_id(T id) noexcept
Definition: id_set.hpp:208
T last() const noexcept
Definition: id_set.hpp:220
friend void swap(IdSetDense &first, IdSetDense &second) noexcept
Definition: id_set.hpp:243
T size() const noexcept
Definition: id_set.hpp:345
void clear() final
Definition: id_set.hpp:352
const_iterator end() const
Definition: id_set.hpp:365
static std::size_t offset(T id) noexcept
Definition: id_set.hpp:212
static unsigned int bitmask(T id) noexcept
Definition: id_set.hpp:216
void unset(T id)
Definition: id_set.hpp:310
unsigned char & get_element(T id)
Definition: id_set.hpp:224
IdSetDense & operator=(IdSetDense other)
Definition: id_set.hpp:265
std::size_t used_memory() const noexcept final
Definition: id_set.hpp:357
bool get(T id) const noexcept final
Definition: id_set.hpp:324
bool empty() const noexcept final
Definition: id_set.hpp:338
Definition: id_set.hpp:376
std::size_t used_memory() const noexcept final
Definition: id_set.hpp:461
typename std::vector< T >::const_iterator const_iterator
Iterator type. There is no non-const iterator.
Definition: id_set.hpp:482
void merge_sorted(const IdSetSmall< T > &other)
Definition: id_set.hpp:471
const_iterator cend() const noexcept
Definition: id_set.hpp:496
const_iterator begin() const noexcept
Definition: id_set.hpp:484
IdSetSmall(const IdSetSmall &)=default
const_iterator end() const noexcept
Definition: id_set.hpp:488
const_iterator cbegin() const noexcept
Definition: id_set.hpp:492
std::vector< T > m_data
Definition: id_set.hpp:378
bool get_binary_search(T id) const noexcept
Definition: id_set.hpp:421
bool empty() const noexcept final
Definition: id_set.hpp:428
bool get(T id) const noexcept final
Definition: id_set.hpp:406
IdSetSmall & operator=(const IdSetSmall &)=default
void clear() final
Definition: id_set.hpp:435
void sort_unique()
Definition: id_set.hpp:444
IdSetSmall(IdSetSmall &&) noexcept=default
std::size_t size() const noexcept
Definition: id_set.hpp:457
Definition: id_set.hpp:58
virtual bool empty() const =0
virtual std::size_t used_memory() const noexcept=0
IdSet(const IdSet &)=default
virtual void clear()=0
virtual bool get(T id) const noexcept=0
virtual void set(T id)=0
IdSet & operator=(const IdSet &)=default
IdSet(IdSet &&) noexcept=default
Definition: attr.hpp:342
Namespace for everything in the Osmium library.
Definition: assembler.hpp:53
Definition: location.hpp:555