1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31//
32//
33// A sparsetable is a random container that implements a sparse array,
34// that is, an array that uses very little memory to store unassigned
35// indices (in this case, between 1-2 bits per unassigned index). For
36// instance, if you allocate an array of size 5 and assign a[2] = <big
37// struct>, then a[2] will take up a lot of memory but a[0], a[1],
38// a[3], and a[4] will not. Array elements that have a value are
39// called "assigned". Array elements that have no value yet, or have
40// had their value cleared using erase() or clear(), are called
41// "unassigned".
42//
43// Unassigned values seem to have the default value of T (see below).
44// Nevertheless, there is a difference between an unassigned index and
45// one explicitly assigned the value of T(). The latter is considered
46// assigned.
47//
48// Access to an array element is constant time, as is insertion and
49// deletion. Insertion and deletion may be fairly slow, however:
50// because of this container's memory economy, each insert and delete
51// causes a memory reallocation.
52//
53// NOTE: You should not test(), get(), or set() any index that is
54// greater than sparsetable.size(). If you need to do that, call
55// resize() first.
56//
57// --- Template parameters
58// PARAMETER DESCRIPTION DEFAULT
59// T The value of the array: the type of --
60// object that is stored in the array.
61//
62// GROUP_SIZE How large each "group" in the table 48
63// is (see below). Larger values use
64// a little less memory but cause most
65// operations to be a little slower
66//
67// Alloc: Allocator to use to allocate memory. libc_allocator_with_realloc
68//
69// --- Model of
70// Random Access Container
71//
72// --- Type requirements
73// T must be Copy Constructible. It need not be Assignable.
74//
75// --- Public base classes
76// None.
77//
78// --- Members
79// Type members
80//
81// MEMBER WHERE DEFINED DESCRIPTION
82// value_type container The type of object, T, stored in the array
83// allocator_type container Allocator to use
84// pointer container Pointer to p
85// const_pointer container Const pointer to p
86// reference container Reference to t
87// const_reference container Const reference to t
88// size_type container An unsigned integral type
89// difference_type container A signed integral type
90// iterator [*] container Iterator used to iterate over a sparsetable
91// const_iterator container Const iterator used to iterate over a table
92// reverse_iterator reversible Iterator used to iterate backwards over
93// container a sparsetable
94// const_reverse_iterator reversible container Guess
95// nonempty_iterator [+] sparsetable Iterates over assigned
96// array elements only
97// const_nonempty_iterator sparsetable Iterates over assigned
98// array elements only
99// reverse_nonempty_iterator sparsetable Iterates backwards over
100// assigned array elements only
101// const_reverse_nonempty_iterator sparsetable Iterates backwards over
102// assigned array elements only
103//
104// [*] All iterators are const in a sparsetable (though nonempty_iterators
105// may not be). Use get() and set() to assign values, not iterators.
106//
107// [+] iterators are random-access iterators. nonempty_iterators are
108// bidirectional iterators.
109
110// Iterator members
111// MEMBER WHERE DEFINED DESCRIPTION
112//
113// iterator begin() container An iterator to the beginning of the table
114// iterator end() container An iterator to the end of the table
115// const_iterator container A const_iterator pointing to the
116// begin() const beginning of a sparsetable
117// const_iterator container A const_iterator pointing to the
118// end() const end of a sparsetable
119//
// reverse_iterator reversible Points to beginning of a reversed
// rbegin() container sparsetable
// reverse_iterator reversible Points to end of a reversed table
// rend() container
// const_reverse_iterator reversible Points to beginning of a
// rbegin() const container reversed sparsetable
// const_reverse_iterator reversible Points to end of a reversed table
// rend() const container
128//
129// nonempty_iterator sparsetable Points to first assigned element
130// begin() of a sparsetable
131// nonempty_iterator sparsetable Points past last assigned element
132// end() of a sparsetable
133// const_nonempty_iterator sparsetable Points to first assigned element
134// begin() const of a sparsetable
135// const_nonempty_iterator sparsetable Points past last assigned element
136// end() const of a sparsetable
137//
138// reverse_nonempty_iterator sparsetable Points to first assigned element
139// begin() of a reversed sparsetable
140// reverse_nonempty_iterator sparsetable Points past last assigned element
141// end() of a reversed sparsetable
142// const_reverse_nonempty_iterator sparsetable Points to first assigned
143// begin() const elt of a reversed sparsetable
144// const_reverse_nonempty_iterator sparsetable Points past last assigned
145// end() const elt of a reversed sparsetable
146//
147//
148// Other members
149// MEMBER WHERE DEFINED DESCRIPTION
150// sparsetable() sparsetable A table of size 0; must resize()
151// before using.
152// sparsetable(size_type size) sparsetable A table of size size. All
153// indices are unassigned.
154// sparsetable(
155// const sparsetable &tbl) sparsetable Copy constructor
156// ~sparsetable() sparsetable The destructor
157// sparsetable &operator=( sparsetable The assignment operator
158// const sparsetable &tbl)
159//
160// void resize(size_type size) sparsetable Grow or shrink a table to
161// have size indices [*]
162//
163// void swap(sparsetable &x) sparsetable Swap two sparsetables
164// void swap(sparsetable &x, sparsetable Swap two sparsetables
165// sparsetable &y) (global, not member, function)
166//
167// size_type size() const sparsetable Number of "buckets" in the table
168// size_type max_size() const sparsetable Max allowed size of a sparsetable
169// bool empty() const sparsetable true if size() == 0
170// size_type num_nonempty() const sparsetable Number of assigned "buckets"
171//
172// const_reference get( sparsetable Value at index i, or default
173// size_type i) const value if i is unassigned
174// const_reference operator[]( sparsetable Identical to get(i) [+]
175// difference_type i) const
176// reference set(size_type i, sparsetable Set element at index i to
177// const_reference val) be a copy of val
178// bool test(size_type i) sparsetable True if element at index i
179// const has been assigned to
180// bool test(iterator pos) sparsetable True if element pointed to
181// const by pos has been assigned to
182// void erase(iterator pos) sparsetable Set element pointed to by
183// pos to be unassigned [!]
184// void erase(size_type i) sparsetable Set element i to be unassigned
185// void erase(iterator start, sparsetable Erases all elements between
186// iterator end) start and end
187// void clear() sparsetable Erases all elements in the table
188//
189// I/O versions exist for both FILE* and for File* (Google2-style files):
190// bool write_metadata(FILE *fp) sparsetable Writes a sparsetable to the
191// bool write_metadata(File *fp) given file. true if write
192// completes successfully
// bool read_metadata(FILE *fp) sparsetable Replaces sparsetable with
// bool read_metadata(File *fp) version read from fp. true
// if read completes successfully
196// bool write_nopointer_data(FILE *fp) Read/write the data stored in
197// bool read_nopointer_data(FILE*fp) the table, if it's simple
198//
199// bool operator==( forward Tests two tables for equality.
200// const sparsetable &t1, container This is a global function,
201// const sparsetable &t2) not a member function.
202// bool operator<( forward Lexicographical comparison.
203// const sparsetable &t1, container This is a global function,
204// const sparsetable &t2) not a member function.
205//
206// [*] If you shrink a sparsetable using resize(), assigned elements
207// past the end of the table are removed using erase(). If you grow
208// a sparsetable, new unassigned indices are created.
209//
210// [+] Note that operator[] returns a const reference. You must use
211// set() to change the value of a table element.
212//
213// [!] Unassignment also calls the destructor.
214//
215// Iterators are invalidated whenever an item is inserted or
216// deleted (ie set() or erase() is used) or when the size of
217// the table changes (ie resize() or clear() is used).
218//
219// See doc/sparsetable.html for more information about how to use this class.
220
221// Note: this uses STL style for naming, rather than Google naming.
222// That's because this is an STL-y container
223
224#pragma once
225
226#include <cstdlib> // for malloc/free
227#include <cstdio> // to read/write tables
228#include <cstring> // for memcpy
229#include <cstdint> // the normal place uint16_t is defined
230#include <cassert> // for bounds checking
231#include <iterator> // to define reverse_iterator for me
232#include <algorithm> // equal, lexicographical_compare, swap,...
233#include <memory> // uninitialized_copy, uninitialized_fill
234#include <vector> // a sparsetable is a vector of groups
235#include <type_traits>
236#include <sparsehash/internal/hashtable-common.h>
237#include <sparsehash/internal/libc_allocator_with_realloc.h>
238#include <sparsehash/traits>
239
240namespace google {
// The smaller this is, the faster lookup is (because the group bitmap is
// smaller) and the faster insert is, because there's less to move.
// On the other hand, there are more groups. Since group::size_type is
// a short, this number should be of the form 32*x + 16 to avoid waste.
// constexpr (rather than plain const): this is a compile-time constant
// used as a template default argument; constexpr guarantees constant
// initialization and permits use in constant expressions.
static constexpr uint16_t DEFAULT_SPARSEGROUP_SIZE = 48;  // fits in 1.5 words
246
247// Our iterator as simple as iterators can be: basically it's just
248// the index into our table. Dereference, the only complicated
249// thing, we punt to the table class. This just goes to show how
250// much machinery STL requires to do even the most trivial tasks.
251//
252// A NOTE ON ASSIGNING:
253// A sparse table does not actually allocate memory for entries
254// that are not filled. Because of this, it becomes complicated
255// to have a non-const iterator: we don't know, if the iterator points
256// to a not-filled bucket, whether you plan to fill it with something
257// or whether you plan to read its value (in which case you'll get
258// the default bucket value). Therefore, while we can define const
259// operations in a pretty 'normal' way, for non-const operations, we
260// define something that returns a helper object with operator= and
261// operator& that allocate a bucket lazily. We use this for table[]
262// and also for regular table iterators.
263
// Proxy object standing in for one bucket of a sparse table. Because
// unassigned buckets have no storage, a plain T& cannot be handed out;
// instead reads go through tabletype::get() and writes through
// tabletype::set(), which allocates the bucket lazily.
template <class tabletype>
class table_element_adaptor {
 public:
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::reference reference;
  typedef typename tabletype::pointer pointer;

  // Remember which table we belong to and which bucket we stand for.
  table_element_adaptor(tabletype* t, size_type i) : table_(t), pos_(i) {}

  // Writing through the adaptor routes to set(), which lazily
  // allocates the bucket.
  table_element_adaptor& operator=(const value_type& v) {
    table_->set(pos_, v);
    return *this;
  }

  // Reading converts to the stored value (the default value if the
  // bucket is unassigned).
  operator value_type() { return table_->get(pos_); }

  // Taking the address materializes the bucket and returns a real pointer.
  pointer operator&() { return &table_->mutating_get(pos_); }

 private:
  tabletype* table_;  // the table we stand in for
  size_type pos_;     // index of the bucket we adapt
};
284
285// Our iterator as simple as iterators can be: basically it's just
286// the index into our table. Dereference, the only complicated
287// thing, we punt to the table class. This just goes to show how
288// much machinery STL requires to do even the most trivial tasks.
289//
290// By templatizing over tabletype, we have one iterator type which
291// we can use for both sparsetables and sparsebins. In fact it
292// works on any class that allows size() and operator[] (eg vector),
293// as long as it does the standard STL typedefs too (eg value_type).
294
template <class tabletype>
class table_iterator {
 public:
  typedef table_iterator iterator;

  typedef std::random_access_iterator_tag iterator_category;
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::difference_type difference_type;
  typedef typename tabletype::size_type size_type;
  // Dereferencing yields an adaptor object, not a plain T&, so that
  // writes can be routed through table->set() (see table_element_adaptor).
  typedef table_element_adaptor<tabletype> reference;
  typedef table_element_adaptor<tabletype>* pointer;
  typedef typename tabletype::const_reference const_reference;  // we're const-only

  // The "real" constructor
  table_iterator(tabletype* tbl, size_type p) : table(tbl), pos(p) {}
  // The default constructor, used when I define vars of type table::iterator
  table_iterator() : table(NULL), pos(0) {}
  // The copy constructor, for when I say table::iterator foo = tbl.begin()
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // The main thing our iterator does is dereference. If the table entry
  // we point to is empty, we return the default value type.
  // This is the big different function from the const iterator.
  reference operator*() { return table_element_adaptor<tabletype>(table, pos); }
  // NOTE(review): if tabletype::const_reference is a true reference
  // type, this binds it to a temporary adaptor (converted through
  // operator value_type) and the returned reference dangles once the
  // full expression ends -- confirm against callers before relying on it.
  const_reference operator*() const { return table_element_adaptor<tabletype>(table, pos); }
  // NOTE(review): takes the address of the temporary adaptor returned
  // by operator*() above; the pointer is only valid within the full
  // expression. Historic sparsehash behavior -- use with care.
  pointer operator->() { return &(operator*()); }

  // Helper function to assert things are ok; eg pos is still in range.
  // pos == table->size() is allowed: that is the end() position.
  void check() const {
    assert(table);
    assert(pos <= table->size());
  }

  // Arithmetic: we just do arithmetic on pos. We don't even need to
  // do bounds checking, since STL doesn't consider that its job. :-)
  iterator& operator+=(size_type t) {
    pos += t;
    check();
    return *this;
  }
  iterator& operator-=(size_type t) {
    pos -= t;
    check();
    return *this;
  }
  iterator& operator++() {
    ++pos;
    check();
    return *this;
  }
  iterator& operator--() {
    --pos;
    check();
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);  // for x++
    ++pos;
    check();
    return tmp;
  }
  iterator operator--(int) {
    iterator tmp(*this);  // for x--
    --pos;
    check();
    return tmp;
  }
  iterator operator+(difference_type i) const {
    iterator tmp(*this);
    tmp += i;
    return tmp;
  }
  iterator operator-(difference_type i) const {
    iterator tmp(*this);
    tmp -= i;
    return tmp;
  }
  difference_type operator-(iterator it) const {  // for "x = it2 - it"
    assert(table == it.table);
    return pos - it.pos;
  }
  reference operator[](difference_type n) const {
    return *(*this + n);  // simple though not totally efficient
  }

  // Comparisons. Two iterators are equal only if they point into the
  // same table at the same index.
  bool operator==(const iterator& it) const {
    return table == it.table && pos == it.pos;
  }
  bool operator<(const iterator& it) const {
    assert(table == it.table);  // life is bad bad bad otherwise
    return pos < it.pos;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
  bool operator<=(const iterator& it) const { return !(it < *this); }
  bool operator>(const iterator& it) const { return it < *this; }
  bool operator>=(const iterator& it) const { return !(*this < it); }

  // Here's the info we actually need to be an iterator
  tabletype* table;  // so we can dereference and bounds-check
  size_type pos;     // index into the table
};
398
399// support for "3 + iterator" has to be defined outside the class, alas
400template <class T>
401table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
402 table_iterator<T> it) {
403 return it + i; // so people can say it2 = 3 + it
404}
405
406template <class tabletype>
407class const_table_iterator {
408 public:
409 typedef table_iterator<tabletype> iterator;
410 typedef const_table_iterator const_iterator;
411
412 typedef std::random_access_iterator_tag iterator_category;
413 typedef typename tabletype::value_type value_type;
414 typedef typename tabletype::difference_type difference_type;
415 typedef typename tabletype::size_type size_type;
416 typedef typename tabletype::const_reference reference; // we're const-only
417 typedef typename tabletype::const_pointer pointer;
418
419 // The "real" constructor
420 const_table_iterator(const tabletype* tbl, size_type p)
421 : table(tbl), pos(p) {}
422 // The default constructor, used when I define vars of type table::iterator
423 const_table_iterator() : table(NULL), pos(0) {}
424 // The copy constructor, for when I say table::iterator foo = tbl.begin()
425 // Also converts normal iterators to const iterators
426 const_table_iterator(const iterator& from)
427 : table(from.table), pos(from.pos) {}
428 // The default destructor is fine; we don't define one
429 // The default operator= is fine; we don't define one
430
431 // The main thing our iterator does is dereference. If the table entry
432 // we point to is empty, we return the default value type.
433 reference operator*() const { return (*table)[pos]; }
434 pointer operator->() const { return &(operator*()); }
435
436 // Helper function to assert things are ok; eg pos is still in range
437 void check() const {
438 assert(table);
439 assert(pos <= table->size());
440 }
441
442 // Arithmetic: we just do arithmetic on pos. We don't even need to
443 // do bounds checking, since STL doesn't consider that its job. :-)
444 const_iterator& operator+=(size_type t) {
445 pos += t;
446 check();
447 return *this;
448 }
449 const_iterator& operator-=(size_type t) {
450 pos -= t;
451 check();
452 return *this;
453 }
454 const_iterator& operator++() {
455 ++pos;
456 check();
457 return *this;
458 }
459 const_iterator& operator--() {
460 --pos;
461 check();
462 return *this;
463 }
464 const_iterator operator++(int) {
465 const_iterator tmp(*this); // for x++
466 ++pos;
467 check();
468 return tmp;
469 }
470 const_iterator operator--(int) {
471 const_iterator tmp(*this); // for x--
472 --pos;
473 check();
474 return tmp;
475 }
476 const_iterator operator+(difference_type i) const {
477 const_iterator tmp(*this);
478 tmp += i;
479 return tmp;
480 }
481 const_iterator operator-(difference_type i) const {
482 const_iterator tmp(*this);
483 tmp -= i;
484 return tmp;
485 }
486 difference_type operator-(const_iterator it) const { // for "x = it2 - it"
487 assert(table == it.table);
488 return pos - it.pos;
489 }
490 reference operator[](difference_type n) const {
491 return *(*this + n); // simple though not totally efficient
492 }
493
494 // Comparisons.
495 bool operator==(const const_iterator& it) const {
496 return table == it.table && pos == it.pos;
497 }
498 bool operator<(const const_iterator& it) const {
499 assert(table == it.table); // life is bad bad bad otherwise
500 return pos < it.pos;
501 }
502 bool operator!=(const const_iterator& it) const { return !(*this == it); }
503 bool operator<=(const const_iterator& it) const { return !(it < *this); }
504 bool operator>(const const_iterator& it) const { return it < *this; }
505 bool operator>=(const const_iterator& it) const { return !(*this < it); }
506
507 // Here's the info we actually need to be an iterator
508 const tabletype* table; // so we can dereference and bounds-check
509 size_type pos; // index into the table
510};
511
512// support for "3 + iterator" has to be defined outside the class, alas
513template <class T>
514const_table_iterator<T> operator+(
515 typename const_table_iterator<T>::difference_type i,
516 const_table_iterator<T> it) {
517 return it + i; // so people can say it2 = 3 + it
518}
519
520// ---------------------------------------------------------------------------
521
522/*
523// This is a 2-D iterator. You specify a begin and end over a list
524// of *containers*. We iterate over each container by iterating over
525// it. It's actually simple:
526// VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---,
527// | ________________________________________________/
528// | \_> VECTOR[1].begin() --------> VECTOR[1].end() -,
529// | ___________________________________________________/
530// v \_> ......
531// VECTOR.end()
532//
533// It's impossible to do random access on one of these things in constant
534// time, so it's just a bidirectional iterator.
535//
536// Unfortunately, because we need to use this for a non-empty iterator,
537// we use nonempty_begin() and nonempty_end() instead of begin() and end()
538// (though only going across, not down).
539*/
540
541#define TWOD_BEGIN_ nonempty_begin
542#define TWOD_END_ nonempty_end
543#define TWOD_ITER_ nonempty_iterator
544#define TWOD_CONST_ITER_ const_nonempty_iterator
545
// Walks every element of every row in [row_begin, row_end), traversing
// each row through its nonempty_begin()/nonempty_end() range (spelled
// via the TWOD_* macros defined above). Invariant: whenever
// row_current != row_end, col_current points at a valid element.
template <class containertype>
class two_d_iterator {
 public:
  typedef two_d_iterator iterator;

  typedef std::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor. begin and end specify how many rows we have
  // (in the diagram above); we always iterate over each row completely.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr)
      : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if (row_current != row_end) {
      col_current = row_current->TWOD_BEGIN_();
      advance_past_end();  // in case cur->begin() == cur->end()
    }
  }
  // If you want to start at an arbitrary place, you can, I guess
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr,
                 typename containertype::value_type::TWOD_ITER_ col)
      : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();  // in case cur->begin() == cur->end()
  }
  // The default constructor, used when I define vars of type table::iterator
  two_d_iterator() : row_begin(), row_end(), row_current(), col_current() {}
  // The default destructor is fine; we don't define one
  // The default operator= is fine; we don't define one

  // Happy dereferencer
  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  // Arithmetic: we just do arithmetic on pos. We don't even need to
  // do bounds checking, since STL doesn't consider that its job. :-)
  // NOTE: this is not amortized constant time! What do we do about it?
  // Restores the invariant above by skipping forward over any rows
  // whose nonempty range is exhausted (or empty to begin with).
  void advance_past_end() {  // used when col_current points to end()
    while (col_current == row_current->TWOD_END_()) {  // end of current row
      ++row_current;  // go to beginning of next
      if (row_current != row_end)  // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;  // don't go past row_end
    }
  }

  iterator& operator++() {
    assert(row_current != row_end);  // how to ++ from there?
    ++col_current;
    advance_past_end();  // in case col_current is at end()
    return *this;
  }
  // Step backwards: back up over empty/exhausted rows until we find a
  // row with a predecessor element, then step onto it.
  iterator& operator--() {
    while (row_current == row_end ||
           col_current == row_current->TWOD_BEGIN_()) {
      assert(row_current != row_begin);
      --row_current;
      col_current = row_current->TWOD_END_();  // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);
    ++*this;
    return tmp;
  }
  iterator operator--(int) {
    iterator tmp(*this);
    --*this;
    return tmp;
  }

  // Comparisons.
  // col_current is only meaningful when row_current != row_end, so two
  // end iterators compare equal regardless of their column state.
  bool operator==(const iterator& it) const {
    return (row_begin == it.row_begin && row_end == it.row_end &&
            row_current == it.row_current &&
            (row_current == row_end || col_current == it.col_current));
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }

  // Here's the info we actually need to be an iterator
  // These need to be public so we convert from iterator to const_iterator
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;
};
640
641// The same thing again, but this time const. :-(
// Const flavor of two_d_iterator; same traversal, read-only access.
template <class containertype>
class const_two_d_iterator {
 public:
  typedef const_two_d_iterator iterator;

  typedef std::bidirectional_iterator_tag iterator_category;
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::const_reference reference;
  typedef typename _tmp_vt::const_pointer pointer;

  // The "real" constructor: iterate rows [begin, end), starting at curr.
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr)
      : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if (curr != end) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();  // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary (row, column) position.
  const_two_d_iterator(typename containertype::const_iterator begin,
                       typename containertype::const_iterator end,
                       typename containertype::const_iterator curr,
                       typename containertype::value_type::TWOD_CONST_ITER_ col)
      : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();  // in case cur->begin() == cur->end()
  }
  // Default-constructed iterators are singular until assigned to.
  const_two_d_iterator()
      : row_begin(), row_end(), row_current(), col_current() {}
  // Need this explicitly so we can convert normal iterators to const
  // iterators
  const_two_d_iterator(const two_d_iterator<containertype>& it)
      : row_begin(it.row_begin),
        row_end(it.row_end),
        row_current(it.row_current),
        col_current(it.col_current) {}

  // Public so the converting constructor above can copy them.
  typename containertype::const_iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_CONST_ITER_ col_current;

  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
  reference operator*() const { return *col_current; }
  pointer operator->() const { return &(operator*()); }

  // Skip forward over rows whose nonempty range is exhausted (or empty).
  void advance_past_end() {  // used when col_current points to end()
    while (col_current == row_current->TWOD_END_()) {  // end of current row
      ++row_current;  // go to beginning of next
      if (row_current != row_end)  // col is irrelevant at end
        col_current = row_current->TWOD_BEGIN_();
      else
        break;  // don't go past row_end
    }
  }
  iterator& operator++() {
    assert(row_current != row_end);  // how to ++ from there?
    ++col_current;
    advance_past_end();  // in case col_current is at end()
    return *this;
  }
  // Back up over empty/exhausted rows, then step onto the predecessor.
  iterator& operator--() {
    while (row_current == row_end ||
           col_current == row_current->TWOD_BEGIN_()) {
      assert(row_current != row_begin);
      --row_current;
      col_current = row_current->TWOD_END_();  // this is 1 too far
    }
    --col_current;
    return *this;
  }
  iterator operator++(int) {
    iterator tmp(*this);
    ++*this;
    return tmp;
  }
  iterator operator--(int) {
    iterator tmp(*this);
    --*this;
    return tmp;
  }

  // Two end iterators compare equal regardless of column state.
  bool operator==(const iterator& it) const {
    return (row_begin == it.row_begin && row_end == it.row_end &&
            row_current == it.row_current &&
            (row_current == row_end || col_current == it.col_current));
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
731
732// We provide yet another version, to be as frugal with memory as
733// possible. This one frees each block of memory as it finishes
734// iterating over it. By the end, the entire table is freed.
735// For understandable reasons, you can only iterate over it once,
736// which is why it's an input iterator
737template <class containertype>
738class destructive_two_d_iterator {
739 public:
740 typedef destructive_two_d_iterator iterator;
741
742 typedef std::input_iterator_tag iterator_category;
743 // apparently some versions of VC++ have trouble with two ::'s in a typename
744 typedef typename containertype::value_type _tmp_vt;
745 typedef typename _tmp_vt::value_type value_type;
746 typedef typename _tmp_vt::difference_type difference_type;
747 typedef typename _tmp_vt::reference reference;
748 typedef typename _tmp_vt::pointer pointer;
749
750 destructive_two_d_iterator(typename containertype::iterator begin,
751 typename containertype::iterator end,
752 typename containertype::iterator curr)
753 : row_begin(begin), row_end(end), row_current(curr), col_current() {
754 if (curr != end) {
755 col_current = curr->TWOD_BEGIN_();
756 advance_past_end(); // in case cur->begin() == cur->end()
757 }
758 }
759 destructive_two_d_iterator(typename containertype::iterator begin,
760 typename containertype::iterator end,
761 typename containertype::iterator curr,
762 typename containertype::value_type::TWOD_ITER_ col)
763 : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
764 advance_past_end(); // in case cur->begin() == cur->end()
765 }
766 destructive_two_d_iterator()
767 : row_begin(), row_end(), row_current(), col_current() {}
768
769 typename containertype::iterator row_begin, row_end, row_current;
770 typename containertype::value_type::TWOD_ITER_ col_current;
771
772 // This is the part that destroys
773 void advance_past_end() { // used when col_current points to end()
774 while (col_current == row_current->TWOD_END_()) { // end of current row
775 row_current->clear(); // the destructive part
776 // It would be nice if we could decrement sparsetable->num_buckets
777 // here
778 ++row_current; // go to beginning of next
779 if (row_current != row_end) // col is irrelevant at end
780 col_current = row_current->TWOD_BEGIN_();
781 else
782 break; // don't go past row_end
783 }
784 }
785
786 // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
787 reference operator*() const { return *col_current; }
788 pointer operator->() const { return &(operator*()); }
789
  // Pre-increment: step to the next non-empty element, clearing any rows
  // exhausted along the way (via advance_past_end).  Must not be called
  // on an end iterator.
  iterator& operator++() {
    assert(row_current != row_end);  // how to ++ from there?
    ++col_current;
    advance_past_end();  // in case col_current is at end()
    return *this;
  }
796 iterator operator++(int) {
797 iterator tmp(*this);
798 ++*this;
799 return tmp;
800 }
801
802 bool operator==(const iterator& it) const {
803 return (row_begin == it.row_begin && row_end == it.row_end &&
804 row_current == it.row_current &&
805 (row_current == row_end || col_current == it.col_current));
806 }
807 bool operator!=(const iterator& it) const { return !(*this == it); }
808};
809
810#undef TWOD_BEGIN_
811#undef TWOD_END_
812#undef TWOD_ITER_
813#undef TWOD_CONST_ITER_
814
815// SPARSE-TABLE
816// ------------
817// The idea is that a table with (logically) t buckets is divided
818// into t/M *groups* of M buckets each. (M is a constant set in
819// GROUP_SIZE for efficiency.) Each group is stored sparsely.
820// Thus, inserting into the table causes some array to grow, which is
821// slow but still constant time. Lookup involves doing a
822// logical-position-to-sparse-position lookup, which is also slow but
823// constant time. The larger M is, the slower these operations are
824// but the less overhead (slightly).
825//
826// To store the sparse array, we store a bitmap B, where B[i] = 1 iff
827// bucket i is non-empty. Then to look up bucket i we really look up
828// array[# of 1s before i in B]. This is constant time for fixed M.
829//
830// Terminology: the position of an item in the overall table (from
831// 1 .. t) is called its "location." The logical position in a group
832// (from 1 .. M ) is called its "position." The actual location in
833// the array (from 1 .. # of non-empty buckets in the group) is
834// called its "offset."
835
836template <class T, uint16_t GROUP_SIZE, class Alloc>
837class sparsegroup {
838 public:
839 typedef T value_type;
840 typedef Alloc allocator_type;
841
842 private:
843 using value_alloc_type =
844 typename std::allocator_traits<Alloc>::template rebind_alloc<T>;
845 typedef std::integral_constant<
846 bool, (is_relocatable<value_type>::value &&
847 std::is_same<allocator_type,
848 libc_allocator_with_realloc<value_type>>::value)>
849 realloc_and_memmove_ok; // we pretend mv(x,y) == "x.~T();
850 // new(x) T(y)"
851 public:
852 // Basic types
853 typedef typename value_alloc_type::reference reference;
854 typedef typename value_alloc_type::const_reference const_reference;
855 typedef typename value_alloc_type::pointer pointer;
856 typedef typename value_alloc_type::const_pointer const_pointer;
857
858 typedef table_iterator<sparsegroup<T, GROUP_SIZE, Alloc>> iterator;
859 typedef const_table_iterator<sparsegroup<T, GROUP_SIZE, Alloc>>
860 const_iterator;
861 typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE, Alloc>>
862 element_adaptor;
863 typedef uint16_t size_type; // max # of buckets
864 typedef int16_t difference_type;
865 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
866 typedef std::reverse_iterator<iterator> reverse_iterator; // from iterator.h
867
868 // These are our special iterators, that go over non-empty buckets in a
869 // group. These aren't const-only because you can change non-empty bcks.
870 typedef pointer nonempty_iterator;
871 typedef const_pointer const_nonempty_iterator;
872 typedef std::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
873 typedef std::reverse_iterator<const_nonempty_iterator>
874 const_reverse_nonempty_iterator;
875
876 // Iterator functions
877 iterator begin() { return iterator(this, 0); }
878 const_iterator begin() const { return const_iterator(this, 0); }
879 iterator end() { return iterator(this, size()); }
880 const_iterator end() const { return const_iterator(this, size()); }
881 reverse_iterator rbegin() { return reverse_iterator(end()); }
882 const_reverse_iterator rbegin() const {
883 return const_reverse_iterator(end());
884 }
885 reverse_iterator rend() { return reverse_iterator(begin()); }
886 const_reverse_iterator rend() const {
887 return const_reverse_iterator(begin());
888 }
889
890 // We'll have versions for our special non-empty iterator too
891 nonempty_iterator nonempty_begin() { return group; }
892 const_nonempty_iterator nonempty_begin() const { return group; }
893 nonempty_iterator nonempty_end() { return group + settings.num_buckets; }
894 const_nonempty_iterator nonempty_end() const {
895 return group + settings.num_buckets;
896 }
897 reverse_nonempty_iterator nonempty_rbegin() {
898 return reverse_nonempty_iterator(nonempty_end());
899 }
900 const_reverse_nonempty_iterator nonempty_rbegin() const {
901 return const_reverse_nonempty_iterator(nonempty_end());
902 }
903 reverse_nonempty_iterator nonempty_rend() {
904 return reverse_nonempty_iterator(nonempty_begin());
905 }
906 const_reverse_nonempty_iterator nonempty_rend() const {
907 return const_reverse_nonempty_iterator(nonempty_begin());
908 }
909
910 // This gives us the "default" value to return for an empty bucket.
911 // We just use the default constructor on T, the template type
912 const_reference default_value() const {
913 static value_type defaultval = value_type();
914 return defaultval;
915 }
916
917 private:
918 // We need to do all this bit manipulation, of course. ick
919 static size_type charbit(size_type i) { return i >> 3; }
920 static size_type modbit(size_type i) { return 1 << (i & 7); }
921 int bmtest(size_type i) const { return bitmap[charbit(i)] & modbit(i); }
922 void bmset(size_type i) { bitmap[charbit(i)] |= modbit(i); }
923 void bmclear(size_type i) { bitmap[charbit(i)] &= ~modbit(i); }
924
925 pointer allocate_group(size_type n) {
926 pointer retval = settings.allocate(n);
927 if (retval == NULL) {
928 // We really should use PRIuS here, but I don't want to have to add
929 // a whole new configure option, with concomitant macro namespace
930 // pollution, just to print this (unlikely) error message. So I
931 // cast.
932 fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %lu groups\n",
933 static_cast<unsigned long>(n));
934 exit(1);
935 }
936 return retval;
937 }
938
939 void free_group() {
940 if (!group) return;
941 pointer end_it = group + settings.num_buckets;
942 for (pointer p = group; p != end_it; ++p) p->~value_type();
943 settings.deallocate(group, settings.num_buckets);
944 group = NULL;
945 }
946
947 static size_type bits_in_char(unsigned char c) {
948 // We could make these ints. The tradeoff is size (eg does it overwhelm
949 // the cache?) vs efficiency in referencing sub-word-sized array
950 // elements.
951 static const char bits_in[256] = {
952 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
953 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
954 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
955 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
956 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
957 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
958 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
959 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
960 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
961 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
962 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
963 };
964 return bits_in[c];
965 }
966
967 public: // get_iter() in sparsetable needs it
968 // We need a small function that tells us how many set bits there are
969 // in positions 0..i-1 of the bitmap. It uses a big table.
970 // We make it static so templates don't allocate lots of these tables.
971 // There are lots of ways to do this calculation (called 'popcount').
972 // The 8-bit table lookup is one of the fastest, though this
973 // implementation suffers from not doing any loop unrolling. See, eg,
974 // http://www.dalkescientific.com/writings/diary/archive/2008/07/03/hakmem_and_other_popcounts.html
975 // http://gurmeetsingh.wordpress.com/2008/08/05/fast-bit-counting-routines/
976 static size_type pos_to_offset(const unsigned char* bm, size_type pos) {
977 size_type retval = 0;
978
979 // [Note: condition pos > 8 is an optimization; convince yourself we
980 // give exactly the same result as if we had pos >= 8 here instead.]
981 for (; pos > 8; pos -= 8) // bm[0..pos/8-1]
982 retval += bits_in_char(*bm++); // chars we want *all* bits in
983 return retval + bits_in_char(*bm & ((1 << pos) - 1)); // char including pos
984 }
985
986 size_type pos_to_offset(size_type pos) const { // not static but still const
987 return pos_to_offset(bitmap, pos);
988 }
989
990 // Returns the (logical) position in the bm[] array, i, such that
991 // bm[i] is the offset-th set bit in the array. It is the inverse
992 // of pos_to_offset. get_pos() uses this function to find the index
993 // of an nonempty_iterator in the table. Bit-twiddling from
994 // http://hackersdelight.org/basics.pdf
995 static size_type offset_to_pos(const unsigned char* bm, size_type offset) {
996 size_type retval = 0;
997 // This is sizeof(this->bitmap).
998 const size_type group_size = (GROUP_SIZE - 1) / 8 + 1;
999 for (size_type i = 0; i < group_size; i++) { // forward scan
1000 const size_type pop_count = bits_in_char(*bm);
1001 if (pop_count > offset) {
1002 unsigned char last_bm = *bm;
1003 for (; offset > 0; offset--) {
1004 last_bm &= (last_bm - 1); // remove right-most set bit
1005 }
1006 // Clear all bits to the left of the rightmost bit (the &),
1007 // and then clear the rightmost bit but set all bits to the
1008 // right of it (the -1).
1009 last_bm = (last_bm & -last_bm) - 1;
1010 retval += bits_in_char(last_bm);
1011 return retval;
1012 }
1013 offset -= pop_count;
1014 retval += 8;
1015 bm++;
1016 }
1017 return retval;
1018 }
1019
1020 size_type offset_to_pos(size_type offset) const {
1021 return offset_to_pos(bitmap, offset);
1022 }
1023
1024 public:
1025 // Constructors -- default and copy -- and destructor
1026 explicit sparsegroup(allocator_type& a)
1027 : group(0), settings(alloc_impl<value_alloc_type>(a)) {
1028 memset(bitmap, 0, sizeof(bitmap));
1029 }
1030 sparsegroup(const sparsegroup& x) : group(0), settings(x.settings) {
1031 if (settings.num_buckets) {
1032 group = allocate_group(x.settings.num_buckets);
1033 std::uninitialized_copy(x.group, x.group + x.settings.num_buckets, group);
1034 }
1035 memcpy(bitmap, x.bitmap, sizeof(bitmap));
1036 }
1037 ~sparsegroup() { free_group(); }
1038
1039 // Operator= is just like the copy constructor, I guess
1040 // TODO(austern): Make this exception safe. Handle exceptions in
1041 // value_type's
1042 // copy constructor.
1043 sparsegroup& operator=(const sparsegroup& x) {
1044 if (&x == this) return *this; // x = x
1045 if (x.settings.num_buckets == 0) {
1046 free_group();
1047 } else {
1048 pointer p = allocate_group(x.settings.num_buckets);
1049 std::uninitialized_copy(x.group, x.group + x.settings.num_buckets, p);
1050 free_group();
1051 group = p;
1052 }
1053 memcpy(bitmap, x.bitmap, sizeof(bitmap));
1054 settings.num_buckets = x.settings.num_buckets;
1055 return *this;
1056 }
1057
1058 // Many STL algorithms use swap instead of copy constructors
1059 void swap(sparsegroup& x) {
1060 std::swap(group, x.group); // defined in <algorithm>
1061 for (int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i)
1062 std::swap(bitmap[i], x.bitmap[i]); // swap not defined on arrays
1063 std::swap(settings.num_buckets, x.settings.num_buckets);
1064 // we purposefully don't swap the allocator, which may not be swap-able
1065 }
1066
1067 // It's always nice to be able to clear a table without deallocating it
1068 void clear() {
1069 free_group();
1070 memset(bitmap, 0, sizeof(bitmap));
1071 settings.num_buckets = 0;
1072 }
1073
1074 // Functions that tell you about size. Alas, these aren't so useful
1075 // because our table is always fixed size.
1076 size_type size() const { return GROUP_SIZE; }
1077 size_type max_size() const { return GROUP_SIZE; }
1078 bool empty() const { return false; }
1079 // We also may want to know how many *used* buckets there are
1080 size_type num_nonempty() const { return settings.num_buckets; }
1081
1082 // get()/set() are explicitly const/non-const. You can use [] if
1083 // you want something that can be either (potentially more expensive).
1084 const_reference get(size_type i) const {
1085 if (bmtest(i)) // bucket i is occupied
1086 return group[pos_to_offset(bitmap, i)];
1087 else
1088 return default_value(); // return the default reference
1089 }
1090
1091 // TODO(csilvers): make protected + friend
1092 // This is used by sparse_hashtable to get an element from the table
1093 // when we know it exists.
1094 const_reference unsafe_get(size_type i) const {
1095 assert(bmtest(i));
1096 return group[pos_to_offset(bitmap, i)];
1097 }
1098
1099 // TODO(csilvers): make protected + friend
1100 reference mutating_get(size_type i) { // fills bucket i before getting
1101 if (!bmtest(i)) set(i, default_value());
1102 return group[pos_to_offset(bitmap, i)];
1103 }
1104
1105 // Syntactic sugar. It's easy to return a const reference. To
1106 // return a non-const reference, we need to use the assigner adaptor.
1107 const_reference operator[](size_type i) const { return get(i); }
1108
1109 element_adaptor operator[](size_type i) { return element_adaptor(this, i); }
1110
1111 private:
1112 // Create space at group[offset], assuming value_type has trivial
1113 // copy constructor and destructor, and the allocator_type is
1114 // the default libc_allocator_with_alloc. (Really, we want it to have
1115 // "trivial move", because that's what realloc and memmove both do.
1116 // But there's no way to capture that using type_traits, so we
1117 // pretend that move(x, y) is equivalent to "x.~T(); new(x) T(y);"
1118 // which is pretty much correct, if a bit conservative.)
1119 void set_aux(size_type offset, std::true_type) {
1120 group = settings.realloc_or_die(group, settings.num_buckets + 1);
1121 // This is equivalent to memmove(), but faster on my Intel P4,
1122 // at least with gcc4.1 -O2 / glibc 2.3.6.
1123 for (size_type i = settings.num_buckets; i > offset; --i)
1124 // cast to void* to prevent compiler warnings about writing to an object
1125 // with no trivial copy-assignment
1126 memcpy(static_cast<void*>(group + i), group + i - 1, sizeof(*group));
1127 }
1128
1129 // Create space at group[offset], without special assumptions about
1130 // value_type
1131 // and allocator_type.
1132 void set_aux(size_type offset, std::false_type) {
1133 // This is valid because 0 <= offset <= num_buckets
1134 pointer p = allocate_group(settings.num_buckets + 1);
1135 std::uninitialized_copy(group, group + offset, p);
1136 std::uninitialized_copy(group + offset, group + settings.num_buckets,
1137 p + offset + 1);
1138 free_group();
1139 group = p;
1140 }
1141
1142 public:
1143 // This returns a reference to the inserted item (which is a copy of val).
1144 // TODO(austern): Make this exception safe: handle exceptions from
1145 // value_type's copy constructor.
1146 reference set(size_type i, const_reference val) {
1147 size_type offset =
1148 pos_to_offset(bitmap, i); // where we'll find (or insert)
1149 if (bmtest(i)) {
1150 // Delete the old value, which we're replacing with the new one
1151 group[offset].~value_type();
1152 } else {
1153 set_aux(offset, realloc_and_memmove_ok());
1154 ++settings.num_buckets;
1155 bmset(i);
1156 }
1157 // This does the actual inserting. Since we made the array using
1158 // malloc, we use "placement new" to just call the constructor.
1159 new (&group[offset]) value_type(val);
1160 return group[offset];
1161 }
1162
1163 // We let you see if a bucket is non-empty without retrieving it
1164 bool test(size_type i) const { return bmtest(i) != 0; }
1165 bool test(iterator pos) const { return bmtest(pos.pos) != 0; }
1166
1167 private:
1168 // Shrink the array, assuming value_type has trivial copy
1169 // constructor and destructor, and the allocator_type is the default
1170 // libc_allocator_with_alloc. (Really, we want it to have "trivial
1171 // move", because that's what realloc and memmove both do. But
1172 // there's no way to capture that using type_traits, so we pretend
1173 // that move(x, y) is equivalent to ""x.~T(); new(x) T(y);"
1174 // which is pretty much correct, if a bit conservative.)
1175 void erase_aux(size_type offset, std::true_type) {
1176 // This isn't technically necessary, since we know we have a
1177 // trivial destructor, but is a cheap way to get a bit more safety.
1178 group[offset].~value_type();
1179 // This is equivalent to memmove(), but faster on my Intel P4,
1180 // at lesat with gcc4.1 -O2 / glibc 2.3.6.
1181 assert(settings.num_buckets > 0);
1182 for (size_type i = offset; i < settings.num_buckets - 1; ++i)
1183 // cast to void* to prevent compiler warnings about writing to an object
1184 // with no trivial copy-assignment
1185 // hopefully inlined!
1186 memcpy(static_cast<void*>(group + i), group + i + 1, sizeof(*group));
1187 group = settings.realloc_or_die(group, settings.num_buckets - 1);
1188 }
1189
1190 // Shrink the array, without any special assumptions about value_type and
1191 // allocator_type.
1192 void erase_aux(size_type offset, std::false_type) {
1193 // This is valid because 0 <= offset < num_buckets. Note the inequality.
1194 pointer p = allocate_group(settings.num_buckets - 1);
1195 std::uninitialized_copy(group, group + offset, p);
1196 std::uninitialized_copy(group + offset + 1, group + settings.num_buckets,
1197 p + offset);
1198 free_group();
1199 group = p;
1200 }
1201
1202 public:
1203 // This takes the specified elements out of the group. This is
1204 // "undefining", rather than "clearing".
1205 // TODO(austern): Make this exception safe: handle exceptions from
1206 // value_type's copy constructor.
1207 void erase(size_type i) {
1208 if (bmtest(i)) { // trivial to erase empty bucket
1209 size_type offset =
1210 pos_to_offset(bitmap, i); // where we'll find (or insert)
1211 if (settings.num_buckets == 1) {
1212 free_group();
1213 group = NULL;
1214 } else {
1215 erase_aux(offset, realloc_and_memmove_ok());
1216 }
1217 --settings.num_buckets;
1218 bmclear(i);
1219 }
1220 }
1221
1222 void erase(iterator pos) { erase(pos.pos); }
1223
1224 void erase(iterator start_it, iterator end_it) {
1225 // This could be more efficient, but to do so we'd need to make
1226 // bmclear() clear a range of indices. Doesn't seem worth it.
1227 for (; start_it != end_it; ++start_it) erase(start_it);
1228 }
1229
1230 // I/O
1231 // We support reading and writing groups to disk. We don't store
1232 // the actual array contents (which we don't know how to store),
1233 // just the bitmap and size. Meant to be used with table I/O.
1234
1235 template <typename OUTPUT>
1236 bool write_metadata(OUTPUT* fp) const {
1237 // we explicitly set to uint16_t
1238 assert(sizeof(settings.num_buckets) == 2);
1239 if (!sparsehash_internal::write_bigendian_number(fp, settings.num_buckets,
1240 2))
1241 return false;
1242 if (!sparsehash_internal::write_data(fp, bitmap, sizeof(bitmap)))
1243 return false;
1244 return true;
1245 }
1246
1247 // Reading destroys the old group contents! Returns true if all was ok.
1248 template <typename INPUT>
1249 bool read_metadata(INPUT* fp) {
1250 clear();
1251 if (!sparsehash_internal::read_bigendian_number(fp, &settings.num_buckets,
1252 2))
1253 return false;
1254 if (!sparsehash_internal::read_data(fp, bitmap, sizeof(bitmap)))
1255 return false;
1256 // We'll allocate the space, but we won't fill it: it will be
1257 // left as uninitialized raw memory.
1258 group = allocate_group(settings.num_buckets);
1259 return true;
1260 }
1261
1262 // Again, only meaningful if value_type is a POD.
1263 template <typename INPUT>
1264 bool read_nopointer_data(INPUT* fp) {
1265 for (nonempty_iterator it = nonempty_begin(); it != nonempty_end(); ++it) {
1266 if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it)))
1267 return false;
1268 }
1269 return true;
1270 }
1271
1272 // If your keys and values are simple enough, we can write them
1273 // to disk for you. "simple enough" means POD and no pointers.
1274 // However, we don't try to normalize endianness.
1275 template <typename OUTPUT>
1276 bool write_nopointer_data(OUTPUT* fp) const {
1277 for (const_nonempty_iterator it = nonempty_begin(); it != nonempty_end();
1278 ++it) {
1279 if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it)))
1280 return false;
1281 }
1282 return true;
1283 }
1284
1285 // Comparisons. We only need to define == and < -- we get
1286 // != > <= >= via relops.h (which we happily included above).
1287 // Note the comparisons are pretty arbitrary: we compare
1288 // values of the first index that isn't equal (using default
1289 // value for empty buckets).
1290 bool operator==(const sparsegroup& x) const {
1291 return (settings.num_buckets == x.settings.num_buckets &&
1292 memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
1293 std::equal(begin(), end(), x.begin())); // from <algorithm>
1294 }
1295
1296 bool operator<(const sparsegroup& x) const { // also from <algorithm>
1297 return std::lexicographical_compare(begin(), end(), x.begin(), x.end());
1298 }
1299 bool operator!=(const sparsegroup& x) const { return !(*this == x); }
1300 bool operator<=(const sparsegroup& x) const { return !(x < *this); }
1301 bool operator>(const sparsegroup& x) const { return x < *this; }
1302 bool operator>=(const sparsegroup& x) const { return !(*this < x); }
1303
1304 private:
1305 template <class A>
1306 class alloc_impl : public A {
1307 public:
1308 typedef typename A::pointer pointer;
1309 typedef typename A::size_type size_type;
1310
1311 // Convert a normal allocator to one that has realloc_or_die()
1312 alloc_impl(const A& a) : A(a) {}
1313
1314 // realloc_or_die should only be used when using the default
1315 // allocator (libc_allocator_with_realloc).
1316 pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
1317 fprintf(stderr,
1318 "realloc_or_die is only supported for "
1319 "libc_allocator_with_realloc\n");
1320 exit(1);
1321 return NULL;
1322 }
1323 };
1324
1325 // A template specialization of alloc_impl for
1326 // libc_allocator_with_realloc that can handle realloc_or_die.
1327 template <class A>
1328 class alloc_impl<libc_allocator_with_realloc<A>>
1329 : public libc_allocator_with_realloc<A> {
1330 public:
1331 typedef typename libc_allocator_with_realloc<A>::pointer pointer;
1332 typedef typename libc_allocator_with_realloc<A>::size_type size_type;
1333
1334 alloc_impl(const libc_allocator_with_realloc<A>& a)
1335 : libc_allocator_with_realloc<A>(a) {}
1336
1337 pointer realloc_or_die(pointer ptr, size_type n) {
1338 pointer retval = this->reallocate(ptr, n);
1339 if (retval == NULL) {
1340 fprintf(stderr,
1341 "sparsehash: FATAL ERROR: failed to reallocate "
1342 "%lu elements for ptr %p",
1343 static_cast<unsigned long>(n), static_cast<void*>(ptr));
1344 exit(1);
1345 }
1346 return retval;
1347 }
1348 };
1349
1350 // Package allocator with num_buckets to eliminate memory needed for the
1351 // zero-size allocator.
1352 // If new fields are added to this class, we should add them to
1353 // operator= and swap.
1354 class Settings : public alloc_impl<value_alloc_type> {
1355 public:
1356 Settings(const alloc_impl<value_alloc_type>& a, uint16_t n = 0)
1357 : alloc_impl<value_alloc_type>(a), num_buckets(n) {}
1358 Settings(const Settings& s)
1359 : alloc_impl<value_alloc_type>(s), num_buckets(s.num_buckets) {}
1360
1361 uint16_t num_buckets; // limits GROUP_SIZE to 64K
1362 };
1363
1364 // The actual data
1365 pointer group; // (small) array of T's
1366 Settings settings; // allocator and num_buckets
1367 unsigned char
1368 bitmap[(GROUP_SIZE - 1) / 8 + 1]; // fancy math is so we round up
1369};
1370
// We need a global swap as well
// Free-function swap, found by ADL, so generic code swapping two
// sparsegroups uses the cheap member swap() rather than three copies.
template <class T, uint16_t GROUP_SIZE, class Alloc>
inline void swap(sparsegroup<T, GROUP_SIZE, Alloc>& x,
                 sparsegroup<T, GROUP_SIZE, Alloc>& y) {
  x.swap(y);
}
1377
1378// ---------------------------------------------------------------------------
1379
1380template <class T, uint16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE,
1381 class Alloc = libc_allocator_with_realloc<T>>
1382class sparsetable {
1383 private:
1384 using value_alloc_type =
1385 typename std::allocator_traits<Alloc>::template rebind_alloc<T>;
1386 using vector_alloc =
1387 typename std::allocator_traits<Alloc>::template rebind_alloc<
1388 sparsegroup<T, GROUP_SIZE, value_alloc_type>>;
1389
1390 public:
1391 // Basic types
1392 typedef T value_type; // stolen from stl_vector.h
1393 typedef Alloc allocator_type;
1394 typedef typename value_alloc_type::size_type size_type;
1395 typedef typename value_alloc_type::difference_type difference_type;
1396 typedef typename value_alloc_type::reference reference;
1397 typedef typename value_alloc_type::const_reference const_reference;
1398 typedef typename value_alloc_type::pointer pointer;
1399 typedef typename value_alloc_type::const_pointer const_pointer;
1400 typedef table_iterator<sparsetable<T, GROUP_SIZE, Alloc>> iterator;
1401 typedef const_table_iterator<sparsetable<T, GROUP_SIZE, Alloc>>
1402 const_iterator;
1403 typedef table_element_adaptor<sparsetable<T, GROUP_SIZE, Alloc>>
1404 element_adaptor;
1405 typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
1406 typedef std::reverse_iterator<iterator> reverse_iterator; // from iterator.h
1407
1408 // These are our special iterators, that go over non-empty buckets in a
1409 // table. These aren't const only because you can change non-empty bcks.
1410 typedef two_d_iterator<std::vector<
1411 sparsegroup<value_type, GROUP_SIZE, value_alloc_type>, vector_alloc>>
1412 nonempty_iterator;
1413 typedef const_two_d_iterator<std::vector<
1414 sparsegroup<value_type, GROUP_SIZE, value_alloc_type>, vector_alloc>>
1415 const_nonempty_iterator;
1416 typedef std::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
1417 typedef std::reverse_iterator<const_nonempty_iterator>
1418 const_reverse_nonempty_iterator;
1419 // Another special iterator: it frees memory as it iterates (used to resize)
1420 typedef destructive_two_d_iterator<std::vector<
1421 sparsegroup<value_type, GROUP_SIZE, value_alloc_type>, vector_alloc>>
1422 destructive_iterator;
1423
1424 // Iterator functions
1425 iterator begin() { return iterator(this, 0); }
1426 const_iterator begin() const { return const_iterator(this, 0); }
1427 iterator end() { return iterator(this, size()); }
1428 const_iterator end() const { return const_iterator(this, size()); }
1429 reverse_iterator rbegin() { return reverse_iterator(end()); }
1430 const_reverse_iterator rbegin() const {
1431 return const_reverse_iterator(end());
1432 }
1433 reverse_iterator rend() { return reverse_iterator(begin()); }
1434 const_reverse_iterator rend() const {
1435 return const_reverse_iterator(begin());
1436 }
1437
1438 // Versions for our special non-empty iterator
1439 nonempty_iterator nonempty_begin() {
1440 return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
1441 }
1442 const_nonempty_iterator nonempty_begin() const {
1443 return const_nonempty_iterator(groups.begin(), groups.end(),
1444 groups.begin());
1445 }
1446 nonempty_iterator nonempty_end() {
1447 return nonempty_iterator(groups.begin(), groups.end(), groups.end());
1448 }
1449 const_nonempty_iterator nonempty_end() const {
1450 return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
1451 }
1452 reverse_nonempty_iterator nonempty_rbegin() {
1453 return reverse_nonempty_iterator(nonempty_end());
1454 }
1455 const_reverse_nonempty_iterator nonempty_rbegin() const {
1456 return const_reverse_nonempty_iterator(nonempty_end());
1457 }
1458 reverse_nonempty_iterator nonempty_rend() {
1459 return reverse_nonempty_iterator(nonempty_begin());
1460 }
1461 const_reverse_nonempty_iterator nonempty_rend() const {
1462 return const_reverse_nonempty_iterator(nonempty_begin());
1463 }
1464 destructive_iterator destructive_begin() {
1465 return destructive_iterator(groups.begin(), groups.end(), groups.begin());
1466 }
1467 destructive_iterator destructive_end() {
1468 return destructive_iterator(groups.begin(), groups.end(), groups.end());
1469 }
1470
1471 typedef sparsegroup<value_type, GROUP_SIZE, allocator_type> group_type;
1472 using group_vector_type_allocator_type =
1473 typename std::allocator_traits<Alloc>::template rebind_alloc<group_type>;
1474 typedef std::vector<group_type, group_vector_type_allocator_type>
1475 group_vector_type;
1476
1477 typedef typename group_vector_type::reference GroupsReference;
1478 typedef typename group_vector_type::const_reference GroupsConstReference;
1479 typedef typename group_vector_type::iterator GroupsIterator;
1480 typedef typename group_vector_type::const_iterator GroupsConstIterator;
1481
  // How to deal with the proper group
  // num_groups: number of sparsegroups needed to hold `num` buckets
  // (ceiling division by GROUP_SIZE; 0 buckets need 0 groups).
  static size_type num_groups(size_type num) {  // how many to hold num buckets
    return num == 0 ? 0 : ((num - 1) / GROUP_SIZE) + 1;
  }

  // Position of global index i within its group (0..GROUP_SIZE-1).
  uint16_t pos_in_group(size_type i) const {
    return static_cast<uint16_t>(i % GROUP_SIZE);
  }
  // Index of the group containing global index i.
  size_type group_num(size_type i) const { return i / GROUP_SIZE; }
  GroupsReference which_group(size_type i) { return groups[group_num(i)]; }
  GroupsConstReference which_group(size_type i) const {
    return groups[group_num(i)];
  }
1495
1496 public:
  // Constructors -- default, normal (when you specify size), and copy
  // Builds a table of logical size `sz`: enough empty groups to cover
  // sz buckets, all sharing the given allocator.
  explicit sparsetable(size_type sz = 0, Alloc alloc = Alloc())
      : groups(vector_alloc(alloc)), settings(alloc, sz) {
    groups.resize(num_groups(sz), group_type(settings));
  }
  // We can get away with using the default copy constructor,
  // and default destructor, and hence the default operator=. Huzzah!

  // Many STL algorithms use swap instead of copy constructors
  // Note: swaps the group vector and the two counters, but (like
  // sparsegroup::swap) not the allocator held inside settings.
  void swap(sparsetable& x) {
    std::swap(groups, x.groups);  // defined in stl_algobase.h
    std::swap(settings.table_size, x.settings.table_size);
    std::swap(settings.num_buckets, x.settings.num_buckets);
  }
1511
1512 // It's always nice to be able to clear a table without deallocating it
1513 void clear() {
1514 GroupsIterator group;
1515 for (group = groups.begin(); group != groups.end(); ++group) {
1516 group->clear();
1517 }
1518 settings.num_buckets = 0;
1519 }
1520
  // ACCESSOR FUNCTIONS for the things we templatize on, basically
  allocator_type get_allocator() const { return allocator_type(settings); }

  // Functions that tell you about size.
  // NOTE: empty() is non-intuitive!  It does not tell you the number
  // of not-empty buckets (use num_nonempty() for that).  Instead
  // it says whether you've allocated any buckets or not.
  size_type size() const { return settings.table_size; }
  size_type max_size() const { return settings.max_size(); }
  bool empty() const { return settings.table_size == 0; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const { return settings.num_buckets; }
1533
  // OK, we'll let you resize one of these puppies
  // Grows or shrinks the logical table to new_size buckets.  Growing
  // appends empty groups; shrinking drops trailing groups, erases any
  // now-out-of-range buckets inside the (new) last group, and recounts
  // num_buckets from scratch.
  void resize(size_type new_size) {
    groups.resize(num_groups(new_size), group_type(settings));
    if (new_size < settings.table_size) {
      // lower num_buckets, clear last group
      if (pos_in_group(new_size) > 0)  // need to clear inside last group
        groups.back().erase(groups.back().begin() + pos_in_group(new_size),
                            groups.back().end());
      settings.num_buckets = 0;  // refigure # of used buckets
      GroupsConstIterator group;
      for (group = groups.begin(); group != groups.end(); ++group)
        settings.num_buckets += group->num_nonempty();
    }
    settings.table_size = new_size;
  }
1549
  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    assert(i < settings.table_size);
    return which_group(i).test(pos_in_group(i));
  }
  // Overloads that take an iterator/const_iterator and test the bucket
  // at its position (pos).
  bool test(iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
  bool test(const_iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
1561
  // We only return const_references because it's really hard to
  // return something settable for empty buckets.  Use set() instead.
  // Safe to call on empty buckets; delegates to the group's get().
  const_reference get(size_type i) const {
    assert(i < settings.table_size);
    return which_group(i).get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend
  // This is used by sparse_hashtable to get an element from the table
  // when we know it exists (because the caller has called test(i)).
  const_reference unsafe_get(size_type i) const {
    assert(i < settings.table_size);
    assert(test(i));  // calling this on an empty bucket is a caller bug
    return which_group(i).unsafe_get(pos_in_group(i));
  }
1577
1578 // TODO(csilvers): make protected + friend element_adaptor
1579 reference mutating_get(size_type i) { // fills bucket i before getting
1580 assert(i < settings.table_size);
1581 typename group_type::size_type old_numbuckets =
1582 which_group(i).num_nonempty();
1583 reference retval = which_group(i).mutating_get(pos_in_group(i));
1584 settings.num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1585 return retval;
1586 }
1587
  // Syntactic sugar.  As in sparsegroup, the non-const version is harder
  const_reference operator[](size_type i) const { return get(i); }

  // Non-const subscript returns a proxy object bound to (this, i) rather
  // than a raw reference, since empty buckets have nothing to refer to.
  element_adaptor operator[](size_type i) { return element_adaptor(this, i); }
1592
1593 // Needed for hashtables, gets as a nonempty_iterator. Crashes for empty
1594 // bcks
1595 const_nonempty_iterator get_iter(size_type i) const {
1596 assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
1597 return const_nonempty_iterator(
1598 groups.begin(), groups.end(), groups.begin() + group_num(i),
1599 (groups[group_num(i)].nonempty_begin() +
1600 groups[group_num(i)].pos_to_offset(pos_in_group(i))));
1601 }
1602 // For nonempty we can return a non-const version
1603 nonempty_iterator get_iter(size_type i) {
1604 assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
1605 return nonempty_iterator(
1606 groups.begin(), groups.end(), groups.begin() + group_num(i),
1607 (groups[group_num(i)].nonempty_begin() +
1608 groups[group_num(i)].pos_to_offset(pos_in_group(i))));
1609 }
1610
  // And the reverse transformation.
  // Maps a nonempty_iterator back to its absolute bucket index:
  // (row * GROUP_SIZE) plus the in-group position recovered from the
  // iterator's offset within that group's packed storage.
  size_type get_pos(const const_nonempty_iterator& it) const {
    difference_type current_row = it.row_current - it.row_begin;
    difference_type current_col =
        (it.col_current - groups[current_row].nonempty_begin());
    return ((current_row * GROUP_SIZE) +
            groups[current_row].offset_to_pos(current_col));
  }
1619
1620 // This returns a reference to the inserted item (which is a copy of val)
1621 // The trick is to figure out whether we're replacing or inserting anew
1622 reference set(size_type i, const_reference val) {
1623 assert(i < settings.table_size);
1624 typename group_type::size_type old_numbuckets =
1625 which_group(i).num_nonempty();
1626 reference retval = which_group(i).set(pos_in_group(i), val);
1627 settings.num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1628 return retval;
1629 }
1630
1631 // This takes the specified elements out of the table. This is
1632 // "undefining", rather than "clearing".
1633 void erase(size_type i) {
1634 assert(i < settings.table_size);
1635 typename group_type::size_type old_numbuckets =
1636 which_group(i).num_nonempty();
1637 which_group(i).erase(pos_in_group(i));
1638 settings.num_buckets += which_group(i).num_nonempty() - old_numbuckets;
1639 }
1640
1641 void erase(iterator pos) { erase(pos.pos); }
1642
1643 void erase(iterator start_it, iterator end_it) {
1644 // This could be more efficient, but then we'd need to figure
1645 // out if we spanned groups or not. Doesn't seem worth it.
1646 for (; start_it != end_it; ++start_it) erase(start_it);
1647 }
1648
1649 // We support reading and writing tables to disk. We don't store
1650 // the actual array contents (which we don't know how to store),
1651 // just the groups and sizes. Returns true if all went ok.
1652
 private:
  // Every time the disk format changes, this should probably change too
  typedef unsigned long MagicNumberType;
  // Arbitrary constant identifying a serialized sparsetable on disk.
  static const MagicNumberType MAGIC_NUMBER = 0x24687531;
1657
  // Old versions of this code write all data in 32 bits.  We need to
  // support these files as well as having support for 64-bit systems.
  // So we use the following encoding scheme: for values < 0xFFFFFFFF
  // (2^32-1), we store them in 4 bytes in big-endian order.  For values
  // >= 0xFFFFFFFF, we store the 4-byte sentinel 0xFFFFFFFF followed by
  // the value in 8 bytes, all big-endian.  This causes us to mis-read
  // old-version files that store exactly 0xFFFFFFFF, but I don't think
  // that is likely to have happened for these particular values.
  // Writes 'value' using the 32-or-64-bit encoding described above:
  // values below 0xFFFFFFFF go out as 4 big-endian bytes; larger values
  // go out as the 4-byte sentinel 0xFFFFFFFF followed by the value in
  // 8 big-endian bytes.  Returns false on any write failure.
  template <typename OUTPUT, typename IntType>
  static bool write_32_or_64(OUTPUT* fp, IntType value) {
    if (value < 0xFFFFFFFFULL) {  // fits in 4 bytes
      if (!sparsehash_internal::write_bigendian_number(fp, value, 4))
        return false;
    } else {
      // Sentinel tells the reader that an 8-byte value follows.
      if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4))
        return false;
      if (!sparsehash_internal::write_bigendian_number(fp, value, 8))
        return false;
    }
    return true;
  }
1679
  // Inverse of write_32_or_64: reads 4 big-endian bytes; if they are the
  // sentinel 0xFFFFFFFF, reads the real value from the following 8 bytes.
  // Returns false on any read failure.
  template <typename INPUT, typename IntType>
  static bool read_32_or_64(INPUT* fp, IntType* value) {  // reads into value
    MagicNumberType first4 = 0;  // a convenient 32-bit unsigned type
    if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4))
      return false;
    if (first4 < 0xFFFFFFFFULL) {
      *value = first4;
    } else {
      // Sentinel seen: the actual (64-bit) value follows.
      if (!sparsehash_internal::read_bigendian_number(fp, value, 8))
        return false;
    }
    return true;
  }
1693
1694 public:
1695 // read/write_metadata() and read_write/nopointer_data() are DEPRECATED.
1696 // Use serialize() and unserialize(), below, for new code.
1697
1698 template <typename OUTPUT>
1699 bool write_metadata(OUTPUT* fp) const {
1700 if (!write_32_or_64(fp, MAGIC_NUMBER)) return false;
1701 if (!write_32_or_64(fp, settings.table_size)) return false;
1702 if (!write_32_or_64(fp, settings.num_buckets)) return false;
1703
1704 GroupsConstIterator group;
1705 for (group = groups.begin(); group != groups.end(); ++group)
1706 if (group->write_metadata(fp) == false) return false;
1707 return true;
1708 }
1709
  // Reading destroys the old table contents!  Returns true if read ok.
  // Reads what write_metadata() wrote: magic number, sizes, then the
  // per-group metadata.  On a magic-number mismatch the table is
  // cleared and false is returned.
  template <typename INPUT>
  bool read_metadata(INPUT* fp) {
    size_type magic_read = 0;
    if (!read_32_or_64(fp, &magic_read)) return false;
    if (magic_read != MAGIC_NUMBER) {
      clear();  // just to be consistent
      return false;
    }

    // Read straight into settings; the resize() below then sizes the
    // group vector to match the new table_size.
    if (!read_32_or_64(fp, &settings.table_size)) return false;
    if (!read_32_or_64(fp, &settings.num_buckets)) return false;

    resize(settings.table_size);  // so the vector's sized ok
    GroupsIterator group;
    for (group = groups.begin(); group != groups.end(); ++group)
      if (group->read_metadata(fp) == false) return false;
    return true;
  }
1729
1730 // This code is identical to that for SparseGroup
1731 // If your keys and values are simple enough, we can write them
1732 // to disk for you. "simple enough" means no pointers.
1733 // However, we don't try to normalize endianness
1734 bool write_nopointer_data(FILE* fp) const {
1735 for (const_nonempty_iterator it = nonempty_begin(); it != nonempty_end();
1736 ++it) {
1737 if (!fwrite(&*it, sizeof(*it), 1, fp)) return false;
1738 }
1739 return true;
1740 }
1741
1742 // When reading, we have to override the potential const-ness of *it
1743 bool read_nopointer_data(FILE* fp) {
1744 for (nonempty_iterator it = nonempty_begin(); it != nonempty_end(); ++it) {
1745 if (!fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp))
1746 return false;
1747 }
1748 return true;
1749 }
1750
  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
  // (istream, ostream, etc) *or* a class providing
  // Read(void*, size_t) and Write(const void*, size_t)
  // (respectively), which writes a buffer into a stream
  // (which the INPUT/OUTPUT instance presumably owns).

  // Ready-made serializer for serialize()/unserialize() when value_type
  // is plain-old-data (contains no pointers).
  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
1758
  // ValueSerializer: a functor.  operator()(OUTPUT*, const value_type&)
  // Writes the table metadata, then each nonempty value through the
  // user-supplied serializer.  Returns false on the first failure.
  template <typename ValueSerializer, typename OUTPUT>
  bool serialize(ValueSerializer serializer, OUTPUT* fp) {
    if (!write_metadata(fp)) return false;
    for (const_nonempty_iterator it = nonempty_begin(); it != nonempty_end();
         ++it) {
      if (!serializer(fp, *it)) return false;
    }
    return true;
  }
1769
  // ValueSerializer: a functor.  operator()(INPUT*, value_type*)
  // Destroys the current contents, reads the metadata (which marks the
  // nonempty buckets), then reads each value in place through the
  // user-supplied serializer.  Returns false on the first failure.
  template <typename ValueSerializer, typename INPUT>
  bool unserialize(ValueSerializer serializer, INPUT* fp) {
    clear();
    if (!read_metadata(fp)) return false;
    for (nonempty_iterator it = nonempty_begin(); it != nonempty_end(); ++it) {
      if (!serializer(fp, &*it)) return false;
    }
    return true;
  }
1780
  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsetable& x) const {
    // The size checks are cheap early-outs before comparing the groups
    // themselves.
    return (settings.table_size == x.settings.table_size &&
            settings.num_buckets == x.settings.num_buckets &&
            groups == x.groups);
  }

  // operator< compares all buckets in index order, empty ones included.
  bool operator<(const sparsetable& x) const {
    return std::lexicographical_compare(begin(), end(), x.begin(), x.end());
  }
  // The remaining comparisons are derived from == and < in the usual way.
  bool operator!=(const sparsetable& x) const { return !(*this == x); }
  bool operator<=(const sparsetable& x) const { return !(x < *this); }
  bool operator>(const sparsetable& x) const { return x < *this; }
  bool operator>=(const sparsetable& x) const { return !(*this < x); }
1797
 private:
  // Package allocator with table_size and num_buckets to eliminate memory
  // needed for the zero-size allocator.
  // If new fields are added to this class, we should add them to
  // operator= and swap.
  // (Inheriting from allocator_type means an empty allocator contributes
  // no storage -- the empty-base optimization.)
  class Settings : public allocator_type {
   public:
    typedef typename allocator_type::size_type size_type;

    Settings(const allocator_type& a, size_type sz = 0, size_type n = 0)
        : allocator_type(a), table_size(sz), num_buckets(n) {}

    Settings(const Settings& s)
        : allocator_type(s),
          table_size(s.table_size),
          num_buckets(s.num_buckets) {}

    size_type table_size;   // how many buckets they want
    size_type num_buckets;  // number of non-empty buckets
  };

  // The actual data
  group_vector_type groups;  // our list of groups
  Settings settings;         // allocator, table size, buckets
1822};
1823
// We need a global swap as well
// Non-member swap so that "using std::swap; swap(a, b);" finds this
// overload via argument-dependent lookup for sparsetables.
template <class T, uint16_t GROUP_SIZE, class Alloc>
inline void swap(sparsetable<T, GROUP_SIZE, Alloc>& x,
                 sparsetable<T, GROUP_SIZE, Alloc>& y) {
  x.swap(y);
}
1830} // namespace google
1831