hashsort.c source code [PostgreSQL/src/backend/access/hash/hashsort.c]

/*-------------------------------------------------------------------------
 *
 * hashsort.c
 *		Sort tuples for insertion into a new hash index.
 *
 * When building a very large hash index, we pre-sort the tuples by bucket
 * number to improve locality of access to the index, and thereby avoid
 * thrashing.  We use tuplesort.c to sort the given index tuples into order.
 *
 * Note: if the number of rows in the table has been underestimated,
 * bucket splits may occur during the index build.  In that case we'd
 * be inserting into two or more buckets for each possible masked-off
 * hash code value.  That's no big problem though, since we'll still have
 * plenty of locality of access.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hashsort.c
 *
 *-------------------------------------------------------------------------
 */
25
26	#include "postgres.h"
27
28	#include "access/hash.h"
29	#include "commands/progress.h"
30	#include "miscadmin.h"
31	#include "pgstat.h"
32	#include "utils/tuplesort.h"
33
34
35	/*
36	* Status record for spooling/sorting phase.
37	*/
38	struct HSpool
39	{
40	Tuplesortstate sortstate; /* state data for tuplesort.c /
41	Relation index;
42
43	/*
44	* We sort the hash keys based on the buckets they belong to. Below masks
45	* are used in _hash_hashkey2bucket to determine the bucket of given hash
46	* key.
47	*/
48	uint32 high_mask;
49	uint32 low_mask;
50	uint32 max_buckets;
51	};
52
53
54	/*
55	* create and initialize a spool structure
56	*/
57	HSpool *
58	_h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
59	{
60	HSpool hspool = (HSpool ) palloc0(sizeof(HSpool));
61
62	hspool->index = index;
63
64	/*
65	* Determine the bitmask for hash code values. Since there are currently
66	* num_buckets buckets in the index, the appropriate mask can be computed
67	* as follows.
68	*
69	* NOTE : This hash mask calculation should be in sync with similar
70	* calculation in _hash_init_metabuffer.
71	*/
72	hspool->high_mask = (((uint32) `1`) << _hash_log2(num_buckets + `1`)) - `1`;
73	hspool->low_mask = (hspool->high_mask >> `1`);
74	hspool->max_buckets = num_buckets - `1`;
75
76	/*
77	* We size the sort area as maintenance_work_mem rather than work_mem to
78	* speed index creation. This should be OK since a single backend can't
79	* run multiple index creations in parallel.
80	*/
81	hspool->sortstate = tuplesort_begin_index_hash(heap,
82	index,
83	hspool->high_mask,
84	hspool->low_mask,
85	hspool->max_buckets,
86	maintenance_work_mem,
87	NULL,
88	false);
89
90	return hspool;
91	}
92
93	/*
94	* clean up a spool structure and its substructures.
95	*/
96	void
97	_h_spooldestroy(HSpool *hspool)
98	{
99	tuplesort_end(hspool->sortstate);
100	pfree(hspool);
101	}
102
103	/*
104	* spool an index entry into the sort file.
105	*/
106	void
107	_h_spool(HSpool hspool, ItemPointer self, Datum values, bool *isnull)
108	{
109	tuplesort_putindextuplevalues(hspool->sortstate, hspool->index,
110	self, values, isnull);
111	}
112
113	/*
114	* given a spool loaded by successive calls to _h_spool,
115	* create an entire index.
116	*/
117	void
118	_h_indexbuild(HSpool *hspool, Relation heapRel)
119	{
120	IndexTuple itup;
121	int64 tups_done = `0`;
122	#ifdef USE_ASSERT_CHECKING
123	uint32 hashkey = `0`;
124	#endif
125
126	tuplesort_performsort(hspool->sortstate);
127
128	while ((itup = tuplesort_getindextuple(hspool->sortstate, true)) != NULL)
129	{
130	/*
131	* Technically, it isn't critical that hash keys be found in sorted
132	* order, since this sorting is only used to increase locality of
133	* access as a performance optimization. It still seems like a good
134	* idea to test tuplesort.c's handling of hash index tuple sorts
135	* through an assertion, though.
136	*/
137	#ifdef USE_ASSERT_CHECKING
138	uint32 lasthashkey = hashkey;
139
140	hashkey = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
141	hspool->max_buckets, hspool->high_mask,
142	hspool->low_mask);
143	Assert(hashkey >= lasthashkey);
144	#endif
145
146	_hash_doinsert(hspool->index, itup, heapRel);
147
148	pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
149	++tups_done);
150	}
151	}
152

Browse the source code of PostgreSQL/src/backend/access/hash/hashsort.c