hashfunc.c source code [PostgreSQL/src/backend/access/hash/hashfunc.c]

/*-------------------------------------------------------------------------
 *
 * hashfunc.c
 *	  Support functions for hash access method.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hashfunc.c
 *
 * NOTES
 *	  These functions are stored in pg_amproc.  For each operator class
 *	  defined for hash indexes, they compute the hash value of the argument.
 *
 *	  Additional hash functions appear in /utils/adt/ files for various
 *	  specialized datatypes.
 *
 *	  It is expected that every bit of a hash function's 32-bit result is
 *	  as random as every other; failure to ensure this is likely to lead
 *	  to poor performance of hash joins, for example.  In most cases a hash
 *	  function should use hash_any() or its variant hash_uint32().
 *-------------------------------------------------------------------------
 */
26
27	#include "postgres.h"
28
29	#include "access/hash.h"
30	#include "catalog/pg_collation.h"
31	#include "utils/builtins.h"
32	#include "utils/hashutils.h"
33	#include "utils/pg_locale.h"
34
/*
 * Datatype-specific hash functions.
 *
 * These support both hash indexes and hash joins.
 *
 * NOTE: some of these are also used by catcache operations, without
 * any direct connection to hash indexes.  In addition, the common
 * hash_any routine is used by dynahash tables.
 */
44
45	/ Note: this is used for both "char" and boolean datatypes /
46	Datum
47	hashchar(PG_FUNCTION_ARGS)
48	{
49	return hash_uint32((int32) PG_GETARG_CHAR(`0`));
50	}
51
52	Datum
53	hashcharextended(PG_FUNCTION_ARGS)
54	{
55	return hash_uint32_extended((int32) PG_GETARG_CHAR(`0`), PG_GETARG_INT64(`1`));
56	}
57
58	Datum
59	hashint2(PG_FUNCTION_ARGS)
60	{
61	return hash_uint32((int32) PG_GETARG_INT16(`0`));
62	}
63
64	Datum
65	hashint2extended(PG_FUNCTION_ARGS)
66	{
67	return hash_uint32_extended((int32) PG_GETARG_INT16(`0`), PG_GETARG_INT64(`1`));
68	}
69
70	Datum
71	hashint4(PG_FUNCTION_ARGS)
72	{
73	return hash_uint32(PG_GETARG_INT32(`0`));
74	}
75
76	Datum
77	hashint4extended(PG_FUNCTION_ARGS)
78	{
79	return hash_uint32_extended(PG_GETARG_INT32(`0`), PG_GETARG_INT64(`1`));
80	}
81
82	Datum
83	hashint8(PG_FUNCTION_ARGS)
84	{
85	/*
86	* The idea here is to produce a hash value compatible with the values
87	* produced by hashint4 and hashint2 for logically equal inputs; this is
88	* necessary to support cross-type hash joins across these input types.
89	* Since all three types are signed, we can xor the high half of the int8
90	* value if the sign is positive, or the complement of the high half when
91	* the sign is negative.
92	*/
93	int64 val = PG_GETARG_INT64(`0`);
94	uint32 lohalf = (uint32) val;
95	uint32 hihalf = (uint32) (val >> `32`);
96
97	lohalf ^= (val >= `0`) ? hihalf : ~hihalf;
98
99	return hash_uint32(lohalf);
100	}
101
102	Datum
103	hashint8extended(PG_FUNCTION_ARGS)
104	{
105	/ Same approach as hashint8 /
106	int64 val = PG_GETARG_INT64(`0`);
107	uint32 lohalf = (uint32) val;
108	uint32 hihalf = (uint32) (val >> `32`);
109
110	lohalf ^= (val >= `0`) ? hihalf : ~hihalf;
111
112	return hash_uint32_extended(lohalf, PG_GETARG_INT64(`1`));
113	}
114
115	Datum
116	hashoid(PG_FUNCTION_ARGS)
117	{
118	return hash_uint32((uint32) PG_GETARG_OID(`0`));
119	}
120
121	Datum
122	hashoidextended(PG_FUNCTION_ARGS)
123	{
124	return hash_uint32_extended((uint32) PG_GETARG_OID(`0`), PG_GETARG_INT64(`1`));
125	}
126
127	Datum
128	hashenum(PG_FUNCTION_ARGS)
129	{
130	return hash_uint32((uint32) PG_GETARG_OID(`0`));
131	}
132
133	Datum
134	hashenumextended(PG_FUNCTION_ARGS)
135	{
136	return hash_uint32_extended((uint32) PG_GETARG_OID(`0`), PG_GETARG_INT64(`1`));
137	}
138
139	Datum
140	hashfloat4(PG_FUNCTION_ARGS)
141	{
142	float4 key = PG_GETARG_FLOAT4(`0`);
143	float8 key8;
144
145	/*
146	* On IEEE-float machines, minus zero and zero have different bit patterns
147	* but should compare as equal. We must ensure that they have the same
148	* hash value, which is most reliably done this way:
149	*/
150	if (key == (float4) `0`)
151	PG_RETURN_UINT32(`0`);
152
153	/*
154	* To support cross-type hashing of float8 and float4, we want to return
155	* the same hash value hashfloat8 would produce for an equal float8 value.
156	* So, widen the value to float8 and hash that. (We must do this rather
157	* than have hashfloat8 try to narrow its value to float4; that could fail
158	* on overflow.)
159	*/
160	key8 = key;
161
162	return hash_any((unsigned char ) &key8, sizeof*(key8));
163	}
164
165	Datum
166	hashfloat4extended(PG_FUNCTION_ARGS)
167	{
168	float4 key = PG_GETARG_FLOAT4(`0`);
169	uint64 seed = PG_GETARG_INT64(`1`);
170	float8 key8;
171
172	/ Same approach as hashfloat4 /
173	if (key == (float4) `0`)
174	PG_RETURN_UINT64(seed);
175	key8 = key;
176
177	return hash_any_extended((unsigned char ) &key8, sizeof*(key8), seed);
178	}
179
180	Datum
181	hashfloat8(PG_FUNCTION_ARGS)
182	{
183	float8 key = PG_GETARG_FLOAT8(`0`);
184
185	/*
186	* On IEEE-float machines, minus zero and zero have different bit patterns
187	* but should compare as equal. We must ensure that they have the same
188	* hash value, which is most reliably done this way:
189	*/
190	if (key == (float8) `0`)
191	PG_RETURN_UINT32(`0`);
192
193	return hash_any((unsigned char ) &key, sizeof*(key));
194	}
195
196	Datum
197	hashfloat8extended(PG_FUNCTION_ARGS)
198	{
199	float8 key = PG_GETARG_FLOAT8(`0`);
200	uint64 seed = PG_GETARG_INT64(`1`);
201
202	/ Same approach as hashfloat8 /
203	if (key == (float8) `0`)
204	PG_RETURN_UINT64(seed);
205
206	return hash_any_extended((unsigned char ) &key, sizeof*(key), seed);
207	}
208
209	Datum
210	hashoidvector(PG_FUNCTION_ARGS)
211	{
212	oidvector key = (oidvector ) PG_GETARG_POINTER(`0`);
213
214	return hash_any((unsigned char ) key->values, key->dim1 sizeof(Oid));
215	}
216
217	Datum
218	hashoidvectorextended(PG_FUNCTION_ARGS)
219	{
220	oidvector key = (oidvector ) PG_GETARG_POINTER(`0`);
221
222	return hash_any_extended((unsigned char *) key->values,
223	key->dim1 * sizeof(Oid),
224	PG_GETARG_INT64(`1`));
225	}
226
227	Datum
228	hashname(PG_FUNCTION_ARGS)
229	{
230	char key = NameStr(PG_GETARG_NAME(`0`));
231
232	return hash_any((unsigned char *) key, strlen(key));
233	}
234
235	Datum
236	hashnameextended(PG_FUNCTION_ARGS)
237	{
238	char key = NameStr(PG_GETARG_NAME(`0`));
239
240	return hash_any_extended((unsigned char *) key, strlen(key),
241	PG_GETARG_INT64(`1`));
242	}
243
244	Datum
245	hashtext(PG_FUNCTION_ARGS)
246	{
247	text *key = PG_GETARG_TEXT_PP(`0`);
248	Oid collid = PG_GET_COLLATION();
249	pg_locale_t mylocale = `0`;
250	Datum result;
251
252	if (!collid)
253	ereport(ERROR,
254	(errcode(ERRCODE_INDETERMINATE_COLLATION),
255	errmsg("could not determine which collation to use for string hashing"),
256	errhint("Use the COLLATE clause to set the collation explicitly.")));
257
258	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
259	mylocale = pg_newlocale_from_collation(collid);
260
261	if (!mylocale \|\| mylocale->deterministic)
262	{
263	result = hash_any((unsigned char *) VARDATA_ANY(key),
264	VARSIZE_ANY_EXHDR(key));
265	}
266	else
267	{
268	#ifdef USE_ICU
269	if (mylocale->provider == COLLPROVIDER_ICU)
270	{
271	int32_t ulen = -`1`;
272	UChar *uchar = NULL;
273	Size bsize;
274	uint8_t *buf;
275
276	ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
277
278	bsize = ucol_getSortKey(mylocale->info.icu.ucol,
279	uchar, ulen, NULL, `0`);
280	buf = palloc(bsize);
281	ucol_getSortKey(mylocale->info.icu.ucol,
282	uchar, ulen, buf, bsize);
283
284	result = hash_any(buf, bsize);
285
286	pfree(buf);
287	}
288	else
289	#endif
290	/ shouldn't happen /
291	elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
292	}
293
294	/ Avoid leaking memory for toasted inputs /
295	PG_FREE_IF_COPY(key, `0`);
296
297	return result;
298	}
299
300	Datum
301	hashtextextended(PG_FUNCTION_ARGS)
302	{
303	text *key = PG_GETARG_TEXT_PP(`0`);
304	Oid collid = PG_GET_COLLATION();
305	pg_locale_t mylocale = `0`;
306	Datum result;
307
308	if (!collid)
309	ereport(ERROR,
310	(errcode(ERRCODE_INDETERMINATE_COLLATION),
311	errmsg("could not determine which collation to use for string hashing"),
312	errhint("Use the COLLATE clause to set the collation explicitly.")));
313
314	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
315	mylocale = pg_newlocale_from_collation(collid);
316
317	if (!mylocale \|\| mylocale->deterministic)
318	{
319	result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
320	VARSIZE_ANY_EXHDR(key),
321	PG_GETARG_INT64(`1`));
322	}
323	else
324	{
325	#ifdef USE_ICU
326	if (mylocale->provider == COLLPROVIDER_ICU)
327	{
328	int32_t ulen = -`1`;
329	UChar *uchar = NULL;
330	Size bsize;
331	uint8_t *buf;
332
333	ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
334
335	bsize = ucol_getSortKey(mylocale->info.icu.ucol,
336	uchar, ulen, NULL, `0`);
337	buf = palloc(bsize);
338	ucol_getSortKey(mylocale->info.icu.ucol,
339	uchar, ulen, buf, bsize);
340
341	result = hash_any_extended(buf, bsize, PG_GETARG_INT64(`1`));
342
343	pfree(buf);
344	}
345	else
346	#endif
347	/ shouldn't happen /
348	elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
349	}
350
351	PG_FREE_IF_COPY(key, `0`);
352
353	return result;
354	}
355
356	/*
357	* hashvarlena() can be used for any varlena datatype in which there are
358	* no non-significant bits, ie, distinct bitpatterns never compare as equal.
359	*/
360	Datum
361	hashvarlena(PG_FUNCTION_ARGS)
362	{
363	struct varlena *key = PG_GETARG_VARLENA_PP(`0`);
364	Datum result;
365
366	result = hash_any((unsigned char *) VARDATA_ANY(key),
367	VARSIZE_ANY_EXHDR(key));
368
369	/ Avoid leaking memory for toasted inputs /
370	PG_FREE_IF_COPY(key, `0`);
371
372	return result;
373	}
374
375	Datum
376	hashvarlenaextended(PG_FUNCTION_ARGS)
377	{
378	struct varlena *key = PG_GETARG_VARLENA_PP(`0`);
379	Datum result;
380
381	result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
382	VARSIZE_ANY_EXHDR(key),
383	PG_GETARG_INT64(`1`));
384
385	PG_FREE_IF_COPY(key, `0`);
386
387	return result;
388	}
389

Browse the source code of PostgreSQL/src/backend/access/hash/hashfunc.c