1/*-------------------------------------------------------------------------
2 *
3 * hashfunc.c
4 * Support functions for hash access method.
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hashfunc.c
12 *
13 * NOTES
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
16 *
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
19 *
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
25 */
26
#include "postgres.h"

#include <math.h>

#include "access/hash.h"
#include "catalog/pg_collation.h"
#include "utils/builtins.h"
#include "utils/hashutils.h"
#include "utils/pg_locale.h"
34
35/*
36 * Datatype-specific hash functions.
37 *
38 * These support both hash indexes and hash joins.
39 *
 * NOTE: some of these are also used by catcache operations, without
 * any direct connection to hash indexes.  The common hash_any routine
 * is likewise used by dynahash tables.
43 */
44
45/* Note: this is used for both "char" and boolean datatypes */
46Datum
47hashchar(PG_FUNCTION_ARGS)
48{
49 return hash_uint32((int32) PG_GETARG_CHAR(0));
50}
51
52Datum
53hashcharextended(PG_FUNCTION_ARGS)
54{
55 return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
56}
57
58Datum
59hashint2(PG_FUNCTION_ARGS)
60{
61 return hash_uint32((int32) PG_GETARG_INT16(0));
62}
63
64Datum
65hashint2extended(PG_FUNCTION_ARGS)
66{
67 return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
68}
69
70Datum
71hashint4(PG_FUNCTION_ARGS)
72{
73 return hash_uint32(PG_GETARG_INT32(0));
74}
75
76Datum
77hashint4extended(PG_FUNCTION_ARGS)
78{
79 return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
80}
81
82Datum
83hashint8(PG_FUNCTION_ARGS)
84{
85 /*
86 * The idea here is to produce a hash value compatible with the values
87 * produced by hashint4 and hashint2 for logically equal inputs; this is
88 * necessary to support cross-type hash joins across these input types.
89 * Since all three types are signed, we can xor the high half of the int8
90 * value if the sign is positive, or the complement of the high half when
91 * the sign is negative.
92 */
93 int64 val = PG_GETARG_INT64(0);
94 uint32 lohalf = (uint32) val;
95 uint32 hihalf = (uint32) (val >> 32);
96
97 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
98
99 return hash_uint32(lohalf);
100}
101
102Datum
103hashint8extended(PG_FUNCTION_ARGS)
104{
105 /* Same approach as hashint8 */
106 int64 val = PG_GETARG_INT64(0);
107 uint32 lohalf = (uint32) val;
108 uint32 hihalf = (uint32) (val >> 32);
109
110 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
111
112 return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
113}
114
115Datum
116hashoid(PG_FUNCTION_ARGS)
117{
118 return hash_uint32((uint32) PG_GETARG_OID(0));
119}
120
121Datum
122hashoidextended(PG_FUNCTION_ARGS)
123{
124 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
125}
126
127Datum
128hashenum(PG_FUNCTION_ARGS)
129{
130 return hash_uint32((uint32) PG_GETARG_OID(0));
131}
132
133Datum
134hashenumextended(PG_FUNCTION_ARGS)
135{
136 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
137}
138
139Datum
140hashfloat4(PG_FUNCTION_ARGS)
141{
142 float4 key = PG_GETARG_FLOAT4(0);
143 float8 key8;
144
145 /*
146 * On IEEE-float machines, minus zero and zero have different bit patterns
147 * but should compare as equal. We must ensure that they have the same
148 * hash value, which is most reliably done this way:
149 */
150 if (key == (float4) 0)
151 PG_RETURN_UINT32(0);
152
153 /*
154 * To support cross-type hashing of float8 and float4, we want to return
155 * the same hash value hashfloat8 would produce for an equal float8 value.
156 * So, widen the value to float8 and hash that. (We must do this rather
157 * than have hashfloat8 try to narrow its value to float4; that could fail
158 * on overflow.)
159 */
160 key8 = key;
161
162 return hash_any((unsigned char *) &key8, sizeof(key8));
163}
164
165Datum
166hashfloat4extended(PG_FUNCTION_ARGS)
167{
168 float4 key = PG_GETARG_FLOAT4(0);
169 uint64 seed = PG_GETARG_INT64(1);
170 float8 key8;
171
172 /* Same approach as hashfloat4 */
173 if (key == (float4) 0)
174 PG_RETURN_UINT64(seed);
175 key8 = key;
176
177 return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
178}
179
180Datum
181hashfloat8(PG_FUNCTION_ARGS)
182{
183 float8 key = PG_GETARG_FLOAT8(0);
184
185 /*
186 * On IEEE-float machines, minus zero and zero have different bit patterns
187 * but should compare as equal. We must ensure that they have the same
188 * hash value, which is most reliably done this way:
189 */
190 if (key == (float8) 0)
191 PG_RETURN_UINT32(0);
192
193 return hash_any((unsigned char *) &key, sizeof(key));
194}
195
196Datum
197hashfloat8extended(PG_FUNCTION_ARGS)
198{
199 float8 key = PG_GETARG_FLOAT8(0);
200 uint64 seed = PG_GETARG_INT64(1);
201
202 /* Same approach as hashfloat8 */
203 if (key == (float8) 0)
204 PG_RETURN_UINT64(seed);
205
206 return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
207}
208
209Datum
210hashoidvector(PG_FUNCTION_ARGS)
211{
212 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
213
214 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
215}
216
217Datum
218hashoidvectorextended(PG_FUNCTION_ARGS)
219{
220 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
221
222 return hash_any_extended((unsigned char *) key->values,
223 key->dim1 * sizeof(Oid),
224 PG_GETARG_INT64(1));
225}
226
227Datum
228hashname(PG_FUNCTION_ARGS)
229{
230 char *key = NameStr(*PG_GETARG_NAME(0));
231
232 return hash_any((unsigned char *) key, strlen(key));
233}
234
235Datum
236hashnameextended(PG_FUNCTION_ARGS)
237{
238 char *key = NameStr(*PG_GETARG_NAME(0));
239
240 return hash_any_extended((unsigned char *) key, strlen(key),
241 PG_GETARG_INT64(1));
242}
243
244Datum
245hashtext(PG_FUNCTION_ARGS)
246{
247 text *key = PG_GETARG_TEXT_PP(0);
248 Oid collid = PG_GET_COLLATION();
249 pg_locale_t mylocale = 0;
250 Datum result;
251
252 if (!collid)
253 ereport(ERROR,
254 (errcode(ERRCODE_INDETERMINATE_COLLATION),
255 errmsg("could not determine which collation to use for string hashing"),
256 errhint("Use the COLLATE clause to set the collation explicitly.")));
257
258 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
259 mylocale = pg_newlocale_from_collation(collid);
260
261 if (!mylocale || mylocale->deterministic)
262 {
263 result = hash_any((unsigned char *) VARDATA_ANY(key),
264 VARSIZE_ANY_EXHDR(key));
265 }
266 else
267 {
268#ifdef USE_ICU
269 if (mylocale->provider == COLLPROVIDER_ICU)
270 {
271 int32_t ulen = -1;
272 UChar *uchar = NULL;
273 Size bsize;
274 uint8_t *buf;
275
276 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
277
278 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
279 uchar, ulen, NULL, 0);
280 buf = palloc(bsize);
281 ucol_getSortKey(mylocale->info.icu.ucol,
282 uchar, ulen, buf, bsize);
283
284 result = hash_any(buf, bsize);
285
286 pfree(buf);
287 }
288 else
289#endif
290 /* shouldn't happen */
291 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
292 }
293
294 /* Avoid leaking memory for toasted inputs */
295 PG_FREE_IF_COPY(key, 0);
296
297 return result;
298}
299
300Datum
301hashtextextended(PG_FUNCTION_ARGS)
302{
303 text *key = PG_GETARG_TEXT_PP(0);
304 Oid collid = PG_GET_COLLATION();
305 pg_locale_t mylocale = 0;
306 Datum result;
307
308 if (!collid)
309 ereport(ERROR,
310 (errcode(ERRCODE_INDETERMINATE_COLLATION),
311 errmsg("could not determine which collation to use for string hashing"),
312 errhint("Use the COLLATE clause to set the collation explicitly.")));
313
314 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
315 mylocale = pg_newlocale_from_collation(collid);
316
317 if (!mylocale || mylocale->deterministic)
318 {
319 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
320 VARSIZE_ANY_EXHDR(key),
321 PG_GETARG_INT64(1));
322 }
323 else
324 {
325#ifdef USE_ICU
326 if (mylocale->provider == COLLPROVIDER_ICU)
327 {
328 int32_t ulen = -1;
329 UChar *uchar = NULL;
330 Size bsize;
331 uint8_t *buf;
332
333 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
334
335 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
336 uchar, ulen, NULL, 0);
337 buf = palloc(bsize);
338 ucol_getSortKey(mylocale->info.icu.ucol,
339 uchar, ulen, buf, bsize);
340
341 result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
342
343 pfree(buf);
344 }
345 else
346#endif
347 /* shouldn't happen */
348 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
349 }
350
351 PG_FREE_IF_COPY(key, 0);
352
353 return result;
354}
355
356/*
357 * hashvarlena() can be used for any varlena datatype in which there are
358 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
359 */
360Datum
361hashvarlena(PG_FUNCTION_ARGS)
362{
363 struct varlena *key = PG_GETARG_VARLENA_PP(0);
364 Datum result;
365
366 result = hash_any((unsigned char *) VARDATA_ANY(key),
367 VARSIZE_ANY_EXHDR(key));
368
369 /* Avoid leaking memory for toasted inputs */
370 PG_FREE_IF_COPY(key, 0);
371
372 return result;
373}
374
375Datum
376hashvarlenaextended(PG_FUNCTION_ARGS)
377{
378 struct varlena *key = PG_GETARG_VARLENA_PP(0);
379 Datum result;
380
381 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
382 VARSIZE_ANY_EXHDR(key),
383 PG_GETARG_INT64(1));
384
385 PG_FREE_IF_COPY(key, 0);
386
387 return result;
388}
389