1/*-------------------------------------------------------------------------
2 *
3 * uuid.c
4 * Functions for the built-in type "uuid".
5 *
6 * Copyright (c) 2007-2019, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/utils/adt/uuid.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#include "postgres.h"
15
16#include "lib/hyperloglog.h"
17#include "libpq/pqformat.h"
18#include "port/pg_bswap.h"
19#include "utils/builtins.h"
20#include "utils/guc.h"
21#include "utils/hashutils.h"
22#include "utils/sortsupport.h"
23#include "utils/uuid.h"
24
25/* sortsupport for uuid */
26typedef struct
27{
28 int64 input_count; /* number of non-null values seen */
29 bool estimating; /* true if estimating cardinality */
30
31 hyperLogLogState abbr_card; /* cardinality estimator */
32} uuid_sortsupport_state;
33
34static void string_to_uuid(const char *source, pg_uuid_t *uuid);
35static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
36static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
37static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
38static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
39static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
40
41Datum
42uuid_in(PG_FUNCTION_ARGS)
43{
44 char *uuid_str = PG_GETARG_CSTRING(0);
45 pg_uuid_t *uuid;
46
47 uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
48 string_to_uuid(uuid_str, uuid);
49 PG_RETURN_UUID_P(uuid);
50}
51
52Datum
53uuid_out(PG_FUNCTION_ARGS)
54{
55 pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
56 static const char hex_chars[] = "0123456789abcdef";
57 StringInfoData buf;
58 int i;
59
60 initStringInfo(&buf);
61 for (i = 0; i < UUID_LEN; i++)
62 {
63 int hi;
64 int lo;
65
66 /*
67 * We print uuid values as a string of 8, 4, 4, 4, and then 12
68 * hexadecimal characters, with each group is separated by a hyphen
69 * ("-"). Therefore, add the hyphens at the appropriate places here.
70 */
71 if (i == 4 || i == 6 || i == 8 || i == 10)
72 appendStringInfoChar(&buf, '-');
73
74 hi = uuid->data[i] >> 4;
75 lo = uuid->data[i] & 0x0F;
76
77 appendStringInfoChar(&buf, hex_chars[hi]);
78 appendStringInfoChar(&buf, hex_chars[lo]);
79 }
80
81 PG_RETURN_CSTRING(buf.data);
82}
83
84/*
85 * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
86 * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
87 * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
88 * digits, is the only one used for output.)
89 */
90static void
91string_to_uuid(const char *source, pg_uuid_t *uuid)
92{
93 const char *src = source;
94 bool braces = false;
95 int i;
96
97 if (src[0] == '{')
98 {
99 src++;
100 braces = true;
101 }
102
103 for (i = 0; i < UUID_LEN; i++)
104 {
105 char str_buf[3];
106
107 if (src[0] == '\0' || src[1] == '\0')
108 goto syntax_error;
109 memcpy(str_buf, src, 2);
110 if (!isxdigit((unsigned char) str_buf[0]) ||
111 !isxdigit((unsigned char) str_buf[1]))
112 goto syntax_error;
113
114 str_buf[2] = '\0';
115 uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
116 src += 2;
117 if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
118 src++;
119 }
120
121 if (braces)
122 {
123 if (*src != '}')
124 goto syntax_error;
125 src++;
126 }
127
128 if (*src != '\0')
129 goto syntax_error;
130
131 return;
132
133syntax_error:
134 ereport(ERROR,
135 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
136 errmsg("invalid input syntax for type %s: \"%s\"",
137 "uuid", source)));
138}
139
140Datum
141uuid_recv(PG_FUNCTION_ARGS)
142{
143 StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
144 pg_uuid_t *uuid;
145
146 uuid = (pg_uuid_t *) palloc(UUID_LEN);
147 memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
148 PG_RETURN_POINTER(uuid);
149}
150
151Datum
152uuid_send(PG_FUNCTION_ARGS)
153{
154 pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
155 StringInfoData buffer;
156
157 pq_begintypsend(&buffer);
158 pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
159 PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
160}
161
162/* internal uuid compare function */
163static int
164uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
165{
166 return memcmp(arg1->data, arg2->data, UUID_LEN);
167}
168
169Datum
170uuid_lt(PG_FUNCTION_ARGS)
171{
172 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
173 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
174
175 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
176}
177
178Datum
179uuid_le(PG_FUNCTION_ARGS)
180{
181 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
182 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
183
184 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
185}
186
187Datum
188uuid_eq(PG_FUNCTION_ARGS)
189{
190 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
191 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
192
193 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
194}
195
196Datum
197uuid_ge(PG_FUNCTION_ARGS)
198{
199 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
200 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
201
202 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
203}
204
205Datum
206uuid_gt(PG_FUNCTION_ARGS)
207{
208 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
209 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
210
211 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
212}
213
214Datum
215uuid_ne(PG_FUNCTION_ARGS)
216{
217 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
218 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
219
220 PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
221}
222
223/* handler for btree index operator */
224Datum
225uuid_cmp(PG_FUNCTION_ARGS)
226{
227 pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
228 pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
229
230 PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
231}
232
233/*
234 * Sort support strategy routine
235 */
236Datum
237uuid_sortsupport(PG_FUNCTION_ARGS)
238{
239 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
240
241 ssup->comparator = uuid_fast_cmp;
242 ssup->ssup_extra = NULL;
243
244 if (ssup->abbreviate)
245 {
246 uuid_sortsupport_state *uss;
247 MemoryContext oldcontext;
248
249 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
250
251 uss = palloc(sizeof(uuid_sortsupport_state));
252 uss->input_count = 0;
253 uss->estimating = true;
254 initHyperLogLog(&uss->abbr_card, 10);
255
256 ssup->ssup_extra = uss;
257
258 ssup->comparator = uuid_cmp_abbrev;
259 ssup->abbrev_converter = uuid_abbrev_convert;
260 ssup->abbrev_abort = uuid_abbrev_abort;
261 ssup->abbrev_full_comparator = uuid_fast_cmp;
262
263 MemoryContextSwitchTo(oldcontext);
264 }
265
266 PG_RETURN_VOID();
267}
268
269/*
270 * SortSupport comparison func
271 */
272static int
273uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
274{
275 pg_uuid_t *arg1 = DatumGetUUIDP(x);
276 pg_uuid_t *arg2 = DatumGetUUIDP(y);
277
278 return uuid_internal_cmp(arg1, arg2);
279}
280
281/*
282 * Abbreviated key comparison func
283 */
284static int
285uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
286{
287 if (x > y)
288 return 1;
289 else if (x == y)
290 return 0;
291 else
292 return -1;
293}
294
295/*
296 * Callback for estimating effectiveness of abbreviated key optimization.
297 *
298 * We pay no attention to the cardinality of the non-abbreviated data, because
299 * there is no equality fast-path within authoritative uuid comparator.
300 */
301static bool
302uuid_abbrev_abort(int memtupcount, SortSupport ssup)
303{
304 uuid_sortsupport_state *uss = ssup->ssup_extra;
305 double abbr_card;
306
307 if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
308 return false;
309
310 abbr_card = estimateHyperLogLog(&uss->abbr_card);
311
312 /*
313 * If we have >100k distinct values, then even if we were sorting many
314 * billion rows we'd likely still break even, and the penalty of undoing
315 * that many rows of abbrevs would probably not be worth it. Stop even
316 * counting at that point.
317 */
318 if (abbr_card > 100000.0)
319 {
320#ifdef TRACE_SORT
321 if (trace_sort)
322 elog(LOG,
323 "uuid_abbrev: estimation ends at cardinality %f"
324 " after " INT64_FORMAT " values (%d rows)",
325 abbr_card, uss->input_count, memtupcount);
326#endif
327 uss->estimating = false;
328 return false;
329 }
330
331 /*
332 * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
333 * fudge factor allows us to abort earlier on genuinely pathological data
334 * where we've had exactly one abbreviated value in the first 2k
335 * (non-null) rows.
336 */
337 if (abbr_card < uss->input_count / 2000.0 + 0.5)
338 {
339#ifdef TRACE_SORT
340 if (trace_sort)
341 elog(LOG,
342 "uuid_abbrev: aborting abbreviation at cardinality %f"
343 " below threshold %f after " INT64_FORMAT " values (%d rows)",
344 abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
345 memtupcount);
346#endif
347 return true;
348 }
349
350#ifdef TRACE_SORT
351 if (trace_sort)
352 elog(LOG,
353 "uuid_abbrev: cardinality %f after " INT64_FORMAT
354 " values (%d rows)", abbr_card, uss->input_count, memtupcount);
355#endif
356
357 return false;
358}
359
360/*
361 * Conversion routine for sortsupport. Converts original uuid representation
362 * to abbreviated key representation. Our encoding strategy is simple -- pack
363 * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
364 * machines, the bytes are stored in reverse order), and treat it as an
365 * unsigned integer.
366 */
367static Datum
368uuid_abbrev_convert(Datum original, SortSupport ssup)
369{
370 uuid_sortsupport_state *uss = ssup->ssup_extra;
371 pg_uuid_t *authoritative = DatumGetUUIDP(original);
372 Datum res;
373
374 memcpy(&res, authoritative->data, sizeof(Datum));
375 uss->input_count += 1;
376
377 if (uss->estimating)
378 {
379 uint32 tmp;
380
381#if SIZEOF_DATUM == 8
382 tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
383#else /* SIZEOF_DATUM != 8 */
384 tmp = (uint32) res;
385#endif
386
387 addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
388 }
389
390 /*
391 * Byteswap on little-endian machines.
392 *
393 * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
394 * comparator) works correctly on all platforms. If we didn't do this,
395 * the comparator would have to call memcmp() with a pair of pointers to
396 * the first byte of each abbreviated key, which is slower.
397 */
398 res = DatumBigEndianToNative(res);
399
400 return res;
401}
402
403/* hash index support */
404Datum
405uuid_hash(PG_FUNCTION_ARGS)
406{
407 pg_uuid_t *key = PG_GETARG_UUID_P(0);
408
409 return hash_any(key->data, UUID_LEN);
410}
411
412Datum
413uuid_hash_extended(PG_FUNCTION_ARGS)
414{
415 pg_uuid_t *key = PG_GETARG_UUID_P(0);
416
417 return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
418}
419