1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * uuid.c |
4 | * Functions for the built-in type "uuid". |
5 | * |
6 | * Copyright (c) 2007-2019, PostgreSQL Global Development Group |
7 | * |
8 | * IDENTIFICATION |
9 | * src/backend/utils/adt/uuid.c |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | |
14 | #include "postgres.h" |
15 | |
16 | #include "lib/hyperloglog.h" |
17 | #include "libpq/pqformat.h" |
18 | #include "port/pg_bswap.h" |
19 | #include "utils/builtins.h" |
20 | #include "utils/guc.h" |
21 | #include "utils/hashutils.h" |
22 | #include "utils/sortsupport.h" |
23 | #include "utils/uuid.h" |
24 | |
25 | /* sortsupport for uuid */ |
26 | typedef struct |
27 | { |
28 | int64 input_count; /* number of non-null values seen */ |
29 | bool estimating; /* true if estimating cardinality */ |
30 | |
31 | hyperLogLogState abbr_card; /* cardinality estimator */ |
32 | } uuid_sortsupport_state; |
33 | |
34 | static void string_to_uuid(const char *source, pg_uuid_t *uuid); |
35 | static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2); |
36 | static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup); |
37 | static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup); |
38 | static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup); |
39 | static Datum uuid_abbrev_convert(Datum original, SortSupport ssup); |
40 | |
41 | Datum |
42 | uuid_in(PG_FUNCTION_ARGS) |
43 | { |
44 | char *uuid_str = PG_GETARG_CSTRING(0); |
45 | pg_uuid_t *uuid; |
46 | |
47 | uuid = (pg_uuid_t *) palloc(sizeof(*uuid)); |
48 | string_to_uuid(uuid_str, uuid); |
49 | PG_RETURN_UUID_P(uuid); |
50 | } |
51 | |
52 | Datum |
53 | uuid_out(PG_FUNCTION_ARGS) |
54 | { |
55 | pg_uuid_t *uuid = PG_GETARG_UUID_P(0); |
56 | static const char hex_chars[] = "0123456789abcdef" ; |
57 | StringInfoData buf; |
58 | int i; |
59 | |
60 | initStringInfo(&buf); |
61 | for (i = 0; i < UUID_LEN; i++) |
62 | { |
63 | int hi; |
64 | int lo; |
65 | |
66 | /* |
67 | * We print uuid values as a string of 8, 4, 4, 4, and then 12 |
68 | * hexadecimal characters, with each group is separated by a hyphen |
69 | * ("-"). Therefore, add the hyphens at the appropriate places here. |
70 | */ |
71 | if (i == 4 || i == 6 || i == 8 || i == 10) |
72 | appendStringInfoChar(&buf, '-'); |
73 | |
74 | hi = uuid->data[i] >> 4; |
75 | lo = uuid->data[i] & 0x0F; |
76 | |
77 | appendStringInfoChar(&buf, hex_chars[hi]); |
78 | appendStringInfoChar(&buf, hex_chars[lo]); |
79 | } |
80 | |
81 | PG_RETURN_CSTRING(buf.data); |
82 | } |
83 | |
84 | /* |
85 | * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash |
86 | * after each group of 4 hexadecimal digits, and optionally surrounded by {}. |
87 | * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal |
88 | * digits, is the only one used for output.) |
89 | */ |
90 | static void |
91 | string_to_uuid(const char *source, pg_uuid_t *uuid) |
92 | { |
93 | const char *src = source; |
94 | bool braces = false; |
95 | int i; |
96 | |
97 | if (src[0] == '{') |
98 | { |
99 | src++; |
100 | braces = true; |
101 | } |
102 | |
103 | for (i = 0; i < UUID_LEN; i++) |
104 | { |
105 | char str_buf[3]; |
106 | |
107 | if (src[0] == '\0' || src[1] == '\0') |
108 | goto syntax_error; |
109 | memcpy(str_buf, src, 2); |
110 | if (!isxdigit((unsigned char) str_buf[0]) || |
111 | !isxdigit((unsigned char) str_buf[1])) |
112 | goto syntax_error; |
113 | |
114 | str_buf[2] = '\0'; |
115 | uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16); |
116 | src += 2; |
117 | if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1) |
118 | src++; |
119 | } |
120 | |
121 | if (braces) |
122 | { |
123 | if (*src != '}') |
124 | goto syntax_error; |
125 | src++; |
126 | } |
127 | |
128 | if (*src != '\0') |
129 | goto syntax_error; |
130 | |
131 | return; |
132 | |
133 | syntax_error: |
134 | ereport(ERROR, |
135 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
136 | errmsg("invalid input syntax for type %s: \"%s\"" , |
137 | "uuid" , source))); |
138 | } |
139 | |
140 | Datum |
141 | uuid_recv(PG_FUNCTION_ARGS) |
142 | { |
143 | StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0); |
144 | pg_uuid_t *uuid; |
145 | |
146 | uuid = (pg_uuid_t *) palloc(UUID_LEN); |
147 | memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN); |
148 | PG_RETURN_POINTER(uuid); |
149 | } |
150 | |
151 | Datum |
152 | uuid_send(PG_FUNCTION_ARGS) |
153 | { |
154 | pg_uuid_t *uuid = PG_GETARG_UUID_P(0); |
155 | StringInfoData buffer; |
156 | |
157 | pq_begintypsend(&buffer); |
158 | pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN); |
159 | PG_RETURN_BYTEA_P(pq_endtypsend(&buffer)); |
160 | } |
161 | |
162 | /* internal uuid compare function */ |
163 | static int |
164 | uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2) |
165 | { |
166 | return memcmp(arg1->data, arg2->data, UUID_LEN); |
167 | } |
168 | |
169 | Datum |
170 | uuid_lt(PG_FUNCTION_ARGS) |
171 | { |
172 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
173 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
174 | |
175 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0); |
176 | } |
177 | |
178 | Datum |
179 | uuid_le(PG_FUNCTION_ARGS) |
180 | { |
181 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
182 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
183 | |
184 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0); |
185 | } |
186 | |
187 | Datum |
188 | uuid_eq(PG_FUNCTION_ARGS) |
189 | { |
190 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
191 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
192 | |
193 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0); |
194 | } |
195 | |
196 | Datum |
197 | uuid_ge(PG_FUNCTION_ARGS) |
198 | { |
199 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
200 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
201 | |
202 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0); |
203 | } |
204 | |
205 | Datum |
206 | uuid_gt(PG_FUNCTION_ARGS) |
207 | { |
208 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
209 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
210 | |
211 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0); |
212 | } |
213 | |
214 | Datum |
215 | uuid_ne(PG_FUNCTION_ARGS) |
216 | { |
217 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
218 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
219 | |
220 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0); |
221 | } |
222 | |
223 | /* handler for btree index operator */ |
224 | Datum |
225 | uuid_cmp(PG_FUNCTION_ARGS) |
226 | { |
227 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
228 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
229 | |
230 | PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2)); |
231 | } |
232 | |
233 | /* |
234 | * Sort support strategy routine |
235 | */ |
236 | Datum |
237 | uuid_sortsupport(PG_FUNCTION_ARGS) |
238 | { |
239 | SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
240 | |
241 | ssup->comparator = uuid_fast_cmp; |
242 | ssup->ssup_extra = NULL; |
243 | |
244 | if (ssup->abbreviate) |
245 | { |
246 | uuid_sortsupport_state *uss; |
247 | MemoryContext oldcontext; |
248 | |
249 | oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
250 | |
251 | uss = palloc(sizeof(uuid_sortsupport_state)); |
252 | uss->input_count = 0; |
253 | uss->estimating = true; |
254 | initHyperLogLog(&uss->abbr_card, 10); |
255 | |
256 | ssup->ssup_extra = uss; |
257 | |
258 | ssup->comparator = uuid_cmp_abbrev; |
259 | ssup->abbrev_converter = uuid_abbrev_convert; |
260 | ssup->abbrev_abort = uuid_abbrev_abort; |
261 | ssup->abbrev_full_comparator = uuid_fast_cmp; |
262 | |
263 | MemoryContextSwitchTo(oldcontext); |
264 | } |
265 | |
266 | PG_RETURN_VOID(); |
267 | } |
268 | |
269 | /* |
270 | * SortSupport comparison func |
271 | */ |
272 | static int |
273 | uuid_fast_cmp(Datum x, Datum y, SortSupport ssup) |
274 | { |
275 | pg_uuid_t *arg1 = DatumGetUUIDP(x); |
276 | pg_uuid_t *arg2 = DatumGetUUIDP(y); |
277 | |
278 | return uuid_internal_cmp(arg1, arg2); |
279 | } |
280 | |
281 | /* |
282 | * Abbreviated key comparison func |
283 | */ |
284 | static int |
285 | uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup) |
286 | { |
287 | if (x > y) |
288 | return 1; |
289 | else if (x == y) |
290 | return 0; |
291 | else |
292 | return -1; |
293 | } |
294 | |
295 | /* |
296 | * Callback for estimating effectiveness of abbreviated key optimization. |
297 | * |
298 | * We pay no attention to the cardinality of the non-abbreviated data, because |
299 | * there is no equality fast-path within authoritative uuid comparator. |
300 | */ |
301 | static bool |
302 | uuid_abbrev_abort(int memtupcount, SortSupport ssup) |
303 | { |
304 | uuid_sortsupport_state *uss = ssup->ssup_extra; |
305 | double abbr_card; |
306 | |
307 | if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) |
308 | return false; |
309 | |
310 | abbr_card = estimateHyperLogLog(&uss->abbr_card); |
311 | |
312 | /* |
313 | * If we have >100k distinct values, then even if we were sorting many |
314 | * billion rows we'd likely still break even, and the penalty of undoing |
315 | * that many rows of abbrevs would probably not be worth it. Stop even |
316 | * counting at that point. |
317 | */ |
318 | if (abbr_card > 100000.0) |
319 | { |
320 | #ifdef TRACE_SORT |
321 | if (trace_sort) |
322 | elog(LOG, |
323 | "uuid_abbrev: estimation ends at cardinality %f" |
324 | " after " INT64_FORMAT " values (%d rows)" , |
325 | abbr_card, uss->input_count, memtupcount); |
326 | #endif |
327 | uss->estimating = false; |
328 | return false; |
329 | } |
330 | |
331 | /* |
332 | * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row |
333 | * fudge factor allows us to abort earlier on genuinely pathological data |
334 | * where we've had exactly one abbreviated value in the first 2k |
335 | * (non-null) rows. |
336 | */ |
337 | if (abbr_card < uss->input_count / 2000.0 + 0.5) |
338 | { |
339 | #ifdef TRACE_SORT |
340 | if (trace_sort) |
341 | elog(LOG, |
342 | "uuid_abbrev: aborting abbreviation at cardinality %f" |
343 | " below threshold %f after " INT64_FORMAT " values (%d rows)" , |
344 | abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, |
345 | memtupcount); |
346 | #endif |
347 | return true; |
348 | } |
349 | |
350 | #ifdef TRACE_SORT |
351 | if (trace_sort) |
352 | elog(LOG, |
353 | "uuid_abbrev: cardinality %f after " INT64_FORMAT |
354 | " values (%d rows)" , abbr_card, uss->input_count, memtupcount); |
355 | #endif |
356 | |
357 | return false; |
358 | } |
359 | |
360 | /* |
361 | * Conversion routine for sortsupport. Converts original uuid representation |
362 | * to abbreviated key representation. Our encoding strategy is simple -- pack |
363 | * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian |
364 | * machines, the bytes are stored in reverse order), and treat it as an |
365 | * unsigned integer. |
366 | */ |
367 | static Datum |
368 | uuid_abbrev_convert(Datum original, SortSupport ssup) |
369 | { |
370 | uuid_sortsupport_state *uss = ssup->ssup_extra; |
371 | pg_uuid_t *authoritative = DatumGetUUIDP(original); |
372 | Datum res; |
373 | |
374 | memcpy(&res, authoritative->data, sizeof(Datum)); |
375 | uss->input_count += 1; |
376 | |
377 | if (uss->estimating) |
378 | { |
379 | uint32 tmp; |
380 | |
381 | #if SIZEOF_DATUM == 8 |
382 | tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); |
383 | #else /* SIZEOF_DATUM != 8 */ |
384 | tmp = (uint32) res; |
385 | #endif |
386 | |
387 | addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); |
388 | } |
389 | |
390 | /* |
391 | * Byteswap on little-endian machines. |
392 | * |
393 | * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way |
394 | * comparator) works correctly on all platforms. If we didn't do this, |
395 | * the comparator would have to call memcmp() with a pair of pointers to |
396 | * the first byte of each abbreviated key, which is slower. |
397 | */ |
398 | res = DatumBigEndianToNative(res); |
399 | |
400 | return res; |
401 | } |
402 | |
403 | /* hash index support */ |
404 | Datum |
405 | uuid_hash(PG_FUNCTION_ARGS) |
406 | { |
407 | pg_uuid_t *key = PG_GETARG_UUID_P(0); |
408 | |
409 | return hash_any(key->data, UUID_LEN); |
410 | } |
411 | |
412 | Datum |
413 | uuid_hash_extended(PG_FUNCTION_ARGS) |
414 | { |
415 | pg_uuid_t *key = PG_GETARG_UUID_P(0); |
416 | |
417 | return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1)); |
418 | } |
419 | |