| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * uuid.c |
| 4 | * Functions for the built-in type "uuid". |
| 5 | * |
| 6 | * Copyright (c) 2007-2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * IDENTIFICATION |
| 9 | * src/backend/utils/adt/uuid.c |
| 10 | * |
| 11 | *------------------------------------------------------------------------- |
| 12 | */ |
| 13 | |
| 14 | #include "postgres.h" |
| 15 | |
| 16 | #include "lib/hyperloglog.h" |
| 17 | #include "libpq/pqformat.h" |
| 18 | #include "port/pg_bswap.h" |
| 19 | #include "utils/builtins.h" |
| 20 | #include "utils/guc.h" |
| 21 | #include "utils/hashutils.h" |
| 22 | #include "utils/sortsupport.h" |
| 23 | #include "utils/uuid.h" |
| 24 | |
| 25 | /* sortsupport for uuid */ |
| 26 | typedef struct |
| 27 | { |
| 28 | int64 input_count; /* number of non-null values seen */ |
| 29 | bool estimating; /* true if estimating cardinality */ |
| 30 | |
| 31 | hyperLogLogState abbr_card; /* cardinality estimator */ |
| 32 | } uuid_sortsupport_state; |
| 33 | |
| 34 | static void string_to_uuid(const char *source, pg_uuid_t *uuid); |
| 35 | static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2); |
| 36 | static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup); |
| 37 | static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup); |
| 38 | static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup); |
| 39 | static Datum uuid_abbrev_convert(Datum original, SortSupport ssup); |
| 40 | |
| 41 | Datum |
| 42 | uuid_in(PG_FUNCTION_ARGS) |
| 43 | { |
| 44 | char *uuid_str = PG_GETARG_CSTRING(0); |
| 45 | pg_uuid_t *uuid; |
| 46 | |
| 47 | uuid = (pg_uuid_t *) palloc(sizeof(*uuid)); |
| 48 | string_to_uuid(uuid_str, uuid); |
| 49 | PG_RETURN_UUID_P(uuid); |
| 50 | } |
| 51 | |
| 52 | Datum |
| 53 | uuid_out(PG_FUNCTION_ARGS) |
| 54 | { |
| 55 | pg_uuid_t *uuid = PG_GETARG_UUID_P(0); |
| 56 | static const char hex_chars[] = "0123456789abcdef" ; |
| 57 | StringInfoData buf; |
| 58 | int i; |
| 59 | |
| 60 | initStringInfo(&buf); |
| 61 | for (i = 0; i < UUID_LEN; i++) |
| 62 | { |
| 63 | int hi; |
| 64 | int lo; |
| 65 | |
| 66 | /* |
| 67 | * We print uuid values as a string of 8, 4, 4, 4, and then 12 |
| 68 | * hexadecimal characters, with each group is separated by a hyphen |
| 69 | * ("-"). Therefore, add the hyphens at the appropriate places here. |
| 70 | */ |
| 71 | if (i == 4 || i == 6 || i == 8 || i == 10) |
| 72 | appendStringInfoChar(&buf, '-'); |
| 73 | |
| 74 | hi = uuid->data[i] >> 4; |
| 75 | lo = uuid->data[i] & 0x0F; |
| 76 | |
| 77 | appendStringInfoChar(&buf, hex_chars[hi]); |
| 78 | appendStringInfoChar(&buf, hex_chars[lo]); |
| 79 | } |
| 80 | |
| 81 | PG_RETURN_CSTRING(buf.data); |
| 82 | } |
| 83 | |
| 84 | /* |
| 85 | * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash |
| 86 | * after each group of 4 hexadecimal digits, and optionally surrounded by {}. |
| 87 | * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal |
| 88 | * digits, is the only one used for output.) |
| 89 | */ |
| 90 | static void |
| 91 | string_to_uuid(const char *source, pg_uuid_t *uuid) |
| 92 | { |
| 93 | const char *src = source; |
| 94 | bool braces = false; |
| 95 | int i; |
| 96 | |
| 97 | if (src[0] == '{') |
| 98 | { |
| 99 | src++; |
| 100 | braces = true; |
| 101 | } |
| 102 | |
| 103 | for (i = 0; i < UUID_LEN; i++) |
| 104 | { |
| 105 | char str_buf[3]; |
| 106 | |
| 107 | if (src[0] == '\0' || src[1] == '\0') |
| 108 | goto syntax_error; |
| 109 | memcpy(str_buf, src, 2); |
| 110 | if (!isxdigit((unsigned char) str_buf[0]) || |
| 111 | !isxdigit((unsigned char) str_buf[1])) |
| 112 | goto syntax_error; |
| 113 | |
| 114 | str_buf[2] = '\0'; |
| 115 | uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16); |
| 116 | src += 2; |
| 117 | if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1) |
| 118 | src++; |
| 119 | } |
| 120 | |
| 121 | if (braces) |
| 122 | { |
| 123 | if (*src != '}') |
| 124 | goto syntax_error; |
| 125 | src++; |
| 126 | } |
| 127 | |
| 128 | if (*src != '\0') |
| 129 | goto syntax_error; |
| 130 | |
| 131 | return; |
| 132 | |
| 133 | syntax_error: |
| 134 | ereport(ERROR, |
| 135 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| 136 | errmsg("invalid input syntax for type %s: \"%s\"" , |
| 137 | "uuid" , source))); |
| 138 | } |
| 139 | |
| 140 | Datum |
| 141 | uuid_recv(PG_FUNCTION_ARGS) |
| 142 | { |
| 143 | StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0); |
| 144 | pg_uuid_t *uuid; |
| 145 | |
| 146 | uuid = (pg_uuid_t *) palloc(UUID_LEN); |
| 147 | memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN); |
| 148 | PG_RETURN_POINTER(uuid); |
| 149 | } |
| 150 | |
| 151 | Datum |
| 152 | uuid_send(PG_FUNCTION_ARGS) |
| 153 | { |
| 154 | pg_uuid_t *uuid = PG_GETARG_UUID_P(0); |
| 155 | StringInfoData buffer; |
| 156 | |
| 157 | pq_begintypsend(&buffer); |
| 158 | pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN); |
| 159 | PG_RETURN_BYTEA_P(pq_endtypsend(&buffer)); |
| 160 | } |
| 161 | |
| 162 | /* internal uuid compare function */ |
| 163 | static int |
| 164 | uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2) |
| 165 | { |
| 166 | return memcmp(arg1->data, arg2->data, UUID_LEN); |
| 167 | } |
| 168 | |
| 169 | Datum |
| 170 | uuid_lt(PG_FUNCTION_ARGS) |
| 171 | { |
| 172 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 173 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 174 | |
| 175 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0); |
| 176 | } |
| 177 | |
| 178 | Datum |
| 179 | uuid_le(PG_FUNCTION_ARGS) |
| 180 | { |
| 181 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 182 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 183 | |
| 184 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0); |
| 185 | } |
| 186 | |
| 187 | Datum |
| 188 | uuid_eq(PG_FUNCTION_ARGS) |
| 189 | { |
| 190 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 191 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 192 | |
| 193 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0); |
| 194 | } |
| 195 | |
| 196 | Datum |
| 197 | uuid_ge(PG_FUNCTION_ARGS) |
| 198 | { |
| 199 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 200 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 201 | |
| 202 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0); |
| 203 | } |
| 204 | |
| 205 | Datum |
| 206 | uuid_gt(PG_FUNCTION_ARGS) |
| 207 | { |
| 208 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 209 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 210 | |
| 211 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0); |
| 212 | } |
| 213 | |
| 214 | Datum |
| 215 | uuid_ne(PG_FUNCTION_ARGS) |
| 216 | { |
| 217 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 218 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 219 | |
| 220 | PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0); |
| 221 | } |
| 222 | |
| 223 | /* handler for btree index operator */ |
| 224 | Datum |
| 225 | uuid_cmp(PG_FUNCTION_ARGS) |
| 226 | { |
| 227 | pg_uuid_t *arg1 = PG_GETARG_UUID_P(0); |
| 228 | pg_uuid_t *arg2 = PG_GETARG_UUID_P(1); |
| 229 | |
| 230 | PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2)); |
| 231 | } |
| 232 | |
| 233 | /* |
| 234 | * Sort support strategy routine |
| 235 | */ |
| 236 | Datum |
| 237 | uuid_sortsupport(PG_FUNCTION_ARGS) |
| 238 | { |
| 239 | SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
| 240 | |
| 241 | ssup->comparator = uuid_fast_cmp; |
| 242 | ssup->ssup_extra = NULL; |
| 243 | |
| 244 | if (ssup->abbreviate) |
| 245 | { |
| 246 | uuid_sortsupport_state *uss; |
| 247 | MemoryContext oldcontext; |
| 248 | |
| 249 | oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
| 250 | |
| 251 | uss = palloc(sizeof(uuid_sortsupport_state)); |
| 252 | uss->input_count = 0; |
| 253 | uss->estimating = true; |
| 254 | initHyperLogLog(&uss->abbr_card, 10); |
| 255 | |
| 256 | ssup->ssup_extra = uss; |
| 257 | |
| 258 | ssup->comparator = uuid_cmp_abbrev; |
| 259 | ssup->abbrev_converter = uuid_abbrev_convert; |
| 260 | ssup->abbrev_abort = uuid_abbrev_abort; |
| 261 | ssup->abbrev_full_comparator = uuid_fast_cmp; |
| 262 | |
| 263 | MemoryContextSwitchTo(oldcontext); |
| 264 | } |
| 265 | |
| 266 | PG_RETURN_VOID(); |
| 267 | } |
| 268 | |
| 269 | /* |
| 270 | * SortSupport comparison func |
| 271 | */ |
| 272 | static int |
| 273 | uuid_fast_cmp(Datum x, Datum y, SortSupport ssup) |
| 274 | { |
| 275 | pg_uuid_t *arg1 = DatumGetUUIDP(x); |
| 276 | pg_uuid_t *arg2 = DatumGetUUIDP(y); |
| 277 | |
| 278 | return uuid_internal_cmp(arg1, arg2); |
| 279 | } |
| 280 | |
| 281 | /* |
| 282 | * Abbreviated key comparison func |
| 283 | */ |
| 284 | static int |
| 285 | uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup) |
| 286 | { |
| 287 | if (x > y) |
| 288 | return 1; |
| 289 | else if (x == y) |
| 290 | return 0; |
| 291 | else |
| 292 | return -1; |
| 293 | } |
| 294 | |
| 295 | /* |
| 296 | * Callback for estimating effectiveness of abbreviated key optimization. |
| 297 | * |
| 298 | * We pay no attention to the cardinality of the non-abbreviated data, because |
| 299 | * there is no equality fast-path within authoritative uuid comparator. |
| 300 | */ |
| 301 | static bool |
| 302 | uuid_abbrev_abort(int memtupcount, SortSupport ssup) |
| 303 | { |
| 304 | uuid_sortsupport_state *uss = ssup->ssup_extra; |
| 305 | double abbr_card; |
| 306 | |
| 307 | if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) |
| 308 | return false; |
| 309 | |
| 310 | abbr_card = estimateHyperLogLog(&uss->abbr_card); |
| 311 | |
| 312 | /* |
| 313 | * If we have >100k distinct values, then even if we were sorting many |
| 314 | * billion rows we'd likely still break even, and the penalty of undoing |
| 315 | * that many rows of abbrevs would probably not be worth it. Stop even |
| 316 | * counting at that point. |
| 317 | */ |
| 318 | if (abbr_card > 100000.0) |
| 319 | { |
| 320 | #ifdef TRACE_SORT |
| 321 | if (trace_sort) |
| 322 | elog(LOG, |
| 323 | "uuid_abbrev: estimation ends at cardinality %f" |
| 324 | " after " INT64_FORMAT " values (%d rows)" , |
| 325 | abbr_card, uss->input_count, memtupcount); |
| 326 | #endif |
| 327 | uss->estimating = false; |
| 328 | return false; |
| 329 | } |
| 330 | |
| 331 | /* |
| 332 | * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row |
| 333 | * fudge factor allows us to abort earlier on genuinely pathological data |
| 334 | * where we've had exactly one abbreviated value in the first 2k |
| 335 | * (non-null) rows. |
| 336 | */ |
| 337 | if (abbr_card < uss->input_count / 2000.0 + 0.5) |
| 338 | { |
| 339 | #ifdef TRACE_SORT |
| 340 | if (trace_sort) |
| 341 | elog(LOG, |
| 342 | "uuid_abbrev: aborting abbreviation at cardinality %f" |
| 343 | " below threshold %f after " INT64_FORMAT " values (%d rows)" , |
| 344 | abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, |
| 345 | memtupcount); |
| 346 | #endif |
| 347 | return true; |
| 348 | } |
| 349 | |
| 350 | #ifdef TRACE_SORT |
| 351 | if (trace_sort) |
| 352 | elog(LOG, |
| 353 | "uuid_abbrev: cardinality %f after " INT64_FORMAT |
| 354 | " values (%d rows)" , abbr_card, uss->input_count, memtupcount); |
| 355 | #endif |
| 356 | |
| 357 | return false; |
| 358 | } |
| 359 | |
| 360 | /* |
| 361 | * Conversion routine for sortsupport. Converts original uuid representation |
| 362 | * to abbreviated key representation. Our encoding strategy is simple -- pack |
| 363 | * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian |
| 364 | * machines, the bytes are stored in reverse order), and treat it as an |
| 365 | * unsigned integer. |
| 366 | */ |
| 367 | static Datum |
| 368 | uuid_abbrev_convert(Datum original, SortSupport ssup) |
| 369 | { |
| 370 | uuid_sortsupport_state *uss = ssup->ssup_extra; |
| 371 | pg_uuid_t *authoritative = DatumGetUUIDP(original); |
| 372 | Datum res; |
| 373 | |
| 374 | memcpy(&res, authoritative->data, sizeof(Datum)); |
| 375 | uss->input_count += 1; |
| 376 | |
| 377 | if (uss->estimating) |
| 378 | { |
| 379 | uint32 tmp; |
| 380 | |
| 381 | #if SIZEOF_DATUM == 8 |
| 382 | tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); |
| 383 | #else /* SIZEOF_DATUM != 8 */ |
| 384 | tmp = (uint32) res; |
| 385 | #endif |
| 386 | |
| 387 | addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); |
| 388 | } |
| 389 | |
| 390 | /* |
| 391 | * Byteswap on little-endian machines. |
| 392 | * |
| 393 | * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way |
| 394 | * comparator) works correctly on all platforms. If we didn't do this, |
| 395 | * the comparator would have to call memcmp() with a pair of pointers to |
| 396 | * the first byte of each abbreviated key, which is slower. |
| 397 | */ |
| 398 | res = DatumBigEndianToNative(res); |
| 399 | |
| 400 | return res; |
| 401 | } |
| 402 | |
| 403 | /* hash index support */ |
| 404 | Datum |
| 405 | uuid_hash(PG_FUNCTION_ARGS) |
| 406 | { |
| 407 | pg_uuid_t *key = PG_GETARG_UUID_P(0); |
| 408 | |
| 409 | return hash_any(key->data, UUID_LEN); |
| 410 | } |
| 411 | |
| 412 | Datum |
| 413 | uuid_hash_extended(PG_FUNCTION_ARGS) |
| 414 | { |
| 415 | pg_uuid_t *key = PG_GETARG_UUID_P(0); |
| 416 | |
| 417 | return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1)); |
| 418 | } |
| 419 | |