1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * mac.c |
4 | * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses. |
5 | * |
6 | * Portions Copyright (c) 1998-2019, PostgreSQL Global Development Group |
7 | * |
8 | * IDENTIFICATION |
9 | * src/backend/utils/adt/mac.c |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | |
14 | #include "postgres.h" |
15 | |
16 | #include "lib/hyperloglog.h" |
17 | #include "libpq/pqformat.h" |
18 | #include "port/pg_bswap.h" |
19 | #include "utils/builtins.h" |
20 | #include "utils/guc.h" |
21 | #include "utils/hashutils.h" |
22 | #include "utils/inet.h" |
23 | #include "utils/sortsupport.h" |
24 | |
25 | |
26 | /* |
27 | * Utility macros used for sorting and comparing: |
28 | */ |
29 | |
30 | #define hibits(addr) \ |
31 | ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c))) |
32 | |
33 | #define lobits(addr) \ |
34 | ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f))) |
35 | |
36 | /* sortsupport for macaddr */ |
37 | typedef struct |
38 | { |
39 | int64 input_count; /* number of non-null values seen */ |
40 | bool estimating; /* true if estimating cardinality */ |
41 | |
42 | hyperLogLogState abbr_card; /* cardinality estimator */ |
43 | } macaddr_sortsupport_state; |
44 | |
45 | static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); |
46 | static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); |
47 | static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup); |
48 | static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); |
49 | static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); |
50 | |
51 | /* |
52 | * MAC address reader. Accepts several common notations. |
53 | */ |
54 | |
55 | Datum |
56 | macaddr_in(PG_FUNCTION_ARGS) |
57 | { |
58 | char *str = PG_GETARG_CSTRING(0); |
59 | macaddr *result; |
60 | int a, |
61 | b, |
62 | c, |
63 | d, |
64 | e, |
65 | f; |
66 | char junk[2]; |
67 | int count; |
68 | |
69 | /* %1s matches iff there is trailing non-whitespace garbage */ |
70 | |
71 | count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s" , |
72 | &a, &b, &c, &d, &e, &f, junk); |
73 | if (count != 6) |
74 | count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s" , |
75 | &a, &b, &c, &d, &e, &f, junk); |
76 | if (count != 6) |
77 | count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s" , |
78 | &a, &b, &c, &d, &e, &f, junk); |
79 | if (count != 6) |
80 | count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s" , |
81 | &a, &b, &c, &d, &e, &f, junk); |
82 | if (count != 6) |
83 | count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s" , |
84 | &a, &b, &c, &d, &e, &f, junk); |
85 | if (count != 6) |
86 | count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s" , |
87 | &a, &b, &c, &d, &e, &f, junk); |
88 | if (count != 6) |
89 | count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s" , |
90 | &a, &b, &c, &d, &e, &f, junk); |
91 | if (count != 6) |
92 | ereport(ERROR, |
93 | (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
94 | errmsg("invalid input syntax for type %s: \"%s\"" , "macaddr" , |
95 | str))); |
96 | |
97 | if ((a < 0) || (a > 255) || (b < 0) || (b > 255) || |
98 | (c < 0) || (c > 255) || (d < 0) || (d > 255) || |
99 | (e < 0) || (e > 255) || (f < 0) || (f > 255)) |
100 | ereport(ERROR, |
101 | (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
102 | errmsg("invalid octet value in \"macaddr\" value: \"%s\"" , str))); |
103 | |
104 | result = (macaddr *) palloc(sizeof(macaddr)); |
105 | |
106 | result->a = a; |
107 | result->b = b; |
108 | result->c = c; |
109 | result->d = d; |
110 | result->e = e; |
111 | result->f = f; |
112 | |
113 | PG_RETURN_MACADDR_P(result); |
114 | } |
115 | |
116 | /* |
117 | * MAC address output function. Fixed format. |
118 | */ |
119 | |
120 | Datum |
121 | macaddr_out(PG_FUNCTION_ARGS) |
122 | { |
123 | macaddr *addr = PG_GETARG_MACADDR_P(0); |
124 | char *result; |
125 | |
126 | result = (char *) palloc(32); |
127 | |
128 | snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x" , |
129 | addr->a, addr->b, addr->c, addr->d, addr->e, addr->f); |
130 | |
131 | PG_RETURN_CSTRING(result); |
132 | } |
133 | |
134 | /* |
135 | * macaddr_recv - converts external binary format to macaddr |
136 | * |
137 | * The external representation is just the six bytes, MSB first. |
138 | */ |
139 | Datum |
140 | macaddr_recv(PG_FUNCTION_ARGS) |
141 | { |
142 | StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
143 | macaddr *addr; |
144 | |
145 | addr = (macaddr *) palloc(sizeof(macaddr)); |
146 | |
147 | addr->a = pq_getmsgbyte(buf); |
148 | addr->b = pq_getmsgbyte(buf); |
149 | addr->c = pq_getmsgbyte(buf); |
150 | addr->d = pq_getmsgbyte(buf); |
151 | addr->e = pq_getmsgbyte(buf); |
152 | addr->f = pq_getmsgbyte(buf); |
153 | |
154 | PG_RETURN_MACADDR_P(addr); |
155 | } |
156 | |
157 | /* |
158 | * macaddr_send - converts macaddr to binary format |
159 | */ |
160 | Datum |
161 | macaddr_send(PG_FUNCTION_ARGS) |
162 | { |
163 | macaddr *addr = PG_GETARG_MACADDR_P(0); |
164 | StringInfoData buf; |
165 | |
166 | pq_begintypsend(&buf); |
167 | pq_sendbyte(&buf, addr->a); |
168 | pq_sendbyte(&buf, addr->b); |
169 | pq_sendbyte(&buf, addr->c); |
170 | pq_sendbyte(&buf, addr->d); |
171 | pq_sendbyte(&buf, addr->e); |
172 | pq_sendbyte(&buf, addr->f); |
173 | PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); |
174 | } |
175 | |
176 | |
177 | /* |
178 | * Comparison function for sorting: |
179 | */ |
180 | |
181 | static int |
182 | macaddr_cmp_internal(macaddr *a1, macaddr *a2) |
183 | { |
184 | if (hibits(a1) < hibits(a2)) |
185 | return -1; |
186 | else if (hibits(a1) > hibits(a2)) |
187 | return 1; |
188 | else if (lobits(a1) < lobits(a2)) |
189 | return -1; |
190 | else if (lobits(a1) > lobits(a2)) |
191 | return 1; |
192 | else |
193 | return 0; |
194 | } |
195 | |
196 | Datum |
197 | macaddr_cmp(PG_FUNCTION_ARGS) |
198 | { |
199 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
200 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
201 | |
202 | PG_RETURN_INT32(macaddr_cmp_internal(a1, a2)); |
203 | } |
204 | |
205 | /* |
206 | * Boolean comparisons. |
207 | */ |
208 | |
209 | Datum |
210 | macaddr_lt(PG_FUNCTION_ARGS) |
211 | { |
212 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
213 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
214 | |
215 | PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0); |
216 | } |
217 | |
218 | Datum |
219 | macaddr_le(PG_FUNCTION_ARGS) |
220 | { |
221 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
222 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
223 | |
224 | PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0); |
225 | } |
226 | |
227 | Datum |
228 | macaddr_eq(PG_FUNCTION_ARGS) |
229 | { |
230 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
231 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
232 | |
233 | PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0); |
234 | } |
235 | |
236 | Datum |
237 | macaddr_ge(PG_FUNCTION_ARGS) |
238 | { |
239 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
240 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
241 | |
242 | PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0); |
243 | } |
244 | |
245 | Datum |
246 | macaddr_gt(PG_FUNCTION_ARGS) |
247 | { |
248 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
249 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
250 | |
251 | PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0); |
252 | } |
253 | |
254 | Datum |
255 | macaddr_ne(PG_FUNCTION_ARGS) |
256 | { |
257 | macaddr *a1 = PG_GETARG_MACADDR_P(0); |
258 | macaddr *a2 = PG_GETARG_MACADDR_P(1); |
259 | |
260 | PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0); |
261 | } |
262 | |
263 | /* |
264 | * Support function for hash indexes on macaddr. |
265 | */ |
266 | Datum |
267 | hashmacaddr(PG_FUNCTION_ARGS) |
268 | { |
269 | macaddr *key = PG_GETARG_MACADDR_P(0); |
270 | |
271 | return hash_any((unsigned char *) key, sizeof(macaddr)); |
272 | } |
273 | |
274 | Datum |
275 | hashmacaddrextended(PG_FUNCTION_ARGS) |
276 | { |
277 | macaddr *key = PG_GETARG_MACADDR_P(0); |
278 | |
279 | return hash_any_extended((unsigned char *) key, sizeof(macaddr), |
280 | PG_GETARG_INT64(1)); |
281 | } |
282 | |
283 | /* |
284 | * Arithmetic functions: bitwise NOT, AND, OR. |
285 | */ |
286 | Datum |
287 | macaddr_not(PG_FUNCTION_ARGS) |
288 | { |
289 | macaddr *addr = PG_GETARG_MACADDR_P(0); |
290 | macaddr *result; |
291 | |
292 | result = (macaddr *) palloc(sizeof(macaddr)); |
293 | result->a = ~addr->a; |
294 | result->b = ~addr->b; |
295 | result->c = ~addr->c; |
296 | result->d = ~addr->d; |
297 | result->e = ~addr->e; |
298 | result->f = ~addr->f; |
299 | PG_RETURN_MACADDR_P(result); |
300 | } |
301 | |
302 | Datum |
303 | macaddr_and(PG_FUNCTION_ARGS) |
304 | { |
305 | macaddr *addr1 = PG_GETARG_MACADDR_P(0); |
306 | macaddr *addr2 = PG_GETARG_MACADDR_P(1); |
307 | macaddr *result; |
308 | |
309 | result = (macaddr *) palloc(sizeof(macaddr)); |
310 | result->a = addr1->a & addr2->a; |
311 | result->b = addr1->b & addr2->b; |
312 | result->c = addr1->c & addr2->c; |
313 | result->d = addr1->d & addr2->d; |
314 | result->e = addr1->e & addr2->e; |
315 | result->f = addr1->f & addr2->f; |
316 | PG_RETURN_MACADDR_P(result); |
317 | } |
318 | |
319 | Datum |
320 | macaddr_or(PG_FUNCTION_ARGS) |
321 | { |
322 | macaddr *addr1 = PG_GETARG_MACADDR_P(0); |
323 | macaddr *addr2 = PG_GETARG_MACADDR_P(1); |
324 | macaddr *result; |
325 | |
326 | result = (macaddr *) palloc(sizeof(macaddr)); |
327 | result->a = addr1->a | addr2->a; |
328 | result->b = addr1->b | addr2->b; |
329 | result->c = addr1->c | addr2->c; |
330 | result->d = addr1->d | addr2->d; |
331 | result->e = addr1->e | addr2->e; |
332 | result->f = addr1->f | addr2->f; |
333 | PG_RETURN_MACADDR_P(result); |
334 | } |
335 | |
336 | /* |
337 | * Truncation function to allow comparing mac manufacturers. |
338 | * From suggestion by Alex Pilosov <alex@pilosoft.com> |
339 | */ |
340 | Datum |
341 | macaddr_trunc(PG_FUNCTION_ARGS) |
342 | { |
343 | macaddr *addr = PG_GETARG_MACADDR_P(0); |
344 | macaddr *result; |
345 | |
346 | result = (macaddr *) palloc(sizeof(macaddr)); |
347 | |
348 | result->a = addr->a; |
349 | result->b = addr->b; |
350 | result->c = addr->c; |
351 | result->d = 0; |
352 | result->e = 0; |
353 | result->f = 0; |
354 | |
355 | PG_RETURN_MACADDR_P(result); |
356 | } |
357 | |
358 | /* |
359 | * SortSupport strategy function. Populates a SortSupport struct with the |
360 | * information necessary to use comparison by abbreviated keys. |
361 | */ |
362 | Datum |
363 | macaddr_sortsupport(PG_FUNCTION_ARGS) |
364 | { |
365 | SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
366 | |
367 | ssup->comparator = macaddr_fast_cmp; |
368 | ssup->ssup_extra = NULL; |
369 | |
370 | if (ssup->abbreviate) |
371 | { |
372 | macaddr_sortsupport_state *uss; |
373 | MemoryContext oldcontext; |
374 | |
375 | oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
376 | |
377 | uss = palloc(sizeof(macaddr_sortsupport_state)); |
378 | uss->input_count = 0; |
379 | uss->estimating = true; |
380 | initHyperLogLog(&uss->abbr_card, 10); |
381 | |
382 | ssup->ssup_extra = uss; |
383 | |
384 | ssup->comparator = macaddr_cmp_abbrev; |
385 | ssup->abbrev_converter = macaddr_abbrev_convert; |
386 | ssup->abbrev_abort = macaddr_abbrev_abort; |
387 | ssup->abbrev_full_comparator = macaddr_fast_cmp; |
388 | |
389 | MemoryContextSwitchTo(oldcontext); |
390 | } |
391 | |
392 | PG_RETURN_VOID(); |
393 | } |
394 | |
395 | /* |
396 | * SortSupport "traditional" comparison function. Pulls two MAC addresses from |
397 | * the heap and runs a standard comparison on them. |
398 | */ |
399 | static int |
400 | macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) |
401 | { |
402 | macaddr *arg1 = DatumGetMacaddrP(x); |
403 | macaddr *arg2 = DatumGetMacaddrP(y); |
404 | |
405 | return macaddr_cmp_internal(arg1, arg2); |
406 | } |
407 | |
408 | /* |
409 | * SortSupport abbreviated key comparison function. Compares two MAC addresses |
410 | * quickly by treating them like integers, and without having to go to the |
411 | * heap. |
412 | */ |
413 | static int |
414 | macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup) |
415 | { |
416 | if (x > y) |
417 | return 1; |
418 | else if (x == y) |
419 | return 0; |
420 | else |
421 | return -1; |
422 | } |
423 | |
424 | /* |
425 | * Callback for estimating effectiveness of abbreviated key optimization. |
426 | * |
427 | * We pay no attention to the cardinality of the non-abbreviated data, because |
428 | * there is no equality fast-path within authoritative macaddr comparator. |
429 | */ |
430 | static bool |
431 | macaddr_abbrev_abort(int memtupcount, SortSupport ssup) |
432 | { |
433 | macaddr_sortsupport_state *uss = ssup->ssup_extra; |
434 | double abbr_card; |
435 | |
436 | if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) |
437 | return false; |
438 | |
439 | abbr_card = estimateHyperLogLog(&uss->abbr_card); |
440 | |
441 | /* |
442 | * If we have >100k distinct values, then even if we were sorting many |
443 | * billion rows we'd likely still break even, and the penalty of undoing |
444 | * that many rows of abbrevs would probably not be worth it. At this point |
445 | * we stop counting because we know that we're now fully committed. |
446 | */ |
447 | if (abbr_card > 100000.0) |
448 | { |
449 | #ifdef TRACE_SORT |
450 | if (trace_sort) |
451 | elog(LOG, |
452 | "macaddr_abbrev: estimation ends at cardinality %f" |
453 | " after " INT64_FORMAT " values (%d rows)" , |
454 | abbr_card, uss->input_count, memtupcount); |
455 | #endif |
456 | uss->estimating = false; |
457 | return false; |
458 | } |
459 | |
460 | /* |
461 | * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row |
462 | * fudge factor allows us to abort earlier on genuinely pathological data |
463 | * where we've had exactly one abbreviated value in the first 2k |
464 | * (non-null) rows. |
465 | */ |
466 | if (abbr_card < uss->input_count / 2000.0 + 0.5) |
467 | { |
468 | #ifdef TRACE_SORT |
469 | if (trace_sort) |
470 | elog(LOG, |
471 | "macaddr_abbrev: aborting abbreviation at cardinality %f" |
472 | " below threshold %f after " INT64_FORMAT " values (%d rows)" , |
473 | abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, |
474 | memtupcount); |
475 | #endif |
476 | return true; |
477 | } |
478 | |
479 | #ifdef TRACE_SORT |
480 | if (trace_sort) |
481 | elog(LOG, |
482 | "macaddr_abbrev: cardinality %f after " INT64_FORMAT |
483 | " values (%d rows)" , abbr_card, uss->input_count, memtupcount); |
484 | #endif |
485 | |
486 | return false; |
487 | } |
488 | |
489 | /* |
490 | * SortSupport conversion routine. Converts original macaddr representation |
491 | * to abbreviated key representation. |
492 | * |
493 | * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an |
494 | * unsigned integer for purposes of comparison. On a 64-bit machine, there |
495 | * will be two zeroed bytes of padding. The integer is converted to native |
496 | * endianness to facilitate easy comparison. |
497 | */ |
498 | static Datum |
499 | macaddr_abbrev_convert(Datum original, SortSupport ssup) |
500 | { |
501 | macaddr_sortsupport_state *uss = ssup->ssup_extra; |
502 | macaddr *authoritative = DatumGetMacaddrP(original); |
503 | Datum res; |
504 | |
505 | /* |
506 | * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of |
507 | * the MAC address in. There will be two bytes of zero padding on the end |
508 | * of the least significant bits. |
509 | */ |
510 | #if SIZEOF_DATUM == 8 |
511 | memset(&res, 0, SIZEOF_DATUM); |
512 | memcpy(&res, authoritative, sizeof(macaddr)); |
513 | #else /* SIZEOF_DATUM != 8 */ |
514 | memcpy(&res, authoritative, SIZEOF_DATUM); |
515 | #endif |
516 | uss->input_count += 1; |
517 | |
518 | /* |
519 | * Cardinality estimation. The estimate uses uint32, so on a 64-bit |
520 | * architecture, XOR the two 32-bit halves together to produce slightly |
521 | * more entropy. The two zeroed bytes won't have any practical impact on |
522 | * this operation. |
523 | */ |
524 | if (uss->estimating) |
525 | { |
526 | uint32 tmp; |
527 | |
528 | #if SIZEOF_DATUM == 8 |
529 | tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); |
530 | #else /* SIZEOF_DATUM != 8 */ |
531 | tmp = (uint32) res; |
532 | #endif |
533 | |
534 | addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); |
535 | } |
536 | |
537 | /* |
538 | * Byteswap on little-endian machines. |
539 | * |
540 | * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way |
541 | * comparator) works correctly on all platforms. Without this, the |
542 | * comparator would have to call memcmp() with a pair of pointers to the |
543 | * first byte of each abbreviated key, which is slower. |
544 | */ |
545 | res = DatumBigEndianToNative(res); |
546 | |
547 | return res; |
548 | } |
549 | |