1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * varchar.c |
4 | * Functions for the built-in types char(n) and varchar(n). |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/utils/adt/varchar.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #include "postgres.h" |
16 | |
17 | #include "access/tuptoaster.h" |
18 | #include "catalog/pg_collation.h" |
19 | #include "catalog/pg_type.h" |
20 | #include "libpq/pqformat.h" |
21 | #include "nodes/nodeFuncs.h" |
22 | #include "nodes/supportnodes.h" |
23 | #include "utils/array.h" |
24 | #include "utils/builtins.h" |
25 | #include "utils/hashutils.h" |
26 | #include "utils/lsyscache.h" |
27 | #include "utils/pg_locale.h" |
28 | #include "utils/varlena.h" |
29 | #include "mb/pg_wchar.h" |
30 | |
31 | |
32 | /* common code for bpchartypmodin and varchartypmodin */ |
33 | static int32 |
34 | anychar_typmodin(ArrayType *ta, const char *typename) |
35 | { |
36 | int32 typmod; |
37 | int32 *tl; |
38 | int n; |
39 | |
40 | tl = ArrayGetIntegerTypmods(ta, &n); |
41 | |
42 | /* |
43 | * we're not too tense about good error message here because grammar |
44 | * shouldn't allow wrong number of modifiers for CHAR |
45 | */ |
46 | if (n != 1) |
47 | ereport(ERROR, |
48 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
49 | errmsg("invalid type modifier" ))); |
50 | |
51 | if (*tl < 1) |
52 | ereport(ERROR, |
53 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
54 | errmsg("length for type %s must be at least 1" , typename))); |
55 | if (*tl > MaxAttrSize) |
56 | ereport(ERROR, |
57 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
58 | errmsg("length for type %s cannot exceed %d" , |
59 | typename, MaxAttrSize))); |
60 | |
61 | /* |
62 | * For largely historical reasons, the typmod is VARHDRSZ plus the number |
63 | * of characters; there is enough client-side code that knows about that |
64 | * that we'd better not change it. |
65 | */ |
66 | typmod = VARHDRSZ + *tl; |
67 | |
68 | return typmod; |
69 | } |
70 | |
71 | /* common code for bpchartypmodout and varchartypmodout */ |
72 | static char * |
73 | anychar_typmodout(int32 typmod) |
74 | { |
75 | char *res = (char *) palloc(64); |
76 | |
77 | if (typmod > VARHDRSZ) |
78 | snprintf(res, 64, "(%d)" , (int) (typmod - VARHDRSZ)); |
79 | else |
80 | *res = '\0'; |
81 | |
82 | return res; |
83 | } |
84 | |
85 | |
/*
 * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
 * is for blank-padded strings whose length is specified in CREATE TABLE.
 * VARCHAR is for storing strings whose length is at most the length specified
 * at CREATE TABLE time.
 *
 * It's hard to implement these types because we cannot figure out
 * the length of the type from the type itself. I changed (hopefully all) the
 * fmgr calls that invoke input functions of a data type to supply the
 * length also. (E.g., in INSERTs, we have the tupleDescriptor which contains
 * the length of the attributes and hence the exact length of the char() or
 * varchar(). We pass this to bpcharin() or varcharin().) In the case where
 * we cannot determine the length, we pass in -1 instead and the input
 * converter does not enforce any length check.
 *
 * We actually implement this as a varlena so that we don't have to pass in
 * the length for the comparison functions. (The difference between these
 * types and "text" is that we truncate and possibly blank-pad the string
 * at insertion time.)
 *
 * - ay 6/95
 */
108 | |
109 | |
110 | /***************************************************************************** |
111 | * bpchar - char() * |
112 | *****************************************************************************/ |
113 | |
114 | /* |
115 | * bpchar_input -- common guts of bpcharin and bpcharrecv |
116 | * |
117 | * s is the input text of length len (may not be null-terminated) |
118 | * atttypmod is the typmod value to apply |
119 | * |
120 | * Note that atttypmod is measured in characters, which |
121 | * is not necessarily the same as the number of bytes. |
122 | * |
123 | * If the input string is too long, raise an error, unless the extra |
124 | * characters are spaces, in which case they're truncated. (per SQL) |
125 | */ |
126 | static BpChar * |
127 | bpchar_input(const char *s, size_t len, int32 atttypmod) |
128 | { |
129 | BpChar *result; |
130 | char *r; |
131 | size_t maxlen; |
132 | |
133 | /* If typmod is -1 (or invalid), use the actual string length */ |
134 | if (atttypmod < (int32) VARHDRSZ) |
135 | maxlen = len; |
136 | else |
137 | { |
138 | size_t charlen; /* number of CHARACTERS in the input */ |
139 | |
140 | maxlen = atttypmod - VARHDRSZ; |
141 | charlen = pg_mbstrlen_with_len(s, len); |
142 | if (charlen > maxlen) |
143 | { |
144 | /* Verify that extra characters are spaces, and clip them off */ |
145 | size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); |
146 | size_t j; |
147 | |
148 | /* |
149 | * at this point, len is the actual BYTE length of the input |
150 | * string, maxlen is the max number of CHARACTERS allowed for this |
151 | * bpchar type, mbmaxlen is the length in BYTES of those chars. |
152 | */ |
153 | for (j = mbmaxlen; j < len; j++) |
154 | { |
155 | if (s[j] != ' ') |
156 | ereport(ERROR, |
157 | (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
158 | errmsg("value too long for type character(%d)" , |
159 | (int) maxlen))); |
160 | } |
161 | |
162 | /* |
163 | * Now we set maxlen to the necessary byte length, not the number |
164 | * of CHARACTERS! |
165 | */ |
166 | maxlen = len = mbmaxlen; |
167 | } |
168 | else |
169 | { |
170 | /* |
171 | * Now we set maxlen to the necessary byte length, not the number |
172 | * of CHARACTERS! |
173 | */ |
174 | maxlen = len + (maxlen - charlen); |
175 | } |
176 | } |
177 | |
178 | result = (BpChar *) palloc(maxlen + VARHDRSZ); |
179 | SET_VARSIZE(result, maxlen + VARHDRSZ); |
180 | r = VARDATA(result); |
181 | memcpy(r, s, len); |
182 | |
183 | /* blank pad the string if necessary */ |
184 | if (maxlen > len) |
185 | memset(r + len, ' ', maxlen - len); |
186 | |
187 | return result; |
188 | } |
189 | |
190 | /* |
191 | * Convert a C string to CHARACTER internal representation. atttypmod |
192 | * is the declared length of the type plus VARHDRSZ. |
193 | */ |
194 | Datum |
195 | bpcharin(PG_FUNCTION_ARGS) |
196 | { |
197 | char *s = PG_GETARG_CSTRING(0); |
198 | |
199 | #ifdef NOT_USED |
200 | Oid typelem = PG_GETARG_OID(1); |
201 | #endif |
202 | int32 atttypmod = PG_GETARG_INT32(2); |
203 | BpChar *result; |
204 | |
205 | result = bpchar_input(s, strlen(s), atttypmod); |
206 | PG_RETURN_BPCHAR_P(result); |
207 | } |
208 | |
209 | |
210 | /* |
211 | * Convert a CHARACTER value to a C string. |
212 | * |
213 | * Uses the text conversion functions, which is only appropriate if BpChar |
214 | * and text are equivalent types. |
215 | */ |
216 | Datum |
217 | bpcharout(PG_FUNCTION_ARGS) |
218 | { |
219 | Datum txt = PG_GETARG_DATUM(0); |
220 | |
221 | PG_RETURN_CSTRING(TextDatumGetCString(txt)); |
222 | } |
223 | |
224 | /* |
225 | * bpcharrecv - converts external binary format to bpchar |
226 | */ |
227 | Datum |
228 | bpcharrecv(PG_FUNCTION_ARGS) |
229 | { |
230 | StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
231 | |
232 | #ifdef NOT_USED |
233 | Oid typelem = PG_GETARG_OID(1); |
234 | #endif |
235 | int32 atttypmod = PG_GETARG_INT32(2); |
236 | BpChar *result; |
237 | char *str; |
238 | int nbytes; |
239 | |
240 | str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); |
241 | result = bpchar_input(str, nbytes, atttypmod); |
242 | pfree(str); |
243 | PG_RETURN_BPCHAR_P(result); |
244 | } |
245 | |
246 | /* |
247 | * bpcharsend - converts bpchar to binary format |
248 | */ |
249 | Datum |
250 | bpcharsend(PG_FUNCTION_ARGS) |
251 | { |
252 | /* Exactly the same as textsend, so share code */ |
253 | return textsend(fcinfo); |
254 | } |
255 | |
256 | |
257 | /* |
258 | * Converts a CHARACTER type to the specified size. |
259 | * |
260 | * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. |
261 | * isExplicit is true if this is for an explicit cast to char(N). |
262 | * |
263 | * Truncation rules: for an explicit cast, silently truncate to the given |
264 | * length; for an implicit cast, raise error unless extra characters are |
265 | * all spaces. (This is sort-of per SQL: the spec would actually have us |
266 | * raise a "completion condition" for the explicit cast case, but Postgres |
267 | * hasn't got such a concept.) |
268 | */ |
269 | Datum |
270 | bpchar(PG_FUNCTION_ARGS) |
271 | { |
272 | BpChar *source = PG_GETARG_BPCHAR_PP(0); |
273 | int32 maxlen = PG_GETARG_INT32(1); |
274 | bool isExplicit = PG_GETARG_BOOL(2); |
275 | BpChar *result; |
276 | int32 len; |
277 | char *r; |
278 | char *s; |
279 | int i; |
280 | int charlen; /* number of characters in the input string + |
281 | * VARHDRSZ */ |
282 | |
283 | /* No work if typmod is invalid */ |
284 | if (maxlen < (int32) VARHDRSZ) |
285 | PG_RETURN_BPCHAR_P(source); |
286 | |
287 | maxlen -= VARHDRSZ; |
288 | |
289 | len = VARSIZE_ANY_EXHDR(source); |
290 | s = VARDATA_ANY(source); |
291 | |
292 | charlen = pg_mbstrlen_with_len(s, len); |
293 | |
294 | /* No work if supplied data matches typmod already */ |
295 | if (charlen == maxlen) |
296 | PG_RETURN_BPCHAR_P(source); |
297 | |
298 | if (charlen > maxlen) |
299 | { |
300 | /* Verify that extra characters are spaces, and clip them off */ |
301 | size_t maxmblen; |
302 | |
303 | maxmblen = pg_mbcharcliplen(s, len, maxlen); |
304 | |
305 | if (!isExplicit) |
306 | { |
307 | for (i = maxmblen; i < len; i++) |
308 | if (s[i] != ' ') |
309 | ereport(ERROR, |
310 | (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
311 | errmsg("value too long for type character(%d)" , |
312 | maxlen))); |
313 | } |
314 | |
315 | len = maxmblen; |
316 | |
317 | /* |
318 | * At this point, maxlen is the necessary byte length, not the number |
319 | * of CHARACTERS! |
320 | */ |
321 | maxlen = len; |
322 | } |
323 | else |
324 | { |
325 | /* |
326 | * At this point, maxlen is the necessary byte length, not the number |
327 | * of CHARACTERS! |
328 | */ |
329 | maxlen = len + (maxlen - charlen); |
330 | } |
331 | |
332 | Assert(maxlen >= len); |
333 | |
334 | result = palloc(maxlen + VARHDRSZ); |
335 | SET_VARSIZE(result, maxlen + VARHDRSZ); |
336 | r = VARDATA(result); |
337 | |
338 | memcpy(r, s, len); |
339 | |
340 | /* blank pad the string if necessary */ |
341 | if (maxlen > len) |
342 | memset(r + len, ' ', maxlen - len); |
343 | |
344 | PG_RETURN_BPCHAR_P(result); |
345 | } |
346 | |
347 | |
348 | /* char_bpchar() |
349 | * Convert char to bpchar(1). |
350 | */ |
351 | Datum |
352 | char_bpchar(PG_FUNCTION_ARGS) |
353 | { |
354 | char c = PG_GETARG_CHAR(0); |
355 | BpChar *result; |
356 | |
357 | result = (BpChar *) palloc(VARHDRSZ + 1); |
358 | |
359 | SET_VARSIZE(result, VARHDRSZ + 1); |
360 | *(VARDATA(result)) = c; |
361 | |
362 | PG_RETURN_BPCHAR_P(result); |
363 | } |
364 | |
365 | |
366 | /* bpchar_name() |
367 | * Converts a bpchar() type to a NameData type. |
368 | */ |
369 | Datum |
370 | bpchar_name(PG_FUNCTION_ARGS) |
371 | { |
372 | BpChar *s = PG_GETARG_BPCHAR_PP(0); |
373 | char *s_data; |
374 | Name result; |
375 | int len; |
376 | |
377 | len = VARSIZE_ANY_EXHDR(s); |
378 | s_data = VARDATA_ANY(s); |
379 | |
380 | /* Truncate oversize input */ |
381 | if (len >= NAMEDATALEN) |
382 | len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1); |
383 | |
384 | /* Remove trailing blanks */ |
385 | while (len > 0) |
386 | { |
387 | if (s_data[len - 1] != ' ') |
388 | break; |
389 | len--; |
390 | } |
391 | |
392 | /* We use palloc0 here to ensure result is zero-padded */ |
393 | result = (Name) palloc0(NAMEDATALEN); |
394 | memcpy(NameStr(*result), s_data, len); |
395 | |
396 | PG_RETURN_NAME(result); |
397 | } |
398 | |
399 | /* name_bpchar() |
400 | * Converts a NameData type to a bpchar type. |
401 | * |
402 | * Uses the text conversion functions, which is only appropriate if BpChar |
403 | * and text are equivalent types. |
404 | */ |
405 | Datum |
406 | name_bpchar(PG_FUNCTION_ARGS) |
407 | { |
408 | Name s = PG_GETARG_NAME(0); |
409 | BpChar *result; |
410 | |
411 | result = (BpChar *) cstring_to_text(NameStr(*s)); |
412 | PG_RETURN_BPCHAR_P(result); |
413 | } |
414 | |
415 | Datum |
416 | bpchartypmodin(PG_FUNCTION_ARGS) |
417 | { |
418 | ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); |
419 | |
420 | PG_RETURN_INT32(anychar_typmodin(ta, "char" )); |
421 | } |
422 | |
423 | Datum |
424 | bpchartypmodout(PG_FUNCTION_ARGS) |
425 | { |
426 | int32 typmod = PG_GETARG_INT32(0); |
427 | |
428 | PG_RETURN_CSTRING(anychar_typmodout(typmod)); |
429 | } |
430 | |
431 | |
432 | /***************************************************************************** |
433 | * varchar - varchar(n) |
434 | * |
435 | * Note: varchar piggybacks on type text for most operations, and so has no |
436 | * C-coded functions except for I/O and typmod checking. |
437 | *****************************************************************************/ |
438 | |
439 | /* |
440 | * varchar_input -- common guts of varcharin and varcharrecv |
441 | * |
442 | * s is the input text of length len (may not be null-terminated) |
443 | * atttypmod is the typmod value to apply |
444 | * |
445 | * Note that atttypmod is measured in characters, which |
446 | * is not necessarily the same as the number of bytes. |
447 | * |
448 | * If the input string is too long, raise an error, unless the extra |
449 | * characters are spaces, in which case they're truncated. (per SQL) |
450 | * |
451 | * Uses the C string to text conversion function, which is only appropriate |
452 | * if VarChar and text are equivalent types. |
453 | */ |
454 | static VarChar * |
455 | varchar_input(const char *s, size_t len, int32 atttypmod) |
456 | { |
457 | VarChar *result; |
458 | size_t maxlen; |
459 | |
460 | maxlen = atttypmod - VARHDRSZ; |
461 | |
462 | if (atttypmod >= (int32) VARHDRSZ && len > maxlen) |
463 | { |
464 | /* Verify that extra characters are spaces, and clip them off */ |
465 | size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen); |
466 | size_t j; |
467 | |
468 | for (j = mbmaxlen; j < len; j++) |
469 | { |
470 | if (s[j] != ' ') |
471 | ereport(ERROR, |
472 | (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
473 | errmsg("value too long for type character varying(%d)" , |
474 | (int) maxlen))); |
475 | } |
476 | |
477 | len = mbmaxlen; |
478 | } |
479 | |
480 | result = (VarChar *) cstring_to_text_with_len(s, len); |
481 | return result; |
482 | } |
483 | |
484 | /* |
485 | * Convert a C string to VARCHAR internal representation. atttypmod |
486 | * is the declared length of the type plus VARHDRSZ. |
487 | */ |
488 | Datum |
489 | varcharin(PG_FUNCTION_ARGS) |
490 | { |
491 | char *s = PG_GETARG_CSTRING(0); |
492 | |
493 | #ifdef NOT_USED |
494 | Oid typelem = PG_GETARG_OID(1); |
495 | #endif |
496 | int32 atttypmod = PG_GETARG_INT32(2); |
497 | VarChar *result; |
498 | |
499 | result = varchar_input(s, strlen(s), atttypmod); |
500 | PG_RETURN_VARCHAR_P(result); |
501 | } |
502 | |
503 | |
504 | /* |
505 | * Convert a VARCHAR value to a C string. |
506 | * |
507 | * Uses the text to C string conversion function, which is only appropriate |
508 | * if VarChar and text are equivalent types. |
509 | */ |
510 | Datum |
511 | varcharout(PG_FUNCTION_ARGS) |
512 | { |
513 | Datum txt = PG_GETARG_DATUM(0); |
514 | |
515 | PG_RETURN_CSTRING(TextDatumGetCString(txt)); |
516 | } |
517 | |
518 | /* |
519 | * varcharrecv - converts external binary format to varchar |
520 | */ |
521 | Datum |
522 | varcharrecv(PG_FUNCTION_ARGS) |
523 | { |
524 | StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); |
525 | |
526 | #ifdef NOT_USED |
527 | Oid typelem = PG_GETARG_OID(1); |
528 | #endif |
529 | int32 atttypmod = PG_GETARG_INT32(2); |
530 | VarChar *result; |
531 | char *str; |
532 | int nbytes; |
533 | |
534 | str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes); |
535 | result = varchar_input(str, nbytes, atttypmod); |
536 | pfree(str); |
537 | PG_RETURN_VARCHAR_P(result); |
538 | } |
539 | |
540 | /* |
541 | * varcharsend - converts varchar to binary format |
542 | */ |
543 | Datum |
544 | varcharsend(PG_FUNCTION_ARGS) |
545 | { |
546 | /* Exactly the same as textsend, so share code */ |
547 | return textsend(fcinfo); |
548 | } |
549 | |
550 | |
551 | /* |
552 | * varchar_support() |
553 | * |
554 | * Planner support function for the varchar() length coercion function. |
555 | * |
556 | * Currently, the only interesting thing we can do is flatten calls that set |
557 | * the new maximum length >= the previous maximum length. We can ignore the |
558 | * isExplicit argument, since that only affects truncation cases. |
559 | */ |
560 | Datum |
561 | varchar_support(PG_FUNCTION_ARGS) |
562 | { |
563 | Node *rawreq = (Node *) PG_GETARG_POINTER(0); |
564 | Node *ret = NULL; |
565 | |
566 | if (IsA(rawreq, SupportRequestSimplify)) |
567 | { |
568 | SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq; |
569 | FuncExpr *expr = req->fcall; |
570 | Node *typmod; |
571 | |
572 | Assert(list_length(expr->args) >= 2); |
573 | |
574 | typmod = (Node *) lsecond(expr->args); |
575 | |
576 | if (IsA(typmod, Const) &&!((Const *) typmod)->constisnull) |
577 | { |
578 | Node *source = (Node *) linitial(expr->args); |
579 | int32 old_typmod = exprTypmod(source); |
580 | int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue); |
581 | int32 old_max = old_typmod - VARHDRSZ; |
582 | int32 new_max = new_typmod - VARHDRSZ; |
583 | |
584 | if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max)) |
585 | ret = relabel_to_typmod(source, new_typmod); |
586 | } |
587 | } |
588 | |
589 | PG_RETURN_POINTER(ret); |
590 | } |
591 | |
592 | /* |
593 | * Converts a VARCHAR type to the specified size. |
594 | * |
595 | * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes. |
596 | * isExplicit is true if this is for an explicit cast to varchar(N). |
597 | * |
598 | * Truncation rules: for an explicit cast, silently truncate to the given |
599 | * length; for an implicit cast, raise error unless extra characters are |
600 | * all spaces. (This is sort-of per SQL: the spec would actually have us |
601 | * raise a "completion condition" for the explicit cast case, but Postgres |
602 | * hasn't got such a concept.) |
603 | */ |
604 | Datum |
605 | varchar(PG_FUNCTION_ARGS) |
606 | { |
607 | VarChar *source = PG_GETARG_VARCHAR_PP(0); |
608 | int32 typmod = PG_GETARG_INT32(1); |
609 | bool isExplicit = PG_GETARG_BOOL(2); |
610 | int32 len, |
611 | maxlen; |
612 | size_t maxmblen; |
613 | int i; |
614 | char *s_data; |
615 | |
616 | len = VARSIZE_ANY_EXHDR(source); |
617 | s_data = VARDATA_ANY(source); |
618 | maxlen = typmod - VARHDRSZ; |
619 | |
620 | /* No work if typmod is invalid or supplied data fits it already */ |
621 | if (maxlen < 0 || len <= maxlen) |
622 | PG_RETURN_VARCHAR_P(source); |
623 | |
624 | /* only reach here if string is too long... */ |
625 | |
626 | /* truncate multibyte string preserving multibyte boundary */ |
627 | maxmblen = pg_mbcharcliplen(s_data, len, maxlen); |
628 | |
629 | if (!isExplicit) |
630 | { |
631 | for (i = maxmblen; i < len; i++) |
632 | if (s_data[i] != ' ') |
633 | ereport(ERROR, |
634 | (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), |
635 | errmsg("value too long for type character varying(%d)" , |
636 | maxlen))); |
637 | } |
638 | |
639 | PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data, |
640 | maxmblen)); |
641 | } |
642 | |
643 | Datum |
644 | varchartypmodin(PG_FUNCTION_ARGS) |
645 | { |
646 | ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); |
647 | |
648 | PG_RETURN_INT32(anychar_typmodin(ta, "varchar" )); |
649 | } |
650 | |
651 | Datum |
652 | varchartypmodout(PG_FUNCTION_ARGS) |
653 | { |
654 | int32 typmod = PG_GETARG_INT32(0); |
655 | |
656 | PG_RETURN_CSTRING(anychar_typmodout(typmod)); |
657 | } |
658 | |
659 | |
660 | /***************************************************************************** |
661 | * Exported functions |
662 | *****************************************************************************/ |
663 | |
664 | /* "True" length (not counting trailing blanks) of a BpChar */ |
665 | static inline int |
666 | bcTruelen(BpChar *arg) |
667 | { |
668 | return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg)); |
669 | } |
670 | |
/*
 * bpchartruelen -- length of the first len bytes of s once trailing spaces
 * have been discounted.
 *
 * Returns len itself when the last byte is not a space (or len is not
 * positive), and 0 when every byte is a space.
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * We rely on the assumption that ' ' is a singleton unit in every
	 * supported multibyte server encoding, so scanning byte by byte from
	 * the end is safe.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
687 | |
688 | Datum |
689 | bpcharlen(PG_FUNCTION_ARGS) |
690 | { |
691 | BpChar *arg = PG_GETARG_BPCHAR_PP(0); |
692 | int len; |
693 | |
694 | /* get number of bytes, ignoring trailing spaces */ |
695 | len = bcTruelen(arg); |
696 | |
697 | /* in multibyte encoding, convert to number of characters */ |
698 | if (pg_database_encoding_max_length() != 1) |
699 | len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len); |
700 | |
701 | PG_RETURN_INT32(len); |
702 | } |
703 | |
704 | Datum |
705 | bpcharoctetlen(PG_FUNCTION_ARGS) |
706 | { |
707 | Datum arg = PG_GETARG_DATUM(0); |
708 | |
709 | /* We need not detoast the input at all */ |
710 | PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ); |
711 | } |
712 | |
713 | |
714 | /***************************************************************************** |
715 | * Comparison Functions used for bpchar |
716 | * |
717 | * Note: btree indexes need these routines not to leak memory; therefore, |
718 | * be careful to free working copies of toasted datums. Most places don't |
719 | * need to be so careful. |
720 | *****************************************************************************/ |
721 | |
722 | static void |
723 | check_collation_set(Oid collid) |
724 | { |
725 | if (!OidIsValid(collid)) |
726 | { |
727 | /* |
728 | * This typically means that the parser could not resolve a conflict |
729 | * of implicit collations, so report it that way. |
730 | */ |
731 | ereport(ERROR, |
732 | (errcode(ERRCODE_INDETERMINATE_COLLATION), |
733 | errmsg("could not determine which collation to use for string comparison" ), |
734 | errhint("Use the COLLATE clause to set the collation explicitly." ))); |
735 | } |
736 | } |
737 | |
738 | Datum |
739 | bpchareq(PG_FUNCTION_ARGS) |
740 | { |
741 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
742 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
743 | int len1, |
744 | len2; |
745 | bool result; |
746 | Oid collid = PG_GET_COLLATION(); |
747 | |
748 | check_collation_set(collid); |
749 | |
750 | len1 = bcTruelen(arg1); |
751 | len2 = bcTruelen(arg2); |
752 | |
753 | if (lc_collate_is_c(collid) || |
754 | collid == DEFAULT_COLLATION_OID || |
755 | pg_newlocale_from_collation(collid)->deterministic) |
756 | { |
757 | /* |
758 | * Since we only care about equality or not-equality, we can avoid all |
759 | * the expense of strcoll() here, and just do bitwise comparison. |
760 | */ |
761 | if (len1 != len2) |
762 | result = false; |
763 | else |
764 | result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0); |
765 | } |
766 | else |
767 | { |
768 | result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
769 | collid) == 0); |
770 | } |
771 | |
772 | PG_FREE_IF_COPY(arg1, 0); |
773 | PG_FREE_IF_COPY(arg2, 1); |
774 | |
775 | PG_RETURN_BOOL(result); |
776 | } |
777 | |
778 | Datum |
779 | bpcharne(PG_FUNCTION_ARGS) |
780 | { |
781 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
782 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
783 | int len1, |
784 | len2; |
785 | bool result; |
786 | Oid collid = PG_GET_COLLATION(); |
787 | |
788 | len1 = bcTruelen(arg1); |
789 | len2 = bcTruelen(arg2); |
790 | |
791 | if (lc_collate_is_c(collid) || |
792 | collid == DEFAULT_COLLATION_OID || |
793 | pg_newlocale_from_collation(collid)->deterministic) |
794 | { |
795 | /* |
796 | * Since we only care about equality or not-equality, we can avoid all |
797 | * the expense of strcoll() here, and just do bitwise comparison. |
798 | */ |
799 | if (len1 != len2) |
800 | result = true; |
801 | else |
802 | result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0); |
803 | } |
804 | else |
805 | { |
806 | result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
807 | collid) != 0); |
808 | } |
809 | |
810 | PG_FREE_IF_COPY(arg1, 0); |
811 | PG_FREE_IF_COPY(arg2, 1); |
812 | |
813 | PG_RETURN_BOOL(result); |
814 | } |
815 | |
816 | Datum |
817 | bpcharlt(PG_FUNCTION_ARGS) |
818 | { |
819 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
820 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
821 | int len1, |
822 | len2; |
823 | int cmp; |
824 | |
825 | len1 = bcTruelen(arg1); |
826 | len2 = bcTruelen(arg2); |
827 | |
828 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
829 | PG_GET_COLLATION()); |
830 | |
831 | PG_FREE_IF_COPY(arg1, 0); |
832 | PG_FREE_IF_COPY(arg2, 1); |
833 | |
834 | PG_RETURN_BOOL(cmp < 0); |
835 | } |
836 | |
837 | Datum |
838 | bpcharle(PG_FUNCTION_ARGS) |
839 | { |
840 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
841 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
842 | int len1, |
843 | len2; |
844 | int cmp; |
845 | |
846 | len1 = bcTruelen(arg1); |
847 | len2 = bcTruelen(arg2); |
848 | |
849 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
850 | PG_GET_COLLATION()); |
851 | |
852 | PG_FREE_IF_COPY(arg1, 0); |
853 | PG_FREE_IF_COPY(arg2, 1); |
854 | |
855 | PG_RETURN_BOOL(cmp <= 0); |
856 | } |
857 | |
858 | Datum |
859 | bpchargt(PG_FUNCTION_ARGS) |
860 | { |
861 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
862 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
863 | int len1, |
864 | len2; |
865 | int cmp; |
866 | |
867 | len1 = bcTruelen(arg1); |
868 | len2 = bcTruelen(arg2); |
869 | |
870 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
871 | PG_GET_COLLATION()); |
872 | |
873 | PG_FREE_IF_COPY(arg1, 0); |
874 | PG_FREE_IF_COPY(arg2, 1); |
875 | |
876 | PG_RETURN_BOOL(cmp > 0); |
877 | } |
878 | |
879 | Datum |
880 | bpcharge(PG_FUNCTION_ARGS) |
881 | { |
882 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
883 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
884 | int len1, |
885 | len2; |
886 | int cmp; |
887 | |
888 | len1 = bcTruelen(arg1); |
889 | len2 = bcTruelen(arg2); |
890 | |
891 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
892 | PG_GET_COLLATION()); |
893 | |
894 | PG_FREE_IF_COPY(arg1, 0); |
895 | PG_FREE_IF_COPY(arg2, 1); |
896 | |
897 | PG_RETURN_BOOL(cmp >= 0); |
898 | } |
899 | |
900 | Datum |
901 | bpcharcmp(PG_FUNCTION_ARGS) |
902 | { |
903 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
904 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
905 | int len1, |
906 | len2; |
907 | int cmp; |
908 | |
909 | len1 = bcTruelen(arg1); |
910 | len2 = bcTruelen(arg2); |
911 | |
912 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
913 | PG_GET_COLLATION()); |
914 | |
915 | PG_FREE_IF_COPY(arg1, 0); |
916 | PG_FREE_IF_COPY(arg2, 1); |
917 | |
918 | PG_RETURN_INT32(cmp); |
919 | } |
920 | |
921 | Datum |
922 | bpchar_sortsupport(PG_FUNCTION_ARGS) |
923 | { |
924 | SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
925 | Oid collid = ssup->ssup_collation; |
926 | MemoryContext oldcontext; |
927 | |
928 | oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
929 | |
930 | /* Use generic string SortSupport */ |
931 | varstr_sortsupport(ssup, BPCHAROID, collid); |
932 | |
933 | MemoryContextSwitchTo(oldcontext); |
934 | |
935 | PG_RETURN_VOID(); |
936 | } |
937 | |
938 | Datum |
939 | bpchar_larger(PG_FUNCTION_ARGS) |
940 | { |
941 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
942 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
943 | int len1, |
944 | len2; |
945 | int cmp; |
946 | |
947 | len1 = bcTruelen(arg1); |
948 | len2 = bcTruelen(arg2); |
949 | |
950 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
951 | PG_GET_COLLATION()); |
952 | |
953 | PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2); |
954 | } |
955 | |
956 | Datum |
957 | bpchar_smaller(PG_FUNCTION_ARGS) |
958 | { |
959 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
960 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
961 | int len1, |
962 | len2; |
963 | int cmp; |
964 | |
965 | len1 = bcTruelen(arg1); |
966 | len2 = bcTruelen(arg2); |
967 | |
968 | cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2, |
969 | PG_GET_COLLATION()); |
970 | |
971 | PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2); |
972 | } |
973 | |
974 | |
975 | /* |
976 | * bpchar needs a specialized hash function because we want to ignore |
977 | * trailing blanks in comparisons. |
978 | */ |
979 | Datum |
980 | hashbpchar(PG_FUNCTION_ARGS) |
981 | { |
982 | BpChar *key = PG_GETARG_BPCHAR_PP(0); |
983 | Oid collid = PG_GET_COLLATION(); |
984 | char *keydata; |
985 | int keylen; |
986 | pg_locale_t mylocale = 0; |
987 | Datum result; |
988 | |
989 | if (!collid) |
990 | ereport(ERROR, |
991 | (errcode(ERRCODE_INDETERMINATE_COLLATION), |
992 | errmsg("could not determine which collation to use for string hashing" ), |
993 | errhint("Use the COLLATE clause to set the collation explicitly." ))); |
994 | |
995 | keydata = VARDATA_ANY(key); |
996 | keylen = bcTruelen(key); |
997 | |
998 | if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) |
999 | mylocale = pg_newlocale_from_collation(collid); |
1000 | |
1001 | if (!mylocale || mylocale->deterministic) |
1002 | { |
1003 | result = hash_any((unsigned char *) keydata, keylen); |
1004 | } |
1005 | else |
1006 | { |
1007 | #ifdef USE_ICU |
1008 | if (mylocale->provider == COLLPROVIDER_ICU) |
1009 | { |
1010 | int32_t ulen = -1; |
1011 | UChar *uchar = NULL; |
1012 | Size bsize; |
1013 | uint8_t *buf; |
1014 | |
1015 | ulen = icu_to_uchar(&uchar, keydata, keylen); |
1016 | |
1017 | bsize = ucol_getSortKey(mylocale->info.icu.ucol, |
1018 | uchar, ulen, NULL, 0); |
1019 | buf = palloc(bsize); |
1020 | ucol_getSortKey(mylocale->info.icu.ucol, |
1021 | uchar, ulen, buf, bsize); |
1022 | |
1023 | result = hash_any(buf, bsize); |
1024 | |
1025 | pfree(buf); |
1026 | } |
1027 | else |
1028 | #endif |
1029 | /* shouldn't happen */ |
1030 | elog(ERROR, "unsupported collprovider: %c" , mylocale->provider); |
1031 | } |
1032 | |
1033 | /* Avoid leaking memory for toasted inputs */ |
1034 | PG_FREE_IF_COPY(key, 0); |
1035 | |
1036 | return result; |
1037 | } |
1038 | |
1039 | Datum |
1040 | hashbpcharextended(PG_FUNCTION_ARGS) |
1041 | { |
1042 | BpChar *key = PG_GETARG_BPCHAR_PP(0); |
1043 | Oid collid = PG_GET_COLLATION(); |
1044 | char *keydata; |
1045 | int keylen; |
1046 | pg_locale_t mylocale = 0; |
1047 | Datum result; |
1048 | |
1049 | if (!collid) |
1050 | ereport(ERROR, |
1051 | (errcode(ERRCODE_INDETERMINATE_COLLATION), |
1052 | errmsg("could not determine which collation to use for string hashing" ), |
1053 | errhint("Use the COLLATE clause to set the collation explicitly." ))); |
1054 | |
1055 | keydata = VARDATA_ANY(key); |
1056 | keylen = bcTruelen(key); |
1057 | |
1058 | if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID) |
1059 | mylocale = pg_newlocale_from_collation(collid); |
1060 | |
1061 | if (!mylocale || mylocale->deterministic) |
1062 | { |
1063 | result = hash_any_extended((unsigned char *) keydata, keylen, |
1064 | PG_GETARG_INT64(1)); |
1065 | } |
1066 | else |
1067 | { |
1068 | #ifdef USE_ICU |
1069 | if (mylocale->provider == COLLPROVIDER_ICU) |
1070 | { |
1071 | int32_t ulen = -1; |
1072 | UChar *uchar = NULL; |
1073 | Size bsize; |
1074 | uint8_t *buf; |
1075 | |
1076 | ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); |
1077 | |
1078 | bsize = ucol_getSortKey(mylocale->info.icu.ucol, |
1079 | uchar, ulen, NULL, 0); |
1080 | buf = palloc(bsize); |
1081 | ucol_getSortKey(mylocale->info.icu.ucol, |
1082 | uchar, ulen, buf, bsize); |
1083 | |
1084 | result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); |
1085 | |
1086 | pfree(buf); |
1087 | } |
1088 | else |
1089 | #endif |
1090 | /* shouldn't happen */ |
1091 | elog(ERROR, "unsupported collprovider: %c" , mylocale->provider); |
1092 | } |
1093 | |
1094 | PG_FREE_IF_COPY(key, 0); |
1095 | |
1096 | return result; |
1097 | } |
1098 | |
1099 | /* |
1100 | * The following operators support character-by-character comparison |
1101 | * of bpchar datums, to allow building indexes suitable for LIKE clauses. |
1102 | * Note that the regular bpchareq/bpcharne comparison operators, and |
1103 | * regular support functions 1 and 2 with "C" collation are assumed to be |
1104 | * compatible with these! |
1105 | */ |
1106 | |
1107 | static int |
1108 | internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2) |
1109 | { |
1110 | int result; |
1111 | int len1, |
1112 | len2; |
1113 | |
1114 | len1 = bcTruelen(arg1); |
1115 | len2 = bcTruelen(arg2); |
1116 | |
1117 | result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); |
1118 | if (result != 0) |
1119 | return result; |
1120 | else if (len1 < len2) |
1121 | return -1; |
1122 | else if (len1 > len2) |
1123 | return 1; |
1124 | else |
1125 | return 0; |
1126 | } |
1127 | |
1128 | |
1129 | Datum |
1130 | bpchar_pattern_lt(PG_FUNCTION_ARGS) |
1131 | { |
1132 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
1133 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
1134 | int result; |
1135 | |
1136 | result = internal_bpchar_pattern_compare(arg1, arg2); |
1137 | |
1138 | PG_FREE_IF_COPY(arg1, 0); |
1139 | PG_FREE_IF_COPY(arg2, 1); |
1140 | |
1141 | PG_RETURN_BOOL(result < 0); |
1142 | } |
1143 | |
1144 | |
1145 | Datum |
1146 | bpchar_pattern_le(PG_FUNCTION_ARGS) |
1147 | { |
1148 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
1149 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
1150 | int result; |
1151 | |
1152 | result = internal_bpchar_pattern_compare(arg1, arg2); |
1153 | |
1154 | PG_FREE_IF_COPY(arg1, 0); |
1155 | PG_FREE_IF_COPY(arg2, 1); |
1156 | |
1157 | PG_RETURN_BOOL(result <= 0); |
1158 | } |
1159 | |
1160 | |
1161 | Datum |
1162 | bpchar_pattern_ge(PG_FUNCTION_ARGS) |
1163 | { |
1164 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
1165 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
1166 | int result; |
1167 | |
1168 | result = internal_bpchar_pattern_compare(arg1, arg2); |
1169 | |
1170 | PG_FREE_IF_COPY(arg1, 0); |
1171 | PG_FREE_IF_COPY(arg2, 1); |
1172 | |
1173 | PG_RETURN_BOOL(result >= 0); |
1174 | } |
1175 | |
1176 | |
1177 | Datum |
1178 | bpchar_pattern_gt(PG_FUNCTION_ARGS) |
1179 | { |
1180 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
1181 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
1182 | int result; |
1183 | |
1184 | result = internal_bpchar_pattern_compare(arg1, arg2); |
1185 | |
1186 | PG_FREE_IF_COPY(arg1, 0); |
1187 | PG_FREE_IF_COPY(arg2, 1); |
1188 | |
1189 | PG_RETURN_BOOL(result > 0); |
1190 | } |
1191 | |
1192 | |
1193 | Datum |
1194 | btbpchar_pattern_cmp(PG_FUNCTION_ARGS) |
1195 | { |
1196 | BpChar *arg1 = PG_GETARG_BPCHAR_PP(0); |
1197 | BpChar *arg2 = PG_GETARG_BPCHAR_PP(1); |
1198 | int result; |
1199 | |
1200 | result = internal_bpchar_pattern_compare(arg1, arg2); |
1201 | |
1202 | PG_FREE_IF_COPY(arg1, 0); |
1203 | PG_FREE_IF_COPY(arg2, 1); |
1204 | |
1205 | PG_RETURN_INT32(result); |
1206 | } |
1207 | |
1208 | |
1209 | Datum |
1210 | btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS) |
1211 | { |
1212 | SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
1213 | MemoryContext oldcontext; |
1214 | |
1215 | oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
1216 | |
1217 | /* Use generic string SortSupport, forcing "C" collation */ |
1218 | varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID); |
1219 | |
1220 | MemoryContextSwitchTo(oldcontext); |
1221 | |
1222 | PG_RETURN_VOID(); |
1223 | } |
1224 | |