1/*-------------------------------------------------------------------------
2 *
3 * varchar.c
4 * Functions for the built-in types char(n) and varchar(n).
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/varchar.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/tuptoaster.h"
18#include "catalog/pg_collation.h"
19#include "catalog/pg_type.h"
20#include "libpq/pqformat.h"
21#include "nodes/nodeFuncs.h"
22#include "nodes/supportnodes.h"
23#include "utils/array.h"
24#include "utils/builtins.h"
25#include "utils/hashutils.h"
26#include "utils/lsyscache.h"
27#include "utils/pg_locale.h"
28#include "utils/varlena.h"
29#include "mb/pg_wchar.h"
30
31
32/* common code for bpchartypmodin and varchartypmodin */
33static int32
34anychar_typmodin(ArrayType *ta, const char *typename)
35{
36 int32 typmod;
37 int32 *tl;
38 int n;
39
40 tl = ArrayGetIntegerTypmods(ta, &n);
41
42 /*
43 * we're not too tense about good error message here because grammar
44 * shouldn't allow wrong number of modifiers for CHAR
45 */
46 if (n != 1)
47 ereport(ERROR,
48 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
49 errmsg("invalid type modifier")));
50
51 if (*tl < 1)
52 ereport(ERROR,
53 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
54 errmsg("length for type %s must be at least 1", typename)));
55 if (*tl > MaxAttrSize)
56 ereport(ERROR,
57 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
58 errmsg("length for type %s cannot exceed %d",
59 typename, MaxAttrSize)));
60
61 /*
62 * For largely historical reasons, the typmod is VARHDRSZ plus the number
63 * of characters; there is enough client-side code that knows about that
64 * that we'd better not change it.
65 */
66 typmod = VARHDRSZ + *tl;
67
68 return typmod;
69}
70
71/* common code for bpchartypmodout and varchartypmodout */
72static char *
73anychar_typmodout(int32 typmod)
74{
75 char *res = (char *) palloc(64);
76
77 if (typmod > VARHDRSZ)
78 snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
79 else
80 *res = '\0';
81
82 return res;
83}
84
85
86/*
87 * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
88 * is for blank-padded string whose length is specified in CREATE TABLE.
89 * VARCHAR is for storing string whose length is at most the length specified
90 * at CREATE TABLE time.
91 *
92 * It's hard to implement these types because we cannot figure out
93 * the length of the type from the type itself. I changed (hopefully all) the
94 * fmgr calls that invoke input functions of a data type to supply the
95 * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
96 * the length of the attributes and hence the exact length of the char() or
97 * varchar(). We pass this to bpcharin() or varcharin().) In the case where
98 * we cannot determine the length, we pass in -1 instead and the input
99 * converter does not enforce any length check.
100 *
101 * We actually implement this as a varlena so that we don't have to pass in
102 * the length for the comparison functions. (The difference between these
103 * types and "text" is that we truncate and possibly blank-pad the string
104 * at insertion time.)
105 *
106 * - ay 6/95
107 */
108
109
110/*****************************************************************************
111 * bpchar - char() *
112 *****************************************************************************/
113
114/*
115 * bpchar_input -- common guts of bpcharin and bpcharrecv
116 *
117 * s is the input text of length len (may not be null-terminated)
118 * atttypmod is the typmod value to apply
119 *
120 * Note that atttypmod is measured in characters, which
121 * is not necessarily the same as the number of bytes.
122 *
123 * If the input string is too long, raise an error, unless the extra
124 * characters are spaces, in which case they're truncated. (per SQL)
125 */
126static BpChar *
127bpchar_input(const char *s, size_t len, int32 atttypmod)
128{
129 BpChar *result;
130 char *r;
131 size_t maxlen;
132
133 /* If typmod is -1 (or invalid), use the actual string length */
134 if (atttypmod < (int32) VARHDRSZ)
135 maxlen = len;
136 else
137 {
138 size_t charlen; /* number of CHARACTERS in the input */
139
140 maxlen = atttypmod - VARHDRSZ;
141 charlen = pg_mbstrlen_with_len(s, len);
142 if (charlen > maxlen)
143 {
144 /* Verify that extra characters are spaces, and clip them off */
145 size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
146 size_t j;
147
148 /*
149 * at this point, len is the actual BYTE length of the input
150 * string, maxlen is the max number of CHARACTERS allowed for this
151 * bpchar type, mbmaxlen is the length in BYTES of those chars.
152 */
153 for (j = mbmaxlen; j < len; j++)
154 {
155 if (s[j] != ' ')
156 ereport(ERROR,
157 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
158 errmsg("value too long for type character(%d)",
159 (int) maxlen)));
160 }
161
162 /*
163 * Now we set maxlen to the necessary byte length, not the number
164 * of CHARACTERS!
165 */
166 maxlen = len = mbmaxlen;
167 }
168 else
169 {
170 /*
171 * Now we set maxlen to the necessary byte length, not the number
172 * of CHARACTERS!
173 */
174 maxlen = len + (maxlen - charlen);
175 }
176 }
177
178 result = (BpChar *) palloc(maxlen + VARHDRSZ);
179 SET_VARSIZE(result, maxlen + VARHDRSZ);
180 r = VARDATA(result);
181 memcpy(r, s, len);
182
183 /* blank pad the string if necessary */
184 if (maxlen > len)
185 memset(r + len, ' ', maxlen - len);
186
187 return result;
188}
189
190/*
191 * Convert a C string to CHARACTER internal representation. atttypmod
192 * is the declared length of the type plus VARHDRSZ.
193 */
194Datum
195bpcharin(PG_FUNCTION_ARGS)
196{
197 char *s = PG_GETARG_CSTRING(0);
198
199#ifdef NOT_USED
200 Oid typelem = PG_GETARG_OID(1);
201#endif
202 int32 atttypmod = PG_GETARG_INT32(2);
203 BpChar *result;
204
205 result = bpchar_input(s, strlen(s), atttypmod);
206 PG_RETURN_BPCHAR_P(result);
207}
208
209
210/*
211 * Convert a CHARACTER value to a C string.
212 *
213 * Uses the text conversion functions, which is only appropriate if BpChar
214 * and text are equivalent types.
215 */
216Datum
217bpcharout(PG_FUNCTION_ARGS)
218{
219 Datum txt = PG_GETARG_DATUM(0);
220
221 PG_RETURN_CSTRING(TextDatumGetCString(txt));
222}
223
224/*
225 * bpcharrecv - converts external binary format to bpchar
226 */
227Datum
228bpcharrecv(PG_FUNCTION_ARGS)
229{
230 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
231
232#ifdef NOT_USED
233 Oid typelem = PG_GETARG_OID(1);
234#endif
235 int32 atttypmod = PG_GETARG_INT32(2);
236 BpChar *result;
237 char *str;
238 int nbytes;
239
240 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
241 result = bpchar_input(str, nbytes, atttypmod);
242 pfree(str);
243 PG_RETURN_BPCHAR_P(result);
244}
245
246/*
247 * bpcharsend - converts bpchar to binary format
248 */
249Datum
250bpcharsend(PG_FUNCTION_ARGS)
251{
252 /* Exactly the same as textsend, so share code */
253 return textsend(fcinfo);
254}
255
256
257/*
258 * Converts a CHARACTER type to the specified size.
259 *
260 * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
261 * isExplicit is true if this is for an explicit cast to char(N).
262 *
263 * Truncation rules: for an explicit cast, silently truncate to the given
264 * length; for an implicit cast, raise error unless extra characters are
265 * all spaces. (This is sort-of per SQL: the spec would actually have us
266 * raise a "completion condition" for the explicit cast case, but Postgres
267 * hasn't got such a concept.)
268 */
269Datum
270bpchar(PG_FUNCTION_ARGS)
271{
272 BpChar *source = PG_GETARG_BPCHAR_PP(0);
273 int32 maxlen = PG_GETARG_INT32(1);
274 bool isExplicit = PG_GETARG_BOOL(2);
275 BpChar *result;
276 int32 len;
277 char *r;
278 char *s;
279 int i;
280 int charlen; /* number of characters in the input string +
281 * VARHDRSZ */
282
283 /* No work if typmod is invalid */
284 if (maxlen < (int32) VARHDRSZ)
285 PG_RETURN_BPCHAR_P(source);
286
287 maxlen -= VARHDRSZ;
288
289 len = VARSIZE_ANY_EXHDR(source);
290 s = VARDATA_ANY(source);
291
292 charlen = pg_mbstrlen_with_len(s, len);
293
294 /* No work if supplied data matches typmod already */
295 if (charlen == maxlen)
296 PG_RETURN_BPCHAR_P(source);
297
298 if (charlen > maxlen)
299 {
300 /* Verify that extra characters are spaces, and clip them off */
301 size_t maxmblen;
302
303 maxmblen = pg_mbcharcliplen(s, len, maxlen);
304
305 if (!isExplicit)
306 {
307 for (i = maxmblen; i < len; i++)
308 if (s[i] != ' ')
309 ereport(ERROR,
310 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
311 errmsg("value too long for type character(%d)",
312 maxlen)));
313 }
314
315 len = maxmblen;
316
317 /*
318 * At this point, maxlen is the necessary byte length, not the number
319 * of CHARACTERS!
320 */
321 maxlen = len;
322 }
323 else
324 {
325 /*
326 * At this point, maxlen is the necessary byte length, not the number
327 * of CHARACTERS!
328 */
329 maxlen = len + (maxlen - charlen);
330 }
331
332 Assert(maxlen >= len);
333
334 result = palloc(maxlen + VARHDRSZ);
335 SET_VARSIZE(result, maxlen + VARHDRSZ);
336 r = VARDATA(result);
337
338 memcpy(r, s, len);
339
340 /* blank pad the string if necessary */
341 if (maxlen > len)
342 memset(r + len, ' ', maxlen - len);
343
344 PG_RETURN_BPCHAR_P(result);
345}
346
347
348/* char_bpchar()
349 * Convert char to bpchar(1).
350 */
351Datum
352char_bpchar(PG_FUNCTION_ARGS)
353{
354 char c = PG_GETARG_CHAR(0);
355 BpChar *result;
356
357 result = (BpChar *) palloc(VARHDRSZ + 1);
358
359 SET_VARSIZE(result, VARHDRSZ + 1);
360 *(VARDATA(result)) = c;
361
362 PG_RETURN_BPCHAR_P(result);
363}
364
365
366/* bpchar_name()
367 * Converts a bpchar() type to a NameData type.
368 */
369Datum
370bpchar_name(PG_FUNCTION_ARGS)
371{
372 BpChar *s = PG_GETARG_BPCHAR_PP(0);
373 char *s_data;
374 Name result;
375 int len;
376
377 len = VARSIZE_ANY_EXHDR(s);
378 s_data = VARDATA_ANY(s);
379
380 /* Truncate oversize input */
381 if (len >= NAMEDATALEN)
382 len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
383
384 /* Remove trailing blanks */
385 while (len > 0)
386 {
387 if (s_data[len - 1] != ' ')
388 break;
389 len--;
390 }
391
392 /* We use palloc0 here to ensure result is zero-padded */
393 result = (Name) palloc0(NAMEDATALEN);
394 memcpy(NameStr(*result), s_data, len);
395
396 PG_RETURN_NAME(result);
397}
398
399/* name_bpchar()
400 * Converts a NameData type to a bpchar type.
401 *
402 * Uses the text conversion functions, which is only appropriate if BpChar
403 * and text are equivalent types.
404 */
405Datum
406name_bpchar(PG_FUNCTION_ARGS)
407{
408 Name s = PG_GETARG_NAME(0);
409 BpChar *result;
410
411 result = (BpChar *) cstring_to_text(NameStr(*s));
412 PG_RETURN_BPCHAR_P(result);
413}
414
415Datum
416bpchartypmodin(PG_FUNCTION_ARGS)
417{
418 ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
419
420 PG_RETURN_INT32(anychar_typmodin(ta, "char"));
421}
422
423Datum
424bpchartypmodout(PG_FUNCTION_ARGS)
425{
426 int32 typmod = PG_GETARG_INT32(0);
427
428 PG_RETURN_CSTRING(anychar_typmodout(typmod));
429}
430
431
432/*****************************************************************************
433 * varchar - varchar(n)
434 *
435 * Note: varchar piggybacks on type text for most operations, and so has no
436 * C-coded functions except for I/O and typmod checking.
437 *****************************************************************************/
438
439/*
440 * varchar_input -- common guts of varcharin and varcharrecv
441 *
442 * s is the input text of length len (may not be null-terminated)
443 * atttypmod is the typmod value to apply
444 *
445 * Note that atttypmod is measured in characters, which
446 * is not necessarily the same as the number of bytes.
447 *
448 * If the input string is too long, raise an error, unless the extra
449 * characters are spaces, in which case they're truncated. (per SQL)
450 *
451 * Uses the C string to text conversion function, which is only appropriate
452 * if VarChar and text are equivalent types.
453 */
454static VarChar *
455varchar_input(const char *s, size_t len, int32 atttypmod)
456{
457 VarChar *result;
458 size_t maxlen;
459
460 maxlen = atttypmod - VARHDRSZ;
461
462 if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
463 {
464 /* Verify that extra characters are spaces, and clip them off */
465 size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
466 size_t j;
467
468 for (j = mbmaxlen; j < len; j++)
469 {
470 if (s[j] != ' ')
471 ereport(ERROR,
472 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
473 errmsg("value too long for type character varying(%d)",
474 (int) maxlen)));
475 }
476
477 len = mbmaxlen;
478 }
479
480 result = (VarChar *) cstring_to_text_with_len(s, len);
481 return result;
482}
483
484/*
485 * Convert a C string to VARCHAR internal representation. atttypmod
486 * is the declared length of the type plus VARHDRSZ.
487 */
488Datum
489varcharin(PG_FUNCTION_ARGS)
490{
491 char *s = PG_GETARG_CSTRING(0);
492
493#ifdef NOT_USED
494 Oid typelem = PG_GETARG_OID(1);
495#endif
496 int32 atttypmod = PG_GETARG_INT32(2);
497 VarChar *result;
498
499 result = varchar_input(s, strlen(s), atttypmod);
500 PG_RETURN_VARCHAR_P(result);
501}
502
503
504/*
505 * Convert a VARCHAR value to a C string.
506 *
507 * Uses the text to C string conversion function, which is only appropriate
508 * if VarChar and text are equivalent types.
509 */
510Datum
511varcharout(PG_FUNCTION_ARGS)
512{
513 Datum txt = PG_GETARG_DATUM(0);
514
515 PG_RETURN_CSTRING(TextDatumGetCString(txt));
516}
517
518/*
519 * varcharrecv - converts external binary format to varchar
520 */
521Datum
522varcharrecv(PG_FUNCTION_ARGS)
523{
524 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
525
526#ifdef NOT_USED
527 Oid typelem = PG_GETARG_OID(1);
528#endif
529 int32 atttypmod = PG_GETARG_INT32(2);
530 VarChar *result;
531 char *str;
532 int nbytes;
533
534 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
535 result = varchar_input(str, nbytes, atttypmod);
536 pfree(str);
537 PG_RETURN_VARCHAR_P(result);
538}
539
540/*
541 * varcharsend - converts varchar to binary format
542 */
543Datum
544varcharsend(PG_FUNCTION_ARGS)
545{
546 /* Exactly the same as textsend, so share code */
547 return textsend(fcinfo);
548}
549
550
551/*
552 * varchar_support()
553 *
554 * Planner support function for the varchar() length coercion function.
555 *
556 * Currently, the only interesting thing we can do is flatten calls that set
557 * the new maximum length >= the previous maximum length. We can ignore the
558 * isExplicit argument, since that only affects truncation cases.
559 */
560Datum
561varchar_support(PG_FUNCTION_ARGS)
562{
563 Node *rawreq = (Node *) PG_GETARG_POINTER(0);
564 Node *ret = NULL;
565
566 if (IsA(rawreq, SupportRequestSimplify))
567 {
568 SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
569 FuncExpr *expr = req->fcall;
570 Node *typmod;
571
572 Assert(list_length(expr->args) >= 2);
573
574 typmod = (Node *) lsecond(expr->args);
575
576 if (IsA(typmod, Const) &&!((Const *) typmod)->constisnull)
577 {
578 Node *source = (Node *) linitial(expr->args);
579 int32 old_typmod = exprTypmod(source);
580 int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
581 int32 old_max = old_typmod - VARHDRSZ;
582 int32 new_max = new_typmod - VARHDRSZ;
583
584 if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
585 ret = relabel_to_typmod(source, new_typmod);
586 }
587 }
588
589 PG_RETURN_POINTER(ret);
590}
591
592/*
593 * Converts a VARCHAR type to the specified size.
594 *
595 * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
596 * isExplicit is true if this is for an explicit cast to varchar(N).
597 *
598 * Truncation rules: for an explicit cast, silently truncate to the given
599 * length; for an implicit cast, raise error unless extra characters are
600 * all spaces. (This is sort-of per SQL: the spec would actually have us
601 * raise a "completion condition" for the explicit cast case, but Postgres
602 * hasn't got such a concept.)
603 */
604Datum
605varchar(PG_FUNCTION_ARGS)
606{
607 VarChar *source = PG_GETARG_VARCHAR_PP(0);
608 int32 typmod = PG_GETARG_INT32(1);
609 bool isExplicit = PG_GETARG_BOOL(2);
610 int32 len,
611 maxlen;
612 size_t maxmblen;
613 int i;
614 char *s_data;
615
616 len = VARSIZE_ANY_EXHDR(source);
617 s_data = VARDATA_ANY(source);
618 maxlen = typmod - VARHDRSZ;
619
620 /* No work if typmod is invalid or supplied data fits it already */
621 if (maxlen < 0 || len <= maxlen)
622 PG_RETURN_VARCHAR_P(source);
623
624 /* only reach here if string is too long... */
625
626 /* truncate multibyte string preserving multibyte boundary */
627 maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
628
629 if (!isExplicit)
630 {
631 for (i = maxmblen; i < len; i++)
632 if (s_data[i] != ' ')
633 ereport(ERROR,
634 (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
635 errmsg("value too long for type character varying(%d)",
636 maxlen)));
637 }
638
639 PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
640 maxmblen));
641}
642
643Datum
644varchartypmodin(PG_FUNCTION_ARGS)
645{
646 ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
647
648 PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
649}
650
651Datum
652varchartypmodout(PG_FUNCTION_ARGS)
653{
654 int32 typmod = PG_GETARG_INT32(0);
655
656 PG_RETURN_CSTRING(anychar_typmodout(typmod));
657}
658
659
660/*****************************************************************************
661 * Exported functions
662 *****************************************************************************/
663
664/* "True" length (not counting trailing blanks) of a BpChar */
665static inline int
666bcTruelen(BpChar *arg)
667{
668 return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
669}
670
/*
 * bpchartruelen -- length of s[0..len) once trailing blanks are ignored
 *
 * s need not be null-terminated; only the first len bytes are examined.
 * Returns the number of leading bytes up to and including the last
 * non-space byte (0 if the buffer is all spaces or empty).
 */
int
bpchartruelen(char *s, int len)
{
	int			truelen = len;

	/*
	 * This relies on ' ' being a single-byte unit in every supported
	 * multibyte server encoding, so scanning bytes backwards is safe.
	 */
	while (truelen > 0 && s[truelen - 1] == ' ')
		truelen--;

	return truelen;
}
687
688Datum
689bpcharlen(PG_FUNCTION_ARGS)
690{
691 BpChar *arg = PG_GETARG_BPCHAR_PP(0);
692 int len;
693
694 /* get number of bytes, ignoring trailing spaces */
695 len = bcTruelen(arg);
696
697 /* in multibyte encoding, convert to number of characters */
698 if (pg_database_encoding_max_length() != 1)
699 len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
700
701 PG_RETURN_INT32(len);
702}
703
704Datum
705bpcharoctetlen(PG_FUNCTION_ARGS)
706{
707 Datum arg = PG_GETARG_DATUM(0);
708
709 /* We need not detoast the input at all */
710 PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
711}
712
713
714/*****************************************************************************
715 * Comparison Functions used for bpchar
716 *
717 * Note: btree indexes need these routines not to leak memory; therefore,
718 * be careful to free working copies of toasted datums. Most places don't
719 * need to be so careful.
720 *****************************************************************************/
721
722static void
723check_collation_set(Oid collid)
724{
725 if (!OidIsValid(collid))
726 {
727 /*
728 * This typically means that the parser could not resolve a conflict
729 * of implicit collations, so report it that way.
730 */
731 ereport(ERROR,
732 (errcode(ERRCODE_INDETERMINATE_COLLATION),
733 errmsg("could not determine which collation to use for string comparison"),
734 errhint("Use the COLLATE clause to set the collation explicitly.")));
735 }
736}
737
738Datum
739bpchareq(PG_FUNCTION_ARGS)
740{
741 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
742 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
743 int len1,
744 len2;
745 bool result;
746 Oid collid = PG_GET_COLLATION();
747
748 check_collation_set(collid);
749
750 len1 = bcTruelen(arg1);
751 len2 = bcTruelen(arg2);
752
753 if (lc_collate_is_c(collid) ||
754 collid == DEFAULT_COLLATION_OID ||
755 pg_newlocale_from_collation(collid)->deterministic)
756 {
757 /*
758 * Since we only care about equality or not-equality, we can avoid all
759 * the expense of strcoll() here, and just do bitwise comparison.
760 */
761 if (len1 != len2)
762 result = false;
763 else
764 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
765 }
766 else
767 {
768 result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
769 collid) == 0);
770 }
771
772 PG_FREE_IF_COPY(arg1, 0);
773 PG_FREE_IF_COPY(arg2, 1);
774
775 PG_RETURN_BOOL(result);
776}
777
778Datum
779bpcharne(PG_FUNCTION_ARGS)
780{
781 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
782 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
783 int len1,
784 len2;
785 bool result;
786 Oid collid = PG_GET_COLLATION();
787
788 len1 = bcTruelen(arg1);
789 len2 = bcTruelen(arg2);
790
791 if (lc_collate_is_c(collid) ||
792 collid == DEFAULT_COLLATION_OID ||
793 pg_newlocale_from_collation(collid)->deterministic)
794 {
795 /*
796 * Since we only care about equality or not-equality, we can avoid all
797 * the expense of strcoll() here, and just do bitwise comparison.
798 */
799 if (len1 != len2)
800 result = true;
801 else
802 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
803 }
804 else
805 {
806 result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
807 collid) != 0);
808 }
809
810 PG_FREE_IF_COPY(arg1, 0);
811 PG_FREE_IF_COPY(arg2, 1);
812
813 PG_RETURN_BOOL(result);
814}
815
816Datum
817bpcharlt(PG_FUNCTION_ARGS)
818{
819 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
820 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
821 int len1,
822 len2;
823 int cmp;
824
825 len1 = bcTruelen(arg1);
826 len2 = bcTruelen(arg2);
827
828 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
829 PG_GET_COLLATION());
830
831 PG_FREE_IF_COPY(arg1, 0);
832 PG_FREE_IF_COPY(arg2, 1);
833
834 PG_RETURN_BOOL(cmp < 0);
835}
836
837Datum
838bpcharle(PG_FUNCTION_ARGS)
839{
840 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
841 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
842 int len1,
843 len2;
844 int cmp;
845
846 len1 = bcTruelen(arg1);
847 len2 = bcTruelen(arg2);
848
849 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
850 PG_GET_COLLATION());
851
852 PG_FREE_IF_COPY(arg1, 0);
853 PG_FREE_IF_COPY(arg2, 1);
854
855 PG_RETURN_BOOL(cmp <= 0);
856}
857
858Datum
859bpchargt(PG_FUNCTION_ARGS)
860{
861 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
862 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
863 int len1,
864 len2;
865 int cmp;
866
867 len1 = bcTruelen(arg1);
868 len2 = bcTruelen(arg2);
869
870 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
871 PG_GET_COLLATION());
872
873 PG_FREE_IF_COPY(arg1, 0);
874 PG_FREE_IF_COPY(arg2, 1);
875
876 PG_RETURN_BOOL(cmp > 0);
877}
878
879Datum
880bpcharge(PG_FUNCTION_ARGS)
881{
882 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
883 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
884 int len1,
885 len2;
886 int cmp;
887
888 len1 = bcTruelen(arg1);
889 len2 = bcTruelen(arg2);
890
891 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
892 PG_GET_COLLATION());
893
894 PG_FREE_IF_COPY(arg1, 0);
895 PG_FREE_IF_COPY(arg2, 1);
896
897 PG_RETURN_BOOL(cmp >= 0);
898}
899
900Datum
901bpcharcmp(PG_FUNCTION_ARGS)
902{
903 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
904 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
905 int len1,
906 len2;
907 int cmp;
908
909 len1 = bcTruelen(arg1);
910 len2 = bcTruelen(arg2);
911
912 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
913 PG_GET_COLLATION());
914
915 PG_FREE_IF_COPY(arg1, 0);
916 PG_FREE_IF_COPY(arg2, 1);
917
918 PG_RETURN_INT32(cmp);
919}
920
921Datum
922bpchar_sortsupport(PG_FUNCTION_ARGS)
923{
924 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
925 Oid collid = ssup->ssup_collation;
926 MemoryContext oldcontext;
927
928 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
929
930 /* Use generic string SortSupport */
931 varstr_sortsupport(ssup, BPCHAROID, collid);
932
933 MemoryContextSwitchTo(oldcontext);
934
935 PG_RETURN_VOID();
936}
937
938Datum
939bpchar_larger(PG_FUNCTION_ARGS)
940{
941 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
942 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
943 int len1,
944 len2;
945 int cmp;
946
947 len1 = bcTruelen(arg1);
948 len2 = bcTruelen(arg2);
949
950 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
951 PG_GET_COLLATION());
952
953 PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
954}
955
956Datum
957bpchar_smaller(PG_FUNCTION_ARGS)
958{
959 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
960 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
961 int len1,
962 len2;
963 int cmp;
964
965 len1 = bcTruelen(arg1);
966 len2 = bcTruelen(arg2);
967
968 cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
969 PG_GET_COLLATION());
970
971 PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
972}
973
974
975/*
976 * bpchar needs a specialized hash function because we want to ignore
977 * trailing blanks in comparisons.
978 */
979Datum
980hashbpchar(PG_FUNCTION_ARGS)
981{
982 BpChar *key = PG_GETARG_BPCHAR_PP(0);
983 Oid collid = PG_GET_COLLATION();
984 char *keydata;
985 int keylen;
986 pg_locale_t mylocale = 0;
987 Datum result;
988
989 if (!collid)
990 ereport(ERROR,
991 (errcode(ERRCODE_INDETERMINATE_COLLATION),
992 errmsg("could not determine which collation to use for string hashing"),
993 errhint("Use the COLLATE clause to set the collation explicitly.")));
994
995 keydata = VARDATA_ANY(key);
996 keylen = bcTruelen(key);
997
998 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
999 mylocale = pg_newlocale_from_collation(collid);
1000
1001 if (!mylocale || mylocale->deterministic)
1002 {
1003 result = hash_any((unsigned char *) keydata, keylen);
1004 }
1005 else
1006 {
1007#ifdef USE_ICU
1008 if (mylocale->provider == COLLPROVIDER_ICU)
1009 {
1010 int32_t ulen = -1;
1011 UChar *uchar = NULL;
1012 Size bsize;
1013 uint8_t *buf;
1014
1015 ulen = icu_to_uchar(&uchar, keydata, keylen);
1016
1017 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1018 uchar, ulen, NULL, 0);
1019 buf = palloc(bsize);
1020 ucol_getSortKey(mylocale->info.icu.ucol,
1021 uchar, ulen, buf, bsize);
1022
1023 result = hash_any(buf, bsize);
1024
1025 pfree(buf);
1026 }
1027 else
1028#endif
1029 /* shouldn't happen */
1030 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1031 }
1032
1033 /* Avoid leaking memory for toasted inputs */
1034 PG_FREE_IF_COPY(key, 0);
1035
1036 return result;
1037}
1038
1039Datum
1040hashbpcharextended(PG_FUNCTION_ARGS)
1041{
1042 BpChar *key = PG_GETARG_BPCHAR_PP(0);
1043 Oid collid = PG_GET_COLLATION();
1044 char *keydata;
1045 int keylen;
1046 pg_locale_t mylocale = 0;
1047 Datum result;
1048
1049 if (!collid)
1050 ereport(ERROR,
1051 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1052 errmsg("could not determine which collation to use for string hashing"),
1053 errhint("Use the COLLATE clause to set the collation explicitly.")));
1054
1055 keydata = VARDATA_ANY(key);
1056 keylen = bcTruelen(key);
1057
1058 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1059 mylocale = pg_newlocale_from_collation(collid);
1060
1061 if (!mylocale || mylocale->deterministic)
1062 {
1063 result = hash_any_extended((unsigned char *) keydata, keylen,
1064 PG_GETARG_INT64(1));
1065 }
1066 else
1067 {
1068#ifdef USE_ICU
1069 if (mylocale->provider == COLLPROVIDER_ICU)
1070 {
1071 int32_t ulen = -1;
1072 UChar *uchar = NULL;
1073 Size bsize;
1074 uint8_t *buf;
1075
1076 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
1077
1078 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
1079 uchar, ulen, NULL, 0);
1080 buf = palloc(bsize);
1081 ucol_getSortKey(mylocale->info.icu.ucol,
1082 uchar, ulen, buf, bsize);
1083
1084 result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
1085
1086 pfree(buf);
1087 }
1088 else
1089#endif
1090 /* shouldn't happen */
1091 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1092 }
1093
1094 PG_FREE_IF_COPY(key, 0);
1095
1096 return result;
1097}
1098
1099/*
1100 * The following operators support character-by-character comparison
1101 * of bpchar datums, to allow building indexes suitable for LIKE clauses.
1102 * Note that the regular bpchareq/bpcharne comparison operators, and
1103 * regular support functions 1 and 2 with "C" collation are assumed to be
1104 * compatible with these!
1105 */
1106
1107static int
1108internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
1109{
1110 int result;
1111 int len1,
1112 len2;
1113
1114 len1 = bcTruelen(arg1);
1115 len2 = bcTruelen(arg2);
1116
1117 result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1118 if (result != 0)
1119 return result;
1120 else if (len1 < len2)
1121 return -1;
1122 else if (len1 > len2)
1123 return 1;
1124 else
1125 return 0;
1126}
1127
1128
1129Datum
1130bpchar_pattern_lt(PG_FUNCTION_ARGS)
1131{
1132 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1133 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1134 int result;
1135
1136 result = internal_bpchar_pattern_compare(arg1, arg2);
1137
1138 PG_FREE_IF_COPY(arg1, 0);
1139 PG_FREE_IF_COPY(arg2, 1);
1140
1141 PG_RETURN_BOOL(result < 0);
1142}
1143
1144
1145Datum
1146bpchar_pattern_le(PG_FUNCTION_ARGS)
1147{
1148 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1149 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1150 int result;
1151
1152 result = internal_bpchar_pattern_compare(arg1, arg2);
1153
1154 PG_FREE_IF_COPY(arg1, 0);
1155 PG_FREE_IF_COPY(arg2, 1);
1156
1157 PG_RETURN_BOOL(result <= 0);
1158}
1159
1160
1161Datum
1162bpchar_pattern_ge(PG_FUNCTION_ARGS)
1163{
1164 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1165 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1166 int result;
1167
1168 result = internal_bpchar_pattern_compare(arg1, arg2);
1169
1170 PG_FREE_IF_COPY(arg1, 0);
1171 PG_FREE_IF_COPY(arg2, 1);
1172
1173 PG_RETURN_BOOL(result >= 0);
1174}
1175
1176
1177Datum
1178bpchar_pattern_gt(PG_FUNCTION_ARGS)
1179{
1180 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1181 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1182 int result;
1183
1184 result = internal_bpchar_pattern_compare(arg1, arg2);
1185
1186 PG_FREE_IF_COPY(arg1, 0);
1187 PG_FREE_IF_COPY(arg2, 1);
1188
1189 PG_RETURN_BOOL(result > 0);
1190}
1191
1192
1193Datum
1194btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
1195{
1196 BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
1197 BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
1198 int result;
1199
1200 result = internal_bpchar_pattern_compare(arg1, arg2);
1201
1202 PG_FREE_IF_COPY(arg1, 0);
1203 PG_FREE_IF_COPY(arg2, 1);
1204
1205 PG_RETURN_INT32(result);
1206}
1207
1208
1209Datum
1210btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
1211{
1212 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1213 MemoryContext oldcontext;
1214
1215 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1216
1217 /* Use generic string SortSupport, forcing "C" collation */
1218 varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
1219
1220 MemoryContextSwitchTo(oldcontext);
1221
1222 PG_RETURN_VOID();
1223}
1224