1/*-------------------------------------------------------------------------
2 *
3 * varlena.c
4 * Functions for the variable-length built-in types.
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/varlena.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include <ctype.h>
18#include <limits.h>
19
20#include "access/tuptoaster.h"
21#include "catalog/pg_collation.h"
22#include "catalog/pg_type.h"
23#include "common/int.h"
24#include "lib/hyperloglog.h"
25#include "libpq/pqformat.h"
26#include "miscadmin.h"
27#include "parser/scansup.h"
28#include "port/pg_bswap.h"
29#include "regex/regex.h"
30#include "utils/builtins.h"
31#include "utils/bytea.h"
32#include "utils/hashutils.h"
33#include "utils/lsyscache.h"
34#include "utils/memutils.h"
35#include "utils/pg_locale.h"
36#include "utils/sortsupport.h"
37#include "utils/varlena.h"
38
39
40/* GUC variable */
41int bytea_output = BYTEA_OUTPUT_HEX;
42
43typedef struct varlena unknown;
44typedef struct varlena VarString;
45
/*
 * Working state carried between calls of the text_position_* functions.
 */
typedef struct TextPositionState
{
	/* Encoding properties, decided once in text_position_setup() */
	bool		is_multibyte;	/* true if the encoding is multibyte */
	bool		is_multibyte_char_in_char;	/* true if a byte-level match
											 * must still be verified to sit
											 * on a character boundary */

	/* The two inputs; both lengths are measured in bytes */
	char	   *str1;			/* haystack string */
	char	   *str2;			/* needle string */
	int			len1;			/* haystack length */
	int			len2;			/* needle length */

	/* Skip table for the Boyer-Moore-Horspool search algorithm */
	int			skiptablemask;	/* AND'ed with skiptable subscripts */
	int			skiptable[256]; /* skip distance per mismatched char */

	char	   *last_match;		/* most recent match within 'str1' */

	/*
	 * A match's byte position sometimes has to be translated into a
	 * character position.  The last translated spot is remembered here so
	 * the next translation can resume from it instead of recounting
	 * characters from the start of the haystack.
	 */
	char	   *refpoint;		/* pointer into the original haystack */
	int			refpos;			/* 0-based character offset of refpoint */
} TextPositionState;
74
75typedef struct
76{
77 char *buf1; /* 1st string, or abbreviation original string
78 * buf */
79 char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
80 int buflen1;
81 int buflen2;
82 int last_len1; /* Length of last buf1 string/strxfrm() input */
83 int last_len2; /* Length of last buf2 string/strxfrm() blob */
84 int last_returned; /* Last comparison result (cache) */
85 bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
86 bool collate_c;
87 Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
88 hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
89 hyperLogLogState full_card; /* Full key cardinality state */
90 double prop_card; /* Required cardinality proportion */
91 pg_locale_t locale;
92} VarStringSortSupport;
93
/*
 * Size of on-stack string buffers: big enough that most strings fit, small
 * enough that keeping it on the stack is comfortable.
 */
#define TEXTBUFLEN		1024

/* Datum access macros for the "unknown" type (a plain varlena here) */
#define DatumGetUnknownP(datum)		((unknown *) PG_DETOAST_DATUM(datum))
#define DatumGetUnknownPCopy(datum) ((unknown *) PG_DETOAST_DATUM_COPY(datum))
#define PG_GETARG_UNKNOWN_P(argno)	DatumGetUnknownP(PG_GETARG_DATUM(argno))
#define PG_GETARG_UNKNOWN_P_COPY(argno) DatumGetUnknownPCopy(PG_GETARG_DATUM(argno))
#define PG_RETURN_UNKNOWN_P(ptr)	PG_RETURN_POINTER(ptr)

/* Datum access macros for VarString; the "PP" form uses the packed detoaster */
#define DatumGetVarStringP(datum)	((VarString *) PG_DETOAST_DATUM(datum))
#define DatumGetVarStringPP(datum)	((VarString *) PG_DETOAST_DATUM_PACKED(datum))
108
109static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
110static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
111static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
112static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
113static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
114static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
115static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
116static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
117static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
118static int32 text_length(Datum str);
119static text *text_catenate(text *t1, text *t2);
120static text *text_substring(Datum str,
121 int32 start,
122 int32 length,
123 bool length_not_specified);
124static text *text_overlay(text *t1, text *t2, int sp, int sl);
125static int text_position(text *t1, text *t2, Oid collid);
126static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
127static bool text_position_next(TextPositionState *state);
128static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
129static char *text_position_get_match_ptr(TextPositionState *state);
130static int text_position_get_match_pos(TextPositionState *state);
131static void text_position_cleanup(TextPositionState *state);
132static void check_collation_set(Oid collid);
133static int text_cmp(text *arg1, text *arg2, Oid collid);
134static bytea *bytea_catenate(bytea *t1, bytea *t2);
135static bytea *bytea_substring(Datum str,
136 int S,
137 int L,
138 bool length_not_specified);
139static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
140static void appendStringInfoText(StringInfo str, const text *t);
141static Datum text_to_array_internal(PG_FUNCTION_ARGS);
142static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
143 const char *fldsep, const char *null_string);
144static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
145static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
146 int *value);
147static const char *text_format_parse_format(const char *start_ptr,
148 const char *end_ptr,
149 int *argpos, int *widthpos,
150 int *flags, int *width);
151static void text_format_string_conversion(StringInfo buf, char conversion,
152 FmgrInfo *typOutputInfo,
153 Datum value, bool isNull,
154 int flags, int width);
155static void text_format_append_string(StringInfo buf, const char *str,
156 int flags, int width);
157
158
159/*****************************************************************************
160 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
161 *****************************************************************************/
162
163/*
164 * cstring_to_text
165 *
166 * Create a text value from a null-terminated C string.
167 *
168 * The new text value is freshly palloc'd with a full-size VARHDR.
169 */
170text *
171cstring_to_text(const char *s)
172{
173 return cstring_to_text_with_len(s, strlen(s));
174}
175
176/*
177 * cstring_to_text_with_len
178 *
179 * Same as cstring_to_text except the caller specifies the string length;
180 * the string need not be null_terminated.
181 */
182text *
183cstring_to_text_with_len(const char *s, int len)
184{
185 text *result = (text *) palloc(len + VARHDRSZ);
186
187 SET_VARSIZE(result, len + VARHDRSZ);
188 memcpy(VARDATA(result), s, len);
189
190 return result;
191}
192
193/*
194 * text_to_cstring
195 *
196 * Create a palloc'd, null-terminated C string from a text value.
197 *
198 * We support being passed a compressed or toasted text value.
199 * This is a bit bogus since such values shouldn't really be referred to as
200 * "text *", but it seems useful for robustness. If we didn't handle that
201 * case here, we'd need another routine that did, anyway.
202 */
203char *
204text_to_cstring(const text *t)
205{
206 /* must cast away the const, unfortunately */
207 text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
208 int len = VARSIZE_ANY_EXHDR(tunpacked);
209 char *result;
210
211 result = (char *) palloc(len + 1);
212 memcpy(result, VARDATA_ANY(tunpacked), len);
213 result[len] = '\0';
214
215 if (tunpacked != t)
216 pfree(tunpacked);
217
218 return result;
219}
220
221/*
222 * text_to_cstring_buffer
223 *
224 * Copy a text value into a caller-supplied buffer of size dst_len.
225 *
226 * The text string is truncated if necessary to fit. The result is
227 * guaranteed null-terminated (unless dst_len == 0).
228 *
229 * We support being passed a compressed or toasted text value.
230 * This is a bit bogus since such values shouldn't really be referred to as
231 * "text *", but it seems useful for robustness. If we didn't handle that
232 * case here, we'd need another routine that did, anyway.
233 */
234void
235text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
236{
237 /* must cast away the const, unfortunately */
238 text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
239 size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
240
241 if (dst_len > 0)
242 {
243 dst_len--;
244 if (dst_len >= src_len)
245 dst_len = src_len;
246 else /* ensure truncation is encoding-safe */
247 dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
248 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
249 dst[dst_len] = '\0';
250 }
251
252 if (srcunpacked != src)
253 pfree(srcunpacked);
254}
255
256
257/*****************************************************************************
258 * USER I/O ROUTINES *
259 *****************************************************************************/
260
261
/* Convert between a digit character and its numeric value */
#define VAL(ch)			((ch) - '0')
#define DIG(num)		((num) + '0')
264
265/*
266 * byteain - converts from printable representation of byte array
267 *
268 * Non-printable characters must be passed as '\nnn' (octal) and are
269 * converted to internal form. '\' must be passed as '\\'.
270 * ereport(ERROR, ...) if bad form.
271 *
272 * BUGS:
273 * The input is scanned twice.
274 * The error checking of input is minimal.
275 */
276Datum
277byteain(PG_FUNCTION_ARGS)
278{
279 char *inputText = PG_GETARG_CSTRING(0);
280 char *tp;
281 char *rp;
282 int bc;
283 bytea *result;
284
285 /* Recognize hex input */
286 if (inputText[0] == '\\' && inputText[1] == 'x')
287 {
288 size_t len = strlen(inputText);
289
290 bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
291 result = palloc(bc);
292 bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
293 SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
294
295 PG_RETURN_BYTEA_P(result);
296 }
297
298 /* Else, it's the traditional escaped style */
299 for (bc = 0, tp = inputText; *tp != '\0'; bc++)
300 {
301 if (tp[0] != '\\')
302 tp++;
303 else if ((tp[0] == '\\') &&
304 (tp[1] >= '0' && tp[1] <= '3') &&
305 (tp[2] >= '0' && tp[2] <= '7') &&
306 (tp[3] >= '0' && tp[3] <= '7'))
307 tp += 4;
308 else if ((tp[0] == '\\') &&
309 (tp[1] == '\\'))
310 tp += 2;
311 else
312 {
313 /*
314 * one backslash, not followed by another or ### valid octal
315 */
316 ereport(ERROR,
317 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318 errmsg("invalid input syntax for type %s", "bytea")));
319 }
320 }
321
322 bc += VARHDRSZ;
323
324 result = (bytea *) palloc(bc);
325 SET_VARSIZE(result, bc);
326
327 tp = inputText;
328 rp = VARDATA(result);
329 while (*tp != '\0')
330 {
331 if (tp[0] != '\\')
332 *rp++ = *tp++;
333 else if ((tp[0] == '\\') &&
334 (tp[1] >= '0' && tp[1] <= '3') &&
335 (tp[2] >= '0' && tp[2] <= '7') &&
336 (tp[3] >= '0' && tp[3] <= '7'))
337 {
338 bc = VAL(tp[1]);
339 bc <<= 3;
340 bc += VAL(tp[2]);
341 bc <<= 3;
342 *rp++ = bc + VAL(tp[3]);
343
344 tp += 4;
345 }
346 else if ((tp[0] == '\\') &&
347 (tp[1] == '\\'))
348 {
349 *rp++ = '\\';
350 tp += 2;
351 }
352 else
353 {
354 /*
355 * We should never get here. The first pass should not allow it.
356 */
357 ereport(ERROR,
358 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
359 errmsg("invalid input syntax for type %s", "bytea")));
360 }
361 }
362
363 PG_RETURN_BYTEA_P(result);
364}
365
366/*
367 * byteaout - converts to printable representation of byte array
368 *
369 * In the traditional escaped format, non-printable characters are
370 * printed as '\nnn' (octal) and '\' as '\\'.
371 */
372Datum
373byteaout(PG_FUNCTION_ARGS)
374{
375 bytea *vlena = PG_GETARG_BYTEA_PP(0);
376 char *result;
377 char *rp;
378
379 if (bytea_output == BYTEA_OUTPUT_HEX)
380 {
381 /* Print hex format */
382 rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
383 *rp++ = '\\';
384 *rp++ = 'x';
385 rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
386 }
387 else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
388 {
389 /* Print traditional escaped format */
390 char *vp;
391 int len;
392 int i;
393
394 len = 1; /* empty string has 1 char */
395 vp = VARDATA_ANY(vlena);
396 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
397 {
398 if (*vp == '\\')
399 len += 2;
400 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
401 len += 4;
402 else
403 len++;
404 }
405 rp = result = (char *) palloc(len);
406 vp = VARDATA_ANY(vlena);
407 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
408 {
409 if (*vp == '\\')
410 {
411 *rp++ = '\\';
412 *rp++ = '\\';
413 }
414 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
415 {
416 int val; /* holds unprintable chars */
417
418 val = *vp;
419 rp[0] = '\\';
420 rp[3] = DIG(val & 07);
421 val >>= 3;
422 rp[2] = DIG(val & 07);
423 val >>= 3;
424 rp[1] = DIG(val & 03);
425 rp += 4;
426 }
427 else
428 *rp++ = *vp;
429 }
430 }
431 else
432 {
433 elog(ERROR, "unrecognized bytea_output setting: %d",
434 bytea_output);
435 rp = result = NULL; /* keep compiler quiet */
436 }
437 *rp = '\0';
438 PG_RETURN_CSTRING(result);
439}
440
441/*
442 * bytearecv - converts external binary format to bytea
443 */
444Datum
445bytearecv(PG_FUNCTION_ARGS)
446{
447 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
448 bytea *result;
449 int nbytes;
450
451 nbytes = buf->len - buf->cursor;
452 result = (bytea *) palloc(nbytes + VARHDRSZ);
453 SET_VARSIZE(result, nbytes + VARHDRSZ);
454 pq_copymsgbytes(buf, VARDATA(result), nbytes);
455 PG_RETURN_BYTEA_P(result);
456}
457
458/*
459 * byteasend - converts bytea to binary format
460 *
461 * This is a special case: just copy the input...
462 */
463Datum
464byteasend(PG_FUNCTION_ARGS)
465{
466 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
467
468 PG_RETURN_BYTEA_P(vlena);
469}
470
471Datum
472bytea_string_agg_transfn(PG_FUNCTION_ARGS)
473{
474 StringInfo state;
475
476 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
477
478 /* Append the value unless null. */
479 if (!PG_ARGISNULL(1))
480 {
481 bytea *value = PG_GETARG_BYTEA_PP(1);
482
483 /* On the first time through, we ignore the delimiter. */
484 if (state == NULL)
485 state = makeStringAggState(fcinfo);
486 else if (!PG_ARGISNULL(2))
487 {
488 bytea *delim = PG_GETARG_BYTEA_PP(2);
489
490 appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
491 }
492
493 appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
494 }
495
496 /*
497 * The transition type for string_agg() is declared to be "internal",
498 * which is a pass-by-value type the same size as a pointer.
499 */
500 PG_RETURN_POINTER(state);
501}
502
503Datum
504bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
505{
506 StringInfo state;
507
508 /* cannot be called directly because of internal-type argument */
509 Assert(AggCheckCallContext(fcinfo, NULL));
510
511 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
512
513 if (state != NULL)
514 {
515 bytea *result;
516
517 result = (bytea *) palloc(state->len + VARHDRSZ);
518 SET_VARSIZE(result, state->len + VARHDRSZ);
519 memcpy(VARDATA(result), state->data, state->len);
520 PG_RETURN_BYTEA_P(result);
521 }
522 else
523 PG_RETURN_NULL();
524}
525
526/*
527 * textin - converts "..." to internal representation
528 */
529Datum
530textin(PG_FUNCTION_ARGS)
531{
532 char *inputText = PG_GETARG_CSTRING(0);
533
534 PG_RETURN_TEXT_P(cstring_to_text(inputText));
535}
536
537/*
538 * textout - converts internal representation to "..."
539 */
540Datum
541textout(PG_FUNCTION_ARGS)
542{
543 Datum txt = PG_GETARG_DATUM(0);
544
545 PG_RETURN_CSTRING(TextDatumGetCString(txt));
546}
547
548/*
549 * textrecv - converts external binary format to text
550 */
551Datum
552textrecv(PG_FUNCTION_ARGS)
553{
554 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
555 text *result;
556 char *str;
557 int nbytes;
558
559 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
560
561 result = cstring_to_text_with_len(str, nbytes);
562 pfree(str);
563 PG_RETURN_TEXT_P(result);
564}
565
566/*
567 * textsend - converts text to binary format
568 */
569Datum
570textsend(PG_FUNCTION_ARGS)
571{
572 text *t = PG_GETARG_TEXT_PP(0);
573 StringInfoData buf;
574
575 pq_begintypsend(&buf);
576 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
577 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
578}
579
580
581/*
582 * unknownin - converts "..." to internal representation
583 */
584Datum
585unknownin(PG_FUNCTION_ARGS)
586{
587 char *str = PG_GETARG_CSTRING(0);
588
589 /* representation is same as cstring */
590 PG_RETURN_CSTRING(pstrdup(str));
591}
592
593/*
594 * unknownout - converts internal representation to "..."
595 */
596Datum
597unknownout(PG_FUNCTION_ARGS)
598{
599 /* representation is same as cstring */
600 char *str = PG_GETARG_CSTRING(0);
601
602 PG_RETURN_CSTRING(pstrdup(str));
603}
604
605/*
606 * unknownrecv - converts external binary format to unknown
607 */
608Datum
609unknownrecv(PG_FUNCTION_ARGS)
610{
611 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
612 char *str;
613 int nbytes;
614
615 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
616 /* representation is same as cstring */
617 PG_RETURN_CSTRING(str);
618}
619
620/*
621 * unknownsend - converts unknown to binary format
622 */
623Datum
624unknownsend(PG_FUNCTION_ARGS)
625{
626 /* representation is same as cstring */
627 char *str = PG_GETARG_CSTRING(0);
628 StringInfoData buf;
629
630 pq_begintypsend(&buf);
631 pq_sendtext(&buf, str, strlen(str));
632 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
633}
634
635
636/* ========== PUBLIC ROUTINES ========== */
637
638/*
639 * textlen -
640 * returns the logical length of a text*
641 * (which is less than the VARSIZE of the text*)
642 */
643Datum
644textlen(PG_FUNCTION_ARGS)
645{
646 Datum str = PG_GETARG_DATUM(0);
647
648 /* try to avoid decompressing argument */
649 PG_RETURN_INT32(text_length(str));
650}
651
652/*
653 * text_length -
654 * Does the real work for textlen()
655 *
656 * This is broken out so it can be called directly by other string processing
657 * functions. Note that the argument is passed as a Datum, to indicate that
658 * it may still be in compressed form. We can avoid decompressing it at all
659 * in some cases.
660 */
661static int32
662text_length(Datum str)
663{
664 /* fastpath when max encoding length is one */
665 if (pg_database_encoding_max_length() == 1)
666 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
667 else
668 {
669 text *t = DatumGetTextPP(str);
670
671 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
672 VARSIZE_ANY_EXHDR(t)));
673 }
674}
675
676/*
677 * textoctetlen -
678 * returns the physical length of a text*
679 * (which is less than the VARSIZE of the text*)
680 */
681Datum
682textoctetlen(PG_FUNCTION_ARGS)
683{
684 Datum str = PG_GETARG_DATUM(0);
685
686 /* We need not detoast the input at all */
687 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
688}
689
690/*
691 * textcat -
692 * takes two text* and returns a text* that is the concatenation of
693 * the two.
694 *
695 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
696 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
697 * Allocate space for output in all cases.
698 * XXX - thomas 1997-07-10
699 */
700Datum
701textcat(PG_FUNCTION_ARGS)
702{
703 text *t1 = PG_GETARG_TEXT_PP(0);
704 text *t2 = PG_GETARG_TEXT_PP(1);
705
706 PG_RETURN_TEXT_P(text_catenate(t1, t2));
707}
708
709/*
710 * text_catenate
711 * Guts of textcat(), broken out so it can be used by other functions
712 *
713 * Arguments can be in short-header form, but not compressed or out-of-line
714 */
715static text *
716text_catenate(text *t1, text *t2)
717{
718 text *result;
719 int len1,
720 len2,
721 len;
722 char *ptr;
723
724 len1 = VARSIZE_ANY_EXHDR(t1);
725 len2 = VARSIZE_ANY_EXHDR(t2);
726
727 /* paranoia ... probably should throw error instead? */
728 if (len1 < 0)
729 len1 = 0;
730 if (len2 < 0)
731 len2 = 0;
732
733 len = len1 + len2 + VARHDRSZ;
734 result = (text *) palloc(len);
735
736 /* Set size of result string... */
737 SET_VARSIZE(result, len);
738
739 /* Fill data field of result string... */
740 ptr = VARDATA(result);
741 if (len1 > 0)
742 memcpy(ptr, VARDATA_ANY(t1), len1);
743 if (len2 > 0)
744 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
745
746 return result;
747}
748
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * The caller must guarantee that n characters really are present; the
 * string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	const char *cursor = p;

	/* Optimization for single-byte encodings: bytes == characters */
	if (pg_database_encoding_max_length() == 1)
		return n;

	/* otherwise step over the characters one pg_mblen() at a time */
	while (n > 0)
	{
		cursor += pg_mblen(cursor);
		n--;
	}

	return cursor - p;
}
774
775/*
776 * text_substr()
777 * Return a substring starting at the specified position.
778 * - thomas 1997-12-31
779 *
780 * Input:
781 * - string
782 * - starting position (is one-based)
783 * - string length
784 *
785 * If the starting position is zero or less, then return from the start of the string
786 * adjusting the length to be consistent with the "negative start" per SQL.
787 * If the length is less than zero, return the remaining string.
788 *
789 * Added multibyte support.
790 * - Tatsuo Ishii 1998-4-21
791 * Changed behavior if starting position is less than one to conform to SQL behavior.
792 * Formerly returned the entire string; now returns a portion.
793 * - Thomas Lockhart 1998-12-10
794 * Now uses faster TOAST-slicing interface
795 * - John Gray 2002-02-22
796 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
797 * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
798 * error; if E < 1, return '', not entire string). Fixed MB related bug when
799 * S > LC and < LC + 4 sometimes garbage characters are returned.
800 * - Joe Conway 2002-08-10
801 */
802Datum
803text_substr(PG_FUNCTION_ARGS)
804{
805 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
806 PG_GETARG_INT32(1),
807 PG_GETARG_INT32(2),
808 false));
809}
810
811/*
812 * text_substr_no_len -
813 * Wrapper to avoid opr_sanity failure due to
814 * one function accepting a different number of args.
815 */
816Datum
817text_substr_no_len(PG_FUNCTION_ARGS)
818{
819 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
820 PG_GETARG_INT32(1),
821 -1, true));
822}
823
824/*
825 * text_substring -
826 * Does the real work for text_substr() and text_substr_no_len()
827 *
828 * This is broken out so it can be called directly by other string processing
829 * functions. Note that the argument is passed as a Datum, to indicate that
830 * it may still be in compressed/toasted form. We can avoid detoasting all
831 * of it in some cases.
832 *
833 * The result is always a freshly palloc'd datum.
834 */
835static text *
836text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
837{
838 int32 eml = pg_database_encoding_max_length();
839 int32 S = start; /* start position */
840 int32 S1; /* adjusted start position */
841 int32 L1; /* adjusted substring length */
842
843 /* life is easy if the encoding max length is 1 */
844 if (eml == 1)
845 {
846 S1 = Max(S, 1);
847
848 if (length_not_specified) /* special case - get length to end of
849 * string */
850 L1 = -1;
851 else
852 {
853 /* end position */
854 int E = S + length;
855
856 /*
857 * A negative value for L is the only way for the end position to
858 * be before the start. SQL99 says to throw an error.
859 */
860 if (E < S)
861 ereport(ERROR,
862 (errcode(ERRCODE_SUBSTRING_ERROR),
863 errmsg("negative substring length not allowed")));
864
865 /*
866 * A zero or negative value for the end position can happen if the
867 * start was negative or one. SQL99 says to return a zero-length
868 * string.
869 */
870 if (E < 1)
871 return cstring_to_text("");
872
873 L1 = E - S1;
874 }
875
876 /*
877 * If the start position is past the end of the string, SQL99 says to
878 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
879 * that for us. Convert to zero-based starting position
880 */
881 return DatumGetTextPSlice(str, S1 - 1, L1);
882 }
883 else if (eml > 1)
884 {
885 /*
886 * When encoding max length is > 1, we can't get LC without
887 * detoasting, so we'll grab a conservatively large slice now and go
888 * back later to do the right thing
889 */
890 int32 slice_start;
891 int32 slice_size;
892 int32 slice_strlen;
893 text *slice;
894 int32 E1;
895 int32 i;
896 char *p;
897 char *s;
898 text *ret;
899
900 /*
901 * if S is past the end of the string, the tuple toaster will return a
902 * zero-length string to us
903 */
904 S1 = Max(S, 1);
905
906 /*
907 * We need to start at position zero because there is no way to know
908 * in advance which byte offset corresponds to the supplied start
909 * position.
910 */
911 slice_start = 0;
912
913 if (length_not_specified) /* special case - get length to end of
914 * string */
915 slice_size = L1 = -1;
916 else
917 {
918 int E = S + length;
919
920 /*
921 * A negative value for L is the only way for the end position to
922 * be before the start. SQL99 says to throw an error.
923 */
924 if (E < S)
925 ereport(ERROR,
926 (errcode(ERRCODE_SUBSTRING_ERROR),
927 errmsg("negative substring length not allowed")));
928
929 /*
930 * A zero or negative value for the end position can happen if the
931 * start was negative or one. SQL99 says to return a zero-length
932 * string.
933 */
934 if (E < 1)
935 return cstring_to_text("");
936
937 /*
938 * if E is past the end of the string, the tuple toaster will
939 * truncate the length for us
940 */
941 L1 = E - S1;
942
943 /*
944 * Total slice size in bytes can't be any longer than the start
945 * position plus substring length times the encoding max length.
946 */
947 slice_size = (S1 + L1) * eml;
948 }
949
950 /*
951 * If we're working with an untoasted source, no need to do an extra
952 * copying step.
953 */
954 if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
955 VARATT_IS_EXTERNAL(DatumGetPointer(str)))
956 slice = DatumGetTextPSlice(str, slice_start, slice_size);
957 else
958 slice = (text *) DatumGetPointer(str);
959
960 /* see if we got back an empty string */
961 if (VARSIZE_ANY_EXHDR(slice) == 0)
962 {
963 if (slice != (text *) DatumGetPointer(str))
964 pfree(slice);
965 return cstring_to_text("");
966 }
967
968 /* Now we can get the actual length of the slice in MB characters */
969 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
970 VARSIZE_ANY_EXHDR(slice));
971
972 /*
973 * Check that the start position wasn't > slice_strlen. If so, SQL99
974 * says to return a zero-length string.
975 */
976 if (S1 > slice_strlen)
977 {
978 if (slice != (text *) DatumGetPointer(str))
979 pfree(slice);
980 return cstring_to_text("");
981 }
982
983 /*
984 * Adjust L1 and E1 now that we know the slice string length. Again
985 * remember that S1 is one based, and slice_start is zero based.
986 */
987 if (L1 > -1)
988 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
989 else
990 E1 = slice_start + 1 + slice_strlen;
991
992 /*
993 * Find the start position in the slice; remember S1 is not zero based
994 */
995 p = VARDATA_ANY(slice);
996 for (i = 0; i < S1 - 1; i++)
997 p += pg_mblen(p);
998
999 /* hang onto a pointer to our start position */
1000 s = p;
1001
1002 /*
1003 * Count the actual bytes used by the substring of the requested
1004 * length.
1005 */
1006 for (i = S1; i < E1; i++)
1007 p += pg_mblen(p);
1008
1009 ret = (text *) palloc(VARHDRSZ + (p - s));
1010 SET_VARSIZE(ret, VARHDRSZ + (p - s));
1011 memcpy(VARDATA(ret), s, (p - s));
1012
1013 if (slice != (text *) DatumGetPointer(str))
1014 pfree(slice);
1015
1016 return ret;
1017 }
1018 else
1019 elog(ERROR, "invalid backend encoding: encoding max length < 1");
1020
1021 /* not reached: suppress compiler warning */
1022 return NULL;
1023}
1024
1025/*
1026 * textoverlay
1027 * Replace specified substring of first string with second
1028 *
1029 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
1030 * This code is a direct implementation of what the standard says.
1031 */
1032Datum
1033textoverlay(PG_FUNCTION_ARGS)
1034{
1035 text *t1 = PG_GETARG_TEXT_PP(0);
1036 text *t2 = PG_GETARG_TEXT_PP(1);
1037 int sp = PG_GETARG_INT32(2); /* substring start position */
1038 int sl = PG_GETARG_INT32(3); /* substring length */
1039
1040 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1041}
1042
1043Datum
1044textoverlay_no_len(PG_FUNCTION_ARGS)
1045{
1046 text *t1 = PG_GETARG_TEXT_PP(0);
1047 text *t2 = PG_GETARG_TEXT_PP(1);
1048 int sp = PG_GETARG_INT32(2); /* substring start position */
1049 int sl;
1050
1051 sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
1052 PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1053}
1054
1055static text *
1056text_overlay(text *t1, text *t2, int sp, int sl)
1057{
1058 text *result;
1059 text *s1;
1060 text *s2;
1061 int sp_pl_sl;
1062
1063 /*
1064 * Check for possible integer-overflow cases. For negative sp, throw a
1065 * "substring length" error because that's what should be expected
1066 * according to the spec's definition of OVERLAY().
1067 */
1068 if (sp <= 0)
1069 ereport(ERROR,
1070 (errcode(ERRCODE_SUBSTRING_ERROR),
1071 errmsg("negative substring length not allowed")));
1072 if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1073 ereport(ERROR,
1074 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1075 errmsg("integer out of range")));
1076
1077 s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
1078 s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
1079 result = text_catenate(s1, t2);
1080 result = text_catenate(result, s2);
1081
1082 return result;
1083}
1084
1085/*
1086 * textpos -
1087 * Return the position of the specified substring.
1088 * Implements the SQL POSITION() function.
1089 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1090 * - thomas 1997-07-27
1091 */
1092Datum
1093textpos(PG_FUNCTION_ARGS)
1094{
1095 text *str = PG_GETARG_TEXT_PP(0);
1096 text *search_str = PG_GETARG_TEXT_PP(1);
1097
1098 PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
1099}
1100
1101/*
1102 * text_position -
1103 * Does the real work for textpos()
1104 *
1105 * Inputs:
1106 * t1 - string to be searched
1107 * t2 - pattern to match within t1
1108 * Result:
1109 * Character index of the first matched char, starting from 1,
1110 * or 0 if no match.
1111 *
1112 * This is broken out so it can be called directly by other string processing
1113 * functions.
1114 */
1115static int
1116text_position(text *t1, text *t2, Oid collid)
1117{
1118 TextPositionState state;
1119 int result;
1120
1121 if (VARSIZE_ANY_EXHDR(t1) < 1 || VARSIZE_ANY_EXHDR(t2) < 1)
1122 return 0;
1123
1124 text_position_setup(t1, t2, collid, &state);
1125 if (!text_position_next(&state))
1126 result = 0;
1127 else
1128 result = text_position_get_match_pos(&state);
1129 text_position_cleanup(&state);
1130 return result;
1131}
1132
1133
1134/*
1135 * text_position_setup, text_position_next, text_position_cleanup -
1136 * Component steps of text_position()
1137 *
1138 * These are broken out so that a string can be efficiently searched for
1139 * multiple occurrences of the same pattern. text_position_next may be
1140 * called multiple times, and it advances to the next match on each call.
1141 * text_position_get_match_ptr() and text_position_get_match_pos() return
1142 * a pointer or 1-based character position of the last match, respectively.
1143 *
1144 * The "state" variable is normally just a local variable in the caller.
1145 *
1146 * NOTE: text_position_next skips over the matched portion. For example,
1147 * searching for "xx" in "xxx" returns only one match, not two.
1148 */
1149
1150static void
1151text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
1152{
1153 int len1 = VARSIZE_ANY_EXHDR(t1);
1154 int len2 = VARSIZE_ANY_EXHDR(t2);
1155 pg_locale_t mylocale = 0;
1156
1157 check_collation_set(collid);
1158
1159 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1160 mylocale = pg_newlocale_from_collation(collid);
1161
1162 if (mylocale && !mylocale->deterministic)
1163 ereport(ERROR,
1164 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1165 errmsg("nondeterministic collations are not supported for substring searches")));
1166
1167 Assert(len1 > 0);
1168 Assert(len2 > 0);
1169
1170 /*
1171 * Even with a multi-byte encoding, we perform the search using the raw
1172 * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1173 * because in UTF-8 the byte sequence of one character cannot contain
1174 * another character. For other multi-byte encodings, we do the search
1175 * initially as a simple byte search, ignoring multibyte issues, but
1176 * verify afterwards that the match we found is at a character boundary,
1177 * and continue the search if it was a false match.
1178 */
1179 if (pg_database_encoding_max_length() == 1)
1180 {
1181 state->is_multibyte = false;
1182 state->is_multibyte_char_in_char = false;
1183 }
1184 else if (GetDatabaseEncoding() == PG_UTF8)
1185 {
1186 state->is_multibyte = true;
1187 state->is_multibyte_char_in_char = false;
1188 }
1189 else
1190 {
1191 state->is_multibyte = true;
1192 state->is_multibyte_char_in_char = true;
1193 }
1194
1195 state->str1 = VARDATA_ANY(t1);
1196 state->str2 = VARDATA_ANY(t2);
1197 state->len1 = len1;
1198 state->len2 = len2;
1199 state->last_match = NULL;
1200 state->refpoint = state->str1;
1201 state->refpos = 0;
1202
1203 /*
1204 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
1205 * notes we use the terminology that the "haystack" is the string to be
1206 * searched (t1) and the "needle" is the pattern being sought (t2).
1207 *
1208 * If the needle is empty or bigger than the haystack then there is no
1209 * point in wasting cycles initializing the table. We also choose not to
1210 * use B-M-H for needles of length 1, since the skip table can't possibly
1211 * save anything in that case.
1212 */
1213 if (len1 >= len2 && len2 > 1)
1214 {
1215 int searchlength = len1 - len2;
1216 int skiptablemask;
1217 int last;
1218 int i;
1219 const char *str2 = state->str2;
1220
1221 /*
1222 * First we must determine how much of the skip table to use. The
1223 * declaration of TextPositionState allows up to 256 elements, but for
1224 * short search problems we don't really want to have to initialize so
1225 * many elements --- it would take too long in comparison to the
1226 * actual search time. So we choose a useful skip table size based on
1227 * the haystack length minus the needle length. The closer the needle
1228 * length is to the haystack length the less useful skipping becomes.
1229 *
1230 * Note: since we use bit-masking to select table elements, the skip
1231 * table size MUST be a power of 2, and so the mask must be 2^N-1.
1232 */
1233 if (searchlength < 16)
1234 skiptablemask = 3;
1235 else if (searchlength < 64)
1236 skiptablemask = 7;
1237 else if (searchlength < 128)
1238 skiptablemask = 15;
1239 else if (searchlength < 512)
1240 skiptablemask = 31;
1241 else if (searchlength < 2048)
1242 skiptablemask = 63;
1243 else if (searchlength < 4096)
1244 skiptablemask = 127;
1245 else
1246 skiptablemask = 255;
1247 state->skiptablemask = skiptablemask;
1248
1249 /*
1250 * Initialize the skip table. We set all elements to the needle
1251 * length, since this is the correct skip distance for any character
1252 * not found in the needle.
1253 */
1254 for (i = 0; i <= skiptablemask; i++)
1255 state->skiptable[i] = len2;
1256
1257 /*
1258 * Now examine the needle. For each character except the last one,
1259 * set the corresponding table element to the appropriate skip
1260 * distance. Note that when two characters share the same skip table
1261 * entry, the one later in the needle must determine the skip
1262 * distance.
1263 */
1264 last = len2 - 1;
1265
1266 for (i = 0; i < last; i++)
1267 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1268 }
1269}
1270
1271/*
1272 * Advance to the next match, starting from the end of the previous match
1273 * (or the beginning of the string, on first call). Returns true if a match
1274 * is found.
1275 */
1276static bool
1277text_position_next(TextPositionState *state)
1278{
1279 int needle_len = state->len2;
1280 char *start_ptr;
1281 char *matchptr;
1282
1283 if (needle_len <= 0)
1284 return false; /* result for empty pattern */
1285
1286 /* Start from the point right after the previous match. */
1287 if (state->last_match)
1288 start_ptr = state->last_match + needle_len;
1289 else
1290 start_ptr = state->str1;
1291
1292retry:
1293 matchptr = text_position_next_internal(start_ptr, state);
1294
1295 if (!matchptr)
1296 return false;
1297
1298 /*
1299 * Found a match for the byte sequence. If this is a multibyte encoding,
1300 * where one character's byte sequence can appear inside a longer
1301 * multi-byte character, we need to verify that the match was at a
1302 * character boundary, not in the middle of a multi-byte character.
1303 */
1304 if (state->is_multibyte_char_in_char)
1305 {
1306 /* Walk one character at a time, until we reach the match. */
1307
1308 /* the search should never move backwards. */
1309 Assert(state->refpoint <= matchptr);
1310
1311 while (state->refpoint < matchptr)
1312 {
1313 /* step to next character. */
1314 state->refpoint += pg_mblen(state->refpoint);
1315 state->refpos++;
1316
1317 /*
1318 * If we stepped over the match's start position, then it was a
1319 * false positive, where the byte sequence appeared in the middle
1320 * of a multi-byte character. Skip it, and continue the search at
1321 * the next character boundary.
1322 */
1323 if (state->refpoint > matchptr)
1324 {
1325 start_ptr = state->refpoint;
1326 goto retry;
1327 }
1328 }
1329 }
1330
1331 state->last_match = matchptr;
1332 return true;
1333}
1334
1335/*
1336 * Subroutine of text_position_next(). This searches for the raw byte
1337 * sequence, ignoring any multi-byte encoding issues. Returns the first
1338 * match starting at 'start_ptr', or NULL if no match is found.
1339 */
1340static char *
1341text_position_next_internal(char *start_ptr, TextPositionState *state)
1342{
1343 int haystack_len = state->len1;
1344 int needle_len = state->len2;
1345 int skiptablemask = state->skiptablemask;
1346 const char *haystack = state->str1;
1347 const char *needle = state->str2;
1348 const char *haystack_end = &haystack[haystack_len];
1349 const char *hptr;
1350
1351 Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1352
1353 if (needle_len == 1)
1354 {
1355 /* No point in using B-M-H for a one-character needle */
1356 char nchar = *needle;
1357
1358 hptr = start_ptr;
1359 while (hptr < haystack_end)
1360 {
1361 if (*hptr == nchar)
1362 return (char *) hptr;
1363 hptr++;
1364 }
1365 }
1366 else
1367 {
1368 const char *needle_last = &needle[needle_len - 1];
1369
1370 /* Start at startpos plus the length of the needle */
1371 hptr = start_ptr + needle_len - 1;
1372 while (hptr < haystack_end)
1373 {
1374 /* Match the needle scanning *backward* */
1375 const char *nptr;
1376 const char *p;
1377
1378 nptr = needle_last;
1379 p = hptr;
1380 while (*nptr == *p)
1381 {
1382 /* Matched it all? If so, return 1-based position */
1383 if (nptr == needle)
1384 return (char *) p;
1385 nptr--, p--;
1386 }
1387
1388 /*
1389 * No match, so use the haystack char at hptr to decide how far to
1390 * advance. If the needle had any occurrence of that character
1391 * (or more precisely, one sharing the same skiptable entry)
1392 * before its last character, then we advance far enough to align
1393 * the last such needle character with that haystack position.
1394 * Otherwise we can advance by the whole needle length.
1395 */
1396 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1397 }
1398 }
1399
1400 return 0; /* not found */
1401}
1402
/*
 * Return a pointer to the current match.
 *
 * The result is the last_match value kept in the search state.
 * text_position_next() sets it to the start of the match, inside the
 * original haystack string, whenever it finds one.  It is NULL if no match
 * has been recorded since text_position_setup().
 */
static char *
text_position_get_match_ptr(TextPositionState *state)
{
	return state->last_match;
}
1414
1415/*
1416 * Return the offset of the current match.
1417 *
1418 * The offset is in characters, 1-based.
1419 */
1420static int
1421text_position_get_match_pos(TextPositionState *state)
1422{
1423 if (!state->is_multibyte)
1424 return state->last_match - state->str1 + 1;
1425 else
1426 {
1427 /* Convert the byte position to char position. */
1428 while (state->refpoint < state->last_match)
1429 {
1430 state->refpoint += pg_mblen(state->refpoint);
1431 state->refpos++;
1432 }
1433 Assert(state->refpoint == state->last_match);
1434 return state->refpos + 1;
1435 }
1436}
1437
/*
 * text_position_cleanup -
 *	  Final step of the setup/next/cleanup sequence.
 *
 * Currently a no-op: the function body releases nothing and leaves *state
 * untouched.
 */
static void
text_position_cleanup(TextPositionState *state)
{
	/* no cleanup needed */
}
1443
1444static void
1445check_collation_set(Oid collid)
1446{
1447 if (!OidIsValid(collid))
1448 {
1449 /*
1450 * This typically means that the parser could not resolve a conflict
1451 * of implicit collations, so report it that way.
1452 */
1453 ereport(ERROR,
1454 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1455 errmsg("could not determine which collation to use for string comparison"),
1456 errhint("Use the COLLATE clause to set the collation explicitly.")));
1457 }
1458}
1459
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
 * Includes locale support, but must copy strings to temporary memory
 * to allow null-termination for inputs to strcoll().
 * Returns an integer less than, equal to, or greater than zero, indicating
 * whether arg1 is less than, equal to, or greater than arg2.
 *
 * arg1/len1 and arg2/len2 give the two strings as pointer plus byte length;
 * they need not be null-terminated.  collid selects the collation and must
 * be a valid OID, else check_collation_set() raises an error.
 *
 * Overview of the strategy:
 *	- C collation: plain memcmp(), with the shorter string sorting first
 *	  when one input is a prefix of the other.
 *	- Otherwise, byte-identical inputs are reported equal right away; the
 *	  rest go to wcscoll()/wcscoll_l() (Windows with UTF-8 and a libc or
 *	  default locale), ICU, strcoll_l() or strcoll().
 *	- If the collation-aware comparison reports a tie and the collation is
 *	  the default one or a deterministic one, a bytewise comparison breaks
 *	  the tie.
 *
 * Note: many functions that depend on this are marked leakproof; therefore,
 * avoid reporting the actual contents of the input when throwing errors.
 * All errors herein should be things that can't happen except on corrupt
 * data, anyway; otherwise we will have trouble with indexing strings that
 * would cause them.
 */
int
varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
{
	int			result;

	check_collation_set(collid);

	/*
	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
	 * have to do some memory copying.  This turns out to be significantly
	 * slower, so we optimize the case where LC_COLLATE is C.  We also try to
	 * optimize relatively-short strings by avoiding palloc/pfree overhead.
	 */
	if (lc_collate_is_c(collid))
	{
		result = memcmp(arg1, arg2, Min(len1, len2));
		/* Common prefix is equal: the shorter string sorts first */
		if ((result == 0) && (len1 != len2))
			result = (len1 < len2) ? -1 : 1;
	}
	else
	{
		/* On-stack scratch space, used when the input is short enough */
		char		a1buf[TEXTBUFLEN];
		char		a2buf[TEXTBUFLEN];
		char	   *a1p,
				   *a2p;
		pg_locale_t mylocale = 0;

		/* mylocale stays unset (0) for the database default collation */
		if (collid != DEFAULT_COLLATION_OID)
			mylocale = pg_newlocale_from_collation(collid);

		/*
		 * memcmp() can't tell us which of two unequal strings sorts first,
		 * but it's a cheap way to tell if they're equal.  Testing shows that
		 * memcmp() followed by strcoll() is only trivially slower than
		 * strcoll() by itself, so we don't lose much if this doesn't work out
		 * very often, and if it does - for example, because there are many
		 * equal strings in the input - then we win big by avoiding expensive
		 * collation-aware comparisons.
		 */
		if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
			return 0;

#ifdef WIN32
		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
		if (GetDatabaseEncoding() == PG_UTF8
			&& (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
		{
			int			a1len;
			int			a2len;
			int			r;

			/*
			 * Pick the conversion buffers.  A palloc'd buffer gets two bytes
			 * per input byte plus two more; the stack buffer is used only
			 * for inputs shorter than half of TEXTBUFLEN.
			 */
			if (len1 >= TEXTBUFLEN / 2)
			{
				a1len = len1 * 2 + 2;
				a1p = palloc(a1len);
			}
			else
			{
				a1len = TEXTBUFLEN;
				a1p = a1buf;
			}
			if (len2 >= TEXTBUFLEN / 2)
			{
				a2len = len2 * 2 + 2;
				a2p = palloc(a2len);
			}
			else
			{
				a2len = TEXTBUFLEN;
				a2p = a2buf;
			}

			/* MultiByteToWideChar() does not work for zero-length input */
			if (len1 == 0)
				r = 0;
			else
			{
				r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
										(LPWSTR) a1p, a1len / 2);
				if (!r)
					ereport(ERROR,
							(errmsg("could not convert string to UTF-16: error code %lu",
									GetLastError())));
			}
			/* r is the number of wide chars written; terminate after them */
			((LPWSTR) a1p)[r] = 0;

			if (len2 == 0)
				r = 0;
			else
			{
				r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
										(LPWSTR) a2p, a2len / 2);
				if (!r)
					ereport(ERROR,
							(errmsg("could not convert string to UTF-16: error code %lu",
									GetLastError())));
			}
			((LPWSTR) a2p)[r] = 0;

			/* Reset errno so the %m in the error report below is meaningful */
			errno = 0;
#ifdef HAVE_LOCALE_T
			if (mylocale)
				result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
			else
#endif
				result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
			if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw
										 * headers */
				ereport(ERROR,
						(errmsg("could not compare Unicode strings: %m")));

			/*
			 * Break tie if necessary: for the default or a deterministic
			 * collation, fall back to comparing the original bytes.
			 */
			if (result == 0 &&
				(!mylocale || mylocale->deterministic))
			{
				result = memcmp(arg1, arg2, Min(len1, len2));
				if ((result == 0) && (len1 != len2))
					result = (len1 < len2) ? -1 : 1;
			}

			/* Release whichever buffers were palloc'd */
			if (a1p != a1buf)
				pfree(a1p);
			if (a2p != a2buf)
				pfree(a2p);

			return result;
		}
#endif							/* WIN32 */

		/*
		 * Make null-terminated copies of both inputs, on the stack when they
		 * fit in TEXTBUFLEN (terminator included) and in palloc'd memory
		 * otherwise.
		 */
		if (len1 >= TEXTBUFLEN)
			a1p = (char *) palloc(len1 + 1);
		else
			a1p = a1buf;
		if (len2 >= TEXTBUFLEN)
			a2p = (char *) palloc(len2 + 1);
		else
			a2p = a2buf;

		memcpy(a1p, arg1, len1);
		a1p[len1] = '\0';
		memcpy(a2p, arg2, len2);
		a2p[len2] = '\0';

		if (mylocale)
		{
			if (mylocale->provider == COLLPROVIDER_ICU)
			{
#ifdef USE_ICU
#ifdef HAVE_UCOL_STRCOLLUTF8
				/* With UTF-8 data, ICU is given the original buffers as-is */
				if (GetDatabaseEncoding() == PG_UTF8)
				{
					UErrorCode	status;

					status = U_ZERO_ERROR;
					result = ucol_strcollUTF8(mylocale->info.icu.ucol,
											  arg1, len1,
											  arg2, len2,
											  &status);
					if (U_FAILURE(status))
						ereport(ERROR,
								(errmsg("collation failed: %s", u_errorName(status))));
				}
				else
#endif
				{
					/* Otherwise convert both inputs to UChar strings first */
					int32_t		ulen1,
								ulen2;
					UChar	   *uchar1,
							   *uchar2;

					ulen1 = icu_to_uchar(&uchar1, arg1, len1);
					ulen2 = icu_to_uchar(&uchar2, arg2, len2);

					result = ucol_strcoll(mylocale->info.icu.ucol,
										  uchar1, ulen1,
										  uchar2, ulen2);

					pfree(uchar1);
					pfree(uchar2);
				}
#else							/* not USE_ICU */
				/* shouldn't happen */
				elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
#endif							/* not USE_ICU */
			}
			else
			{
#ifdef HAVE_LOCALE_T
				result = strcoll_l(a1p, a2p, mylocale->info.lt);
#else
				/* shouldn't happen */
				elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
#endif
			}
		}
		else
			result = strcoll(a1p, a2p);

		/*
		 * Break tie if necessary: for the default or a deterministic
		 * collation, compare the null-terminated copies bytewise.
		 */
		if (result == 0 &&
			(!mylocale || mylocale->deterministic))
			result = strcmp(a1p, a2p);

		/* Release whichever copies were palloc'd */
		if (a1p != a1buf)
			pfree(a1p);
		if (a2p != a2buf)
			pfree(a2p);
	}

	return result;
}
1684
1685/* text_cmp()
1686 * Internal comparison function for text strings.
1687 * Returns -1, 0 or 1
1688 */
1689static int
1690text_cmp(text *arg1, text *arg2, Oid collid)
1691{
1692 char *a1p,
1693 *a2p;
1694 int len1,
1695 len2;
1696
1697 a1p = VARDATA_ANY(arg1);
1698 a2p = VARDATA_ANY(arg2);
1699
1700 len1 = VARSIZE_ANY_EXHDR(arg1);
1701 len2 = VARSIZE_ANY_EXHDR(arg2);
1702
1703 return varstr_cmp(a1p, len1, a2p, len2, collid);
1704}
1705
1706/*
1707 * Comparison functions for text strings.
1708 *
1709 * Note: btree indexes need these routines not to leak memory; therefore,
1710 * be careful to free working copies of toasted datums. Most places don't
1711 * need to be so careful.
1712 */
1713
1714Datum
1715texteq(PG_FUNCTION_ARGS)
1716{
1717 Oid collid = PG_GET_COLLATION();
1718 bool result;
1719
1720 check_collation_set(collid);
1721
1722 if (lc_collate_is_c(collid) ||
1723 collid == DEFAULT_COLLATION_OID ||
1724 pg_newlocale_from_collation(collid)->deterministic)
1725 {
1726 Datum arg1 = PG_GETARG_DATUM(0);
1727 Datum arg2 = PG_GETARG_DATUM(1);
1728 Size len1,
1729 len2;
1730
1731 /*
1732 * Since we only care about equality or not-equality, we can avoid all
1733 * the expense of strcoll() here, and just do bitwise comparison. In
1734 * fact, we don't even have to do a bitwise comparison if we can show
1735 * the lengths of the strings are unequal; which might save us from
1736 * having to detoast one or both values.
1737 */
1738 len1 = toast_raw_datum_size(arg1);
1739 len2 = toast_raw_datum_size(arg2);
1740 if (len1 != len2)
1741 result = false;
1742 else
1743 {
1744 text *targ1 = DatumGetTextPP(arg1);
1745 text *targ2 = DatumGetTextPP(arg2);
1746
1747 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1748 len1 - VARHDRSZ) == 0);
1749
1750 PG_FREE_IF_COPY(targ1, 0);
1751 PG_FREE_IF_COPY(targ2, 1);
1752 }
1753 }
1754 else
1755 {
1756 text *arg1 = PG_GETARG_TEXT_PP(0);
1757 text *arg2 = PG_GETARG_TEXT_PP(1);
1758
1759 result = (text_cmp(arg1, arg2, collid) == 0);
1760
1761 PG_FREE_IF_COPY(arg1, 0);
1762 PG_FREE_IF_COPY(arg2, 1);
1763 }
1764
1765 PG_RETURN_BOOL(result);
1766}
1767
1768Datum
1769textne(PG_FUNCTION_ARGS)
1770{
1771 Oid collid = PG_GET_COLLATION();
1772 bool result;
1773
1774 check_collation_set(collid);
1775
1776 if (lc_collate_is_c(collid) ||
1777 collid == DEFAULT_COLLATION_OID ||
1778 pg_newlocale_from_collation(collid)->deterministic)
1779 {
1780 Datum arg1 = PG_GETARG_DATUM(0);
1781 Datum arg2 = PG_GETARG_DATUM(1);
1782 Size len1,
1783 len2;
1784
1785 /* See comment in texteq() */
1786 len1 = toast_raw_datum_size(arg1);
1787 len2 = toast_raw_datum_size(arg2);
1788 if (len1 != len2)
1789 result = true;
1790 else
1791 {
1792 text *targ1 = DatumGetTextPP(arg1);
1793 text *targ2 = DatumGetTextPP(arg2);
1794
1795 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1796 len1 - VARHDRSZ) != 0);
1797
1798 PG_FREE_IF_COPY(targ1, 0);
1799 PG_FREE_IF_COPY(targ2, 1);
1800 }
1801 }
1802 else
1803 {
1804 text *arg1 = PG_GETARG_TEXT_PP(0);
1805 text *arg2 = PG_GETARG_TEXT_PP(1);
1806
1807 result = (text_cmp(arg1, arg2, collid) != 0);
1808
1809 PG_FREE_IF_COPY(arg1, 0);
1810 PG_FREE_IF_COPY(arg2, 1);
1811 }
1812
1813 PG_RETURN_BOOL(result);
1814}
1815
1816Datum
1817text_lt(PG_FUNCTION_ARGS)
1818{
1819 text *arg1 = PG_GETARG_TEXT_PP(0);
1820 text *arg2 = PG_GETARG_TEXT_PP(1);
1821 bool result;
1822
1823 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
1824
1825 PG_FREE_IF_COPY(arg1, 0);
1826 PG_FREE_IF_COPY(arg2, 1);
1827
1828 PG_RETURN_BOOL(result);
1829}
1830
1831Datum
1832text_le(PG_FUNCTION_ARGS)
1833{
1834 text *arg1 = PG_GETARG_TEXT_PP(0);
1835 text *arg2 = PG_GETARG_TEXT_PP(1);
1836 bool result;
1837
1838 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
1839
1840 PG_FREE_IF_COPY(arg1, 0);
1841 PG_FREE_IF_COPY(arg2, 1);
1842
1843 PG_RETURN_BOOL(result);
1844}
1845
1846Datum
1847text_gt(PG_FUNCTION_ARGS)
1848{
1849 text *arg1 = PG_GETARG_TEXT_PP(0);
1850 text *arg2 = PG_GETARG_TEXT_PP(1);
1851 bool result;
1852
1853 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
1854
1855 PG_FREE_IF_COPY(arg1, 0);
1856 PG_FREE_IF_COPY(arg2, 1);
1857
1858 PG_RETURN_BOOL(result);
1859}
1860
1861Datum
1862text_ge(PG_FUNCTION_ARGS)
1863{
1864 text *arg1 = PG_GETARG_TEXT_PP(0);
1865 text *arg2 = PG_GETARG_TEXT_PP(1);
1866 bool result;
1867
1868 result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
1869
1870 PG_FREE_IF_COPY(arg1, 0);
1871 PG_FREE_IF_COPY(arg2, 1);
1872
1873 PG_RETURN_BOOL(result);
1874}
1875
1876Datum
1877text_starts_with(PG_FUNCTION_ARGS)
1878{
1879 Datum arg1 = PG_GETARG_DATUM(0);
1880 Datum arg2 = PG_GETARG_DATUM(1);
1881 Oid collid = PG_GET_COLLATION();
1882 pg_locale_t mylocale = 0;
1883 bool result;
1884 Size len1,
1885 len2;
1886
1887 check_collation_set(collid);
1888
1889 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1890 mylocale = pg_newlocale_from_collation(collid);
1891
1892 if (mylocale && !mylocale->deterministic)
1893 ereport(ERROR,
1894 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1895 errmsg("nondeterministic collations are not supported for substring searches")));
1896
1897 len1 = toast_raw_datum_size(arg1);
1898 len2 = toast_raw_datum_size(arg2);
1899 if (len2 > len1)
1900 result = false;
1901 else
1902 {
1903 text *targ1 = text_substring(arg1, 1, len2, false);
1904 text *targ2 = DatumGetTextPP(arg2);
1905
1906 result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1907 VARSIZE_ANY_EXHDR(targ2)) == 0);
1908
1909 PG_FREE_IF_COPY(targ1, 0);
1910 PG_FREE_IF_COPY(targ2, 1);
1911 }
1912
1913 PG_RETURN_BOOL(result);
1914}
1915
1916Datum
1917bttextcmp(PG_FUNCTION_ARGS)
1918{
1919 text *arg1 = PG_GETARG_TEXT_PP(0);
1920 text *arg2 = PG_GETARG_TEXT_PP(1);
1921 int32 result;
1922
1923 result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1924
1925 PG_FREE_IF_COPY(arg1, 0);
1926 PG_FREE_IF_COPY(arg2, 1);
1927
1928 PG_RETURN_INT32(result);
1929}
1930
1931Datum
1932bttextsortsupport(PG_FUNCTION_ARGS)
1933{
1934 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
1935 Oid collid = ssup->ssup_collation;
1936 MemoryContext oldcontext;
1937
1938 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1939
1940 /* Use generic string SortSupport */
1941 varstr_sortsupport(ssup, TEXTOID, collid);
1942
1943 MemoryContextSwitchTo(oldcontext);
1944
1945 PG_RETURN_VOID();
1946}
1947
/*
 * Generic sortsupport interface for character type's operator classes.
 * Includes locale support, and support for BpChar semantics (i.e. removing
 * trailing spaces before comparison).
 *
 * ssup is the SortSupport struct to fill in, typid the type being sorted
 * (BPCHAROID and NAMEOID get special treatment, anything else takes the
 * generic path), and collid the collation, which must be a valid OID.
 *
 * On return ssup->comparator is set, except in the Windows UTF-8 case
 * described below, where the function returns without touching ssup.  When
 * abbreviated keys are used or the collation is not C, ssup->ssup_extra
 * points to a newly allocated VarStringSortSupport.
 *
 * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
 * same representation.  Callers that always use the C collation (e.g.
 * non-collatable type callers like bytea) may have NUL bytes in their strings;
 * this will not work with any other collation, though.
 */
void
varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
{
	/* Start from the caller's request; it may be turned off below */
	bool		abbreviate = ssup->abbreviate;
	bool		collate_c = false;
	VarStringSortSupport *sss;
	pg_locale_t locale = 0;

	check_collation_set(collid);

	/*
	 * If possible, set ssup->comparator to a function which can be used to
	 * directly compare two datums.  If we can do this, we'll avoid the
	 * overhead of a trip through the fmgr layer for every comparison, which
	 * can be substantial.
	 *
	 * Most typically, we'll set the comparator to varlenafastcmp_locale,
	 * which uses strcoll() to perform comparisons.  We use that for the
	 * BpChar case too, but type NAME uses namefastcmp_locale. However, if
	 * LC_COLLATE = C, we can make things quite a bit faster with
	 * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
	 * memcmp() rather than strcoll().
	 */
	if (lc_collate_is_c(collid))
	{
		if (typid == BPCHAROID)
			ssup->comparator = bpcharfastcmp_c;
		else if (typid == NAMEOID)
		{
			ssup->comparator = namefastcmp_c;
			/* Not supporting abbreviation with type NAME, for now */
			abbreviate = false;
		}
		else
			ssup->comparator = varstrfastcmp_c;

		collate_c = true;
	}
	else
	{
		/*
		 * We need a collation-sensitive comparison.  To make things faster,
		 * we'll figure out the collation based on the locale id and cache the
		 * result.
		 */
		if (collid != DEFAULT_COLLATION_OID)
			locale = pg_newlocale_from_collation(collid);

		/*
		 * There is a further exception on Windows.  When the database
		 * encoding is UTF-8 and we are not using the C collation, complex
		 * hacks are required.  We don't currently have a comparator that
		 * handles that case, so we fall back on the slow method of having the
		 * sort code invoke bttextcmp() (in the case of text) via the fmgr
		 * trampoline.  ICU locales work just the same on Windows, however.
		 */
#ifdef WIN32
		if (GetDatabaseEncoding() == PG_UTF8 &&
			!(locale && locale->provider == COLLPROVIDER_ICU))
			return;
#endif

		/*
		 * We use varlenafastcmp_locale except for type NAME.
		 */
		if (typid == NAMEOID)
		{
			ssup->comparator = namefastcmp_locale;
			/* Not supporting abbreviation with type NAME, for now */
			abbreviate = false;
		}
		else
			ssup->comparator = varlenafastcmp_locale;
	}

	/*
	 * Unfortunately, it seems that abbreviation for non-C collations is
	 * broken on many common platforms; testing of multiple versions of glibc
	 * reveals that, for many locales, strcoll() and strxfrm() do not return
	 * consistent results, which is fatal to this optimization.  While no
	 * other libc other than Cygwin has so far been shown to have a problem,
	 * we take the conservative course of action for right now and disable
	 * this categorically.  (Users who are certain this isn't a problem on
	 * their system can define TRUST_STRXFRM.)
	 *
	 * Even apart from the risk of broken locales, it's possible that there
	 * are platforms where the use of abbreviated keys should be disabled at
	 * compile time.  Having only 4 byte datums could make worst-case
	 * performance drastically more likely, for example.  Moreover, macOS's
	 * strxfrm() implementation is known to not effectively concentrate a
	 * significant amount of entropy from the original string in earlier
	 * transformed blobs.  It's possible that other supported platforms are
	 * similarly encumbered.  So, if we ever get past disabling this
	 * categorically, we may still want or need to disable it for particular
	 * platforms.
	 */
#ifndef TRUST_STRXFRM
	/* Abbreviation survives only for the C collation and for ICU locales */
	if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
		abbreviate = false;
#endif

	/*
	 * If we're using abbreviated keys, or if we're using a locale-aware
	 * comparison, we need to initialize a StringSortSupport object.  Both
	 * cases will make use of the temporary buffers we initialize here for
	 * scratch space (and to detect requirement for BpChar semantics from
	 * caller), and the abbreviation case requires additional state.
	 */
	if (abbreviate || !collate_c)
	{
		sss = palloc(sizeof(VarStringSortSupport));
		/* Both scratch buffers start out at TEXTBUFLEN bytes */
		sss->buf1 = palloc(TEXTBUFLEN);
		sss->buflen1 = TEXTBUFLEN;
		sss->buf2 = palloc(TEXTBUFLEN);
		sss->buflen2 = TEXTBUFLEN;
		/* Start with invalid values */
		sss->last_len1 = -1;
		sss->last_len2 = -1;
		/* Initialize */
		sss->last_returned = 0;
		sss->locale = locale;

		/*
		 * To avoid somehow confusing a strxfrm() blob and an original string,
		 * constantly keep track of the variety of data that buf1 and buf2
		 * currently contain.
		 *
		 * Comparisons may be interleaved with conversion calls.  Frequently,
		 * conversions and comparisons are batched into two distinct phases,
		 * but the correctness of caching cannot hinge upon this.  For
		 * comparison caching, buffer state is only trusted if cache_blob is
		 * found set to false, whereas strxfrm() caching only trusts the state
		 * when cache_blob is found set to true.
		 *
		 * Arbitrarily initialize cache_blob to true.
		 */
		sss->cache_blob = true;
		sss->collate_c = collate_c;
		sss->typid = typid;
		ssup->ssup_extra = sss;

		/*
		 * If possible, plan to use the abbreviated keys optimization.  The
		 * core code may switch back to authoritative comparator should
		 * abbreviation be aborted.
		 */
		if (abbreviate)
		{
			sss->prop_card = 0.20;
			initHyperLogLog(&sss->abbr_card, 10);
			initHyperLogLog(&sss->full_card, 10);
			/*
			 * Keep the comparator chosen above as the full comparator and
			 * install the abbreviated-key routines in front of it.
			 */
			ssup->abbrev_full_comparator = ssup->comparator;
			ssup->comparator = varstrcmp_abbrev;
			ssup->abbrev_converter = varstr_abbrev_convert;
			ssup->abbrev_abort = varstr_abbrev_abort;
		}
	}
}
2116
2117/*
2118 * sortsupport comparison func (for C locale case)
2119 */
2120static int
2121varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
2122{
2123 VarString *arg1 = DatumGetVarStringPP(x);
2124 VarString *arg2 = DatumGetVarStringPP(y);
2125 char *a1p,
2126 *a2p;
2127 int len1,
2128 len2,
2129 result;
2130
2131 a1p = VARDATA_ANY(arg1);
2132 a2p = VARDATA_ANY(arg2);
2133
2134 len1 = VARSIZE_ANY_EXHDR(arg1);
2135 len2 = VARSIZE_ANY_EXHDR(arg2);
2136
2137 result = memcmp(a1p, a2p, Min(len1, len2));
2138 if ((result == 0) && (len1 != len2))
2139 result = (len1 < len2) ? -1 : 1;
2140
2141 /* We can't afford to leak memory here. */
2142 if (PointerGetDatum(arg1) != x)
2143 pfree(arg1);
2144 if (PointerGetDatum(arg2) != y)
2145 pfree(arg2);
2146
2147 return result;
2148}
2149
2150/*
2151 * sortsupport comparison func (for BpChar C locale case)
2152 *
2153 * BpChar outsources its sortsupport to this module. Specialization for the
2154 * varstr_sortsupport BpChar case, modeled on
2155 * internal_bpchar_pattern_compare().
2156 */
2157static int
2158bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
2159{
2160 BpChar *arg1 = DatumGetBpCharPP(x);
2161 BpChar *arg2 = DatumGetBpCharPP(y);
2162 char *a1p,
2163 *a2p;
2164 int len1,
2165 len2,
2166 result;
2167
2168 a1p = VARDATA_ANY(arg1);
2169 a2p = VARDATA_ANY(arg2);
2170
2171 len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2172 len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2173
2174 result = memcmp(a1p, a2p, Min(len1, len2));
2175 if ((result == 0) && (len1 != len2))
2176 result = (len1 < len2) ? -1 : 1;
2177
2178 /* We can't afford to leak memory here. */
2179 if (PointerGetDatum(arg1) != x)
2180 pfree(arg1);
2181 if (PointerGetDatum(arg2) != y)
2182 pfree(arg2);
2183
2184 return result;
2185}
2186
2187/*
2188 * sortsupport comparison func (for NAME C locale case)
2189 */
2190static int
2191namefastcmp_c(Datum x, Datum y, SortSupport ssup)
2192{
2193 Name arg1 = DatumGetName(x);
2194 Name arg2 = DatumGetName(y);
2195
2196 return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
2197}
2198
2199/*
2200 * sortsupport comparison func (for locale case with all varlena types)
2201 */
2202static int
2203varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
2204{
2205 VarString *arg1 = DatumGetVarStringPP(x);
2206 VarString *arg2 = DatumGetVarStringPP(y);
2207 char *a1p,
2208 *a2p;
2209 int len1,
2210 len2,
2211 result;
2212
2213 a1p = VARDATA_ANY(arg1);
2214 a2p = VARDATA_ANY(arg2);
2215
2216 len1 = VARSIZE_ANY_EXHDR(arg1);
2217 len2 = VARSIZE_ANY_EXHDR(arg2);
2218
2219 result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2220
2221 /* We can't afford to leak memory here. */
2222 if (PointerGetDatum(arg1) != x)
2223 pfree(arg1);
2224 if (PointerGetDatum(arg2) != y)
2225 pfree(arg2);
2226
2227 return result;
2228}
2229
2230/*
2231 * sortsupport comparison func (for locale case with NAME type)
2232 */
2233static int
2234namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
2235{
2236 Name arg1 = DatumGetName(x);
2237 Name arg2 = DatumGetName(y);
2238
2239 return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
2240 NameStr(*arg2), strlen(NameStr(*arg2)),
2241 ssup);
2242}
2243
/*
 * sortsupport comparison func for locale cases
 *
 * Compares the byte string a1p (len1 bytes) against a2p (len2 bytes) using
 * the collation state kept in the VarStringSortSupport hung off
 * ssup->ssup_extra.  The inputs are addressed by pointer and length; this
 * function makes its own NUL-terminated copies in sss->buf1/sss->buf2.
 *
 * Those copies double as a one-entry cache: when both inputs are bytewise
 * identical to the strings copied on the previous call (and the buffers
 * are not currently marked as holding a strxfrm() blob), the previously
 * computed result in sss->last_returned is returned without calling the
 * collation routine again.
 *
 * Returns the collation routine's result; when that is 0 and the collation
 * is deterministic (or no locale object is set), strcmp() on the buffered
 * copies is used to break the tie.
 */
static int
varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
{
	VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
	int			result;
	bool		arg1_match;

	/* Fast pre-check for equality, as discussed in varstr_cmp() */
	if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
	{
		/*
		 * No change in buf1 or buf2 contents, so avoid changing last_len1 or
		 * last_len2.  Existing contents of buffers might still be used by
		 * next call.
		 *
		 * It's fine to allow the comparison of BpChar padding bytes here,
		 * even though that implies that the memcmp() will usually be
		 * performed for BpChar callers (though multibyte characters could
		 * still prevent that from occurring).  The memcmp() is still very
		 * cheap, and BpChar's funny semantics have us remove trailing spaces
		 * (not limited to padding), so we need make no distinction between
		 * padding space characters and "real" space characters.
		 */
		return 0;
	}

	if (sss->typid == BPCHAROID)
	{
		/* Get true number of bytes, ignoring trailing spaces */
		len1 = bpchartruelen(a1p, len1);
		len2 = bpchartruelen(a2p, len2);
	}

	/*
	 * Make sure each buffer can hold its string plus a terminating NUL.  The
	 * new size is at least len + 1, and otherwise double the old size capped
	 * at MaxAllocSize.  The old buffer is freed, not copied, and the
	 * replacement is allocated in ssup->ssup_cxt.
	 */
	if (len1 >= sss->buflen1)
	{
		pfree(sss->buf1);
		sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
		sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
	}
	if (len2 >= sss->buflen2)
	{
		pfree(sss->buf2);
		sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
		sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
	}

	/*
	 * We're likely to be asked to compare the same strings repeatedly, and
	 * memcmp() is so much cheaper than strcoll() that it pays to try to cache
	 * comparisons, even though in general there is no reason to think that
	 * that will work out (every string datum may be unique).  Caching does
	 * not slow things down measurably when it doesn't work out, and can speed
	 * things up by rather a lot when it does.  In part, this is because the
	 * memcmp() compares data from cachelines that are needed in L1 cache even
	 * when the last comparison's result cannot be reused.
	 */
	arg1_match = true;
	if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
	{
		/* First argument differs from the cached one: refresh buf1 */
		arg1_match = false;
		memcpy(sss->buf1, a1p, len1);
		sss->buf1[len1] = '\0';
		sss->last_len1 = len1;
	}

	/*
	 * If we're comparing the same two strings as last time, we can return the
	 * same answer without calling strcoll() again.  This is more likely than
	 * it seems (at least with moderate to low cardinality sets), because
	 * quicksort compares the same pivot against many values.
	 */
	if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
	{
		/* Second argument differs from the cached one: refresh buf2 */
		memcpy(sss->buf2, a2p, len2);
		sss->buf2[len2] = '\0';
		sss->last_len2 = len2;
	}
	else if (arg1_match && !sss->cache_blob)
	{
		/* Use result cached following last actual strcoll() call */
		return sss->last_returned;
	}

	if (sss->locale)
	{
		if (sss->locale->provider == COLLPROVIDER_ICU)
		{
#ifdef USE_ICU
#ifdef HAVE_UCOL_STRCOLLUTF8
			if (GetDatabaseEncoding() == PG_UTF8)
			{
				UErrorCode	status;

				/*
				 * The ICU calls are given the caller's bytes with explicit
				 * lengths, not the NUL-terminated buffer copies.
				 */
				status = U_ZERO_ERROR;
				result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
										  a1p, len1,
										  a2p, len2,
										  &status);
				if (U_FAILURE(status))
					ereport(ERROR,
							(errmsg("collation failed: %s", u_errorName(status))));
			}
			else
#endif
			{
				int32_t		ulen1,
							ulen2;
				UChar	   *uchar1,
						   *uchar2;

				/* Convert both inputs to UChar strings before comparing */
				ulen1 = icu_to_uchar(&uchar1, a1p, len1);
				ulen2 = icu_to_uchar(&uchar2, a2p, len2);

				result = ucol_strcoll(sss->locale->info.icu.ucol,
									  uchar1, ulen1,
									  uchar2, ulen2);

				pfree(uchar1);
				pfree(uchar2);
			}
#else							/* not USE_ICU */
			/* shouldn't happen */
			elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
#endif							/* not USE_ICU */
		}
		else
		{
#ifdef HAVE_LOCALE_T
			result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
#else
			/* shouldn't happen */
			elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
#endif
		}
	}
	else
		result = strcoll(sss->buf1, sss->buf2);

	/* Break tie if necessary. */
	if (result == 0 &&
		(!sss->locale || sss->locale->deterministic))
		result = strcmp(sss->buf1, sss->buf2);

	/* Cache result, perhaps saving an expensive strcoll() call next time */
	sss->cache_blob = false;	/* buffers now hold two strings, not a blob */
	sss->last_returned = result;
	return result;
}
2395
2396/*
2397 * Abbreviated key comparison func
2398 */
2399static int
2400varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
2401{
2402 /*
2403 * When 0 is returned, the core system will call varstrfastcmp_c()
2404 * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale(). Even a
2405 * strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
2406 * authoritatively, for the same reason that there is a strcoll()
2407 * tie-breaker call to strcmp() in varstr_cmp().
2408 */
2409 if (x > y)
2410 return 1;
2411 else if (x == y)
2412 return 0;
2413 else
2414 return -1;
2415}
2416
/*
 * Conversion routine for sortsupport.  Converts original to abbreviated key
 * representation.  Our encoding strategy is simple -- pack the first 8 bytes
 * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
 * stored in reverse order), and treat it as an unsigned integer.  When the "C"
 * locale is used, or in case of bytea, just memcpy() from original instead.
 *
 * original: the authoritative varlena datum to abbreviate.
 * ssup: sort support state; ssup->ssup_extra must point to the
 *		VarStringSortSupport whose buffers (buf1 holds the NUL-terminated
 *		source string, buf2 the transformed blob), cache fields and
 *		HyperLogLog counters are read and updated here.
 *
 * Returns the abbreviated key as a Datum.  As a side effect, except when a
 * cached blob is reused, hashes of the full key and of the abbreviated key
 * are added to sss->full_card and sss->abbr_card for later use by
 * varstr_abbrev_abort().
 */
static Datum
varstr_abbrev_convert(Datum original, SortSupport ssup)
{
	VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
	VarString  *authoritative = DatumGetVarStringPP(original);
	char	   *authoritative_data = VARDATA_ANY(authoritative);

	/* working state */
	Datum		res;
	char	   *pres;
	int			len;
	uint32		hash;

	/* pres addresses the result Datum bytewise so it can be filled by memcpy */
	pres = (char *) &res;
	/* memset(), so any non-overwritten bytes are NUL */
	memset(pres, 0, sizeof(Datum));
	len = VARSIZE_ANY_EXHDR(authoritative);

	/* Get number of bytes, ignoring trailing spaces */
	if (sss->typid == BPCHAROID)
		len = bpchartruelen(authoritative_data, len);

	/*
	 * If we're using the C collation, use memcpy(), rather than strxfrm(), to
	 * abbreviate keys.  The full comparator for the C locale is always
	 * memcmp().  It would be incorrect to allow bytea callers (callers that
	 * always force the C collation -- bytea isn't a collatable type, but this
	 * approach is convenient) to use strxfrm().  This is because bytea
	 * strings may contain NUL bytes.  Besides, this should be faster, too.
	 *
	 * More generally, it's okay that bytea callers can have NUL bytes in
	 * strings because varstrcmp_abbrev() need not make a distinction between
	 * terminating NUL bytes, and NUL bytes representing actual NULs in the
	 * authoritative representation.  Hopefully a comparison at or past one
	 * abbreviated key's terminating NUL byte will resolve the comparison
	 * without consulting the authoritative representation; specifically, some
	 * later non-NUL byte in the longer string can resolve the comparison
	 * against a subsequent terminating NUL in the shorter string.  There will
	 * usually be what is effectively a "length-wise" resolution there and
	 * then.
	 *
	 * If that doesn't work out -- if all bytes in the longer string
	 * positioned at or past the offset of the smaller string's (first)
	 * terminating NUL are actually representative of NUL bytes in the
	 * authoritative binary string (perhaps with some *terminating* NUL bytes
	 * towards the end of the longer string iff it happens to still be small)
	 * -- then an authoritative tie-breaker will happen, and do the right
	 * thing: explicitly consider string length.
	 */
	if (sss->collate_c)
		memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
	else
	{
		Size		bsize;
#ifdef USE_ICU
		int32_t		ulen = -1;
		UChar	   *uchar = NULL;
#endif

		/*
		 * We're not using the C collation, so fall back on strxfrm or ICU
		 * analogs.
		 */

		/* By convention, we use buffer 1 to store and NUL-terminate */
		if (len >= sss->buflen1)
		{
			pfree(sss->buf1);
			sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
			sss->buf1 = palloc(sss->buflen1);
		}

		/* Might be able to reuse strxfrm() blob from last call */
		if (sss->last_len1 == len && sss->cache_blob &&
			memcmp(sss->buf1, authoritative_data, len) == 0)
		{
			/* Same source string as last time: buf2 still holds its blob */
			memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
			/* No change affecting cardinality, so no hashing required */
			goto done;
		}

		memcpy(sss->buf1, authoritative_data, len);

		/*
		 * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
		 * necessary for ICU, but doesn't hurt.
		 */
		sss->buf1[len] = '\0';
		sss->last_len1 = len;

#ifdef USE_ICU
		/* When using ICU and not UTF8, convert string to UChar. */
		if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
			GetDatabaseEncoding() != PG_UTF8)
			ulen = icu_to_uchar(&uchar, sss->buf1, len);
#endif

		/*
		 * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
		 * and try again.  Both of these functions have the result buffer
		 * content undefined if the result did not fit, so we need to retry
		 * until everything fits, even though we only need the first few bytes
		 * in the end.  When using ucol_nextSortKeyPart(), however, we only
		 * ask for as many bytes as we actually need.
		 */
		for (;;)
		{
#ifdef USE_ICU
			if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
			{
				/*
				 * When using UTF8, use the iteration interface so we only
				 * need to produce as many bytes as we actually need.
				 */
				if (GetDatabaseEncoding() == PG_UTF8)
				{
					UCharIterator iter;
					uint32_t	state[2];
					UErrorCode	status;

					uiter_setUTF8(&iter, sss->buf1, len);
					state[0] = state[1] = 0;	/* won't need that again */
					status = U_ZERO_ERROR;
					bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
												 &iter,
												 state,
												 (uint8_t *) sss->buf2,
												 Min(sizeof(Datum), sss->buflen2),
												 &status);
					if (U_FAILURE(status))
						ereport(ERROR,
								(errmsg("sort key generation failed: %s",
										u_errorName(status))));
				}
				else
					bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
											uchar, ulen,
											(uint8_t *) sss->buf2, sss->buflen2);
			}
			else
#endif
#ifdef HAVE_LOCALE_T
			if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
				bsize = strxfrm_l(sss->buf2, sss->buf1,
								  sss->buflen2, sss->locale->info.lt);
			else
#endif
				bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);

			/* Remember the blob length for the reuse path on a later call */
			sss->last_len2 = bsize;
			if (bsize < sss->buflen2)
				break;

			/*
			 * Grow buffer and retry.
			 */
			pfree(sss->buf2);
			sss->buflen2 = Max(bsize + 1,
							   Min(sss->buflen2 * 2, MaxAllocSize));
			sss->buf2 = palloc(sss->buflen2);
		}

		/*
		 * Every Datum byte is always compared.  This is safe because the
		 * strxfrm() blob is itself NUL terminated, leaving no danger of
		 * misinterpreting any NUL bytes not intended to be interpreted as
		 * logically representing termination.
		 *
		 * (Actually, even if there were NUL bytes in the blob it would be
		 * okay.  See remarks on bytea case above.)
		 */
		memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));

#ifdef USE_ICU
		/* Release the UChar conversion, if one was made above */
		if (uchar)
			pfree(uchar);
#endif
	}

	/*
	 * Maintain approximate cardinality of both abbreviated keys and original,
	 * authoritative keys using HyperLogLog.  Used as cheap insurance against
	 * the worst case, where we do many string transformations for no saving
	 * in full strcoll()-based comparisons.  These statistics are used by
	 * varstr_abbrev_abort().
	 *
	 * First, Hash key proper, or a significant fraction of it.  Mix in length
	 * in order to compensate for cases where differences are past
	 * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
	 */
	hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
								   Min(len, PG_CACHE_LINE_SIZE)));

	if (len > PG_CACHE_LINE_SIZE)
		hash ^= DatumGetUInt32(hash_uint32((uint32) len));

	addHyperLogLog(&sss->full_card, hash);

	/* Hash abbreviated key */
#if SIZEOF_DATUM == 8
	{
		uint32		lohalf,
					hihalf;

		/* Fold the two 32-bit halves of the Datum together before hashing */
		lohalf = (uint32) res;
		hihalf = (uint32) (res >> 32);
		hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
	}
#else							/* SIZEOF_DATUM != 8 */
	hash = DatumGetUInt32(hash_uint32((uint32) res));
#endif

	addHyperLogLog(&sss->abbr_card, hash);

	/* Cache result, perhaps saving an expensive strxfrm() call next time */
	sss->cache_blob = true;
done:

	/*
	 * Byteswap on little-endian machines.
	 *
	 * This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
	 * comparator) works correctly on all platforms.  If we didn't do this,
	 * the comparator would have to call memcmp() with a pair of pointers to
	 * the first byte of each abbreviated key, which is slower.
	 */
	res = DatumBigEndianToNative(res);

	/* Don't leak memory here */
	if (PointerGetDatum(authoritative) != original)
		pfree(authoritative);

	return res;
}
2658
2659/*
2660 * Callback for estimating effectiveness of abbreviated key optimization, using
2661 * heuristic rules. Returns value indicating if the abbreviation optimization
2662 * should be aborted, based on its projected effectiveness.
2663 */
2664static bool
2665varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2666{
2667 VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
2668 double abbrev_distinct,
2669 key_distinct;
2670
2671 Assert(ssup->abbreviate);
2672
2673 /* Have a little patience */
2674 if (memtupcount < 100)
2675 return false;
2676
2677 abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2678 key_distinct = estimateHyperLogLog(&sss->full_card);
2679
2680 /*
2681 * Clamp cardinality estimates to at least one distinct value. While
2682 * NULLs are generally disregarded, if only NULL values were seen so far,
2683 * that might misrepresent costs if we failed to clamp.
2684 */
2685 if (abbrev_distinct <= 1.0)
2686 abbrev_distinct = 1.0;
2687
2688 if (key_distinct <= 1.0)
2689 key_distinct = 1.0;
2690
2691 /*
2692 * In the worst case all abbreviated keys are identical, while at the same
2693 * time there are differences within full key strings not captured in
2694 * abbreviations.
2695 */
2696#ifdef TRACE_SORT
2697 if (trace_sort)
2698 {
2699 double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2700
2701 elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2702 "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2703 memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2704 sss->prop_card);
2705 }
2706#endif
2707
2708 /*
2709 * If the number of distinct abbreviated keys approximately matches the
2710 * number of distinct authoritative original keys, that's reason enough to
2711 * proceed. We can win even with a very low cardinality set if most
2712 * tie-breakers only memcmp(). This is by far the most important
2713 * consideration.
2714 *
2715 * While comparisons that are resolved at the abbreviated key level are
2716 * considerably cheaper than tie-breakers resolved with memcmp(), both of
2717 * those two outcomes are so much cheaper than a full strcoll() once
2718 * sorting is underway that it doesn't seem worth it to weigh abbreviated
2719 * cardinality against the overall size of the set in order to more
2720 * accurately model costs. Assume that an abbreviated comparison, and an
2721 * abbreviated comparison with a cheap memcmp()-based authoritative
2722 * resolution are equivalent.
2723 */
2724 if (abbrev_distinct > key_distinct * sss->prop_card)
2725 {
2726 /*
2727 * When we have exceeded 10,000 tuples, decay required cardinality
2728 * aggressively for next call.
2729 *
2730 * This is useful because the number of comparisons required on
2731 * average increases at a linearithmic rate, and at roughly 10,000
2732 * tuples that factor will start to dominate over the linear costs of
2733 * string transformation (this is a conservative estimate). The decay
2734 * rate is chosen to be a little less aggressive than halving -- which
2735 * (since we're called at points at which memtupcount has doubled)
2736 * would never see the cost model actually abort past the first call
2737 * following a decay. This decay rate is mostly a precaution against
2738 * a sudden, violent swing in how well abbreviated cardinality tracks
2739 * full key cardinality. The decay also serves to prevent a marginal
2740 * case from being aborted too late, when too much has already been
2741 * invested in string transformation.
2742 *
2743 * It's possible for sets of several million distinct strings with
2744 * mere tens of thousands of distinct abbreviated keys to still
2745 * benefit very significantly. This will generally occur provided
2746 * each abbreviated key is a proxy for a roughly uniform number of the
2747 * set's full keys. If it isn't so, we hope to catch that early and
2748 * abort. If it isn't caught early, by the time the problem is
2749 * apparent it's probably not worth aborting.
2750 */
2751 if (memtupcount > 10000)
2752 sss->prop_card *= 0.65;
2753
2754 return false;
2755 }
2756
2757 /*
2758 * Abort abbreviation strategy.
2759 *
2760 * The worst case, where all abbreviated keys are identical while all
2761 * original strings differ will typically only see a regression of about
2762 * 10% in execution time for small to medium sized lists of strings.
2763 * Whereas on modern CPUs where cache stalls are the dominant cost, we can
2764 * often expect very large improvements, particularly with sets of strings
2765 * of moderately high to high abbreviated cardinality. There is little to
2766 * lose but much to gain, which our strategy reflects.
2767 */
2768#ifdef TRACE_SORT
2769 if (trace_sort)
2770 elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2771 "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2772 memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2773#endif
2774
2775 return true;
2776}
2777
2778Datum
2779text_larger(PG_FUNCTION_ARGS)
2780{
2781 text *arg1 = PG_GETARG_TEXT_PP(0);
2782 text *arg2 = PG_GETARG_TEXT_PP(1);
2783 text *result;
2784
2785 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
2786
2787 PG_RETURN_TEXT_P(result);
2788}
2789
2790Datum
2791text_smaller(PG_FUNCTION_ARGS)
2792{
2793 text *arg1 = PG_GETARG_TEXT_PP(0);
2794 text *arg2 = PG_GETARG_TEXT_PP(1);
2795 text *result;
2796
2797 result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
2798
2799 PG_RETURN_TEXT_P(result);
2800}
2801
2802
2803/*
2804 * Cross-type comparison functions for types text and name.
2805 */
2806
2807Datum
2808nameeqtext(PG_FUNCTION_ARGS)
2809{
2810 Name arg1 = PG_GETARG_NAME(0);
2811 text *arg2 = PG_GETARG_TEXT_PP(1);
2812 size_t len1 = strlen(NameStr(*arg1));
2813 size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2814 Oid collid = PG_GET_COLLATION();
2815 bool result;
2816
2817 check_collation_set(collid);
2818
2819 if (collid == C_COLLATION_OID)
2820 result = (len1 == len2 &&
2821 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2822 else
2823 result = (varstr_cmp(NameStr(*arg1), len1,
2824 VARDATA_ANY(arg2), len2,
2825 collid) == 0);
2826
2827 PG_FREE_IF_COPY(arg2, 1);
2828
2829 PG_RETURN_BOOL(result);
2830}
2831
2832Datum
2833texteqname(PG_FUNCTION_ARGS)
2834{
2835 text *arg1 = PG_GETARG_TEXT_PP(0);
2836 Name arg2 = PG_GETARG_NAME(1);
2837 size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2838 size_t len2 = strlen(NameStr(*arg2));
2839 Oid collid = PG_GET_COLLATION();
2840 bool result;
2841
2842 check_collation_set(collid);
2843
2844 if (collid == C_COLLATION_OID)
2845 result = (len1 == len2 &&
2846 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2847 else
2848 result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2849 NameStr(*arg2), len2,
2850 collid) == 0);
2851
2852 PG_FREE_IF_COPY(arg1, 0);
2853
2854 PG_RETURN_BOOL(result);
2855}
2856
2857Datum
2858namenetext(PG_FUNCTION_ARGS)
2859{
2860 Name arg1 = PG_GETARG_NAME(0);
2861 text *arg2 = PG_GETARG_TEXT_PP(1);
2862 size_t len1 = strlen(NameStr(*arg1));
2863 size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2864 Oid collid = PG_GET_COLLATION();
2865 bool result;
2866
2867 check_collation_set(collid);
2868
2869 if (collid == C_COLLATION_OID)
2870 result = !(len1 == len2 &&
2871 memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
2872 else
2873 result = !(varstr_cmp(NameStr(*arg1), len1,
2874 VARDATA_ANY(arg2), len2,
2875 collid) == 0);
2876
2877 PG_FREE_IF_COPY(arg2, 1);
2878
2879 PG_RETURN_BOOL(result);
2880}
2881
2882Datum
2883textnename(PG_FUNCTION_ARGS)
2884{
2885 text *arg1 = PG_GETARG_TEXT_PP(0);
2886 Name arg2 = PG_GETARG_NAME(1);
2887 size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2888 size_t len2 = strlen(NameStr(*arg2));
2889 Oid collid = PG_GET_COLLATION();
2890 bool result;
2891
2892 check_collation_set(collid);
2893
2894 if (collid == C_COLLATION_OID)
2895 result = !(len1 == len2 &&
2896 memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
2897 else
2898 result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2899 NameStr(*arg2), len2,
2900 collid) == 0);
2901
2902 PG_FREE_IF_COPY(arg1, 0);
2903
2904 PG_RETURN_BOOL(result);
2905}
2906
2907Datum
2908btnametextcmp(PG_FUNCTION_ARGS)
2909{
2910 Name arg1 = PG_GETARG_NAME(0);
2911 text *arg2 = PG_GETARG_TEXT_PP(1);
2912 int32 result;
2913
2914 result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
2915 VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
2916 PG_GET_COLLATION());
2917
2918 PG_FREE_IF_COPY(arg2, 1);
2919
2920 PG_RETURN_INT32(result);
2921}
2922
2923Datum
2924bttextnamecmp(PG_FUNCTION_ARGS)
2925{
2926 text *arg1 = PG_GETARG_TEXT_PP(0);
2927 Name arg2 = PG_GETARG_NAME(1);
2928 int32 result;
2929
2930 result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
2931 NameStr(*arg2), strlen(NameStr(*arg2)),
2932 PG_GET_COLLATION());
2933
2934 PG_FREE_IF_COPY(arg1, 0);
2935
2936 PG_RETURN_INT32(result);
2937}
2938
2939#define CmpCall(cmpfunc) \
2940 DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
2941 PG_GET_COLLATION(), \
2942 PG_GETARG_DATUM(0), \
2943 PG_GETARG_DATUM(1)))
2944
2945Datum
2946namelttext(PG_FUNCTION_ARGS)
2947{
2948 PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
2949}
2950
2951Datum
2952nameletext(PG_FUNCTION_ARGS)
2953{
2954 PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
2955}
2956
2957Datum
2958namegttext(PG_FUNCTION_ARGS)
2959{
2960 PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
2961}
2962
2963Datum
2964namegetext(PG_FUNCTION_ARGS)
2965{
2966 PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
2967}
2968
2969Datum
2970textltname(PG_FUNCTION_ARGS)
2971{
2972 PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
2973}
2974
2975Datum
2976textlename(PG_FUNCTION_ARGS)
2977{
2978 PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
2979}
2980
2981Datum
2982textgtname(PG_FUNCTION_ARGS)
2983{
2984 PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
2985}
2986
2987Datum
2988textgename(PG_FUNCTION_ARGS)
2989{
2990 PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
2991}
2992
2993#undef CmpCall
2994
2995
2996/*
2997 * The following operators support character-by-character comparison
2998 * of text datums, to allow building indexes suitable for LIKE clauses.
2999 * Note that the regular texteq/textne comparison operators, and regular
3000 * support functions 1 and 2 with "C" collation are assumed to be
3001 * compatible with these!
3002 */
3003
3004static int
3005internal_text_pattern_compare(text *arg1, text *arg2)
3006{
3007 int result;
3008 int len1,
3009 len2;
3010
3011 len1 = VARSIZE_ANY_EXHDR(arg1);
3012 len2 = VARSIZE_ANY_EXHDR(arg2);
3013
3014 result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3015 if (result != 0)
3016 return result;
3017 else if (len1 < len2)
3018 return -1;
3019 else if (len1 > len2)
3020 return 1;
3021 else
3022 return 0;
3023}
3024
3025
3026Datum
3027text_pattern_lt(PG_FUNCTION_ARGS)
3028{
3029 text *arg1 = PG_GETARG_TEXT_PP(0);
3030 text *arg2 = PG_GETARG_TEXT_PP(1);
3031 int result;
3032
3033 result = internal_text_pattern_compare(arg1, arg2);
3034
3035 PG_FREE_IF_COPY(arg1, 0);
3036 PG_FREE_IF_COPY(arg2, 1);
3037
3038 PG_RETURN_BOOL(result < 0);
3039}
3040
3041
3042Datum
3043text_pattern_le(PG_FUNCTION_ARGS)
3044{
3045 text *arg1 = PG_GETARG_TEXT_PP(0);
3046 text *arg2 = PG_GETARG_TEXT_PP(1);
3047 int result;
3048
3049 result = internal_text_pattern_compare(arg1, arg2);
3050
3051 PG_FREE_IF_COPY(arg1, 0);
3052 PG_FREE_IF_COPY(arg2, 1);
3053
3054 PG_RETURN_BOOL(result <= 0);
3055}
3056
3057
3058Datum
3059text_pattern_ge(PG_FUNCTION_ARGS)
3060{
3061 text *arg1 = PG_GETARG_TEXT_PP(0);
3062 text *arg2 = PG_GETARG_TEXT_PP(1);
3063 int result;
3064
3065 result = internal_text_pattern_compare(arg1, arg2);
3066
3067 PG_FREE_IF_COPY(arg1, 0);
3068 PG_FREE_IF_COPY(arg2, 1);
3069
3070 PG_RETURN_BOOL(result >= 0);
3071}
3072
3073
3074Datum
3075text_pattern_gt(PG_FUNCTION_ARGS)
3076{
3077 text *arg1 = PG_GETARG_TEXT_PP(0);
3078 text *arg2 = PG_GETARG_TEXT_PP(1);
3079 int result;
3080
3081 result = internal_text_pattern_compare(arg1, arg2);
3082
3083 PG_FREE_IF_COPY(arg1, 0);
3084 PG_FREE_IF_COPY(arg2, 1);
3085
3086 PG_RETURN_BOOL(result > 0);
3087}
3088
3089
3090Datum
3091bttext_pattern_cmp(PG_FUNCTION_ARGS)
3092{
3093 text *arg1 = PG_GETARG_TEXT_PP(0);
3094 text *arg2 = PG_GETARG_TEXT_PP(1);
3095 int result;
3096
3097 result = internal_text_pattern_compare(arg1, arg2);
3098
3099 PG_FREE_IF_COPY(arg1, 0);
3100 PG_FREE_IF_COPY(arg2, 1);
3101
3102 PG_RETURN_INT32(result);
3103}
3104
3105
3106Datum
3107bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
3108{
3109 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
3110 MemoryContext oldcontext;
3111
3112 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3113
3114 /* Use generic string SortSupport, forcing "C" collation */
3115 varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
3116
3117 MemoryContextSwitchTo(oldcontext);
3118
3119 PG_RETURN_VOID();
3120}
3121
3122
3123/*-------------------------------------------------------------
3124 * byteaoctetlen
3125 *
3126 * get the number of bytes contained in an instance of type 'bytea'
3127 *-------------------------------------------------------------
3128 */
3129Datum
3130byteaoctetlen(PG_FUNCTION_ARGS)
3131{
3132 Datum str = PG_GETARG_DATUM(0);
3133
3134 /* We need not detoast the input at all */
3135 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
3136}
3137
3138/*
3139 * byteacat -
3140 * takes two bytea* and returns a bytea* that is the concatenation of
3141 * the two.
3142 *
3143 * Cloned from textcat and modified as required.
3144 */
3145Datum
3146byteacat(PG_FUNCTION_ARGS)
3147{
3148 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3149 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3150
3151 PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
3152}
3153
3154/*
3155 * bytea_catenate
3156 * Guts of byteacat(), broken out so it can be used by other functions
3157 *
3158 * Arguments can be in short-header form, but not compressed or out-of-line
3159 */
3160static bytea *
3161bytea_catenate(bytea *t1, bytea *t2)
3162{
3163 bytea *result;
3164 int len1,
3165 len2,
3166 len;
3167 char *ptr;
3168
3169 len1 = VARSIZE_ANY_EXHDR(t1);
3170 len2 = VARSIZE_ANY_EXHDR(t2);
3171
3172 /* paranoia ... probably should throw error instead? */
3173 if (len1 < 0)
3174 len1 = 0;
3175 if (len2 < 0)
3176 len2 = 0;
3177
3178 len = len1 + len2 + VARHDRSZ;
3179 result = (bytea *) palloc(len);
3180
3181 /* Set size of result string... */
3182 SET_VARSIZE(result, len);
3183
3184 /* Fill data field of result string... */
3185 ptr = VARDATA(result);
3186 if (len1 > 0)
3187 memcpy(ptr, VARDATA_ANY(t1), len1);
3188 if (len2 > 0)
3189 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
3190
3191 return result;
3192}
3193
/*
 * Convert a C string to a bytea by running it through byteain() with
 * DirectFunctionCall1() and fetching the resulting datum as a bytea pointer.
 */
#define PG_STR_GET_BYTEA(str_) \
	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
3196
3197/*
3198 * bytea_substr()
3199 * Return a substring starting at the specified position.
3200 * Cloned from text_substr and modified as required.
3201 *
3202 * Input:
3203 * - string
3204 * - starting position (is one-based)
3205 * - string length (optional)
3206 *
3207 * If the starting position is zero or less, then return from the start of the string
3208 * adjusting the length to be consistent with the "negative start" per SQL.
3209 * If the length is less than zero, an ERROR is thrown. If no third argument
3210 * (length) is provided, the length to the end of the string is assumed.
3211 */
3212Datum
3213bytea_substr(PG_FUNCTION_ARGS)
3214{
3215 PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
3216 PG_GETARG_INT32(1),
3217 PG_GETARG_INT32(2),
3218 false));
3219}
3220
3221/*
3222 * bytea_substr_no_len -
3223 * Wrapper to avoid opr_sanity failure due to
3224 * one function accepting a different number of args.
3225 */
3226Datum
3227bytea_substr_no_len(PG_FUNCTION_ARGS)
3228{
3229 PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
3230 PG_GETARG_INT32(1),
3231 -1,
3232 true));
3233}
3234
3235static bytea *
3236bytea_substring(Datum str,
3237 int S,
3238 int L,
3239 bool length_not_specified)
3240{
3241 int S1; /* adjusted start position */
3242 int L1; /* adjusted substring length */
3243
3244 S1 = Max(S, 1);
3245
3246 if (length_not_specified)
3247 {
3248 /*
3249 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
3250 * end of the string if we pass it a negative value for length.
3251 */
3252 L1 = -1;
3253 }
3254 else
3255 {
3256 /* end position */
3257 int E = S + L;
3258
3259 /*
3260 * A negative value for L is the only way for the end position to be
3261 * before the start. SQL99 says to throw an error.
3262 */
3263 if (E < S)
3264 ereport(ERROR,
3265 (errcode(ERRCODE_SUBSTRING_ERROR),
3266 errmsg("negative substring length not allowed")));
3267
3268 /*
3269 * A zero or negative value for the end position can happen if the
3270 * start was negative or one. SQL99 says to return a zero-length
3271 * string.
3272 */
3273 if (E < 1)
3274 return PG_STR_GET_BYTEA("");
3275
3276 L1 = E - S1;
3277 }
3278
3279 /*
3280 * If the start position is past the end of the string, SQL99 says to
3281 * return a zero-length string -- DatumGetByteaPSlice() will do that for
3282 * us. Convert to zero-based starting position
3283 */
3284 return DatumGetByteaPSlice(str, S1 - 1, L1);
3285}
3286
3287/*
3288 * byteaoverlay
3289 * Replace specified substring of first string with second
3290 *
3291 * The SQL standard defines OVERLAY() in terms of substring and concatenation.
3292 * This code is a direct implementation of what the standard says.
3293 */
3294Datum
3295byteaoverlay(PG_FUNCTION_ARGS)
3296{
3297 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3298 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3299 int sp = PG_GETARG_INT32(2); /* substring start position */
3300 int sl = PG_GETARG_INT32(3); /* substring length */
3301
3302 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3303}
3304
3305Datum
3306byteaoverlay_no_len(PG_FUNCTION_ARGS)
3307{
3308 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3309 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3310 int sp = PG_GETARG_INT32(2); /* substring start position */
3311 int sl;
3312
3313 sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
3314 PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3315}
3316
3317static bytea *
3318bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
3319{
3320 bytea *result;
3321 bytea *s1;
3322 bytea *s2;
3323 int sp_pl_sl;
3324
3325 /*
3326 * Check for possible integer-overflow cases. For negative sp, throw a
3327 * "substring length" error because that's what should be expected
3328 * according to the spec's definition of OVERLAY().
3329 */
3330 if (sp <= 0)
3331 ereport(ERROR,
3332 (errcode(ERRCODE_SUBSTRING_ERROR),
3333 errmsg("negative substring length not allowed")));
3334 if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3335 ereport(ERROR,
3336 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3337 errmsg("integer out of range")));
3338
3339 s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
3340 s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
3341 result = bytea_catenate(s1, t2);
3342 result = bytea_catenate(result, s2);
3343
3344 return result;
3345}
3346
3347/*
3348 * byteapos -
3349 * Return the position of the specified substring.
3350 * Implements the SQL POSITION() function.
3351 * Cloned from textpos and modified as required.
3352 */
3353Datum
3354byteapos(PG_FUNCTION_ARGS)
3355{
3356 bytea *t1 = PG_GETARG_BYTEA_PP(0);
3357 bytea *t2 = PG_GETARG_BYTEA_PP(1);
3358 int pos;
3359 int px,
3360 p;
3361 int len1,
3362 len2;
3363 char *p1,
3364 *p2;
3365
3366 len1 = VARSIZE_ANY_EXHDR(t1);
3367 len2 = VARSIZE_ANY_EXHDR(t2);
3368
3369 if (len2 <= 0)
3370 PG_RETURN_INT32(1); /* result for empty pattern */
3371
3372 p1 = VARDATA_ANY(t1);
3373 p2 = VARDATA_ANY(t2);
3374
3375 pos = 0;
3376 px = (len1 - len2);
3377 for (p = 0; p <= px; p++)
3378 {
3379 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
3380 {
3381 pos = p + 1;
3382 break;
3383 };
3384 p1++;
3385 };
3386
3387 PG_RETURN_INT32(pos);
3388}
3389
3390/*-------------------------------------------------------------
3391 * byteaGetByte
3392 *
3393 * this routine treats "bytea" as an array of bytes.
3394 * It returns the Nth byte (a number between 0 and 255).
3395 *-------------------------------------------------------------
3396 */
3397Datum
3398byteaGetByte(PG_FUNCTION_ARGS)
3399{
3400 bytea *v = PG_GETARG_BYTEA_PP(0);
3401 int32 n = PG_GETARG_INT32(1);
3402 int len;
3403 int byte;
3404
3405 len = VARSIZE_ANY_EXHDR(v);
3406
3407 if (n < 0 || n >= len)
3408 ereport(ERROR,
3409 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3410 errmsg("index %d out of valid range, 0..%d",
3411 n, len - 1)));
3412
3413 byte = ((unsigned char *) VARDATA_ANY(v))[n];
3414
3415 PG_RETURN_INT32(byte);
3416}
3417
3418/*-------------------------------------------------------------
3419 * byteaGetBit
3420 *
3421 * This routine treats a "bytea" type like an array of bits.
3422 * It returns the value of the Nth bit (0 or 1).
3423 *
3424 *-------------------------------------------------------------
3425 */
3426Datum
3427byteaGetBit(PG_FUNCTION_ARGS)
3428{
3429 bytea *v = PG_GETARG_BYTEA_PP(0);
3430 int32 n = PG_GETARG_INT32(1);
3431 int byteNo,
3432 bitNo;
3433 int len;
3434 int byte;
3435
3436 len = VARSIZE_ANY_EXHDR(v);
3437
3438 if (n < 0 || n >= len * 8)
3439 ereport(ERROR,
3440 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3441 errmsg("index %d out of valid range, 0..%d",
3442 n, len * 8 - 1)));
3443
3444 byteNo = n / 8;
3445 bitNo = n % 8;
3446
3447 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3448
3449 if (byte & (1 << bitNo))
3450 PG_RETURN_INT32(1);
3451 else
3452 PG_RETURN_INT32(0);
3453}
3454
3455/*-------------------------------------------------------------
3456 * byteaSetByte
3457 *
3458 * Given an instance of type 'bytea' creates a new one with
3459 * the Nth byte set to the given value.
3460 *
3461 *-------------------------------------------------------------
3462 */
3463Datum
3464byteaSetByte(PG_FUNCTION_ARGS)
3465{
3466 bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3467 int32 n = PG_GETARG_INT32(1);
3468 int32 newByte = PG_GETARG_INT32(2);
3469 int len;
3470
3471 len = VARSIZE(res) - VARHDRSZ;
3472
3473 if (n < 0 || n >= len)
3474 ereport(ERROR,
3475 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3476 errmsg("index %d out of valid range, 0..%d",
3477 n, len - 1)));
3478
3479 /*
3480 * Now set the byte.
3481 */
3482 ((unsigned char *) VARDATA(res))[n] = newByte;
3483
3484 PG_RETURN_BYTEA_P(res);
3485}
3486
3487/*-------------------------------------------------------------
3488 * byteaSetBit
3489 *
3490 * Given an instance of type 'bytea' creates a new one with
3491 * the Nth bit set to the given value.
3492 *
3493 *-------------------------------------------------------------
3494 */
3495Datum
3496byteaSetBit(PG_FUNCTION_ARGS)
3497{
3498 bytea *res = PG_GETARG_BYTEA_P_COPY(0);
3499 int32 n = PG_GETARG_INT32(1);
3500 int32 newBit = PG_GETARG_INT32(2);
3501 int len;
3502 int oldByte,
3503 newByte;
3504 int byteNo,
3505 bitNo;
3506
3507 len = VARSIZE(res) - VARHDRSZ;
3508
3509 if (n < 0 || n >= len * 8)
3510 ereport(ERROR,
3511 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3512 errmsg("index %d out of valid range, 0..%d",
3513 n, len * 8 - 1)));
3514
3515 byteNo = n / 8;
3516 bitNo = n % 8;
3517
3518 /*
3519 * sanity check!
3520 */
3521 if (newBit != 0 && newBit != 1)
3522 ereport(ERROR,
3523 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3524 errmsg("new bit must be 0 or 1")));
3525
3526 /*
3527 * Update the byte.
3528 */
3529 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3530
3531 if (newBit == 0)
3532 newByte = oldByte & (~(1 << bitNo));
3533 else
3534 newByte = oldByte | (1 << bitNo);
3535
3536 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
3537
3538 PG_RETURN_BYTEA_P(res);
3539}
3540
3541
3542/* text_name()
3543 * Converts a text type to a Name type.
3544 */
3545Datum
3546text_name(PG_FUNCTION_ARGS)
3547{
3548 text *s = PG_GETARG_TEXT_PP(0);
3549 Name result;
3550 int len;
3551
3552 len = VARSIZE_ANY_EXHDR(s);
3553
3554 /* Truncate oversize input */
3555 if (len >= NAMEDATALEN)
3556 len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
3557
3558 /* We use palloc0 here to ensure result is zero-padded */
3559 result = (Name) palloc0(NAMEDATALEN);
3560 memcpy(NameStr(*result), VARDATA_ANY(s), len);
3561
3562 PG_RETURN_NAME(result);
3563}
3564
3565/* name_text()
3566 * Converts a Name type to a text type.
3567 */
3568Datum
3569name_text(PG_FUNCTION_ARGS)
3570{
3571 Name s = PG_GETARG_NAME(0);
3572
3573 PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
3574}
3575
3576
3577/*
3578 * textToQualifiedNameList - convert a text object to list of names
3579 *
3580 * This implements the input parsing needed by nextval() and other
3581 * functions that take a text parameter representing a qualified name.
3582 * We split the name at dots, downcase if not double-quoted, and
3583 * truncate names if they're too long.
3584 */
3585List *
3586textToQualifiedNameList(text *textval)
3587{
3588 char *rawname;
3589 List *result = NIL;
3590 List *namelist;
3591 ListCell *l;
3592
3593 /* Convert to C string (handles possible detoasting). */
3594 /* Note we rely on being able to modify rawname below. */
3595 rawname = text_to_cstring(textval);
3596
3597 if (!SplitIdentifierString(rawname, '.', &namelist))
3598 ereport(ERROR,
3599 (errcode(ERRCODE_INVALID_NAME),
3600 errmsg("invalid name syntax")));
3601
3602 if (namelist == NIL)
3603 ereport(ERROR,
3604 (errcode(ERRCODE_INVALID_NAME),
3605 errmsg("invalid name syntax")));
3606
3607 foreach(l, namelist)
3608 {
3609 char *curname = (char *) lfirst(l);
3610
3611 result = lappend(result, makeString(pstrdup(curname)));
3612 }
3613
3614 pfree(rawname);
3615 list_free(namelist);
3616
3617 return result;
3618}
3619
3620/*
3621 * SplitIdentifierString --- parse a string containing identifiers
3622 *
3623 * This is the guts of textToQualifiedNameList, and is exported for use in
3624 * other situations such as parsing GUC variables. In the GUC case, it's
3625 * important to avoid memory leaks, so the API is designed to minimize the
3626 * amount of stuff that needs to be allocated and freed.
3627 *
3628 * Inputs:
3629 * rawstring: the input string; must be overwritable! On return, it's
3630 * been modified to contain the separated identifiers.
3631 * separator: the separator punctuation expected between identifiers
3632 * (typically '.' or ','). Whitespace may also appear around
3633 * identifiers.
3634 * Outputs:
3635 * namelist: filled with a palloc'd list of pointers to identifiers within
3636 * rawstring. Caller should list_free() this even on error return.
3637 *
3638 * Returns true if okay, false if there is a syntax error in the string.
3639 *
3640 * Note that an empty string is considered okay here, though not in
3641 * textToQualifiedNameList.
3642 */
3643bool
3644SplitIdentifierString(char *rawstring, char separator,
3645 List **namelist)
3646{
3647 char *nextp = rawstring;
3648 bool done = false;
3649
3650 *namelist = NIL;
3651
3652 while (scanner_isspace(*nextp))
3653 nextp++; /* skip leading whitespace */
3654
3655 if (*nextp == '\0')
3656 return true; /* allow empty string */
3657
3658 /* At the top of the loop, we are at start of a new identifier. */
3659 do
3660 {
3661 char *curname;
3662 char *endp;
3663
3664 if (*nextp == '"')
3665 {
3666 /* Quoted name --- collapse quote-quote pairs, no downcasing */
3667 curname = nextp + 1;
3668 for (;;)
3669 {
3670 endp = strchr(nextp + 1, '"');
3671 if (endp == NULL)
3672 return false; /* mismatched quotes */
3673 if (endp[1] != '"')
3674 break; /* found end of quoted name */
3675 /* Collapse adjacent quotes into one quote, and look again */
3676 memmove(endp, endp + 1, strlen(endp));
3677 nextp = endp;
3678 }
3679 /* endp now points at the terminating quote */
3680 nextp = endp + 1;
3681 }
3682 else
3683 {
3684 /* Unquoted name --- extends to separator or whitespace */
3685 char *downname;
3686 int len;
3687
3688 curname = nextp;
3689 while (*nextp && *nextp != separator &&
3690 !scanner_isspace(*nextp))
3691 nextp++;
3692 endp = nextp;
3693 if (curname == nextp)
3694 return false; /* empty unquoted name not allowed */
3695
3696 /*
3697 * Downcase the identifier, using same code as main lexer does.
3698 *
3699 * XXX because we want to overwrite the input in-place, we cannot
3700 * support a downcasing transformation that increases the string
3701 * length. This is not a problem given the current implementation
3702 * of downcase_truncate_identifier, but we'll probably have to do
3703 * something about this someday.
3704 */
3705 len = endp - curname;
3706 downname = downcase_truncate_identifier(curname, len, false);
3707 Assert(strlen(downname) <= len);
3708 strncpy(curname, downname, len); /* strncpy is required here */
3709 pfree(downname);
3710 }
3711
3712 while (scanner_isspace(*nextp))
3713 nextp++; /* skip trailing whitespace */
3714
3715 if (*nextp == separator)
3716 {
3717 nextp++;
3718 while (scanner_isspace(*nextp))
3719 nextp++; /* skip leading whitespace for next */
3720 /* we expect another name, so done remains false */
3721 }
3722 else if (*nextp == '\0')
3723 done = true;
3724 else
3725 return false; /* invalid syntax */
3726
3727 /* Now safe to overwrite separator with a null */
3728 *endp = '\0';
3729
3730 /* Truncate name if it's overlength */
3731 truncate_identifier(curname, strlen(curname), false);
3732
3733 /*
3734 * Finished isolating current name --- add it to list
3735 */
3736 *namelist = lappend(*namelist, curname);
3737
3738 /* Loop back if we didn't reach end of string */
3739 } while (!done);
3740
3741 return true;
3742}
3743
3744
3745/*
3746 * SplitDirectoriesString --- parse a string containing file/directory names
3747 *
3748 * This works fine on file names too; the function name is historical.
3749 *
3750 * This is similar to SplitIdentifierString, except that the parsing
3751 * rules are meant to handle pathnames instead of identifiers: there is
3752 * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3753 * and we apply canonicalize_path() to each extracted string. Because of the
3754 * last, the returned strings are separately palloc'd rather than being
3755 * pointers into rawstring --- but we still scribble on rawstring.
3756 *
3757 * Inputs:
3758 * rawstring: the input string; must be modifiable!
3759 * separator: the separator punctuation expected between directories
3760 * (typically ',' or ';'). Whitespace may also appear around
3761 * directories.
3762 * Outputs:
3763 * namelist: filled with a palloc'd list of directory names.
3764 * Caller should list_free_deep() this even on error return.
3765 *
3766 * Returns true if okay, false if there is a syntax error in the string.
3767 *
3768 * Note that an empty string is considered okay here.
3769 */
3770bool
3771SplitDirectoriesString(char *rawstring, char separator,
3772 List **namelist)
3773{
3774 char *nextp = rawstring;
3775 bool done = false;
3776
3777 *namelist = NIL;
3778
3779 while (scanner_isspace(*nextp))
3780 nextp++; /* skip leading whitespace */
3781
3782 if (*nextp == '\0')
3783 return true; /* allow empty string */
3784
3785 /* At the top of the loop, we are at start of a new directory. */
3786 do
3787 {
3788 char *curname;
3789 char *endp;
3790
3791 if (*nextp == '"')
3792 {
3793 /* Quoted name --- collapse quote-quote pairs */
3794 curname = nextp + 1;
3795 for (;;)
3796 {
3797 endp = strchr(nextp + 1, '"');
3798 if (endp == NULL)
3799 return false; /* mismatched quotes */
3800 if (endp[1] != '"')
3801 break; /* found end of quoted name */
3802 /* Collapse adjacent quotes into one quote, and look again */
3803 memmove(endp, endp + 1, strlen(endp));
3804 nextp = endp;
3805 }
3806 /* endp now points at the terminating quote */
3807 nextp = endp + 1;
3808 }
3809 else
3810 {
3811 /* Unquoted name --- extends to separator or end of string */
3812 curname = endp = nextp;
3813 while (*nextp && *nextp != separator)
3814 {
3815 /* trailing whitespace should not be included in name */
3816 if (!scanner_isspace(*nextp))
3817 endp = nextp + 1;
3818 nextp++;
3819 }
3820 if (curname == endp)
3821 return false; /* empty unquoted name not allowed */
3822 }
3823
3824 while (scanner_isspace(*nextp))
3825 nextp++; /* skip trailing whitespace */
3826
3827 if (*nextp == separator)
3828 {
3829 nextp++;
3830 while (scanner_isspace(*nextp))
3831 nextp++; /* skip leading whitespace for next */
3832 /* we expect another name, so done remains false */
3833 }
3834 else if (*nextp == '\0')
3835 done = true;
3836 else
3837 return false; /* invalid syntax */
3838
3839 /* Now safe to overwrite separator with a null */
3840 *endp = '\0';
3841
3842 /* Truncate path if it's overlength */
3843 if (strlen(curname) >= MAXPGPATH)
3844 curname[MAXPGPATH - 1] = '\0';
3845
3846 /*
3847 * Finished isolating current name --- add it to list
3848 */
3849 curname = pstrdup(curname);
3850 canonicalize_path(curname);
3851 *namelist = lappend(*namelist, curname);
3852
3853 /* Loop back if we didn't reach end of string */
3854 } while (!done);
3855
3856 return true;
3857}
3858
3859
3860/*
3861 * SplitGUCList --- parse a string containing identifiers or file names
3862 *
3863 * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3864 * presuming whether the elements will be taken as identifiers or file names.
3865 * We assume the input has already been through flatten_set_variable_args(),
3866 * so that we need never downcase (if appropriate, that was done already).
3867 * Nor do we ever truncate, since we don't know the correct max length.
3868 * We disallow embedded whitespace for simplicity (it shouldn't matter,
3869 * because any embedded whitespace should have led to double-quoting).
3870 * Otherwise the API is identical to SplitIdentifierString.
3871 *
3872 * XXX it's annoying to have so many copies of this string-splitting logic.
3873 * However, it's not clear that having one function with a bunch of option
3874 * flags would be much better.
3875 *
3876 * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3877 * Be sure to update that if you have to change this.
3878 *
3879 * Inputs:
3880 * rawstring: the input string; must be overwritable! On return, it's
3881 * been modified to contain the separated identifiers.
3882 * separator: the separator punctuation expected between identifiers
3883 * (typically '.' or ','). Whitespace may also appear around
3884 * identifiers.
3885 * Outputs:
3886 * namelist: filled with a palloc'd list of pointers to identifiers within
3887 * rawstring. Caller should list_free() this even on error return.
3888 *
3889 * Returns true if okay, false if there is a syntax error in the string.
3890 */
3891bool
3892SplitGUCList(char *rawstring, char separator,
3893 List **namelist)
3894{
3895 char *nextp = rawstring;
3896 bool done = false;
3897
3898 *namelist = NIL;
3899
3900 while (scanner_isspace(*nextp))
3901 nextp++; /* skip leading whitespace */
3902
3903 if (*nextp == '\0')
3904 return true; /* allow empty string */
3905
3906 /* At the top of the loop, we are at start of a new identifier. */
3907 do
3908 {
3909 char *curname;
3910 char *endp;
3911
3912 if (*nextp == '"')
3913 {
3914 /* Quoted name --- collapse quote-quote pairs */
3915 curname = nextp + 1;
3916 for (;;)
3917 {
3918 endp = strchr(nextp + 1, '"');
3919 if (endp == NULL)
3920 return false; /* mismatched quotes */
3921 if (endp[1] != '"')
3922 break; /* found end of quoted name */
3923 /* Collapse adjacent quotes into one quote, and look again */
3924 memmove(endp, endp + 1, strlen(endp));
3925 nextp = endp;
3926 }
3927 /* endp now points at the terminating quote */
3928 nextp = endp + 1;
3929 }
3930 else
3931 {
3932 /* Unquoted name --- extends to separator or whitespace */
3933 curname = nextp;
3934 while (*nextp && *nextp != separator &&
3935 !scanner_isspace(*nextp))
3936 nextp++;
3937 endp = nextp;
3938 if (curname == nextp)
3939 return false; /* empty unquoted name not allowed */
3940 }
3941
3942 while (scanner_isspace(*nextp))
3943 nextp++; /* skip trailing whitespace */
3944
3945 if (*nextp == separator)
3946 {
3947 nextp++;
3948 while (scanner_isspace(*nextp))
3949 nextp++; /* skip leading whitespace for next */
3950 /* we expect another name, so done remains false */
3951 }
3952 else if (*nextp == '\0')
3953 done = true;
3954 else
3955 return false; /* invalid syntax */
3956
3957 /* Now safe to overwrite separator with a null */
3958 *endp = '\0';
3959
3960 /*
3961 * Finished isolating current name --- add it to list
3962 */
3963 *namelist = lappend(*namelist, curname);
3964
3965 /* Loop back if we didn't reach end of string */
3966 } while (!done);
3967
3968 return true;
3969}
3970
3971
3972/*****************************************************************************
3973 * Comparison Functions used for bytea
3974 *
3975 * Note: btree indexes need these routines not to leak memory; therefore,
3976 * be careful to free working copies of toasted datums. Most places don't
3977 * need to be so careful.
3978 *****************************************************************************/
3979
3980Datum
3981byteaeq(PG_FUNCTION_ARGS)
3982{
3983 Datum arg1 = PG_GETARG_DATUM(0);
3984 Datum arg2 = PG_GETARG_DATUM(1);
3985 bool result;
3986 Size len1,
3987 len2;
3988
3989 /*
3990 * We can use a fast path for unequal lengths, which might save us from
3991 * having to detoast one or both values.
3992 */
3993 len1 = toast_raw_datum_size(arg1);
3994 len2 = toast_raw_datum_size(arg2);
3995 if (len1 != len2)
3996 result = false;
3997 else
3998 {
3999 bytea *barg1 = DatumGetByteaPP(arg1);
4000 bytea *barg2 = DatumGetByteaPP(arg2);
4001
4002 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4003 len1 - VARHDRSZ) == 0);
4004
4005 PG_FREE_IF_COPY(barg1, 0);
4006 PG_FREE_IF_COPY(barg2, 1);
4007 }
4008
4009 PG_RETURN_BOOL(result);
4010}
4011
4012Datum
4013byteane(PG_FUNCTION_ARGS)
4014{
4015 Datum arg1 = PG_GETARG_DATUM(0);
4016 Datum arg2 = PG_GETARG_DATUM(1);
4017 bool result;
4018 Size len1,
4019 len2;
4020
4021 /*
4022 * We can use a fast path for unequal lengths, which might save us from
4023 * having to detoast one or both values.
4024 */
4025 len1 = toast_raw_datum_size(arg1);
4026 len2 = toast_raw_datum_size(arg2);
4027 if (len1 != len2)
4028 result = true;
4029 else
4030 {
4031 bytea *barg1 = DatumGetByteaPP(arg1);
4032 bytea *barg2 = DatumGetByteaPP(arg2);
4033
4034 result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4035 len1 - VARHDRSZ) != 0);
4036
4037 PG_FREE_IF_COPY(barg1, 0);
4038 PG_FREE_IF_COPY(barg2, 1);
4039 }
4040
4041 PG_RETURN_BOOL(result);
4042}
4043
4044Datum
4045bytealt(PG_FUNCTION_ARGS)
4046{
4047 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4048 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4049 int len1,
4050 len2;
4051 int cmp;
4052
4053 len1 = VARSIZE_ANY_EXHDR(arg1);
4054 len2 = VARSIZE_ANY_EXHDR(arg2);
4055
4056 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4057
4058 PG_FREE_IF_COPY(arg1, 0);
4059 PG_FREE_IF_COPY(arg2, 1);
4060
4061 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
4062}
4063
4064Datum
4065byteale(PG_FUNCTION_ARGS)
4066{
4067 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4068 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4069 int len1,
4070 len2;
4071 int cmp;
4072
4073 len1 = VARSIZE_ANY_EXHDR(arg1);
4074 len2 = VARSIZE_ANY_EXHDR(arg2);
4075
4076 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4077
4078 PG_FREE_IF_COPY(arg1, 0);
4079 PG_FREE_IF_COPY(arg2, 1);
4080
4081 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
4082}
4083
4084Datum
4085byteagt(PG_FUNCTION_ARGS)
4086{
4087 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4088 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4089 int len1,
4090 len2;
4091 int cmp;
4092
4093 len1 = VARSIZE_ANY_EXHDR(arg1);
4094 len2 = VARSIZE_ANY_EXHDR(arg2);
4095
4096 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4097
4098 PG_FREE_IF_COPY(arg1, 0);
4099 PG_FREE_IF_COPY(arg2, 1);
4100
4101 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
4102}
4103
4104Datum
4105byteage(PG_FUNCTION_ARGS)
4106{
4107 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4108 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4109 int len1,
4110 len2;
4111 int cmp;
4112
4113 len1 = VARSIZE_ANY_EXHDR(arg1);
4114 len2 = VARSIZE_ANY_EXHDR(arg2);
4115
4116 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4117
4118 PG_FREE_IF_COPY(arg1, 0);
4119 PG_FREE_IF_COPY(arg2, 1);
4120
4121 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
4122}
4123
4124Datum
4125byteacmp(PG_FUNCTION_ARGS)
4126{
4127 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
4128 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
4129 int len1,
4130 len2;
4131 int cmp;
4132
4133 len1 = VARSIZE_ANY_EXHDR(arg1);
4134 len2 = VARSIZE_ANY_EXHDR(arg2);
4135
4136 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4137 if ((cmp == 0) && (len1 != len2))
4138 cmp = (len1 < len2) ? -1 : 1;
4139
4140 PG_FREE_IF_COPY(arg1, 0);
4141 PG_FREE_IF_COPY(arg2, 1);
4142
4143 PG_RETURN_INT32(cmp);
4144}
4145
4146Datum
4147bytea_sortsupport(PG_FUNCTION_ARGS)
4148{
4149 SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
4150 MemoryContext oldcontext;
4151
4152 oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
4153
4154 /* Use generic string SortSupport, forcing "C" collation */
4155 varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
4156
4157 MemoryContextSwitchTo(oldcontext);
4158
4159 PG_RETURN_VOID();
4160}
4161
4162/*
4163 * appendStringInfoText
4164 *
4165 * Append a text to str.
4166 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
4167 */
4168static void
4169appendStringInfoText(StringInfo str, const text *t)
4170{
4171 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
4172}
4173
4174/*
4175 * replace_text
4176 * replace all occurrences of 'old_sub_str' in 'orig_str'
4177 * with 'new_sub_str' to form 'new_str'
4178 *
4179 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4180 * otherwise returns 'new_str'
4181 */
4182Datum
4183replace_text(PG_FUNCTION_ARGS)
4184{
4185 text *src_text = PG_GETARG_TEXT_PP(0);
4186 text *from_sub_text = PG_GETARG_TEXT_PP(1);
4187 text *to_sub_text = PG_GETARG_TEXT_PP(2);
4188 int src_text_len;
4189 int from_sub_text_len;
4190 TextPositionState state;
4191 text *ret_text;
4192 int chunk_len;
4193 char *curr_ptr;
4194 char *start_ptr;
4195 StringInfoData str;
4196 bool found;
4197
4198 src_text_len = VARSIZE_ANY_EXHDR(src_text);
4199 from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4200
4201 /* Return unmodified source string if empty source or pattern */
4202 if (src_text_len < 1 || from_sub_text_len < 1)
4203 {
4204 PG_RETURN_TEXT_P(src_text);
4205 }
4206
4207 text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4208
4209 found = text_position_next(&state);
4210
4211 /* When the from_sub_text is not found, there is nothing to do. */
4212 if (!found)
4213 {
4214 text_position_cleanup(&state);
4215 PG_RETURN_TEXT_P(src_text);
4216 }
4217 curr_ptr = text_position_get_match_ptr(&state);
4218 start_ptr = VARDATA_ANY(src_text);
4219
4220 initStringInfo(&str);
4221
4222 do
4223 {
4224 CHECK_FOR_INTERRUPTS();
4225
4226 /* copy the data skipped over by last text_position_next() */
4227 chunk_len = curr_ptr - start_ptr;
4228 appendBinaryStringInfo(&str, start_ptr, chunk_len);
4229
4230 appendStringInfoText(&str, to_sub_text);
4231
4232 start_ptr = curr_ptr + from_sub_text_len;
4233
4234 found = text_position_next(&state);
4235 if (found)
4236 curr_ptr = text_position_get_match_ptr(&state);
4237 }
4238 while (found);
4239
4240 /* copy trailing data */
4241 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4242 appendBinaryStringInfo(&str, start_ptr, chunk_len);
4243
4244 text_position_cleanup(&state);
4245
4246 ret_text = cstring_to_text_with_len(str.data, str.len);
4247 pfree(str.data);
4248
4249 PG_RETURN_TEXT_P(ret_text);
4250}
4251
4252/*
4253 * check_replace_text_has_escape_char
4254 *
4255 * check whether replace_text contains escape char.
4256 */
4257static bool
4258check_replace_text_has_escape_char(const text *replace_text)
4259{
4260 const char *p = VARDATA_ANY(replace_text);
4261 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4262
4263 if (pg_database_encoding_max_length() == 1)
4264 {
4265 for (; p < p_end; p++)
4266 {
4267 if (*p == '\\')
4268 return true;
4269 }
4270 }
4271 else
4272 {
4273 for (; p < p_end; p += pg_mblen(p))
4274 {
4275 if (*p == '\\')
4276 return true;
4277 }
4278 }
4279
4280 return false;
4281}
4282
4283/*
4284 * appendStringInfoRegexpSubstr
4285 *
4286 * Append replace_text to str, substituting regexp back references for
4287 * \n escapes. start_ptr is the start of the match in the source string,
4288 * at logical character position data_pos.
4289 */
4290static void
4291appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
4292 regmatch_t *pmatch,
4293 char *start_ptr, int data_pos)
4294{
4295 const char *p = VARDATA_ANY(replace_text);
4296 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4297 int eml = pg_database_encoding_max_length();
4298
4299 for (;;)
4300 {
4301 const char *chunk_start = p;
4302 int so;
4303 int eo;
4304
4305 /* Find next escape char. */
4306 if (eml == 1)
4307 {
4308 for (; p < p_end && *p != '\\'; p++)
4309 /* nothing */ ;
4310 }
4311 else
4312 {
4313 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
4314 /* nothing */ ;
4315 }
4316
4317 /* Copy the text we just scanned over, if any. */
4318 if (p > chunk_start)
4319 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4320
4321 /* Done if at end of string, else advance over escape char. */
4322 if (p >= p_end)
4323 break;
4324 p++;
4325
4326 if (p >= p_end)
4327 {
4328 /* Escape at very end of input. Treat same as unexpected char */
4329 appendStringInfoChar(str, '\\');
4330 break;
4331 }
4332
4333 if (*p >= '1' && *p <= '9')
4334 {
4335 /* Use the back reference of regexp. */
4336 int idx = *p - '0';
4337
4338 so = pmatch[idx].rm_so;
4339 eo = pmatch[idx].rm_eo;
4340 p++;
4341 }
4342 else if (*p == '&')
4343 {
4344 /* Use the entire matched string. */
4345 so = pmatch[0].rm_so;
4346 eo = pmatch[0].rm_eo;
4347 p++;
4348 }
4349 else if (*p == '\\')
4350 {
4351 /* \\ means transfer one \ to output. */
4352 appendStringInfoChar(str, '\\');
4353 p++;
4354 continue;
4355 }
4356 else
4357 {
4358 /*
4359 * If escape char is not followed by any expected char, just treat
4360 * it as ordinary data to copy. (XXX would it be better to throw
4361 * an error?)
4362 */
4363 appendStringInfoChar(str, '\\');
4364 continue;
4365 }
4366
4367 if (so != -1 && eo != -1)
4368 {
4369 /*
4370 * Copy the text that is back reference of regexp. Note so and eo
4371 * are counted in characters not bytes.
4372 */
4373 char *chunk_start;
4374 int chunk_len;
4375
4376 Assert(so >= data_pos);
4377 chunk_start = start_ptr;
4378 chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4379 chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4380 appendBinaryStringInfo(str, chunk_start, chunk_len);
4381 }
4382 }
4383}
4384
4385#define REGEXP_REPLACE_BACKREF_CNT 10
4386
4387/*
4388 * replace_text_regexp
4389 *
4390 * replace text that matches to regexp in src_text to replace_text.
4391 *
4392 * Note: to avoid having to include regex.h in builtins.h, we declare
4393 * the regexp argument as void *, but really it's regex_t *.
4394 */
4395text *
4396replace_text_regexp(text *src_text, void *regexp,
4397 text *replace_text, bool glob)
4398{
4399 text *ret_text;
4400 regex_t *re = (regex_t *) regexp;
4401 int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4402 StringInfoData buf;
4403 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
4404 pg_wchar *data;
4405 size_t data_len;
4406 int search_start;
4407 int data_pos;
4408 char *start_ptr;
4409 bool have_escape;
4410
4411 initStringInfo(&buf);
4412
4413 /* Convert data string to wide characters. */
4414 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
4415 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4416
4417 /* Check whether replace_text has escape char. */
4418 have_escape = check_replace_text_has_escape_char(replace_text);
4419
4420 /* start_ptr points to the data_pos'th character of src_text */
4421 start_ptr = (char *) VARDATA_ANY(src_text);
4422 data_pos = 0;
4423
4424 search_start = 0;
4425 while (search_start <= data_len)
4426 {
4427 int regexec_result;
4428
4429 CHECK_FOR_INTERRUPTS();
4430
4431 regexec_result = pg_regexec(re,
4432 data,
4433 data_len,
4434 search_start,
4435 NULL, /* no details */
4436 REGEXP_REPLACE_BACKREF_CNT,
4437 pmatch,
4438 0);
4439
4440 if (regexec_result == REG_NOMATCH)
4441 break;
4442
4443 if (regexec_result != REG_OKAY)
4444 {
4445 char errMsg[100];
4446
4447 CHECK_FOR_INTERRUPTS();
4448 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4449 ereport(ERROR,
4450 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4451 errmsg("regular expression failed: %s", errMsg)));
4452 }
4453
4454 /*
4455 * Copy the text to the left of the match position. Note we are given
4456 * character not byte indexes.
4457 */
4458 if (pmatch[0].rm_so - data_pos > 0)
4459 {
4460 int chunk_len;
4461
4462 chunk_len = charlen_to_bytelen(start_ptr,
4463 pmatch[0].rm_so - data_pos);
4464 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4465
4466 /*
4467 * Advance start_ptr over that text, to avoid multiple rescans of
4468 * it if the replace_text contains multiple back-references.
4469 */
4470 start_ptr += chunk_len;
4471 data_pos = pmatch[0].rm_so;
4472 }
4473
4474 /*
4475 * Copy the replace_text. Process back references when the
4476 * replace_text has escape characters.
4477 */
4478 if (have_escape)
4479 appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4480 start_ptr, data_pos);
4481 else
4482 appendStringInfoText(&buf, replace_text);
4483
4484 /* Advance start_ptr and data_pos over the matched text. */
4485 start_ptr += charlen_to_bytelen(start_ptr,
4486 pmatch[0].rm_eo - data_pos);
4487 data_pos = pmatch[0].rm_eo;
4488
4489 /*
4490 * When global option is off, replace the first instance only.
4491 */
4492 if (!glob)
4493 break;
4494
4495 /*
4496 * Advance search position. Normally we start the next search at the
4497 * end of the previous match; but if the match was of zero length, we
4498 * have to advance by one character, or we'd just find the same match
4499 * again.
4500 */
4501 search_start = data_pos;
4502 if (pmatch[0].rm_so == pmatch[0].rm_eo)
4503 search_start++;
4504 }
4505
4506 /*
4507 * Copy the text to the right of the last match.
4508 */
4509 if (data_pos < data_len)
4510 {
4511 int chunk_len;
4512
4513 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4514 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4515 }
4516
4517 ret_text = cstring_to_text_with_len(buf.data, buf.len);
4518 pfree(buf.data);
4519 pfree(data);
4520
4521 return ret_text;
4522}
4523
4524/*
4525 * split_text
4526 * parse input string
4527 * return ord item (1 based)
4528 * based on provided field separator
4529 */
4530Datum
4531split_text(PG_FUNCTION_ARGS)
4532{
4533 text *inputstring = PG_GETARG_TEXT_PP(0);
4534 text *fldsep = PG_GETARG_TEXT_PP(1);
4535 int fldnum = PG_GETARG_INT32(2);
4536 int inputstring_len;
4537 int fldsep_len;
4538 TextPositionState state;
4539 char *start_ptr;
4540 char *end_ptr;
4541 text *result_text;
4542 bool found;
4543
4544 /* field number is 1 based */
4545 if (fldnum < 1)
4546 ereport(ERROR,
4547 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4548 errmsg("field position must be greater than zero")));
4549
4550 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4551 fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4552
4553 /* return empty string for empty input string */
4554 if (inputstring_len < 1)
4555 PG_RETURN_TEXT_P(cstring_to_text(""));
4556
4557 /* empty field separator */
4558 if (fldsep_len < 1)
4559 {
4560 text_position_cleanup(&state);
4561 /* if first field, return input string, else empty string */
4562 if (fldnum == 1)
4563 PG_RETURN_TEXT_P(inputstring);
4564 else
4565 PG_RETURN_TEXT_P(cstring_to_text(""));
4566 }
4567
4568 text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4569
4570 /* identify bounds of first field */
4571 start_ptr = VARDATA_ANY(inputstring);
4572 found = text_position_next(&state);
4573
4574 /* special case if fldsep not found at all */
4575 if (!found)
4576 {
4577 text_position_cleanup(&state);
4578 /* if field 1 requested, return input string, else empty string */
4579 if (fldnum == 1)
4580 PG_RETURN_TEXT_P(inputstring);
4581 else
4582 PG_RETURN_TEXT_P(cstring_to_text(""));
4583 }
4584 end_ptr = text_position_get_match_ptr(&state);
4585
4586 while (found && --fldnum > 0)
4587 {
4588 /* identify bounds of next field */
4589 start_ptr = end_ptr + fldsep_len;
4590 found = text_position_next(&state);
4591 if (found)
4592 end_ptr = text_position_get_match_ptr(&state);
4593 }
4594
4595 text_position_cleanup(&state);
4596
4597 if (fldnum > 0)
4598 {
4599 /* N'th field separator not found */
4600 /* if last field requested, return it, else empty string */
4601 if (fldnum == 1)
4602 {
4603 int last_len = start_ptr - VARDATA_ANY(inputstring);
4604
4605 result_text = cstring_to_text_with_len(start_ptr,
4606 inputstring_len - last_len);
4607 }
4608 else
4609 result_text = cstring_to_text("");
4610 }
4611 else
4612 {
4613 /* non-last field requested */
4614 result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4615 }
4616
4617 PG_RETURN_TEXT_P(result_text);
4618}
4619
4620/*
4621 * Convenience function to return true when two text params are equal.
4622 */
4623static bool
4624text_isequal(text *txt1, text *txt2, Oid collid)
4625{
4626 return DatumGetBool(DirectFunctionCall2Coll(texteq,
4627 collid,
4628 PointerGetDatum(txt1),
4629 PointerGetDatum(txt2)));
4630}
4631
/*
 * text_to_array
 * parse input string and return text array of elements,
 * based on provided field separator
 *
 * All the work, including the handling of NULL arguments, is done by
 * text_to_array_internal(); this entry point only forwards the call.
 */
Datum
text_to_array(PG_FUNCTION_ARGS)
{
    return text_to_array_internal(fcinfo);
}
4642
/*
 * text_to_array_null
 * parse input string and return text array of elements,
 * based on provided field separator and null string
 *
 * This is a separate entry point only to prevent the regression tests from
 * complaining about different argument sets for the same internal function.
 * The implementation is shared: text_to_array_internal() looks at the
 * argument count to decide whether a null string was supplied.
 */
Datum
text_to_array_null(PG_FUNCTION_ARGS)
{
    return text_to_array_internal(fcinfo);
}
4656
4657/*
4658 * common code for text_to_array and text_to_array_null functions
4659 *
4660 * These are not strict so we have to test for null inputs explicitly.
4661 */
4662static Datum
4663text_to_array_internal(PG_FUNCTION_ARGS)
4664{
4665 text *inputstring;
4666 text *fldsep;
4667 text *null_string;
4668 int inputstring_len;
4669 int fldsep_len;
4670 char *start_ptr;
4671 text *result_text;
4672 bool is_null;
4673 ArrayBuildState *astate = NULL;
4674
4675 /* when input string is NULL, then result is NULL too */
4676 if (PG_ARGISNULL(0))
4677 PG_RETURN_NULL();
4678
4679 inputstring = PG_GETARG_TEXT_PP(0);
4680
4681 /* fldsep can be NULL */
4682 if (!PG_ARGISNULL(1))
4683 fldsep = PG_GETARG_TEXT_PP(1);
4684 else
4685 fldsep = NULL;
4686
4687 /* null_string can be NULL or omitted */
4688 if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
4689 null_string = PG_GETARG_TEXT_PP(2);
4690 else
4691 null_string = NULL;
4692
4693 if (fldsep != NULL)
4694 {
4695 /*
4696 * Normal case with non-null fldsep. Use the text_position machinery
4697 * to search for occurrences of fldsep.
4698 */
4699 TextPositionState state;
4700
4701 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4702 fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4703
4704 /* return empty array for empty input string */
4705 if (inputstring_len < 1)
4706 PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4707
4708 /*
4709 * empty field separator: return the input string as a one-element
4710 * array
4711 */
4712 if (fldsep_len < 1)
4713 {
4714 Datum elems[1];
4715 bool nulls[1];
4716 int dims[1];
4717 int lbs[1];
4718
4719 /* single element can be a NULL too */
4720 is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
4721
4722 elems[0] = PointerGetDatum(inputstring);
4723 nulls[0] = is_null;
4724 dims[0] = 1;
4725 lbs[0] = 1;
4726 /* XXX: this hardcodes assumptions about the text type */
4727 PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
4728 1, dims, lbs,
4729 TEXTOID, -1, false, 'i'));
4730 }
4731
4732 text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4733
4734 start_ptr = VARDATA_ANY(inputstring);
4735
4736 for (;;)
4737 {
4738 bool found;
4739 char *end_ptr;
4740 int chunk_len;
4741
4742 CHECK_FOR_INTERRUPTS();
4743
4744 found = text_position_next(&state);
4745 if (!found)
4746 {
4747 /* fetch last field */
4748 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4749 end_ptr = NULL; /* not used, but some compilers complain */
4750 }
4751 else
4752 {
4753 /* fetch non-last field */
4754 end_ptr = text_position_get_match_ptr(&state);
4755 chunk_len = end_ptr - start_ptr;
4756 }
4757
4758 /* must build a temp text datum to pass to accumArrayResult */
4759 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4760 is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4761
4762 /* stash away this field */
4763 astate = accumArrayResult(astate,
4764 PointerGetDatum(result_text),
4765 is_null,
4766 TEXTOID,
4767 CurrentMemoryContext);
4768
4769 pfree(result_text);
4770
4771 if (!found)
4772 break;
4773
4774 start_ptr = end_ptr + fldsep_len;
4775 }
4776
4777 text_position_cleanup(&state);
4778 }
4779 else
4780 {
4781 /*
4782 * When fldsep is NULL, each character in the inputstring becomes an
4783 * element in the result array. The separator is effectively the
4784 * space between characters.
4785 */
4786 inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4787
4788 /* return empty array for empty input string */
4789 if (inputstring_len < 1)
4790 PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4791
4792 start_ptr = VARDATA_ANY(inputstring);
4793
4794 while (inputstring_len > 0)
4795 {
4796 int chunk_len = pg_mblen(start_ptr);
4797
4798 CHECK_FOR_INTERRUPTS();
4799
4800 /* must build a temp text datum to pass to accumArrayResult */
4801 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4802 is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4803
4804 /* stash away this field */
4805 astate = accumArrayResult(astate,
4806 PointerGetDatum(result_text),
4807 is_null,
4808 TEXTOID,
4809 CurrentMemoryContext);
4810
4811 pfree(result_text);
4812
4813 start_ptr += chunk_len;
4814 inputstring_len -= chunk_len;
4815 }
4816 }
4817
4818 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
4819 CurrentMemoryContext));
4820}
4821
4822/*
4823 * array_to_text
4824 * concatenate Cstring representation of input array elements
4825 * using provided field separator
4826 */
4827Datum
4828array_to_text(PG_FUNCTION_ARGS)
4829{
4830 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
4831 char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4832
4833 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4834}
4835
4836/*
4837 * array_to_text_null
4838 * concatenate Cstring representation of input array elements
4839 * using provided field separator and null string
4840 *
4841 * This version is not strict so we have to test for null inputs explicitly.
4842 */
4843Datum
4844array_to_text_null(PG_FUNCTION_ARGS)
4845{
4846 ArrayType *v;
4847 char *fldsep;
4848 char *null_string;
4849
4850 /* returns NULL when first or second parameter is NULL */
4851 if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
4852 PG_RETURN_NULL();
4853
4854 v = PG_GETARG_ARRAYTYPE_P(0);
4855 fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
4856
4857 /* NULL null string is passed through as a null pointer */
4858 if (!PG_ARGISNULL(2))
4859 null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
4860 else
4861 null_string = NULL;
4862
4863 PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4864}
4865
4866/*
4867 * common code for array_to_text and array_to_text_null functions
4868 */
4869static text *
4870array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
4871 const char *fldsep, const char *null_string)
4872{
4873 text *result;
4874 int nitems,
4875 *dims,
4876 ndims;
4877 Oid element_type;
4878 int typlen;
4879 bool typbyval;
4880 char typalign;
4881 StringInfoData buf;
4882 bool printed = false;
4883 char *p;
4884 bits8 *bitmap;
4885 int bitmask;
4886 int i;
4887 ArrayMetaState *my_extra;
4888
4889 ndims = ARR_NDIM(v);
4890 dims = ARR_DIMS(v);
4891 nitems = ArrayGetNItems(ndims, dims);
4892
4893 /* if there are no elements, return an empty string */
4894 if (nitems == 0)
4895 return cstring_to_text_with_len("", 0);
4896
4897 element_type = ARR_ELEMTYPE(v);
4898 initStringInfo(&buf);
4899
4900 /*
4901 * We arrange to look up info about element type, including its output
4902 * conversion proc, only once per series of calls, assuming the element
4903 * type doesn't change underneath us.
4904 */
4905 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4906 if (my_extra == NULL)
4907 {
4908 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4909 sizeof(ArrayMetaState));
4910 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4911 my_extra->element_type = ~element_type;
4912 }
4913
4914 if (my_extra->element_type != element_type)
4915 {
4916 /*
4917 * Get info about element type, including its output conversion proc
4918 */
4919 get_type_io_data(element_type, IOFunc_output,
4920 &my_extra->typlen, &my_extra->typbyval,
4921 &my_extra->typalign, &my_extra->typdelim,
4922 &my_extra->typioparam, &my_extra->typiofunc);
4923 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4924 fcinfo->flinfo->fn_mcxt);
4925 my_extra->element_type = element_type;
4926 }
4927 typlen = my_extra->typlen;
4928 typbyval = my_extra->typbyval;
4929 typalign = my_extra->typalign;
4930
4931 p = ARR_DATA_PTR(v);
4932 bitmap = ARR_NULLBITMAP(v);
4933 bitmask = 1;
4934
4935 for (i = 0; i < nitems; i++)
4936 {
4937 Datum itemvalue;
4938 char *value;
4939
4940 /* Get source element, checking for NULL */
4941 if (bitmap && (*bitmap & bitmask) == 0)
4942 {
4943 /* if null_string is NULL, we just ignore null elements */
4944 if (null_string != NULL)
4945 {
4946 if (printed)
4947 appendStringInfo(&buf, "%s%s", fldsep, null_string);
4948 else
4949 appendStringInfoString(&buf, null_string);
4950 printed = true;
4951 }
4952 }
4953 else
4954 {
4955 itemvalue = fetch_att(p, typbyval, typlen);
4956
4957 value = OutputFunctionCall(&my_extra->proc, itemvalue);
4958
4959 if (printed)
4960 appendStringInfo(&buf, "%s%s", fldsep, value);
4961 else
4962 appendStringInfoString(&buf, value);
4963 printed = true;
4964
4965 p = att_addlength_pointer(p, typlen, p);
4966 p = (char *) att_align_nominal(p, typalign);
4967 }
4968
4969 /* advance bitmap pointer if any */
4970 if (bitmap)
4971 {
4972 bitmask <<= 1;
4973 if (bitmask == 0x100)
4974 {
4975 bitmap++;
4976 bitmask = 1;
4977 }
4978 }
4979 }
4980
4981 result = cstring_to_text_with_len(buf.data, buf.len);
4982 pfree(buf.data);
4983
4984 return result;
4985}
4986
4987#define HEXBASE 16
4988/*
4989 * Convert an int32 to a string containing a base 16 (hex) representation of
4990 * the number.
4991 */
4992Datum
4993to_hex32(PG_FUNCTION_ARGS)
4994{
4995 uint32 value = (uint32) PG_GETARG_INT32(0);
4996 char *ptr;
4997 const char *digits = "0123456789abcdef";
4998 char buf[32]; /* bigger than needed, but reasonable */
4999
5000 ptr = buf + sizeof(buf) - 1;
5001 *ptr = '\0';
5002
5003 do
5004 {
5005 *--ptr = digits[value % HEXBASE];
5006 value /= HEXBASE;
5007 } while (ptr > buf && value);
5008
5009 PG_RETURN_TEXT_P(cstring_to_text(ptr));
5010}
5011
5012/*
5013 * Convert an int64 to a string containing a base 16 (hex) representation of
5014 * the number.
5015 */
5016Datum
5017to_hex64(PG_FUNCTION_ARGS)
5018{
5019 uint64 value = (uint64) PG_GETARG_INT64(0);
5020 char *ptr;
5021 const char *digits = "0123456789abcdef";
5022 char buf[32]; /* bigger than needed, but reasonable */
5023
5024 ptr = buf + sizeof(buf) - 1;
5025 *ptr = '\0';
5026
5027 do
5028 {
5029 *--ptr = digits[value % HEXBASE];
5030 value /= HEXBASE;
5031 } while (ptr > buf && value);
5032
5033 PG_RETURN_TEXT_P(cstring_to_text(ptr));
5034}
5035
5036/*
5037 * Return the size of a datum, possibly compressed
5038 *
5039 * Works on any data type
5040 */
5041Datum
5042pg_column_size(PG_FUNCTION_ARGS)
5043{
5044 Datum value = PG_GETARG_DATUM(0);
5045 int32 result;
5046 int typlen;
5047
5048 /* On first call, get the input type's typlen, and save at *fn_extra */
5049 if (fcinfo->flinfo->fn_extra == NULL)
5050 {
5051 /* Lookup the datatype of the supplied argument */
5052 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
5053
5054 typlen = get_typlen(argtypeid);
5055 if (typlen == 0) /* should not happen */
5056 elog(ERROR, "cache lookup failed for type %u", argtypeid);
5057
5058 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5059 sizeof(int));
5060 *((int *) fcinfo->flinfo->fn_extra) = typlen;
5061 }
5062 else
5063 typlen = *((int *) fcinfo->flinfo->fn_extra);
5064
5065 if (typlen == -1)
5066 {
5067 /* varlena type, possibly toasted */
5068 result = toast_datum_size(value);
5069 }
5070 else if (typlen == -2)
5071 {
5072 /* cstring */
5073 result = strlen(DatumGetCString(value)) + 1;
5074 }
5075 else
5076 {
5077 /* ordinary fixed-width type */
5078 result = typlen;
5079 }
5080
5081 PG_RETURN_INT32(result);
5082}
5083
5084/*
5085 * string_agg - Concatenates values and returns string.
5086 *
5087 * Syntax: string_agg(value text, delimiter text) RETURNS text
5088 *
5089 * Note: Any NULL values are ignored. The first-call delimiter isn't
5090 * actually used at all, and on subsequent calls the delimiter precedes
5091 * the associated value.
5092 */
5093
5094/* subroutine to initialize state */
5095static StringInfo
5096makeStringAggState(FunctionCallInfo fcinfo)
5097{
5098 StringInfo state;
5099 MemoryContext aggcontext;
5100 MemoryContext oldcontext;
5101
5102 if (!AggCheckCallContext(fcinfo, &aggcontext))
5103 {
5104 /* cannot be called directly because of internal-type argument */
5105 elog(ERROR, "string_agg_transfn called in non-aggregate context");
5106 }
5107
5108 /*
5109 * Create state in aggregate context. It'll stay there across subsequent
5110 * calls.
5111 */
5112 oldcontext = MemoryContextSwitchTo(aggcontext);
5113 state = makeStringInfo();
5114 MemoryContextSwitchTo(oldcontext);
5115
5116 return state;
5117}
5118
5119Datum
5120string_agg_transfn(PG_FUNCTION_ARGS)
5121{
5122 StringInfo state;
5123
5124 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5125
5126 /* Append the value unless null. */
5127 if (!PG_ARGISNULL(1))
5128 {
5129 /* On the first time through, we ignore the delimiter. */
5130 if (state == NULL)
5131 state = makeStringAggState(fcinfo);
5132 else if (!PG_ARGISNULL(2))
5133 appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
5134
5135 appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
5136 }
5137
5138 /*
5139 * The transition type for string_agg() is declared to be "internal",
5140 * which is a pass-by-value type the same size as a pointer.
5141 */
5142 PG_RETURN_POINTER(state);
5143}
5144
5145Datum
5146string_agg_finalfn(PG_FUNCTION_ARGS)
5147{
5148 StringInfo state;
5149
5150 /* cannot be called directly because of internal-type argument */
5151 Assert(AggCheckCallContext(fcinfo, NULL));
5152
5153 state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
5154
5155 if (state != NULL)
5156 PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
5157 else
5158 PG_RETURN_NULL();
5159}
5160
5161/*
5162 * Prepare cache with fmgr info for the output functions of the datatypes of
5163 * the arguments of a concat-like function, beginning with argument "argidx".
5164 * (Arguments before that will have corresponding slots in the resulting
5165 * FmgrInfo array, but we don't fill those slots.)
5166 */
5167static FmgrInfo *
5168build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
5169{
5170 FmgrInfo *foutcache;
5171 int i;
5172
5173 /* We keep the info in fn_mcxt so it survives across calls */
5174 foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5175 PG_NARGS() * sizeof(FmgrInfo));
5176
5177 for (i = argidx; i < PG_NARGS(); i++)
5178 {
5179 Oid valtype;
5180 Oid typOutput;
5181 bool typIsVarlena;
5182
5183 valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5184 if (!OidIsValid(valtype))
5185 elog(ERROR, "could not determine data type of concat() input");
5186
5187 getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5188 fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5189 }
5190
5191 fcinfo->flinfo->fn_extra = foutcache;
5192
5193 return foutcache;
5194}
5195
5196/*
5197 * Implementation of both concat() and concat_ws().
5198 *
5199 * sepstr is the separator string to place between values.
5200 * argidx identifies the first argument to concatenate (counting from zero);
5201 * note that this must be constant across any one series of calls.
5202 *
5203 * Returns NULL if result should be NULL, else text value.
5204 */
5205static text *
5206concat_internal(const char *sepstr, int argidx,
5207 FunctionCallInfo fcinfo)
5208{
5209 text *result;
5210 StringInfoData str;
5211 FmgrInfo *foutcache;
5212 bool first_arg = true;
5213 int i;
5214
5215 /*
5216 * concat(VARIADIC some-array) is essentially equivalent to
5217 * array_to_text(), ie concat the array elements with the given separator.
5218 * So we just pass the case off to that code.
5219 */
5220 if (get_fn_expr_variadic(fcinfo->flinfo))
5221 {
5222 ArrayType *arr;
5223
5224 /* Should have just the one argument */
5225 Assert(argidx == PG_NARGS() - 1);
5226
5227 /* concat(VARIADIC NULL) is defined as NULL */
5228 if (PG_ARGISNULL(argidx))
5229 return NULL;
5230
5231 /*
5232 * Non-null argument had better be an array. We assume that any call
5233 * context that could let get_fn_expr_variadic return true will have
5234 * checked that a VARIADIC-labeled parameter actually is an array. So
5235 * it should be okay to just Assert that it's an array rather than
5236 * doing a full-fledged error check.
5237 */
5238 Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
5239
5240 /* OK, safe to fetch the array value */
5241 arr = PG_GETARG_ARRAYTYPE_P(argidx);
5242
5243 /*
5244 * And serialize the array. We tell array_to_text to ignore null
5245 * elements, which matches the behavior of the loop below.
5246 */
5247 return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5248 }
5249
5250 /* Normal case without explicit VARIADIC marker */
5251 initStringInfo(&str);
5252
5253 /* Get output function info, building it if first time through */
5254 foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5255 if (foutcache == NULL)
5256 foutcache = build_concat_foutcache(fcinfo, argidx);
5257
5258 for (i = argidx; i < PG_NARGS(); i++)
5259 {
5260 if (!PG_ARGISNULL(i))
5261 {
5262 Datum value = PG_GETARG_DATUM(i);
5263
5264 /* add separator if appropriate */
5265 if (first_arg)
5266 first_arg = false;
5267 else
5268 appendStringInfoString(&str, sepstr);
5269
5270 /* call the appropriate type output function, append the result */
5271 appendStringInfoString(&str,
5272 OutputFunctionCall(&foutcache[i], value));
5273 }
5274 }
5275
5276 result = cstring_to_text_with_len(str.data, str.len);
5277 pfree(str.data);
5278
5279 return result;
5280}
5281
5282/*
5283 * Concatenate all arguments. NULL arguments are ignored.
5284 */
5285Datum
5286text_concat(PG_FUNCTION_ARGS)
5287{
5288 text *result;
5289
5290 result = concat_internal("", 0, fcinfo);
5291 if (result == NULL)
5292 PG_RETURN_NULL();
5293 PG_RETURN_TEXT_P(result);
5294}
5295
5296/*
5297 * Concatenate all but first argument value with separators. The first
5298 * parameter is used as the separator. NULL arguments are ignored.
5299 */
5300Datum
5301text_concat_ws(PG_FUNCTION_ARGS)
5302{
5303 char *sep;
5304 text *result;
5305
5306 /* return NULL when separator is NULL */
5307 if (PG_ARGISNULL(0))
5308 PG_RETURN_NULL();
5309 sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
5310
5311 result = concat_internal(sep, 1, fcinfo);
5312 if (result == NULL)
5313 PG_RETURN_NULL();
5314 PG_RETURN_TEXT_P(result);
5315}
5316
5317/*
5318 * Return first n characters in the string. When n is negative,
5319 * return all but last |n| characters.
5320 */
5321Datum
5322text_left(PG_FUNCTION_ARGS)
5323{
5324 int n = PG_GETARG_INT32(1);
5325
5326 if (n < 0)
5327 {
5328 text *str = PG_GETARG_TEXT_PP(0);
5329 const char *p = VARDATA_ANY(str);
5330 int len = VARSIZE_ANY_EXHDR(str);
5331 int rlen;
5332
5333 n = pg_mbstrlen_with_len(p, len) + n;
5334 rlen = pg_mbcharcliplen(p, len, n);
5335 PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
5336 }
5337 else
5338 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
5339}
5340
5341/*
5342 * Return last n characters in the string. When n is negative,
5343 * return all but first |n| characters.
5344 */
5345Datum
5346text_right(PG_FUNCTION_ARGS)
5347{
5348 text *str = PG_GETARG_TEXT_PP(0);
5349 const char *p = VARDATA_ANY(str);
5350 int len = VARSIZE_ANY_EXHDR(str);
5351 int n = PG_GETARG_INT32(1);
5352 int off;
5353
5354 if (n < 0)
5355 n = -n;
5356 else
5357 n = pg_mbstrlen_with_len(p, len) - n;
5358 off = pg_mbcharcliplen(p, len, n);
5359
5360 PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
5361}
5362
5363/*
5364 * Return reversed string
5365 */
5366Datum
5367text_reverse(PG_FUNCTION_ARGS)
5368{
5369 text *str = PG_GETARG_TEXT_PP(0);
5370 const char *p = VARDATA_ANY(str);
5371 int len = VARSIZE_ANY_EXHDR(str);
5372 const char *endp = p + len;
5373 text *result;
5374 char *dst;
5375
5376 result = palloc(len + VARHDRSZ);
5377 dst = (char *) VARDATA(result) + len;
5378 SET_VARSIZE(result, len + VARHDRSZ);
5379
5380 if (pg_database_encoding_max_length() > 1)
5381 {
5382 /* multibyte version */
5383 while (p < endp)
5384 {
5385 int sz;
5386
5387 sz = pg_mblen(p);
5388 dst -= sz;
5389 memcpy(dst, p, sz);
5390 p += sz;
5391 }
5392 }
5393 else
5394 {
5395 /* single byte version */
5396 while (p < endp)
5397 *(--dst) = *p++;
5398 }
5399
5400 PG_RETURN_TEXT_P(result);
5401}
5402
5403
/*
 * Support macros for text_format()
 */
#define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */

/*
 * Advance "ptr" by one character.  If that brings it to or past "end_ptr",
 * report an "unterminated format() type specifier" error.  Note that "ptr"
 * is modified in place, so it must be an lvalue.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
    do { \
        if (++(ptr) >= (end_ptr)) \
            ereport(ERROR, \
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
                     errmsg("unterminated format() type specifier"), \
                     errhint("For a single \"%%\" use \"%%%%\"."))); \
    } while (0)
5417
5418/*
5419 * Returns a formatted string
5420 */
5421Datum
5422text_format(PG_FUNCTION_ARGS)
5423{
5424 text *fmt;
5425 StringInfoData str;
5426 const char *cp;
5427 const char *start_ptr;
5428 const char *end_ptr;
5429 text *result;
5430 int arg;
5431 bool funcvariadic;
5432 int nargs;
5433 Datum *elements = NULL;
5434 bool *nulls = NULL;
5435 Oid element_type = InvalidOid;
5436 Oid prev_type = InvalidOid;
5437 Oid prev_width_type = InvalidOid;
5438 FmgrInfo typoutputfinfo;
5439 FmgrInfo typoutputinfo_width;
5440
5441 /* When format string is null, immediately return null */
5442 if (PG_ARGISNULL(0))
5443 PG_RETURN_NULL();
5444
5445 /* If argument is marked VARIADIC, expand array into elements */
5446 if (get_fn_expr_variadic(fcinfo->flinfo))
5447 {
5448 ArrayType *arr;
5449 int16 elmlen;
5450 bool elmbyval;
5451 char elmalign;
5452 int nitems;
5453
5454 /* Should have just the one argument */
5455 Assert(PG_NARGS() == 2);
5456
5457 /* If argument is NULL, we treat it as zero-length array */
5458 if (PG_ARGISNULL(1))
5459 nitems = 0;
5460 else
5461 {
5462 /*
5463 * Non-null argument had better be an array. We assume that any
5464 * call context that could let get_fn_expr_variadic return true
5465 * will have checked that a VARIADIC-labeled parameter actually is
5466 * an array. So it should be okay to just Assert that it's an
5467 * array rather than doing a full-fledged error check.
5468 */
5469 Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
5470
5471 /* OK, safe to fetch the array value */
5472 arr = PG_GETARG_ARRAYTYPE_P(1);
5473
5474 /* Get info about array element type */
5475 element_type = ARR_ELEMTYPE(arr);
5476 get_typlenbyvalalign(element_type,
5477 &elmlen, &elmbyval, &elmalign);
5478
5479 /* Extract all array elements */
5480 deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5481 &elements, &nulls, &nitems);
5482 }
5483
5484 nargs = nitems + 1;
5485 funcvariadic = true;
5486 }
5487 else
5488 {
5489 /* Non-variadic case, we'll process the arguments individually */
5490 nargs = PG_NARGS();
5491 funcvariadic = false;
5492 }
5493
5494 /* Setup for main loop. */
5495 fmt = PG_GETARG_TEXT_PP(0);
5496 start_ptr = VARDATA_ANY(fmt);
5497 end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5498 initStringInfo(&str);
5499 arg = 1; /* next argument position to print */
5500
5501 /* Scan format string, looking for conversion specifiers. */
5502 for (cp = start_ptr; cp < end_ptr; cp++)
5503 {
5504 int argpos;
5505 int widthpos;
5506 int flags;
5507 int width;
5508 Datum value;
5509 bool isNull;
5510 Oid typid;
5511
5512 /*
5513 * If it's not the start of a conversion specifier, just copy it to
5514 * the output buffer.
5515 */
5516 if (*cp != '%')
5517 {
5518 appendStringInfoCharMacro(&str, *cp);
5519 continue;
5520 }
5521
5522 ADVANCE_PARSE_POINTER(cp, end_ptr);
5523
5524 /* Easy case: %% outputs a single % */
5525 if (*cp == '%')
5526 {
5527 appendStringInfoCharMacro(&str, *cp);
5528 continue;
5529 }
5530
5531 /* Parse the optional portions of the format specifier */
5532 cp = text_format_parse_format(cp, end_ptr,
5533 &argpos, &widthpos,
5534 &flags, &width);
5535
5536 /*
5537 * Next we should see the main conversion specifier. Whether or not
5538 * an argument position was present, it's known that at least one
5539 * character remains in the string at this point. Experience suggests
5540 * that it's worth checking that that character is one of the expected
5541 * ones before we try to fetch arguments, so as to produce the least
5542 * confusing response to a mis-formatted specifier.
5543 */
5544 if (strchr("sIL", *cp) == NULL)
5545 ereport(ERROR,
5546 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5547 errmsg("unrecognized format() type specifier \"%c\"",
5548 *cp),
5549 errhint("For a single \"%%\" use \"%%%%\".")));
5550
5551 /* If indirect width was specified, get its value */
5552 if (widthpos >= 0)
5553 {
5554 /* Collect the specified or next argument position */
5555 if (widthpos > 0)
5556 arg = widthpos;
5557 if (arg >= nargs)
5558 ereport(ERROR,
5559 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5560 errmsg("too few arguments for format()")));
5561
5562 /* Get the value and type of the selected argument */
5563 if (!funcvariadic)
5564 {
5565 value = PG_GETARG_DATUM(arg);
5566 isNull = PG_ARGISNULL(arg);
5567 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5568 }
5569 else
5570 {
5571 value = elements[arg - 1];
5572 isNull = nulls[arg - 1];
5573 typid = element_type;
5574 }
5575 if (!OidIsValid(typid))
5576 elog(ERROR, "could not determine data type of format() input");
5577
5578 arg++;
5579
5580 /* We can treat NULL width the same as zero */
5581 if (isNull)
5582 width = 0;
5583 else if (typid == INT4OID)
5584 width = DatumGetInt32(value);
5585 else if (typid == INT2OID)
5586 width = DatumGetInt16(value);
5587 else
5588 {
5589 /* For less-usual datatypes, convert to text then to int */
5590 char *str;
5591
5592 if (typid != prev_width_type)
5593 {
5594 Oid typoutputfunc;
5595 bool typIsVarlena;
5596
5597 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5598 fmgr_info(typoutputfunc, &typoutputinfo_width);
5599 prev_width_type = typid;
5600 }
5601
5602 str = OutputFunctionCall(&typoutputinfo_width, value);
5603
5604 /* pg_strtoint32 will complain about bad data or overflow */
5605 width = pg_strtoint32(str);
5606
5607 pfree(str);
5608 }
5609 }
5610
5611 /* Collect the specified or next argument position */
5612 if (argpos > 0)
5613 arg = argpos;
5614 if (arg >= nargs)
5615 ereport(ERROR,
5616 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5617 errmsg("too few arguments for format()")));
5618
5619 /* Get the value and type of the selected argument */
5620 if (!funcvariadic)
5621 {
5622 value = PG_GETARG_DATUM(arg);
5623 isNull = PG_ARGISNULL(arg);
5624 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5625 }
5626 else
5627 {
5628 value = elements[arg - 1];
5629 isNull = nulls[arg - 1];
5630 typid = element_type;
5631 }
5632 if (!OidIsValid(typid))
5633 elog(ERROR, "could not determine data type of format() input");
5634
5635 arg++;
5636
5637 /*
5638 * Get the appropriate typOutput function, reusing previous one if
5639 * same type as previous argument. That's particularly useful in the
5640 * variadic-array case, but often saves work even for ordinary calls.
5641 */
5642 if (typid != prev_type)
5643 {
5644 Oid typoutputfunc;
5645 bool typIsVarlena;
5646
5647 getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5648 fmgr_info(typoutputfunc, &typoutputfinfo);
5649 prev_type = typid;
5650 }
5651
5652 /*
5653 * And now we can format the value.
5654 */
5655 switch (*cp)
5656 {
5657 case 's':
5658 case 'I':
5659 case 'L':
5660 text_format_string_conversion(&str, *cp, &typoutputfinfo,
5661 value, isNull,
5662 flags, width);
5663 break;
5664 default:
5665 /* should not get here, because of previous check */
5666 ereport(ERROR,
5667 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5668 errmsg("unrecognized format() type specifier \"%c\"",
5669 *cp),
5670 errhint("For a single \"%%\" use \"%%%%\".")));
5671 break;
5672 }
5673 }
5674
5675 /* Don't need deconstruct_array results anymore. */
5676 if (elements != NULL)
5677 pfree(elements);
5678 if (nulls != NULL)
5679 pfree(nulls);
5680
5681 /* Generate results. */
5682 result = cstring_to_text_with_len(str.data, str.len);
5683 pfree(str.data);
5684
5685 PG_RETURN_TEXT_P(result);
5686}
5687
5688/*
5689 * Parse contiguous digits as a decimal number.
5690 *
5691 * Returns true if some digits could be parsed.
5692 * The value is returned into *value, and *ptr is advanced to the next
5693 * character to be parsed.
5694 *
5695 * Note parsing invariant: at least one character is known available before
5696 * string end (end_ptr) at entry, and this is still true at exit.
5697 */
5698static bool
5699text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
5700{
5701 bool found = false;
5702 const char *cp = *ptr;
5703 int val = 0;
5704
5705 while (*cp >= '0' && *cp <= '9')
5706 {
5707 int8 digit = (*cp - '0');
5708
5709 if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
5710 unlikely(pg_add_s32_overflow(val, digit, &val)))
5711 ereport(ERROR,
5712 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5713 errmsg("number is out of range")));
5714 ADVANCE_PARSE_POINTER(cp, end_ptr);
5715 found = true;
5716 }
5717
5718 *ptr = cp;
5719 *value = val;
5720
5721 return found;
5722}
5723
5724/*
5725 * Parse a format specifier (generally following the SUS printf spec).
5726 *
5727 * We have already advanced over the initial '%', and we are looking for
5728 * [argpos][flags][width]type (but the type character is not consumed here).
5729 *
5730 * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5731 * Output parameters:
5732 * argpos: argument position for value to be printed. -1 means unspecified.
5733 * widthpos: argument position for width. Zero means the argument position
5734 * was unspecified (ie, take the next arg) and -1 means no width
5735 * argument (width was omitted or specified as a constant).
5736 * flags: bitmask of flags.
5737 * width: directly-specified width value. Zero means the width was omitted
5738 * (note it's not necessary to distinguish this case from an explicit
5739 * zero width value).
5740 *
5741 * The function result is the next character position to be parsed, ie, the
5742 * location where the type character is/should be.
5743 *
5744 * Note parsing invariant: at least one character is known available before
5745 * string end (end_ptr) at entry, and this is still true at exit.
5746 */
5747static const char *
5748text_format_parse_format(const char *start_ptr, const char *end_ptr,
5749 int *argpos, int *widthpos,
5750 int *flags, int *width)
5751{
5752 const char *cp = start_ptr;
5753 int n;
5754
5755 /* set defaults for output parameters */
5756 *argpos = -1;
5757 *widthpos = -1;
5758 *flags = 0;
5759 *width = 0;
5760
5761 /* try to identify first number */
5762 if (text_format_parse_digits(&cp, end_ptr, &n))
5763 {
5764 if (*cp != '$')
5765 {
5766 /* Must be just a width and a type, so we're done */
5767 *width = n;
5768 return cp;
5769 }
5770 /* The number was argument position */
5771 *argpos = n;
5772 /* Explicit 0 for argument index is immediately refused */
5773 if (n == 0)
5774 ereport(ERROR,
5775 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5776 errmsg("format specifies argument 0, but arguments are numbered from 1")));
5777 ADVANCE_PARSE_POINTER(cp, end_ptr);
5778 }
5779
5780 /* Handle flags (only minus is supported now) */
5781 while (*cp == '-')
5782 {
5783 *flags |= TEXT_FORMAT_FLAG_MINUS;
5784 ADVANCE_PARSE_POINTER(cp, end_ptr);
5785 }
5786
5787 if (*cp == '*')
5788 {
5789 /* Handle indirect width */
5790 ADVANCE_PARSE_POINTER(cp, end_ptr);
5791 if (text_format_parse_digits(&cp, end_ptr, &n))
5792 {
5793 /* number in this position must be closed by $ */
5794 if (*cp != '$')
5795 ereport(ERROR,
5796 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5797 errmsg("width argument position must be ended by \"$\"")));
5798 /* The number was width argument position */
5799 *widthpos = n;
5800 /* Explicit 0 for argument index is immediately refused */
5801 if (n == 0)
5802 ereport(ERROR,
5803 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5804 errmsg("format specifies argument 0, but arguments are numbered from 1")));
5805 ADVANCE_PARSE_POINTER(cp, end_ptr);
5806 }
5807 else
5808 *widthpos = 0; /* width's argument position is unspecified */
5809 }
5810 else
5811 {
5812 /* Check for direct width specification */
5813 if (text_format_parse_digits(&cp, end_ptr, &n))
5814 *width = n;
5815 }
5816
5817 /* cp should now be pointing at type character */
5818 return cp;
5819}
5820
5821/*
5822 * Format a %s, %I, or %L conversion
5823 */
5824static void
5825text_format_string_conversion(StringInfo buf, char conversion,
5826 FmgrInfo *typOutputInfo,
5827 Datum value, bool isNull,
5828 int flags, int width)
5829{
5830 char *str;
5831
5832 /* Handle NULL arguments before trying to stringify the value. */
5833 if (isNull)
5834 {
5835 if (conversion == 's')
5836 text_format_append_string(buf, "", flags, width);
5837 else if (conversion == 'L')
5838 text_format_append_string(buf, "NULL", flags, width);
5839 else if (conversion == 'I')
5840 ereport(ERROR,
5841 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5842 errmsg("null values cannot be formatted as an SQL identifier")));
5843 return;
5844 }
5845
5846 /* Stringify. */
5847 str = OutputFunctionCall(typOutputInfo, value);
5848
5849 /* Escape. */
5850 if (conversion == 'I')
5851 {
5852 /* quote_identifier may or may not allocate a new string. */
5853 text_format_append_string(buf, quote_identifier(str), flags, width);
5854 }
5855 else if (conversion == 'L')
5856 {
5857 char *qstr = quote_literal_cstr(str);
5858
5859 text_format_append_string(buf, qstr, flags, width);
5860 /* quote_literal_cstr() always allocates a new string */
5861 pfree(qstr);
5862 }
5863 else
5864 text_format_append_string(buf, str, flags, width);
5865
5866 /* Cleanup. */
5867 pfree(str);
5868}
5869
5870/*
5871 * Append str to buf, padding as directed by flags/width
5872 */
5873static void
5874text_format_append_string(StringInfo buf, const char *str,
5875 int flags, int width)
5876{
5877 bool align_to_left = false;
5878 int len;
5879
5880 /* fast path for typical easy case */
5881 if (width == 0)
5882 {
5883 appendStringInfoString(buf, str);
5884 return;
5885 }
5886
5887 if (width < 0)
5888 {
5889 /* Negative width: implicit '-' flag, then take absolute value */
5890 align_to_left = true;
5891 /* -INT_MIN is undefined */
5892 if (width <= INT_MIN)
5893 ereport(ERROR,
5894 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5895 errmsg("number is out of range")));
5896 width = -width;
5897 }
5898 else if (flags & TEXT_FORMAT_FLAG_MINUS)
5899 align_to_left = true;
5900
5901 len = pg_mbstrlen(str);
5902 if (align_to_left)
5903 {
5904 /* left justify */
5905 appendStringInfoString(buf, str);
5906 if (len < width)
5907 appendStringInfoSpaces(buf, width - len);
5908 }
5909 else
5910 {
5911 /* right justify */
5912 if (len < width)
5913 appendStringInfoSpaces(buf, width - len);
5914 appendStringInfoString(buf, str);
5915 }
5916}
5917
5918/*
5919 * text_format_nv - nonvariadic wrapper for text_format function.
5920 *
5921 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
5922 * which checks that all built-in functions that share the implementing C
5923 * function take the same number of arguments.
5924 */
5925Datum
5926text_format_nv(PG_FUNCTION_ARGS)
5927{
5928 return text_format(fcinfo);
5929}
5930
/*
 * Helper for the Levenshtein distance functions: report whether the first
 * len bytes of s1 and s2 are identical.  Intended to be faster than
 * memcmp() for this use case.
 *
 * Bytes are compared starting from the last one and working backwards.
 * Returns true when len <= 0.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	int			remaining;

	for (remaining = len; remaining > 0; remaining--)
	{
		if (s1[remaining - 1] != s2[remaining - 1])
			return false;
	}

	return true;
}
5946
5947/* Expand each Levenshtein distance variant */
5948#include "levenshtein.c"
5949#define LEVENSHTEIN_LESS_EQUAL
5950#include "levenshtein.c"
5951