varlena.c source code [PostgreSQL/src/backend/utils/adt/varlena.c]

/*-------------------------------------------------------------------------
 *
 * varlena.c
 *	  Functions for the variable-length built-in types.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/varlena.c
 *
 *-------------------------------------------------------------------------
 */
15	#include "postgres.h"
16
17	#include <ctype.h>
18	#include <limits.h>
19
20	#include "access/tuptoaster.h"
21	#include "catalog/pg_collation.h"
22	#include "catalog/pg_type.h"
23	#include "common/int.h"
24	#include "lib/hyperloglog.h"
25	#include "libpq/pqformat.h"
26	#include "miscadmin.h"
27	#include "parser/scansup.h"
28	#include "port/pg_bswap.h"
29	#include "regex/regex.h"
30	#include "utils/builtins.h"
31	#include "utils/bytea.h"
32	#include "utils/hashutils.h"
33	#include "utils/lsyscache.h"
34	#include "utils/memutils.h"
35	#include "utils/pg_locale.h"
36	#include "utils/sortsupport.h"
37	#include "utils/varlena.h"
38
39
40	/ GUC variable /
41	int bytea_output = BYTEA_OUTPUT_HEX;
42
/* Both aliases use the generic varlena representation. */
typedef struct varlena unknown;
typedef struct varlena VarString;
45
46	/*
47	* State for text_position_* functions.
48	*/
49	typedef struct
50	{
51	bool is_multibyte; / T if multibyte encoding /
52	bool is_multibyte_char_in_char;
53
54	char str1; /* haystack string /
55	char str2; /* needle string /
56	int len1; / string lengths in bytes /
57	int len2;
58
59	/ Skip table for Boyer-Moore-Horspool search algorithm: /
60	int skiptablemask; / mask for ANDing with skiptable subscripts /
61	int skiptable[`256`]; / skip distance for given mismatched char /
62
63	char last_match; /* pointer to last match in 'str1' /
64
65	/*
66	* Sometimes we need to convert the byte position of a match to a
67	* character position. These store the last position that was converted,
68	* so that on the next call, we can continue from that point, rather than
69	* count characters from the very beginning.
70	*/
71	char refpoint; /* pointer within original haystack string /
72	int refpos; / 0-based character offset of the same point /
73	} TextPositionState;
74
75	typedef struct
76	{
77	char buf1; /* 1st string, or abbreviation original string*
78	* buf */
79	char buf2; /* 2nd string, or abbreviation strxfrm() buf /
80	int buflen1;
81	int buflen2;
82	int last_len1; / Length of last buf1 string/strxfrm() input /
83	int last_len2; / Length of last buf2 string/strxfrm() blob /
84	int last_returned; / Last comparison result (cache) /
85	bool cache_blob; / Does buf2 contain strxfrm() blob, etc? /
86	bool collate_c;
87	Oid typid; / Actual datatype (text/bpchar/bytea/name) /
88	hyperLogLogState abbr_card; / Abbreviated key cardinality state /
89	hyperLogLogState full_card; / Full key cardinality state /
90	double prop_card; / Required cardinality proportion /
91	pg_locale_t locale;
92	} VarStringSortSupport;
93
/*
 * This should be large enough that most strings will fit, but small enough
 * that we feel comfortable putting it on the stack
 */
#define TEXTBUFLEN		1024
99
/* Datum access macros for the "unknown" pseudo-type (detoasted varlena) */
#define DatumGetUnknownP(X)			((unknown *) PG_DETOAST_DATUM(X))
#define DatumGetUnknownPCopy(X)		((unknown *) PG_DETOAST_DATUM_COPY(X))
#define PG_GETARG_UNKNOWN_P(n)		DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x)		PG_RETURN_POINTER(x)

/* Datum access macros for VarString; the PP form uses the packed detoast */
#define DatumGetVarStringP(X)		((VarString *) PG_DETOAST_DATUM(X))
#define DatumGetVarStringPP(X)		((VarString *) PG_DETOAST_DATUM_PACKED(X))
108
109	static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
110	static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
111	static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
112	static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
113	static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
114	static int varstrfastcmp_locale(char a1p, int* len1, char a2p, int* len2, SortSupport ssup);
115	static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
116	static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
117	static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
118	static int32 text_length(Datum str);
119	static text text_catenate(text t1, text *t2);
120	static text *text_substring(Datum str,
121	int32 start,
122	int32 length,
123	bool length_not_specified);
124	static text text_overlay(text t1, text t2, int* sp, int sl);
125	static int text_position(text t1, text t2, Oid collid);
126	static void text_position_setup(text t1, text t2, Oid collid, TextPositionState *state);
127	static bool text_position_next(TextPositionState *state);
128	static char text_position_next_internal(char* start_ptr, TextPositionState state);
129	static char text_position_get_match_ptr(TextPositionState state);
130	static int text_position_get_match_pos(TextPositionState *state);
131	static void text_position_cleanup(TextPositionState *state);
132	static void check_collation_set(Oid collid);
133	static int text_cmp(text arg1, text arg2, Oid collid);
134	static bytea bytea_catenate(bytea t1, bytea *t2);
135	static bytea *bytea_substring(Datum str,
136	int S,
137	int L,
138	bool length_not_specified);
139	static bytea bytea_overlay(bytea t1, bytea t2, int* sp, int sl);
140	static void appendStringInfoText(StringInfo str, const text *t);
141	static Datum text_to_array_internal(PG_FUNCTION_ARGS);
142	static text array_to_text_internal(FunctionCallInfo fcinfo, ArrayType v,
143	const char fldsep, const* char *null_string);
144	static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
145	static bool text_format_parse_digits(const char *ptr, const* char *end_ptr,
146	int *value);
147	static const char text_format_parse_format(const* char *start_ptr,
148	const char *end_ptr,
149	int argpos, int* *widthpos,
150	int flags, int* *width);
151	static void text_format_string_conversion(StringInfo buf, char conversion,
152	FmgrInfo *typOutputInfo,
153	Datum value, bool isNull,
154	int flags, int width);
155	static void text_format_append_string(StringInfo buf, const char *str,
156	int flags, int width);
157
158
159	/*****************************************************************************
160	* CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
161	*****************************************************************************/
162
163	/*
164	* cstring_to_text
165	*
166	* Create a text value from a null-terminated C string.
167	*
168	* The new text value is freshly palloc'd with a full-size VARHDR.
169	*/
170	text *
171	cstring_to_text(const char *s)
172	{
173	return cstring_to_text_with_len(s, strlen(s));
174	}
175
176	/*
177	* cstring_to_text_with_len
178	*
179	* Same as cstring_to_text except the caller specifies the string length;
180	* the string need not be null_terminated.
181	*/
182	text *
183	cstring_to_text_with_len(const char s, int* len)
184	{
185	text result = (text ) palloc(len + VARHDRSZ);
186
187	SET_VARSIZE(result, len + VARHDRSZ);
188	memcpy(VARDATA(result), s, len);
189
190	return result;
191	}
192
193	/*
194	* text_to_cstring
195	*
196	* Create a palloc'd, null-terminated C string from a text value.
197	*
198	* We support being passed a compressed or toasted text value.
199	* This is a bit bogus since such values shouldn't really be referred to as
200	* "text *", but it seems useful for robustness. If we didn't handle that
201	* case here, we'd need another routine that did, anyway.
202	*/
203	char *
204	text_to_cstring(const text *t)
205	{
206	/ must cast away the const, unfortunately /
207	text tunpacked = pg_detoast_datum_packed(unconstify(text , t));
208	int len = VARSIZE_ANY_EXHDR(tunpacked);
209	char *result;
210
211	result = (char *) palloc(len + `1`);
212	memcpy(result, VARDATA_ANY(tunpacked), len);
213	result[len] = `'\0'`;
214
215	if (tunpacked != t)
216	pfree(tunpacked);
217
218	return result;
219	}
220
221	/*
222	* text_to_cstring_buffer
223	*
224	* Copy a text value into a caller-supplied buffer of size dst_len.
225	*
226	* The text string is truncated if necessary to fit. The result is
227	* guaranteed null-terminated (unless dst_len == 0).
228	*
229	* We support being passed a compressed or toasted text value.
230	* This is a bit bogus since such values shouldn't really be referred to as
231	* "text *", but it seems useful for robustness. If we didn't handle that
232	* case here, we'd need another routine that did, anyway.
233	*/
234	void
235	text_to_cstring_buffer(const text src, char* *dst, size_t dst_len)
236	{
237	/ must cast away the const, unfortunately /
238	text srcunpacked = pg_detoast_datum_packed(unconstify(text , src));
239	size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
240
241	if (dst_len > `0`)
242	{
243	dst_len--;
244	if (dst_len >= src_len)
245	dst_len = src_len;
246	else / ensure truncation is encoding-safe /
247	dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
248	memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
249	dst[dst_len] = `'\0'`;
250	}
251
252	if (srcunpacked != src)
253	pfree(srcunpacked);
254	}
255
256
257	/*****************************************************************************
258	* USER I/O ROUTINES *
259	*****************************************************************************/
260
261
/* Convert between a digit character and its numeric value */
#define VAL(CH)			((CH) - '0')
#define DIG(VAL)		((VAL) + '0')
264
265	/*
266	* byteain - converts from printable representation of byte array
267	*
268	* Non-printable characters must be passed as '\nnn' (octal) and are
269	* converted to internal form. '\' must be passed as '\\'.
270	* ereport(ERROR, ...) if bad form.
271	*
272	* BUGS:
273	* The input is scanned twice.
274	* The error checking of input is minimal.
275	*/
276	Datum
277	byteain(PG_FUNCTION_ARGS)
278	{
279	char *inputText = PG_GETARG_CSTRING(`0`);
280	char *tp;
281	char *rp;
282	int bc;
283	bytea *result;
284
285	/ Recognize hex input /
286	if (inputText[`0`] == `'\\'` && inputText[`1`] == `'x'`)
287	{
288	size_t len = strlen(inputText);
289
290	bc = (len - `2`) / `2` + VARHDRSZ; / maximum possible length /
291	result = palloc(bc);
292	bc = hex_decode(inputText + `2`, len - `2`, VARDATA(result));
293	SET_VARSIZE(result, bc + VARHDRSZ); / actual length /
294
295	PG_RETURN_BYTEA_P(result);
296	}
297
298	/ Else, it's the traditional escaped style /
299	for (bc = `0`, tp = inputText; *tp != `'\0'`; bc++)
300	{
301	if (tp[`0`] != `'\\'`)
302	tp++;
303	else if ((tp[`0`] == `'\\'`) &&
304	(tp[`1`] >= `'0'` && tp[`1`] <= `'3'`) &&
305	(tp[`2`] >= `'0'` && tp[`2`] <= `'7'`) &&
306	(tp[`3`] >= `'0'` && tp[`3`] <= `'7'`))
307	tp += `4`;
308	else if ((tp[`0`] == `'\\'`) &&
309	(tp[`1`] == `'\\'`))
310	tp += `2`;
311	else
312	{
313	/*
314	* one backslash, not followed by another or ### valid octal
315	*/
316	ereport(ERROR,
317	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
318	errmsg("invalid input syntax for type %s", "bytea")));
319	}
320	}
321
322	bc += VARHDRSZ;
323
324	result = (bytea *) palloc(bc);
325	SET_VARSIZE(result, bc);
326
327	tp = inputText;
328	rp = VARDATA(result);
329	while (*tp != `'\0'`)
330	{
331	if (tp[`0`] != `'\\'`)
332	rp++ = tp++;
333	else if ((tp[`0`] == `'\\'`) &&
334	(tp[`1`] >= `'0'` && tp[`1`] <= `'3'`) &&
335	(tp[`2`] >= `'0'` && tp[`2`] <= `'7'`) &&
336	(tp[`3`] >= `'0'` && tp[`3`] <= `'7'`))
337	{
338	bc = VAL(tp[`1`]);
339	bc <<= `3`;
340	bc += VAL(tp[`2`]);
341	bc <<= `3`;
342	*rp++ = bc + VAL(tp[`3`]);
343
344	tp += `4`;
345	}
346	else if ((tp[`0`] == `'\\'`) &&
347	(tp[`1`] == `'\\'`))
348	{
349	*rp++ = `'\\'`;
350	tp += `2`;
351	}
352	else
353	{
354	/*
355	* We should never get here. The first pass should not allow it.
356	*/
357	ereport(ERROR,
358	(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
359	errmsg("invalid input syntax for type %s", "bytea")));
360	}
361	}
362
363	PG_RETURN_BYTEA_P(result);
364	}
365
366	/*
367	* byteaout - converts to printable representation of byte array
368	*
369	* In the traditional escaped format, non-printable characters are
370	* printed as '\nnn' (octal) and '\' as '\\'.
371	*/
372	Datum
373	byteaout(PG_FUNCTION_ARGS)
374	{
375	bytea *vlena = PG_GETARG_BYTEA_PP(`0`);
376	char *result;
377	char *rp;
378
379	if (bytea_output == BYTEA_OUTPUT_HEX)
380	{
381	/ Print hex format /
382	rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * `2` + `2` + `1`);
383	*rp++ = `'\\'`;
384	*rp++ = `'x'`;
385	rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
386	}
387	else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
388	{
389	/ Print traditional escaped format /
390	char *vp;
391	int len;
392	int i;
393
394	len = `1`; / empty string has 1 char /
395	vp = VARDATA_ANY(vlena);
396	for (i = VARSIZE_ANY_EXHDR(vlena); i != `0`; i--, vp++)
397	{
398	if (*vp == `'\\'`)
399	len += `2`;
400	else if ((unsigned char) vp < `0x20` \|\| (unsigned* char) *vp > `0x7e`)
401	len += `4`;
402	else
403	len++;
404	}
405	rp = result = (char *) palloc(len);
406	vp = VARDATA_ANY(vlena);
407	for (i = VARSIZE_ANY_EXHDR(vlena); i != `0`; i--, vp++)
408	{
409	if (*vp == `'\\'`)
410	{
411	*rp++ = `'\\'`;
412	*rp++ = `'\\'`;
413	}
414	else if ((unsigned char) vp < `0x20` \|\| (unsigned* char) *vp > `0x7e`)
415	{
416	int val; / holds unprintable chars /
417
418	val = *vp;
419	rp[`0`] = `'\\'`;
420	rp[`3`] = DIG(val & `07`);
421	val >>= `3`;
422	rp[`2`] = DIG(val & `07`);
423	val >>= `3`;
424	rp[`1`] = DIG(val & `03`);
425	rp += `4`;
426	}
427	else
428	rp++ = vp;
429	}
430	}
431	else
432	{
433	elog(ERROR, "unrecognized bytea_output setting: %d",
434	bytea_output);
435	rp = result = NULL; / keep compiler quiet /
436	}
437	*rp = `'\0'`;
438	PG_RETURN_CSTRING(result);
439	}
440
441	/*
442	* bytearecv - converts external binary format to bytea
443	*/
444	Datum
445	bytearecv(PG_FUNCTION_ARGS)
446	{
447	StringInfo buf = (StringInfo) PG_GETARG_POINTER(`0`);
448	bytea *result;
449	int nbytes;
450
451	nbytes = buf->len - buf->cursor;
452	result = (bytea *) palloc(nbytes + VARHDRSZ);
453	SET_VARSIZE(result, nbytes + VARHDRSZ);
454	pq_copymsgbytes(buf, VARDATA(result), nbytes);
455	PG_RETURN_BYTEA_P(result);
456	}
457
458	/*
459	* byteasend - converts bytea to binary format
460	*
461	* This is a special case: just copy the input...
462	*/
463	Datum
464	byteasend(PG_FUNCTION_ARGS)
465	{
466	bytea *vlena = PG_GETARG_BYTEA_P_COPY(`0`);
467
468	PG_RETURN_BYTEA_P(vlena);
469	}
470
471	Datum
472	bytea_string_agg_transfn(PG_FUNCTION_ARGS)
473	{
474	StringInfo state;
475
476	state = PG_ARGISNULL(`0`) ? NULL : (StringInfo) PG_GETARG_POINTER(`0`);
477
478	/ Append the value unless null. /
479	if (!PG_ARGISNULL(`1`))
480	{
481	bytea *value = PG_GETARG_BYTEA_PP(`1`);
482
483	/ On the first time through, we ignore the delimiter. /
484	if (state == NULL)
485	state = makeStringAggState(fcinfo);
486	else if (!PG_ARGISNULL(`2`))
487	{
488	bytea *delim = PG_GETARG_BYTEA_PP(`2`);
489
490	appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
491	}
492
493	appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
494	}
495
496	/*
497	* The transition type for string_agg() is declared to be "internal",
498	* which is a pass-by-value type the same size as a pointer.
499	*/
500	PG_RETURN_POINTER(state);
501	}
502
503	Datum
504	bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
505	{
506	StringInfo state;
507
508	/ cannot be called directly because of internal-type argument /
509	Assert(AggCheckCallContext(fcinfo, NULL));
510
511	state = PG_ARGISNULL(`0`) ? NULL : (StringInfo) PG_GETARG_POINTER(`0`);
512
513	if (state != NULL)
514	{
515	bytea *result;
516
517	result = (bytea *) palloc(state->len + VARHDRSZ);
518	SET_VARSIZE(result, state->len + VARHDRSZ);
519	memcpy(VARDATA(result), state->data, state->len);
520	PG_RETURN_BYTEA_P(result);
521	}
522	else
523	PG_RETURN_NULL();
524	}
525
526	/*
527	* textin - converts "..." to internal representation
528	*/
529	Datum
530	textin(PG_FUNCTION_ARGS)
531	{
532	char *inputText = PG_GETARG_CSTRING(`0`);
533
534	PG_RETURN_TEXT_P(cstring_to_text(inputText));
535	}
536
537	/*
538	* textout - converts internal representation to "..."
539	*/
540	Datum
541	textout(PG_FUNCTION_ARGS)
542	{
543	Datum txt = PG_GETARG_DATUM(`0`);
544
545	PG_RETURN_CSTRING(TextDatumGetCString(txt));
546	}
547
548	/*
549	* textrecv - converts external binary format to text
550	*/
551	Datum
552	textrecv(PG_FUNCTION_ARGS)
553	{
554	StringInfo buf = (StringInfo) PG_GETARG_POINTER(`0`);
555	text *result;
556	char *str;
557	int nbytes;
558
559	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
560
561	result = cstring_to_text_with_len(str, nbytes);
562	pfree(str);
563	PG_RETURN_TEXT_P(result);
564	}
565
566	/*
567	* textsend - converts text to binary format
568	*/
569	Datum
570	textsend(PG_FUNCTION_ARGS)
571	{
572	text *t = PG_GETARG_TEXT_PP(`0`);
573	StringInfoData buf;
574
575	pq_begintypsend(&buf);
576	pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
577	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
578	}
579
580
581	/*
582	* unknownin - converts "..." to internal representation
583	*/
584	Datum
585	unknownin(PG_FUNCTION_ARGS)
586	{
587	char *str = PG_GETARG_CSTRING(`0`);
588
589	/ representation is same as cstring /
590	PG_RETURN_CSTRING(pstrdup(str));
591	}
592
593	/*
594	* unknownout - converts internal representation to "..."
595	*/
596	Datum
597	unknownout(PG_FUNCTION_ARGS)
598	{
599	/ representation is same as cstring /
600	char *str = PG_GETARG_CSTRING(`0`);
601
602	PG_RETURN_CSTRING(pstrdup(str));
603	}
604
605	/*
606	* unknownrecv - converts external binary format to unknown
607	*/
608	Datum
609	unknownrecv(PG_FUNCTION_ARGS)
610	{
611	StringInfo buf = (StringInfo) PG_GETARG_POINTER(`0`);
612	char *str;
613	int nbytes;
614
615	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
616	/ representation is same as cstring /
617	PG_RETURN_CSTRING(str);
618	}
619
620	/*
621	* unknownsend - converts unknown to binary format
622	*/
623	Datum
624	unknownsend(PG_FUNCTION_ARGS)
625	{
626	/ representation is same as cstring /
627	char *str = PG_GETARG_CSTRING(`0`);
628	StringInfoData buf;
629
630	pq_begintypsend(&buf);
631	pq_sendtext(&buf, str, strlen(str));
632	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
633	}
634
635
/* ========== PUBLIC ROUTINES ========== */
637
638	/*
639	* textlen -
640	* returns the logical length of a text*
641	* (which is less than the VARSIZE of the text*)
642	*/
643	Datum
644	textlen(PG_FUNCTION_ARGS)
645	{
646	Datum str = PG_GETARG_DATUM(`0`);
647
648	/ try to avoid decompressing argument /
649	PG_RETURN_INT32(text_length(str));
650	}
651
652	/*
653	* text_length -
654	* Does the real work for textlen()
655	*
656	* This is broken out so it can be called directly by other string processing
657	* functions. Note that the argument is passed as a Datum, to indicate that
658	* it may still be in compressed form. We can avoid decompressing it at all
659	* in some cases.
660	*/
661	static int32
662	text_length(Datum str)
663	{
664	/ fastpath when max encoding length is one /
665	if (pg_database_encoding_max_length() == `1`)
666	PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
667	else
668	{
669	text *t = DatumGetTextPP(str);
670
671	PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
672	VARSIZE_ANY_EXHDR(t)));
673	}
674	}
675
676	/*
677	* textoctetlen -
678	* returns the physical length of a text*
679	* (which is less than the VARSIZE of the text*)
680	*/
681	Datum
682	textoctetlen(PG_FUNCTION_ARGS)
683	{
684	Datum str = PG_GETARG_DATUM(`0`);
685
686	/ We need not detoast the input at all /
687	PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
688	}
689
690	/*
691	* textcat -
692	* takes two text* and returns a text* that is the concatenation of
693	* the two.
694	*
695	* Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
696	* Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
697	* Allocate space for output in all cases.
698	* XXX - thomas 1997-07-10
699	*/
700	Datum
701	textcat(PG_FUNCTION_ARGS)
702	{
703	text *t1 = PG_GETARG_TEXT_PP(`0`);
704	text *t2 = PG_GETARG_TEXT_PP(`1`);
705
706	PG_RETURN_TEXT_P(text_catenate(t1, t2));
707	}
708
709	/*
710	* text_catenate
711	* Guts of textcat(), broken out so it can be used by other functions
712	*
713	* Arguments can be in short-header form, but not compressed or out-of-line
714	*/
715	static text *
716	text_catenate(text t1, text t2)
717	{
718	text *result;
719	int len1,
720	len2,
721	len;
722	char *ptr;
723
724	len1 = VARSIZE_ANY_EXHDR(t1);
725	len2 = VARSIZE_ANY_EXHDR(t2);
726
727	/ paranoia ... probably should throw error instead? /
728	if (len1 < `0`)
729	len1 = `0`;
730	if (len2 < `0`)
731	len2 = `0`;
732
733	len = len1 + len2 + VARHDRSZ;
734	result = (text *) palloc(len);
735
736	/ Set size of result string... /
737	SET_VARSIZE(result, len);
738
739	/ Fill data field of result string... /
740	ptr = VARDATA(result);
741	if (len1 > `0`)
742	memcpy(ptr, VARDATA_ANY(t1), len1);
743	if (len2 > `0`)
744	memcpy(ptr + len1, VARDATA_ANY(t2), len2);
745
746	return result;
747	}
748
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * It is caller's responsibility that there actually are n characters;
 * the string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	if (pg_database_encoding_max_length() == 1)
	{
		/* Optimization for single-byte encodings */
		return n;
	}
	else
	{
		const char *s;

		/* step over n characters, one pg_mblen() at a time */
		for (s = p; n > 0; n--)
			s += pg_mblen(s);

		return s - p;
	}
}
774
775	/*
776	* text_substr()
777	* Return a substring starting at the specified position.
778	* - thomas 1997-12-31
779	*
780	* Input:
781	* - string
782	* - starting position (is one-based)
783	* - string length
784	*
785	* If the starting position is zero or less, then return from the start of the string
786	* adjusting the length to be consistent with the "negative start" per SQL.
787	* If the length is less than zero, return the remaining string.
788	*
789	* Added multibyte support.
790	* - Tatsuo Ishii 1998-4-21
791	* Changed behavior if starting position is less than one to conform to SQL behavior.
792	* Formerly returned the entire string; now returns a portion.
793	* - Thomas Lockhart 1998-12-10
794	* Now uses faster TOAST-slicing interface
795	* - John Gray 2002-02-22
796	* Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
797	* behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
798	* error; if E < 1, return '', not entire string). Fixed MB related bug when
799	* S > LC and < LC + 4 sometimes garbage characters are returned.
800	* - Joe Conway 2002-08-10
801	*/
802	Datum
803	text_substr(PG_FUNCTION_ARGS)
804	{
805	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(`0`),
806	PG_GETARG_INT32(`1`),
807	PG_GETARG_INT32(`2`),
808	false));
809	}
810
811	/*
812	* text_substr_no_len -
813	* Wrapper to avoid opr_sanity failure due to
814	* one function accepting a different number of args.
815	*/
816	Datum
817	text_substr_no_len(PG_FUNCTION_ARGS)
818	{
819	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(`0`),
820	PG_GETARG_INT32(`1`),
821	-`1`, true));
822	}
823
824	/*
825	* text_substring -
826	* Does the real work for text_substr() and text_substr_no_len()
827	*
828	* This is broken out so it can be called directly by other string processing
829	* functions. Note that the argument is passed as a Datum, to indicate that
830	* it may still be in compressed/toasted form. We can avoid detoasting all
831	* of it in some cases.
832	*
833	* The result is always a freshly palloc'd datum.
834	*/
835	static text *
836	text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
837	{
838	int32 eml = pg_database_encoding_max_length();
839	int32 S = start; / start position /
840	int32 S1; / adjusted start position /
841	int32 L1; / adjusted substring length /
842
843	/ life is easy if the encoding max length is 1 /
844	if (eml == `1`)
845	{
846	S1 = Max(S, `1`);
847
848	if (length_not_specified) / special case - get length to end of*
849	* string */
850	L1 = -`1`;
851	else
852	{
853	/ end position /
854	int E = S + length;
855
856	/*
857	* A negative value for L is the only way for the end position to
858	* be before the start. SQL99 says to throw an error.
859	*/
860	if (E < S)
861	ereport(ERROR,
862	(errcode(ERRCODE_SUBSTRING_ERROR),
863	errmsg("negative substring length not allowed")));
864
865	/*
866	* A zero or negative value for the end position can happen if the
867	* start was negative or one. SQL99 says to return a zero-length
868	* string.
869	*/
870	if (E < `1`)
871	return cstring_to_text("");
872
873	L1 = E - S1;
874	}
875
876	/*
877	* If the start position is past the end of the string, SQL99 says to
878	* return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
879	* that for us. Convert to zero-based starting position
880	*/
881	return DatumGetTextPSlice(str, S1 - `1`, L1);
882	}
883	else if (eml > `1`)
884	{
885	/*
886	* When encoding max length is > 1, we can't get LC without
887	* detoasting, so we'll grab a conservatively large slice now and go
888	* back later to do the right thing
889	*/
890	int32 slice_start;
891	int32 slice_size;
892	int32 slice_strlen;
893	text *slice;
894	int32 E1;
895	int32 i;
896	char *p;
897	char *s;
898	text *ret;
899
900	/*
901	* if S is past the end of the string, the tuple toaster will return a
902	* zero-length string to us
903	*/
904	S1 = Max(S, `1`);
905
906	/*
907	* We need to start at position zero because there is no way to know
908	* in advance which byte offset corresponds to the supplied start
909	* position.
910	*/
911	slice_start = `0`;
912
913	if (length_not_specified) / special case - get length to end of*
914	* string */
915	slice_size = L1 = -`1`;
916	else
917	{
918	int E = S + length;
919
920	/*
921	* A negative value for L is the only way for the end position to
922	* be before the start. SQL99 says to throw an error.
923	*/
924	if (E < S)
925	ereport(ERROR,
926	(errcode(ERRCODE_SUBSTRING_ERROR),
927	errmsg("negative substring length not allowed")));
928
929	/*
930	* A zero or negative value for the end position can happen if the
931	* start was negative or one. SQL99 says to return a zero-length
932	* string.
933	*/
934	if (E < `1`)
935	return cstring_to_text("");
936
937	/*
938	* if E is past the end of the string, the tuple toaster will
939	* truncate the length for us
940	*/
941	L1 = E - S1;
942
943	/*
944	* Total slice size in bytes can't be any longer than the start
945	* position plus substring length times the encoding max length.
946	*/
947	slice_size = (S1 + L1) * eml;
948	}
949
950	/*
951	* If we're working with an untoasted source, no need to do an extra
952	* copying step.
953	*/
954	if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) \|\|
955	VARATT_IS_EXTERNAL(DatumGetPointer(str)))
956	slice = DatumGetTextPSlice(str, slice_start, slice_size);
957	else
958	slice = (text *) DatumGetPointer(str);
959
960	/ see if we got back an empty string /
961	if (VARSIZE_ANY_EXHDR(slice) == `0`)
962	{
963	if (slice != (text *) DatumGetPointer(str))
964	pfree(slice);
965	return cstring_to_text("");
966	}
967
968	/ Now we can get the actual length of the slice in MB characters /
969	slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
970	VARSIZE_ANY_EXHDR(slice));
971
972	/*
973	* Check that the start position wasn't > slice_strlen. If so, SQL99
974	* says to return a zero-length string.
975	*/
976	if (S1 > slice_strlen)
977	{
978	if (slice != (text *) DatumGetPointer(str))
979	pfree(slice);
980	return cstring_to_text("");
981	}
982
983	/*
984	* Adjust L1 and E1 now that we know the slice string length. Again
985	* remember that S1 is one based, and slice_start is zero based.
986	*/
987	if (L1 > -`1`)
988	E1 = Min(S1 + L1, slice_start + `1` + slice_strlen);
989	else
990	E1 = slice_start + `1` + slice_strlen;
991
992	/*
993	* Find the start position in the slice; remember S1 is not zero based
994	*/
995	p = VARDATA_ANY(slice);
996	for (i = `0`; i < S1 - `1`; i++)
997	p += pg_mblen(p);
998
999	/ hang onto a pointer to our start position /
1000	s = p;
1001
1002	/*
1003	* Count the actual bytes used by the substring of the requested
1004	* length.
1005	*/
1006	for (i = S1; i < E1; i++)
1007	p += pg_mblen(p);
1008
1009	ret = (text *) palloc(VARHDRSZ + (p - s));
1010	SET_VARSIZE(ret, VARHDRSZ + (p - s));
1011	memcpy(VARDATA(ret), s, (p - s));
1012
1013	if (slice != (text *) DatumGetPointer(str))
1014	pfree(slice);
1015
1016	return ret;
1017	}
1018	else
1019	elog(ERROR, "invalid backend encoding: encoding max length < 1");
1020
1021	/ not reached: suppress compiler warning /
1022	return NULL;
1023	}
1024
1025	/*
1026	* textoverlay
1027	* Replace specified substring of first string with second
1028	*
1029	* The SQL standard defines OVERLAY() in terms of substring and concatenation.
1030	* This code is a direct implementation of what the standard says.
1031	*/
1032	Datum
1033	textoverlay(PG_FUNCTION_ARGS)
1034	{
1035	text *t1 = PG_GETARG_TEXT_PP(`0`);
1036	text *t2 = PG_GETARG_TEXT_PP(`1`);
1037	int sp = PG_GETARG_INT32(`2`); / substring start position /
1038	int sl = PG_GETARG_INT32(`3`); / substring length /
1039
1040	PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1041	}
1042
1043	Datum
1044	textoverlay_no_len(PG_FUNCTION_ARGS)
1045	{
1046	text *t1 = PG_GETARG_TEXT_PP(`0`);
1047	text *t2 = PG_GETARG_TEXT_PP(`1`);
1048	int sp = PG_GETARG_INT32(`2`); / substring start position /
1049	int sl;
1050
1051	sl = text_length(PointerGetDatum(t2)); / defaults to length(t2) /
1052	PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
1053	}
1054
1055	static text *
1056	text_overlay(text t1, text t2, int sp, int sl)
1057	{
1058	text *result;
1059	text *s1;
1060	text *s2;
1061	int sp_pl_sl;
1062
1063	/*
1064	* Check for possible integer-overflow cases. For negative sp, throw a
1065	* "substring length" error because that's what should be expected
1066	* according to the spec's definition of OVERLAY().
1067	*/
1068	if (sp <= `0`)
1069	ereport(ERROR,
1070	(errcode(ERRCODE_SUBSTRING_ERROR),
1071	errmsg("negative substring length not allowed")));
1072	if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
1073	ereport(ERROR,
1074	(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
1075	errmsg("integer out of range")));
1076
1077	s1 = text_substring(PointerGetDatum(t1), `1`, sp - `1`, false);
1078	s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -`1`, true);
1079	result = text_catenate(s1, t2);
1080	result = text_catenate(result, s2);
1081
1082	return result;
1083	}
1084
1085	/*
1086	* textpos -
1087	* Return the position of the specified substring.
1088	* Implements the SQL POSITION() function.
1089	* Ref: A Guide To The SQL Standard, Date & Darwen, 1997
1090	* - thomas 1997-07-27
1091	*/
1092	Datum
1093	textpos(PG_FUNCTION_ARGS)
1094	{
1095	text *str = PG_GETARG_TEXT_PP(`0`);
1096	text *search_str = PG_GETARG_TEXT_PP(`1`);
1097
1098	PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
1099	}
1100
1101	/*
1102	* text_position -
1103	* Does the real work for textpos()
1104	*
1105	* Inputs:
1106	* t1 - string to be searched
1107	* t2 - pattern to match within t1
1108	* Result:
1109	* Character index of the first matched char, starting from 1,
1110	* or 0 if no match.
1111	*
1112	* This is broken out so it can be called directly by other string processing
1113	* functions.
1114	*/
1115	static int
1116	text_position(text t1, text t2, Oid collid)
1117	{
1118	TextPositionState state;
1119	int result;
1120
1121	if (VARSIZE_ANY_EXHDR(t1) < `1` \|\| VARSIZE_ANY_EXHDR(t2) < `1`)
1122	return `0`;
1123
1124	text_position_setup(t1, t2, collid, &state);
1125	if (!text_position_next(&state))
1126	result = `0`;
1127	else
1128	result = text_position_get_match_pos(&state);
1129	text_position_cleanup(&state);
1130	return result;
1131	}
1132
1133
1134	/*
1135	* text_position_setup, text_position_next, text_position_cleanup -
1136	* Component steps of text_position()
1137	*
1138	* These are broken out so that a string can be efficiently searched for
1139	* multiple occurrences of the same pattern. text_position_next may be
1140	* called multiple times, and it advances to the next match on each call.
1141	* text_position_get_match_ptr() and text_position_get_match_pos() return
1142	* a pointer or 1-based character position of the last match, respectively.
1143	*
1144	* The "state" variable is normally just a local variable in the caller.
1145	*
1146	* NOTE: text_position_next skips over the matched portion. For example,
1147	* searching for "xx" in "xxx" returns only one match, not two.
1148	*/
1149
1150	static void
1151	text_position_setup(text t1, text t2, Oid collid, TextPositionState *state)
1152	{
1153	int len1 = VARSIZE_ANY_EXHDR(t1);
1154	int len2 = VARSIZE_ANY_EXHDR(t2);
1155	pg_locale_t mylocale = `0`;
1156
1157	check_collation_set(collid);
1158
1159	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1160	mylocale = pg_newlocale_from_collation(collid);
1161
1162	if (mylocale && !mylocale->deterministic)
1163	ereport(ERROR,
1164	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1165	errmsg("nondeterministic collations are not supported for substring searches")));
1166
1167	Assert(len1 > `0`);
1168	Assert(len2 > `0`);
1169
1170	/*
1171	* Even with a multi-byte encoding, we perform the search using the raw
1172	* byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
1173	* because in UTF-8 the byte sequence of one character cannot contain
1174	* another character. For other multi-byte encodings, we do the search
1175	* initially as a simple byte search, ignoring multibyte issues, but
1176	* verify afterwards that the match we found is at a character boundary,
1177	* and continue the search if it was a false match.
1178	*/
1179	if (pg_database_encoding_max_length() == `1`)
1180	{
1181	state->is_multibyte = false;
1182	state->is_multibyte_char_in_char = false;
1183	}
1184	else if (GetDatabaseEncoding() == PG_UTF8)
1185	{
1186	state->is_multibyte = true;
1187	state->is_multibyte_char_in_char = false;
1188	}
1189	else
1190	{
1191	state->is_multibyte = true;
1192	state->is_multibyte_char_in_char = true;
1193	}
1194
1195	state->str1 = VARDATA_ANY(t1);
1196	state->str2 = VARDATA_ANY(t2);
1197	state->len1 = len1;
1198	state->len2 = len2;
1199	state->last_match = NULL;
1200	state->refpoint = state->str1;
1201	state->refpos = `0`;
1202
1203	/*
1204	* Prepare the skip table for Boyer-Moore-Horspool searching. In these
1205	* notes we use the terminology that the "haystack" is the string to be
1206	* searched (t1) and the "needle" is the pattern being sought (t2).
1207	*
1208	* If the needle is empty or bigger than the haystack then there is no
1209	* point in wasting cycles initializing the table. We also choose not to
1210	* use B-M-H for needles of length 1, since the skip table can't possibly
1211	* save anything in that case.
1212	*/
1213	if (len1 >= len2 && len2 > `1`)
1214	{
1215	int searchlength = len1 - len2;
1216	int skiptablemask;
1217	int last;
1218	int i;
1219	const char *str2 = state->str2;
1220
1221	/*
1222	* First we must determine how much of the skip table to use. The
1223	* declaration of TextPositionState allows up to 256 elements, but for
1224	* short search problems we don't really want to have to initialize so
1225	* many elements --- it would take too long in comparison to the
1226	* actual search time. So we choose a useful skip table size based on
1227	* the haystack length minus the needle length. The closer the needle
1228	* length is to the haystack length the less useful skipping becomes.
1229	*
1230	* Note: since we use bit-masking to select table elements, the skip
1231	* table size MUST be a power of 2, and so the mask must be 2^N-1.
1232	*/
1233	if (searchlength < `16`)
1234	skiptablemask = `3`;
1235	else if (searchlength < `64`)
1236	skiptablemask = `7`;
1237	else if (searchlength < `128`)
1238	skiptablemask = `15`;
1239	else if (searchlength < `512`)
1240	skiptablemask = `31`;
1241	else if (searchlength < `2048`)
1242	skiptablemask = `63`;
1243	else if (searchlength < `4096`)
1244	skiptablemask = `127`;
1245	else
1246	skiptablemask = `255`;
1247	state->skiptablemask = skiptablemask;
1248
1249	/*
1250	* Initialize the skip table. We set all elements to the needle
1251	* length, since this is the correct skip distance for any character
1252	* not found in the needle.
1253	*/
1254	for (i = `0`; i <= skiptablemask; i++)
1255	state->skiptable[i] = len2;
1256
1257	/*
1258	* Now examine the needle. For each character except the last one,
1259	* set the corresponding table element to the appropriate skip
1260	* distance. Note that when two characters share the same skip table
1261	* entry, the one later in the needle must determine the skip
1262	* distance.
1263	*/
1264	last = len2 - `1`;
1265
1266	for (i = `0`; i < last; i++)
1267	state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
1268	}
1269	}
1270
1271	/*
1272	* Advance to the next match, starting from the end of the previous match
1273	* (or the beginning of the string, on first call). Returns true if a match
1274	* is found.
1275	*/
1276	static bool
1277	text_position_next(TextPositionState *state)
1278	{
1279	int needle_len = state->len2;
1280	char *start_ptr;
1281	char *matchptr;
1282
1283	if (needle_len <= `0`)
1284	return false; / result for empty pattern /
1285
1286	/ Start from the point right after the previous match. /
1287	if (state->last_match)
1288	start_ptr = state->last_match + needle_len;
1289	else
1290	start_ptr = state->str1;
1291
1292	retry:
1293	matchptr = text_position_next_internal(start_ptr, state);
1294
1295	if (!matchptr)
1296	return false;
1297
1298	/*
1299	* Found a match for the byte sequence. If this is a multibyte encoding,
1300	* where one character's byte sequence can appear inside a longer
1301	* multi-byte character, we need to verify that the match was at a
1302	* character boundary, not in the middle of a multi-byte character.
1303	*/
1304	if (state->is_multibyte_char_in_char)
1305	{
1306	/ Walk one character at a time, until we reach the match. /
1307
1308	/ the search should never move backwards. /
1309	Assert(state->refpoint <= matchptr);
1310
1311	while (state->refpoint < matchptr)
1312	{
1313	/ step to next character. /
1314	state->refpoint += pg_mblen(state->refpoint);
1315	state->refpos++;
1316
1317	/*
1318	* If we stepped over the match's start position, then it was a
1319	* false positive, where the byte sequence appeared in the middle
1320	* of a multi-byte character. Skip it, and continue the search at
1321	* the next character boundary.
1322	*/
1323	if (state->refpoint > matchptr)
1324	{
1325	start_ptr = state->refpoint;
1326	goto retry;
1327	}
1328	}
1329	}
1330
1331	state->last_match = matchptr;
1332	return true;
1333	}
1334
1335	/*
1336	* Subroutine of text_position_next(). This searches for the raw byte
1337	* sequence, ignoring any multi-byte encoding issues. Returns the first
1338	* match starting at 'start_ptr', or NULL if no match is found.
1339	*/
1340	static char *
1341	text_position_next_internal(char start_ptr, TextPositionState state)
1342	{
1343	int haystack_len = state->len1;
1344	int needle_len = state->len2;
1345	int skiptablemask = state->skiptablemask;
1346	const char *haystack = state->str1;
1347	const char *needle = state->str2;
1348	const char *haystack_end = &haystack[haystack_len];
1349	const char *hptr;
1350
1351	Assert(start_ptr >= haystack && start_ptr <= haystack_end);
1352
1353	if (needle_len == `1`)
1354	{
1355	/ No point in using B-M-H for a one-character needle /
1356	char nchar = *needle;
1357
1358	hptr = start_ptr;
1359	while (hptr < haystack_end)
1360	{
1361	if (*hptr == nchar)
1362	return (char *) hptr;
1363	hptr++;
1364	}
1365	}
1366	else
1367	{
1368	const char *needle_last = &needle[needle_len - `1`];
1369
1370	/ Start at startpos plus the length of the needle /
1371	hptr = start_ptr + needle_len - `1`;
1372	while (hptr < haystack_end)
1373	{
1374	/ Match the needle scanning backward /
1375	const char *nptr;
1376	const char *p;
1377
1378	nptr = needle_last;
1379	p = hptr;
1380	while (nptr == p)
1381	{
1382	/ Matched it all? If so, return 1-based position /
1383	if (nptr == needle)
1384	return (char *) p;
1385	nptr--, p--;
1386	}
1387
1388	/*
1389	* No match, so use the haystack char at hptr to decide how far to
1390	* advance. If the needle had any occurrence of that character
1391	* (or more precisely, one sharing the same skiptable entry)
1392	* before its last character, then we advance far enough to align
1393	* the last such needle character with that haystack position.
1394	* Otherwise we can advance by the whole needle length.
1395	*/
1396	hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1397	}
1398	}
1399
1400	return `0`; / not found /
1401	}
1402
1403	/*
1404	* Return a pointer to the current match.
1405	*
1406	* The returned pointer points into correct position in the original
1407	* the haystack string.
1408	*/
1409	static char *
1410	text_position_get_match_ptr(TextPositionState *state)
1411	{
1412	return state->last_match;
1413	}
1414
1415	/*
1416	* Return the offset of the current match.
1417	*
1418	* The offset is in characters, 1-based.
1419	*/
1420	static int
1421	text_position_get_match_pos(TextPositionState *state)
1422	{
1423	if (!state->is_multibyte)
1424	return state->last_match - state->str1 + `1`;
1425	else
1426	{
1427	/ Convert the byte position to char position. /
1428	while (state->refpoint < state->last_match)
1429	{
1430	state->refpoint += pg_mblen(state->refpoint);
1431	state->refpos++;
1432	}
1433	Assert(state->refpoint == state->last_match);
1434	return state->refpos + `1`;
1435	}
1436	}
1437
1438	static void
1439	text_position_cleanup(TextPositionState *state)
1440	{
1441	/ no cleanup needed /
1442	}
1443
1444	static void
1445	check_collation_set(Oid collid)
1446	{
1447	if (!OidIsValid(collid))
1448	{
1449	/*
1450	* This typically means that the parser could not resolve a conflict
1451	* of implicit collations, so report it that way.
1452	*/
1453	ereport(ERROR,
1454	(errcode(ERRCODE_INDETERMINATE_COLLATION),
1455	errmsg("could not determine which collation to use for string comparison"),
1456	errhint("Use the COLLATE clause to set the collation explicitly.")));
1457	}
1458	}
1459
1460	/ varstr_cmp()*
1461	* Comparison function for text strings with given lengths.
1462	* Includes locale support, but must copy strings to temporary memory
1463	* to allow null-termination for inputs to strcoll().
1464	* Returns an integer less than, equal to, or greater than zero, indicating
1465	* whether arg1 is less than, equal to, or greater than arg2.
1466	*
1467	* Note: many functions that depend on this are marked leakproof; therefore,
1468	* avoid reporting the actual contents of the input when throwing errors.
1469	* All errors herein should be things that can't happen except on corrupt
1470	* data, anyway; otherwise we will have trouble with indexing strings that
1471	* would cause them.
1472	*/
1473	int
1474	varstr_cmp(const char arg1, int* len1, const char arg2, int* len2, Oid collid)
1475	{
1476	int result;
1477
1478	check_collation_set(collid);
1479
1480	/*
1481	* Unfortunately, there is no strncoll(), so in the non-C locale case we
1482	* have to do some memory copying. This turns out to be significantly
1483	* slower, so we optimize the case where LC_COLLATE is C. We also try to
1484	* optimize relatively-short strings by avoiding palloc/pfree overhead.
1485	*/
1486	if (lc_collate_is_c(collid))
1487	{
1488	result = memcmp(arg1, arg2, Min(len1, len2));
1489	if ((result == `0`) && (len1 != len2))
1490	result = (len1 < len2) ? -`1` : `1`;
1491	}
1492	else
1493	{
1494	char a1buf[TEXTBUFLEN];
1495	char a2buf[TEXTBUFLEN];
1496	char *a1p,
1497	*a2p;
1498	pg_locale_t mylocale = `0`;
1499
1500	if (collid != DEFAULT_COLLATION_OID)
1501	mylocale = pg_newlocale_from_collation(collid);
1502
1503	/*
1504	* memcmp() can't tell us which of two unequal strings sorts first,
1505	* but it's a cheap way to tell if they're equal. Testing shows that
1506	* memcmp() followed by strcoll() is only trivially slower than
1507	* strcoll() by itself, so we don't lose much if this doesn't work out
1508	* very often, and if it does - for example, because there are many
1509	* equal strings in the input - then we win big by avoiding expensive
1510	* collation-aware comparisons.
1511	*/
1512	if (len1 == len2 && memcmp(arg1, arg2, len1) == `0`)
1513	return `0`;
1514
1515	#ifdef WIN32
1516	/ Win32 does not have UTF-8, so we need to map to UTF-16 /
1517	if (GetDatabaseEncoding() == PG_UTF8
1518	&& (!mylocale \|\| mylocale->provider == COLLPROVIDER_LIBC))
1519	{
1520	int a1len;
1521	int a2len;
1522	int r;
1523
1524	if (len1 >= TEXTBUFLEN / `2`)
1525	{
1526	a1len = len1 * `2` + `2`;
1527	a1p = palloc(a1len);
1528	}
1529	else
1530	{
1531	a1len = TEXTBUFLEN;
1532	a1p = a1buf;
1533	}
1534	if (len2 >= TEXTBUFLEN / `2`)
1535	{
1536	a2len = len2 * `2` + `2`;
1537	a2p = palloc(a2len);
1538	}
1539	else
1540	{
1541	a2len = TEXTBUFLEN;
1542	a2p = a2buf;
1543	}
1544
1545	/ stupid Microsloth API does not work for zero-length input /
1546	if (len1 == `0`)
1547	r = `0`;
1548	else
1549	{
1550	r = MultiByteToWideChar(CP_UTF8, `0`, arg1, len1,
1551	(LPWSTR) a1p, a1len / `2`);
1552	if (!r)
1553	ereport(ERROR,
1554	(errmsg("could not convert string to UTF-16: error code %lu",
1555	GetLastError())));
1556	}
1557	((LPWSTR) a1p)[r] = `0`;
1558
1559	if (len2 == `0`)
1560	r = `0`;
1561	else
1562	{
1563	r = MultiByteToWideChar(CP_UTF8, `0`, arg2, len2,
1564	(LPWSTR) a2p, a2len / `2`);
1565	if (!r)
1566	ereport(ERROR,
1567	(errmsg("could not convert string to UTF-16: error code %lu",
1568	GetLastError())));
1569	}
1570	((LPWSTR) a2p)[r] = `0`;
1571
1572	errno = `0`;
1573	#ifdef HAVE_LOCALE_T
1574	if (mylocale)
1575	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
1576	else
1577	#endif
1578	result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1579	if (result == `2147483647`) / _NLSCMPERROR; missing from mingw*
1580	* headers */
1581	ereport(ERROR,
1582	(errmsg("could not compare Unicode strings: %m")));
1583
1584	/ Break tie if necessary. /
1585	if (result == `0` &&
1586	(!mylocale \|\| mylocale->deterministic))
1587	{
1588	result = memcmp(arg1, arg2, Min(len1, len2));
1589	if ((result == `0`) && (len1 != len2))
1590	result = (len1 < len2) ? -`1` : `1`;
1591	}
1592
1593	if (a1p != a1buf)
1594	pfree(a1p);
1595	if (a2p != a2buf)
1596	pfree(a2p);
1597
1598	return result;
1599	}
1600	#endif /* WIN32 */
1601
1602	if (len1 >= TEXTBUFLEN)
1603	a1p = (char *) palloc(len1 + `1`);
1604	else
1605	a1p = a1buf;
1606	if (len2 >= TEXTBUFLEN)
1607	a2p = (char *) palloc(len2 + `1`);
1608	else
1609	a2p = a2buf;
1610
1611	memcpy(a1p, arg1, len1);
1612	a1p[len1] = `'\0'`;
1613	memcpy(a2p, arg2, len2);
1614	a2p[len2] = `'\0'`;
1615
1616	if (mylocale)
1617	{
1618	if (mylocale->provider == COLLPROVIDER_ICU)
1619	{
1620	#ifdef USE_ICU
1621	#ifdef HAVE_UCOL_STRCOLLUTF8
1622	if (GetDatabaseEncoding() == PG_UTF8)
1623	{
1624	UErrorCode status;
1625
1626	status = U_ZERO_ERROR;
1627	result = ucol_strcollUTF8(mylocale->info.icu.ucol,
1628	arg1, len1,
1629	arg2, len2,
1630	&status);
1631	if (U_FAILURE(status))
1632	ereport(ERROR,
1633	(errmsg("collation failed: %s", u_errorName(status))));
1634	}
1635	else
1636	#endif
1637	{
1638	int32_t ulen1,
1639	ulen2;
1640	UChar *uchar1,
1641	*uchar2;
1642
1643	ulen1 = icu_to_uchar(&uchar1, arg1, len1);
1644	ulen2 = icu_to_uchar(&uchar2, arg2, len2);
1645
1646	result = ucol_strcoll(mylocale->info.icu.ucol,
1647	uchar1, ulen1,
1648	uchar2, ulen2);
1649
1650	pfree(uchar1);
1651	pfree(uchar2);
1652	}
1653	#else /* not USE_ICU */
1654	/ shouldn't happen /
1655	elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1656	#endif /* not USE_ICU */
1657	}
1658	else
1659	{
1660	#ifdef HAVE_LOCALE_T
1661	result = strcoll_l(a1p, a2p, mylocale->info.lt);
1662	#else
1663	/ shouldn't happen /
1664	elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
1665	#endif
1666	}
1667	}
1668	else
1669	result = strcoll(a1p, a2p);
1670
1671	/ Break tie if necessary. /
1672	if (result == `0` &&
1673	(!mylocale \|\| mylocale->deterministic))
1674	result = strcmp(a1p, a2p);
1675
1676	if (a1p != a1buf)
1677	pfree(a1p);
1678	if (a2p != a2buf)
1679	pfree(a2p);
1680	}
1681
1682	return result;
1683	}
1684
1685	/ text_cmp()*
1686	* Internal comparison function for text strings.
1687	* Returns -1, 0 or 1
1688	*/
1689	static int
1690	text_cmp(text arg1, text arg2, Oid collid)
1691	{
1692	char *a1p,
1693	*a2p;
1694	int len1,
1695	len2;
1696
1697	a1p = VARDATA_ANY(arg1);
1698	a2p = VARDATA_ANY(arg2);
1699
1700	len1 = VARSIZE_ANY_EXHDR(arg1);
1701	len2 = VARSIZE_ANY_EXHDR(arg2);
1702
1703	return varstr_cmp(a1p, len1, a2p, len2, collid);
1704	}
1705
1706	/*
1707	* Comparison functions for text strings.
1708	*
1709	* Note: btree indexes need these routines not to leak memory; therefore,
1710	* be careful to free working copies of toasted datums. Most places don't
1711	* need to be so careful.
1712	*/
1713
1714	Datum
1715	texteq(PG_FUNCTION_ARGS)
1716	{
1717	Oid collid = PG_GET_COLLATION();
1718	bool result;
1719
1720	check_collation_set(collid);
1721
1722	if (lc_collate_is_c(collid) \|\|
1723	collid == DEFAULT_COLLATION_OID \|\|
1724	pg_newlocale_from_collation(collid)->deterministic)
1725	{
1726	Datum arg1 = PG_GETARG_DATUM(`0`);
1727	Datum arg2 = PG_GETARG_DATUM(`1`);
1728	Size len1,
1729	len2;
1730
1731	/*
1732	* Since we only care about equality or not-equality, we can avoid all
1733	* the expense of strcoll() here, and just do bitwise comparison. In
1734	* fact, we don't even have to do a bitwise comparison if we can show
1735	* the lengths of the strings are unequal; which might save us from
1736	* having to detoast one or both values.
1737	*/
1738	len1 = toast_raw_datum_size(arg1);
1739	len2 = toast_raw_datum_size(arg2);
1740	if (len1 != len2)
1741	result = false;
1742	else
1743	{
1744	text *targ1 = DatumGetTextPP(arg1);
1745	text *targ2 = DatumGetTextPP(arg2);
1746
1747	result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1748	len1 - VARHDRSZ) == `0`);
1749
1750	PG_FREE_IF_COPY(targ1, `0`);
1751	PG_FREE_IF_COPY(targ2, `1`);
1752	}
1753	}
1754	else
1755	{
1756	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1757	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1758
1759	result = (text_cmp(arg1, arg2, collid) == `0`);
1760
1761	PG_FREE_IF_COPY(arg1, `0`);
1762	PG_FREE_IF_COPY(arg2, `1`);
1763	}
1764
1765	PG_RETURN_BOOL(result);
1766	}
1767
1768	Datum
1769	textne(PG_FUNCTION_ARGS)
1770	{
1771	Oid collid = PG_GET_COLLATION();
1772	bool result;
1773
1774	check_collation_set(collid);
1775
1776	if (lc_collate_is_c(collid) \|\|
1777	collid == DEFAULT_COLLATION_OID \|\|
1778	pg_newlocale_from_collation(collid)->deterministic)
1779	{
1780	Datum arg1 = PG_GETARG_DATUM(`0`);
1781	Datum arg2 = PG_GETARG_DATUM(`1`);
1782	Size len1,
1783	len2;
1784
1785	/ See comment in texteq() /
1786	len1 = toast_raw_datum_size(arg1);
1787	len2 = toast_raw_datum_size(arg2);
1788	if (len1 != len2)
1789	result = true;
1790	else
1791	{
1792	text *targ1 = DatumGetTextPP(arg1);
1793	text *targ2 = DatumGetTextPP(arg2);
1794
1795	result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1796	len1 - VARHDRSZ) != `0`);
1797
1798	PG_FREE_IF_COPY(targ1, `0`);
1799	PG_FREE_IF_COPY(targ2, `1`);
1800	}
1801	}
1802	else
1803	{
1804	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1805	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1806
1807	result = (text_cmp(arg1, arg2, collid) != `0`);
1808
1809	PG_FREE_IF_COPY(arg1, `0`);
1810	PG_FREE_IF_COPY(arg2, `1`);
1811	}
1812
1813	PG_RETURN_BOOL(result);
1814	}
1815
1816	Datum
1817	text_lt(PG_FUNCTION_ARGS)
1818	{
1819	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1820	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1821	bool result;
1822
1823	result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < `0`);
1824
1825	PG_FREE_IF_COPY(arg1, `0`);
1826	PG_FREE_IF_COPY(arg2, `1`);
1827
1828	PG_RETURN_BOOL(result);
1829	}
1830
1831	Datum
1832	text_le(PG_FUNCTION_ARGS)
1833	{
1834	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1835	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1836	bool result;
1837
1838	result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= `0`);
1839
1840	PG_FREE_IF_COPY(arg1, `0`);
1841	PG_FREE_IF_COPY(arg2, `1`);
1842
1843	PG_RETURN_BOOL(result);
1844	}
1845
1846	Datum
1847	text_gt(PG_FUNCTION_ARGS)
1848	{
1849	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1850	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1851	bool result;
1852
1853	result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > `0`);
1854
1855	PG_FREE_IF_COPY(arg1, `0`);
1856	PG_FREE_IF_COPY(arg2, `1`);
1857
1858	PG_RETURN_BOOL(result);
1859	}
1860
1861	Datum
1862	text_ge(PG_FUNCTION_ARGS)
1863	{
1864	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1865	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1866	bool result;
1867
1868	result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= `0`);
1869
1870	PG_FREE_IF_COPY(arg1, `0`);
1871	PG_FREE_IF_COPY(arg2, `1`);
1872
1873	PG_RETURN_BOOL(result);
1874	}
1875
1876	Datum
1877	text_starts_with(PG_FUNCTION_ARGS)
1878	{
1879	Datum arg1 = PG_GETARG_DATUM(`0`);
1880	Datum arg2 = PG_GETARG_DATUM(`1`);
1881	Oid collid = PG_GET_COLLATION();
1882	pg_locale_t mylocale = `0`;
1883	bool result;
1884	Size len1,
1885	len2;
1886
1887	check_collation_set(collid);
1888
1889	if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
1890	mylocale = pg_newlocale_from_collation(collid);
1891
1892	if (mylocale && !mylocale->deterministic)
1893	ereport(ERROR,
1894	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1895	errmsg("nondeterministic collations are not supported for substring searches")));
1896
1897	len1 = toast_raw_datum_size(arg1);
1898	len2 = toast_raw_datum_size(arg2);
1899	if (len2 > len1)
1900	result = false;
1901	else
1902	{
1903	text *targ1 = text_substring(arg1, `1`, len2, false);
1904	text *targ2 = DatumGetTextPP(arg2);
1905
1906	result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
1907	VARSIZE_ANY_EXHDR(targ2)) == `0`);
1908
1909	PG_FREE_IF_COPY(targ1, `0`);
1910	PG_FREE_IF_COPY(targ2, `1`);
1911	}
1912
1913	PG_RETURN_BOOL(result);
1914	}
1915
1916	Datum
1917	bttextcmp(PG_FUNCTION_ARGS)
1918	{
1919	text *arg1 = PG_GETARG_TEXT_PP(`0`);
1920	text *arg2 = PG_GETARG_TEXT_PP(`1`);
1921	int32 result;
1922
1923	result = text_cmp(arg1, arg2, PG_GET_COLLATION());
1924
1925	PG_FREE_IF_COPY(arg1, `0`);
1926	PG_FREE_IF_COPY(arg2, `1`);
1927
1928	PG_RETURN_INT32(result);
1929	}
1930
1931	Datum
1932	bttextsortsupport(PG_FUNCTION_ARGS)
1933	{
1934	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(`0`);
1935	Oid collid = ssup->ssup_collation;
1936	MemoryContext oldcontext;
1937
1938	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
1939
1940	/ Use generic string SortSupport /
1941	varstr_sortsupport(ssup, TEXTOID, collid);
1942
1943	MemoryContextSwitchTo(oldcontext);
1944
1945	PG_RETURN_VOID();
1946	}
1947
1948	/*
1949	* Generic sortsupport interface for character type's operator classes.
1950	* Includes locale support, and support for BpChar semantics (i.e. removing
1951	* trailing spaces before comparison).
1952	*
1953	* Relies on the assumption that text, VarChar, BpChar, and bytea all have the
1954	* same representation. Callers that always use the C collation (e.g.
1955	* non-collatable type callers like bytea) may have NUL bytes in their strings;
1956	* this will not work with any other collation, though.
1957	*/
1958	void
1959	varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
1960	{
1961	bool abbreviate = ssup->abbreviate;
1962	bool collate_c = false;
1963	VarStringSortSupport *sss;
1964	pg_locale_t locale = `0`;
1965
1966	check_collation_set(collid);
1967
1968	/*
1969	* If possible, set ssup->comparator to a function which can be used to
1970	* directly compare two datums. If we can do this, we'll avoid the
1971	* overhead of a trip through the fmgr layer for every comparison, which
1972	* can be substantial.
1973	*
1974	* Most typically, we'll set the comparator to varlenafastcmp_locale,
1975	* which uses strcoll() to perform comparisons. We use that for the
1976	* BpChar case too, but type NAME uses namefastcmp_locale. However, if
1977	* LC_COLLATE = C, we can make things quite a bit faster with
1978	* varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
1979	* memcmp() rather than strcoll().
1980	*/
1981	if (lc_collate_is_c(collid))
1982	{
1983	if (typid == BPCHAROID)
1984	ssup->comparator = bpcharfastcmp_c;
1985	else if (typid == NAMEOID)
1986	{
1987	ssup->comparator = namefastcmp_c;
1988	/ Not supporting abbreviation with type NAME, for now /
1989	abbreviate = false;
1990	}
1991	else
1992	ssup->comparator = varstrfastcmp_c;
1993
1994	collate_c = true;
1995	}
1996	else
1997	{
1998	/*
1999	* We need a collation-sensitive comparison. To make things faster,
2000	* we'll figure out the collation based on the locale id and cache the
2001	* result.
2002	*/
2003	if (collid != DEFAULT_COLLATION_OID)
2004	locale = pg_newlocale_from_collation(collid);
2005
2006	/*
2007	* There is a further exception on Windows. When the database
2008	* encoding is UTF-8 and we are not using the C collation, complex
2009	* hacks are required. We don't currently have a comparator that
2010	* handles that case, so we fall back on the slow method of having the
2011	* sort code invoke bttextcmp() (in the case of text) via the fmgr
2012	* trampoline. ICU locales work just the same on Windows, however.
2013	*/
2014	#ifdef WIN32
2015	if (GetDatabaseEncoding() == PG_UTF8 &&
2016	!(locale && locale->provider == COLLPROVIDER_ICU))
2017	return;
2018	#endif
2019
2020	/*
2021	* We use varlenafastcmp_locale except for type NAME.
2022	*/
2023	if (typid == NAMEOID)
2024	{
2025	ssup->comparator = namefastcmp_locale;
2026	/ Not supporting abbreviation with type NAME, for now /
2027	abbreviate = false;
2028	}
2029	else
2030	ssup->comparator = varlenafastcmp_locale;
2031	}
2032
2033	/*
2034	* Unfortunately, it seems that abbreviation for non-C collations is
2035	* broken on many common platforms; testing of multiple versions of glibc
2036	* reveals that, for many locales, strcoll() and strxfrm() do not return
2037	* consistent results, which is fatal to this optimization. While no
2038	* other libc other than Cygwin has so far been shown to have a problem,
2039	* we take the conservative course of action for right now and disable
2040	* this categorically. (Users who are certain this isn't a problem on
2041	* their system can define TRUST_STRXFRM.)
2042	*
2043	* Even apart from the risk of broken locales, it's possible that there
2044	* are platforms where the use of abbreviated keys should be disabled at
2045	* compile time. Having only 4 byte datums could make worst-case
2046	* performance drastically more likely, for example. Moreover, macOS's
2047	* strxfrm() implementation is known to not effectively concentrate a
2048	* significant amount of entropy from the original string in earlier
2049	* transformed blobs. It's possible that other supported platforms are
2050	* similarly encumbered. So, if we ever get past disabling this
2051	* categorically, we may still want or need to disable it for particular
2052	* platforms.
2053	*/
2054	#ifndef TRUST_STRXFRM
2055	if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
2056	abbreviate = false;
2057	#endif
2058
2059	/*
2060	* If we're using abbreviated keys, or if we're using a locale-aware
2061	* comparison, we need to initialize a StringSortSupport object. Both
2062	* cases will make use of the temporary buffers we initialize here for
2063	* scratch space (and to detect requirement for BpChar semantics from
2064	* caller), and the abbreviation case requires additional state.
2065	*/
2066	if (abbreviate \|\| !collate_c)
2067	{
2068	sss = palloc(sizeof(VarStringSortSupport));
2069	sss->buf1 = palloc(TEXTBUFLEN);
2070	sss->buflen1 = TEXTBUFLEN;
2071	sss->buf2 = palloc(TEXTBUFLEN);
2072	sss->buflen2 = TEXTBUFLEN;
2073	/ Start with invalid values /
2074	sss->last_len1 = -`1`;
2075	sss->last_len2 = -`1`;
2076	/ Initialize /
2077	sss->last_returned = `0`;
2078	sss->locale = locale;
2079
2080	/*
2081	* To avoid somehow confusing a strxfrm() blob and an original string,
2082	* constantly keep track of the variety of data that buf1 and buf2
2083	* currently contain.
2084	*
2085	* Comparisons may be interleaved with conversion calls. Frequently,
2086	* conversions and comparisons are batched into two distinct phases,
2087	* but the correctness of caching cannot hinge upon this. For
2088	* comparison caching, buffer state is only trusted if cache_blob is
2089	* found set to false, whereas strxfrm() caching only trusts the state
2090	* when cache_blob is found set to true.
2091	*
2092	* Arbitrarily initialize cache_blob to true.
2093	*/
2094	sss->cache_blob = true;
2095	sss->collate_c = collate_c;
2096	sss->typid = typid;
2097	ssup->ssup_extra = sss;
2098
2099	/*
2100	* If possible, plan to use the abbreviated keys optimization. The
2101	* core code may switch back to authoritative comparator should
2102	* abbreviation be aborted.
2103	*/
2104	if (abbreviate)
2105	{
2106	sss->prop_card = `0.20`;
2107	initHyperLogLog(&sss->abbr_card, `10`);
2108	initHyperLogLog(&sss->full_card, `10`);
2109	ssup->abbrev_full_comparator = ssup->comparator;
2110	ssup->comparator = varstrcmp_abbrev;
2111	ssup->abbrev_converter = varstr_abbrev_convert;
2112	ssup->abbrev_abort = varstr_abbrev_abort;
2113	}
2114	}
2115	}
2116
2117	/*
2118	* sortsupport comparison func (for C locale case)
2119	*/
2120	static int
2121	varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
2122	{
2123	VarString *arg1 = DatumGetVarStringPP(x);
2124	VarString *arg2 = DatumGetVarStringPP(y);
2125	char *a1p,
2126	*a2p;
2127	int len1,
2128	len2,
2129	result;
2130
2131	a1p = VARDATA_ANY(arg1);
2132	a2p = VARDATA_ANY(arg2);
2133
2134	len1 = VARSIZE_ANY_EXHDR(arg1);
2135	len2 = VARSIZE_ANY_EXHDR(arg2);
2136
2137	result = memcmp(a1p, a2p, Min(len1, len2));
2138	if ((result == `0`) && (len1 != len2))
2139	result = (len1 < len2) ? -`1` : `1`;
2140
2141	/ We can't afford to leak memory here. /
2142	if (PointerGetDatum(arg1) != x)
2143	pfree(arg1);
2144	if (PointerGetDatum(arg2) != y)
2145	pfree(arg2);
2146
2147	return result;
2148	}
2149
2150	/*
2151	* sortsupport comparison func (for BpChar C locale case)
2152	*
2153	* BpChar outsources its sortsupport to this module. Specialization for the
2154	* varstr_sortsupport BpChar case, modeled on
2155	* internal_bpchar_pattern_compare().
2156	*/
2157	static int
2158	bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
2159	{
2160	BpChar *arg1 = DatumGetBpCharPP(x);
2161	BpChar *arg2 = DatumGetBpCharPP(y);
2162	char *a1p,
2163	*a2p;
2164	int len1,
2165	len2,
2166	result;
2167
2168	a1p = VARDATA_ANY(arg1);
2169	a2p = VARDATA_ANY(arg2);
2170
2171	len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
2172	len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
2173
2174	result = memcmp(a1p, a2p, Min(len1, len2));
2175	if ((result == `0`) && (len1 != len2))
2176	result = (len1 < len2) ? -`1` : `1`;
2177
2178	/ We can't afford to leak memory here. /
2179	if (PointerGetDatum(arg1) != x)
2180	pfree(arg1);
2181	if (PointerGetDatum(arg2) != y)
2182	pfree(arg2);
2183
2184	return result;
2185	}
2186
2187	/*
2188	* sortsupport comparison func (for NAME C locale case)
2189	*/
2190	static int
2191	namefastcmp_c(Datum x, Datum y, SortSupport ssup)
2192	{
2193	Name arg1 = DatumGetName(x);
2194	Name arg2 = DatumGetName(y);
2195
2196	return strncmp(NameStr(arg1), NameStr(arg2), NAMEDATALEN);
2197	}
2198
2199	/*
2200	* sortsupport comparison func (for locale case with all varlena types)
2201	*/
2202	static int
2203	varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
2204	{
2205	VarString *arg1 = DatumGetVarStringPP(x);
2206	VarString *arg2 = DatumGetVarStringPP(y);
2207	char *a1p,
2208	*a2p;
2209	int len1,
2210	len2,
2211	result;
2212
2213	a1p = VARDATA_ANY(arg1);
2214	a2p = VARDATA_ANY(arg2);
2215
2216	len1 = VARSIZE_ANY_EXHDR(arg1);
2217	len2 = VARSIZE_ANY_EXHDR(arg2);
2218
2219	result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
2220
2221	/ We can't afford to leak memory here. /
2222	if (PointerGetDatum(arg1) != x)
2223	pfree(arg1);
2224	if (PointerGetDatum(arg2) != y)
2225	pfree(arg2);
2226
2227	return result;
2228	}
2229
2230	/*
2231	* sortsupport comparison func (for locale case with NAME type)
2232	*/
2233	static int
2234	namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
2235	{
2236	Name arg1 = DatumGetName(x);
2237	Name arg2 = DatumGetName(y);
2238
2239	return varstrfastcmp_locale(NameStr(arg1), strlen(NameStr(arg1)),
2240	NameStr(arg2), strlen(NameStr(arg2)),
2241	ssup);
2242	}
2243
2244	/*
2245	* sortsupport comparison func for locale cases
2246	*/
2247	static int
2248	varstrfastcmp_locale(char a1p, int* len1, char a2p, int* len2, SortSupport ssup)
2249	{
2250	VarStringSortSupport sss = (VarStringSortSupport ) ssup->ssup_extra;
2251	int result;
2252	bool arg1_match;
2253
2254	/ Fast pre-check for equality, as discussed in varstr_cmp() /
2255	if (len1 == len2 && memcmp(a1p, a2p, len1) == `0`)
2256	{
2257	/*
2258	* No change in buf1 or buf2 contents, so avoid changing last_len1 or
2259	* last_len2. Existing contents of buffers might still be used by
2260	* next call.
2261	*
2262	* It's fine to allow the comparison of BpChar padding bytes here,
2263	* even though that implies that the memcmp() will usually be
2264	* performed for BpChar callers (though multibyte characters could
2265	* still prevent that from occurring). The memcmp() is still very
2266	* cheap, and BpChar's funny semantics have us remove trailing spaces
2267	* (not limited to padding), so we need make no distinction between
2268	* padding space characters and "real" space characters.
2269	*/
2270	return `0`;
2271	}
2272
2273	if (sss->typid == BPCHAROID)
2274	{
2275	/ Get true number of bytes, ignoring trailing spaces /
2276	len1 = bpchartruelen(a1p, len1);
2277	len2 = bpchartruelen(a2p, len2);
2278	}
2279
2280	if (len1 >= sss->buflen1)
2281	{
2282	pfree(sss->buf1);
2283	sss->buflen1 = Max(len1 + `1`, Min(sss->buflen1 * `2`, MaxAllocSize));
2284	sss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen1);
2285	}
2286	if (len2 >= sss->buflen2)
2287	{
2288	pfree(sss->buf2);
2289	sss->buflen2 = Max(len2 + `1`, Min(sss->buflen2 * `2`, MaxAllocSize));
2290	sss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, sss->buflen2);
2291	}
2292
2293	/*
2294	* We're likely to be asked to compare the same strings repeatedly, and
2295	* memcmp() is so much cheaper than strcoll() that it pays to try to cache
2296	* comparisons, even though in general there is no reason to think that
2297	* that will work out (every string datum may be unique). Caching does
2298	* not slow things down measurably when it doesn't work out, and can speed
2299	* things up by rather a lot when it does. In part, this is because the
2300	* memcmp() compares data from cachelines that are needed in L1 cache even
2301	* when the last comparison's result cannot be reused.
2302	*/
2303	arg1_match = true;
2304	if (len1 != sss->last_len1 \|\| memcmp(sss->buf1, a1p, len1) != `0`)
2305	{
2306	arg1_match = false;
2307	memcpy(sss->buf1, a1p, len1);
2308	sss->buf1[len1] = `'\0'`;
2309	sss->last_len1 = len1;
2310	}
2311
2312	/*
2313	* If we're comparing the same two strings as last time, we can return the
2314	* same answer without calling strcoll() again. This is more likely than
2315	* it seems (at least with moderate to low cardinality sets), because
2316	* quicksort compares the same pivot against many values.
2317	*/
2318	if (len2 != sss->last_len2 \|\| memcmp(sss->buf2, a2p, len2) != `0`)
2319	{
2320	memcpy(sss->buf2, a2p, len2);
2321	sss->buf2[len2] = `'\0'`;
2322	sss->last_len2 = len2;
2323	}
2324	else if (arg1_match && !sss->cache_blob)
2325	{
2326	/ Use result cached following last actual strcoll() call /
2327	return sss->last_returned;
2328	}
2329
2330	if (sss->locale)
2331	{
2332	if (sss->locale->provider == COLLPROVIDER_ICU)
2333	{
2334	#ifdef USE_ICU
2335	#ifdef HAVE_UCOL_STRCOLLUTF8
2336	if (GetDatabaseEncoding() == PG_UTF8)
2337	{
2338	UErrorCode status;
2339
2340	status = U_ZERO_ERROR;
2341	result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
2342	a1p, len1,
2343	a2p, len2,
2344	&status);
2345	if (U_FAILURE(status))
2346	ereport(ERROR,
2347	(errmsg("collation failed: %s", u_errorName(status))));
2348	}
2349	else
2350	#endif
2351	{
2352	int32_t ulen1,
2353	ulen2;
2354	UChar *uchar1,
2355	*uchar2;
2356
2357	ulen1 = icu_to_uchar(&uchar1, a1p, len1);
2358	ulen2 = icu_to_uchar(&uchar2, a2p, len2);
2359
2360	result = ucol_strcoll(sss->locale->info.icu.ucol,
2361	uchar1, ulen1,
2362	uchar2, ulen2);
2363
2364	pfree(uchar1);
2365	pfree(uchar2);
2366	}
2367	#else /* not USE_ICU */
2368	/ shouldn't happen /
2369	elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2370	#endif /* not USE_ICU */
2371	}
2372	else
2373	{
2374	#ifdef HAVE_LOCALE_T
2375	result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
2376	#else
2377	/ shouldn't happen /
2378	elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
2379	#endif
2380	}
2381	}
2382	else
2383	result = strcoll(sss->buf1, sss->buf2);
2384
2385	/ Break tie if necessary. /
2386	if (result == `0` &&
2387	(!sss->locale \|\| sss->locale->deterministic))
2388	result = strcmp(sss->buf1, sss->buf2);
2389
2390	/ Cache result, perhaps saving an expensive strcoll() call next time /
2391	sss->cache_blob = false;
2392	sss->last_returned = result;
2393	return result;
2394	}
2395
2396	/*
2397	* Abbreviated key comparison func
2398	*/
2399	static int
2400	varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
2401	{
2402	/*
2403	* When 0 is returned, the core system will call varstrfastcmp_c()
2404	* (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale(). Even a
2405	* strcmp() on two non-truncated strxfrm() blobs cannot indicate equality
2406	* authoritatively, for the same reason that there is a strcoll()
2407	* tie-breaker call to strcmp() in varstr_cmp().
2408	*/
2409	if (x > y)
2410	return `1`;
2411	else if (x == y)
2412	return `0`;
2413	else
2414	return -`1`;
2415	}
2416
2417	/*
2418	* Conversion routine for sortsupport. Converts original to abbreviated key
2419	* representation. Our encoding strategy is simple -- pack the first 8 bytes
2420	* of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
2421	* stored in reverse order), and treat it as an unsigned integer. When the "C"
2422	* locale is used, or in case of bytea, just memcpy() from original instead.
2423	*/
2424	static Datum
2425	varstr_abbrev_convert(Datum original, SortSupport ssup)
2426	{
2427	VarStringSortSupport sss = (VarStringSortSupport ) ssup->ssup_extra;
2428	VarString *authoritative = DatumGetVarStringPP(original);
2429	char *authoritative_data = VARDATA_ANY(authoritative);
2430
2431	/ working state /
2432	Datum res;
2433	char *pres;
2434	int len;
2435	uint32 hash;
2436
2437	pres = (char *) &res;
2438	/ memset(), so any non-overwritten bytes are NUL /
2439	memset(pres, `0`, sizeof(Datum));
2440	len = VARSIZE_ANY_EXHDR(authoritative);
2441
2442	/ Get number of bytes, ignoring trailing spaces /
2443	if (sss->typid == BPCHAROID)
2444	len = bpchartruelen(authoritative_data, len);
2445
2446	/*
2447	* If we're using the C collation, use memcpy(), rather than strxfrm(), to
2448	* abbreviate keys. The full comparator for the C locale is always
2449	* memcmp(). It would be incorrect to allow bytea callers (callers that
2450	* always force the C collation -- bytea isn't a collatable type, but this
2451	* approach is convenient) to use strxfrm(). This is because bytea
2452	* strings may contain NUL bytes. Besides, this should be faster, too.
2453	*
2454	* More generally, it's okay that bytea callers can have NUL bytes in
2455	* strings because varstrcmp_abbrev() need not make a distinction between
2456	* terminating NUL bytes, and NUL bytes representing actual NULs in the
2457	* authoritative representation. Hopefully a comparison at or past one
2458	* abbreviated key's terminating NUL byte will resolve the comparison
2459	* without consulting the authoritative representation; specifically, some
2460	* later non-NUL byte in the longer string can resolve the comparison
2461	* against a subsequent terminating NUL in the shorter string. There will
2462	* usually be what is effectively a "length-wise" resolution there and
2463	* then.
2464	*
2465	* If that doesn't work out -- if all bytes in the longer string
2466	* positioned at or past the offset of the smaller string's (first)
2467	* terminating NUL are actually representative of NUL bytes in the
2468	* authoritative binary string (perhaps with some terminating NUL bytes
2469	* towards the end of the longer string iff it happens to still be small)
2470	* -- then an authoritative tie-breaker will happen, and do the right
2471	* thing: explicitly consider string length.
2472	*/
2473	if (sss->collate_c)
2474	memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
2475	else
2476	{
2477	Size bsize;
2478	#ifdef USE_ICU
2479	int32_t ulen = -`1`;
2480	UChar *uchar = NULL;
2481	#endif
2482
2483	/*
2484	* We're not using the C collation, so fall back on strxfrm or ICU
2485	* analogs.
2486	*/
2487
2488	/ By convention, we use buffer 1 to store and NUL-terminate /
2489	if (len >= sss->buflen1)
2490	{
2491	pfree(sss->buf1);
2492	sss->buflen1 = Max(len + `1`, Min(sss->buflen1 * `2`, MaxAllocSize));
2493	sss->buf1 = palloc(sss->buflen1);
2494	}
2495
2496	/ Might be able to reuse strxfrm() blob from last call /
2497	if (sss->last_len1 == len && sss->cache_blob &&
2498	memcmp(sss->buf1, authoritative_data, len) == `0`)
2499	{
2500	memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
2501	/ No change affecting cardinality, so no hashing required /
2502	goto done;
2503	}
2504
2505	memcpy(sss->buf1, authoritative_data, len);
2506
2507	/*
2508	* Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
2509	* necessary for ICU, but doesn't hurt.
2510	*/
2511	sss->buf1[len] = `'\0'`;
2512	sss->last_len1 = len;
2513
2514	#ifdef USE_ICU
2515	/ When using ICU and not UTF8, convert string to UChar. /
2516	if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
2517	GetDatabaseEncoding() != PG_UTF8)
2518	ulen = icu_to_uchar(&uchar, sss->buf1, len);
2519	#endif
2520
2521	/*
2522	* Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
2523	* and try again. Both of these functions have the result buffer
2524	* content undefined if the result did not fit, so we need to retry
2525	* until everything fits, even though we only need the first few bytes
2526	* in the end. When using ucol_nextSortKeyPart(), however, we only
2527	* ask for as many bytes as we actually need.
2528	*/
2529	for (;;)
2530	{
2531	#ifdef USE_ICU
2532	if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
2533	{
2534	/*
2535	* When using UTF8, use the iteration interface so we only
2536	* need to produce as many bytes as we actually need.
2537	*/
2538	if (GetDatabaseEncoding() == PG_UTF8)
2539	{
2540	UCharIterator iter;
2541	uint32_t state[`2`];
2542	UErrorCode status;
2543
2544	uiter_setUTF8(&iter, sss->buf1, len);
2545	state[`0`] = state[`1`] = `0`; / won't need that again /
2546	status = U_ZERO_ERROR;
2547	bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
2548	&iter,
2549	state,
2550	(uint8_t *) sss->buf2,
2551	Min(sizeof(Datum), sss->buflen2),
2552	&status);
2553	if (U_FAILURE(status))
2554	ereport(ERROR,
2555	(errmsg("sort key generation failed: %s",
2556	u_errorName(status))));
2557	}
2558	else
2559	bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
2560	uchar, ulen,
2561	(uint8_t *) sss->buf2, sss->buflen2);
2562	}
2563	else
2564	#endif
2565	#ifdef HAVE_LOCALE_T
2566	if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
2567	bsize = strxfrm_l(sss->buf2, sss->buf1,
2568	sss->buflen2, sss->locale->info.lt);
2569	else
2570	#endif
2571	bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
2572
2573	sss->last_len2 = bsize;
2574	if (bsize < sss->buflen2)
2575	break;
2576
2577	/*
2578	* Grow buffer and retry.
2579	*/
2580	pfree(sss->buf2);
2581	sss->buflen2 = Max(bsize + `1`,
2582	Min(sss->buflen2 * `2`, MaxAllocSize));
2583	sss->buf2 = palloc(sss->buflen2);
2584	}
2585
2586	/*
2587	* Every Datum byte is always compared. This is safe because the
2588	* strxfrm() blob is itself NUL terminated, leaving no danger of
2589	* misinterpreting any NUL bytes not intended to be interpreted as
2590	* logically representing termination.
2591	*
2592	* (Actually, even if there were NUL bytes in the blob it would be
2593	* okay. See remarks on bytea case above.)
2594	*/
2595	memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2596
2597	#ifdef USE_ICU
2598	if (uchar)
2599	pfree(uchar);
2600	#endif
2601	}
2602
2603	/*
2604	* Maintain approximate cardinality of both abbreviated keys and original,
2605	* authoritative keys using HyperLogLog. Used as cheap insurance against
2606	* the worst case, where we do many string transformations for no saving
2607	* in full strcoll()-based comparisons. These statistics are used by
2608	* varstr_abbrev_abort().
2609	*
2610	* First, Hash key proper, or a significant fraction of it. Mix in length
2611	* in order to compensate for cases where differences are past
2612	* PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
2613	*/
2614	hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
2615	Min(len, PG_CACHE_LINE_SIZE)));
2616
2617	if (len > PG_CACHE_LINE_SIZE)
2618	hash ^= DatumGetUInt32(hash_uint32((uint32) len));
2619
2620	addHyperLogLog(&sss->full_card, hash);
2621
2622	/ Hash abbreviated key /
2623	#if SIZEOF_DATUM == 8
2624	{
2625	uint32 lohalf,
2626	hihalf;
2627
2628	lohalf = (uint32) res;
2629	hihalf = (uint32) (res >> `32`);
2630	hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
2631	}
2632	#else /* SIZEOF_DATUM != 8 */
2633	hash = DatumGetUInt32(hash_uint32((uint32) res));
2634	#endif
2635
2636	addHyperLogLog(&sss->abbr_card, hash);
2637
2638	/ Cache result, perhaps saving an expensive strxfrm() call next time /
2639	sss->cache_blob = true;
2640	done:
2641
2642	/*
2643	* Byteswap on little-endian machines.
2644	*
2645	* This is needed so that varstrcmp_abbrev() (an unsigned integer 3-way
2646	* comparator) works correctly on all platforms. If we didn't do this,
2647	* the comparator would have to call memcmp() with a pair of pointers to
2648	* the first byte of each abbreviated key, which is slower.
2649	*/
2650	res = DatumBigEndianToNative(res);
2651
2652	/ Don't leak memory here /
2653	if (PointerGetDatum(authoritative) != original)
2654	pfree(authoritative);
2655
2656	return res;
2657	}
2658
2659	/*
2660	* Callback for estimating effectiveness of abbreviated key optimization, using
2661	* heuristic rules. Returns value indicating if the abbreviation optimization
2662	* should be aborted, based on its projected effectiveness.
2663	*/
2664	static bool
2665	varstr_abbrev_abort(int memtupcount, SortSupport ssup)
2666	{
2667	VarStringSortSupport sss = (VarStringSortSupport ) ssup->ssup_extra;
2668	double abbrev_distinct,
2669	key_distinct;
2670
2671	Assert(ssup->abbreviate);
2672
2673	/ Have a little patience /
2674	if (memtupcount < `100`)
2675	return false;
2676
2677	abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
2678	key_distinct = estimateHyperLogLog(&sss->full_card);
2679
2680	/*
2681	* Clamp cardinality estimates to at least one distinct value. While
2682	* NULLs are generally disregarded, if only NULL values were seen so far,
2683	* that might misrepresent costs if we failed to clamp.
2684	*/
2685	if (abbrev_distinct <= `1.0`)
2686	abbrev_distinct = `1.0`;
2687
2688	if (key_distinct <= `1.0`)
2689	key_distinct = `1.0`;
2690
2691	/*
2692	* In the worst case all abbreviated keys are identical, while at the same
2693	* time there are differences within full key strings not captured in
2694	* abbreviations.
2695	*/
2696	#ifdef TRACE_SORT
2697	if (trace_sort)
2698	{
2699	double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
2700
2701	elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
2702	"(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
2703	memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
2704	sss->prop_card);
2705	}
2706	#endif
2707
2708	/*
2709	* If the number of distinct abbreviated keys approximately matches the
2710	* number of distinct authoritative original keys, that's reason enough to
2711	* proceed. We can win even with a very low cardinality set if most
2712	* tie-breakers only memcmp(). This is by far the most important
2713	* consideration.
2714	*
2715	* While comparisons that are resolved at the abbreviated key level are
2716	* considerably cheaper than tie-breakers resolved with memcmp(), both of
2717	* those two outcomes are so much cheaper than a full strcoll() once
2718	* sorting is underway that it doesn't seem worth it to weigh abbreviated
2719	* cardinality against the overall size of the set in order to more
2720	* accurately model costs. Assume that an abbreviated comparison, and an
2721	* abbreviated comparison with a cheap memcmp()-based authoritative
2722	* resolution are equivalent.
2723	*/
2724	if (abbrev_distinct > key_distinct * sss->prop_card)
2725	{
2726	/*
2727	* When we have exceeded 10,000 tuples, decay required cardinality
2728	* aggressively for next call.
2729	*
2730	* This is useful because the number of comparisons required on
2731	* average increases at a linearithmic rate, and at roughly 10,000
2732	* tuples that factor will start to dominate over the linear costs of
2733	* string transformation (this is a conservative estimate). The decay
2734	* rate is chosen to be a little less aggressive than halving -- which
2735	* (since we're called at points at which memtupcount has doubled)
2736	* would never see the cost model actually abort past the first call
2737	* following a decay. This decay rate is mostly a precaution against
2738	* a sudden, violent swing in how well abbreviated cardinality tracks
2739	* full key cardinality. The decay also serves to prevent a marginal
2740	* case from being aborted too late, when too much has already been
2741	* invested in string transformation.
2742	*
2743	* It's possible for sets of several million distinct strings with
2744	* mere tens of thousands of distinct abbreviated keys to still
2745	* benefit very significantly. This will generally occur provided
2746	* each abbreviated key is a proxy for a roughly uniform number of the
2747	* set's full keys. If it isn't so, we hope to catch that early and
2748	* abort. If it isn't caught early, by the time the problem is
2749	* apparent it's probably not worth aborting.
2750	*/
2751	if (memtupcount > `10000`)
2752	sss->prop_card *= `0.65`;
2753
2754	return false;
2755	}
2756
2757	/*
2758	* Abort abbreviation strategy.
2759	*
2760	* The worst case, where all abbreviated keys are identical while all
2761	* original strings differ will typically only see a regression of about
2762	* 10% in execution time for small to medium sized lists of strings.
2763	* Whereas on modern CPUs where cache stalls are the dominant cost, we can
2764	* often expect very large improvements, particularly with sets of strings
2765	* of moderately high to high abbreviated cardinality. There is little to
2766	* lose but much to gain, which our strategy reflects.
2767	*/
2768	#ifdef TRACE_SORT
2769	if (trace_sort)
2770	elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
2771	"(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
2772	memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
2773	#endif
2774
2775	return true;
2776	}
2777
2778	Datum
2779	text_larger(PG_FUNCTION_ARGS)
2780	{
2781	text *arg1 = PG_GETARG_TEXT_PP(`0`);
2782	text *arg2 = PG_GETARG_TEXT_PP(`1`);
2783	text *result;
2784
2785	result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > `0`) ? arg1 : arg2);
2786
2787	PG_RETURN_TEXT_P(result);
2788	}
2789
2790	Datum
2791	text_smaller(PG_FUNCTION_ARGS)
2792	{
2793	text *arg1 = PG_GETARG_TEXT_PP(`0`);
2794	text *arg2 = PG_GETARG_TEXT_PP(`1`);
2795	text *result;
2796
2797	result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < `0`) ? arg1 : arg2);
2798
2799	PG_RETURN_TEXT_P(result);
2800	}
2801
2802
2803	/*
2804	* Cross-type comparison functions for types text and name.
2805	*/
2806
2807	Datum
2808	nameeqtext(PG_FUNCTION_ARGS)
2809	{
2810	Name arg1 = PG_GETARG_NAME(`0`);
2811	text *arg2 = PG_GETARG_TEXT_PP(`1`);
2812	size_t len1 = strlen(NameStr(*arg1));
2813	size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2814	Oid collid = PG_GET_COLLATION();
2815	bool result;
2816
2817	check_collation_set(collid);
2818
2819	if (collid == C_COLLATION_OID)
2820	result = (len1 == len2 &&
2821	memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == `0`);
2822	else
2823	result = (varstr_cmp(NameStr(*arg1), len1,
2824	VARDATA_ANY(arg2), len2,
2825	collid) == `0`);
2826
2827	PG_FREE_IF_COPY(arg2, `1`);
2828
2829	PG_RETURN_BOOL(result);
2830	}
2831
2832	Datum
2833	texteqname(PG_FUNCTION_ARGS)
2834	{
2835	text *arg1 = PG_GETARG_TEXT_PP(`0`);
2836	Name arg2 = PG_GETARG_NAME(`1`);
2837	size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2838	size_t len2 = strlen(NameStr(*arg2));
2839	Oid collid = PG_GET_COLLATION();
2840	bool result;
2841
2842	check_collation_set(collid);
2843
2844	if (collid == C_COLLATION_OID)
2845	result = (len1 == len2 &&
2846	memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == `0`);
2847	else
2848	result = (varstr_cmp(VARDATA_ANY(arg1), len1,
2849	NameStr(*arg2), len2,
2850	collid) == `0`);
2851
2852	PG_FREE_IF_COPY(arg1, `0`);
2853
2854	PG_RETURN_BOOL(result);
2855	}
2856
2857	Datum
2858	namenetext(PG_FUNCTION_ARGS)
2859	{
2860	Name arg1 = PG_GETARG_NAME(`0`);
2861	text *arg2 = PG_GETARG_TEXT_PP(`1`);
2862	size_t len1 = strlen(NameStr(*arg1));
2863	size_t len2 = VARSIZE_ANY_EXHDR(arg2);
2864	Oid collid = PG_GET_COLLATION();
2865	bool result;
2866
2867	check_collation_set(collid);
2868
2869	if (collid == C_COLLATION_OID)
2870	result = !(len1 == len2 &&
2871	memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == `0`);
2872	else
2873	result = !(varstr_cmp(NameStr(*arg1), len1,
2874	VARDATA_ANY(arg2), len2,
2875	collid) == `0`);
2876
2877	PG_FREE_IF_COPY(arg2, `1`);
2878
2879	PG_RETURN_BOOL(result);
2880	}
2881
2882	Datum
2883	textnename(PG_FUNCTION_ARGS)
2884	{
2885	text *arg1 = PG_GETARG_TEXT_PP(`0`);
2886	Name arg2 = PG_GETARG_NAME(`1`);
2887	size_t len1 = VARSIZE_ANY_EXHDR(arg1);
2888	size_t len2 = strlen(NameStr(*arg2));
2889	Oid collid = PG_GET_COLLATION();
2890	bool result;
2891
2892	check_collation_set(collid);
2893
2894	if (collid == C_COLLATION_OID)
2895	result = !(len1 == len2 &&
2896	memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == `0`);
2897	else
2898	result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
2899	NameStr(*arg2), len2,
2900	collid) == `0`);
2901
2902	PG_FREE_IF_COPY(arg1, `0`);
2903
2904	PG_RETURN_BOOL(result);
2905	}
2906
2907	Datum
2908	btnametextcmp(PG_FUNCTION_ARGS)
2909	{
2910	Name arg1 = PG_GETARG_NAME(`0`);
2911	text *arg2 = PG_GETARG_TEXT_PP(`1`);
2912	int32 result;
2913
2914	result = varstr_cmp(NameStr(arg1), strlen(NameStr(arg1)),
2915	VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
2916	PG_GET_COLLATION());
2917
2918	PG_FREE_IF_COPY(arg2, `1`);
2919
2920	PG_RETURN_INT32(result);
2921	}
2922
2923	Datum
2924	bttextnamecmp(PG_FUNCTION_ARGS)
2925	{
2926	text *arg1 = PG_GETARG_TEXT_PP(`0`);
2927	Name arg2 = PG_GETARG_NAME(`1`);
2928	int32 result;
2929
2930	result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
2931	NameStr(arg2), strlen(NameStr(arg2)),
2932	PG_GET_COLLATION());
2933
2934	PG_FREE_IF_COPY(arg1, `0`);
2935
2936	PG_RETURN_INT32(result);
2937	}
2938
2939	#define CmpCall(cmpfunc) \
2940	DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
2941	PG_GET_COLLATION(), \
2942	PG_GETARG_DATUM(0), \
2943	PG_GETARG_DATUM(1)))
2944
2945	Datum
2946	namelttext(PG_FUNCTION_ARGS)
2947	{
2948	PG_RETURN_BOOL(CmpCall(btnametextcmp) < `0`);
2949	}
2950
2951	Datum
2952	nameletext(PG_FUNCTION_ARGS)
2953	{
2954	PG_RETURN_BOOL(CmpCall(btnametextcmp) <= `0`);
2955	}
2956
2957	Datum
2958	namegttext(PG_FUNCTION_ARGS)
2959	{
2960	PG_RETURN_BOOL(CmpCall(btnametextcmp) > `0`);
2961	}
2962
2963	Datum
2964	namegetext(PG_FUNCTION_ARGS)
2965	{
2966	PG_RETURN_BOOL(CmpCall(btnametextcmp) >= `0`);
2967	}
2968
2969	Datum
2970	textltname(PG_FUNCTION_ARGS)
2971	{
2972	PG_RETURN_BOOL(CmpCall(bttextnamecmp) < `0`);
2973	}
2974
2975	Datum
2976	textlename(PG_FUNCTION_ARGS)
2977	{
2978	PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= `0`);
2979	}
2980
2981	Datum
2982	textgtname(PG_FUNCTION_ARGS)
2983	{
2984	PG_RETURN_BOOL(CmpCall(bttextnamecmp) > `0`);
2985	}
2986
2987	Datum
2988	textgename(PG_FUNCTION_ARGS)
2989	{
2990	PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= `0`);
2991	}
2992
2993	#undef CmpCall
2994
2995
2996	/*
2997	* The following operators support character-by-character comparison
2998	* of text datums, to allow building indexes suitable for LIKE clauses.
2999	* Note that the regular texteq/textne comparison operators, and regular
3000	* support functions 1 and 2 with "C" collation are assumed to be
3001	* compatible with these!
3002	*/
3003
3004	static int
3005	internal_text_pattern_compare(text arg1, text arg2)
3006	{
3007	int result;
3008	int len1,
3009	len2;
3010
3011	len1 = VARSIZE_ANY_EXHDR(arg1);
3012	len2 = VARSIZE_ANY_EXHDR(arg2);
3013
3014	result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
3015	if (result != `0`)
3016	return result;
3017	else if (len1 < len2)
3018	return -`1`;
3019	else if (len1 > len2)
3020	return `1`;
3021	else
3022	return `0`;
3023	}
3024
3025
3026	Datum
3027	text_pattern_lt(PG_FUNCTION_ARGS)
3028	{
3029	text *arg1 = PG_GETARG_TEXT_PP(`0`);
3030	text *arg2 = PG_GETARG_TEXT_PP(`1`);
3031	int result;
3032
3033	result = internal_text_pattern_compare(arg1, arg2);
3034
3035	PG_FREE_IF_COPY(arg1, `0`);
3036	PG_FREE_IF_COPY(arg2, `1`);
3037
3038	PG_RETURN_BOOL(result < `0`);
3039	}
3040
3041
3042	Datum
3043	text_pattern_le(PG_FUNCTION_ARGS)
3044	{
3045	text *arg1 = PG_GETARG_TEXT_PP(`0`);
3046	text *arg2 = PG_GETARG_TEXT_PP(`1`);
3047	int result;
3048
3049	result = internal_text_pattern_compare(arg1, arg2);
3050
3051	PG_FREE_IF_COPY(arg1, `0`);
3052	PG_FREE_IF_COPY(arg2, `1`);
3053
3054	PG_RETURN_BOOL(result <= `0`);
3055	}
3056
3057
3058	Datum
3059	text_pattern_ge(PG_FUNCTION_ARGS)
3060	{
3061	text *arg1 = PG_GETARG_TEXT_PP(`0`);
3062	text *arg2 = PG_GETARG_TEXT_PP(`1`);
3063	int result;
3064
3065	result = internal_text_pattern_compare(arg1, arg2);
3066
3067	PG_FREE_IF_COPY(arg1, `0`);
3068	PG_FREE_IF_COPY(arg2, `1`);
3069
3070	PG_RETURN_BOOL(result >= `0`);
3071	}
3072
3073
3074	Datum
3075	text_pattern_gt(PG_FUNCTION_ARGS)
3076	{
3077	text *arg1 = PG_GETARG_TEXT_PP(`0`);
3078	text *arg2 = PG_GETARG_TEXT_PP(`1`);
3079	int result;
3080
3081	result = internal_text_pattern_compare(arg1, arg2);
3082
3083	PG_FREE_IF_COPY(arg1, `0`);
3084	PG_FREE_IF_COPY(arg2, `1`);
3085
3086	PG_RETURN_BOOL(result > `0`);
3087	}
3088
3089
3090	Datum
3091	bttext_pattern_cmp(PG_FUNCTION_ARGS)
3092	{
3093	text *arg1 = PG_GETARG_TEXT_PP(`0`);
3094	text *arg2 = PG_GETARG_TEXT_PP(`1`);
3095	int result;
3096
3097	result = internal_text_pattern_compare(arg1, arg2);
3098
3099	PG_FREE_IF_COPY(arg1, `0`);
3100	PG_FREE_IF_COPY(arg2, `1`);
3101
3102	PG_RETURN_INT32(result);
3103	}
3104
3105
3106	Datum
3107	bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
3108	{
3109	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(`0`);
3110	MemoryContext oldcontext;
3111
3112	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
3113
3114	/ Use generic string SortSupport, forcing "C" collation /
3115	varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
3116
3117	MemoryContextSwitchTo(oldcontext);
3118
3119	PG_RETURN_VOID();
3120	}
3121
3122
3123	/-------------------------------------------------------------*
3124	* byteaoctetlen
3125	*
3126	* get the number of bytes contained in an instance of type 'bytea'
3127	*-------------------------------------------------------------
3128	*/
3129	Datum
3130	byteaoctetlen(PG_FUNCTION_ARGS)
3131	{
3132	Datum str = PG_GETARG_DATUM(`0`);
3133
3134	/ We need not detoast the input at all /
3135	PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
3136	}
3137
3138	/*
3139	* byteacat -
3140	* takes two bytea* and returns a bytea* that is the concatenation of
3141	* the two.
3142	*
3143	* Cloned from textcat and modified as required.
3144	*/
3145	Datum
3146	byteacat(PG_FUNCTION_ARGS)
3147	{
3148	bytea *t1 = PG_GETARG_BYTEA_PP(`0`);
3149	bytea *t2 = PG_GETARG_BYTEA_PP(`1`);
3150
3151	PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
3152	}
3153
3154	/*
3155	* bytea_catenate
3156	* Guts of byteacat(), broken out so it can be used by other functions
3157	*
3158	* Arguments can be in short-header form, but not compressed or out-of-line
3159	*/
3160	static bytea *
3161	bytea_catenate(bytea t1, bytea t2)
3162	{
3163	bytea *result;
3164	int len1,
3165	len2,
3166	len;
3167	char *ptr;
3168
3169	len1 = VARSIZE_ANY_EXHDR(t1);
3170	len2 = VARSIZE_ANY_EXHDR(t2);
3171
3172	/ paranoia ... probably should throw error instead? /
3173	if (len1 < `0`)
3174	len1 = `0`;
3175	if (len2 < `0`)
3176	len2 = `0`;
3177
3178	len = len1 + len2 + VARHDRSZ;
3179	result = (bytea *) palloc(len);
3180
3181	/ Set size of result string... /
3182	SET_VARSIZE(result, len);
3183
3184	/ Fill data field of result string... /
3185	ptr = VARDATA(result);
3186	if (len1 > `0`)
3187	memcpy(ptr, VARDATA_ANY(t1), len1);
3188	if (len2 > `0`)
3189	memcpy(ptr + len1, VARDATA_ANY(t2), len2);
3190
3191	return result;
3192	}
3193
3194	#define PG_STR_GET_BYTEA(str_) \
3195	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
3196
3197	/*
3198	* bytea_substr()
3199	* Return a substring starting at the specified position.
3200	* Cloned from text_substr and modified as required.
3201	*
3202	* Input:
3203	* - string
3204	* - starting position (is one-based)
3205	* - string length (optional)
3206	*
3207	* If the starting position is zero or less, then return from the start of the string
3208	* adjusting the length to be consistent with the "negative start" per SQL.
3209	* If the length is less than zero, an ERROR is thrown. If no third argument
3210	* (length) is provided, the length to the end of the string is assumed.
3211	*/
3212	Datum
3213	bytea_substr(PG_FUNCTION_ARGS)
3214	{
3215	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(`0`),
3216	PG_GETARG_INT32(`1`),
3217	PG_GETARG_INT32(`2`),
3218	false));
3219	}
3220
3221	/*
3222	* bytea_substr_no_len -
3223	* Wrapper to avoid opr_sanity failure due to
3224	* one function accepting a different number of args.
3225	*/
3226	Datum
3227	bytea_substr_no_len(PG_FUNCTION_ARGS)
3228	{
3229	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(`0`),
3230	PG_GETARG_INT32(`1`),
3231	-`1`,
3232	true));
3233	}
3234
3235	static bytea *
3236	bytea_substring(Datum str,
3237	int S,
3238	int L,
3239	bool length_not_specified)
3240	{
3241	int S1; / adjusted start position /
3242	int L1; / adjusted substring length /
3243
3244	S1 = Max(S, `1`);
3245
3246	if (length_not_specified)
3247	{
3248	/*
3249	* Not passed a length - DatumGetByteaPSlice() grabs everything to the
3250	* end of the string if we pass it a negative value for length.
3251	*/
3252	L1 = -`1`;
3253	}
3254	else
3255	{
3256	/ end position /
3257	int E = S + L;
3258
3259	/*
3260	* A negative value for L is the only way for the end position to be
3261	* before the start. SQL99 says to throw an error.
3262	*/
3263	if (E < S)
3264	ereport(ERROR,
3265	(errcode(ERRCODE_SUBSTRING_ERROR),
3266	errmsg("negative substring length not allowed")));
3267
3268	/*
3269	* A zero or negative value for the end position can happen if the
3270	* start was negative or one. SQL99 says to return a zero-length
3271	* string.
3272	*/
3273	if (E < `1`)
3274	return PG_STR_GET_BYTEA("");
3275
3276	L1 = E - S1;
3277	}
3278
3279	/*
3280	* If the start position is past the end of the string, SQL99 says to
3281	* return a zero-length string -- DatumGetByteaPSlice() will do that for
3282	* us. Convert to zero-based starting position
3283	*/
3284	return DatumGetByteaPSlice(str, S1 - `1`, L1);
3285	}
3286
3287	/*
3288	* byteaoverlay
3289	* Replace specified substring of first string with second
3290	*
3291	* The SQL standard defines OVERLAY() in terms of substring and concatenation.
3292	* This code is a direct implementation of what the standard says.
3293	*/
3294	Datum
3295	byteaoverlay(PG_FUNCTION_ARGS)
3296	{
3297	bytea *t1 = PG_GETARG_BYTEA_PP(`0`);
3298	bytea *t2 = PG_GETARG_BYTEA_PP(`1`);
3299	int sp = PG_GETARG_INT32(`2`); / substring start position /
3300	int sl = PG_GETARG_INT32(`3`); / substring length /
3301
3302	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3303	}
3304
3305	Datum
3306	byteaoverlay_no_len(PG_FUNCTION_ARGS)
3307	{
3308	bytea *t1 = PG_GETARG_BYTEA_PP(`0`);
3309	bytea *t2 = PG_GETARG_BYTEA_PP(`1`);
3310	int sp = PG_GETARG_INT32(`2`); / substring start position /
3311	int sl;
3312
3313	sl = VARSIZE_ANY_EXHDR(t2); / defaults to length(t2) /
3314	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
3315	}
3316
3317	static bytea *
3318	bytea_overlay(bytea t1, bytea t2, int sp, int sl)
3319	{
3320	bytea *result;
3321	bytea *s1;
3322	bytea *s2;
3323	int sp_pl_sl;
3324
3325	/*
3326	* Check for possible integer-overflow cases. For negative sp, throw a
3327	* "substring length" error because that's what should be expected
3328	* according to the spec's definition of OVERLAY().
3329	*/
3330	if (sp <= `0`)
3331	ereport(ERROR,
3332	(errcode(ERRCODE_SUBSTRING_ERROR),
3333	errmsg("negative substring length not allowed")));
3334	if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
3335	ereport(ERROR,
3336	(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
3337	errmsg("integer out of range")));
3338
3339	s1 = bytea_substring(PointerGetDatum(t1), `1`, sp - `1`, false);
3340	s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -`1`, true);
3341	result = bytea_catenate(s1, t2);
3342	result = bytea_catenate(result, s2);
3343
3344	return result;
3345	}
3346
3347	/*
3348	* byteapos -
3349	* Return the position of the specified substring.
3350	* Implements the SQL POSITION() function.
3351	* Cloned from textpos and modified as required.
3352	*/
3353	Datum
3354	byteapos(PG_FUNCTION_ARGS)
3355	{
3356	bytea *t1 = PG_GETARG_BYTEA_PP(`0`);
3357	bytea *t2 = PG_GETARG_BYTEA_PP(`1`);
3358	int pos;
3359	int px,
3360	p;
3361	int len1,
3362	len2;
3363	char *p1,
3364	*p2;
3365
3366	len1 = VARSIZE_ANY_EXHDR(t1);
3367	len2 = VARSIZE_ANY_EXHDR(t2);
3368
3369	if (len2 <= `0`)
3370	PG_RETURN_INT32(`1`); / result for empty pattern /
3371
3372	p1 = VARDATA_ANY(t1);
3373	p2 = VARDATA_ANY(t2);
3374
3375	pos = `0`;
3376	px = (len1 - len2);
3377	for (p = `0`; p <= px; p++)
3378	{
3379	if ((p2 == p1) && (memcmp(p1, p2, len2) == `0`))
3380	{
3381	pos = p + `1`;
3382	break;
3383	};
3384	p1++;
3385	};
3386
3387	PG_RETURN_INT32(pos);
3388	}
3389
3390	/-------------------------------------------------------------*
3391	* byteaGetByte
3392	*
3393	* this routine treats "bytea" as an array of bytes.
3394	* It returns the Nth byte (a number between 0 and 255).
3395	*-------------------------------------------------------------
3396	*/
3397	Datum
3398	byteaGetByte(PG_FUNCTION_ARGS)
3399	{
3400	bytea *v = PG_GETARG_BYTEA_PP(`0`);
3401	int32 n = PG_GETARG_INT32(`1`);
3402	int len;
3403	int byte;
3404
3405	len = VARSIZE_ANY_EXHDR(v);
3406
3407	if (n < `0` \|\| n >= len)
3408	ereport(ERROR,
3409	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3410	errmsg("index %d out of valid range, 0..%d",
3411	n, len - `1`)));
3412
3413	byte = ((unsigned char *) VARDATA_ANY(v))[n];
3414
3415	PG_RETURN_INT32(byte);
3416	}
3417
3418	/-------------------------------------------------------------*
3419	* byteaGetBit
3420	*
3421	* This routine treats a "bytea" type like an array of bits.
3422	* It returns the value of the Nth bit (0 or 1).
3423	*
3424	*-------------------------------------------------------------
3425	*/
3426	Datum
3427	byteaGetBit(PG_FUNCTION_ARGS)
3428	{
3429	bytea *v = PG_GETARG_BYTEA_PP(`0`);
3430	int32 n = PG_GETARG_INT32(`1`);
3431	int byteNo,
3432	bitNo;
3433	int len;
3434	int byte;
3435
3436	len = VARSIZE_ANY_EXHDR(v);
3437
3438	if (n < `0` \|\| n >= len * `8`)
3439	ereport(ERROR,
3440	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3441	errmsg("index %d out of valid range, 0..%d",
3442	n, len * `8` - `1`)));
3443
3444	byteNo = n / `8`;
3445	bitNo = n % `8`;
3446
3447	byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
3448
3449	if (byte & (`1` << bitNo))
3450	PG_RETURN_INT32(`1`);
3451	else
3452	PG_RETURN_INT32(`0`);
3453	}
3454
3455	/-------------------------------------------------------------*
3456	* byteaSetByte
3457	*
3458	* Given an instance of type 'bytea' creates a new one with
3459	* the Nth byte set to the given value.
3460	*
3461	*-------------------------------------------------------------
3462	*/
3463	Datum
3464	byteaSetByte(PG_FUNCTION_ARGS)
3465	{
3466	bytea *res = PG_GETARG_BYTEA_P_COPY(`0`);
3467	int32 n = PG_GETARG_INT32(`1`);
3468	int32 newByte = PG_GETARG_INT32(`2`);
3469	int len;
3470
3471	len = VARSIZE(res) - VARHDRSZ;
3472
3473	if (n < `0` \|\| n >= len)
3474	ereport(ERROR,
3475	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3476	errmsg("index %d out of valid range, 0..%d",
3477	n, len - `1`)));
3478
3479	/*
3480	* Now set the byte.
3481	*/
3482	((unsigned char *) VARDATA(res))[n] = newByte;
3483
3484	PG_RETURN_BYTEA_P(res);
3485	}
3486
3487	/-------------------------------------------------------------*
3488	* byteaSetBit
3489	*
3490	* Given an instance of type 'bytea' creates a new one with
3491	* the Nth bit set to the given value.
3492	*
3493	*-------------------------------------------------------------
3494	*/
3495	Datum
3496	byteaSetBit(PG_FUNCTION_ARGS)
3497	{
3498	bytea *res = PG_GETARG_BYTEA_P_COPY(`0`);
3499	int32 n = PG_GETARG_INT32(`1`);
3500	int32 newBit = PG_GETARG_INT32(`2`);
3501	int len;
3502	int oldByte,
3503	newByte;
3504	int byteNo,
3505	bitNo;
3506
3507	len = VARSIZE(res) - VARHDRSZ;
3508
3509	if (n < `0` \|\| n >= len * `8`)
3510	ereport(ERROR,
3511	(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
3512	errmsg("index %d out of valid range, 0..%d",
3513	n, len * `8` - `1`)));
3514
3515	byteNo = n / `8`;
3516	bitNo = n % `8`;
3517
3518	/*
3519	* sanity check!
3520	*/
3521	if (newBit != `0` && newBit != `1`)
3522	ereport(ERROR,
3523	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
3524	errmsg("new bit must be 0 or 1")));
3525
3526	/*
3527	* Update the byte.
3528	*/
3529	oldByte = ((unsigned char *) VARDATA(res))[byteNo];
3530
3531	if (newBit == `0`)
3532	newByte = oldByte & (~(`1` << bitNo));
3533	else
3534	newByte = oldByte \| (`1` << bitNo);
3535
3536	((unsigned char *) VARDATA(res))[byteNo] = newByte;
3537
3538	PG_RETURN_BYTEA_P(res);
3539	}
3540
3541
3542	/ text_name()*
3543	* Converts a text type to a Name type.
3544	*/
3545	Datum
3546	text_name(PG_FUNCTION_ARGS)
3547	{
3548	text *s = PG_GETARG_TEXT_PP(`0`);
3549	Name result;
3550	int len;
3551
3552	len = VARSIZE_ANY_EXHDR(s);
3553
3554	/ Truncate oversize input /
3555	if (len >= NAMEDATALEN)
3556	len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - `1`);
3557
3558	/ We use palloc0 here to ensure result is zero-padded /
3559	result = (Name) palloc0(NAMEDATALEN);
3560	memcpy(NameStr(*result), VARDATA_ANY(s), len);
3561
3562	PG_RETURN_NAME(result);
3563	}
3564
3565	/ name_text()*
3566	* Converts a Name type to a text type.
3567	*/
3568	Datum
3569	name_text(PG_FUNCTION_ARGS)
3570	{
3571	Name s = PG_GETARG_NAME(`0`);
3572
3573	PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
3574	}
3575
3576
3577	/*
3578	* textToQualifiedNameList - convert a text object to list of names
3579	*
3580	* This implements the input parsing needed by nextval() and other
3581	* functions that take a text parameter representing a qualified name.
3582	* We split the name at dots, downcase if not double-quoted, and
3583	* truncate names if they're too long.
3584	*/
3585	List *
3586	textToQualifiedNameList(text *textval)
3587	{
3588	char *rawname;
3589	List *result = NIL;
3590	List *namelist;
3591	ListCell *l;
3592
3593	/ Convert to C string (handles possible detoasting). /
3594	/ Note we rely on being able to modify rawname below. /
3595	rawname = text_to_cstring(textval);
3596
3597	if (!SplitIdentifierString(rawname, `'.'`, &namelist))
3598	ereport(ERROR,
3599	(errcode(ERRCODE_INVALID_NAME),
3600	errmsg("invalid name syntax")));
3601
3602	if (namelist == NIL)
3603	ereport(ERROR,
3604	(errcode(ERRCODE_INVALID_NAME),
3605	errmsg("invalid name syntax")));
3606
3607	foreach(l, namelist)
3608	{
3609	char curname = (char* *) lfirst(l);
3610
3611	result = lappend(result, makeString(pstrdup(curname)));
3612	}
3613
3614	pfree(rawname);
3615	list_free(namelist);
3616
3617	return result;
3618	}
3619
3620	/*
3621	* SplitIdentifierString --- parse a string containing identifiers
3622	*
3623	* This is the guts of textToQualifiedNameList, and is exported for use in
3624	* other situations such as parsing GUC variables. In the GUC case, it's
3625	* important to avoid memory leaks, so the API is designed to minimize the
3626	* amount of stuff that needs to be allocated and freed.
3627	*
3628	* Inputs:
3629	* rawstring: the input string; must be overwritable! On return, it's
3630	* been modified to contain the separated identifiers.
3631	* separator: the separator punctuation expected between identifiers
3632	* (typically '.' or ','). Whitespace may also appear around
3633	* identifiers.
3634	* Outputs:
3635	* namelist: filled with a palloc'd list of pointers to identifiers within
3636	* rawstring. Caller should list_free() this even on error return.
3637	*
3638	* Returns true if okay, false if there is a syntax error in the string.
3639	*
3640	* Note that an empty string is considered okay here, though not in
3641	* textToQualifiedNameList.
3642	*/
3643	bool
3644	SplitIdentifierString(char rawstring, char* separator,
3645	List **namelist)
3646	{
3647	char *nextp = rawstring;
3648	bool done = false;
3649
3650	*namelist = NIL;
3651
3652	while (scanner_isspace(*nextp))
3653	nextp++; / skip leading whitespace /
3654
3655	if (*nextp == `'\0'`)
3656	return true; / allow empty string /
3657
3658	/ At the top of the loop, we are at start of a new identifier. /
3659	do
3660	{
3661	char *curname;
3662	char *endp;
3663
3664	if (*nextp == `'"'`)
3665	{
3666	/ Quoted name --- collapse quote-quote pairs, no downcasing /
3667	curname = nextp + `1`;
3668	for (;;)
3669	{
3670	endp = strchr(nextp + `1`, `'"'`);
3671	if (endp == NULL)
3672	return false; / mismatched quotes /
3673	if (endp[`1`] != `'"'`)
3674	break; / found end of quoted name /
3675	/ Collapse adjacent quotes into one quote, and look again /
3676	memmove(endp, endp + `1`, strlen(endp));
3677	nextp = endp;
3678	}
3679	/ endp now points at the terminating quote /
3680	nextp = endp + `1`;
3681	}
3682	else
3683	{
3684	/ Unquoted name --- extends to separator or whitespace /
3685	char *downname;
3686	int len;
3687
3688	curname = nextp;
3689	while (nextp && nextp != separator &&
3690	!scanner_isspace(*nextp))
3691	nextp++;
3692	endp = nextp;
3693	if (curname == nextp)
3694	return false; / empty unquoted name not allowed /
3695
3696	/*
3697	* Downcase the identifier, using same code as main lexer does.
3698	*
3699	* XXX because we want to overwrite the input in-place, we cannot
3700	* support a downcasing transformation that increases the string
3701	* length. This is not a problem given the current implementation
3702	* of downcase_truncate_identifier, but we'll probably have to do
3703	* something about this someday.
3704	*/
3705	len = endp - curname;
3706	downname = downcase_truncate_identifier(curname, len, false);
3707	Assert(strlen(downname) <= len);
3708	strncpy(curname, downname, len); / strncpy is required here /
3709	pfree(downname);
3710	}
3711
3712	while (scanner_isspace(*nextp))
3713	nextp++; / skip trailing whitespace /
3714
3715	if (*nextp == separator)
3716	{
3717	nextp++;
3718	while (scanner_isspace(*nextp))
3719	nextp++; / skip leading whitespace for next /
3720	/ we expect another name, so done remains false /
3721	}
3722	else if (*nextp == `'\0'`)
3723	done = true;
3724	else
3725	return false; / invalid syntax /
3726
3727	/ Now safe to overwrite separator with a null /
3728	*endp = `'\0'`;
3729
3730	/ Truncate name if it's overlength /
3731	truncate_identifier(curname, strlen(curname), false);
3732
3733	/*
3734	* Finished isolating current name --- add it to list
3735	*/
3736	namelist = lappend(namelist, curname);
3737
3738	/ Loop back if we didn't reach end of string /
3739	} while (!done);
3740
3741	return true;
3742	}
3743
3744
3745	/*
3746	* SplitDirectoriesString --- parse a string containing file/directory names
3747	*
3748	* This works fine on file names too; the function name is historical.
3749	*
3750	* This is similar to SplitIdentifierString, except that the parsing
3751	* rules are meant to handle pathnames instead of identifiers: there is
3752	* no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
3753	* and we apply canonicalize_path() to each extracted string. Because of the
3754	* last, the returned strings are separately palloc'd rather than being
3755	* pointers into rawstring --- but we still scribble on rawstring.
3756	*
3757	* Inputs:
3758	* rawstring: the input string; must be modifiable!
3759	* separator: the separator punctuation expected between directories
3760	* (typically ',' or ';'). Whitespace may also appear around
3761	* directories.
3762	* Outputs:
3763	* namelist: filled with a palloc'd list of directory names.
3764	* Caller should list_free_deep() this even on error return.
3765	*
3766	* Returns true if okay, false if there is a syntax error in the string.
3767	*
3768	* Note that an empty string is considered okay here.
3769	*/
3770	bool
3771	SplitDirectoriesString(char rawstring, char* separator,
3772	List **namelist)
3773	{
3774	char *nextp = rawstring;
3775	bool done = false;
3776
3777	*namelist = NIL;
3778
3779	while (scanner_isspace(*nextp))
3780	nextp++; / skip leading whitespace /
3781
3782	if (*nextp == `'\0'`)
3783	return true; / allow empty string /
3784
3785	/ At the top of the loop, we are at start of a new directory. /
3786	do
3787	{
3788	char *curname;
3789	char *endp;
3790
3791	if (*nextp == `'"'`)
3792	{
3793	/ Quoted name --- collapse quote-quote pairs /
3794	curname = nextp + `1`;
3795	for (;;)
3796	{
3797	endp = strchr(nextp + `1`, `'"'`);
3798	if (endp == NULL)
3799	return false; / mismatched quotes /
3800	if (endp[`1`] != `'"'`)
3801	break; / found end of quoted name /
3802	/ Collapse adjacent quotes into one quote, and look again /
3803	memmove(endp, endp + `1`, strlen(endp));
3804	nextp = endp;
3805	}
3806	/ endp now points at the terminating quote /
3807	nextp = endp + `1`;
3808	}
3809	else
3810	{
3811	/ Unquoted name --- extends to separator or end of string /
3812	curname = endp = nextp;
3813	while (nextp && nextp != separator)
3814	{
3815	/ trailing whitespace should not be included in name /
3816	if (!scanner_isspace(*nextp))
3817	endp = nextp + `1`;
3818	nextp++;
3819	}
3820	if (curname == endp)
3821	return false; / empty unquoted name not allowed /
3822	}
3823
3824	while (scanner_isspace(*nextp))
3825	nextp++; / skip trailing whitespace /
3826
3827	if (*nextp == separator)
3828	{
3829	nextp++;
3830	while (scanner_isspace(*nextp))
3831	nextp++; / skip leading whitespace for next /
3832	/ we expect another name, so done remains false /
3833	}
3834	else if (*nextp == `'\0'`)
3835	done = true;
3836	else
3837	return false; / invalid syntax /
3838
3839	/ Now safe to overwrite separator with a null /
3840	*endp = `'\0'`;
3841
3842	/ Truncate path if it's overlength /
3843	if (strlen(curname) >= MAXPGPATH)
3844	curname[MAXPGPATH - `1`] = `'\0'`;
3845
3846	/*
3847	* Finished isolating current name --- add it to list
3848	*/
3849	curname = pstrdup(curname);
3850	canonicalize_path(curname);
3851	namelist = lappend(namelist, curname);
3852
3853	/ Loop back if we didn't reach end of string /
3854	} while (!done);
3855
3856	return true;
3857	}
3858
3859
3860	/*
3861	* SplitGUCList --- parse a string containing identifiers or file names
3862	*
3863	* This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
3864	* presuming whether the elements will be taken as identifiers or file names.
3865	* We assume the input has already been through flatten_set_variable_args(),
3866	* so that we need never downcase (if appropriate, that was done already).
3867	* Nor do we ever truncate, since we don't know the correct max length.
3868	* We disallow embedded whitespace for simplicity (it shouldn't matter,
3869	* because any embedded whitespace should have led to double-quoting).
3870	* Otherwise the API is identical to SplitIdentifierString.
3871	*
3872	* XXX it's annoying to have so many copies of this string-splitting logic.
3873	* However, it's not clear that having one function with a bunch of option
3874	* flags would be much better.
3875	*
3876	* XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
3877	* Be sure to update that if you have to change this.
3878	*
3879	* Inputs:
3880	* rawstring: the input string; must be overwritable! On return, it's
3881	* been modified to contain the separated identifiers.
3882	* separator: the separator punctuation expected between identifiers
3883	* (typically '.' or ','). Whitespace may also appear around
3884	* identifiers.
3885	* Outputs:
3886	* namelist: filled with a palloc'd list of pointers to identifiers within
3887	* rawstring. Caller should list_free() this even on error return.
3888	*
3889	* Returns true if okay, false if there is a syntax error in the string.
3890	*/
3891	bool
3892	SplitGUCList(char rawstring, char* separator,
3893	List **namelist)
3894	{
3895	char *nextp = rawstring;
3896	bool done = false;
3897
3898	*namelist = NIL;
3899
3900	while (scanner_isspace(*nextp))
3901	nextp++; / skip leading whitespace /
3902
3903	if (*nextp == `'\0'`)
3904	return true; / allow empty string /
3905
3906	/ At the top of the loop, we are at start of a new identifier. /
3907	do
3908	{
3909	char *curname;
3910	char *endp;
3911
3912	if (*nextp == `'"'`)
3913	{
3914	/ Quoted name --- collapse quote-quote pairs /
3915	curname = nextp + `1`;
3916	for (;;)
3917	{
3918	endp = strchr(nextp + `1`, `'"'`);
3919	if (endp == NULL)
3920	return false; / mismatched quotes /
3921	if (endp[`1`] != `'"'`)
3922	break; / found end of quoted name /
3923	/ Collapse adjacent quotes into one quote, and look again /
3924	memmove(endp, endp + `1`, strlen(endp));
3925	nextp = endp;
3926	}
3927	/ endp now points at the terminating quote /
3928	nextp = endp + `1`;
3929	}
3930	else
3931	{
3932	/ Unquoted name --- extends to separator or whitespace /
3933	curname = nextp;
3934	while (nextp && nextp != separator &&
3935	!scanner_isspace(*nextp))
3936	nextp++;
3937	endp = nextp;
3938	if (curname == nextp)
3939	return false; / empty unquoted name not allowed /
3940	}
3941
3942	while (scanner_isspace(*nextp))
3943	nextp++; / skip trailing whitespace /
3944
3945	if (*nextp == separator)
3946	{
3947	nextp++;
3948	while (scanner_isspace(*nextp))
3949	nextp++; / skip leading whitespace for next /
3950	/ we expect another name, so done remains false /
3951	}
3952	else if (*nextp == `'\0'`)
3953	done = true;
3954	else
3955	return false; / invalid syntax /
3956
3957	/ Now safe to overwrite separator with a null /
3958	*endp = `'\0'`;
3959
3960	/*
3961	* Finished isolating current name --- add it to list
3962	*/
3963	namelist = lappend(namelist, curname);
3964
3965	/ Loop back if we didn't reach end of string /
3966	} while (!done);
3967
3968	return true;
3969	}
3970
3971
3972	/*****************************************************************************
3973	* Comparison Functions used for bytea
3974	*
3975	* Note: btree indexes need these routines not to leak memory; therefore,
3976	* be careful to free working copies of toasted datums. Most places don't
3977	* need to be so careful.
3978	*****************************************************************************/
3979
3980	Datum
3981	byteaeq(PG_FUNCTION_ARGS)
3982	{
3983	Datum arg1 = PG_GETARG_DATUM(`0`);
3984	Datum arg2 = PG_GETARG_DATUM(`1`);
3985	bool result;
3986	Size len1,
3987	len2;
3988
3989	/*
3990	* We can use a fast path for unequal lengths, which might save us from
3991	* having to detoast one or both values.
3992	*/
3993	len1 = toast_raw_datum_size(arg1);
3994	len2 = toast_raw_datum_size(arg2);
3995	if (len1 != len2)
3996	result = false;
3997	else
3998	{
3999	bytea *barg1 = DatumGetByteaPP(arg1);
4000	bytea *barg2 = DatumGetByteaPP(arg2);
4001
4002	result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4003	len1 - VARHDRSZ) == `0`);
4004
4005	PG_FREE_IF_COPY(barg1, `0`);
4006	PG_FREE_IF_COPY(barg2, `1`);
4007	}
4008
4009	PG_RETURN_BOOL(result);
4010	}
4011
4012	Datum
4013	byteane(PG_FUNCTION_ARGS)
4014	{
4015	Datum arg1 = PG_GETARG_DATUM(`0`);
4016	Datum arg2 = PG_GETARG_DATUM(`1`);
4017	bool result;
4018	Size len1,
4019	len2;
4020
4021	/*
4022	* We can use a fast path for unequal lengths, which might save us from
4023	* having to detoast one or both values.
4024	*/
4025	len1 = toast_raw_datum_size(arg1);
4026	len2 = toast_raw_datum_size(arg2);
4027	if (len1 != len2)
4028	result = true;
4029	else
4030	{
4031	bytea *barg1 = DatumGetByteaPP(arg1);
4032	bytea *barg2 = DatumGetByteaPP(arg2);
4033
4034	result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
4035	len1 - VARHDRSZ) != `0`);
4036
4037	PG_FREE_IF_COPY(barg1, `0`);
4038	PG_FREE_IF_COPY(barg2, `1`);
4039	}
4040
4041	PG_RETURN_BOOL(result);
4042	}
4043
4044	Datum
4045	bytealt(PG_FUNCTION_ARGS)
4046	{
4047	bytea *arg1 = PG_GETARG_BYTEA_PP(`0`);
4048	bytea *arg2 = PG_GETARG_BYTEA_PP(`1`);
4049	int len1,
4050	len2;
4051	int cmp;
4052
4053	len1 = VARSIZE_ANY_EXHDR(arg1);
4054	len2 = VARSIZE_ANY_EXHDR(arg2);
4055
4056	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4057
4058	PG_FREE_IF_COPY(arg1, `0`);
4059	PG_FREE_IF_COPY(arg2, `1`);
4060
4061	PG_RETURN_BOOL((cmp < `0`) \|\| ((cmp == `0`) && (len1 < len2)));
4062	}
4063
4064	Datum
4065	byteale(PG_FUNCTION_ARGS)
4066	{
4067	bytea *arg1 = PG_GETARG_BYTEA_PP(`0`);
4068	bytea *arg2 = PG_GETARG_BYTEA_PP(`1`);
4069	int len1,
4070	len2;
4071	int cmp;
4072
4073	len1 = VARSIZE_ANY_EXHDR(arg1);
4074	len2 = VARSIZE_ANY_EXHDR(arg2);
4075
4076	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4077
4078	PG_FREE_IF_COPY(arg1, `0`);
4079	PG_FREE_IF_COPY(arg2, `1`);
4080
4081	PG_RETURN_BOOL((cmp < `0`) \|\| ((cmp == `0`) && (len1 <= len2)));
4082	}
4083
4084	Datum
4085	byteagt(PG_FUNCTION_ARGS)
4086	{
4087	bytea *arg1 = PG_GETARG_BYTEA_PP(`0`);
4088	bytea *arg2 = PG_GETARG_BYTEA_PP(`1`);
4089	int len1,
4090	len2;
4091	int cmp;
4092
4093	len1 = VARSIZE_ANY_EXHDR(arg1);
4094	len2 = VARSIZE_ANY_EXHDR(arg2);
4095
4096	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4097
4098	PG_FREE_IF_COPY(arg1, `0`);
4099	PG_FREE_IF_COPY(arg2, `1`);
4100
4101	PG_RETURN_BOOL((cmp > `0`) \|\| ((cmp == `0`) && (len1 > len2)));
4102	}
4103
4104	Datum
4105	byteage(PG_FUNCTION_ARGS)
4106	{
4107	bytea *arg1 = PG_GETARG_BYTEA_PP(`0`);
4108	bytea *arg2 = PG_GETARG_BYTEA_PP(`1`);
4109	int len1,
4110	len2;
4111	int cmp;
4112
4113	len1 = VARSIZE_ANY_EXHDR(arg1);
4114	len2 = VARSIZE_ANY_EXHDR(arg2);
4115
4116	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4117
4118	PG_FREE_IF_COPY(arg1, `0`);
4119	PG_FREE_IF_COPY(arg2, `1`);
4120
4121	PG_RETURN_BOOL((cmp > `0`) \|\| ((cmp == `0`) && (len1 >= len2)));
4122	}
4123
4124	Datum
4125	byteacmp(PG_FUNCTION_ARGS)
4126	{
4127	bytea *arg1 = PG_GETARG_BYTEA_PP(`0`);
4128	bytea *arg2 = PG_GETARG_BYTEA_PP(`1`);
4129	int len1,
4130	len2;
4131	int cmp;
4132
4133	len1 = VARSIZE_ANY_EXHDR(arg1);
4134	len2 = VARSIZE_ANY_EXHDR(arg2);
4135
4136	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
4137	if ((cmp == `0`) && (len1 != len2))
4138	cmp = (len1 < len2) ? -`1` : `1`;
4139
4140	PG_FREE_IF_COPY(arg1, `0`);
4141	PG_FREE_IF_COPY(arg2, `1`);
4142
4143	PG_RETURN_INT32(cmp);
4144	}
4145
4146	Datum
4147	bytea_sortsupport(PG_FUNCTION_ARGS)
4148	{
4149	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(`0`);
4150	MemoryContext oldcontext;
4151
4152	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
4153
4154	/ Use generic string SortSupport, forcing "C" collation /
4155	varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
4156
4157	MemoryContextSwitchTo(oldcontext);
4158
4159	PG_RETURN_VOID();
4160	}
4161
4162	/*
4163	* appendStringInfoText
4164	*
4165	* Append a text to str.
4166	* Like appendStringInfoString(str, text_to_cstring(t)) but faster.
4167	*/
4168	static void
4169	appendStringInfoText(StringInfo str, const text *t)
4170	{
4171	appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
4172	}
4173
4174	/*
4175	* replace_text
4176	* replace all occurrences of 'old_sub_str' in 'orig_str'
4177	* with 'new_sub_str' to form 'new_str'
4178	*
4179	* returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
4180	* otherwise returns 'new_str'
4181	*/
4182	Datum
4183	replace_text(PG_FUNCTION_ARGS)
4184	{
4185	text *src_text = PG_GETARG_TEXT_PP(`0`);
4186	text *from_sub_text = PG_GETARG_TEXT_PP(`1`);
4187	text *to_sub_text = PG_GETARG_TEXT_PP(`2`);
4188	int src_text_len;
4189	int from_sub_text_len;
4190	TextPositionState state;
4191	text *ret_text;
4192	int chunk_len;
4193	char *curr_ptr;
4194	char *start_ptr;
4195	StringInfoData str;
4196	bool found;
4197
4198	src_text_len = VARSIZE_ANY_EXHDR(src_text);
4199	from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
4200
4201	/ Return unmodified source string if empty source or pattern /
4202	if (src_text_len < `1` \|\| from_sub_text_len < `1`)
4203	{
4204	PG_RETURN_TEXT_P(src_text);
4205	}
4206
4207	text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
4208
4209	found = text_position_next(&state);
4210
4211	/ When the from_sub_text is not found, there is nothing to do. /
4212	if (!found)
4213	{
4214	text_position_cleanup(&state);
4215	PG_RETURN_TEXT_P(src_text);
4216	}
4217	curr_ptr = text_position_get_match_ptr(&state);
4218	start_ptr = VARDATA_ANY(src_text);
4219
4220	initStringInfo(&str);
4221
4222	do
4223	{
4224	CHECK_FOR_INTERRUPTS();
4225
4226	/ copy the data skipped over by last text_position_next() /
4227	chunk_len = curr_ptr - start_ptr;
4228	appendBinaryStringInfo(&str, start_ptr, chunk_len);
4229
4230	appendStringInfoText(&str, to_sub_text);
4231
4232	start_ptr = curr_ptr + from_sub_text_len;
4233
4234	found = text_position_next(&state);
4235	if (found)
4236	curr_ptr = text_position_get_match_ptr(&state);
4237	}
4238	while (found);
4239
4240	/ copy trailing data /
4241	chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4242	appendBinaryStringInfo(&str, start_ptr, chunk_len);
4243
4244	text_position_cleanup(&state);
4245
4246	ret_text = cstring_to_text_with_len(str.data, str.len);
4247	pfree(str.data);
4248
4249	PG_RETURN_TEXT_P(ret_text);
4250	}
4251
4252	/*
4253	* check_replace_text_has_escape_char
4254	*
4255	* check whether replace_text contains escape char.
4256	*/
4257	static bool
4258	check_replace_text_has_escape_char(const text *replace_text)
4259	{
4260	const char *p = VARDATA_ANY(replace_text);
4261	const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4262
4263	if (pg_database_encoding_max_length() == `1`)
4264	{
4265	for (; p < p_end; p++)
4266	{
4267	if (*p == `'\\'`)
4268	return true;
4269	}
4270	}
4271	else
4272	{
4273	for (; p < p_end; p += pg_mblen(p))
4274	{
4275	if (*p == `'\\'`)
4276	return true;
4277	}
4278	}
4279
4280	return false;
4281	}
4282
4283	/*
4284	* appendStringInfoRegexpSubstr
4285	*
4286	* Append replace_text to str, substituting regexp back references for
4287	* \n escapes. start_ptr is the start of the match in the source string,
4288	* at logical character position data_pos.
4289	*/
4290	static void
4291	appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
4292	regmatch_t *pmatch,
4293	char start_ptr, int* data_pos)
4294	{
4295	const char *p = VARDATA_ANY(replace_text);
4296	const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
4297	int eml = pg_database_encoding_max_length();
4298
4299	for (;;)
4300	{
4301	const char *chunk_start = p;
4302	int so;
4303	int eo;
4304
4305	/ Find next escape char. /
4306	if (eml == `1`)
4307	{
4308	for (; p < p_end && *p != `'\\'`; p++)
4309	/ nothing / ;
4310	}
4311	else
4312	{
4313	for (; p < p_end && *p != `'\\'`; p += pg_mblen(p))
4314	/ nothing / ;
4315	}
4316
4317	/ Copy the text we just scanned over, if any. /
4318	if (p > chunk_start)
4319	appendBinaryStringInfo(str, chunk_start, p - chunk_start);
4320
4321	/ Done if at end of string, else advance over escape char. /
4322	if (p >= p_end)
4323	break;
4324	p++;
4325
4326	if (p >= p_end)
4327	{
4328	/ Escape at very end of input. Treat same as unexpected char /
4329	appendStringInfoChar(str, `'\\'`);
4330	break;
4331	}
4332
4333	if (p >= `'1'` && p <= `'9'`)
4334	{
4335	/ Use the back reference of regexp. /
4336	int idx = *p - `'0'`;
4337
4338	so = pmatch[idx].rm_so;
4339	eo = pmatch[idx].rm_eo;
4340	p++;
4341	}
4342	else if (*p == `'&'`)
4343	{
4344	/ Use the entire matched string. /
4345	so = pmatch[`0`].rm_so;
4346	eo = pmatch[`0`].rm_eo;
4347	p++;
4348	}
4349	else if (*p == `'\\'`)
4350	{
4351	/ \\ means transfer one \ to output. /
4352	appendStringInfoChar(str, `'\\'`);
4353	p++;
4354	continue;
4355	}
4356	else
4357	{
4358	/*
4359	* If escape char is not followed by any expected char, just treat
4360	* it as ordinary data to copy. (XXX would it be better to throw
4361	* an error?)
4362	*/
4363	appendStringInfoChar(str, `'\\'`);
4364	continue;
4365	}
4366
4367	if (so != -`1` && eo != -`1`)
4368	{
4369	/*
4370	* Copy the text that is back reference of regexp. Note so and eo
4371	* are counted in characters not bytes.
4372	*/
4373	char *chunk_start;
4374	int chunk_len;
4375
4376	Assert(so >= data_pos);
4377	chunk_start = start_ptr;
4378	chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
4379	chunk_len = charlen_to_bytelen(chunk_start, eo - so);
4380	appendBinaryStringInfo(str, chunk_start, chunk_len);
4381	}
4382	}
4383	}
4384
/*
 * Number of regmatch_t slots used by the regexp replace code: slot 0 holds
 * the whole match (\&) and slots 1..9 hold the back references \1..\9.
 */
#define REGEXP_REPLACE_BACKREF_CNT		10
4386
4387	/*
4388	* replace_text_regexp
4389	*
4390	* replace text that matches to regexp in src_text to replace_text.
4391	*
4392	* Note: to avoid having to include regex.h in builtins.h, we declare
4393	* the regexp argument as void , but really it's regex_t .
4394	*/
4395	text *
4396	replace_text_regexp(text src_text, void* *regexp,
4397	text *replace_text, bool glob)
4398	{
4399	text *ret_text;
4400	regex_t re = (regex_t ) regexp;
4401	int src_text_len = VARSIZE_ANY_EXHDR(src_text);
4402	StringInfoData buf;
4403	regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
4404	pg_wchar *data;
4405	size_t data_len;
4406	int search_start;
4407	int data_pos;
4408	char *start_ptr;
4409	bool have_escape;
4410
4411	initStringInfo(&buf);
4412
4413	/ Convert data string to wide characters. /
4414	data = (pg_wchar ) palloc((src_text_len + `1`) sizeof(pg_wchar));
4415	data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
4416
4417	/ Check whether replace_text has escape char. /
4418	have_escape = check_replace_text_has_escape_char(replace_text);
4419
4420	/ start_ptr points to the data_pos'th character of src_text /
4421	start_ptr = (char *) VARDATA_ANY(src_text);
4422	data_pos = `0`;
4423
4424	search_start = `0`;
4425	while (search_start <= data_len)
4426	{
4427	int regexec_result;
4428
4429	CHECK_FOR_INTERRUPTS();
4430
4431	regexec_result = pg_regexec(re,
4432	data,
4433	data_len,
4434	search_start,
4435	NULL, / no details /
4436	REGEXP_REPLACE_BACKREF_CNT,
4437	pmatch,
4438	`0`);
4439
4440	if (regexec_result == REG_NOMATCH)
4441	break;
4442
4443	if (regexec_result != REG_OKAY)
4444	{
4445	char errMsg[`100`];
4446
4447	CHECK_FOR_INTERRUPTS();
4448	pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
4449	ereport(ERROR,
4450	(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
4451	errmsg("regular expression failed: %s", errMsg)));
4452	}
4453
4454	/*
4455	* Copy the text to the left of the match position. Note we are given
4456	* character not byte indexes.
4457	*/
4458	if (pmatch[`0`].rm_so - data_pos > `0`)
4459	{
4460	int chunk_len;
4461
4462	chunk_len = charlen_to_bytelen(start_ptr,
4463	pmatch[`0`].rm_so - data_pos);
4464	appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4465
4466	/*
4467	* Advance start_ptr over that text, to avoid multiple rescans of
4468	* it if the replace_text contains multiple back-references.
4469	*/
4470	start_ptr += chunk_len;
4471	data_pos = pmatch[`0`].rm_so;
4472	}
4473
4474	/*
4475	* Copy the replace_text. Process back references when the
4476	* replace_text has escape characters.
4477	*/
4478	if (have_escape)
4479	appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
4480	start_ptr, data_pos);
4481	else
4482	appendStringInfoText(&buf, replace_text);
4483
4484	/ Advance start_ptr and data_pos over the matched text. /
4485	start_ptr += charlen_to_bytelen(start_ptr,
4486	pmatch[`0`].rm_eo - data_pos);
4487	data_pos = pmatch[`0`].rm_eo;
4488
4489	/*
4490	* When global option is off, replace the first instance only.
4491	*/
4492	if (!glob)
4493	break;
4494
4495	/*
4496	* Advance search position. Normally we start the next search at the
4497	* end of the previous match; but if the match was of zero length, we
4498	* have to advance by one character, or we'd just find the same match
4499	* again.
4500	*/
4501	search_start = data_pos;
4502	if (pmatch[`0`].rm_so == pmatch[`0`].rm_eo)
4503	search_start++;
4504	}
4505
4506	/*
4507	* Copy the text to the right of the last match.
4508	*/
4509	if (data_pos < data_len)
4510	{
4511	int chunk_len;
4512
4513	chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
4514	appendBinaryStringInfo(&buf, start_ptr, chunk_len);
4515	}
4516
4517	ret_text = cstring_to_text_with_len(buf.data, buf.len);
4518	pfree(buf.data);
4519	pfree(data);
4520
4521	return ret_text;
4522	}
4523
4524	/*
4525	* split_text
4526	* parse input string
4527	* return ord item (1 based)
4528	* based on provided field separator
4529	*/
4530	Datum
4531	split_text(PG_FUNCTION_ARGS)
4532	{
4533	text *inputstring = PG_GETARG_TEXT_PP(`0`);
4534	text *fldsep = PG_GETARG_TEXT_PP(`1`);
4535	int fldnum = PG_GETARG_INT32(`2`);
4536	int inputstring_len;
4537	int fldsep_len;
4538	TextPositionState state;
4539	char *start_ptr;
4540	char *end_ptr;
4541	text *result_text;
4542	bool found;
4543
4544	/ field number is 1 based /
4545	if (fldnum < `1`)
4546	ereport(ERROR,
4547	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4548	errmsg("field position must be greater than zero")));
4549
4550	inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4551	fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4552
4553	/ return empty string for empty input string /
4554	if (inputstring_len < `1`)
4555	PG_RETURN_TEXT_P(cstring_to_text(""));
4556
4557	/ empty field separator /
4558	if (fldsep_len < `1`)
4559	{
4560	text_position_cleanup(&state);
4561	/ if first field, return input string, else empty string /
4562	if (fldnum == `1`)
4563	PG_RETURN_TEXT_P(inputstring);
4564	else
4565	PG_RETURN_TEXT_P(cstring_to_text(""));
4566	}
4567
4568	text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4569
4570	/ identify bounds of first field /
4571	start_ptr = VARDATA_ANY(inputstring);
4572	found = text_position_next(&state);
4573
4574	/ special case if fldsep not found at all /
4575	if (!found)
4576	{
4577	text_position_cleanup(&state);
4578	/ if field 1 requested, return input string, else empty string /
4579	if (fldnum == `1`)
4580	PG_RETURN_TEXT_P(inputstring);
4581	else
4582	PG_RETURN_TEXT_P(cstring_to_text(""));
4583	}
4584	end_ptr = text_position_get_match_ptr(&state);
4585
4586	while (found && --fldnum > `0`)
4587	{
4588	/ identify bounds of next field /
4589	start_ptr = end_ptr + fldsep_len;
4590	found = text_position_next(&state);
4591	if (found)
4592	end_ptr = text_position_get_match_ptr(&state);
4593	}
4594
4595	text_position_cleanup(&state);
4596
4597	if (fldnum > `0`)
4598	{
4599	/ N'th field separator not found /
4600	/ if last field requested, return it, else empty string /
4601	if (fldnum == `1`)
4602	{
4603	int last_len = start_ptr - VARDATA_ANY(inputstring);
4604
4605	result_text = cstring_to_text_with_len(start_ptr,
4606	inputstring_len - last_len);
4607	}
4608	else
4609	result_text = cstring_to_text("");
4610	}
4611	else
4612	{
4613	/ non-last field requested /
4614	result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
4615	}
4616
4617	PG_RETURN_TEXT_P(result_text);
4618	}
4619
4620	/*
4621	* Convenience function to return true when two text params are equal.
4622	*/
4623	static bool
4624	text_isequal(text txt1, text txt2, Oid collid)
4625	{
4626	return DatumGetBool(DirectFunctionCall2Coll(texteq,
4627	collid,
4628	PointerGetDatum(txt1),
4629	PointerGetDatum(txt2)));
4630	}
4631
4632	/*
4633	* text_to_array
4634	* parse input string and return text array of elements,
4635	* based on provided field separator
4636	*/
4637	Datum
4638	text_to_array(PG_FUNCTION_ARGS)
4639	{
4640	return text_to_array_internal(fcinfo);
4641	}
4642
4643	/*
4644	* text_to_array_null
4645	* parse input string and return text array of elements,
4646	* based on provided field separator and null string
4647	*
4648	* This is a separate entry point only to prevent the regression tests from
4649	* complaining about different argument sets for the same internal function.
4650	*/
4651	Datum
4652	text_to_array_null(PG_FUNCTION_ARGS)
4653	{
4654	return text_to_array_internal(fcinfo);
4655	}
4656
4657	/*
4658	* common code for text_to_array and text_to_array_null functions
4659	*
4660	* These are not strict so we have to test for null inputs explicitly.
4661	*/
4662	static Datum
4663	text_to_array_internal(PG_FUNCTION_ARGS)
4664	{
4665	text *inputstring;
4666	text *fldsep;
4667	text *null_string;
4668	int inputstring_len;
4669	int fldsep_len;
4670	char *start_ptr;
4671	text *result_text;
4672	bool is_null;
4673	ArrayBuildState *astate = NULL;
4674
4675	/ when input string is NULL, then result is NULL too /
4676	if (PG_ARGISNULL(`0`))
4677	PG_RETURN_NULL();
4678
4679	inputstring = PG_GETARG_TEXT_PP(`0`);
4680
4681	/ fldsep can be NULL /
4682	if (!PG_ARGISNULL(`1`))
4683	fldsep = PG_GETARG_TEXT_PP(`1`);
4684	else
4685	fldsep = NULL;
4686
4687	/ null_string can be NULL or omitted /
4688	if (PG_NARGS() > `2` && !PG_ARGISNULL(`2`))
4689	null_string = PG_GETARG_TEXT_PP(`2`);
4690	else
4691	null_string = NULL;
4692
4693	if (fldsep != NULL)
4694	{
4695	/*
4696	* Normal case with non-null fldsep. Use the text_position machinery
4697	* to search for occurrences of fldsep.
4698	*/
4699	TextPositionState state;
4700
4701	inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4702	fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
4703
4704	/ return empty array for empty input string /
4705	if (inputstring_len < `1`)
4706	PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4707
4708	/*
4709	* empty field separator: return the input string as a one-element
4710	* array
4711	*/
4712	if (fldsep_len < `1`)
4713	{
4714	Datum elems[`1`];
4715	bool nulls[`1`];
4716	int dims[`1`];
4717	int lbs[`1`];
4718
4719	/ single element can be a NULL too /
4720	is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false;
4721
4722	elems[`0`] = PointerGetDatum(inputstring);
4723	nulls[`0`] = is_null;
4724	dims[`0`] = `1`;
4725	lbs[`0`] = `1`;
4726	/ XXX: this hardcodes assumptions about the text type /
4727	PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls,
4728	`1`, dims, lbs,
4729	TEXTOID, -`1`, false, `'i'`));
4730	}
4731
4732	text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
4733
4734	start_ptr = VARDATA_ANY(inputstring);
4735
4736	for (;;)
4737	{
4738	bool found;
4739	char *end_ptr;
4740	int chunk_len;
4741
4742	CHECK_FOR_INTERRUPTS();
4743
4744	found = text_position_next(&state);
4745	if (!found)
4746	{
4747	/ fetch last field /
4748	chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
4749	end_ptr = NULL; / not used, but some compilers complain /
4750	}
4751	else
4752	{
4753	/ fetch non-last field /
4754	end_ptr = text_position_get_match_ptr(&state);
4755	chunk_len = end_ptr - start_ptr;
4756	}
4757
4758	/ must build a temp text datum to pass to accumArrayResult /
4759	result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4760	is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4761
4762	/ stash away this field /
4763	astate = accumArrayResult(astate,
4764	PointerGetDatum(result_text),
4765	is_null,
4766	TEXTOID,
4767	CurrentMemoryContext);
4768
4769	pfree(result_text);
4770
4771	if (!found)
4772	break;
4773
4774	start_ptr = end_ptr + fldsep_len;
4775	}
4776
4777	text_position_cleanup(&state);
4778	}
4779	else
4780	{
4781	/*
4782	* When fldsep is NULL, each character in the inputstring becomes an
4783	* element in the result array. The separator is effectively the
4784	* space between characters.
4785	*/
4786	inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
4787
4788	/ return empty array for empty input string /
4789	if (inputstring_len < `1`)
4790	PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
4791
4792	start_ptr = VARDATA_ANY(inputstring);
4793
4794	while (inputstring_len > `0`)
4795	{
4796	int chunk_len = pg_mblen(start_ptr);
4797
4798	CHECK_FOR_INTERRUPTS();
4799
4800	/ must build a temp text datum to pass to accumArrayResult /
4801	result_text = cstring_to_text_with_len(start_ptr, chunk_len);
4802	is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false;
4803
4804	/ stash away this field /
4805	astate = accumArrayResult(astate,
4806	PointerGetDatum(result_text),
4807	is_null,
4808	TEXTOID,
4809	CurrentMemoryContext);
4810
4811	pfree(result_text);
4812
4813	start_ptr += chunk_len;
4814	inputstring_len -= chunk_len;
4815	}
4816	}
4817
4818	PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
4819	CurrentMemoryContext));
4820	}
4821
4822	/*
4823	* array_to_text
4824	* concatenate Cstring representation of input array elements
4825	* using provided field separator
4826	*/
4827	Datum
4828	array_to_text(PG_FUNCTION_ARGS)
4829	{
4830	ArrayType *v = PG_GETARG_ARRAYTYPE_P(`0`);
4831	char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(`1`));
4832
4833	PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
4834	}
4835
4836	/*
4837	* array_to_text_null
4838	* concatenate Cstring representation of input array elements
4839	* using provided field separator and null string
4840	*
4841	* This version is not strict so we have to test for null inputs explicitly.
4842	*/
4843	Datum
4844	array_to_text_null(PG_FUNCTION_ARGS)
4845	{
4846	ArrayType *v;
4847	char *fldsep;
4848	char *null_string;
4849
4850	/ returns NULL when first or second parameter is NULL /
4851	if (PG_ARGISNULL(`0`) \|\| PG_ARGISNULL(`1`))
4852	PG_RETURN_NULL();
4853
4854	v = PG_GETARG_ARRAYTYPE_P(`0`);
4855	fldsep = text_to_cstring(PG_GETARG_TEXT_PP(`1`));
4856
4857	/ NULL null string is passed through as a null pointer /
4858	if (!PG_ARGISNULL(`2`))
4859	null_string = text_to_cstring(PG_GETARG_TEXT_PP(`2`));
4860	else
4861	null_string = NULL;
4862
4863	PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
4864	}
4865
4866	/*
4867	* common code for array_to_text and array_to_text_null functions
4868	*/
4869	static text *
4870	array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
4871	const char fldsep, const* char *null_string)
4872	{
4873	text *result;
4874	int nitems,
4875	*dims,
4876	ndims;
4877	Oid element_type;
4878	int typlen;
4879	bool typbyval;
4880	char typalign;
4881	StringInfoData buf;
4882	bool printed = false;
4883	char *p;
4884	bits8 *bitmap;
4885	int bitmask;
4886	int i;
4887	ArrayMetaState *my_extra;
4888
4889	ndims = ARR_NDIM(v);
4890	dims = ARR_DIMS(v);
4891	nitems = ArrayGetNItems(ndims, dims);
4892
4893	/ if there are no elements, return an empty string /
4894	if (nitems == `0`)
4895	return cstring_to_text_with_len("", `0`);
4896
4897	element_type = ARR_ELEMTYPE(v);
4898	initStringInfo(&buf);
4899
4900	/*
4901	* We arrange to look up info about element type, including its output
4902	* conversion proc, only once per series of calls, assuming the element
4903	* type doesn't change underneath us.
4904	*/
4905	my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4906	if (my_extra == NULL)
4907	{
4908	fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
4909	sizeof(ArrayMetaState));
4910	my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
4911	my_extra->element_type = ~element_type;
4912	}
4913
4914	if (my_extra->element_type != element_type)
4915	{
4916	/*
4917	* Get info about element type, including its output conversion proc
4918	*/
4919	get_type_io_data(element_type, IOFunc_output,
4920	&my_extra->typlen, &my_extra->typbyval,
4921	&my_extra->typalign, &my_extra->typdelim,
4922	&my_extra->typioparam, &my_extra->typiofunc);
4923	fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
4924	fcinfo->flinfo->fn_mcxt);
4925	my_extra->element_type = element_type;
4926	}
4927	typlen = my_extra->typlen;
4928	typbyval = my_extra->typbyval;
4929	typalign = my_extra->typalign;
4930
4931	p = ARR_DATA_PTR(v);
4932	bitmap = ARR_NULLBITMAP(v);
4933	bitmask = `1`;
4934
4935	for (i = `0`; i < nitems; i++)
4936	{
4937	Datum itemvalue;
4938	char *value;
4939
4940	/ Get source element, checking for NULL /
4941	if (bitmap && (*bitmap & bitmask) == `0`)
4942	{
4943	/ if null_string is NULL, we just ignore null elements /
4944	if (null_string != NULL)
4945	{
4946	if (printed)
4947	appendStringInfo(&buf, "%s%s", fldsep, null_string);
4948	else
4949	appendStringInfoString(&buf, null_string);
4950	printed = true;
4951	}
4952	}
4953	else
4954	{
4955	itemvalue = fetch_att(p, typbyval, typlen);
4956
4957	value = OutputFunctionCall(&my_extra->proc, itemvalue);
4958
4959	if (printed)
4960	appendStringInfo(&buf, "%s%s", fldsep, value);
4961	else
4962	appendStringInfoString(&buf, value);
4963	printed = true;
4964
4965	p = att_addlength_pointer(p, typlen, p);
4966	p = (char *) att_align_nominal(p, typalign);
4967	}
4968
4969	/ advance bitmap pointer if any /
4970	if (bitmap)
4971	{
4972	bitmask <<= `1`;
4973	if (bitmask == `0x100`)
4974	{
4975	bitmap++;
4976	bitmask = `1`;
4977	}
4978	}
4979	}
4980
4981	result = cstring_to_text_with_len(buf.data, buf.len);
4982	pfree(buf.data);
4983
4984	return result;
4985	}
4986
4987	#define HEXBASE 16
4988	/*
4989	* Convert an int32 to a string containing a base 16 (hex) representation of
4990	* the number.
4991	*/
4992	Datum
4993	to_hex32(PG_FUNCTION_ARGS)
4994	{
4995	uint32 value = (uint32) PG_GETARG_INT32(`0`);
4996	char *ptr;
4997	const char *digits = "0123456789abcdef";
4998	char buf[`32`]; / bigger than needed, but reasonable /
4999
5000	ptr = buf + sizeof(buf) - `1`;
5001	*ptr = `'\0'`;
5002
5003	do
5004	{
5005	*--ptr = digits[value % HEXBASE];
5006	value /= HEXBASE;
5007	} while (ptr > buf && value);
5008
5009	PG_RETURN_TEXT_P(cstring_to_text(ptr));
5010	}
5011
5012	/*
5013	* Convert an int64 to a string containing a base 16 (hex) representation of
5014	* the number.
5015	*/
5016	Datum
5017	to_hex64(PG_FUNCTION_ARGS)
5018	{
5019	uint64 value = (uint64) PG_GETARG_INT64(`0`);
5020	char *ptr;
5021	const char *digits = "0123456789abcdef";
5022	char buf[`32`]; / bigger than needed, but reasonable /
5023
5024	ptr = buf + sizeof(buf) - `1`;
5025	*ptr = `'\0'`;
5026
5027	do
5028	{
5029	*--ptr = digits[value % HEXBASE];
5030	value /= HEXBASE;
5031	} while (ptr > buf && value);
5032
5033	PG_RETURN_TEXT_P(cstring_to_text(ptr));
5034	}
5035
5036	/*
5037	* Return the size of a datum, possibly compressed
5038	*
5039	* Works on any data type
5040	*/
5041	Datum
5042	pg_column_size(PG_FUNCTION_ARGS)
5043	{
5044	Datum value = PG_GETARG_DATUM(`0`);
5045	int32 result;
5046	int typlen;
5047
5048	/ On first call, get the input type's typlen, and save at fn_extra /*
5049	if (fcinfo->flinfo->fn_extra == NULL)
5050	{
5051	/ Lookup the datatype of the supplied argument /
5052	Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, `0`);
5053
5054	typlen = get_typlen(argtypeid);
5055	if (typlen == `0`) / should not happen /
5056	elog(ERROR, "cache lookup failed for type %u", argtypeid);
5057
5058	fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5059	sizeof(int));
5060	((int* *) fcinfo->flinfo->fn_extra) = typlen;
5061	}
5062	else
5063	typlen = ((int* *) fcinfo->flinfo->fn_extra);
5064
5065	if (typlen == -`1`)
5066	{
5067	/ varlena type, possibly toasted /
5068	result = toast_datum_size(value);
5069	}
5070	else if (typlen == -`2`)
5071	{
5072	/ cstring /
5073	result = strlen(DatumGetCString(value)) + `1`;
5074	}
5075	else
5076	{
5077	/ ordinary fixed-width type /
5078	result = typlen;
5079	}
5080
5081	PG_RETURN_INT32(result);
5082	}
5083
5084	/*
5085	* string_agg - Concatenates values and returns string.
5086	*
5087	* Syntax: string_agg(value text, delimiter text) RETURNS text
5088	*
5089	* Note: Any NULL values are ignored. The first-call delimiter isn't
5090	* actually used at all, and on subsequent calls the delimiter precedes
5091	* the associated value.
5092	*/
5093
5094	/ subroutine to initialize state /
5095	static StringInfo
5096	makeStringAggState(FunctionCallInfo fcinfo)
5097	{
5098	StringInfo state;
5099	MemoryContext aggcontext;
5100	MemoryContext oldcontext;
5101
5102	if (!AggCheckCallContext(fcinfo, &aggcontext))
5103	{
5104	/ cannot be called directly because of internal-type argument /
5105	elog(ERROR, "string_agg_transfn called in non-aggregate context");
5106	}
5107
5108	/*
5109	* Create state in aggregate context. It'll stay there across subsequent
5110	* calls.
5111	*/
5112	oldcontext = MemoryContextSwitchTo(aggcontext);
5113	state = makeStringInfo();
5114	MemoryContextSwitchTo(oldcontext);
5115
5116	return state;
5117	}
5118
5119	Datum
5120	string_agg_transfn(PG_FUNCTION_ARGS)
5121	{
5122	StringInfo state;
5123
5124	state = PG_ARGISNULL(`0`) ? NULL : (StringInfo) PG_GETARG_POINTER(`0`);
5125
5126	/ Append the value unless null. /
5127	if (!PG_ARGISNULL(`1`))
5128	{
5129	/ On the first time through, we ignore the delimiter. /
5130	if (state == NULL)
5131	state = makeStringAggState(fcinfo);
5132	else if (!PG_ARGISNULL(`2`))
5133	appendStringInfoText(state, PG_GETARG_TEXT_PP(`2`)); / delimiter /
5134
5135	appendStringInfoText(state, PG_GETARG_TEXT_PP(`1`)); / value /
5136	}
5137
5138	/*
5139	* The transition type for string_agg() is declared to be "internal",
5140	* which is a pass-by-value type the same size as a pointer.
5141	*/
5142	PG_RETURN_POINTER(state);
5143	}
5144
5145	Datum
5146	string_agg_finalfn(PG_FUNCTION_ARGS)
5147	{
5148	StringInfo state;
5149
5150	/ cannot be called directly because of internal-type argument /
5151	Assert(AggCheckCallContext(fcinfo, NULL));
5152
5153	state = PG_ARGISNULL(`0`) ? NULL : (StringInfo) PG_GETARG_POINTER(`0`);
5154
5155	if (state != NULL)
5156	PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
5157	else
5158	PG_RETURN_NULL();
5159	}
5160
5161	/*
5162	* Prepare cache with fmgr info for the output functions of the datatypes of
5163	* the arguments of a concat-like function, beginning with argument "argidx".
5164	* (Arguments before that will have corresponding slots in the resulting
5165	* FmgrInfo array, but we don't fill those slots.)
5166	*/
5167	static FmgrInfo *
5168	build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
5169	{
5170	FmgrInfo *foutcache;
5171	int i;
5172
5173	/ We keep the info in fn_mcxt so it survives across calls /
5174	foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
5175	PG_NARGS() * sizeof(FmgrInfo));
5176
5177	for (i = argidx; i < PG_NARGS(); i++)
5178	{
5179	Oid valtype;
5180	Oid typOutput;
5181	bool typIsVarlena;
5182
5183	valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
5184	if (!OidIsValid(valtype))
5185	elog(ERROR, "could not determine data type of concat() input");
5186
5187	getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
5188	fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
5189	}
5190
5191	fcinfo->flinfo->fn_extra = foutcache;
5192
5193	return foutcache;
5194	}
5195
5196	/*
5197	* Implementation of both concat() and concat_ws().
5198	*
5199	* sepstr is the separator string to place between values.
5200	* argidx identifies the first argument to concatenate (counting from zero);
5201	* note that this must be constant across any one series of calls.
5202	*
5203	* Returns NULL if result should be NULL, else text value.
5204	*/
5205	static text *
5206	concat_internal(const char sepstr, int* argidx,
5207	FunctionCallInfo fcinfo)
5208	{
5209	text *result;
5210	StringInfoData str;
5211	FmgrInfo *foutcache;
5212	bool first_arg = true;
5213	int i;
5214
5215	/*
5216	* concat(VARIADIC some-array) is essentially equivalent to
5217	* array_to_text(), ie concat the array elements with the given separator.
5218	* So we just pass the case off to that code.
5219	*/
5220	if (get_fn_expr_variadic(fcinfo->flinfo))
5221	{
5222	ArrayType *arr;
5223
5224	/ Should have just the one argument /
5225	Assert(argidx == PG_NARGS() - `1`);
5226
5227	/ concat(VARIADIC NULL) is defined as NULL /
5228	if (PG_ARGISNULL(argidx))
5229	return NULL;
5230
5231	/*
5232	* Non-null argument had better be an array. We assume that any call
5233	* context that could let get_fn_expr_variadic return true will have
5234	* checked that a VARIADIC-labeled parameter actually is an array. So
5235	* it should be okay to just Assert that it's an array rather than
5236	* doing a full-fledged error check.
5237	*/
5238	Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
5239
5240	/ OK, safe to fetch the array value /
5241	arr = PG_GETARG_ARRAYTYPE_P(argidx);
5242
5243	/*
5244	* And serialize the array. We tell array_to_text to ignore null
5245	* elements, which matches the behavior of the loop below.
5246	*/
5247	return array_to_text_internal(fcinfo, arr, sepstr, NULL);
5248	}
5249
5250	/ Normal case without explicit VARIADIC marker /
5251	initStringInfo(&str);
5252
5253	/ Get output function info, building it if first time through /
5254	foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
5255	if (foutcache == NULL)
5256	foutcache = build_concat_foutcache(fcinfo, argidx);
5257
5258	for (i = argidx; i < PG_NARGS(); i++)
5259	{
5260	if (!PG_ARGISNULL(i))
5261	{
5262	Datum value = PG_GETARG_DATUM(i);
5263
5264	/ add separator if appropriate /
5265	if (first_arg)
5266	first_arg = false;
5267	else
5268	appendStringInfoString(&str, sepstr);
5269
5270	/ call the appropriate type output function, append the result /
5271	appendStringInfoString(&str,
5272	OutputFunctionCall(&foutcache[i], value));
5273	}
5274	}
5275
5276	result = cstring_to_text_with_len(str.data, str.len);
5277	pfree(str.data);
5278
5279	return result;
5280	}
5281
5282	/*
5283	* Concatenate all arguments. NULL arguments are ignored.
5284	*/
5285	Datum
5286	text_concat(PG_FUNCTION_ARGS)
5287	{
5288	text *result;
5289
5290	result = concat_internal("", `0`, fcinfo);
5291	if (result == NULL)
5292	PG_RETURN_NULL();
5293	PG_RETURN_TEXT_P(result);
5294	}
5295
5296	/*
5297	* Concatenate all but first argument value with separators. The first
5298	* parameter is used as the separator. NULL arguments are ignored.
5299	*/
5300	Datum
5301	text_concat_ws(PG_FUNCTION_ARGS)
5302	{
5303	char *sep;
5304	text *result;
5305
5306	/ return NULL when separator is NULL /
5307	if (PG_ARGISNULL(`0`))
5308	PG_RETURN_NULL();
5309	sep = text_to_cstring(PG_GETARG_TEXT_PP(`0`));
5310
5311	result = concat_internal(sep, `1`, fcinfo);
5312	if (result == NULL)
5313	PG_RETURN_NULL();
5314	PG_RETURN_TEXT_P(result);
5315	}
5316
5317	/*
5318	* Return first n characters in the string. When n is negative,
5319	* return all but last \|n\| characters.
5320	*/
5321	Datum
5322	text_left(PG_FUNCTION_ARGS)
5323	{
5324	int n = PG_GETARG_INT32(`1`);
5325
5326	if (n < `0`)
5327	{
5328	text *str = PG_GETARG_TEXT_PP(`0`);
5329	const char *p = VARDATA_ANY(str);
5330	int len = VARSIZE_ANY_EXHDR(str);
5331	int rlen;
5332
5333	n = pg_mbstrlen_with_len(p, len) + n;
5334	rlen = pg_mbcharcliplen(p, len, n);
5335	PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
5336	}
5337	else
5338	PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(`0`), `1`, n, false));
5339	}
5340
5341	/*
5342	* Return last n characters in the string. When n is negative,
5343	* return all but first \|n\| characters.
5344	*/
5345	Datum
5346	text_right(PG_FUNCTION_ARGS)
5347	{
5348	text *str = PG_GETARG_TEXT_PP(`0`);
5349	const char *p = VARDATA_ANY(str);
5350	int len = VARSIZE_ANY_EXHDR(str);
5351	int n = PG_GETARG_INT32(`1`);
5352	int off;
5353
5354	if (n < `0`)
5355	n = -n;
5356	else
5357	n = pg_mbstrlen_with_len(p, len) - n;
5358	off = pg_mbcharcliplen(p, len, n);
5359
5360	PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
5361	}
5362
5363	/*
5364	* Return reversed string
5365	*/
5366	Datum
5367	text_reverse(PG_FUNCTION_ARGS)
5368	{
5369	text *str = PG_GETARG_TEXT_PP(`0`);
5370	const char *p = VARDATA_ANY(str);
5371	int len = VARSIZE_ANY_EXHDR(str);
5372	const char *endp = p + len;
5373	text *result;
5374	char *dst;
5375
5376	result = palloc(len + VARHDRSZ);
5377	dst = (char *) VARDATA(result) + len;
5378	SET_VARSIZE(result, len + VARHDRSZ);
5379
5380	if (pg_database_encoding_max_length() > `1`)
5381	{
5382	/ multibyte version /
5383	while (p < endp)
5384	{
5385	int sz;
5386
5387	sz = pg_mblen(p);
5388	dst -= sz;
5389	memcpy(dst, p, sz);
5390	p += sz;
5391	}
5392	}
5393	else
5394	{
5395	/ single byte version /
5396	while (p < endp)
5397	(--dst) = p++;
5398	}
5399
5400	PG_RETURN_TEXT_P(result);
5401	}
5402
5403
/*
 * Support macros for text_format()
 */

/* flag bit: is minus flag present? */
#define TEXT_FORMAT_FLAG_MINUS	0x0001

/*
 * Step "ptr" forward by one, and raise an error if that reaches or passes
 * "end_ptr", i.e. the format string ended in the middle of a specifier.
 */
#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
	do { \
		if ((end_ptr) <= ++(ptr)) \
			ereport(ERROR, \
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
					 errmsg("unterminated format() type specifier"), \
					 errhint("For a single \"%%\" use \"%%%%\"."))); \
	} while (0)
5417
5418	/*
5419	* Returns a formatted string
5420	*/
5421	Datum
5422	text_format(PG_FUNCTION_ARGS)
5423	{
5424	text *fmt;
5425	StringInfoData str;
5426	const char *cp;
5427	const char *start_ptr;
5428	const char *end_ptr;
5429	text *result;
5430	int arg;
5431	bool funcvariadic;
5432	int nargs;
5433	Datum *elements = NULL;
5434	bool *nulls = NULL;
5435	Oid element_type = InvalidOid;
5436	Oid prev_type = InvalidOid;
5437	Oid prev_width_type = InvalidOid;
5438	FmgrInfo typoutputfinfo;
5439	FmgrInfo typoutputinfo_width;
5440
5441	/ When format string is null, immediately return null /
5442	if (PG_ARGISNULL(`0`))
5443	PG_RETURN_NULL();
5444
5445	/ If argument is marked VARIADIC, expand array into elements /
5446	if (get_fn_expr_variadic(fcinfo->flinfo))
5447	{
5448	ArrayType *arr;
5449	int16 elmlen;
5450	bool elmbyval;
5451	char elmalign;
5452	int nitems;
5453
5454	/ Should have just the one argument /
5455	Assert(PG_NARGS() == `2`);
5456
5457	/ If argument is NULL, we treat it as zero-length array /
5458	if (PG_ARGISNULL(`1`))
5459	nitems = `0`;
5460	else
5461	{
5462	/*
5463	* Non-null argument had better be an array. We assume that any
5464	* call context that could let get_fn_expr_variadic return true
5465	* will have checked that a VARIADIC-labeled parameter actually is
5466	* an array. So it should be okay to just Assert that it's an
5467	* array rather than doing a full-fledged error check.
5468	*/
5469	Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, `1`))));
5470
5471	/ OK, safe to fetch the array value /
5472	arr = PG_GETARG_ARRAYTYPE_P(`1`);
5473
5474	/ Get info about array element type /
5475	element_type = ARR_ELEMTYPE(arr);
5476	get_typlenbyvalalign(element_type,
5477	&elmlen, &elmbyval, &elmalign);
5478
5479	/ Extract all array elements /
5480	deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
5481	&elements, &nulls, &nitems);
5482	}
5483
5484	nargs = nitems + `1`;
5485	funcvariadic = true;
5486	}
5487	else
5488	{
5489	/ Non-variadic case, we'll process the arguments individually /
5490	nargs = PG_NARGS();
5491	funcvariadic = false;
5492	}
5493
5494	/ Setup for main loop. /
5495	fmt = PG_GETARG_TEXT_PP(`0`);
5496	start_ptr = VARDATA_ANY(fmt);
5497	end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
5498	initStringInfo(&str);
5499	arg = `1`; / next argument position to print /
5500
5501	/ Scan format string, looking for conversion specifiers. /
5502	for (cp = start_ptr; cp < end_ptr; cp++)
5503	{
5504	int argpos;
5505	int widthpos;
5506	int flags;
5507	int width;
5508	Datum value;
5509	bool isNull;
5510	Oid typid;
5511
5512	/*
5513	* If it's not the start of a conversion specifier, just copy it to
5514	* the output buffer.
5515	*/
5516	if (*cp != `'%'`)
5517	{
5518	appendStringInfoCharMacro(&str, *cp);
5519	continue;
5520	}
5521
5522	ADVANCE_PARSE_POINTER(cp, end_ptr);
5523
5524	/ Easy case: %% outputs a single % /
5525	if (*cp == `'%'`)
5526	{
5527	appendStringInfoCharMacro(&str, *cp);
5528	continue;
5529	}
5530
5531	/ Parse the optional portions of the format specifier /
5532	cp = text_format_parse_format(cp, end_ptr,
5533	&argpos, &widthpos,
5534	&flags, &width);
5535
5536	/*
5537	* Next we should see the main conversion specifier. Whether or not
5538	* an argument position was present, it's known that at least one
5539	* character remains in the string at this point. Experience suggests
5540	* that it's worth checking that that character is one of the expected
5541	* ones before we try to fetch arguments, so as to produce the least
5542	* confusing response to a mis-formatted specifier.
5543	*/
5544	if (strchr("sIL", *cp) == NULL)
5545	ereport(ERROR,
5546	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5547	errmsg("unrecognized format() type specifier \"%c\"",
5548	*cp),
5549	errhint("For a single \"%%\" use \"%%%%\".")));
5550
5551	/ If indirect width was specified, get its value /
5552	if (widthpos >= `0`)
5553	{
5554	/ Collect the specified or next argument position /
5555	if (widthpos > `0`)
5556	arg = widthpos;
5557	if (arg >= nargs)
5558	ereport(ERROR,
5559	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5560	errmsg("too few arguments for format()")));
5561
5562	/ Get the value and type of the selected argument /
5563	if (!funcvariadic)
5564	{
5565	value = PG_GETARG_DATUM(arg);
5566	isNull = PG_ARGISNULL(arg);
5567	typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5568	}
5569	else
5570	{
5571	value = elements[arg - `1`];
5572	isNull = nulls[arg - `1`];
5573	typid = element_type;
5574	}
5575	if (!OidIsValid(typid))
5576	elog(ERROR, "could not determine data type of format() input");
5577
5578	arg++;
5579
5580	/ We can treat NULL width the same as zero /
5581	if (isNull)
5582	width = `0`;
5583	else if (typid == INT4OID)
5584	width = DatumGetInt32(value);
5585	else if (typid == INT2OID)
5586	width = DatumGetInt16(value);
5587	else
5588	{
5589	/ For less-usual datatypes, convert to text then to int /
5590	char *str;
5591
5592	if (typid != prev_width_type)
5593	{
5594	Oid typoutputfunc;
5595	bool typIsVarlena;
5596
5597	getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5598	fmgr_info(typoutputfunc, &typoutputinfo_width);
5599	prev_width_type = typid;
5600	}
5601
5602	str = OutputFunctionCall(&typoutputinfo_width, value);
5603
5604	/ pg_strtoint32 will complain about bad data or overflow /
5605	width = pg_strtoint32(str);
5606
5607	pfree(str);
5608	}
5609	}
5610
5611	/ Collect the specified or next argument position /
5612	if (argpos > `0`)
5613	arg = argpos;
5614	if (arg >= nargs)
5615	ereport(ERROR,
5616	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5617	errmsg("too few arguments for format()")));
5618
5619	/ Get the value and type of the selected argument /
5620	if (!funcvariadic)
5621	{
5622	value = PG_GETARG_DATUM(arg);
5623	isNull = PG_ARGISNULL(arg);
5624	typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
5625	}
5626	else
5627	{
5628	value = elements[arg - `1`];
5629	isNull = nulls[arg - `1`];
5630	typid = element_type;
5631	}
5632	if (!OidIsValid(typid))
5633	elog(ERROR, "could not determine data type of format() input");
5634
5635	arg++;
5636
5637	/*
5638	* Get the appropriate typOutput function, reusing previous one if
5639	* same type as previous argument. That's particularly useful in the
5640	* variadic-array case, but often saves work even for ordinary calls.
5641	*/
5642	if (typid != prev_type)
5643	{
5644	Oid typoutputfunc;
5645	bool typIsVarlena;
5646
5647	getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
5648	fmgr_info(typoutputfunc, &typoutputfinfo);
5649	prev_type = typid;
5650	}
5651
5652	/*
5653	* And now we can format the value.
5654	*/
5655	switch (*cp)
5656	{
5657	case `'s'`:
5658	case `'I'`:
5659	case `'L'`:
5660	text_format_string_conversion(&str, *cp, &typoutputfinfo,
5661	value, isNull,
5662	flags, width);
5663	break;
5664	default:
5665	/ should not get here, because of previous check /
5666	ereport(ERROR,
5667	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5668	errmsg("unrecognized format() type specifier \"%c\"",
5669	*cp),
5670	errhint("For a single \"%%\" use \"%%%%\".")));
5671	break;
5672	}
5673	}
5674
5675	/ Don't need deconstruct_array results anymore. /
5676	if (elements != NULL)
5677	pfree(elements);
5678	if (nulls != NULL)
5679	pfree(nulls);
5680
5681	/ Generate results. /
5682	result = cstring_to_text_with_len(str.data, str.len);
5683	pfree(str.data);
5684
5685	PG_RETURN_TEXT_P(result);
5686	}
5687
5688	/*
5689	* Parse contiguous digits as a decimal number.
5690	*
5691	* Returns true if some digits could be parsed.
5692	* The value is returned into value, and ptr is advanced to the next
5693	* character to be parsed.
5694	*
5695	* Note parsing invariant: at least one character is known available before
5696	* string end (end_ptr) at entry, and this is still true at exit.
5697	*/
5698	static bool
5699	text_format_parse_digits(const char *ptr, const* char end_ptr, int* *value)
5700	{
5701	bool found = false;
5702	const char cp = ptr;
5703	int val = `0`;
5704
5705	while (cp >= `'0'` && cp <= `'9'`)
5706	{
5707	int8 digit = (*cp - `'0'`);
5708
5709	if (unlikely(pg_mul_s32_overflow(val, `10`, &val)) \|\|
5710	unlikely(pg_add_s32_overflow(val, digit, &val)))
5711	ereport(ERROR,
5712	(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5713	errmsg("number is out of range")));
5714	ADVANCE_PARSE_POINTER(cp, end_ptr);
5715	found = true;
5716	}
5717
5718	*ptr = cp;
5719	*value = val;
5720
5721	return found;
5722	}
5723
5724	/*
5725	* Parse a format specifier (generally following the SUS printf spec).
5726	*
5727	* We have already advanced over the initial '%', and we are looking for
5728	* [argpos][flags][width]type (but the type character is not consumed here).
5729	*
5730	* Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
5731	* Output parameters:
5732	* argpos: argument position for value to be printed. -1 means unspecified.
5733	* widthpos: argument position for width. Zero means the argument position
5734	* was unspecified (ie, take the next arg) and -1 means no width
5735	* argument (width was omitted or specified as a constant).
5736	* flags: bitmask of flags.
5737	* width: directly-specified width value. Zero means the width was omitted
5738	* (note it's not necessary to distinguish this case from an explicit
5739	* zero width value).
5740	*
5741	* The function result is the next character position to be parsed, ie, the
5742	* location where the type character is/should be.
5743	*
5744	* Note parsing invariant: at least one character is known available before
5745	* string end (end_ptr) at entry, and this is still true at exit.
5746	*/
5747	static const char *
5748	text_format_parse_format(const char start_ptr, const* char *end_ptr,
5749	int argpos, int* *widthpos,
5750	int flags, int* *width)
5751	{
5752	const char *cp = start_ptr;
5753	int n;
5754
5755	/ set defaults for output parameters /
5756	*argpos = -`1`;
5757	*widthpos = -`1`;
5758	*flags = `0`;
5759	*width = `0`;
5760
5761	/ try to identify first number /
5762	if (text_format_parse_digits(&cp, end_ptr, &n))
5763	{
5764	if (*cp != `'$'`)
5765	{
5766	/ Must be just a width and a type, so we're done /
5767	*width = n;
5768	return cp;
5769	}
5770	/ The number was argument position /
5771	*argpos = n;
5772	/ Explicit 0 for argument index is immediately refused /
5773	if (n == `0`)
5774	ereport(ERROR,
5775	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5776	errmsg("format specifies argument 0, but arguments are numbered from 1")));
5777	ADVANCE_PARSE_POINTER(cp, end_ptr);
5778	}
5779
5780	/ Handle flags (only minus is supported now) /
5781	while (*cp == `'-'`)
5782	{
5783	*flags \|= TEXT_FORMAT_FLAG_MINUS;
5784	ADVANCE_PARSE_POINTER(cp, end_ptr);
5785	}
5786
5787	if (cp == `''`)
5788	{
5789	/ Handle indirect width /
5790	ADVANCE_PARSE_POINTER(cp, end_ptr);
5791	if (text_format_parse_digits(&cp, end_ptr, &n))
5792	{
5793	/ number in this position must be closed by $ /
5794	if (*cp != `'$'`)
5795	ereport(ERROR,
5796	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5797	errmsg("width argument position must be ended by \"$\"")));
5798	/ The number was width argument position /
5799	*widthpos = n;
5800	/ Explicit 0 for argument index is immediately refused /
5801	if (n == `0`)
5802	ereport(ERROR,
5803	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
5804	errmsg("format specifies argument 0, but arguments are numbered from 1")));
5805	ADVANCE_PARSE_POINTER(cp, end_ptr);
5806	}
5807	else
5808	widthpos = `0`; /* width's argument position is unspecified /
5809	}
5810	else
5811	{
5812	/ Check for direct width specification /
5813	if (text_format_parse_digits(&cp, end_ptr, &n))
5814	*width = n;
5815	}
5816
5817	/ cp should now be pointing at type character /
5818	return cp;
5819	}
5820
5821	/*
5822	* Format a %s, %I, or %L conversion
5823	*/
5824	static void
5825	text_format_string_conversion(StringInfo buf, char conversion,
5826	FmgrInfo *typOutputInfo,
5827	Datum value, bool isNull,
5828	int flags, int width)
5829	{
5830	char *str;
5831
5832	/ Handle NULL arguments before trying to stringify the value. /
5833	if (isNull)
5834	{
5835	if (conversion == `'s'`)
5836	text_format_append_string(buf, "", flags, width);
5837	else if (conversion == `'L'`)
5838	text_format_append_string(buf, "NULL", flags, width);
5839	else if (conversion == `'I'`)
5840	ereport(ERROR,
5841	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
5842	errmsg("null values cannot be formatted as an SQL identifier")));
5843	return;
5844	}
5845
5846	/ Stringify. /
5847	str = OutputFunctionCall(typOutputInfo, value);
5848
5849	/ Escape. /
5850	if (conversion == `'I'`)
5851	{
5852	/ quote_identifier may or may not allocate a new string. /
5853	text_format_append_string(buf, quote_identifier(str), flags, width);
5854	}
5855	else if (conversion == `'L'`)
5856	{
5857	char *qstr = quote_literal_cstr(str);
5858
5859	text_format_append_string(buf, qstr, flags, width);
5860	/ quote_literal_cstr() always allocates a new string /
5861	pfree(qstr);
5862	}
5863	else
5864	text_format_append_string(buf, str, flags, width);
5865
5866	/ Cleanup. /
5867	pfree(str);
5868	}
5869
5870	/*
5871	* Append str to buf, padding as directed by flags/width
5872	*/
5873	static void
5874	text_format_append_string(StringInfo buf, const char *str,
5875	int flags, int width)
5876	{
5877	bool align_to_left = false;
5878	int len;
5879
5880	/ fast path for typical easy case /
5881	if (width == `0`)
5882	{
5883	appendStringInfoString(buf, str);
5884	return;
5885	}
5886
5887	if (width < `0`)
5888	{
5889	/ Negative width: implicit '-' flag, then take absolute value /
5890	align_to_left = true;
5891	/ -INT_MIN is undefined /
5892	if (width <= INT_MIN)
5893	ereport(ERROR,
5894	(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
5895	errmsg("number is out of range")));
5896	width = -width;
5897	}
5898	else if (flags & TEXT_FORMAT_FLAG_MINUS)
5899	align_to_left = true;
5900
5901	len = pg_mbstrlen(str);
5902	if (align_to_left)
5903	{
5904	/ left justify /
5905	appendStringInfoString(buf, str);
5906	if (len < width)
5907	appendStringInfoSpaces(buf, width - len);
5908	}
5909	else
5910	{
5911	/ right justify /
5912	if (len < width)
5913	appendStringInfoSpaces(buf, width - len);
5914	appendStringInfoString(buf, str);
5915	}
5916	}
5917
5918	/*
5919	* text_format_nv - nonvariadic wrapper for text_format function.
5920	*
5921	* note: this wrapper is necessary to pass the sanity check in opr_sanity,
5922	* which checks that all built-in functions that share the implementing C
5923	* function take the same number of arguments.
5924	*/
5925	Datum
5926	text_format_nv(PG_FUNCTION_ARGS)
5927	{
5928	return text_format(fcinfo);
5929	}
5930
/*
 * Helper function for Levenshtein distance functions.  Faster than memcmp(),
 * for this use case.
 *
 * Compares the first len bytes of s1 and s2, walking from the last byte
 * back to the first.  Returns true if all of them match; a len of zero or
 * less compares nothing and yields true.
 */
static inline bool
rest_of_char_same(const char *s1, const char *s2, int len)
{
	while (len > 0)
	{
		len--;
		if (s1[len] != s2[len])
			return false;
	}
	return true;
}
5946
/* Expand each Levenshtein distance variant */
5948	#include "levenshtein.c"
5949	#define LEVENSHTEIN_LESS_EQUAL
5950	#include "levenshtein.c"
5951

Browse the source code of PostgreSQL/src/backend/utils/adt/varlena.c