1/*-------------------------------------------------------------------------
2 * oracle_compat.c
3 * Oracle compatible functions.
4 *
5 * Copyright (c) 1996-2019, PostgreSQL Global Development Group
6 *
7 * Author: Edmund Mergl <E.Mergl@bawue.de>
8 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/utils/adt/oracle_compat.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "postgres.h"
17
18#include "common/int.h"
19#include "utils/builtins.h"
20#include "utils/formatting.h"
21#include "mb/pg_wchar.h"
22
23
/*
 * Forward declaration of the shared trimming routine defined later in this
 * file.  It trims characters found in "set" from "string"; doltrim enables
 * trimming at the start of the string and dortrim at the end.
 */
static text *dotrim(const char *string, int stringlen,
					const char *set, int setlen,
					bool doltrim, bool dortrim);
27
28
29/********************************************************************
30 *
31 * lower
32 *
33 * Syntax:
34 *
35 * text lower(text string)
36 *
37 * Purpose:
38 *
39 * Returns string, with all letters forced to lowercase.
40 *
41 ********************************************************************/
42
43Datum
44lower(PG_FUNCTION_ARGS)
45{
46 text *in_string = PG_GETARG_TEXT_PP(0);
47 char *out_string;
48 text *result;
49
50 out_string = str_tolower(VARDATA_ANY(in_string),
51 VARSIZE_ANY_EXHDR(in_string),
52 PG_GET_COLLATION());
53 result = cstring_to_text(out_string);
54 pfree(out_string);
55
56 PG_RETURN_TEXT_P(result);
57}
58
59
60/********************************************************************
61 *
62 * upper
63 *
64 * Syntax:
65 *
66 * text upper(text string)
67 *
68 * Purpose:
69 *
70 * Returns string, with all letters forced to uppercase.
71 *
72 ********************************************************************/
73
74Datum
75upper(PG_FUNCTION_ARGS)
76{
77 text *in_string = PG_GETARG_TEXT_PP(0);
78 char *out_string;
79 text *result;
80
81 out_string = str_toupper(VARDATA_ANY(in_string),
82 VARSIZE_ANY_EXHDR(in_string),
83 PG_GET_COLLATION());
84 result = cstring_to_text(out_string);
85 pfree(out_string);
86
87 PG_RETURN_TEXT_P(result);
88}
89
90
91/********************************************************************
92 *
93 * initcap
94 *
95 * Syntax:
96 *
97 * text initcap(text string)
98 *
99 * Purpose:
100 *
101 * Returns string, with first letter of each word in uppercase, all
102 * other letters in lowercase. A word is defined as a sequence of
103 * alphanumeric characters, delimited by non-alphanumeric
104 * characters.
105 *
106 ********************************************************************/
107
108Datum
109initcap(PG_FUNCTION_ARGS)
110{
111 text *in_string = PG_GETARG_TEXT_PP(0);
112 char *out_string;
113 text *result;
114
115 out_string = str_initcap(VARDATA_ANY(in_string),
116 VARSIZE_ANY_EXHDR(in_string),
117 PG_GET_COLLATION());
118 result = cstring_to_text(out_string);
119 pfree(out_string);
120
121 PG_RETURN_TEXT_P(result);
122}
123
124
125/********************************************************************
126 *
127 * lpad
128 *
129 * Syntax:
130 *
131 * text lpad(text string1, int4 len, text string2)
132 *
133 * Purpose:
134 *
135 * Returns string1, left-padded to length len with the sequence of
136 * characters in string2. If len is less than the length of string1,
137 * instead truncate (on the right) to len.
138 *
139 ********************************************************************/
140
141Datum
142lpad(PG_FUNCTION_ARGS)
143{
144 text *string1 = PG_GETARG_TEXT_PP(0);
145 int32 len = PG_GETARG_INT32(1);
146 text *string2 = PG_GETARG_TEXT_PP(2);
147 text *ret;
148 char *ptr1,
149 *ptr2,
150 *ptr2start,
151 *ptr2end,
152 *ptr_ret;
153 int m,
154 s1len,
155 s2len;
156
157 int bytelen;
158
159 /* Negative len is silently taken as zero */
160 if (len < 0)
161 len = 0;
162
163 s1len = VARSIZE_ANY_EXHDR(string1);
164 if (s1len < 0)
165 s1len = 0; /* shouldn't happen */
166
167 s2len = VARSIZE_ANY_EXHDR(string2);
168 if (s2len < 0)
169 s2len = 0; /* shouldn't happen */
170
171 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
172
173 if (s1len > len)
174 s1len = len; /* truncate string1 to len chars */
175
176 if (s2len <= 0)
177 len = s1len; /* nothing to pad with, so don't pad */
178
179 bytelen = pg_database_encoding_max_length() * len;
180
181 /* check for integer overflow */
182 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
183 ereport(ERROR,
184 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
185 errmsg("requested length too large")));
186
187 ret = (text *) palloc(VARHDRSZ + bytelen);
188
189 m = len - s1len;
190
191 ptr2 = ptr2start = VARDATA_ANY(string2);
192 ptr2end = ptr2 + s2len;
193 ptr_ret = VARDATA(ret);
194
195 while (m--)
196 {
197 int mlen = pg_mblen(ptr2);
198
199 memcpy(ptr_ret, ptr2, mlen);
200 ptr_ret += mlen;
201 ptr2 += mlen;
202 if (ptr2 == ptr2end) /* wrap around at end of s2 */
203 ptr2 = ptr2start;
204 }
205
206 ptr1 = VARDATA_ANY(string1);
207
208 while (s1len--)
209 {
210 int mlen = pg_mblen(ptr1);
211
212 memcpy(ptr_ret, ptr1, mlen);
213 ptr_ret += mlen;
214 ptr1 += mlen;
215 }
216
217 SET_VARSIZE(ret, ptr_ret - (char *) ret);
218
219 PG_RETURN_TEXT_P(ret);
220}
221
222
223/********************************************************************
224 *
225 * rpad
226 *
227 * Syntax:
228 *
229 * text rpad(text string1, int4 len, text string2)
230 *
231 * Purpose:
232 *
233 * Returns string1, right-padded to length len with the sequence of
234 * characters in string2. If len is less than the length of string1,
235 * instead truncate (on the right) to len.
236 *
237 ********************************************************************/
238
239Datum
240rpad(PG_FUNCTION_ARGS)
241{
242 text *string1 = PG_GETARG_TEXT_PP(0);
243 int32 len = PG_GETARG_INT32(1);
244 text *string2 = PG_GETARG_TEXT_PP(2);
245 text *ret;
246 char *ptr1,
247 *ptr2,
248 *ptr2start,
249 *ptr2end,
250 *ptr_ret;
251 int m,
252 s1len,
253 s2len;
254
255 int bytelen;
256
257 /* Negative len is silently taken as zero */
258 if (len < 0)
259 len = 0;
260
261 s1len = VARSIZE_ANY_EXHDR(string1);
262 if (s1len < 0)
263 s1len = 0; /* shouldn't happen */
264
265 s2len = VARSIZE_ANY_EXHDR(string2);
266 if (s2len < 0)
267 s2len = 0; /* shouldn't happen */
268
269 s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
270
271 if (s1len > len)
272 s1len = len; /* truncate string1 to len chars */
273
274 if (s2len <= 0)
275 len = s1len; /* nothing to pad with, so don't pad */
276
277 bytelen = pg_database_encoding_max_length() * len;
278
279 /* Check for integer overflow */
280 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
281 ereport(ERROR,
282 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
283 errmsg("requested length too large")));
284
285 ret = (text *) palloc(VARHDRSZ + bytelen);
286 m = len - s1len;
287
288 ptr1 = VARDATA_ANY(string1);
289 ptr_ret = VARDATA(ret);
290
291 while (s1len--)
292 {
293 int mlen = pg_mblen(ptr1);
294
295 memcpy(ptr_ret, ptr1, mlen);
296 ptr_ret += mlen;
297 ptr1 += mlen;
298 }
299
300 ptr2 = ptr2start = VARDATA_ANY(string2);
301 ptr2end = ptr2 + s2len;
302
303 while (m--)
304 {
305 int mlen = pg_mblen(ptr2);
306
307 memcpy(ptr_ret, ptr2, mlen);
308 ptr_ret += mlen;
309 ptr2 += mlen;
310 if (ptr2 == ptr2end) /* wrap around at end of s2 */
311 ptr2 = ptr2start;
312 }
313
314 SET_VARSIZE(ret, ptr_ret - (char *) ret);
315
316 PG_RETURN_TEXT_P(ret);
317}
318
319
320/********************************************************************
321 *
322 * btrim
323 *
324 * Syntax:
325 *
326 * text btrim(text string, text set)
327 *
328 * Purpose:
329 *
330 * Returns string with characters removed from the front and back
331 * up to the first character not in set.
332 *
333 ********************************************************************/
334
335Datum
336btrim(PG_FUNCTION_ARGS)
337{
338 text *string = PG_GETARG_TEXT_PP(0);
339 text *set = PG_GETARG_TEXT_PP(1);
340 text *ret;
341
342 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
343 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
344 true, true);
345
346 PG_RETURN_TEXT_P(ret);
347}
348
349/********************************************************************
350 *
351 * btrim1 --- btrim with set fixed as ' '
352 *
353 ********************************************************************/
354
355Datum
356btrim1(PG_FUNCTION_ARGS)
357{
358 text *string = PG_GETARG_TEXT_PP(0);
359 text *ret;
360
361 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
362 " ", 1,
363 true, true);
364
365 PG_RETURN_TEXT_P(ret);
366}
367
368/*
369 * Common implementation for btrim, ltrim, rtrim
370 */
371static text *
372dotrim(const char *string, int stringlen,
373 const char *set, int setlen,
374 bool doltrim, bool dortrim)
375{
376 int i;
377
378 /* Nothing to do if either string or set is empty */
379 if (stringlen > 0 && setlen > 0)
380 {
381 if (pg_database_encoding_max_length() > 1)
382 {
383 /*
384 * In the multibyte-encoding case, build arrays of pointers to
385 * character starts, so that we can avoid inefficient checks in
386 * the inner loops.
387 */
388 const char **stringchars;
389 const char **setchars;
390 int *stringmblen;
391 int *setmblen;
392 int stringnchars;
393 int setnchars;
394 int resultndx;
395 int resultnchars;
396 const char *p;
397 int len;
398 int mblen;
399 const char *str_pos;
400 int str_len;
401
402 stringchars = (const char **) palloc(stringlen * sizeof(char *));
403 stringmblen = (int *) palloc(stringlen * sizeof(int));
404 stringnchars = 0;
405 p = string;
406 len = stringlen;
407 while (len > 0)
408 {
409 stringchars[stringnchars] = p;
410 stringmblen[stringnchars] = mblen = pg_mblen(p);
411 stringnchars++;
412 p += mblen;
413 len -= mblen;
414 }
415
416 setchars = (const char **) palloc(setlen * sizeof(char *));
417 setmblen = (int *) palloc(setlen * sizeof(int));
418 setnchars = 0;
419 p = set;
420 len = setlen;
421 while (len > 0)
422 {
423 setchars[setnchars] = p;
424 setmblen[setnchars] = mblen = pg_mblen(p);
425 setnchars++;
426 p += mblen;
427 len -= mblen;
428 }
429
430 resultndx = 0; /* index in stringchars[] */
431 resultnchars = stringnchars;
432
433 if (doltrim)
434 {
435 while (resultnchars > 0)
436 {
437 str_pos = stringchars[resultndx];
438 str_len = stringmblen[resultndx];
439 for (i = 0; i < setnchars; i++)
440 {
441 if (str_len == setmblen[i] &&
442 memcmp(str_pos, setchars[i], str_len) == 0)
443 break;
444 }
445 if (i >= setnchars)
446 break; /* no match here */
447 string += str_len;
448 stringlen -= str_len;
449 resultndx++;
450 resultnchars--;
451 }
452 }
453
454 if (dortrim)
455 {
456 while (resultnchars > 0)
457 {
458 str_pos = stringchars[resultndx + resultnchars - 1];
459 str_len = stringmblen[resultndx + resultnchars - 1];
460 for (i = 0; i < setnchars; i++)
461 {
462 if (str_len == setmblen[i] &&
463 memcmp(str_pos, setchars[i], str_len) == 0)
464 break;
465 }
466 if (i >= setnchars)
467 break; /* no match here */
468 stringlen -= str_len;
469 resultnchars--;
470 }
471 }
472
473 pfree(stringchars);
474 pfree(stringmblen);
475 pfree(setchars);
476 pfree(setmblen);
477 }
478 else
479 {
480 /*
481 * In the single-byte-encoding case, we don't need such overhead.
482 */
483 if (doltrim)
484 {
485 while (stringlen > 0)
486 {
487 char str_ch = *string;
488
489 for (i = 0; i < setlen; i++)
490 {
491 if (str_ch == set[i])
492 break;
493 }
494 if (i >= setlen)
495 break; /* no match here */
496 string++;
497 stringlen--;
498 }
499 }
500
501 if (dortrim)
502 {
503 while (stringlen > 0)
504 {
505 char str_ch = string[stringlen - 1];
506
507 for (i = 0; i < setlen; i++)
508 {
509 if (str_ch == set[i])
510 break;
511 }
512 if (i >= setlen)
513 break; /* no match here */
514 stringlen--;
515 }
516 }
517 }
518 }
519
520 /* Return selected portion of string */
521 return cstring_to_text_with_len(string, stringlen);
522}
523
524/********************************************************************
525 *
526 * byteatrim
527 *
528 * Syntax:
529 *
530 * bytea byteatrim(bytea string, bytea set)
531 *
532 * Purpose:
533 *
534 * Returns string with characters removed from the front and back
535 * up to the first character not in set.
536 *
537 * Cloned from btrim and modified as required.
538 ********************************************************************/
539
540Datum
541byteatrim(PG_FUNCTION_ARGS)
542{
543 bytea *string = PG_GETARG_BYTEA_PP(0);
544 bytea *set = PG_GETARG_BYTEA_PP(1);
545 bytea *ret;
546 char *ptr,
547 *end,
548 *ptr2,
549 *ptr2start,
550 *end2;
551 int m,
552 stringlen,
553 setlen;
554
555 stringlen = VARSIZE_ANY_EXHDR(string);
556 setlen = VARSIZE_ANY_EXHDR(set);
557
558 if (stringlen <= 0 || setlen <= 0)
559 PG_RETURN_BYTEA_P(string);
560
561 m = stringlen;
562 ptr = VARDATA_ANY(string);
563 end = ptr + stringlen - 1;
564 ptr2start = VARDATA_ANY(set);
565 end2 = ptr2start + setlen - 1;
566
567 while (m > 0)
568 {
569 ptr2 = ptr2start;
570 while (ptr2 <= end2)
571 {
572 if (*ptr == *ptr2)
573 break;
574 ++ptr2;
575 }
576 if (ptr2 > end2)
577 break;
578 ptr++;
579 m--;
580 }
581
582 while (m > 0)
583 {
584 ptr2 = ptr2start;
585 while (ptr2 <= end2)
586 {
587 if (*end == *ptr2)
588 break;
589 ++ptr2;
590 }
591 if (ptr2 > end2)
592 break;
593 end--;
594 m--;
595 }
596
597 ret = (bytea *) palloc(VARHDRSZ + m);
598 SET_VARSIZE(ret, VARHDRSZ + m);
599 memcpy(VARDATA(ret), ptr, m);
600
601 PG_RETURN_BYTEA_P(ret);
602}
603
604/********************************************************************
605 *
606 * ltrim
607 *
608 * Syntax:
609 *
610 * text ltrim(text string, text set)
611 *
612 * Purpose:
613 *
614 * Returns string with initial characters removed up to the first
615 * character not in set.
616 *
617 ********************************************************************/
618
619Datum
620ltrim(PG_FUNCTION_ARGS)
621{
622 text *string = PG_GETARG_TEXT_PP(0);
623 text *set = PG_GETARG_TEXT_PP(1);
624 text *ret;
625
626 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
627 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
628 true, false);
629
630 PG_RETURN_TEXT_P(ret);
631}
632
633/********************************************************************
634 *
635 * ltrim1 --- ltrim with set fixed as ' '
636 *
637 ********************************************************************/
638
639Datum
640ltrim1(PG_FUNCTION_ARGS)
641{
642 text *string = PG_GETARG_TEXT_PP(0);
643 text *ret;
644
645 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
646 " ", 1,
647 true, false);
648
649 PG_RETURN_TEXT_P(ret);
650}
651
652/********************************************************************
653 *
654 * rtrim
655 *
656 * Syntax:
657 *
658 * text rtrim(text string, text set)
659 *
660 * Purpose:
661 *
662 * Returns string with final characters removed after the last
663 * character not in set.
664 *
665 ********************************************************************/
666
667Datum
668rtrim(PG_FUNCTION_ARGS)
669{
670 text *string = PG_GETARG_TEXT_PP(0);
671 text *set = PG_GETARG_TEXT_PP(1);
672 text *ret;
673
674 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
675 VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
676 false, true);
677
678 PG_RETURN_TEXT_P(ret);
679}
680
681/********************************************************************
682 *
683 * rtrim1 --- rtrim with set fixed as ' '
684 *
685 ********************************************************************/
686
687Datum
688rtrim1(PG_FUNCTION_ARGS)
689{
690 text *string = PG_GETARG_TEXT_PP(0);
691 text *ret;
692
693 ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
694 " ", 1,
695 false, true);
696
697 PG_RETURN_TEXT_P(ret);
698}
699
700
701/********************************************************************
702 *
703 * translate
704 *
705 * Syntax:
706 *
707 * text translate(text string, text from, text to)
708 *
709 * Purpose:
710 *
711 * Returns string after replacing all occurrences of characters in from
712 * with the corresponding character in to. If from is longer than to,
713 * occurrences of the extra characters in from are deleted.
714 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
715 *
716 ********************************************************************/
717
718Datum
719translate(PG_FUNCTION_ARGS)
720{
721 text *string = PG_GETARG_TEXT_PP(0);
722 text *from = PG_GETARG_TEXT_PP(1);
723 text *to = PG_GETARG_TEXT_PP(2);
724 text *result;
725 char *from_ptr,
726 *to_ptr;
727 char *source,
728 *target;
729 int m,
730 fromlen,
731 tolen,
732 retlen,
733 i;
734 int worst_len;
735 int len;
736 int source_len;
737 int from_index;
738
739 m = VARSIZE_ANY_EXHDR(string);
740 if (m <= 0)
741 PG_RETURN_TEXT_P(string);
742 source = VARDATA_ANY(string);
743
744 fromlen = VARSIZE_ANY_EXHDR(from);
745 from_ptr = VARDATA_ANY(from);
746 tolen = VARSIZE_ANY_EXHDR(to);
747 to_ptr = VARDATA_ANY(to);
748
749 /*
750 * The worst-case expansion is to substitute a max-length character for a
751 * single-byte character at each position of the string.
752 */
753 worst_len = pg_database_encoding_max_length() * m;
754
755 /* check for integer overflow */
756 if (worst_len / pg_database_encoding_max_length() != m)
757 ereport(ERROR,
758 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
759 errmsg("requested length too large")));
760
761 result = (text *) palloc(worst_len + VARHDRSZ);
762 target = VARDATA(result);
763 retlen = 0;
764
765 while (m > 0)
766 {
767 source_len = pg_mblen(source);
768 from_index = 0;
769
770 for (i = 0; i < fromlen; i += len)
771 {
772 len = pg_mblen(&from_ptr[i]);
773 if (len == source_len &&
774 memcmp(source, &from_ptr[i], len) == 0)
775 break;
776
777 from_index++;
778 }
779 if (i < fromlen)
780 {
781 /* substitute */
782 char *p = to_ptr;
783
784 for (i = 0; i < from_index; i++)
785 {
786 p += pg_mblen(p);
787 if (p >= (to_ptr + tolen))
788 break;
789 }
790 if (p < (to_ptr + tolen))
791 {
792 len = pg_mblen(p);
793 memcpy(target, p, len);
794 target += len;
795 retlen += len;
796 }
797
798 }
799 else
800 {
801 /* no match, so copy */
802 memcpy(target, source, source_len);
803 target += source_len;
804 retlen += source_len;
805 }
806
807 source += source_len;
808 m -= source_len;
809 }
810
811 SET_VARSIZE(result, retlen + VARHDRSZ);
812
813 /*
814 * The function result is probably much bigger than needed, if we're using
815 * a multibyte encoding, but it's not worth reallocating it; the result
816 * probably won't live long anyway.
817 */
818
819 PG_RETURN_TEXT_P(result);
820}
821
822/********************************************************************
823 *
824 * ascii
825 *
826 * Syntax:
827 *
828 * int ascii(text string)
829 *
830 * Purpose:
831 *
832 * Returns the decimal representation of the first character from
833 * string.
834 * If the string is empty we return 0.
835 * If the database encoding is UTF8, we return the Unicode codepoint.
836 * If the database encoding is any other multi-byte encoding, we
837 * return the value of the first byte if it is an ASCII character
838 * (range 1 .. 127), or raise an error.
839 * For all other encodings we return the value of the first byte,
840 * (range 1..255).
841 *
842 ********************************************************************/
843
844Datum
845ascii(PG_FUNCTION_ARGS)
846{
847 text *string = PG_GETARG_TEXT_PP(0);
848 int encoding = GetDatabaseEncoding();
849 unsigned char *data;
850
851 if (VARSIZE_ANY_EXHDR(string) <= 0)
852 PG_RETURN_INT32(0);
853
854 data = (unsigned char *) VARDATA_ANY(string);
855
856 if (encoding == PG_UTF8 && *data > 127)
857 {
858 /* return the code point for Unicode */
859
860 int result = 0,
861 tbytes = 0,
862 i;
863
864 if (*data >= 0xF0)
865 {
866 result = *data & 0x07;
867 tbytes = 3;
868 }
869 else if (*data >= 0xE0)
870 {
871 result = *data & 0x0F;
872 tbytes = 2;
873 }
874 else
875 {
876 Assert(*data > 0xC0);
877 result = *data & 0x1f;
878 tbytes = 1;
879 }
880
881 Assert(tbytes > 0);
882
883 for (i = 1; i <= tbytes; i++)
884 {
885 Assert((data[i] & 0xC0) == 0x80);
886 result = (result << 6) + (data[i] & 0x3f);
887 }
888
889 PG_RETURN_INT32(result);
890 }
891 else
892 {
893 if (pg_encoding_max_length(encoding) > 1 && *data > 127)
894 ereport(ERROR,
895 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
896 errmsg("requested character too large")));
897
898
899 PG_RETURN_INT32((int32) *data);
900 }
901}
902
903/********************************************************************
904 *
905 * chr
906 *
907 * Syntax:
908 *
909 * text chr(int val)
910 *
911 * Purpose:
912 *
913 * Returns the character having the binary equivalent to val.
914 *
915 * For UTF8 we treat the argument as a Unicode code point.
916 * For other multi-byte encodings we raise an error for arguments
917 * outside the strict ASCII range (1..127).
918 *
919 * It's important that we don't ever return a value that is not valid
920 * in the database encoding, so that this doesn't become a way for
921 * invalid data to enter the database.
922 *
923 ********************************************************************/
924
925Datum
926chr (PG_FUNCTION_ARGS)
927{
928 uint32 cvalue = PG_GETARG_UINT32(0);
929 text *result;
930 int encoding = GetDatabaseEncoding();
931
932 if (encoding == PG_UTF8 && cvalue > 127)
933 {
934 /* for Unicode we treat the argument as a code point */
935 int bytes;
936 unsigned char *wch;
937
938 /*
939 * We only allow valid Unicode code points; per RFC3629 that stops at
940 * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
941 * U+1FFFFF.
942 */
943 if (cvalue > 0x0010ffff)
944 ereport(ERROR,
945 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
946 errmsg("requested character too large for encoding: %d",
947 cvalue)));
948
949 if (cvalue > 0xffff)
950 bytes = 4;
951 else if (cvalue > 0x07ff)
952 bytes = 3;
953 else
954 bytes = 2;
955
956 result = (text *) palloc(VARHDRSZ + bytes);
957 SET_VARSIZE(result, VARHDRSZ + bytes);
958 wch = (unsigned char *) VARDATA(result);
959
960 if (bytes == 2)
961 {
962 wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
963 wch[1] = 0x80 | (cvalue & 0x3F);
964 }
965 else if (bytes == 3)
966 {
967 wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
968 wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
969 wch[2] = 0x80 | (cvalue & 0x3F);
970 }
971 else
972 {
973 wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
974 wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
975 wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
976 wch[3] = 0x80 | (cvalue & 0x3F);
977 }
978
979 /*
980 * The preceding range check isn't sufficient, because UTF8 excludes
981 * Unicode "surrogate pair" codes. Make sure what we created is valid
982 * UTF8.
983 */
984 if (!pg_utf8_islegal(wch, bytes))
985 ereport(ERROR,
986 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
987 errmsg("requested character not valid for encoding: %d",
988 cvalue)));
989 }
990 else
991 {
992 bool is_mb;
993
994 /*
995 * Error out on arguments that make no sense or that we can't validly
996 * represent in the encoding.
997 */
998 if (cvalue == 0)
999 ereport(ERROR,
1000 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1001 errmsg("null character not permitted")));
1002
1003 is_mb = pg_encoding_max_length(encoding) > 1;
1004
1005 if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
1006 ereport(ERROR,
1007 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1008 errmsg("requested character too large for encoding: %d",
1009 cvalue)));
1010
1011 result = (text *) palloc(VARHDRSZ + 1);
1012 SET_VARSIZE(result, VARHDRSZ + 1);
1013 *VARDATA(result) = (char) cvalue;
1014 }
1015
1016 PG_RETURN_TEXT_P(result);
1017}
1018
1019/********************************************************************
1020 *
1021 * repeat
1022 *
1023 * Syntax:
1024 *
1025 * text repeat(text string, int val)
1026 *
1027 * Purpose:
1028 *
1029 * Repeat string by val.
1030 *
1031 ********************************************************************/
1032
1033Datum
1034repeat(PG_FUNCTION_ARGS)
1035{
1036 text *string = PG_GETARG_TEXT_PP(0);
1037 int32 count = PG_GETARG_INT32(1);
1038 text *result;
1039 int slen,
1040 tlen;
1041 int i;
1042 char *cp,
1043 *sp;
1044
1045 if (count < 0)
1046 count = 0;
1047
1048 slen = VARSIZE_ANY_EXHDR(string);
1049
1050 if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
1051 unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)))
1052 ereport(ERROR,
1053 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1054 errmsg("requested length too large")));
1055
1056 result = (text *) palloc(tlen);
1057
1058 SET_VARSIZE(result, tlen);
1059 cp = VARDATA(result);
1060 sp = VARDATA_ANY(string);
1061 for (i = 0; i < count; i++)
1062 {
1063 memcpy(cp, sp, slen);
1064 cp += slen;
1065 }
1066
1067 PG_RETURN_TEXT_P(result);
1068}
1069