1 | /*------------------------------------------------------------------------- |
2 | * oracle_compat.c |
3 | * Oracle compatible functions. |
4 | * |
5 | * Copyright (c) 1996-2019, PostgreSQL Global Development Group |
6 | * |
7 | * Author: Edmund Mergl <E.Mergl@bawue.de> |
8 | * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> |
9 | * |
10 | * |
11 | * IDENTIFICATION |
12 | * src/backend/utils/adt/oracle_compat.c |
13 | * |
14 | *------------------------------------------------------------------------- |
15 | */ |
16 | #include "postgres.h" |
17 | |
18 | #include "common/int.h" |
19 | #include "utils/builtins.h" |
20 | #include "utils/formatting.h" |
21 | #include "mb/pg_wchar.h" |
22 | |
23 | |
24 | static text *dotrim(const char *string, int stringlen, |
25 | const char *set, int setlen, |
26 | bool doltrim, bool dortrim); |
27 | |
28 | |
29 | /******************************************************************** |
30 | * |
31 | * lower |
32 | * |
33 | * Syntax: |
34 | * |
35 | * text lower(text string) |
36 | * |
37 | * Purpose: |
38 | * |
39 | * Returns string, with all letters forced to lowercase. |
40 | * |
41 | ********************************************************************/ |
42 | |
43 | Datum |
44 | lower(PG_FUNCTION_ARGS) |
45 | { |
46 | text *in_string = PG_GETARG_TEXT_PP(0); |
47 | char *out_string; |
48 | text *result; |
49 | |
50 | out_string = str_tolower(VARDATA_ANY(in_string), |
51 | VARSIZE_ANY_EXHDR(in_string), |
52 | PG_GET_COLLATION()); |
53 | result = cstring_to_text(out_string); |
54 | pfree(out_string); |
55 | |
56 | PG_RETURN_TEXT_P(result); |
57 | } |
58 | |
59 | |
60 | /******************************************************************** |
61 | * |
62 | * upper |
63 | * |
64 | * Syntax: |
65 | * |
66 | * text upper(text string) |
67 | * |
68 | * Purpose: |
69 | * |
70 | * Returns string, with all letters forced to uppercase. |
71 | * |
72 | ********************************************************************/ |
73 | |
74 | Datum |
75 | upper(PG_FUNCTION_ARGS) |
76 | { |
77 | text *in_string = PG_GETARG_TEXT_PP(0); |
78 | char *out_string; |
79 | text *result; |
80 | |
81 | out_string = str_toupper(VARDATA_ANY(in_string), |
82 | VARSIZE_ANY_EXHDR(in_string), |
83 | PG_GET_COLLATION()); |
84 | result = cstring_to_text(out_string); |
85 | pfree(out_string); |
86 | |
87 | PG_RETURN_TEXT_P(result); |
88 | } |
89 | |
90 | |
91 | /******************************************************************** |
92 | * |
93 | * initcap |
94 | * |
95 | * Syntax: |
96 | * |
97 | * text initcap(text string) |
98 | * |
99 | * Purpose: |
100 | * |
101 | * Returns string, with first letter of each word in uppercase, all |
102 | * other letters in lowercase. A word is defined as a sequence of |
103 | * alphanumeric characters, delimited by non-alphanumeric |
104 | * characters. |
105 | * |
106 | ********************************************************************/ |
107 | |
108 | Datum |
109 | initcap(PG_FUNCTION_ARGS) |
110 | { |
111 | text *in_string = PG_GETARG_TEXT_PP(0); |
112 | char *out_string; |
113 | text *result; |
114 | |
115 | out_string = str_initcap(VARDATA_ANY(in_string), |
116 | VARSIZE_ANY_EXHDR(in_string), |
117 | PG_GET_COLLATION()); |
118 | result = cstring_to_text(out_string); |
119 | pfree(out_string); |
120 | |
121 | PG_RETURN_TEXT_P(result); |
122 | } |
123 | |
124 | |
125 | /******************************************************************** |
126 | * |
127 | * lpad |
128 | * |
129 | * Syntax: |
130 | * |
131 | * text lpad(text string1, int4 len, text string2) |
132 | * |
133 | * Purpose: |
134 | * |
135 | * Returns string1, left-padded to length len with the sequence of |
136 | * characters in string2. If len is less than the length of string1, |
137 | * instead truncate (on the right) to len. |
138 | * |
139 | ********************************************************************/ |
140 | |
141 | Datum |
142 | lpad(PG_FUNCTION_ARGS) |
143 | { |
144 | text *string1 = PG_GETARG_TEXT_PP(0); |
145 | int32 len = PG_GETARG_INT32(1); |
146 | text *string2 = PG_GETARG_TEXT_PP(2); |
147 | text *ret; |
148 | char *ptr1, |
149 | *ptr2, |
150 | *ptr2start, |
151 | *ptr2end, |
152 | *ptr_ret; |
153 | int m, |
154 | s1len, |
155 | s2len; |
156 | |
157 | int bytelen; |
158 | |
159 | /* Negative len is silently taken as zero */ |
160 | if (len < 0) |
161 | len = 0; |
162 | |
163 | s1len = VARSIZE_ANY_EXHDR(string1); |
164 | if (s1len < 0) |
165 | s1len = 0; /* shouldn't happen */ |
166 | |
167 | s2len = VARSIZE_ANY_EXHDR(string2); |
168 | if (s2len < 0) |
169 | s2len = 0; /* shouldn't happen */ |
170 | |
171 | s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); |
172 | |
173 | if (s1len > len) |
174 | s1len = len; /* truncate string1 to len chars */ |
175 | |
176 | if (s2len <= 0) |
177 | len = s1len; /* nothing to pad with, so don't pad */ |
178 | |
179 | bytelen = pg_database_encoding_max_length() * len; |
180 | |
181 | /* check for integer overflow */ |
182 | if (len != 0 && bytelen / pg_database_encoding_max_length() != len) |
183 | ereport(ERROR, |
184 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
185 | errmsg("requested length too large" ))); |
186 | |
187 | ret = (text *) palloc(VARHDRSZ + bytelen); |
188 | |
189 | m = len - s1len; |
190 | |
191 | ptr2 = ptr2start = VARDATA_ANY(string2); |
192 | ptr2end = ptr2 + s2len; |
193 | ptr_ret = VARDATA(ret); |
194 | |
195 | while (m--) |
196 | { |
197 | int mlen = pg_mblen(ptr2); |
198 | |
199 | memcpy(ptr_ret, ptr2, mlen); |
200 | ptr_ret += mlen; |
201 | ptr2 += mlen; |
202 | if (ptr2 == ptr2end) /* wrap around at end of s2 */ |
203 | ptr2 = ptr2start; |
204 | } |
205 | |
206 | ptr1 = VARDATA_ANY(string1); |
207 | |
208 | while (s1len--) |
209 | { |
210 | int mlen = pg_mblen(ptr1); |
211 | |
212 | memcpy(ptr_ret, ptr1, mlen); |
213 | ptr_ret += mlen; |
214 | ptr1 += mlen; |
215 | } |
216 | |
217 | SET_VARSIZE(ret, ptr_ret - (char *) ret); |
218 | |
219 | PG_RETURN_TEXT_P(ret); |
220 | } |
221 | |
222 | |
223 | /******************************************************************** |
224 | * |
225 | * rpad |
226 | * |
227 | * Syntax: |
228 | * |
229 | * text rpad(text string1, int4 len, text string2) |
230 | * |
231 | * Purpose: |
232 | * |
233 | * Returns string1, right-padded to length len with the sequence of |
234 | * characters in string2. If len is less than the length of string1, |
235 | * instead truncate (on the right) to len. |
236 | * |
237 | ********************************************************************/ |
238 | |
239 | Datum |
240 | rpad(PG_FUNCTION_ARGS) |
241 | { |
242 | text *string1 = PG_GETARG_TEXT_PP(0); |
243 | int32 len = PG_GETARG_INT32(1); |
244 | text *string2 = PG_GETARG_TEXT_PP(2); |
245 | text *ret; |
246 | char *ptr1, |
247 | *ptr2, |
248 | *ptr2start, |
249 | *ptr2end, |
250 | *ptr_ret; |
251 | int m, |
252 | s1len, |
253 | s2len; |
254 | |
255 | int bytelen; |
256 | |
257 | /* Negative len is silently taken as zero */ |
258 | if (len < 0) |
259 | len = 0; |
260 | |
261 | s1len = VARSIZE_ANY_EXHDR(string1); |
262 | if (s1len < 0) |
263 | s1len = 0; /* shouldn't happen */ |
264 | |
265 | s2len = VARSIZE_ANY_EXHDR(string2); |
266 | if (s2len < 0) |
267 | s2len = 0; /* shouldn't happen */ |
268 | |
269 | s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); |
270 | |
271 | if (s1len > len) |
272 | s1len = len; /* truncate string1 to len chars */ |
273 | |
274 | if (s2len <= 0) |
275 | len = s1len; /* nothing to pad with, so don't pad */ |
276 | |
277 | bytelen = pg_database_encoding_max_length() * len; |
278 | |
279 | /* Check for integer overflow */ |
280 | if (len != 0 && bytelen / pg_database_encoding_max_length() != len) |
281 | ereport(ERROR, |
282 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
283 | errmsg("requested length too large" ))); |
284 | |
285 | ret = (text *) palloc(VARHDRSZ + bytelen); |
286 | m = len - s1len; |
287 | |
288 | ptr1 = VARDATA_ANY(string1); |
289 | ptr_ret = VARDATA(ret); |
290 | |
291 | while (s1len--) |
292 | { |
293 | int mlen = pg_mblen(ptr1); |
294 | |
295 | memcpy(ptr_ret, ptr1, mlen); |
296 | ptr_ret += mlen; |
297 | ptr1 += mlen; |
298 | } |
299 | |
300 | ptr2 = ptr2start = VARDATA_ANY(string2); |
301 | ptr2end = ptr2 + s2len; |
302 | |
303 | while (m--) |
304 | { |
305 | int mlen = pg_mblen(ptr2); |
306 | |
307 | memcpy(ptr_ret, ptr2, mlen); |
308 | ptr_ret += mlen; |
309 | ptr2 += mlen; |
310 | if (ptr2 == ptr2end) /* wrap around at end of s2 */ |
311 | ptr2 = ptr2start; |
312 | } |
313 | |
314 | SET_VARSIZE(ret, ptr_ret - (char *) ret); |
315 | |
316 | PG_RETURN_TEXT_P(ret); |
317 | } |
318 | |
319 | |
320 | /******************************************************************** |
321 | * |
322 | * btrim |
323 | * |
324 | * Syntax: |
325 | * |
326 | * text btrim(text string, text set) |
327 | * |
328 | * Purpose: |
329 | * |
330 | * Returns string with characters removed from the front and back |
331 | * up to the first character not in set. |
332 | * |
333 | ********************************************************************/ |
334 | |
335 | Datum |
336 | btrim(PG_FUNCTION_ARGS) |
337 | { |
338 | text *string = PG_GETARG_TEXT_PP(0); |
339 | text *set = PG_GETARG_TEXT_PP(1); |
340 | text *ret; |
341 | |
342 | ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
343 | VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), |
344 | true, true); |
345 | |
346 | PG_RETURN_TEXT_P(ret); |
347 | } |
348 | |
349 | /******************************************************************** |
350 | * |
351 | * btrim1 --- btrim with set fixed as ' ' |
352 | * |
353 | ********************************************************************/ |
354 | |
355 | Datum |
356 | btrim1(PG_FUNCTION_ARGS) |
357 | { |
358 | text *string = PG_GETARG_TEXT_PP(0); |
359 | text *ret; |
360 | |
361 | ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
362 | " " , 1, |
363 | true, true); |
364 | |
365 | PG_RETURN_TEXT_P(ret); |
366 | } |
367 | |
368 | /* |
369 | * Common implementation for btrim, ltrim, rtrim |
370 | */ |
371 | static text * |
372 | dotrim(const char *string, int stringlen, |
373 | const char *set, int setlen, |
374 | bool doltrim, bool dortrim) |
375 | { |
376 | int i; |
377 | |
378 | /* Nothing to do if either string or set is empty */ |
379 | if (stringlen > 0 && setlen > 0) |
380 | { |
381 | if (pg_database_encoding_max_length() > 1) |
382 | { |
383 | /* |
384 | * In the multibyte-encoding case, build arrays of pointers to |
385 | * character starts, so that we can avoid inefficient checks in |
386 | * the inner loops. |
387 | */ |
388 | const char **stringchars; |
389 | const char **setchars; |
390 | int *stringmblen; |
391 | int *setmblen; |
392 | int stringnchars; |
393 | int setnchars; |
394 | int resultndx; |
395 | int resultnchars; |
396 | const char *p; |
397 | int len; |
398 | int mblen; |
399 | const char *str_pos; |
400 | int str_len; |
401 | |
402 | stringchars = (const char **) palloc(stringlen * sizeof(char *)); |
403 | stringmblen = (int *) palloc(stringlen * sizeof(int)); |
404 | stringnchars = 0; |
405 | p = string; |
406 | len = stringlen; |
407 | while (len > 0) |
408 | { |
409 | stringchars[stringnchars] = p; |
410 | stringmblen[stringnchars] = mblen = pg_mblen(p); |
411 | stringnchars++; |
412 | p += mblen; |
413 | len -= mblen; |
414 | } |
415 | |
416 | setchars = (const char **) palloc(setlen * sizeof(char *)); |
417 | setmblen = (int *) palloc(setlen * sizeof(int)); |
418 | setnchars = 0; |
419 | p = set; |
420 | len = setlen; |
421 | while (len > 0) |
422 | { |
423 | setchars[setnchars] = p; |
424 | setmblen[setnchars] = mblen = pg_mblen(p); |
425 | setnchars++; |
426 | p += mblen; |
427 | len -= mblen; |
428 | } |
429 | |
430 | resultndx = 0; /* index in stringchars[] */ |
431 | resultnchars = stringnchars; |
432 | |
433 | if (doltrim) |
434 | { |
435 | while (resultnchars > 0) |
436 | { |
437 | str_pos = stringchars[resultndx]; |
438 | str_len = stringmblen[resultndx]; |
439 | for (i = 0; i < setnchars; i++) |
440 | { |
441 | if (str_len == setmblen[i] && |
442 | memcmp(str_pos, setchars[i], str_len) == 0) |
443 | break; |
444 | } |
445 | if (i >= setnchars) |
446 | break; /* no match here */ |
447 | string += str_len; |
448 | stringlen -= str_len; |
449 | resultndx++; |
450 | resultnchars--; |
451 | } |
452 | } |
453 | |
454 | if (dortrim) |
455 | { |
456 | while (resultnchars > 0) |
457 | { |
458 | str_pos = stringchars[resultndx + resultnchars - 1]; |
459 | str_len = stringmblen[resultndx + resultnchars - 1]; |
460 | for (i = 0; i < setnchars; i++) |
461 | { |
462 | if (str_len == setmblen[i] && |
463 | memcmp(str_pos, setchars[i], str_len) == 0) |
464 | break; |
465 | } |
466 | if (i >= setnchars) |
467 | break; /* no match here */ |
468 | stringlen -= str_len; |
469 | resultnchars--; |
470 | } |
471 | } |
472 | |
473 | pfree(stringchars); |
474 | pfree(stringmblen); |
475 | pfree(setchars); |
476 | pfree(setmblen); |
477 | } |
478 | else |
479 | { |
480 | /* |
481 | * In the single-byte-encoding case, we don't need such overhead. |
482 | */ |
483 | if (doltrim) |
484 | { |
485 | while (stringlen > 0) |
486 | { |
487 | char str_ch = *string; |
488 | |
489 | for (i = 0; i < setlen; i++) |
490 | { |
491 | if (str_ch == set[i]) |
492 | break; |
493 | } |
494 | if (i >= setlen) |
495 | break; /* no match here */ |
496 | string++; |
497 | stringlen--; |
498 | } |
499 | } |
500 | |
501 | if (dortrim) |
502 | { |
503 | while (stringlen > 0) |
504 | { |
505 | char str_ch = string[stringlen - 1]; |
506 | |
507 | for (i = 0; i < setlen; i++) |
508 | { |
509 | if (str_ch == set[i]) |
510 | break; |
511 | } |
512 | if (i >= setlen) |
513 | break; /* no match here */ |
514 | stringlen--; |
515 | } |
516 | } |
517 | } |
518 | } |
519 | |
520 | /* Return selected portion of string */ |
521 | return cstring_to_text_with_len(string, stringlen); |
522 | } |
523 | |
524 | /******************************************************************** |
525 | * |
526 | * byteatrim |
527 | * |
528 | * Syntax: |
529 | * |
530 | * bytea byteatrim(bytea string, bytea set) |
531 | * |
532 | * Purpose: |
533 | * |
534 | * Returns string with characters removed from the front and back |
535 | * up to the first character not in set. |
536 | * |
537 | * Cloned from btrim and modified as required. |
538 | ********************************************************************/ |
539 | |
540 | Datum |
541 | byteatrim(PG_FUNCTION_ARGS) |
542 | { |
543 | bytea *string = PG_GETARG_BYTEA_PP(0); |
544 | bytea *set = PG_GETARG_BYTEA_PP(1); |
545 | bytea *ret; |
546 | char *ptr, |
547 | *end, |
548 | *ptr2, |
549 | *ptr2start, |
550 | *end2; |
551 | int m, |
552 | stringlen, |
553 | setlen; |
554 | |
555 | stringlen = VARSIZE_ANY_EXHDR(string); |
556 | setlen = VARSIZE_ANY_EXHDR(set); |
557 | |
558 | if (stringlen <= 0 || setlen <= 0) |
559 | PG_RETURN_BYTEA_P(string); |
560 | |
561 | m = stringlen; |
562 | ptr = VARDATA_ANY(string); |
563 | end = ptr + stringlen - 1; |
564 | ptr2start = VARDATA_ANY(set); |
565 | end2 = ptr2start + setlen - 1; |
566 | |
567 | while (m > 0) |
568 | { |
569 | ptr2 = ptr2start; |
570 | while (ptr2 <= end2) |
571 | { |
572 | if (*ptr == *ptr2) |
573 | break; |
574 | ++ptr2; |
575 | } |
576 | if (ptr2 > end2) |
577 | break; |
578 | ptr++; |
579 | m--; |
580 | } |
581 | |
582 | while (m > 0) |
583 | { |
584 | ptr2 = ptr2start; |
585 | while (ptr2 <= end2) |
586 | { |
587 | if (*end == *ptr2) |
588 | break; |
589 | ++ptr2; |
590 | } |
591 | if (ptr2 > end2) |
592 | break; |
593 | end--; |
594 | m--; |
595 | } |
596 | |
597 | ret = (bytea *) palloc(VARHDRSZ + m); |
598 | SET_VARSIZE(ret, VARHDRSZ + m); |
599 | memcpy(VARDATA(ret), ptr, m); |
600 | |
601 | PG_RETURN_BYTEA_P(ret); |
602 | } |
603 | |
604 | /******************************************************************** |
605 | * |
606 | * ltrim |
607 | * |
608 | * Syntax: |
609 | * |
610 | * text ltrim(text string, text set) |
611 | * |
612 | * Purpose: |
613 | * |
614 | * Returns string with initial characters removed up to the first |
615 | * character not in set. |
616 | * |
617 | ********************************************************************/ |
618 | |
619 | Datum |
620 | ltrim(PG_FUNCTION_ARGS) |
621 | { |
622 | text *string = PG_GETARG_TEXT_PP(0); |
623 | text *set = PG_GETARG_TEXT_PP(1); |
624 | text *ret; |
625 | |
626 | ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
627 | VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), |
628 | true, false); |
629 | |
630 | PG_RETURN_TEXT_P(ret); |
631 | } |
632 | |
633 | /******************************************************************** |
634 | * |
635 | * ltrim1 --- ltrim with set fixed as ' ' |
636 | * |
637 | ********************************************************************/ |
638 | |
639 | Datum |
640 | ltrim1(PG_FUNCTION_ARGS) |
641 | { |
642 | text *string = PG_GETARG_TEXT_PP(0); |
643 | text *ret; |
644 | |
645 | ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
646 | " " , 1, |
647 | true, false); |
648 | |
649 | PG_RETURN_TEXT_P(ret); |
650 | } |
651 | |
652 | /******************************************************************** |
653 | * |
654 | * rtrim |
655 | * |
656 | * Syntax: |
657 | * |
658 | * text rtrim(text string, text set) |
659 | * |
660 | * Purpose: |
661 | * |
662 | * Returns string with final characters removed after the last |
663 | * character not in set. |
664 | * |
665 | ********************************************************************/ |
666 | |
667 | Datum |
668 | rtrim(PG_FUNCTION_ARGS) |
669 | { |
670 | text *string = PG_GETARG_TEXT_PP(0); |
671 | text *set = PG_GETARG_TEXT_PP(1); |
672 | text *ret; |
673 | |
674 | ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
675 | VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), |
676 | false, true); |
677 | |
678 | PG_RETURN_TEXT_P(ret); |
679 | } |
680 | |
681 | /******************************************************************** |
682 | * |
683 | * rtrim1 --- rtrim with set fixed as ' ' |
684 | * |
685 | ********************************************************************/ |
686 | |
687 | Datum |
688 | rtrim1(PG_FUNCTION_ARGS) |
689 | { |
690 | text *string = PG_GETARG_TEXT_PP(0); |
691 | text *ret; |
692 | |
693 | ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
694 | " " , 1, |
695 | false, true); |
696 | |
697 | PG_RETURN_TEXT_P(ret); |
698 | } |
699 | |
700 | |
701 | /******************************************************************** |
702 | * |
703 | * translate |
704 | * |
705 | * Syntax: |
706 | * |
707 | * text translate(text string, text from, text to) |
708 | * |
709 | * Purpose: |
710 | * |
711 | * Returns string after replacing all occurrences of characters in from |
712 | * with the corresponding character in to. If from is longer than to, |
713 | * occurrences of the extra characters in from are deleted. |
714 | * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>. |
715 | * |
716 | ********************************************************************/ |
717 | |
718 | Datum |
719 | translate(PG_FUNCTION_ARGS) |
720 | { |
721 | text *string = PG_GETARG_TEXT_PP(0); |
722 | text *from = PG_GETARG_TEXT_PP(1); |
723 | text *to = PG_GETARG_TEXT_PP(2); |
724 | text *result; |
725 | char *from_ptr, |
726 | *to_ptr; |
727 | char *source, |
728 | *target; |
729 | int m, |
730 | fromlen, |
731 | tolen, |
732 | retlen, |
733 | i; |
734 | int worst_len; |
735 | int len; |
736 | int source_len; |
737 | int from_index; |
738 | |
739 | m = VARSIZE_ANY_EXHDR(string); |
740 | if (m <= 0) |
741 | PG_RETURN_TEXT_P(string); |
742 | source = VARDATA_ANY(string); |
743 | |
744 | fromlen = VARSIZE_ANY_EXHDR(from); |
745 | from_ptr = VARDATA_ANY(from); |
746 | tolen = VARSIZE_ANY_EXHDR(to); |
747 | to_ptr = VARDATA_ANY(to); |
748 | |
749 | /* |
750 | * The worst-case expansion is to substitute a max-length character for a |
751 | * single-byte character at each position of the string. |
752 | */ |
753 | worst_len = pg_database_encoding_max_length() * m; |
754 | |
755 | /* check for integer overflow */ |
756 | if (worst_len / pg_database_encoding_max_length() != m) |
757 | ereport(ERROR, |
758 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
759 | errmsg("requested length too large" ))); |
760 | |
761 | result = (text *) palloc(worst_len + VARHDRSZ); |
762 | target = VARDATA(result); |
763 | retlen = 0; |
764 | |
765 | while (m > 0) |
766 | { |
767 | source_len = pg_mblen(source); |
768 | from_index = 0; |
769 | |
770 | for (i = 0; i < fromlen; i += len) |
771 | { |
772 | len = pg_mblen(&from_ptr[i]); |
773 | if (len == source_len && |
774 | memcmp(source, &from_ptr[i], len) == 0) |
775 | break; |
776 | |
777 | from_index++; |
778 | } |
779 | if (i < fromlen) |
780 | { |
781 | /* substitute */ |
782 | char *p = to_ptr; |
783 | |
784 | for (i = 0; i < from_index; i++) |
785 | { |
786 | p += pg_mblen(p); |
787 | if (p >= (to_ptr + tolen)) |
788 | break; |
789 | } |
790 | if (p < (to_ptr + tolen)) |
791 | { |
792 | len = pg_mblen(p); |
793 | memcpy(target, p, len); |
794 | target += len; |
795 | retlen += len; |
796 | } |
797 | |
798 | } |
799 | else |
800 | { |
801 | /* no match, so copy */ |
802 | memcpy(target, source, source_len); |
803 | target += source_len; |
804 | retlen += source_len; |
805 | } |
806 | |
807 | source += source_len; |
808 | m -= source_len; |
809 | } |
810 | |
811 | SET_VARSIZE(result, retlen + VARHDRSZ); |
812 | |
813 | /* |
814 | * The function result is probably much bigger than needed, if we're using |
815 | * a multibyte encoding, but it's not worth reallocating it; the result |
816 | * probably won't live long anyway. |
817 | */ |
818 | |
819 | PG_RETURN_TEXT_P(result); |
820 | } |
821 | |
822 | /******************************************************************** |
823 | * |
824 | * ascii |
825 | * |
826 | * Syntax: |
827 | * |
828 | * int ascii(text string) |
829 | * |
830 | * Purpose: |
831 | * |
832 | * Returns the decimal representation of the first character from |
833 | * string. |
834 | * If the string is empty we return 0. |
835 | * If the database encoding is UTF8, we return the Unicode codepoint. |
836 | * If the database encoding is any other multi-byte encoding, we |
837 | * return the value of the first byte if it is an ASCII character |
838 | * (range 1 .. 127), or raise an error. |
839 | * For all other encodings we return the value of the first byte, |
840 | * (range 1..255). |
841 | * |
842 | ********************************************************************/ |
843 | |
844 | Datum |
845 | ascii(PG_FUNCTION_ARGS) |
846 | { |
847 | text *string = PG_GETARG_TEXT_PP(0); |
848 | int encoding = GetDatabaseEncoding(); |
849 | unsigned char *data; |
850 | |
851 | if (VARSIZE_ANY_EXHDR(string) <= 0) |
852 | PG_RETURN_INT32(0); |
853 | |
854 | data = (unsigned char *) VARDATA_ANY(string); |
855 | |
856 | if (encoding == PG_UTF8 && *data > 127) |
857 | { |
858 | /* return the code point for Unicode */ |
859 | |
860 | int result = 0, |
861 | tbytes = 0, |
862 | i; |
863 | |
864 | if (*data >= 0xF0) |
865 | { |
866 | result = *data & 0x07; |
867 | tbytes = 3; |
868 | } |
869 | else if (*data >= 0xE0) |
870 | { |
871 | result = *data & 0x0F; |
872 | tbytes = 2; |
873 | } |
874 | else |
875 | { |
876 | Assert(*data > 0xC0); |
877 | result = *data & 0x1f; |
878 | tbytes = 1; |
879 | } |
880 | |
881 | Assert(tbytes > 0); |
882 | |
883 | for (i = 1; i <= tbytes; i++) |
884 | { |
885 | Assert((data[i] & 0xC0) == 0x80); |
886 | result = (result << 6) + (data[i] & 0x3f); |
887 | } |
888 | |
889 | PG_RETURN_INT32(result); |
890 | } |
891 | else |
892 | { |
893 | if (pg_encoding_max_length(encoding) > 1 && *data > 127) |
894 | ereport(ERROR, |
895 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
896 | errmsg("requested character too large" ))); |
897 | |
898 | |
899 | PG_RETURN_INT32((int32) *data); |
900 | } |
901 | } |
902 | |
903 | /******************************************************************** |
904 | * |
905 | * chr |
906 | * |
907 | * Syntax: |
908 | * |
909 | * text chr(int val) |
910 | * |
911 | * Purpose: |
912 | * |
913 | * Returns the character having the binary equivalent to val. |
914 | * |
915 | * For UTF8 we treat the argument as a Unicode code point. |
916 | * For other multi-byte encodings we raise an error for arguments |
917 | * outside the strict ASCII range (1..127). |
918 | * |
919 | * It's important that we don't ever return a value that is not valid |
920 | * in the database encoding, so that this doesn't become a way for |
921 | * invalid data to enter the database. |
922 | * |
923 | ********************************************************************/ |
924 | |
925 | Datum |
926 | chr (PG_FUNCTION_ARGS) |
927 | { |
928 | uint32 cvalue = PG_GETARG_UINT32(0); |
929 | text *result; |
930 | int encoding = GetDatabaseEncoding(); |
931 | |
932 | if (encoding == PG_UTF8 && cvalue > 127) |
933 | { |
934 | /* for Unicode we treat the argument as a code point */ |
935 | int bytes; |
936 | unsigned char *wch; |
937 | |
938 | /* |
939 | * We only allow valid Unicode code points; per RFC3629 that stops at |
940 | * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to |
941 | * U+1FFFFF. |
942 | */ |
943 | if (cvalue > 0x0010ffff) |
944 | ereport(ERROR, |
945 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
946 | errmsg("requested character too large for encoding: %d" , |
947 | cvalue))); |
948 | |
949 | if (cvalue > 0xffff) |
950 | bytes = 4; |
951 | else if (cvalue > 0x07ff) |
952 | bytes = 3; |
953 | else |
954 | bytes = 2; |
955 | |
956 | result = (text *) palloc(VARHDRSZ + bytes); |
957 | SET_VARSIZE(result, VARHDRSZ + bytes); |
958 | wch = (unsigned char *) VARDATA(result); |
959 | |
960 | if (bytes == 2) |
961 | { |
962 | wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F); |
963 | wch[1] = 0x80 | (cvalue & 0x3F); |
964 | } |
965 | else if (bytes == 3) |
966 | { |
967 | wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F); |
968 | wch[1] = 0x80 | ((cvalue >> 6) & 0x3F); |
969 | wch[2] = 0x80 | (cvalue & 0x3F); |
970 | } |
971 | else |
972 | { |
973 | wch[0] = 0xF0 | ((cvalue >> 18) & 0x07); |
974 | wch[1] = 0x80 | ((cvalue >> 12) & 0x3F); |
975 | wch[2] = 0x80 | ((cvalue >> 6) & 0x3F); |
976 | wch[3] = 0x80 | (cvalue & 0x3F); |
977 | } |
978 | |
979 | /* |
980 | * The preceding range check isn't sufficient, because UTF8 excludes |
981 | * Unicode "surrogate pair" codes. Make sure what we created is valid |
982 | * UTF8. |
983 | */ |
984 | if (!pg_utf8_islegal(wch, bytes)) |
985 | ereport(ERROR, |
986 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
987 | errmsg("requested character not valid for encoding: %d" , |
988 | cvalue))); |
989 | } |
990 | else |
991 | { |
992 | bool is_mb; |
993 | |
994 | /* |
995 | * Error out on arguments that make no sense or that we can't validly |
996 | * represent in the encoding. |
997 | */ |
998 | if (cvalue == 0) |
999 | ereport(ERROR, |
1000 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1001 | errmsg("null character not permitted" ))); |
1002 | |
1003 | is_mb = pg_encoding_max_length(encoding) > 1; |
1004 | |
1005 | if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255))) |
1006 | ereport(ERROR, |
1007 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1008 | errmsg("requested character too large for encoding: %d" , |
1009 | cvalue))); |
1010 | |
1011 | result = (text *) palloc(VARHDRSZ + 1); |
1012 | SET_VARSIZE(result, VARHDRSZ + 1); |
1013 | *VARDATA(result) = (char) cvalue; |
1014 | } |
1015 | |
1016 | PG_RETURN_TEXT_P(result); |
1017 | } |
1018 | |
1019 | /******************************************************************** |
1020 | * |
1021 | * repeat |
1022 | * |
1023 | * Syntax: |
1024 | * |
1025 | * text repeat(text string, int val) |
1026 | * |
1027 | * Purpose: |
1028 | * |
1029 | * Repeat string by val. |
1030 | * |
1031 | ********************************************************************/ |
1032 | |
1033 | Datum |
1034 | repeat(PG_FUNCTION_ARGS) |
1035 | { |
1036 | text *string = PG_GETARG_TEXT_PP(0); |
1037 | int32 count = PG_GETARG_INT32(1); |
1038 | text *result; |
1039 | int slen, |
1040 | tlen; |
1041 | int i; |
1042 | char *cp, |
1043 | *sp; |
1044 | |
1045 | if (count < 0) |
1046 | count = 0; |
1047 | |
1048 | slen = VARSIZE_ANY_EXHDR(string); |
1049 | |
1050 | if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) || |
1051 | unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen))) |
1052 | ereport(ERROR, |
1053 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1054 | errmsg("requested length too large" ))); |
1055 | |
1056 | result = (text *) palloc(tlen); |
1057 | |
1058 | SET_VARSIZE(result, tlen); |
1059 | cp = VARDATA(result); |
1060 | sp = VARDATA_ANY(string); |
1061 | for (i = 0; i < count; i++) |
1062 | { |
1063 | memcpy(cp, sp, slen); |
1064 | cp += slen; |
1065 | } |
1066 | |
1067 | PG_RETURN_TEXT_P(result); |
1068 | } |
1069 | |