1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | New API code Copyright (c) 2016 University of Cambridge |
11 | |
12 | ----------------------------------------------------------------------------- |
13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions are met: |
15 | |
16 | * Redistributions of source code must retain the above copyright notice, |
17 | this list of conditions and the following disclaimer. |
18 | |
19 | * Redistributions in binary form must reproduce the above copyright |
20 | notice, this list of conditions and the following disclaimer in the |
21 | documentation and/or other materials provided with the distribution. |
22 | |
23 | * Neither the name of the University of Cambridge nor the names of its |
24 | contributors may be used to endorse or promote products derived from |
25 | this software without specific prior written permission. |
26 | |
27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | POSSIBILITY OF SUCH DAMAGE. |
38 | ----------------------------------------------------------------------------- |
39 | */ |
40 | |
41 | |
42 | /* This file contains a function that converts a Unicode character code point |
43 | into a UTF string. The behaviour is different for each code unit width. */ |
44 | |
45 | |
46 | #ifdef HAVE_CONFIG_H |
47 | #include "config.h" |
48 | #endif |
49 | |
50 | #include "pcre2_internal.h" |
51 | |
52 | |
53 | /* If SUPPORT_UNICODE is not defined, this function will never be called. |
54 | Supply a dummy function because some compilers do not like empty source |
55 | modules. */ |
56 | |
57 | #ifndef SUPPORT_UNICODE |
58 | unsigned int |
59 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) |
60 | { |
61 | (void)(cvalue); |
62 | (void)(buffer); |
63 | return 0; |
64 | } |
65 | #else /* SUPPORT_UNICODE */ |
66 | |
67 | |
68 | /************************************************* |
69 | * Convert code point to UTF * |
70 | *************************************************/ |
71 | |
72 | /* |
73 | Arguments: |
74 | cvalue the character value |
75 | buffer pointer to buffer for result |
76 | |
77 | Returns: number of code units placed in the buffer |
78 | */ |
79 | |
80 | unsigned int |
81 | PRIV(ord2utf)(uint32_t cvalue, PCRE2_UCHAR *buffer) |
82 | { |
83 | /* Convert to UTF-8 */ |
84 | |
85 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
86 | int i, j; |
87 | for (i = 0; i < PRIV(utf8_table1_size); i++) |
88 | if ((int)cvalue <= PRIV(utf8_table1)[i]) break; |
89 | buffer += i; |
90 | for (j = i; j > 0; j--) |
91 | { |
92 | *buffer-- = 0x80 | (cvalue & 0x3f); |
93 | cvalue >>= 6; |
94 | } |
95 | *buffer = PRIV(utf8_table2)[i] | cvalue; |
96 | return i + 1; |
97 | |
98 | /* Convert to UTF-16 */ |
99 | |
100 | #elif PCRE2_CODE_UNIT_WIDTH == 16 |
101 | if (cvalue <= 0xffff) |
102 | { |
103 | *buffer = (PCRE2_UCHAR)cvalue; |
104 | return 1; |
105 | } |
106 | cvalue -= 0x10000; |
107 | *buffer++ = 0xd800 | (cvalue >> 10); |
108 | *buffer = 0xdc00 | (cvalue & 0x3ff); |
109 | return 2; |
110 | |
111 | /* Convert to UTF-32 */ |
112 | |
113 | #else |
114 | *buffer = (PCRE2_UCHAR)cvalue; |
115 | return 1; |
116 | #endif |
117 | } |
118 | #endif /* SUPPORT_UNICODE */ |
119 | |
/* End of pcre2_ord2utf.c */
121 | |