1 | /* |
2 | * Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. Oracle designates this |
8 | * particular file as subject to the "Classpath" exception as provided |
9 | * by Oracle in the LICENSE file that accompanied this code. |
10 | * |
11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | * version 2 for more details (a copy is included in the LICENSE file that |
15 | * accompanied this code). |
16 | * |
17 | * You should have received a copy of the GNU General Public License version |
18 | * 2 along with this work; if not, write to the Free Software Foundation, |
19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
20 | * |
21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
22 | * or visit www.oracle.com if you need additional information or have any |
23 | * questions. |
24 | */ |
25 | #include <stdio.h> |
26 | #include <stddef.h> |
27 | #include <stdlib.h> |
28 | #include <string.h> |
29 | #include <ctype.h> |
30 | #include <locale.h> |
31 | #include <langinfo.h> |
32 | #include <iconv.h> |
33 | |
34 | /* Routines to convert back and forth between Platform Encoding and UTF-8 */ |
35 | |
/* Source-file name used in diagnostics; honours a THIS_FILE defined earlier. */
#if !defined(THIS_FILE)
#define THIS_FILE __FILE__
#endif

/*
 * Diagnostic macros.
 *   UTF_ERROR(m)  - pass message m, with the current file and line, to utfError().
 *   UTF_ASSERT(x) - raise UTF_ERROR when x compares equal to zero.
 *   UTF_DEBUG(x)  - expands to nothing.
 */
#define UTF_ERROR(m) utfError(THIS_FILE, __LINE__, m)
#define UTF_ASSERT(x) ( (x) ? (void)0 : UTF_ERROR("ASSERT ERROR " #x) )
#define UTF_DEBUG(x)
45 | |
/*
 * Conversion descriptors shared by the routines in this file.
 * The value (iconv_t)-1 marks a descriptor that is not open.
 */
static iconv_t iconvToPlatform   = (iconv_t)(-1);  /* UTF-8 -> platform encoding */
static iconv_t iconvFromPlatform = (iconv_t)(-1);  /* platform encoding -> UTF-8 */
49 | |
/*
 * Error handler.
 *
 * Writes a diagnostic naming the source file, line number and message to
 * stderr, then terminates the process with abort(). Does not return.
 *
 *   file    - name of the source file reporting the error
 *   line    - line number within that file
 *   message - text describing the failure
 *
 * The string parameters are const-qualified: nothing is written through
 * them, and the UTF_ERROR macro passes string literals for both.
 */
static void
utfError(const char *file, int line, const char *message)
{
    (void)fprintf(stderr, "UTF ERROR [\"%s\":%d]: %s\n", file, line, message);
    abort();
}
59 | |
60 | /* |
61 | * Initialize all utf processing. |
62 | */ |
63 | static void |
64 | utfInitialize(void) |
65 | { |
66 | const char* codeset; |
67 | |
68 | /* Set the locale from the environment */ |
69 | (void)setlocale(LC_ALL, "" ); |
70 | |
71 | /* Get the codeset name */ |
72 | codeset = (char*)nl_langinfo(CODESET); |
73 | if ( codeset == NULL || codeset[0] == 0 ) { |
74 | UTF_DEBUG(("NO codeset returned by nl_langinfo(CODESET)\n" )); |
75 | return; |
76 | } |
77 | |
78 | UTF_DEBUG(("Codeset = %s\n" , codeset)); |
79 | |
80 | #ifdef MACOSX |
81 | /* On Mac, if US-ASCII, but with no env hints, use UTF-8 */ |
82 | const char* env_lang = getenv("LANG" ); |
83 | const char* env_lc_all = getenv("LC_ALL" ); |
84 | const char* env_lc_ctype = getenv("LC_CTYPE" ); |
85 | |
86 | if (strcmp(codeset,"US-ASCII" ) == 0 && |
87 | (env_lang == NULL || strlen(env_lang) == 0) && |
88 | (env_lc_all == NULL || strlen(env_lc_all) == 0) && |
89 | (env_lc_ctype == NULL || strlen(env_lc_ctype) == 0)) { |
90 | codeset = "UTF-8" ; |
91 | } |
92 | #endif |
93 | |
94 | /* If we don't need this, skip it */ |
95 | if (strcmp(codeset, "UTF-8" ) == 0 || strcmp(codeset, "utf8" ) == 0 ) { |
96 | UTF_DEBUG(("NO iconv() being used because it is not needed\n" )); |
97 | return; |
98 | } |
99 | |
100 | /* Open conversion descriptors */ |
101 | iconvToPlatform = iconv_open(codeset, "UTF-8" ); |
102 | if ( iconvToPlatform == (iconv_t)-1 ) { |
103 | UTF_ERROR("Failed to complete iconv_open() setup" ); |
104 | } |
105 | iconvFromPlatform = iconv_open("UTF-8" , codeset); |
106 | if ( iconvFromPlatform == (iconv_t)-1 ) { |
107 | UTF_ERROR("Failed to complete iconv_open() setup" ); |
108 | } |
109 | } |
110 | |
111 | /* |
112 | * Terminate all utf processing |
113 | */ |
114 | static void |
115 | utfTerminate(void) |
116 | { |
117 | if ( iconvFromPlatform!=(iconv_t)-1 ) { |
118 | (void)iconv_close(iconvFromPlatform); |
119 | } |
120 | if ( iconvToPlatform!=(iconv_t)-1 ) { |
121 | (void)iconv_close(iconvToPlatform); |
122 | } |
123 | iconvToPlatform = (iconv_t)-1; |
124 | iconvFromPlatform = (iconv_t)-1; |
125 | } |
126 | |
/*
 * Do iconv() conversion.
 *
 *   ic           - conversion descriptor, or (iconv_t)-1 to copy the input
 *                  bytes to the output unchanged
 *   bytes        - input bytes (must not be NULL)
 *   len          - number of input bytes (must be >= 0)
 *   output       - destination buffer (must not be NULL)
 *   outputMaxLen - size of the destination buffer in bytes, including the
 *                  byte needed for the terminating NUL (must be > len)
 *
 * Returns the number of bytes stored in output, not counting the NUL that
 * is appended on success, or -1 if the conversion fails (iconv() reports an
 * error, input is left unconsumed, or the result plus its NUL does not fit).
 */
static int
iconvConvert(iconv_t ic, char *bytes, int len, char *output, int outputMaxLen)
{
    int outputLen;

    UTF_ASSERT(bytes);
    UTF_ASSERT(len>=0);
    UTF_ASSERT(output);
    UTF_ASSERT(outputMaxLen>len);

    output[0] = 0;

    if ( ic != (iconv_t)-1 ) {
        size_t  returnValue;
        size_t  inLeft;
        size_t  outLeft;
        char   *inbuf;
        char   *outbuf;

        inbuf  = bytes;
        outbuf = output;
        inLeft = (size_t)len;
        /*
         * Hold one byte back for the terminating NUL. Offering the whole
         * buffer to iconv() would let a result of exactly outputMaxLen
         * bytes push the terminator one past the end of output.
         */
        outLeft = (size_t)outputMaxLen - 1;

        /* iconv() returns size_t; failure is (size_t)-1, not a negative int */
        returnValue = iconv(ic, (void*)&inbuf, &inLeft, &outbuf, &outLeft);
        if ( returnValue != (size_t)-1 && inLeft == 0 ) {
            outputLen = (int)(outbuf - output);
            output[outputLen] = 0;
            return outputLen;
        }

        /*
         * Failed to do the conversion. Put the descriptor back into its
         * initial state so a partial conversion cannot affect later calls.
         */
        (void)iconv(ic, NULL, NULL, NULL, NULL);
        UTF_DEBUG(("iconv() failed to do the conversion\n" ));
        return -1;
    }

    /* No descriptor: just copy bytes (outputMaxLen > len leaves room for NUL) */
    outputLen = len;
    (void)memcpy(output, bytes, (size_t)len);
    output[len] = 0;
    return outputLen;
}
173 | |
174 | /* |
175 | * Convert UTF-8 to Platform Encoding. |
176 | * Returns length or -1 if output overflows. |
177 | */ |
178 | static int |
179 | utf8ToPlatform(char *utf8, int len, char *output, int outputMaxLen) |
180 | { |
181 | return iconvConvert(iconvToPlatform, utf8, len, output, outputMaxLen); |
182 | } |
183 | |
184 | /* |
185 | * Convert Platform Encoding to UTF-8. |
186 | * Returns length or -1 if output overflows. |
187 | */ |
188 | static int |
189 | platformToUtf8(char *str, int len, char *output, int outputMaxLen) |
190 | { |
191 | return iconvConvert(iconvFromPlatform, str, len, output, outputMaxLen); |
192 | } |
193 | |
194 | int |
195 | convertUft8ToPlatformString(char* utf8_str, int utf8_len, char* platform_str, int platform_len) { |
196 | if (iconvToPlatform == (iconv_t)-1) { |
197 | utfInitialize(); |
198 | } |
199 | return utf8ToPlatform(utf8_str, utf8_len, platform_str, platform_len); |
200 | } |
201 | |