1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com> |
4 | Copyright(C) 2011-2013 Kentoku SHIBA |
5 | |
6 | This library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | This library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with this library; if not, write to the Free Software |
18 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | */ |
20 | |
21 | #include <mrn_err.h> |
22 | #include "mrn_encoding.hpp" |
23 | |
24 | namespace mrn { |
25 | namespace encoding { |
/*
  Server charset entries that init() looks up in all_charsets by csname.
  Every pointer starts as NULL and is only assigned when init() meets an
  entry with the matching csname, so a pointer may still be NULL after
  init() has run.
*/
CHARSET_INFO *mrn_charset_utf8 = NULL;     /* first entry with csname "utf8" */
CHARSET_INFO *mrn_charset_utf8mb4 = NULL;  /* first entry with csname "utf8mb4" */
CHARSET_INFO *mrn_charset_binary = NULL;   /* first entry with csname "binary" */
CHARSET_INFO *mrn_charset_ascii = NULL;    /* first entry with csname "ascii" */
CHARSET_INFO *mrn_charset_latin1_1 = NULL; /* first entry with csname "latin1" */
/* First "latin1" entry whose cset differs from mrn_charset_latin1_1's cset;
   stays NULL when every latin1 entry shares one cset. */
CHARSET_INFO *mrn_charset_latin1_2 = NULL;
CHARSET_INFO *mrn_charset_cp932 = NULL;    /* first entry with csname "cp932" */
CHARSET_INFO *mrn_charset_sjis = NULL;     /* first entry with csname "sjis" */
CHARSET_INFO *mrn_charset_eucjpms = NULL;  /* first entry with csname "eucjpms" */
CHARSET_INFO *mrn_charset_ujis = NULL;     /* first entry with csname "ujis" */
CHARSET_INFO *mrn_charset_koi8r = NULL;    /* first entry with csname "koi8r" */
37 | |
38 | void init(void) { |
39 | CHARSET_INFO **cs; |
40 | MRN_DBUG_ENTER_FUNCTION(); |
41 | for (cs = all_charsets; cs < all_charsets + MY_ALL_CHARSETS_SIZE; cs++) |
42 | { |
43 | if (!cs[0]) |
44 | continue; |
45 | if (!strcmp(cs[0]->csname, "utf8" )) |
46 | { |
47 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
48 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
49 | if (!mrn_charset_utf8) |
50 | mrn_charset_utf8 = cs[0]; |
51 | else if (mrn_charset_utf8->cset != cs[0]->cset) |
52 | DBUG_ASSERT(0); |
53 | continue; |
54 | } |
55 | if (!strcmp(cs[0]->csname, "utf8mb4" )) |
56 | { |
57 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
58 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
59 | if (!mrn_charset_utf8mb4) |
60 | mrn_charset_utf8mb4 = cs[0]; |
61 | else if (mrn_charset_utf8mb4->cset != cs[0]->cset) |
62 | DBUG_ASSERT(0); |
63 | continue; |
64 | } |
65 | if (!strcmp(cs[0]->csname, "binary" )) |
66 | { |
67 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
68 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
69 | if (!mrn_charset_binary) |
70 | mrn_charset_binary = cs[0]; |
71 | else if (mrn_charset_binary->cset != cs[0]->cset) |
72 | DBUG_ASSERT(0); |
73 | continue; |
74 | } |
75 | if (!strcmp(cs[0]->csname, "ascii" )) |
76 | { |
77 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
78 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
79 | if (!mrn_charset_ascii) |
80 | mrn_charset_ascii = cs[0]; |
81 | else if (mrn_charset_ascii->cset != cs[0]->cset) |
82 | DBUG_ASSERT(0); |
83 | continue; |
84 | } |
85 | if (!strcmp(cs[0]->csname, "latin1" )) |
86 | { |
87 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
88 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
89 | if (!mrn_charset_latin1_1) |
90 | mrn_charset_latin1_1 = cs[0]; |
91 | else if (mrn_charset_latin1_1->cset != cs[0]->cset) |
92 | { |
93 | if (!mrn_charset_latin1_2) |
94 | mrn_charset_latin1_2 = cs[0]; |
95 | else if (mrn_charset_latin1_2->cset != cs[0]->cset) |
96 | DBUG_ASSERT(0); |
97 | } |
98 | continue; |
99 | } |
100 | if (!strcmp(cs[0]->csname, "cp932" )) |
101 | { |
102 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
103 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
104 | if (!mrn_charset_cp932) |
105 | mrn_charset_cp932 = cs[0]; |
106 | else if (mrn_charset_cp932->cset != cs[0]->cset) |
107 | DBUG_ASSERT(0); |
108 | continue; |
109 | } |
110 | if (!strcmp(cs[0]->csname, "sjis" )) |
111 | { |
112 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
113 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
114 | if (!mrn_charset_sjis) |
115 | mrn_charset_sjis = cs[0]; |
116 | else if (mrn_charset_sjis->cset != cs[0]->cset) |
117 | DBUG_ASSERT(0); |
118 | continue; |
119 | } |
120 | if (!strcmp(cs[0]->csname, "eucjpms" )) |
121 | { |
122 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
123 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
124 | if (!mrn_charset_eucjpms) |
125 | mrn_charset_eucjpms = cs[0]; |
126 | else if (mrn_charset_eucjpms->cset != cs[0]->cset) |
127 | DBUG_ASSERT(0); |
128 | continue; |
129 | } |
130 | if (!strcmp(cs[0]->csname, "ujis" )) |
131 | { |
132 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
133 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
134 | if (!mrn_charset_ujis) |
135 | mrn_charset_ujis = cs[0]; |
136 | else if (mrn_charset_ujis->cset != cs[0]->cset) |
137 | DBUG_ASSERT(0); |
138 | continue; |
139 | } |
140 | if (!strcmp(cs[0]->csname, "koi8r" )) |
141 | { |
142 | DBUG_PRINT("info" , ("mroonga: %s is %s [%p]" , |
143 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
144 | if (!mrn_charset_koi8r) |
145 | mrn_charset_koi8r = cs[0]; |
146 | else if (mrn_charset_koi8r->cset != cs[0]->cset) |
147 | DBUG_ASSERT(0); |
148 | continue; |
149 | } |
150 | DBUG_PRINT("info" , ("mroonga: %s[%s][%p] is not supported" , |
151 | cs[0]->name, cs[0]->csname, cs[0]->cset)); |
152 | } |
153 | DBUG_VOID_RETURN; |
154 | } |
155 | |
156 | int set(grn_ctx *ctx, const CHARSET_INFO *charset) { |
157 | MRN_DBUG_ENTER_FUNCTION(); |
158 | int error = 0; |
159 | |
160 | if (!set_raw(ctx, charset)) { |
161 | const char *name = "<null>" ; |
162 | const char *csname = "<null>" ; |
163 | if (charset) { |
164 | name = charset->name; |
165 | csname = charset->csname; |
166 | } |
167 | error = ER_MRN_CHARSET_NOT_SUPPORT_NUM; |
168 | my_printf_error(error, |
169 | ER_MRN_CHARSET_NOT_SUPPORT_STR, |
170 | MYF(0), name, csname); |
171 | } |
172 | |
173 | DBUG_RETURN(error); |
174 | } |
175 | |
176 | bool set_raw(grn_ctx *ctx, const CHARSET_INFO *charset) { |
177 | MRN_DBUG_ENTER_FUNCTION(); |
178 | if (!charset) |
179 | { |
180 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE); |
181 | DBUG_RETURN(true); |
182 | } |
183 | if (charset->cset == mrn_charset_utf8->cset) |
184 | { |
185 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8); |
186 | DBUG_RETURN(true); |
187 | } |
188 | if (mrn_charset_utf8mb4 && charset->cset == mrn_charset_utf8mb4->cset) |
189 | { |
190 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8); |
191 | DBUG_RETURN(true); |
192 | } |
193 | if (charset->cset == mrn_charset_cp932->cset) |
194 | { |
195 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS); |
196 | DBUG_RETURN(true); |
197 | } |
198 | if (charset->cset == mrn_charset_eucjpms->cset) |
199 | { |
200 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP); |
201 | DBUG_RETURN(true); |
202 | } |
203 | if (charset->cset == mrn_charset_latin1_1->cset) |
204 | { |
205 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1); |
206 | DBUG_RETURN(true); |
207 | } |
208 | if (charset->cset == mrn_charset_latin1_2->cset) |
209 | { |
210 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1); |
211 | DBUG_RETURN(true); |
212 | } |
213 | if (charset->cset == mrn_charset_koi8r->cset) |
214 | { |
215 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_KOI8R); |
216 | DBUG_RETURN(true); |
217 | } |
218 | if (charset->cset == mrn_charset_binary->cset) |
219 | { |
220 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE); |
221 | DBUG_RETURN(true); |
222 | } |
223 | if (charset->cset == mrn_charset_ascii->cset) |
224 | { |
225 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8); |
226 | DBUG_RETURN(true); |
227 | } |
228 | if (charset->cset == mrn_charset_sjis->cset) |
229 | { |
230 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS); |
231 | DBUG_RETURN(true); |
232 | } |
233 | if (charset->cset == mrn_charset_ujis->cset) |
234 | { |
235 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP); |
236 | DBUG_RETURN(true); |
237 | } |
238 | GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE); |
239 | DBUG_RETURN(false); |
240 | } |
241 | } |
242 | } |
243 | |