1 | // Scintilla source code edit control |
2 | // Encoding: UTF-8 |
3 | /** @file CaseConvert.cxx |
4 | ** Case fold characters and convert them to upper or lower case. |
5 | ** Tables automatically regenerated by scripts/GenerateCaseConvert.py |
6 | ** Should only be rarely regenerated for new versions of Unicode. |
7 | **/ |
8 | // Copyright 2013 by Neil Hodgson <neilh@scintilla.org> |
9 | // The License.txt file describes the conditions under which this software may be distributed. |
10 | |
11 | #include <cassert> |
12 | #include <cstring> |
13 | |
14 | #include <stdexcept> |
15 | #include <string> |
16 | #include <string_view> |
17 | #include <vector> |
18 | #include <algorithm> |
19 | |
20 | #include "CaseConvert.h" |
21 | #include "UniConversion.h" |
22 | |
23 | using namespace Scintilla::Internal; |
24 | |
25 | namespace { |
26 | // Use an unnamed namespace to protect the declarations from name conflicts |
27 | |
28 | // Unicode code points are ordered by groups and follow patterns. |
29 | // Most characters (pitch==1) are in ranges for a particular alphabet and their |
30 | // upper case forms are a fixed distance away. |
31 | // Another pattern (pitch==2) is where each lower case letter is preceded by |
32 | // the upper case form. These are also grouped into ranges. |
33 | |
// Table of symmetric case conversion ranges, stored as consecutive groups of
// four values: first lower case code point, first upper case code point,
// number of characters in the range, and pitch (distance between successive
// characters of the same case). The table is only ever read, so it is
// constexpr to keep it out of writable storage.
constexpr int symmetricCaseConversionRanges[] = {
//lower, upper, range length, range pitch
//++Autogenerated -- start of section automatically generated
//**\(\*\n\)
97,65,26,1,
224,192,23,1,
248,216,7,1,
257,256,24,2,
314,313,8,2,
331,330,23,2,
462,461,8,2,
479,478,9,2,
505,504,20,2,
547,546,9,2,
583,582,5,2,
945,913,17,1,
963,931,9,1,
985,984,12,2,
1072,1040,32,1,
1104,1024,16,1,
1121,1120,17,2,
1163,1162,27,2,
1218,1217,7,2,
1233,1232,48,2,
1377,1329,38,1,
4304,7312,43,1,
7681,7680,75,2,
7841,7840,48,2,
7936,7944,8,1,
7952,7960,6,1,
7968,7976,8,1,
7984,7992,8,1,
8000,8008,6,1,
8032,8040,8,1,
8560,8544,16,1,
9424,9398,26,1,
11312,11264,47,1,
11393,11392,50,2,
11520,4256,38,1,
42561,42560,23,2,
42625,42624,14,2,
42787,42786,7,2,
42803,42802,31,2,
42879,42878,5,2,
42903,42902,10,2,
42933,42932,6,2,
65345,65313,26,1,
66600,66560,40,1,
66776,66736,36,1,
68800,68736,51,1,
71872,71840,32,1,
93792,93760,32,1,
125218,125184,34,1,

//--Autogenerated -- end of section automatically generated
};
90 | |
91 | // Code points that are symmetric but don't fit into a range of similar characters |
92 | // are listed here. |
93 | |
// Table of individual symmetric case pairs, stored as consecutive
// (lower case code point, upper case code point) values. The table is only
// ever read, so it is constexpr to keep it out of writable storage.
constexpr int symmetricCaseConversions[] = {
//lower, upper
//++Autogenerated -- start of section automatically generated
//**1 \(\*\n\)
255,376,
307,306,
309,308,
311,310,
378,377,
380,379,
382,381,
384,579,
387,386,
389,388,
392,391,
396,395,
402,401,
405,502,
409,408,
410,573,
414,544,
417,416,
419,418,
421,420,
424,423,
429,428,
432,431,
436,435,
438,437,
441,440,
445,444,
447,503,
454,452,
457,455,
460,458,
477,398,
499,497,
501,500,
572,571,
575,11390,
576,11391,
578,577,
592,11375,
593,11373,
594,11376,
595,385,
596,390,
598,393,
599,394,
601,399,
603,400,
604,42923,
608,403,
609,42924,
611,404,
613,42893,
614,42922,
616,407,
617,406,
618,42926,
619,11362,
620,42925,
623,412,
625,11374,
626,413,
629,415,
637,11364,
640,422,
642,42949,
643,425,
647,42929,
648,430,
649,580,
650,433,
651,434,
652,581,
658,439,
669,42930,
670,42928,
881,880,
883,882,
887,886,
891,1021,
892,1022,
893,1023,
940,902,
941,904,
942,905,
943,906,
972,908,
973,910,
974,911,
983,975,
1010,1017,
1011,895,
1016,1015,
1019,1018,
1231,1216,
4349,7357,
4350,7358,
4351,7359,
7545,42877,
7549,11363,
7566,42950,
8017,8025,
8019,8027,
8021,8029,
8023,8031,
8048,8122,
8049,8123,
8050,8136,
8051,8137,
8052,8138,
8053,8139,
8054,8154,
8055,8155,
8056,8184,
8057,8185,
8058,8170,
8059,8171,
8060,8186,
8061,8187,
8112,8120,
8113,8121,
8144,8152,
8145,8153,
8160,8168,
8161,8169,
8165,8172,
8526,8498,
8580,8579,
11361,11360,
11365,570,
11366,574,
11368,11367,
11370,11369,
11372,11371,
11379,11378,
11382,11381,
11500,11499,
11502,11501,
11507,11506,
11559,4295,
11565,4301,
42874,42873,
42876,42875,
42892,42891,
42897,42896,
42899,42898,
42900,42948,
42947,42946,
42952,42951,
42954,42953,
42998,42997,
43859,42931,

//--Autogenerated -- end of section automatically generated
};
252 | |
253 | // Characters that have complex case conversions are listed here. |
254 | // This includes cases where more than one character is needed for a conversion, |
255 | // folding is different to lowering, or (as appropriate) upper(lower(x)) != x or |
256 | // lower(upper(x)) != x. |
257 | |
// Table of complex case conversions held as one concatenated string literal.
// Each record consists of four '|' terminated UTF-8 fields in the order
// original, folded, upper, lower; an empty field means there is no entry of
// that kind for the character. The pointer is const as well as the characters
// so the table can not be redirected at run time.
const char *const complexCaseConversions =
// Original | Folded | Upper | Lower |
//++Autogenerated -- start of section automatically generated
//**2 \(\*\n\)
"\xc2\xb5|\xce\xbc|\xce\x9c||"
"\xc3\x9f|ss|SS||"
"\xc4\xb0|i\xcc\x87||i\xcc\x87|"
"\xc4\xb1||I||"
"\xc5\x89|\xca\xbcn|\xca\xbcN||"
"\xc5\xbf|s|S||"
"\xc7\x85|\xc7\x86|\xc7\x84|\xc7\x86|"
"\xc7\x88|\xc7\x89|\xc7\x87|\xc7\x89|"
"\xc7\x8b|\xc7\x8c|\xc7\x8a|\xc7\x8c|"
"\xc7\xb0|j\xcc\x8c|J\xcc\x8c||"
"\xc7\xb2|\xc7\xb3|\xc7\xb1|\xc7\xb3|"
"\xcd\x85|\xce\xb9|\xce\x99||"
"\xce\x90|\xce\xb9\xcc\x88\xcc\x81|\xce\x99\xcc\x88\xcc\x81||"
"\xce\xb0|\xcf\x85\xcc\x88\xcc\x81|\xce\xa5\xcc\x88\xcc\x81||"
"\xcf\x82|\xcf\x83|\xce\xa3||"
"\xcf\x90|\xce\xb2|\xce\x92||"
"\xcf\x91|\xce\xb8|\xce\x98||"
"\xcf\x95|\xcf\x86|\xce\xa6||"
"\xcf\x96|\xcf\x80|\xce\xa0||"
"\xcf\xb0|\xce\xba|\xce\x9a||"
"\xcf\xb1|\xcf\x81|\xce\xa1||"
"\xcf\xb4|\xce\xb8||\xce\xb8|"
"\xcf\xb5|\xce\xb5|\xce\x95||"
"\xd6\x87|\xd5\xa5\xd6\x82|\xd4\xb5\xd5\x92||"
"\xe1\x8e\xa0|||\xea\xad\xb0|"
"\xe1\x8e\xa1|||\xea\xad\xb1|"
"\xe1\x8e\xa2|||\xea\xad\xb2|"
"\xe1\x8e\xa3|||\xea\xad\xb3|"
"\xe1\x8e\xa4|||\xea\xad\xb4|"
"\xe1\x8e\xa5|||\xea\xad\xb5|"
"\xe1\x8e\xa6|||\xea\xad\xb6|"
"\xe1\x8e\xa7|||\xea\xad\xb7|"
"\xe1\x8e\xa8|||\xea\xad\xb8|"
"\xe1\x8e\xa9|||\xea\xad\xb9|"
"\xe1\x8e\xaa|||\xea\xad\xba|"
"\xe1\x8e\xab|||\xea\xad\xbb|"
"\xe1\x8e\xac|||\xea\xad\xbc|"
"\xe1\x8e\xad|||\xea\xad\xbd|"
"\xe1\x8e\xae|||\xea\xad\xbe|"
"\xe1\x8e\xaf|||\xea\xad\xbf|"
"\xe1\x8e\xb0|||\xea\xae\x80|"
"\xe1\x8e\xb1|||\xea\xae\x81|"
"\xe1\x8e\xb2|||\xea\xae\x82|"
"\xe1\x8e\xb3|||\xea\xae\x83|"
"\xe1\x8e\xb4|||\xea\xae\x84|"
"\xe1\x8e\xb5|||\xea\xae\x85|"
"\xe1\x8e\xb6|||\xea\xae\x86|"
"\xe1\x8e\xb7|||\xea\xae\x87|"
"\xe1\x8e\xb8|||\xea\xae\x88|"
"\xe1\x8e\xb9|||\xea\xae\x89|"
"\xe1\x8e\xba|||\xea\xae\x8a|"
"\xe1\x8e\xbb|||\xea\xae\x8b|"
"\xe1\x8e\xbc|||\xea\xae\x8c|"
"\xe1\x8e\xbd|||\xea\xae\x8d|"
"\xe1\x8e\xbe|||\xea\xae\x8e|"
"\xe1\x8e\xbf|||\xea\xae\x8f|"
"\xe1\x8f\x80|||\xea\xae\x90|"
"\xe1\x8f\x81|||\xea\xae\x91|"
"\xe1\x8f\x82|||\xea\xae\x92|"
"\xe1\x8f\x83|||\xea\xae\x93|"
"\xe1\x8f\x84|||\xea\xae\x94|"
"\xe1\x8f\x85|||\xea\xae\x95|"
"\xe1\x8f\x86|||\xea\xae\x96|"
"\xe1\x8f\x87|||\xea\xae\x97|"
"\xe1\x8f\x88|||\xea\xae\x98|"
"\xe1\x8f\x89|||\xea\xae\x99|"
"\xe1\x8f\x8a|||\xea\xae\x9a|"
"\xe1\x8f\x8b|||\xea\xae\x9b|"
"\xe1\x8f\x8c|||\xea\xae\x9c|"
"\xe1\x8f\x8d|||\xea\xae\x9d|"
"\xe1\x8f\x8e|||\xea\xae\x9e|"
"\xe1\x8f\x8f|||\xea\xae\x9f|"
"\xe1\x8f\x90|||\xea\xae\xa0|"
"\xe1\x8f\x91|||\xea\xae\xa1|"
"\xe1\x8f\x92|||\xea\xae\xa2|"
"\xe1\x8f\x93|||\xea\xae\xa3|"
"\xe1\x8f\x94|||\xea\xae\xa4|"
"\xe1\x8f\x95|||\xea\xae\xa5|"
"\xe1\x8f\x96|||\xea\xae\xa6|"
"\xe1\x8f\x97|||\xea\xae\xa7|"
"\xe1\x8f\x98|||\xea\xae\xa8|"
"\xe1\x8f\x99|||\xea\xae\xa9|"
"\xe1\x8f\x9a|||\xea\xae\xaa|"
"\xe1\x8f\x9b|||\xea\xae\xab|"
"\xe1\x8f\x9c|||\xea\xae\xac|"
"\xe1\x8f\x9d|||\xea\xae\xad|"
"\xe1\x8f\x9e|||\xea\xae\xae|"
"\xe1\x8f\x9f|||\xea\xae\xaf|"
"\xe1\x8f\xa0|||\xea\xae\xb0|"
"\xe1\x8f\xa1|||\xea\xae\xb1|"
"\xe1\x8f\xa2|||\xea\xae\xb2|"
"\xe1\x8f\xa3|||\xea\xae\xb3|"
"\xe1\x8f\xa4|||\xea\xae\xb4|"
"\xe1\x8f\xa5|||\xea\xae\xb5|"
"\xe1\x8f\xa6|||\xea\xae\xb6|"
"\xe1\x8f\xa7|||\xea\xae\xb7|"
"\xe1\x8f\xa8|||\xea\xae\xb8|"
"\xe1\x8f\xa9|||\xea\xae\xb9|"
"\xe1\x8f\xaa|||\xea\xae\xba|"
"\xe1\x8f\xab|||\xea\xae\xbb|"
"\xe1\x8f\xac|||\xea\xae\xbc|"
"\xe1\x8f\xad|||\xea\xae\xbd|"
"\xe1\x8f\xae|||\xea\xae\xbe|"
"\xe1\x8f\xaf|||\xea\xae\xbf|"
"\xe1\x8f\xb0|||\xe1\x8f\xb8|"
"\xe1\x8f\xb1|||\xe1\x8f\xb9|"
"\xe1\x8f\xb2|||\xe1\x8f\xba|"
"\xe1\x8f\xb3|||\xe1\x8f\xbb|"
"\xe1\x8f\xb4|||\xe1\x8f\xbc|"
"\xe1\x8f\xb5|||\xe1\x8f\xbd|"
"\xe1\x8f\xb8|\xe1\x8f\xb0|\xe1\x8f\xb0||"
"\xe1\x8f\xb9|\xe1\x8f\xb1|\xe1\x8f\xb1||"
"\xe1\x8f\xba|\xe1\x8f\xb2|\xe1\x8f\xb2||"
"\xe1\x8f\xbb|\xe1\x8f\xb3|\xe1\x8f\xb3||"
"\xe1\x8f\xbc|\xe1\x8f\xb4|\xe1\x8f\xb4||"
"\xe1\x8f\xbd|\xe1\x8f\xb5|\xe1\x8f\xb5||"
"\xe1\xb2\x80|\xd0\xb2|\xd0\x92||"
"\xe1\xb2\x81|\xd0\xb4|\xd0\x94||"
"\xe1\xb2\x82|\xd0\xbe|\xd0\x9e||"
"\xe1\xb2\x83|\xd1\x81|\xd0\xa1||"
"\xe1\xb2\x84|\xd1\x82|\xd0\xa2||"
"\xe1\xb2\x85|\xd1\x82|\xd0\xa2||"
"\xe1\xb2\x86|\xd1\x8a|\xd0\xaa||"
"\xe1\xb2\x87|\xd1\xa3|\xd1\xa2||"
"\xe1\xb2\x88|\xea\x99\x8b|\xea\x99\x8a||"
"\xe1\xba\x96|h\xcc\xb1|H\xcc\xb1||"
"\xe1\xba\x97|t\xcc\x88|T\xcc\x88||"
"\xe1\xba\x98|w\xcc\x8a|W\xcc\x8a||"
"\xe1\xba\x99|y\xcc\x8a|Y\xcc\x8a||"
"\xe1\xba\x9a|a\xca\xbe|A\xca\xbe||"
"\xe1\xba\x9b|\xe1\xb9\xa1|\xe1\xb9\xa0||"
"\xe1\xba\x9e|ss||\xc3\x9f|"
"\xe1\xbd\x90|\xcf\x85\xcc\x93|\xce\xa5\xcc\x93||"
"\xe1\xbd\x92|\xcf\x85\xcc\x93\xcc\x80|\xce\xa5\xcc\x93\xcc\x80||"
"\xe1\xbd\x94|\xcf\x85\xcc\x93\xcc\x81|\xce\xa5\xcc\x93\xcc\x81||"
"\xe1\xbd\x96|\xcf\x85\xcc\x93\xcd\x82|\xce\xa5\xcc\x93\xcd\x82||"
"\xe1\xbe\x80|\xe1\xbc\x80\xce\xb9|\xe1\xbc\x88\xce\x99||"
"\xe1\xbe\x81|\xe1\xbc\x81\xce\xb9|\xe1\xbc\x89\xce\x99||"
"\xe1\xbe\x82|\xe1\xbc\x82\xce\xb9|\xe1\xbc\x8a\xce\x99||"
"\xe1\xbe\x83|\xe1\xbc\x83\xce\xb9|\xe1\xbc\x8b\xce\x99||"
"\xe1\xbe\x84|\xe1\xbc\x84\xce\xb9|\xe1\xbc\x8c\xce\x99||"
"\xe1\xbe\x85|\xe1\xbc\x85\xce\xb9|\xe1\xbc\x8d\xce\x99||"
"\xe1\xbe\x86|\xe1\xbc\x86\xce\xb9|\xe1\xbc\x8e\xce\x99||"
"\xe1\xbe\x87|\xe1\xbc\x87\xce\xb9|\xe1\xbc\x8f\xce\x99||"
"\xe1\xbe\x88|\xe1\xbc\x80\xce\xb9|\xe1\xbc\x88\xce\x99|\xe1\xbe\x80|"
"\xe1\xbe\x89|\xe1\xbc\x81\xce\xb9|\xe1\xbc\x89\xce\x99|\xe1\xbe\x81|"
"\xe1\xbe\x8a|\xe1\xbc\x82\xce\xb9|\xe1\xbc\x8a\xce\x99|\xe1\xbe\x82|"
"\xe1\xbe\x8b|\xe1\xbc\x83\xce\xb9|\xe1\xbc\x8b\xce\x99|\xe1\xbe\x83|"
"\xe1\xbe\x8c|\xe1\xbc\x84\xce\xb9|\xe1\xbc\x8c\xce\x99|\xe1\xbe\x84|"
"\xe1\xbe\x8d|\xe1\xbc\x85\xce\xb9|\xe1\xbc\x8d\xce\x99|\xe1\xbe\x85|"
"\xe1\xbe\x8e|\xe1\xbc\x86\xce\xb9|\xe1\xbc\x8e\xce\x99|\xe1\xbe\x86|"
"\xe1\xbe\x8f|\xe1\xbc\x87\xce\xb9|\xe1\xbc\x8f\xce\x99|\xe1\xbe\x87|"
"\xe1\xbe\x90|\xe1\xbc\xa0\xce\xb9|\xe1\xbc\xa8\xce\x99||"
"\xe1\xbe\x91|\xe1\xbc\xa1\xce\xb9|\xe1\xbc\xa9\xce\x99||"
"\xe1\xbe\x92|\xe1\xbc\xa2\xce\xb9|\xe1\xbc\xaa\xce\x99||"
"\xe1\xbe\x93|\xe1\xbc\xa3\xce\xb9|\xe1\xbc\xab\xce\x99||"
"\xe1\xbe\x94|\xe1\xbc\xa4\xce\xb9|\xe1\xbc\xac\xce\x99||"
"\xe1\xbe\x95|\xe1\xbc\xa5\xce\xb9|\xe1\xbc\xad\xce\x99||"
"\xe1\xbe\x96|\xe1\xbc\xa6\xce\xb9|\xe1\xbc\xae\xce\x99||"
"\xe1\xbe\x97|\xe1\xbc\xa7\xce\xb9|\xe1\xbc\xaf\xce\x99||"
"\xe1\xbe\x98|\xe1\xbc\xa0\xce\xb9|\xe1\xbc\xa8\xce\x99|\xe1\xbe\x90|"
"\xe1\xbe\x99|\xe1\xbc\xa1\xce\xb9|\xe1\xbc\xa9\xce\x99|\xe1\xbe\x91|"
"\xe1\xbe\x9a|\xe1\xbc\xa2\xce\xb9|\xe1\xbc\xaa\xce\x99|\xe1\xbe\x92|"
"\xe1\xbe\x9b|\xe1\xbc\xa3\xce\xb9|\xe1\xbc\xab\xce\x99|\xe1\xbe\x93|"
"\xe1\xbe\x9c|\xe1\xbc\xa4\xce\xb9|\xe1\xbc\xac\xce\x99|\xe1\xbe\x94|"
"\xe1\xbe\x9d|\xe1\xbc\xa5\xce\xb9|\xe1\xbc\xad\xce\x99|\xe1\xbe\x95|"
"\xe1\xbe\x9e|\xe1\xbc\xa6\xce\xb9|\xe1\xbc\xae\xce\x99|\xe1\xbe\x96|"
"\xe1\xbe\x9f|\xe1\xbc\xa7\xce\xb9|\xe1\xbc\xaf\xce\x99|\xe1\xbe\x97|"
"\xe1\xbe\xa0|\xe1\xbd\xa0\xce\xb9|\xe1\xbd\xa8\xce\x99||"
"\xe1\xbe\xa1|\xe1\xbd\xa1\xce\xb9|\xe1\xbd\xa9\xce\x99||"
"\xe1\xbe\xa2|\xe1\xbd\xa2\xce\xb9|\xe1\xbd\xaa\xce\x99||"
"\xe1\xbe\xa3|\xe1\xbd\xa3\xce\xb9|\xe1\xbd\xab\xce\x99||"
"\xe1\xbe\xa4|\xe1\xbd\xa4\xce\xb9|\xe1\xbd\xac\xce\x99||"
"\xe1\xbe\xa5|\xe1\xbd\xa5\xce\xb9|\xe1\xbd\xad\xce\x99||"
"\xe1\xbe\xa6|\xe1\xbd\xa6\xce\xb9|\xe1\xbd\xae\xce\x99||"
"\xe1\xbe\xa7|\xe1\xbd\xa7\xce\xb9|\xe1\xbd\xaf\xce\x99||"
"\xe1\xbe\xa8|\xe1\xbd\xa0\xce\xb9|\xe1\xbd\xa8\xce\x99|\xe1\xbe\xa0|"
"\xe1\xbe\xa9|\xe1\xbd\xa1\xce\xb9|\xe1\xbd\xa9\xce\x99|\xe1\xbe\xa1|"
"\xe1\xbe\xaa|\xe1\xbd\xa2\xce\xb9|\xe1\xbd\xaa\xce\x99|\xe1\xbe\xa2|"
"\xe1\xbe\xab|\xe1\xbd\xa3\xce\xb9|\xe1\xbd\xab\xce\x99|\xe1\xbe\xa3|"
"\xe1\xbe\xac|\xe1\xbd\xa4\xce\xb9|\xe1\xbd\xac\xce\x99|\xe1\xbe\xa4|"
"\xe1\xbe\xad|\xe1\xbd\xa5\xce\xb9|\xe1\xbd\xad\xce\x99|\xe1\xbe\xa5|"
"\xe1\xbe\xae|\xe1\xbd\xa6\xce\xb9|\xe1\xbd\xae\xce\x99|\xe1\xbe\xa6|"
"\xe1\xbe\xaf|\xe1\xbd\xa7\xce\xb9|\xe1\xbd\xaf\xce\x99|\xe1\xbe\xa7|"
"\xe1\xbe\xb2|\xe1\xbd\xb0\xce\xb9|\xe1\xbe\xba\xce\x99||"
"\xe1\xbe\xb3|\xce\xb1\xce\xb9|\xce\x91\xce\x99||"
"\xe1\xbe\xb4|\xce\xac\xce\xb9|\xce\x86\xce\x99||"
"\xe1\xbe\xb6|\xce\xb1\xcd\x82|\xce\x91\xcd\x82||"
"\xe1\xbe\xb7|\xce\xb1\xcd\x82\xce\xb9|\xce\x91\xcd\x82\xce\x99||"
"\xe1\xbe\xbc|\xce\xb1\xce\xb9|\xce\x91\xce\x99|\xe1\xbe\xb3|"
"\xe1\xbe\xbe|\xce\xb9|\xce\x99||"
"\xe1\xbf\x82|\xe1\xbd\xb4\xce\xb9|\xe1\xbf\x8a\xce\x99||"
"\xe1\xbf\x83|\xce\xb7\xce\xb9|\xce\x97\xce\x99||"
"\xe1\xbf\x84|\xce\xae\xce\xb9|\xce\x89\xce\x99||"
"\xe1\xbf\x86|\xce\xb7\xcd\x82|\xce\x97\xcd\x82||"
"\xe1\xbf\x87|\xce\xb7\xcd\x82\xce\xb9|\xce\x97\xcd\x82\xce\x99||"
"\xe1\xbf\x8c|\xce\xb7\xce\xb9|\xce\x97\xce\x99|\xe1\xbf\x83|"
"\xe1\xbf\x92|\xce\xb9\xcc\x88\xcc\x80|\xce\x99\xcc\x88\xcc\x80||"
"\xe1\xbf\x93|\xce\xb9\xcc\x88\xcc\x81|\xce\x99\xcc\x88\xcc\x81||"
"\xe1\xbf\x96|\xce\xb9\xcd\x82|\xce\x99\xcd\x82||"
"\xe1\xbf\x97|\xce\xb9\xcc\x88\xcd\x82|\xce\x99\xcc\x88\xcd\x82||"
"\xe1\xbf\xa2|\xcf\x85\xcc\x88\xcc\x80|\xce\xa5\xcc\x88\xcc\x80||"
"\xe1\xbf\xa3|\xcf\x85\xcc\x88\xcc\x81|\xce\xa5\xcc\x88\xcc\x81||"
"\xe1\xbf\xa4|\xcf\x81\xcc\x93|\xce\xa1\xcc\x93||"
"\xe1\xbf\xa6|\xcf\x85\xcd\x82|\xce\xa5\xcd\x82||"
"\xe1\xbf\xa7|\xcf\x85\xcc\x88\xcd\x82|\xce\xa5\xcc\x88\xcd\x82||"
"\xe1\xbf\xb2|\xe1\xbd\xbc\xce\xb9|\xe1\xbf\xba\xce\x99||"
"\xe1\xbf\xb3|\xcf\x89\xce\xb9|\xce\xa9\xce\x99||"
"\xe1\xbf\xb4|\xcf\x8e\xce\xb9|\xce\x8f\xce\x99||"
"\xe1\xbf\xb6|\xcf\x89\xcd\x82|\xce\xa9\xcd\x82||"
"\xe1\xbf\xb7|\xcf\x89\xcd\x82\xce\xb9|\xce\xa9\xcd\x82\xce\x99||"
"\xe1\xbf\xbc|\xcf\x89\xce\xb9|\xce\xa9\xce\x99|\xe1\xbf\xb3|"
"\xe2\x84\xa6|\xcf\x89||\xcf\x89|"
"\xe2\x84\xaa|k||k|"
"\xe2\x84\xab|\xc3\xa5||\xc3\xa5|"
"\xea\xad\xb0|\xe1\x8e\xa0|\xe1\x8e\xa0||"
"\xea\xad\xb1|\xe1\x8e\xa1|\xe1\x8e\xa1||"
"\xea\xad\xb2|\xe1\x8e\xa2|\xe1\x8e\xa2||"
"\xea\xad\xb3|\xe1\x8e\xa3|\xe1\x8e\xa3||"
"\xea\xad\xb4|\xe1\x8e\xa4|\xe1\x8e\xa4||"
"\xea\xad\xb5|\xe1\x8e\xa5|\xe1\x8e\xa5||"
"\xea\xad\xb6|\xe1\x8e\xa6|\xe1\x8e\xa6||"
"\xea\xad\xb7|\xe1\x8e\xa7|\xe1\x8e\xa7||"
"\xea\xad\xb8|\xe1\x8e\xa8|\xe1\x8e\xa8||"
"\xea\xad\xb9|\xe1\x8e\xa9|\xe1\x8e\xa9||"
"\xea\xad\xba|\xe1\x8e\xaa|\xe1\x8e\xaa||"
"\xea\xad\xbb|\xe1\x8e\xab|\xe1\x8e\xab||"
"\xea\xad\xbc|\xe1\x8e\xac|\xe1\x8e\xac||"
"\xea\xad\xbd|\xe1\x8e\xad|\xe1\x8e\xad||"
"\xea\xad\xbe|\xe1\x8e\xae|\xe1\x8e\xae||"
"\xea\xad\xbf|\xe1\x8e\xaf|\xe1\x8e\xaf||"
"\xea\xae\x80|\xe1\x8e\xb0|\xe1\x8e\xb0||"
"\xea\xae\x81|\xe1\x8e\xb1|\xe1\x8e\xb1||"
"\xea\xae\x82|\xe1\x8e\xb2|\xe1\x8e\xb2||"
"\xea\xae\x83|\xe1\x8e\xb3|\xe1\x8e\xb3||"
"\xea\xae\x84|\xe1\x8e\xb4|\xe1\x8e\xb4||"
"\xea\xae\x85|\xe1\x8e\xb5|\xe1\x8e\xb5||"
"\xea\xae\x86|\xe1\x8e\xb6|\xe1\x8e\xb6||"
"\xea\xae\x87|\xe1\x8e\xb7|\xe1\x8e\xb7||"
"\xea\xae\x88|\xe1\x8e\xb8|\xe1\x8e\xb8||"
"\xea\xae\x89|\xe1\x8e\xb9|\xe1\x8e\xb9||"
"\xea\xae\x8a|\xe1\x8e\xba|\xe1\x8e\xba||"
"\xea\xae\x8b|\xe1\x8e\xbb|\xe1\x8e\xbb||"
"\xea\xae\x8c|\xe1\x8e\xbc|\xe1\x8e\xbc||"
"\xea\xae\x8d|\xe1\x8e\xbd|\xe1\x8e\xbd||"
"\xea\xae\x8e|\xe1\x8e\xbe|\xe1\x8e\xbe||"
"\xea\xae\x8f|\xe1\x8e\xbf|\xe1\x8e\xbf||"
"\xea\xae\x90|\xe1\x8f\x80|\xe1\x8f\x80||"
"\xea\xae\x91|\xe1\x8f\x81|\xe1\x8f\x81||"
"\xea\xae\x92|\xe1\x8f\x82|\xe1\x8f\x82||"
"\xea\xae\x93|\xe1\x8f\x83|\xe1\x8f\x83||"
"\xea\xae\x94|\xe1\x8f\x84|\xe1\x8f\x84||"
"\xea\xae\x95|\xe1\x8f\x85|\xe1\x8f\x85||"
"\xea\xae\x96|\xe1\x8f\x86|\xe1\x8f\x86||"
"\xea\xae\x97|\xe1\x8f\x87|\xe1\x8f\x87||"
"\xea\xae\x98|\xe1\x8f\x88|\xe1\x8f\x88||"
"\xea\xae\x99|\xe1\x8f\x89|\xe1\x8f\x89||"
"\xea\xae\x9a|\xe1\x8f\x8a|\xe1\x8f\x8a||"
"\xea\xae\x9b|\xe1\x8f\x8b|\xe1\x8f\x8b||"
"\xea\xae\x9c|\xe1\x8f\x8c|\xe1\x8f\x8c||"
"\xea\xae\x9d|\xe1\x8f\x8d|\xe1\x8f\x8d||"
"\xea\xae\x9e|\xe1\x8f\x8e|\xe1\x8f\x8e||"
"\xea\xae\x9f|\xe1\x8f\x8f|\xe1\x8f\x8f||"
"\xea\xae\xa0|\xe1\x8f\x90|\xe1\x8f\x90||"
"\xea\xae\xa1|\xe1\x8f\x91|\xe1\x8f\x91||"
"\xea\xae\xa2|\xe1\x8f\x92|\xe1\x8f\x92||"
"\xea\xae\xa3|\xe1\x8f\x93|\xe1\x8f\x93||"
"\xea\xae\xa4|\xe1\x8f\x94|\xe1\x8f\x94||"
"\xea\xae\xa5|\xe1\x8f\x95|\xe1\x8f\x95||"
"\xea\xae\xa6|\xe1\x8f\x96|\xe1\x8f\x96||"
"\xea\xae\xa7|\xe1\x8f\x97|\xe1\x8f\x97||"
"\xea\xae\xa8|\xe1\x8f\x98|\xe1\x8f\x98||"
"\xea\xae\xa9|\xe1\x8f\x99|\xe1\x8f\x99||"
"\xea\xae\xaa|\xe1\x8f\x9a|\xe1\x8f\x9a||"
"\xea\xae\xab|\xe1\x8f\x9b|\xe1\x8f\x9b||"
"\xea\xae\xac|\xe1\x8f\x9c|\xe1\x8f\x9c||"
"\xea\xae\xad|\xe1\x8f\x9d|\xe1\x8f\x9d||"
"\xea\xae\xae|\xe1\x8f\x9e|\xe1\x8f\x9e||"
"\xea\xae\xaf|\xe1\x8f\x9f|\xe1\x8f\x9f||"
"\xea\xae\xb0|\xe1\x8f\xa0|\xe1\x8f\xa0||"
"\xea\xae\xb1|\xe1\x8f\xa1|\xe1\x8f\xa1||"
"\xea\xae\xb2|\xe1\x8f\xa2|\xe1\x8f\xa2||"
"\xea\xae\xb3|\xe1\x8f\xa3|\xe1\x8f\xa3||"
"\xea\xae\xb4|\xe1\x8f\xa4|\xe1\x8f\xa4||"
"\xea\xae\xb5|\xe1\x8f\xa5|\xe1\x8f\xa5||"
"\xea\xae\xb6|\xe1\x8f\xa6|\xe1\x8f\xa6||"
"\xea\xae\xb7|\xe1\x8f\xa7|\xe1\x8f\xa7||"
"\xea\xae\xb8|\xe1\x8f\xa8|\xe1\x8f\xa8||"
"\xea\xae\xb9|\xe1\x8f\xa9|\xe1\x8f\xa9||"
"\xea\xae\xba|\xe1\x8f\xaa|\xe1\x8f\xaa||"
"\xea\xae\xbb|\xe1\x8f\xab|\xe1\x8f\xab||"
"\xea\xae\xbc|\xe1\x8f\xac|\xe1\x8f\xac||"
"\xea\xae\xbd|\xe1\x8f\xad|\xe1\x8f\xad||"
"\xea\xae\xbe|\xe1\x8f\xae|\xe1\x8f\xae||"
"\xea\xae\xbf|\xe1\x8f\xaf|\xe1\x8f\xaf||"
"\xef\xac\x80|ff|FF||"
"\xef\xac\x81|fi|FI||"
"\xef\xac\x82|fl|FL||"
"\xef\xac\x83|ffi|FFI||"
"\xef\xac\x84|ffl|FFL||"
"\xef\xac\x85|st|ST||"
"\xef\xac\x86|st|ST||"
"\xef\xac\x93|\xd5\xb4\xd5\xb6|\xd5\x84\xd5\x86||"
"\xef\xac\x94|\xd5\xb4\xd5\xa5|\xd5\x84\xd4\xb5||"
"\xef\xac\x95|\xd5\xb4\xd5\xab|\xd5\x84\xd4\xbb||"
"\xef\xac\x96|\xd5\xbe\xd5\xb6|\xd5\x8e\xd5\x86||"
"\xef\xac\x97|\xd5\xb4\xd5\xad|\xd5\x84\xd4\xbd||"

//--Autogenerated -- end of section automatically generated
;
572 | |
573 | class CaseConverter : public ICaseConverter { |
574 | // Maximum length of a case conversion result is 6 bytes in UTF-8 |
575 | enum { maxConversionLength=6 }; |
576 | struct ConversionString { |
577 | char conversion[maxConversionLength+1]; |
578 | ConversionString() noexcept : conversion{} { |
579 | } |
580 | }; |
581 | // Conversions are initially store in a vector of structs but then decomposed into |
582 | // parallel arrays as that is about 10% faster to search. |
583 | struct CharacterConversion { |
584 | int character; |
585 | ConversionString conversion; |
586 | CharacterConversion() noexcept : character(0) { |
587 | // Empty case: NUL -> "". |
588 | } |
589 | CharacterConversion(int character_, std::string_view conversion_) noexcept : character(character_) { |
590 | assert(conversion_.length() <= maxConversionLength); |
591 | try { |
592 | // This can never fail as std::string_view::copy should only throw |
593 | // std::out_of_range if pos > size() and pos == 0 here |
594 | conversion_.copy(conversion.conversion, conversion_.length()); |
595 | } catch (...) { |
596 | // Ignore any exception |
597 | } |
598 | } |
599 | bool operator<(const CharacterConversion &other) const noexcept { |
600 | return character < other.character; |
601 | } |
602 | }; |
603 | typedef std::vector<CharacterConversion> CharacterToConversion; |
604 | CharacterToConversion characterToConversion; |
605 | // The parallel arrays |
606 | std::vector<int> characters; |
607 | std::vector<ConversionString> conversions; |
608 | |
609 | public: |
610 | CaseConverter() = default; |
611 | // Deleted so CaseConverter objects can not be copied. |
612 | CaseConverter(const CaseConverter &) = delete; |
613 | CaseConverter(CaseConverter &&) = delete; |
614 | CaseConverter &operator=(const CaseConverter &) = delete; |
615 | CaseConverter &operator=(CaseConverter &&) = delete; |
616 | virtual ~CaseConverter() noexcept = default; |
617 | bool Initialised() const noexcept { |
618 | return !characters.empty(); |
619 | } |
620 | void Add(int character, const char *conversion) { |
621 | characterToConversion.emplace_back(character, conversion); |
622 | } |
623 | const char *Find(int character) { |
624 | const std::vector<int>::iterator it = std::lower_bound(characters.begin(), characters.end(), character); |
625 | if (it == characters.end()) |
626 | return nullptr; |
627 | else if (*it == character) |
628 | return conversions[it - characters.begin()].conversion; |
629 | else |
630 | return nullptr; |
631 | } |
632 | size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed) override { |
633 | size_t lenConverted = 0; |
634 | size_t mixedPos = 0; |
635 | unsigned char bytes[UTF8MaxBytes + 1]{}; |
636 | while (mixedPos < lenMixed) { |
637 | const unsigned char leadByte = mixed[mixedPos]; |
638 | const char *caseConverted = nullptr; |
639 | size_t lenMixedChar = 1; |
640 | if (UTF8IsAscii(leadByte)) { |
641 | caseConverted = Find(leadByte); |
642 | } else { |
643 | bytes[0] = leadByte; |
644 | const int widthCharBytes = UTF8BytesOfLead[leadByte]; |
645 | for (int b=1; b<widthCharBytes; b++) { |
646 | bytes[b] = (mixedPos+b < lenMixed) ? mixed[mixedPos+b] : 0; |
647 | } |
648 | const int classified = UTF8Classify(bytes, widthCharBytes); |
649 | if (!(classified & UTF8MaskInvalid)) { |
650 | // valid UTF-8 |
651 | lenMixedChar = classified & UTF8MaskWidth; |
652 | const int character = UnicodeFromUTF8(bytes); |
653 | caseConverted = Find(character); |
654 | } |
655 | } |
656 | if (caseConverted) { |
657 | // Character has a conversion so copy that conversion in |
658 | while (*caseConverted) { |
659 | converted[lenConverted++] = *caseConverted++; |
660 | if (lenConverted >= sizeConverted) |
661 | return 0; |
662 | } |
663 | } else { |
664 | // Character has no conversion so copy the input to output |
665 | for (size_t i=0; i<lenMixedChar; i++) { |
666 | converted[lenConverted++] = mixed[mixedPos+i]; |
667 | if (lenConverted >= sizeConverted) |
668 | return 0; |
669 | } |
670 | } |
671 | mixedPos += lenMixedChar; |
672 | } |
673 | return lenConverted; |
674 | } |
675 | void FinishedAdding() { |
676 | std::sort(characterToConversion.begin(), characterToConversion.end()); |
677 | characters.reserve(characterToConversion.size()); |
678 | conversions.reserve(characterToConversion.size()); |
679 | for (const CharacterConversion &chConv : characterToConversion) { |
680 | characters.push_back(chConv.character); |
681 | conversions.push_back(chConv.conversion); |
682 | } |
683 | // Empty the original calculated data completely |
684 | CharacterToConversion().swap(characterToConversion); |
685 | } |
686 | }; |
687 | |
688 | CaseConverter caseConvFold; |
689 | CaseConverter caseConvUp; |
690 | CaseConverter caseConvLow; |
691 | |
692 | void AddSymmetric(CaseConversion conversion, int lower,int upper) { |
693 | char lowerUTF8[UTF8MaxBytes+1]; |
694 | UTF8FromUTF32Character(lower, lowerUTF8); |
695 | char upperUTF8[UTF8MaxBytes+1]; |
696 | UTF8FromUTF32Character(upper, upperUTF8); |
697 | |
698 | switch (conversion) { |
699 | case CaseConversion::fold: |
700 | caseConvFold.Add(upper, lowerUTF8); |
701 | break; |
702 | case CaseConversion::upper: |
703 | caseConvUp.Add(lower, upperUTF8); |
704 | break; |
705 | case CaseConversion::lower: |
706 | caseConvLow.Add(upper, lowerUTF8); |
707 | break; |
708 | } |
709 | } |
710 | |
711 | void SetupConversions(CaseConversion conversion) { |
712 | // First initialize for the symmetric ranges |
713 | for (size_t i=0; i<std::size(symmetricCaseConversionRanges);) { |
714 | const int lower = symmetricCaseConversionRanges[i++]; |
715 | const int upper = symmetricCaseConversionRanges[i++]; |
716 | const int length = symmetricCaseConversionRanges[i++]; |
717 | const int pitch = symmetricCaseConversionRanges[i++]; |
718 | for (int j=0; j<length*pitch; j+=pitch) { |
719 | AddSymmetric(conversion, lower+j, upper+j); |
720 | } |
721 | } |
722 | // Add the symmetric singletons |
723 | for (size_t i=0; i<std::size(symmetricCaseConversions);) { |
724 | const int lower = symmetricCaseConversions[i++]; |
725 | const int upper = symmetricCaseConversions[i++]; |
726 | AddSymmetric(conversion, lower, upper); |
727 | } |
728 | // Add the complex cases |
729 | const char *sComplex = complexCaseConversions; |
730 | while (*sComplex) { |
731 | // Longest ligature is 3 character so 5 for safety |
732 | constexpr size_t lenUTF8 = 5*UTF8MaxBytes+1; |
733 | unsigned char originUTF8[lenUTF8]{}; |
734 | char foldedUTF8[lenUTF8]{}; |
735 | char lowerUTF8[lenUTF8]{}; |
736 | char upperUTF8[lenUTF8]{}; |
737 | size_t i = 0; |
738 | while (*sComplex && *sComplex != '|') { |
739 | originUTF8[i++] = *sComplex; |
740 | sComplex++; |
741 | } |
742 | sComplex++; |
743 | originUTF8[i] = 0; |
744 | i = 0; |
745 | while (*sComplex && *sComplex != '|') { |
746 | foldedUTF8[i++] = *sComplex; |
747 | sComplex++; |
748 | } |
749 | sComplex++; |
750 | foldedUTF8[i] = 0; |
751 | i = 0; |
752 | while (*sComplex && *sComplex != '|') { |
753 | upperUTF8[i++] = *sComplex; |
754 | sComplex++; |
755 | } |
756 | sComplex++; |
757 | upperUTF8[i] = 0; |
758 | i = 0; |
759 | while (*sComplex && *sComplex != '|') { |
760 | lowerUTF8[i++] = *sComplex; |
761 | sComplex++; |
762 | } |
763 | sComplex++; |
764 | lowerUTF8[i] = 0; |
765 | |
766 | const int character = UnicodeFromUTF8(originUTF8); |
767 | |
768 | if (conversion == CaseConversion::fold && foldedUTF8[0]) { |
769 | caseConvFold.Add(character, foldedUTF8); |
770 | } |
771 | |
772 | if (conversion == CaseConversion::upper && upperUTF8[0]) { |
773 | caseConvUp.Add(character, upperUTF8); |
774 | } |
775 | |
776 | if (conversion == CaseConversion::lower && lowerUTF8[0]) { |
777 | caseConvLow.Add(character, lowerUTF8); |
778 | } |
779 | } |
780 | |
781 | switch (conversion) { |
782 | case CaseConversion::fold: |
783 | caseConvFold.FinishedAdding(); |
784 | break; |
785 | case CaseConversion::upper: |
786 | caseConvUp.FinishedAdding(); |
787 | break; |
788 | case CaseConversion::lower: |
789 | caseConvLow.FinishedAdding(); |
790 | break; |
791 | } |
792 | } |
793 | |
794 | CaseConverter *ConverterForConversion(CaseConversion conversion) noexcept { |
795 | switch (conversion) { |
796 | case CaseConversion::fold: |
797 | return &caseConvFold; |
798 | case CaseConversion::upper: |
799 | return &caseConvUp; |
800 | case CaseConversion::lower: |
801 | return &caseConvLow; |
802 | } |
803 | return nullptr; |
804 | } |
805 | |
806 | } |
807 | |
808 | namespace Scintilla::Internal { |
809 | |
810 | ICaseConverter *ConverterFor(CaseConversion conversion) { |
811 | CaseConverter *pCaseConv = ConverterForConversion(conversion); |
812 | if (!pCaseConv->Initialised()) |
813 | SetupConversions(conversion); |
814 | return pCaseConv; |
815 | } |
816 | |
817 | const char *CaseConvert(int character, CaseConversion conversion) { |
818 | CaseConverter *pCaseConv = ConverterForConversion(conversion); |
819 | if (!pCaseConv->Initialised()) |
820 | SetupConversions(conversion); |
821 | return pCaseConv->Find(character); |
822 | } |
823 | |
824 | size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed, CaseConversion conversion) { |
825 | CaseConverter *pCaseConv = ConverterForConversion(conversion); |
826 | if (!pCaseConv->Initialised()) |
827 | SetupConversions(conversion); |
828 | return pCaseConv->CaseConvertString(converted, sizeConverted, mixed, lenMixed); |
829 | } |
830 | |
831 | std::string CaseConvertString(const std::string &s, CaseConversion conversion) { |
832 | std::string retMapped(s.length() * maxExpansionCaseConversion, 0); |
833 | const size_t lenMapped = CaseConvertString(&retMapped[0], retMapped.length(), s.c_str(), s.length(), |
834 | conversion); |
835 | retMapped.resize(lenMapped); |
836 | return retMapped; |
837 | } |
838 | |
839 | } |
840 | |