1// Scintilla source code edit control
2// Encoding: UTF-8
3/** @file CaseConvert.cxx
4 ** Case fold characters and convert them to upper or lower case.
5 ** Tables automatically regenerated by scripts/GenerateCaseConvert.py
6 ** Should only be rarely regenerated for new versions of Unicode.
7 **/
8// Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
9// The License.txt file describes the conditions under which this software may be distributed.
10
11#include <cassert>
12#include <cstring>
13
14#include <stdexcept>
15#include <string>
16#include <string_view>
17#include <vector>
18#include <algorithm>
19
20#include "CaseConvert.h"
21#include "UniConversion.h"
22
23using namespace Scintilla::Internal;
24
25namespace {
26 // Use an unnamed namespace to protect the declarations from name conflicts
27
28// Unicode code points are ordered by groups and follow patterns.
29// Most characters (pitch==1) are in ranges for a particular alphabet and their
30// upper case forms are a fixed distance away.
31// Another pattern (pitch==2) is where each lower case letter is preceded by
32// the upper case form. These are also grouped into ranges.
33
// Ranges of code points whose lower and upper case forms are a constant
// distance apart. Each row holds four ints:
//   lower  - first lower case code point of the range
//   upper  - first upper case code point of the range
//   length - number of lower/upper pairs in the range
//   pitch  - step between consecutive pairs (1 or 2 in the current data)
// The table is read-only, so it is constexpr: it cannot be modified at run time
// and needs no dynamic initialisation.
constexpr int symmetricCaseConversionRanges[] = {
//lower, upper, range length, range pitch
//++Autogenerated -- start of section automatically generated
//**\(\*\n\)
97,65,26,1,
224,192,23,1,
248,216,7,1,
257,256,24,2,
314,313,8,2,
331,330,23,2,
462,461,8,2,
479,478,9,2,
505,504,20,2,
547,546,9,2,
583,582,5,2,
945,913,17,1,
963,931,9,1,
985,984,12,2,
1072,1040,32,1,
1104,1024,16,1,
1121,1120,17,2,
1163,1162,27,2,
1218,1217,7,2,
1233,1232,48,2,
1377,1329,38,1,
4304,7312,43,1,
7681,7680,75,2,
7841,7840,48,2,
7936,7944,8,1,
7952,7960,6,1,
7968,7976,8,1,
7984,7992,8,1,
8000,8008,6,1,
8032,8040,8,1,
8560,8544,16,1,
9424,9398,26,1,
11312,11264,47,1,
11393,11392,50,2,
11520,4256,38,1,
42561,42560,23,2,
42625,42624,14,2,
42787,42786,7,2,
42803,42802,31,2,
42879,42878,5,2,
42903,42902,10,2,
42933,42932,6,2,
65345,65313,26,1,
66600,66560,40,1,
66776,66736,36,1,
68800,68736,51,1,
71872,71840,32,1,
93792,93760,32,1,
125218,125184,34,1,

//--Autogenerated -- end of section automatically generated
};

// The reader consumes the table four ints at a time, so a partial row would be a generator error.
static_assert(std::size(symmetricCaseConversionRanges) % 4 == 0,
	"symmetricCaseConversionRanges must contain whole {lower, upper, length, pitch} rows");
90
91// Code points that are symmetric but don't fit into a range of similar characters
92// are listed here.
93
// Individual lower/upper pairs that do not belong to any regular range.
// Each row holds two ints: the lower case code point then the upper case one.
// The table is read-only, so it is constexpr: it cannot be modified at run time
// and needs no dynamic initialisation.
constexpr int symmetricCaseConversions[] = {
//lower, upper
//++Autogenerated -- start of section automatically generated
//**1 \(\*\n\)
255,376,
307,306,
309,308,
311,310,
378,377,
380,379,
382,381,
384,579,
387,386,
389,388,
392,391,
396,395,
402,401,
405,502,
409,408,
410,573,
414,544,
417,416,
419,418,
421,420,
424,423,
429,428,
432,431,
436,435,
438,437,
441,440,
445,444,
447,503,
454,452,
457,455,
460,458,
477,398,
499,497,
501,500,
572,571,
575,11390,
576,11391,
578,577,
592,11375,
593,11373,
594,11376,
595,385,
596,390,
598,393,
599,394,
601,399,
603,400,
604,42923,
608,403,
609,42924,
611,404,
613,42893,
614,42922,
616,407,
617,406,
618,42926,
619,11362,
620,42925,
623,412,
625,11374,
626,413,
629,415,
637,11364,
640,422,
642,42949,
643,425,
647,42929,
648,430,
649,580,
650,433,
651,434,
652,581,
658,439,
669,42930,
670,42928,
881,880,
883,882,
887,886,
891,1021,
892,1022,
893,1023,
940,902,
941,904,
942,905,
943,906,
972,908,
973,910,
974,911,
983,975,
1010,1017,
1011,895,
1016,1015,
1019,1018,
1231,1216,
4349,7357,
4350,7358,
4351,7359,
7545,42877,
7549,11363,
7566,42950,
8017,8025,
8019,8027,
8021,8029,
8023,8031,
8048,8122,
8049,8123,
8050,8136,
8051,8137,
8052,8138,
8053,8139,
8054,8154,
8055,8155,
8056,8184,
8057,8185,
8058,8170,
8059,8171,
8060,8186,
8061,8187,
8112,8120,
8113,8121,
8144,8152,
8145,8153,
8160,8168,
8161,8169,
8165,8172,
8526,8498,
8580,8579,
11361,11360,
11365,570,
11366,574,
11368,11367,
11370,11369,
11372,11371,
11379,11378,
11382,11381,
11500,11499,
11502,11501,
11507,11506,
11559,4295,
11565,4301,
42874,42873,
42876,42875,
42892,42891,
42897,42896,
42899,42898,
42900,42948,
42947,42946,
42952,42951,
42954,42953,
42998,42997,
43859,42931,

//--Autogenerated -- end of section automatically generated
};

// The reader consumes the table two ints at a time, so an odd count would be a generator error.
static_assert(std::size(symmetricCaseConversions) % 2 == 0,
	"symmetricCaseConversions must contain whole {lower, upper} pairs");
252
253// Characters that have complex case conversions are listed here.
254// This includes cases where more than one character is needed for a conversion,
255// folding is different to lowering, or (as appropriate) upper(lower(x)) != x or
256// lower(upper(x)) != x.
257
// Table of irregular conversions stored as one concatenated string literal.
// Each record is four UTF-8 fields, every one terminated by '|':
//   original character | folded form | upper case form | lower case form |
// An empty field means the record contributes nothing for that conversion.
// constexpr makes the pointer itself immutable so the table cannot be reseated.
constexpr const char *complexCaseConversions =
// Original | Folded | Upper | Lower |
//++Autogenerated -- start of section automatically generated
//**2 \(\*\n\)
"\xc2\xb5|\xce\xbc|\xce\x9c||"
"\xc3\x9f|ss|SS||"
"\xc4\xb0|i\xcc\x87||i\xcc\x87|"
"\xc4\xb1||I||"
"\xc5\x89|\xca\xbcn|\xca\xbcN||"
"\xc5\xbf|s|S||"
"\xc7\x85|\xc7\x86|\xc7\x84|\xc7\x86|"
"\xc7\x88|\xc7\x89|\xc7\x87|\xc7\x89|"
"\xc7\x8b|\xc7\x8c|\xc7\x8a|\xc7\x8c|"
"\xc7\xb0|j\xcc\x8c|J\xcc\x8c||"
"\xc7\xb2|\xc7\xb3|\xc7\xb1|\xc7\xb3|"
"\xcd\x85|\xce\xb9|\xce\x99||"
"\xce\x90|\xce\xb9\xcc\x88\xcc\x81|\xce\x99\xcc\x88\xcc\x81||"
"\xce\xb0|\xcf\x85\xcc\x88\xcc\x81|\xce\xa5\xcc\x88\xcc\x81||"
"\xcf\x82|\xcf\x83|\xce\xa3||"
"\xcf\x90|\xce\xb2|\xce\x92||"
"\xcf\x91|\xce\xb8|\xce\x98||"
"\xcf\x95|\xcf\x86|\xce\xa6||"
"\xcf\x96|\xcf\x80|\xce\xa0||"
"\xcf\xb0|\xce\xba|\xce\x9a||"
"\xcf\xb1|\xcf\x81|\xce\xa1||"
"\xcf\xb4|\xce\xb8||\xce\xb8|"
"\xcf\xb5|\xce\xb5|\xce\x95||"
"\xd6\x87|\xd5\xa5\xd6\x82|\xd4\xb5\xd5\x92||"
"\xe1\x8e\xa0|||\xea\xad\xb0|"
"\xe1\x8e\xa1|||\xea\xad\xb1|"
"\xe1\x8e\xa2|||\xea\xad\xb2|"
"\xe1\x8e\xa3|||\xea\xad\xb3|"
"\xe1\x8e\xa4|||\xea\xad\xb4|"
"\xe1\x8e\xa5|||\xea\xad\xb5|"
"\xe1\x8e\xa6|||\xea\xad\xb6|"
"\xe1\x8e\xa7|||\xea\xad\xb7|"
"\xe1\x8e\xa8|||\xea\xad\xb8|"
"\xe1\x8e\xa9|||\xea\xad\xb9|"
"\xe1\x8e\xaa|||\xea\xad\xba|"
"\xe1\x8e\xab|||\xea\xad\xbb|"
"\xe1\x8e\xac|||\xea\xad\xbc|"
"\xe1\x8e\xad|||\xea\xad\xbd|"
"\xe1\x8e\xae|||\xea\xad\xbe|"
"\xe1\x8e\xaf|||\xea\xad\xbf|"
"\xe1\x8e\xb0|||\xea\xae\x80|"
"\xe1\x8e\xb1|||\xea\xae\x81|"
"\xe1\x8e\xb2|||\xea\xae\x82|"
"\xe1\x8e\xb3|||\xea\xae\x83|"
"\xe1\x8e\xb4|||\xea\xae\x84|"
"\xe1\x8e\xb5|||\xea\xae\x85|"
"\xe1\x8e\xb6|||\xea\xae\x86|"
"\xe1\x8e\xb7|||\xea\xae\x87|"
"\xe1\x8e\xb8|||\xea\xae\x88|"
"\xe1\x8e\xb9|||\xea\xae\x89|"
"\xe1\x8e\xba|||\xea\xae\x8a|"
"\xe1\x8e\xbb|||\xea\xae\x8b|"
"\xe1\x8e\xbc|||\xea\xae\x8c|"
"\xe1\x8e\xbd|||\xea\xae\x8d|"
"\xe1\x8e\xbe|||\xea\xae\x8e|"
"\xe1\x8e\xbf|||\xea\xae\x8f|"
"\xe1\x8f\x80|||\xea\xae\x90|"
"\xe1\x8f\x81|||\xea\xae\x91|"
"\xe1\x8f\x82|||\xea\xae\x92|"
"\xe1\x8f\x83|||\xea\xae\x93|"
"\xe1\x8f\x84|||\xea\xae\x94|"
"\xe1\x8f\x85|||\xea\xae\x95|"
"\xe1\x8f\x86|||\xea\xae\x96|"
"\xe1\x8f\x87|||\xea\xae\x97|"
"\xe1\x8f\x88|||\xea\xae\x98|"
"\xe1\x8f\x89|||\xea\xae\x99|"
"\xe1\x8f\x8a|||\xea\xae\x9a|"
"\xe1\x8f\x8b|||\xea\xae\x9b|"
"\xe1\x8f\x8c|||\xea\xae\x9c|"
"\xe1\x8f\x8d|||\xea\xae\x9d|"
"\xe1\x8f\x8e|||\xea\xae\x9e|"
"\xe1\x8f\x8f|||\xea\xae\x9f|"
"\xe1\x8f\x90|||\xea\xae\xa0|"
"\xe1\x8f\x91|||\xea\xae\xa1|"
"\xe1\x8f\x92|||\xea\xae\xa2|"
"\xe1\x8f\x93|||\xea\xae\xa3|"
"\xe1\x8f\x94|||\xea\xae\xa4|"
"\xe1\x8f\x95|||\xea\xae\xa5|"
"\xe1\x8f\x96|||\xea\xae\xa6|"
"\xe1\x8f\x97|||\xea\xae\xa7|"
"\xe1\x8f\x98|||\xea\xae\xa8|"
"\xe1\x8f\x99|||\xea\xae\xa9|"
"\xe1\x8f\x9a|||\xea\xae\xaa|"
"\xe1\x8f\x9b|||\xea\xae\xab|"
"\xe1\x8f\x9c|||\xea\xae\xac|"
"\xe1\x8f\x9d|||\xea\xae\xad|"
"\xe1\x8f\x9e|||\xea\xae\xae|"
"\xe1\x8f\x9f|||\xea\xae\xaf|"
"\xe1\x8f\xa0|||\xea\xae\xb0|"
"\xe1\x8f\xa1|||\xea\xae\xb1|"
"\xe1\x8f\xa2|||\xea\xae\xb2|"
"\xe1\x8f\xa3|||\xea\xae\xb3|"
"\xe1\x8f\xa4|||\xea\xae\xb4|"
"\xe1\x8f\xa5|||\xea\xae\xb5|"
"\xe1\x8f\xa6|||\xea\xae\xb6|"
"\xe1\x8f\xa7|||\xea\xae\xb7|"
"\xe1\x8f\xa8|||\xea\xae\xb8|"
"\xe1\x8f\xa9|||\xea\xae\xb9|"
"\xe1\x8f\xaa|||\xea\xae\xba|"
"\xe1\x8f\xab|||\xea\xae\xbb|"
"\xe1\x8f\xac|||\xea\xae\xbc|"
"\xe1\x8f\xad|||\xea\xae\xbd|"
"\xe1\x8f\xae|||\xea\xae\xbe|"
"\xe1\x8f\xaf|||\xea\xae\xbf|"
"\xe1\x8f\xb0|||\xe1\x8f\xb8|"
"\xe1\x8f\xb1|||\xe1\x8f\xb9|"
"\xe1\x8f\xb2|||\xe1\x8f\xba|"
"\xe1\x8f\xb3|||\xe1\x8f\xbb|"
"\xe1\x8f\xb4|||\xe1\x8f\xbc|"
"\xe1\x8f\xb5|||\xe1\x8f\xbd|"
"\xe1\x8f\xb8|\xe1\x8f\xb0|\xe1\x8f\xb0||"
"\xe1\x8f\xb9|\xe1\x8f\xb1|\xe1\x8f\xb1||"
"\xe1\x8f\xba|\xe1\x8f\xb2|\xe1\x8f\xb2||"
"\xe1\x8f\xbb|\xe1\x8f\xb3|\xe1\x8f\xb3||"
"\xe1\x8f\xbc|\xe1\x8f\xb4|\xe1\x8f\xb4||"
"\xe1\x8f\xbd|\xe1\x8f\xb5|\xe1\x8f\xb5||"
"\xe1\xb2\x80|\xd0\xb2|\xd0\x92||"
"\xe1\xb2\x81|\xd0\xb4|\xd0\x94||"
"\xe1\xb2\x82|\xd0\xbe|\xd0\x9e||"
"\xe1\xb2\x83|\xd1\x81|\xd0\xa1||"
"\xe1\xb2\x84|\xd1\x82|\xd0\xa2||"
"\xe1\xb2\x85|\xd1\x82|\xd0\xa2||"
"\xe1\xb2\x86|\xd1\x8a|\xd0\xaa||"
"\xe1\xb2\x87|\xd1\xa3|\xd1\xa2||"
"\xe1\xb2\x88|\xea\x99\x8b|\xea\x99\x8a||"
"\xe1\xba\x96|h\xcc\xb1|H\xcc\xb1||"
"\xe1\xba\x97|t\xcc\x88|T\xcc\x88||"
"\xe1\xba\x98|w\xcc\x8a|W\xcc\x8a||"
"\xe1\xba\x99|y\xcc\x8a|Y\xcc\x8a||"
"\xe1\xba\x9a|a\xca\xbe|A\xca\xbe||"
"\xe1\xba\x9b|\xe1\xb9\xa1|\xe1\xb9\xa0||"
"\xe1\xba\x9e|ss||\xc3\x9f|"
"\xe1\xbd\x90|\xcf\x85\xcc\x93|\xce\xa5\xcc\x93||"
"\xe1\xbd\x92|\xcf\x85\xcc\x93\xcc\x80|\xce\xa5\xcc\x93\xcc\x80||"
"\xe1\xbd\x94|\xcf\x85\xcc\x93\xcc\x81|\xce\xa5\xcc\x93\xcc\x81||"
"\xe1\xbd\x96|\xcf\x85\xcc\x93\xcd\x82|\xce\xa5\xcc\x93\xcd\x82||"
"\xe1\xbe\x80|\xe1\xbc\x80\xce\xb9|\xe1\xbc\x88\xce\x99||"
"\xe1\xbe\x81|\xe1\xbc\x81\xce\xb9|\xe1\xbc\x89\xce\x99||"
"\xe1\xbe\x82|\xe1\xbc\x82\xce\xb9|\xe1\xbc\x8a\xce\x99||"
"\xe1\xbe\x83|\xe1\xbc\x83\xce\xb9|\xe1\xbc\x8b\xce\x99||"
"\xe1\xbe\x84|\xe1\xbc\x84\xce\xb9|\xe1\xbc\x8c\xce\x99||"
"\xe1\xbe\x85|\xe1\xbc\x85\xce\xb9|\xe1\xbc\x8d\xce\x99||"
"\xe1\xbe\x86|\xe1\xbc\x86\xce\xb9|\xe1\xbc\x8e\xce\x99||"
"\xe1\xbe\x87|\xe1\xbc\x87\xce\xb9|\xe1\xbc\x8f\xce\x99||"
"\xe1\xbe\x88|\xe1\xbc\x80\xce\xb9|\xe1\xbc\x88\xce\x99|\xe1\xbe\x80|"
"\xe1\xbe\x89|\xe1\xbc\x81\xce\xb9|\xe1\xbc\x89\xce\x99|\xe1\xbe\x81|"
"\xe1\xbe\x8a|\xe1\xbc\x82\xce\xb9|\xe1\xbc\x8a\xce\x99|\xe1\xbe\x82|"
"\xe1\xbe\x8b|\xe1\xbc\x83\xce\xb9|\xe1\xbc\x8b\xce\x99|\xe1\xbe\x83|"
"\xe1\xbe\x8c|\xe1\xbc\x84\xce\xb9|\xe1\xbc\x8c\xce\x99|\xe1\xbe\x84|"
"\xe1\xbe\x8d|\xe1\xbc\x85\xce\xb9|\xe1\xbc\x8d\xce\x99|\xe1\xbe\x85|"
"\xe1\xbe\x8e|\xe1\xbc\x86\xce\xb9|\xe1\xbc\x8e\xce\x99|\xe1\xbe\x86|"
"\xe1\xbe\x8f|\xe1\xbc\x87\xce\xb9|\xe1\xbc\x8f\xce\x99|\xe1\xbe\x87|"
"\xe1\xbe\x90|\xe1\xbc\xa0\xce\xb9|\xe1\xbc\xa8\xce\x99||"
"\xe1\xbe\x91|\xe1\xbc\xa1\xce\xb9|\xe1\xbc\xa9\xce\x99||"
"\xe1\xbe\x92|\xe1\xbc\xa2\xce\xb9|\xe1\xbc\xaa\xce\x99||"
"\xe1\xbe\x93|\xe1\xbc\xa3\xce\xb9|\xe1\xbc\xab\xce\x99||"
"\xe1\xbe\x94|\xe1\xbc\xa4\xce\xb9|\xe1\xbc\xac\xce\x99||"
"\xe1\xbe\x95|\xe1\xbc\xa5\xce\xb9|\xe1\xbc\xad\xce\x99||"
"\xe1\xbe\x96|\xe1\xbc\xa6\xce\xb9|\xe1\xbc\xae\xce\x99||"
"\xe1\xbe\x97|\xe1\xbc\xa7\xce\xb9|\xe1\xbc\xaf\xce\x99||"
"\xe1\xbe\x98|\xe1\xbc\xa0\xce\xb9|\xe1\xbc\xa8\xce\x99|\xe1\xbe\x90|"
"\xe1\xbe\x99|\xe1\xbc\xa1\xce\xb9|\xe1\xbc\xa9\xce\x99|\xe1\xbe\x91|"
"\xe1\xbe\x9a|\xe1\xbc\xa2\xce\xb9|\xe1\xbc\xaa\xce\x99|\xe1\xbe\x92|"
"\xe1\xbe\x9b|\xe1\xbc\xa3\xce\xb9|\xe1\xbc\xab\xce\x99|\xe1\xbe\x93|"
"\xe1\xbe\x9c|\xe1\xbc\xa4\xce\xb9|\xe1\xbc\xac\xce\x99|\xe1\xbe\x94|"
"\xe1\xbe\x9d|\xe1\xbc\xa5\xce\xb9|\xe1\xbc\xad\xce\x99|\xe1\xbe\x95|"
"\xe1\xbe\x9e|\xe1\xbc\xa6\xce\xb9|\xe1\xbc\xae\xce\x99|\xe1\xbe\x96|"
"\xe1\xbe\x9f|\xe1\xbc\xa7\xce\xb9|\xe1\xbc\xaf\xce\x99|\xe1\xbe\x97|"
"\xe1\xbe\xa0|\xe1\xbd\xa0\xce\xb9|\xe1\xbd\xa8\xce\x99||"
"\xe1\xbe\xa1|\xe1\xbd\xa1\xce\xb9|\xe1\xbd\xa9\xce\x99||"
"\xe1\xbe\xa2|\xe1\xbd\xa2\xce\xb9|\xe1\xbd\xaa\xce\x99||"
"\xe1\xbe\xa3|\xe1\xbd\xa3\xce\xb9|\xe1\xbd\xab\xce\x99||"
"\xe1\xbe\xa4|\xe1\xbd\xa4\xce\xb9|\xe1\xbd\xac\xce\x99||"
"\xe1\xbe\xa5|\xe1\xbd\xa5\xce\xb9|\xe1\xbd\xad\xce\x99||"
"\xe1\xbe\xa6|\xe1\xbd\xa6\xce\xb9|\xe1\xbd\xae\xce\x99||"
"\xe1\xbe\xa7|\xe1\xbd\xa7\xce\xb9|\xe1\xbd\xaf\xce\x99||"
"\xe1\xbe\xa8|\xe1\xbd\xa0\xce\xb9|\xe1\xbd\xa8\xce\x99|\xe1\xbe\xa0|"
"\xe1\xbe\xa9|\xe1\xbd\xa1\xce\xb9|\xe1\xbd\xa9\xce\x99|\xe1\xbe\xa1|"
"\xe1\xbe\xaa|\xe1\xbd\xa2\xce\xb9|\xe1\xbd\xaa\xce\x99|\xe1\xbe\xa2|"
"\xe1\xbe\xab|\xe1\xbd\xa3\xce\xb9|\xe1\xbd\xab\xce\x99|\xe1\xbe\xa3|"
"\xe1\xbe\xac|\xe1\xbd\xa4\xce\xb9|\xe1\xbd\xac\xce\x99|\xe1\xbe\xa4|"
"\xe1\xbe\xad|\xe1\xbd\xa5\xce\xb9|\xe1\xbd\xad\xce\x99|\xe1\xbe\xa5|"
"\xe1\xbe\xae|\xe1\xbd\xa6\xce\xb9|\xe1\xbd\xae\xce\x99|\xe1\xbe\xa6|"
"\xe1\xbe\xaf|\xe1\xbd\xa7\xce\xb9|\xe1\xbd\xaf\xce\x99|\xe1\xbe\xa7|"
"\xe1\xbe\xb2|\xe1\xbd\xb0\xce\xb9|\xe1\xbe\xba\xce\x99||"
"\xe1\xbe\xb3|\xce\xb1\xce\xb9|\xce\x91\xce\x99||"
"\xe1\xbe\xb4|\xce\xac\xce\xb9|\xce\x86\xce\x99||"
"\xe1\xbe\xb6|\xce\xb1\xcd\x82|\xce\x91\xcd\x82||"
"\xe1\xbe\xb7|\xce\xb1\xcd\x82\xce\xb9|\xce\x91\xcd\x82\xce\x99||"
"\xe1\xbe\xbc|\xce\xb1\xce\xb9|\xce\x91\xce\x99|\xe1\xbe\xb3|"
"\xe1\xbe\xbe|\xce\xb9|\xce\x99||"
"\xe1\xbf\x82|\xe1\xbd\xb4\xce\xb9|\xe1\xbf\x8a\xce\x99||"
"\xe1\xbf\x83|\xce\xb7\xce\xb9|\xce\x97\xce\x99||"
"\xe1\xbf\x84|\xce\xae\xce\xb9|\xce\x89\xce\x99||"
"\xe1\xbf\x86|\xce\xb7\xcd\x82|\xce\x97\xcd\x82||"
"\xe1\xbf\x87|\xce\xb7\xcd\x82\xce\xb9|\xce\x97\xcd\x82\xce\x99||"
"\xe1\xbf\x8c|\xce\xb7\xce\xb9|\xce\x97\xce\x99|\xe1\xbf\x83|"
"\xe1\xbf\x92|\xce\xb9\xcc\x88\xcc\x80|\xce\x99\xcc\x88\xcc\x80||"
"\xe1\xbf\x93|\xce\xb9\xcc\x88\xcc\x81|\xce\x99\xcc\x88\xcc\x81||"
"\xe1\xbf\x96|\xce\xb9\xcd\x82|\xce\x99\xcd\x82||"
"\xe1\xbf\x97|\xce\xb9\xcc\x88\xcd\x82|\xce\x99\xcc\x88\xcd\x82||"
"\xe1\xbf\xa2|\xcf\x85\xcc\x88\xcc\x80|\xce\xa5\xcc\x88\xcc\x80||"
"\xe1\xbf\xa3|\xcf\x85\xcc\x88\xcc\x81|\xce\xa5\xcc\x88\xcc\x81||"
"\xe1\xbf\xa4|\xcf\x81\xcc\x93|\xce\xa1\xcc\x93||"
"\xe1\xbf\xa6|\xcf\x85\xcd\x82|\xce\xa5\xcd\x82||"
"\xe1\xbf\xa7|\xcf\x85\xcc\x88\xcd\x82|\xce\xa5\xcc\x88\xcd\x82||"
"\xe1\xbf\xb2|\xe1\xbd\xbc\xce\xb9|\xe1\xbf\xba\xce\x99||"
"\xe1\xbf\xb3|\xcf\x89\xce\xb9|\xce\xa9\xce\x99||"
"\xe1\xbf\xb4|\xcf\x8e\xce\xb9|\xce\x8f\xce\x99||"
"\xe1\xbf\xb6|\xcf\x89\xcd\x82|\xce\xa9\xcd\x82||"
"\xe1\xbf\xb7|\xcf\x89\xcd\x82\xce\xb9|\xce\xa9\xcd\x82\xce\x99||"
"\xe1\xbf\xbc|\xcf\x89\xce\xb9|\xce\xa9\xce\x99|\xe1\xbf\xb3|"
"\xe2\x84\xa6|\xcf\x89||\xcf\x89|"
"\xe2\x84\xaa|k||k|"
"\xe2\x84\xab|\xc3\xa5||\xc3\xa5|"
"\xea\xad\xb0|\xe1\x8e\xa0|\xe1\x8e\xa0||"
"\xea\xad\xb1|\xe1\x8e\xa1|\xe1\x8e\xa1||"
"\xea\xad\xb2|\xe1\x8e\xa2|\xe1\x8e\xa2||"
"\xea\xad\xb3|\xe1\x8e\xa3|\xe1\x8e\xa3||"
"\xea\xad\xb4|\xe1\x8e\xa4|\xe1\x8e\xa4||"
"\xea\xad\xb5|\xe1\x8e\xa5|\xe1\x8e\xa5||"
"\xea\xad\xb6|\xe1\x8e\xa6|\xe1\x8e\xa6||"
"\xea\xad\xb7|\xe1\x8e\xa7|\xe1\x8e\xa7||"
"\xea\xad\xb8|\xe1\x8e\xa8|\xe1\x8e\xa8||"
"\xea\xad\xb9|\xe1\x8e\xa9|\xe1\x8e\xa9||"
"\xea\xad\xba|\xe1\x8e\xaa|\xe1\x8e\xaa||"
"\xea\xad\xbb|\xe1\x8e\xab|\xe1\x8e\xab||"
"\xea\xad\xbc|\xe1\x8e\xac|\xe1\x8e\xac||"
"\xea\xad\xbd|\xe1\x8e\xad|\xe1\x8e\xad||"
"\xea\xad\xbe|\xe1\x8e\xae|\xe1\x8e\xae||"
"\xea\xad\xbf|\xe1\x8e\xaf|\xe1\x8e\xaf||"
"\xea\xae\x80|\xe1\x8e\xb0|\xe1\x8e\xb0||"
"\xea\xae\x81|\xe1\x8e\xb1|\xe1\x8e\xb1||"
"\xea\xae\x82|\xe1\x8e\xb2|\xe1\x8e\xb2||"
"\xea\xae\x83|\xe1\x8e\xb3|\xe1\x8e\xb3||"
"\xea\xae\x84|\xe1\x8e\xb4|\xe1\x8e\xb4||"
"\xea\xae\x85|\xe1\x8e\xb5|\xe1\x8e\xb5||"
"\xea\xae\x86|\xe1\x8e\xb6|\xe1\x8e\xb6||"
"\xea\xae\x87|\xe1\x8e\xb7|\xe1\x8e\xb7||"
"\xea\xae\x88|\xe1\x8e\xb8|\xe1\x8e\xb8||"
"\xea\xae\x89|\xe1\x8e\xb9|\xe1\x8e\xb9||"
"\xea\xae\x8a|\xe1\x8e\xba|\xe1\x8e\xba||"
"\xea\xae\x8b|\xe1\x8e\xbb|\xe1\x8e\xbb||"
"\xea\xae\x8c|\xe1\x8e\xbc|\xe1\x8e\xbc||"
"\xea\xae\x8d|\xe1\x8e\xbd|\xe1\x8e\xbd||"
"\xea\xae\x8e|\xe1\x8e\xbe|\xe1\x8e\xbe||"
"\xea\xae\x8f|\xe1\x8e\xbf|\xe1\x8e\xbf||"
"\xea\xae\x90|\xe1\x8f\x80|\xe1\x8f\x80||"
"\xea\xae\x91|\xe1\x8f\x81|\xe1\x8f\x81||"
"\xea\xae\x92|\xe1\x8f\x82|\xe1\x8f\x82||"
"\xea\xae\x93|\xe1\x8f\x83|\xe1\x8f\x83||"
"\xea\xae\x94|\xe1\x8f\x84|\xe1\x8f\x84||"
"\xea\xae\x95|\xe1\x8f\x85|\xe1\x8f\x85||"
"\xea\xae\x96|\xe1\x8f\x86|\xe1\x8f\x86||"
"\xea\xae\x97|\xe1\x8f\x87|\xe1\x8f\x87||"
"\xea\xae\x98|\xe1\x8f\x88|\xe1\x8f\x88||"
"\xea\xae\x99|\xe1\x8f\x89|\xe1\x8f\x89||"
"\xea\xae\x9a|\xe1\x8f\x8a|\xe1\x8f\x8a||"
"\xea\xae\x9b|\xe1\x8f\x8b|\xe1\x8f\x8b||"
"\xea\xae\x9c|\xe1\x8f\x8c|\xe1\x8f\x8c||"
"\xea\xae\x9d|\xe1\x8f\x8d|\xe1\x8f\x8d||"
"\xea\xae\x9e|\xe1\x8f\x8e|\xe1\x8f\x8e||"
"\xea\xae\x9f|\xe1\x8f\x8f|\xe1\x8f\x8f||"
"\xea\xae\xa0|\xe1\x8f\x90|\xe1\x8f\x90||"
"\xea\xae\xa1|\xe1\x8f\x91|\xe1\x8f\x91||"
"\xea\xae\xa2|\xe1\x8f\x92|\xe1\x8f\x92||"
"\xea\xae\xa3|\xe1\x8f\x93|\xe1\x8f\x93||"
"\xea\xae\xa4|\xe1\x8f\x94|\xe1\x8f\x94||"
"\xea\xae\xa5|\xe1\x8f\x95|\xe1\x8f\x95||"
"\xea\xae\xa6|\xe1\x8f\x96|\xe1\x8f\x96||"
"\xea\xae\xa7|\xe1\x8f\x97|\xe1\x8f\x97||"
"\xea\xae\xa8|\xe1\x8f\x98|\xe1\x8f\x98||"
"\xea\xae\xa9|\xe1\x8f\x99|\xe1\x8f\x99||"
"\xea\xae\xaa|\xe1\x8f\x9a|\xe1\x8f\x9a||"
"\xea\xae\xab|\xe1\x8f\x9b|\xe1\x8f\x9b||"
"\xea\xae\xac|\xe1\x8f\x9c|\xe1\x8f\x9c||"
"\xea\xae\xad|\xe1\x8f\x9d|\xe1\x8f\x9d||"
"\xea\xae\xae|\xe1\x8f\x9e|\xe1\x8f\x9e||"
"\xea\xae\xaf|\xe1\x8f\x9f|\xe1\x8f\x9f||"
"\xea\xae\xb0|\xe1\x8f\xa0|\xe1\x8f\xa0||"
"\xea\xae\xb1|\xe1\x8f\xa1|\xe1\x8f\xa1||"
"\xea\xae\xb2|\xe1\x8f\xa2|\xe1\x8f\xa2||"
"\xea\xae\xb3|\xe1\x8f\xa3|\xe1\x8f\xa3||"
"\xea\xae\xb4|\xe1\x8f\xa4|\xe1\x8f\xa4||"
"\xea\xae\xb5|\xe1\x8f\xa5|\xe1\x8f\xa5||"
"\xea\xae\xb6|\xe1\x8f\xa6|\xe1\x8f\xa6||"
"\xea\xae\xb7|\xe1\x8f\xa7|\xe1\x8f\xa7||"
"\xea\xae\xb8|\xe1\x8f\xa8|\xe1\x8f\xa8||"
"\xea\xae\xb9|\xe1\x8f\xa9|\xe1\x8f\xa9||"
"\xea\xae\xba|\xe1\x8f\xaa|\xe1\x8f\xaa||"
"\xea\xae\xbb|\xe1\x8f\xab|\xe1\x8f\xab||"
"\xea\xae\xbc|\xe1\x8f\xac|\xe1\x8f\xac||"
"\xea\xae\xbd|\xe1\x8f\xad|\xe1\x8f\xad||"
"\xea\xae\xbe|\xe1\x8f\xae|\xe1\x8f\xae||"
"\xea\xae\xbf|\xe1\x8f\xaf|\xe1\x8f\xaf||"
"\xef\xac\x80|ff|FF||"
"\xef\xac\x81|fi|FI||"
"\xef\xac\x82|fl|FL||"
"\xef\xac\x83|ffi|FFI||"
"\xef\xac\x84|ffl|FFL||"
"\xef\xac\x85|st|ST||"
"\xef\xac\x86|st|ST||"
"\xef\xac\x93|\xd5\xb4\xd5\xb6|\xd5\x84\xd5\x86||"
"\xef\xac\x94|\xd5\xb4\xd5\xa5|\xd5\x84\xd4\xb5||"
"\xef\xac\x95|\xd5\xb4\xd5\xab|\xd5\x84\xd4\xbb||"
"\xef\xac\x96|\xd5\xbe\xd5\xb6|\xd5\x8e\xd5\x86||"
"\xef\xac\x97|\xd5\xb4\xd5\xad|\xd5\x84\xd4\xbd||"

//--Autogenerated -- end of section automatically generated
;
572
573class CaseConverter : public ICaseConverter {
574 // Maximum length of a case conversion result is 6 bytes in UTF-8
575 enum { maxConversionLength=6 };
576 struct ConversionString {
577 char conversion[maxConversionLength+1];
578 ConversionString() noexcept : conversion{} {
579 }
580 };
581 // Conversions are initially store in a vector of structs but then decomposed into
582 // parallel arrays as that is about 10% faster to search.
583 struct CharacterConversion {
584 int character;
585 ConversionString conversion;
586 CharacterConversion() noexcept : character(0) {
587 // Empty case: NUL -> "".
588 }
589 CharacterConversion(int character_, std::string_view conversion_) noexcept : character(character_) {
590 assert(conversion_.length() <= maxConversionLength);
591 try {
592 // This can never fail as std::string_view::copy should only throw
593 // std::out_of_range if pos > size() and pos == 0 here
594 conversion_.copy(conversion.conversion, conversion_.length());
595 } catch (...) {
596 // Ignore any exception
597 }
598 }
599 bool operator<(const CharacterConversion &other) const noexcept {
600 return character < other.character;
601 }
602 };
603 typedef std::vector<CharacterConversion> CharacterToConversion;
604 CharacterToConversion characterToConversion;
605 // The parallel arrays
606 std::vector<int> characters;
607 std::vector<ConversionString> conversions;
608
609public:
610 CaseConverter() = default;
611 // Deleted so CaseConverter objects can not be copied.
612 CaseConverter(const CaseConverter &) = delete;
613 CaseConverter(CaseConverter &&) = delete;
614 CaseConverter &operator=(const CaseConverter &) = delete;
615 CaseConverter &operator=(CaseConverter &&) = delete;
616 virtual ~CaseConverter() noexcept = default;
617 bool Initialised() const noexcept {
618 return !characters.empty();
619 }
620 void Add(int character, const char *conversion) {
621 characterToConversion.emplace_back(character, conversion);
622 }
623 const char *Find(int character) {
624 const std::vector<int>::iterator it = std::lower_bound(characters.begin(), characters.end(), character);
625 if (it == characters.end())
626 return nullptr;
627 else if (*it == character)
628 return conversions[it - characters.begin()].conversion;
629 else
630 return nullptr;
631 }
632 size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed) override {
633 size_t lenConverted = 0;
634 size_t mixedPos = 0;
635 unsigned char bytes[UTF8MaxBytes + 1]{};
636 while (mixedPos < lenMixed) {
637 const unsigned char leadByte = mixed[mixedPos];
638 const char *caseConverted = nullptr;
639 size_t lenMixedChar = 1;
640 if (UTF8IsAscii(leadByte)) {
641 caseConverted = Find(leadByte);
642 } else {
643 bytes[0] = leadByte;
644 const int widthCharBytes = UTF8BytesOfLead[leadByte];
645 for (int b=1; b<widthCharBytes; b++) {
646 bytes[b] = (mixedPos+b < lenMixed) ? mixed[mixedPos+b] : 0;
647 }
648 const int classified = UTF8Classify(bytes, widthCharBytes);
649 if (!(classified & UTF8MaskInvalid)) {
650 // valid UTF-8
651 lenMixedChar = classified & UTF8MaskWidth;
652 const int character = UnicodeFromUTF8(bytes);
653 caseConverted = Find(character);
654 }
655 }
656 if (caseConverted) {
657 // Character has a conversion so copy that conversion in
658 while (*caseConverted) {
659 converted[lenConverted++] = *caseConverted++;
660 if (lenConverted >= sizeConverted)
661 return 0;
662 }
663 } else {
664 // Character has no conversion so copy the input to output
665 for (size_t i=0; i<lenMixedChar; i++) {
666 converted[lenConverted++] = mixed[mixedPos+i];
667 if (lenConverted >= sizeConverted)
668 return 0;
669 }
670 }
671 mixedPos += lenMixedChar;
672 }
673 return lenConverted;
674 }
675 void FinishedAdding() {
676 std::sort(characterToConversion.begin(), characterToConversion.end());
677 characters.reserve(characterToConversion.size());
678 conversions.reserve(characterToConversion.size());
679 for (const CharacterConversion &chConv : characterToConversion) {
680 characters.push_back(chConv.character);
681 conversions.push_back(chConv.conversion);
682 }
683 // Empty the original calculated data completely
684 CharacterToConversion().swap(characterToConversion);
685 }
686};
687
688CaseConverter caseConvFold;
689CaseConverter caseConvUp;
690CaseConverter caseConvLow;
691
692void AddSymmetric(CaseConversion conversion, int lower,int upper) {
693 char lowerUTF8[UTF8MaxBytes+1];
694 UTF8FromUTF32Character(lower, lowerUTF8);
695 char upperUTF8[UTF8MaxBytes+1];
696 UTF8FromUTF32Character(upper, upperUTF8);
697
698 switch (conversion) {
699 case CaseConversion::fold:
700 caseConvFold.Add(upper, lowerUTF8);
701 break;
702 case CaseConversion::upper:
703 caseConvUp.Add(lower, upperUTF8);
704 break;
705 case CaseConversion::lower:
706 caseConvLow.Add(upper, lowerUTF8);
707 break;
708 }
709}
710
711void SetupConversions(CaseConversion conversion) {
712 // First initialize for the symmetric ranges
713 for (size_t i=0; i<std::size(symmetricCaseConversionRanges);) {
714 const int lower = symmetricCaseConversionRanges[i++];
715 const int upper = symmetricCaseConversionRanges[i++];
716 const int length = symmetricCaseConversionRanges[i++];
717 const int pitch = symmetricCaseConversionRanges[i++];
718 for (int j=0; j<length*pitch; j+=pitch) {
719 AddSymmetric(conversion, lower+j, upper+j);
720 }
721 }
722 // Add the symmetric singletons
723 for (size_t i=0; i<std::size(symmetricCaseConversions);) {
724 const int lower = symmetricCaseConversions[i++];
725 const int upper = symmetricCaseConversions[i++];
726 AddSymmetric(conversion, lower, upper);
727 }
728 // Add the complex cases
729 const char *sComplex = complexCaseConversions;
730 while (*sComplex) {
731 // Longest ligature is 3 character so 5 for safety
732 constexpr size_t lenUTF8 = 5*UTF8MaxBytes+1;
733 unsigned char originUTF8[lenUTF8]{};
734 char foldedUTF8[lenUTF8]{};
735 char lowerUTF8[lenUTF8]{};
736 char upperUTF8[lenUTF8]{};
737 size_t i = 0;
738 while (*sComplex && *sComplex != '|') {
739 originUTF8[i++] = *sComplex;
740 sComplex++;
741 }
742 sComplex++;
743 originUTF8[i] = 0;
744 i = 0;
745 while (*sComplex && *sComplex != '|') {
746 foldedUTF8[i++] = *sComplex;
747 sComplex++;
748 }
749 sComplex++;
750 foldedUTF8[i] = 0;
751 i = 0;
752 while (*sComplex && *sComplex != '|') {
753 upperUTF8[i++] = *sComplex;
754 sComplex++;
755 }
756 sComplex++;
757 upperUTF8[i] = 0;
758 i = 0;
759 while (*sComplex && *sComplex != '|') {
760 lowerUTF8[i++] = *sComplex;
761 sComplex++;
762 }
763 sComplex++;
764 lowerUTF8[i] = 0;
765
766 const int character = UnicodeFromUTF8(originUTF8);
767
768 if (conversion == CaseConversion::fold && foldedUTF8[0]) {
769 caseConvFold.Add(character, foldedUTF8);
770 }
771
772 if (conversion == CaseConversion::upper && upperUTF8[0]) {
773 caseConvUp.Add(character, upperUTF8);
774 }
775
776 if (conversion == CaseConversion::lower && lowerUTF8[0]) {
777 caseConvLow.Add(character, lowerUTF8);
778 }
779 }
780
781 switch (conversion) {
782 case CaseConversion::fold:
783 caseConvFold.FinishedAdding();
784 break;
785 case CaseConversion::upper:
786 caseConvUp.FinishedAdding();
787 break;
788 case CaseConversion::lower:
789 caseConvLow.FinishedAdding();
790 break;
791 }
792}
793
794CaseConverter *ConverterForConversion(CaseConversion conversion) noexcept {
795 switch (conversion) {
796 case CaseConversion::fold:
797 return &caseConvFold;
798 case CaseConversion::upper:
799 return &caseConvUp;
800 case CaseConversion::lower:
801 return &caseConvLow;
802 }
803 return nullptr;
804}
805
806}
807
808namespace Scintilla::Internal {
809
810ICaseConverter *ConverterFor(CaseConversion conversion) {
811 CaseConverter *pCaseConv = ConverterForConversion(conversion);
812 if (!pCaseConv->Initialised())
813 SetupConversions(conversion);
814 return pCaseConv;
815}
816
817const char *CaseConvert(int character, CaseConversion conversion) {
818 CaseConverter *pCaseConv = ConverterForConversion(conversion);
819 if (!pCaseConv->Initialised())
820 SetupConversions(conversion);
821 return pCaseConv->Find(character);
822}
823
824size_t CaseConvertString(char *converted, size_t sizeConverted, const char *mixed, size_t lenMixed, CaseConversion conversion) {
825 CaseConverter *pCaseConv = ConverterForConversion(conversion);
826 if (!pCaseConv->Initialised())
827 SetupConversions(conversion);
828 return pCaseConv->CaseConvertString(converted, sizeConverted, mixed, lenMixed);
829}
830
831std::string CaseConvertString(const std::string &s, CaseConversion conversion) {
832 std::string retMapped(s.length() * maxExpansionCaseConversion, 0);
833 const size_t lenMapped = CaseConvertString(&retMapped[0], retMapped.length(), s.c_str(), s.length(),
834 conversion);
835 retMapped.resize(lenMapped);
836 return retMapped;
837}
838
839}
840