1 | // Scintilla source code edit control |
2 | /** @file CharacterCategoryMap.h |
3 | ** Returns the Unicode general category of a character. |
4 | ** Similar code to Lexilla's lexilla/lexlib/CharacterCategory.h but renamed |
5 | ** to avoid problems with builds that statically include both Scintilla and Lexilla. |
6 | **/ |
7 | // Copyright 2013 by Neil Hodgson <neilh@scintilla.org> |
8 | // The License.txt file describes the conditions under which this software may be distributed. |
9 | |
10 | #ifndef CHARACTERCATEGORYMAP_H |
11 | #define CHARACTERCATEGORYMAP_H |
12 | |
13 | namespace Scintilla::Internal { |
14 | |
/**
 * Unicode general category of a character.
 * Each enumerator is "cc" followed by the two-letter general category
 * abbreviation. Values are implicit: they start at 0 and follow
 * declaration order, so all of them fit in an unsigned char.
 **/
enum CharacterCategory {
	// L: letters
	ccLu,	// uppercase letter
	ccLl,	// lowercase letter
	ccLt,	// titlecase letter
	ccLm,	// modifier letter
	ccLo,	// other letter
	// M: marks
	ccMn,	// nonspacing mark
	ccMc,	// spacing mark
	ccMe,	// enclosing mark
	// N: numbers
	ccNd,	// decimal number
	ccNl,	// letter number
	ccNo,	// other number
	// P: punctuation
	ccPc,	// connector punctuation
	ccPd,	// dash punctuation
	ccPs,	// open punctuation
	ccPe,	// close punctuation
	ccPi,	// initial punctuation
	ccPf,	// final punctuation
	ccPo,	// other punctuation
	// S: symbols
	ccSm,	// math symbol
	ccSc,	// currency symbol
	ccSk,	// modifier symbol
	ccSo,	// other symbol
	// Z: separators
	ccZs,	// space separator
	ccZl,	// line separator
	ccZp,	// paragraph separator
	// C: other
	ccCc,	// control
	ccCf,	// format
	ccCs,	// surrogate
	ccCo,	// private use
	ccCn	// unassigned
};
24 | |
25 | CharacterCategory CategoriseCharacter(int character); |
26 | |
/**
 * Identifier character predicates: common definitions of allowable
 * characters in identifiers from UAX #31.
 * Each takes a character value and reports whether it is allowed in the
 * given identifier position. Only declared in this header.
 * NOTE(review): the names suggest the UAX #31 properties ID_Start,
 * ID_Continue, XID_Start and XID_Continue respectively - confirm against
 * the implementation.
 **/
// May the character begin an identifier?
bool IsIdStart(int character);
// May the character appear after the first character of an identifier?
bool IsIdContinue(int character);
// "X" variant of IsIdStart.
bool IsXidStart(int character);
// "X" variant of IsIdContinue.
bool IsXidContinue(int character);
32 | |
33 | class CharacterCategoryMap { |
34 | private: |
35 | std::vector<unsigned char> dense; |
36 | public: |
37 | CharacterCategoryMap(); |
38 | CharacterCategory CategoryFor(int character) const { |
39 | if (static_cast<size_t>(character) < dense.size()) { |
40 | return static_cast<CharacterCategory>(dense[character]); |
41 | } else { |
42 | // binary search through ranges |
43 | return CategoriseCharacter(character); |
44 | } |
45 | } |
46 | int Size() const noexcept; |
47 | void Optimize(int countCharacters); |
48 | }; |
49 | |
50 | } |
51 | |
52 | #endif |
53 | |