1 | // Scintilla source code edit control |
2 | /** @file CharacterCategory.h |
3 | ** Returns the Unicode general category of a character. |
4 | **/ |
5 | // Copyright 2013 by Neil Hodgson <neilh@scintilla.org> |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | |
8 | #ifndef CHARACTERCATEGORY_H |
9 | #define CHARACTERCATEGORY_H |
10 | |
11 | namespace Lexilla { |
12 | |
/**
 * Unicode general category of a character.
 *
 * Each enumerator is "cc" followed by the two-letter General_Category
 * abbreviation. Values are assigned implicitly starting at 0, so the
 * declaration order below is significant and must not be rearranged.
 */
enum CharacterCategory {
	// Letters
	ccLu,	// Uppercase_Letter
	ccLl,	// Lowercase_Letter
	ccLt,	// Titlecase_Letter
	ccLm,	// Modifier_Letter
	ccLo,	// Other_Letter
	// Marks
	ccMn,	// Nonspacing_Mark
	ccMc,	// Spacing_Mark
	ccMe,	// Enclosing_Mark
	// Numbers
	ccNd,	// Decimal_Number
	ccNl,	// Letter_Number
	ccNo,	// Other_Number
	// Punctuation
	ccPc,	// Connector_Punctuation
	ccPd,	// Dash_Punctuation
	ccPs,	// Open_Punctuation
	ccPe,	// Close_Punctuation
	ccPi,	// Initial_Punctuation
	ccPf,	// Final_Punctuation
	ccPo,	// Other_Punctuation
	// Symbols
	ccSm,	// Math_Symbol
	ccSc,	// Currency_Symbol
	ccSk,	// Modifier_Symbol
	ccSo,	// Other_Symbol
	// Separators
	ccZs,	// Space_Separator
	ccZl,	// Line_Separator
	ccZp,	// Paragraph_Separator
	// Other
	ccCc,	// Control
	ccCf,	// Format
	ccCs,	// Surrogate
	ccCo,	// Private_Use
	ccCn	// Unassigned
};
22 | |
23 | CharacterCategory CategoriseCharacter(int character); |
24 | |
// Predicates for the characters allowed in identifiers, following the common
// definitions in UAX #31. The names mirror the ID_Start, ID_Continue,
// XID_Start and XID_Continue properties; the implementations are outside
// this header, so the exact sets should be confirmed there.

/// True when @a character may begin an identifier (ID_Start).
bool IsIdStart(int character);

/// True when @a character may appear after the first position (ID_Continue).
bool IsIdContinue(int character);

/// XID variant of IsIdStart (XID_Start).
bool IsXidStart(int character);

/// XID variant of IsIdContinue (XID_Continue).
bool IsXidContinue(int character);
30 | |
31 | class CharacterCategoryMap { |
32 | private: |
33 | std::vector<unsigned char> dense; |
34 | public: |
35 | CharacterCategoryMap(); |
36 | CharacterCategory CategoryFor(int character) const { |
37 | if (static_cast<size_t>(character) < dense.size()) { |
38 | return static_cast<CharacterCategory>(dense[character]); |
39 | } else { |
40 | // binary search through ranges |
41 | return CategoriseCharacter(character); |
42 | } |
43 | } |
44 | int Size() const noexcept; |
45 | void Optimize(int countCharacters); |
46 | }; |
47 | |
48 | } |
49 | |
50 | #endif |
51 | |