| 1 | // Scintilla source code edit control |
| 2 | /** @file CharacterCategory.h |
| 3 | ** Returns the Unicode general category of a character. |
| 4 | **/ |
| 5 | // Copyright 2013 by Neil Hodgson <neilh@scintilla.org> |
| 6 | // The License.txt file describes the conditions under which this software may be distributed. |
| 7 | |
| 8 | #ifndef CHARACTERCATEGORY_H |
| 9 | #define CHARACTERCATEGORY_H |
| 10 | |
| 11 | namespace Lexilla { |
| 12 | |
/**
 * Unicode general categories, named "cc" + the standard two-letter
 * General_Category abbreviation. Enumerators take consecutive values
 * starting at 0 in the order listed, so the order must not change.
 */
enum CharacterCategory {
	// Letters
	ccLu,	// Uppercase letter
	ccLl,	// Lowercase letter
	ccLt,	// Titlecase letter
	ccLm,	// Modifier letter
	ccLo,	// Other letter
	// Marks
	ccMn,	// Nonspacing mark
	ccMc,	// Spacing mark
	ccMe,	// Enclosing mark
	// Numbers
	ccNd,	// Decimal number
	ccNl,	// Letter number
	ccNo,	// Other number
	// Punctuation
	ccPc,	// Connector punctuation
	ccPd,	// Dash punctuation
	ccPs,	// Open punctuation
	ccPe,	// Close punctuation
	ccPi,	// Initial punctuation
	ccPf,	// Final punctuation
	ccPo,	// Other punctuation
	// Symbols
	ccSm,	// Math symbol
	ccSc,	// Currency symbol
	ccSk,	// Modifier symbol
	ccSo,	// Other symbol
	// Separators
	ccZs,	// Space separator
	ccZl,	// Line separator
	ccZp,	// Paragraph separator
	// Other
	ccCc,	// Control
	ccCf,	// Format
	ccCs,	// Surrogate
	ccCo,	// Private use
	ccCn	// Unassigned
};
| 22 | |
| 23 | CharacterCategory CategoriseCharacter(int character); |
| 24 | |
// Identifier character predicates, following the common definitions of
// allowable identifier characters given in UAX #31.
// NOTE(review): presumably these correspond to the ID_Start, ID_Continue,
// XID_Start and XID_Continue properties respectively - confirm against the
// implementation.

// May @a character begin an identifier?
bool IsIdStart(int character);

// May @a character appear after the start of an identifier?
bool IsIdContinue(int character);

// "X" variant of IsIdStart.
bool IsXidStart(int character);

// "X" variant of IsIdContinue.
bool IsXidContinue(int character);
| 30 | |
| 31 | class CharacterCategoryMap { |
| 32 | private: |
| 33 | std::vector<unsigned char> dense; |
| 34 | public: |
| 35 | CharacterCategoryMap(); |
| 36 | CharacterCategory CategoryFor(int character) const { |
| 37 | if (static_cast<size_t>(character) < dense.size()) { |
| 38 | return static_cast<CharacterCategory>(dense[character]); |
| 39 | } else { |
| 40 | // binary search through ranges |
| 41 | return CategoriseCharacter(character); |
| 42 | } |
| 43 | } |
| 44 | int Size() const noexcept; |
| 45 | void Optimize(int countCharacters); |
| 46 | }; |
| 47 | |
| 48 | } |
| 49 | |
| 50 | #endif |
| 51 | |