| 1 | // Scintilla source code edit control |
| 2 | /** @file CharacterCategoryMap.h |
| 3 | ** Returns the Unicode general category of a character. |
| 4 | ** Similar code to Lexilla's lexilla/lexlib/CharacterCategory.h but renamed |
| 5 | ** to avoid problems with builds that statically include both Scintilla and Lexilla. |
| 6 | **/ |
| 7 | // Copyright 2013 by Neil Hodgson <neilh@scintilla.org> |
| 8 | // The License.txt file describes the conditions under which this software may be distributed. |
| 9 | |
| 10 | #ifndef CHARACTERCATEGORYMAP_H |
| 11 | #define CHARACTERCATEGORYMAP_H |
| 12 | |
| 13 | namespace Scintilla::Internal { |
| 14 | |
/**
 * Unicode general categories, named with the standard two-letter
 * abbreviations prefixed by "cc".
 * Enumerators are numbered consecutively from 0 in declaration order,
 * so the order here must not change.
 */
enum CharacterCategory {
	// Letters
	ccLu,	// Letter, uppercase
	ccLl,	// Letter, lowercase
	ccLt,	// Letter, titlecase
	ccLm,	// Letter, modifier
	ccLo,	// Letter, other
	// Marks
	ccMn,	// Mark, nonspacing
	ccMc,	// Mark, spacing combining
	ccMe,	// Mark, enclosing
	// Numbers
	ccNd,	// Number, decimal digit
	ccNl,	// Number, letter
	ccNo,	// Number, other
	// Punctuation
	ccPc,	// Punctuation, connector
	ccPd,	// Punctuation, dash
	ccPs,	// Punctuation, open
	ccPe,	// Punctuation, close
	ccPi,	// Punctuation, initial quote
	ccPf,	// Punctuation, final quote
	ccPo,	// Punctuation, other
	// Symbols
	ccSm,	// Symbol, math
	ccSc,	// Symbol, currency
	ccSk,	// Symbol, modifier
	ccSo,	// Symbol, other
	// Separators
	ccZs,	// Separator, space
	ccZl,	// Separator, line
	ccZp,	// Separator, paragraph
	// Others
	ccCc,	// Other, control
	ccCf,	// Other, format
	ccCs,	// Other, surrogate
	ccCo,	// Other, private use
	ccCn	// Other, not assigned
};
| 24 | |
// Return the Unicode general category of character.
// NOTE(review): character is presumably a Unicode code point; the handling of
// values outside the Unicode range is defined by the implementation, which is
// not visible in this header - confirm there.
CharacterCategory CategoriseCharacter(int character);
| 26 | |
// Common definitions of allowable characters in identifiers from UAX #31.
// Each predicate takes a single character value and returns true when that
// character belongs to the named set.
// NOTE(review): the names presumably correspond to the UAX #31 properties
// ID_Start, ID_Continue, XID_Start and XID_Continue - confirm against the
// implementation.

// True if character may begin an identifier.
bool IsIdStart(int character);
// True if character may appear after the first character of an identifier.
bool IsIdContinue(int character);
// As IsIdStart, for the XID variant of the property.
bool IsXidStart(int character);
// As IsIdContinue, for the XID variant of the property.
bool IsXidContinue(int character);
| 32 | |
| 33 | class CharacterCategoryMap { |
| 34 | private: |
| 35 | std::vector<unsigned char> dense; |
| 36 | public: |
| 37 | CharacterCategoryMap(); |
| 38 | CharacterCategory CategoryFor(int character) const { |
| 39 | if (static_cast<size_t>(character) < dense.size()) { |
| 40 | return static_cast<CharacterCategory>(dense[character]); |
| 41 | } else { |
| 42 | // binary search through ranges |
| 43 | return CategoriseCharacter(character); |
| 44 | } |
| 45 | } |
| 46 | int Size() const noexcept; |
| 47 | void Optimize(int countCharacters); |
| 48 | }; |
| 49 | |
| 50 | } |
| 51 | |
| 52 | #endif |
| 53 | |