// Scintilla source code edit control
/** @file CharacterCategoryMap.h
 ** Returns the Unicode general category of a character.
 ** Similar code to Lexilla's lexilla/lexlib/CharacterCategory.h but renamed
 ** to avoid problems with builds that statically include both Scintilla and Lexilla.
 **/
// Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#ifndef CHARACTERCATEGORYMAP_H
#define CHARACTERCATEGORYMAP_H

namespace Scintilla::Internal {

// Unicode general categories. Each enumerator is "cc" followed by the standard
// two-letter General_Category abbreviation.
// Enumerators are numbered consecutively from ccLu == 0 to ccCn == 29.
// CharacterCategoryMap stores them as unsigned char, so every value must fit in a byte.
enum CharacterCategory {
	// Letter: uppercase, lowercase, titlecase, modifier, other
	ccLu, ccLl, ccLt, ccLm, ccLo,
	// Mark: nonspacing, spacing combining, enclosing
	ccMn, ccMc, ccMe,
	// Number: decimal digit, letter, other
	ccNd, ccNl, ccNo,
	// Punctuation: connector, dash, open, close, initial quote, final quote, other
	ccPc, ccPd, ccPs, ccPe, ccPi, ccPf, ccPo,
	// Symbol: math, currency, modifier, other
	ccSm, ccSc, ccSk, ccSo,
	// Separator: space, line, paragraph
	ccZs, ccZl, ccZp,
	// Other: control, format, surrogate, private use, unassigned
	ccCc, ccCf, ccCs, ccCo, ccCn
};

// Return the general category of a character value.
// Defined elsewhere; CharacterCategoryMap::CategoryFor uses it as the fallback
// for values not covered by its dense table.
CharacterCategory CategoriseCharacter(int character);

// Common definitions of allowable characters in identifiers from UAX #31.
// NOTE(review): the names suggest these test the ID_Start, ID_Continue,
// XID_Start and XID_Continue properties respectively - confirm against the
// implementation, which is not part of this header.
bool IsIdStart(int character);
bool IsIdContinue(int character);
bool IsXidStart(int character);
bool IsXidContinue(int character);

// Category lookup that can answer from a directly indexed table before
// falling back to CategoriseCharacter.
class CharacterCategoryMap {
private:
	// Categories stored one byte per character value, indexed by the value itself.
	// Only values in [0, dense.size()) are covered.
	std::vector<unsigned char> dense;
public:
	CharacterCategoryMap();
	// Return the category of character.
	// Values inside the dense table are answered by a single indexed read;
	// all other values, including negative ones, go to CategoriseCharacter.
	CharacterCategory CategoryFor(int character) const {
		// A negative value converts to a very large unsigned index, so it
		// fails the bounds test and takes the fallback path.
		const size_t index = static_cast<size_t>(character);
		if (index < dense.size()) {
			return static_cast<CharacterCategory>(dense[index]);
		}
		// binary search through ranges
		return CategoriseCharacter(character);
	}
	// Defined elsewhere. Presumably reports how many character values the
	// dense table covers - TODO confirm.
	int Size() const noexcept;
	// Defined elsewhere. Presumably rebuilds the dense table so that it covers
	// the first countCharacters character values - TODO confirm.
	void Optimize(int countCharacters);
};

}

#endif
