1// Scintilla source code edit control
2/** @file CharacterCategory.h
3 ** Returns the Unicode general category of a character.
4 **/
5// Copyright 2013 by Neil Hodgson <neilh@scintilla.org>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#ifndef CHARACTERCATEGORY_H
9#define CHARACTERCATEGORY_H
10
11namespace Lexilla {
12
// Unicode general category of a character. Enumerator names are "cc" followed by
// the two-letter general category abbreviation. Values are assigned implicitly
// from 0 in declaration order, so the order below fixes each numeric value.
enum CharacterCategory {
	// L* : letters
	ccLu,
	ccLl,
	ccLt,
	ccLm,
	ccLo,
	// M* : marks
	ccMn,
	ccMc,
	ccMe,
	// N* : numbers
	ccNd,
	ccNl,
	ccNo,
	// P* : punctuation
	ccPc,
	ccPd,
	ccPs,
	ccPe,
	ccPi,
	ccPf,
	ccPo,
	// S* : symbols
	ccSm,
	ccSc,
	ccSk,
	ccSo,
	// Z* : separators
	ccZs,
	ccZl,
	ccZp,
	// C* : other (control, format, surrogate, private use, unassigned)
	ccCc,
	ccCf,
	ccCs,
	ccCo,
	ccCn
};
22
// Returns the Unicode general category of the given character value.
// CharacterCategoryMap::CategoryFor falls back to this function for values that
// are outside its dense table; that call site describes the lookup as a binary
// search through ranges.
CharacterCategory CategoriseCharacter(int character);
24
// Common definitions of allowable characters in identifiers from UAX #31.
// Each predicate takes a character value and returns whether it is allowed in the
// identifier position named by the function.
// NOTE(review): the names suggest these map to the ID_Start, ID_Continue,
// XID_Start and XID_Continue properties respectively - confirm against the
// implementation, which is not visible in this header.
bool IsIdStart(int character);
bool IsIdContinue(int character);
bool IsXidStart(int character);
bool IsXidContinue(int character);
30
31class CharacterCategoryMap {
32private:
33 std::vector<unsigned char> dense;
34public:
35 CharacterCategoryMap();
36 CharacterCategory CategoryFor(int character) const {
37 if (static_cast<size_t>(character) < dense.size()) {
38 return static_cast<CharacterCategory>(dense[character]);
39 } else {
40 // binary search through ranges
41 return CategoriseCharacter(character);
42 }
43 }
44 int Size() const noexcept;
45 void Optimize(int countCharacters);
46};
47
48}
49
50#endif
51