1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | #ifndef RE2_UNICODE_GROUPS_H_ |
6 | #define RE2_UNICODE_GROUPS_H_ |
7 | |
8 | // Unicode character groups. |
9 | |
10 | // The codes get split into ranges of 16-bit codes |
11 | // and ranges of 32-bit codes. It would be simpler |
12 | // to use only 32-bit ranges, but these tables are large |
13 | // enough to warrant extra care. |
14 | // |
15 | // Using just 32-bit ranges gives 27 kB of data. |
16 | // Adding 16-bit ranges gives 18 kB of data. |
17 | // Adding an extra table of 16-bit singletons would reduce |
18 | // to 16.5 kB of data but make the data harder to use; |
19 | // we don't bother. |
20 | |
21 | #include <stdint.h> |
22 | |
23 | #include "util/util.h" |
24 | #include "util/utf.h" |
25 | |
26 | namespace re2 { |
27 | |
// A range of code points whose bounds both fit in 16 bits.
// NOTE(review): presumably lo and hi are both inclusive bounds;
// confirm against the code that consumes these tables.
struct URange16 {
  uint16_t lo;  // lower bound of the range
  uint16_t hi;  // upper bound of the range
};
33 | |
34 | struct URange32 |
35 | { |
36 | Rune lo; |
37 | Rune hi; |
38 | }; |
39 | |
40 | struct UGroup |
41 | { |
42 | const char *name; |
43 | int sign; // +1 for [abc], -1 for [^abc] |
44 | const URange16 *r16; |
45 | int nr16; |
46 | const URange32 *r32; |
47 | int nr32; |
48 | }; |
49 | |
50 | // Named by property or script name (e.g., "Nd", "N", "Han"). |
51 | // Negated groups are not included. |
52 | extern const UGroup unicode_groups[]; |
53 | extern const int num_unicode_groups; |
54 | |
55 | // Named by POSIX name (e.g., "[:alpha:]", "[:^lower:]"). |
56 | // Negated groups are included. |
57 | extern const UGroup posix_groups[]; |
58 | extern const int num_posix_groups; |
59 | |
60 | // Named by Perl name (e.g., "\\d", "\\D"). |
61 | // Negated groups are included. |
62 | extern const UGroup perl_groups[]; |
63 | extern const int num_perl_groups; |
64 | |
65 | } // namespace re2 |
66 | |
67 | #endif // RE2_UNICODE_GROUPS_H_ |
68 | |