1 | // Copyright 2007 Google Inc. All Rights Reserved. |
2 | |
3 | #ifndef STRINGS_ASCII_CTYPE_H__ |
4 | #define STRINGS_ASCII_CTYPE_H__ |
5 | |
6 | #include "base/basictypes.h" |
7 | |
8 | // ---------------------------------------------------------------------- |
9 | // ascii_isalpha() |
10 | // ascii_isdigit() |
11 | // ascii_isalnum() |
12 | // ascii_isspace() |
13 | // ascii_ispunct() |
14 | // ascii_isblank() |
15 | // ascii_iscntrl() |
16 | // ascii_isxdigit() |
17 | // ascii_isprint() |
18 | // ascii_isgraph() |
19 | // ascii_isupper() |
20 | // ascii_islower() |
21 | // ascii_tolower() |
22 | // ascii_toupper() |
23 | // The ctype.h versions of these routines are slow with some |
24 | // compilers and/or architectures, perhaps because of locale |
25 | // issues. These versions work for ASCII only: the ascii_is*() predicates return |
26 | // false for everything above \x7f (which means they return |
27 | // false for any byte of any non-ASCII UTF-8 character). |
28 | // |
29 | // The individual bits do not have names because the array definition |
30 | // is already tightly coupled to this, and names would make it harder |
31 | // to read and debug. |
32 | // |
33 | // This is an example of the benchmark times from the unittest: |
34 | // $ ascii_ctype_test --benchmarks=all --heap_check= |
35 | // Benchmark Time(ns) CPU(ns) Iterations |
36 | // -------------------------------------------------- |
37 | // BM_Identity 121 120 5785985 2027.0 MB/s |
38 | // BM_isalpha 1603 1597 511027 152.9 MB/s |
39 | // BM_ascii_isalpha 223 224 3111595 1088.5 MB/s |
40 | // BM_isdigit 181 183 3825722 1336.4 MB/s |
41 | // BM_ascii_isdigit 236 239 2929312 1023.3 MB/s |
42 | // BM_isalnum 1623 1615 460596 151.2 MB/s |
43 | // BM_ascii_isalnum 253 255 2745518 959.1 MB/s |
44 | // BM_isspace 1264 1258 555639 194.1 MB/s |
45 | // BM_ascii_isspace 253 255 2745507 959.1 MB/s |
46 | // BM_ispunct 1324 1317 555639 185.3 MB/s |
47 | // BM_ascii_ispunct 252 255 2745507 959.1 MB/s |
48 | // BM_isblank 1433 1426 511027 171.2 MB/s |
49 | // BM_ascii_isblank 253 254 2745518 960.5 MB/s |
50 | // BM_iscntrl 1643 1634 530383 149.4 MB/s |
51 | // BM_ascii_iscntrl 252 255 2745518 959.1 MB/s |
52 | // BM_isxdigit 1826 1817 414265 134.3 MB/s |
53 | // BM_ascii_isxdigit 258 260 2692712 939.3 MB/s |
54 | // BM_isprint 1677 1669 419224 146.2 MB/s |
55 | // BM_ascii_isprint 237 239 2929312 1021.8 MB/s |
56 | // BM_isgraph 1436 1429 507324 170.9 MB/s |
57 | // BM_ascii_isgraph 237 239 2929312 1021.8 MB/s |
58 | // BM_isupper 1550 1544 463647 158.1 MB/s |
59 | // BM_ascii_isupper 237 239 2929312 1021.8 MB/s |
60 | // BM_islower 1301 1294 538544 188.7 MB/s |
61 | // BM_ascii_islower 237 239 2929312 1023.3 MB/s |
62 | // BM_isascii 182 181 3846746 1345.7 MB/s |
63 | // BM_ascii_isascii 209 211 3318039 1159.1 MB/s |
64 | // BM_tolower 1743 1764 397786 138.4 MB/s |
65 | // BM_ascii_tolower 210 211 3318039 1155.8 MB/s |
66 | // BM_toupper 1742 1764 397788 138.4 MB/s |
67 | // BM_ascii_toupper 212 211 3302401 1156.9 MB/s |
68 | // |
69 | // ---------------------------------------------------------------------- |
70 | |
71 | #define kApb kAsciiPropertyBits |
72 | extern const uint8 kAsciiPropertyBits[256]; |
73 | static inline bool ascii_isalpha(unsigned char c) { return kApb[c] & 0x01; } |
74 | static inline bool ascii_isalnum(unsigned char c) { return kApb[c] & 0x04; } |
75 | static inline bool ascii_isspace(unsigned char c) { return kApb[c] & 0x08; } |
76 | static inline bool ascii_ispunct(unsigned char c) { return kApb[c] & 0x10; } |
77 | static inline bool ascii_isblank(unsigned char c) { return kApb[c] & 0x20; } |
78 | static inline bool ascii_iscntrl(unsigned char c) { return kApb[c] & 0x40; } |
79 | static inline bool ascii_isxdigit(unsigned char c) { return kApb[c] & 0x80; } |
// Returns true only for the ten decimal digit characters '0' through '9'.
// Implemented as a direct range test; no table lookup is involved.
static inline bool ascii_isdigit(unsigned char c) {
  if (c < '0') {
    return false;
  }
  return c <= '9';
}
// Returns true for byte values 32 (space) through 126 ('~') inclusive;
// everything below 32 and everything from 127 (DEL) upward is rejected.
static inline bool ascii_isprint(unsigned char c) {
  const bool below_space = c < 32;
  const bool del_or_above = c >= 127;
  return !(below_space || del_or_above);
}
// Returns true for byte values 33 ('!') through 126 ('~') inclusive.
// Unlike ascii_isprint(), the space character (32) is not accepted.
static inline bool ascii_isgraph(unsigned char c) {
  return 33 <= c && c <= 126;
}
// Returns true only for the upper-case letters 'A' through 'Z'.
static inline bool ascii_isupper(unsigned char c) {
  // Subtracting 'A' in unsigned arithmetic makes every value below 'A' wrap
  // to a huge number, so one comparison covers both ends of the range.
  const unsigned offset = (unsigned)c - (unsigned)'A';
  return offset <= (unsigned)('Z' - 'A');
}
// Returns true only for the lower-case letters 'a' through 'z'.
// Implemented as a direct range test; no table lookup is involved.
static inline bool ascii_islower(unsigned char c) {
  if (c > 'z') {
    return false;
  }
  return c >= 'a';
}
// Returns true when c is a 7-bit ASCII code, i.e. a value in the range
// 0x00 through 0x7f inclusive (high bit clear), and false otherwise.
//
// The range is tested directly on the unsigned value. Casting to signed char
// and checking the sign would depend on how out-of-range values are narrowed
// to a signed type, which is implementation-defined before C++20.
static inline bool ascii_isascii(unsigned char c) {
  return c < 0x80;
}
// The kApb alias for kAsciiPropertyBits is private to this header; remove it
// so that it does not leak into files that include this one.
#undef kApb
89 | |
90 | extern const uint8 kAsciiToLower[256]; |
91 | static inline char ascii_tolower(unsigned char c) { return kAsciiToLower[c]; } |
92 | extern const uint8 kAsciiToUpper[256]; |
93 | static inline char ascii_toupper(unsigned char c) { return kAsciiToUpper[c]; } |
94 | |
95 | #endif // STRINGS_ASCII_CTYPE_H__ |
96 | |