| 1 | // Copyright 2007 Google Inc. All Rights Reserved. |
| 2 | |
| 3 | #ifndef STRINGS_ASCII_CTYPE_H__ |
| 4 | #define STRINGS_ASCII_CTYPE_H__ |
| 5 | |
| 6 | #include "base/basictypes.h" |
| 7 | |
| 8 | // ---------------------------------------------------------------------- |
| 9 | // ascii_isalpha() |
| 10 | // ascii_isdigit() |
| 11 | // ascii_isalnum() |
| 12 | // ascii_isspace() |
| 13 | // ascii_ispunct() |
| 14 | // ascii_isblank() |
| 15 | // ascii_iscntrl() |
| 16 | // ascii_isxdigit() |
| 17 | // ascii_isprint() |
| 18 | // ascii_isgraph() |
| 19 | // ascii_isupper() |
| 20 | // ascii_islower() |
| 21 | // ascii_tolower() |
// ascii_toupper()
// ascii_isascii()
// The ctype.h versions of these routines are slow with some
// compilers and/or architectures, perhaps because of locale
// issues. These versions work for ASCII only: the predicates return
// false for everything above \x7f (which means they return
// false for any byte of any non-ASCII UTF-8 character).
| 28 | // |
| 29 | // The individual bits do not have names because the array definition |
| 30 | // is already tightly coupled to this, and names would make it harder |
| 31 | // to read and debug. |
| 32 | // |
| 33 | // This is an example of the benchmark times from the unittest: |
| 34 | // $ ascii_ctype_test --benchmarks=all --heap_check= |
| 35 | // Benchmark Time(ns) CPU(ns) Iterations |
| 36 | // -------------------------------------------------- |
| 37 | // BM_Identity 121 120 5785985 2027.0 MB/s |
| 38 | // BM_isalpha 1603 1597 511027 152.9 MB/s |
| 39 | // BM_ascii_isalpha 223 224 3111595 1088.5 MB/s |
| 40 | // BM_isdigit 181 183 3825722 1336.4 MB/s |
| 41 | // BM_ascii_isdigit 236 239 2929312 1023.3 MB/s |
| 42 | // BM_isalnum 1623 1615 460596 151.2 MB/s |
| 43 | // BM_ascii_isalnum 253 255 2745518 959.1 MB/s |
| 44 | // BM_isspace 1264 1258 555639 194.1 MB/s |
| 45 | // BM_ascii_isspace 253 255 2745507 959.1 MB/s |
| 46 | // BM_ispunct 1324 1317 555639 185.3 MB/s |
| 47 | // BM_ascii_ispunct 252 255 2745507 959.1 MB/s |
| 48 | // BM_isblank 1433 1426 511027 171.2 MB/s |
| 49 | // BM_ascii_isblank 253 254 2745518 960.5 MB/s |
| 50 | // BM_iscntrl 1643 1634 530383 149.4 MB/s |
| 51 | // BM_ascii_iscntrl 252 255 2745518 959.1 MB/s |
| 52 | // BM_isxdigit 1826 1817 414265 134.3 MB/s |
| 53 | // BM_ascii_isxdigit 258 260 2692712 939.3 MB/s |
| 54 | // BM_isprint 1677 1669 419224 146.2 MB/s |
| 55 | // BM_ascii_isprint 237 239 2929312 1021.8 MB/s |
| 56 | // BM_isgraph 1436 1429 507324 170.9 MB/s |
| 57 | // BM_ascii_isgraph 237 239 2929312 1021.8 MB/s |
| 58 | // BM_isupper 1550 1544 463647 158.1 MB/s |
| 59 | // BM_ascii_isupper 237 239 2929312 1021.8 MB/s |
| 60 | // BM_islower 1301 1294 538544 188.7 MB/s |
| 61 | // BM_ascii_islower 237 239 2929312 1023.3 MB/s |
| 62 | // BM_isascii 182 181 3846746 1345.7 MB/s |
| 63 | // BM_ascii_isascii 209 211 3318039 1159.1 MB/s |
| 64 | // BM_tolower 1743 1764 397786 138.4 MB/s |
| 65 | // BM_ascii_tolower 210 211 3318039 1155.8 MB/s |
| 66 | // BM_toupper 1742 1764 397788 138.4 MB/s |
| 67 | // BM_ascii_toupper 212 211 3302401 1156.9 MB/s |
| 68 | // |
| 69 | // ---------------------------------------------------------------------- |
| 70 | |
// kAsciiPropertyBits holds one flag byte for each of the 256 possible byte
// values. It is only declared here; the definition lives elsewhere.
// Flag bits, as tested by the predicates in this header:
//   0x01 alpha   0x04 alnum   0x08 space   0x10 punct
//   0x20 blank   0x40 cntrl   0x80 xdigit
// (0x02 is not tested by any predicate in this header.)
// kApb is a short alias that keeps the one-line predicates compact; it is
// #undef'd once they have been defined.
| 71 | #define kApb kAsciiPropertyBits |
| 72 | extern const uint8 kAsciiPropertyBits[256]; |
| 73 | static inline bool ascii_isalpha(unsigned char c) { return kApb[c] & 0x01; } |
| 74 | static inline bool ascii_isalnum(unsigned char c) { return kApb[c] & 0x04; } |
| 75 | static inline bool ascii_isspace(unsigned char c) { return kApb[c] & 0x08; } |
| 76 | static inline bool ascii_ispunct(unsigned char c) { return kApb[c] & 0x10; } |
| 77 | static inline bool ascii_isblank(unsigned char c) { return kApb[c] & 0x20; } |
| 78 | static inline bool ascii_iscntrl(unsigned char c) { return kApb[c] & 0x40; } |
| 79 | static inline bool ascii_isxdigit(unsigned char c) { return kApb[c] & 0x80; } |
// Returns true iff c is one of the decimal digits '0' through '9'.
// Done with a direct range comparison; no table lookup is involved.
static inline bool ascii_isdigit(unsigned char c) {
  return !(c < '0' || c > '9');
}
// Returns true iff c lies in the range 0x20 (space) through 0x7e ('~'),
// inclusive. Everything below 0x20 and everything from 0x7f up is rejected.
static inline bool ascii_isprint(unsigned char c) {
  return 0x20 <= c && c <= 0x7e;
}
// Returns true iff c lies in the range 0x21 ('!') through 0x7e ('~'),
// inclusive: the same range as ascii_isprint() minus the space character.
static inline bool ascii_isgraph(unsigned char c) {
  if (c <= ' ') {
    return false;
  }
  return c < 0x7f;
}
// Returns true iff c is one of the uppercase letters 'A' through 'Z'.
// Done with a direct range comparison; no table lookup is involved.
static inline bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
// Returns true iff c is one of the lowercase letters 'a' through 'z'.
// Done with a direct range comparison; no table lookup is involved.
static inline bool ascii_islower(unsigned char c) {
  if (c < 'a') {
    return false;
  }
  return c <= 'z';
}
// Returns true iff c is a 7-bit ASCII byte, i.e. in the range 0x00-0x7f.
//
// The high bit is tested directly rather than converting c to signed char
// and checking the sign: converting a value above 0x7f to signed char is
// implementation-defined before C++20, so the result of that form is not
// guaranteed by the language.
static inline bool ascii_isascii(unsigned char c) {
  return (c & 0x80) == 0;
}
// kApb was only a local shorthand for the predicates defined above; remove
// it so the macro does not leak into files that include this header.
| 88 | #undef kApb |
| 89 | |
| 90 | extern const uint8 kAsciiToLower[256]; |
| 91 | static inline char ascii_tolower(unsigned char c) { return kAsciiToLower[c]; } |
| 92 | extern const uint8 kAsciiToUpper[256]; |
| 93 | static inline char ascii_toupper(unsigned char c) { return kAsciiToUpper[c]; } |
| 94 | |
| 95 | #endif // STRINGS_ASCII_CTYPE_H__ |
| 96 | |