// Copyright 2007 Google Inc. All Rights Reserved.

3#ifndef STRINGS_ASCII_CTYPE_H__
4#define STRINGS_ASCII_CTYPE_H__
5
6#include "base/basictypes.h"
7
// ----------------------------------------------------------------------
// ascii_isalpha()
// ascii_isdigit()
// ascii_isalnum()
// ascii_isspace()
// ascii_ispunct()
// ascii_isblank()
// ascii_iscntrl()
// ascii_isxdigit()
// ascii_isprint()
// ascii_isgraph()
// ascii_isupper()
// ascii_islower()
// ascii_isascii()
// ascii_tolower()
// ascii_toupper()
//    The ctype.h versions of these routines are slow with some
//    compilers and/or architectures, perhaps because of locale
//    issues.  These versions work for ascii only: the predicates
//    return false for everything above \x7f (which means they return
//    false for any byte from any non-ascii UTF8 character).
//
//    The individual bits do not have names because the array definition
//    is already tightly coupled to this, and names would make it harder
//    to read and debug.
//
//    This is an example of the benchmark times from the unittest:
//    $ ascii_ctype_test --benchmarks=all --heap_check=
// Benchmark          Time(ns)  CPU(ns)  Iterations
// --------------------------------------------------
// BM_Identity             121      120     5785985  2027.0 MB/s
// BM_isalpha             1603     1597      511027   152.9 MB/s
// BM_ascii_isalpha        223      224     3111595  1088.5 MB/s
// BM_isdigit              181      183     3825722  1336.4 MB/s
// BM_ascii_isdigit        236      239     2929312  1023.3 MB/s
// BM_isalnum             1623     1615      460596   151.2 MB/s
// BM_ascii_isalnum        253      255     2745518   959.1 MB/s
// BM_isspace             1264     1258      555639   194.1 MB/s
// BM_ascii_isspace        253      255     2745507   959.1 MB/s
// BM_ispunct             1324     1317      555639   185.3 MB/s
// BM_ascii_ispunct        252      255     2745507   959.1 MB/s
// BM_isblank             1433     1426      511027   171.2 MB/s
// BM_ascii_isblank        253      254     2745518   960.5 MB/s
// BM_iscntrl             1643     1634      530383   149.4 MB/s
// BM_ascii_iscntrl        252      255     2745518   959.1 MB/s
// BM_isxdigit            1826     1817      414265   134.3 MB/s
// BM_ascii_isxdigit       258      260     2692712   939.3 MB/s
// BM_isprint             1677     1669      419224   146.2 MB/s
// BM_ascii_isprint        237      239     2929312  1021.8 MB/s
// BM_isgraph             1436     1429      507324   170.9 MB/s
// BM_ascii_isgraph        237      239     2929312  1021.8 MB/s
// BM_isupper             1550     1544      463647   158.1 MB/s
// BM_ascii_isupper        237      239     2929312  1021.8 MB/s
// BM_islower             1301     1294      538544   188.7 MB/s
// BM_ascii_islower        237      239     2929312  1023.3 MB/s
// BM_isascii              182      181     3846746  1345.7 MB/s
// BM_ascii_isascii        209      211     3318039  1159.1 MB/s
// BM_tolower             1743     1764      397786   138.4 MB/s
// BM_ascii_tolower        210      211     3318039  1155.8 MB/s
// BM_toupper             1742     1764      397788   138.4 MB/s
// BM_ascii_toupper        212      211     3302401  1156.9 MB/s
//
// ----------------------------------------------------------------------

71#define kApb kAsciiPropertyBits
72extern const uint8 kAsciiPropertyBits[256];
73static inline bool ascii_isalpha(unsigned char c) { return kApb[c] & 0x01; }
74static inline bool ascii_isalnum(unsigned char c) { return kApb[c] & 0x04; }
75static inline bool ascii_isspace(unsigned char c) { return kApb[c] & 0x08; }
76static inline bool ascii_ispunct(unsigned char c) { return kApb[c] & 0x10; }
77static inline bool ascii_isblank(unsigned char c) { return kApb[c] & 0x20; }
78static inline bool ascii_iscntrl(unsigned char c) { return kApb[c] & 0x40; }
79static inline bool ascii_isxdigit(unsigned char c) { return kApb[c] & 0x80; }
// Returns true iff c lies in the range '0'..'9' (inclusive). Computed with a
// range comparison; no table lookup is involved.
static inline bool ascii_isdigit(unsigned char c) {
  return c >= '0' && c <= '9';
}
// Returns true iff c is in [32, 127), i.e. from the space character (0x20)
// up to but excluding 0x7f.
static inline bool ascii_isprint(unsigned char c) {
  return c >= 0x20 && c < 0x7f;
}
// Returns true iff c is in (32, 127): the same range as ascii_isprint()
// except that the space character (0x20) is excluded.
static inline bool ascii_isgraph(unsigned char c) {
  return c > 0x20 && c < 0x7f;
}
// Returns true iff c lies in the range 'A'..'Z' (inclusive).
static inline bool ascii_isupper(unsigned char c) {
  return c >= 'A' && c <= 'Z';
}
// Returns true iff c lies in the range 'a'..'z' (inclusive).
static inline bool ascii_islower(unsigned char c) {
  return c >= 'a' && c <= 'z';
}
// Returns true iff c is a 7-bit value, i.e. c <= 0x7f.
static inline bool ascii_isascii(unsigned char c) {
  // Plain unsigned comparison: converting a value above 127 to signed char is
  // implementation-defined before C++20, so the sign-bit trick is avoided.
  return c < 0x80;
}
// Drop the short alias so it is not visible to code that includes this header.
#undef kApb

90extern const uint8 kAsciiToLower[256];
91static inline char ascii_tolower(unsigned char c) { return kAsciiToLower[c]; }
92extern const uint8 kAsciiToUpper[256];
93static inline char ascii_toupper(unsigned char c) { return kAsciiToUpper[c]; }
94
95#endif // STRINGS_ASCII_CTYPE_H__
96