1/* Character handling in C locale.
2
3 These functions work like the corresponding functions in <ctype.h>,
4 except that they have the C (POSIX) locale hardwired, whereas the
5 <ctype.h> functions' behaviour depends on the current locale set via
6 setlocale.
7
8 Copyright (C) 2000-2003, 2006, 2008-2019 Free Software Foundation, Inc.
9
10This program is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 3 of the License, or
13(at your option) any later version.
14
15This program is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with this program; if not, see <https://www.gnu.org/licenses/>. */
22
23#ifndef C_CTYPE_H
24#define C_CTYPE_H
25
26#include <stdbool.h>
27
28#ifndef _GL_INLINE_HEADER_BEGIN
29 #error "Please include config.h first."
30#endif
31_GL_INLINE_HEADER_BEGIN
/* Inline specifier applied to every function defined below.  It defaults
   to _GL_INLINE, but a file that defines C_CTYPE_INLINE before including
   this header overrides that default.  */
#ifndef C_CTYPE_INLINE
# define C_CTYPE_INLINE _GL_INLINE
#endif
35
36#ifdef __cplusplus
37extern "C" {
38#endif
39
40
41/* The functions defined in this file assume the "C" locale and a character
42 set without diacritics (ASCII-US or EBCDIC-US or something like that).
43 Even if the "C" locale on a particular system is an extension of the ASCII
44 character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
45 is ISO-8859-1), the functions in this file recognize only the ASCII
46 characters. */
47
48
/* Decide at preprocessing time which character set is in use.
   C_CTYPE_ASCII is defined to 1 when the space character, the digits,
   the unaccented letters and the punctuation characters listed below all
   have their ASCII codes.  ('$', '@' and '`' are not tested.)
   The character set is then ASCII or one of its variants or extensions,
   not EBCDIC.  Testing the value of '\n' and '\r' is not relevant.
   Otherwise a handful of EBCDIC code points is checked, and compilation
   stops if those do not match either.  */
#if (' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35 && '%' == 37 \
     && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41 && '*' == 42 \
     && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46 && '/' == 47 \
     && '0' == 48 && '1' == 49 && '2' == 50 && '3' == 51 && '4' == 52 \
     && '5' == 53 && '6' == 54 && '7' == 55 && '8' == 56 && '9' == 57 \
     && ':' == 58 && ';' == 59 && '<' == 60 && '=' == 61 && '>' == 62 \
     && '?' == 63 \
     && 'A' == 65 && 'B' == 66 && 'C' == 67 && 'D' == 68 && 'E' == 69 \
     && 'F' == 70 && 'G' == 71 && 'H' == 72 && 'I' == 73 && 'J' == 74 \
     && 'K' == 75 && 'L' == 76 && 'M' == 77 && 'N' == 78 && 'O' == 79 \
     && 'P' == 80 && 'Q' == 81 && 'R' == 82 && 'S' == 83 && 'T' == 84 \
     && 'U' == 85 && 'V' == 86 && 'W' == 87 && 'X' == 88 && 'Y' == 89 \
     && 'Z' == 90 \
     && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94 && '_' == 95 \
     && 'a' == 97 && 'b' == 98 && 'c' == 99 && 'd' == 100 && 'e' == 101 \
     && 'f' == 102 && 'g' == 103 && 'h' == 104 && 'i' == 105 \
     && 'j' == 106 && 'k' == 107 && 'l' == 108 && 'm' == 109 \
     && 'n' == 110 && 'o' == 111 && 'p' == 112 && 'q' == 113 \
     && 'r' == 114 && 's' == 115 && 't' == 116 && 'u' == 117 \
     && 'v' == 118 && 'w' == 119 && 'x' == 120 && 'y' == 121 \
     && 'z' == 122 \
     && '{' == 123 && '|' == 124 && '}' == 125 && '~' == 126)
# define C_CTYPE_ASCII 1
#else
# if ! (' ' == '\x40' && '0' == '\xf0' \
        && 'A' == '\xc1' && 'J' == '\xd1' && 'S' == '\xe2' \
        && 'a' == '\x81' && 'j' == '\x91' && 's' == '\xa2')
#  error "Only ASCII and EBCDIC are supported"
# endif
#endif
80
/* After the character-set check above, a negative 'A' can only mean EBCDIC
   (where 'A' is '\xc1') combined with a signed plain 'char'; refuse to
   compile for such a platform.  */
#if 'A' < 0
# error "EBCDIC and char is signed -- not supported"
#endif
84
/* Case labels for every control character: the seven that have a
   backslash-letter escape, followed by the remaining ones supplied by
   _C_CTYPE_OTHER_CNTRL.  As with the other case-list macros, the user
   writes the final ':' after the macro invocation.  */

#define _C_CTYPE_CNTRL \
   case '\a': case '\b': case '\t': case '\n': \
   case '\v': case '\f': case '\r': \
   _C_CTYPE_OTHER_CNTRL
91
/* Case labels for the ASCII control characters that have no
   backslash-letter escape.  The list has no trailing ':'; the user
   supplies it.  */

#if C_CTYPE_ASCII
  /* Codes 0x00-0x06, 0x0e-0x1f and 0x7f.  */
# define _C_CTYPE_OTHER_CNTRL \
    case '\x00': case '\x01': case '\x02': case '\x03': case '\x04': \
    case '\x05': case '\x06': \
    case '\x0e': case '\x0f': \
    case '\x10': case '\x11': case '\x12': case '\x13': case '\x14': \
    case '\x15': case '\x16': case '\x17': case '\x18': case '\x19': \
    case '\x1a': case '\x1b': case '\x1c': case '\x1d': case '\x1e': \
    case '\x1f': \
    case '\x7f'
#else
  /* Use EBCDIC code page 1047's assignments for ASCII control chars;
     assume all EBCDIC code pages agree about these assignments.  */
# define _C_CTYPE_OTHER_CNTRL \
    case '\x00': case '\x01': case '\x02': case '\x03': case '\x07': \
    case '\x0e': case '\x0f': \
    case '\x10': case '\x11': case '\x12': case '\x13': case '\x18': \
    case '\x19': case '\x1c': case '\x1d': case '\x1e': case '\x1f': \
    case '\x26': case '\x27': case '\x2d': case '\x2e': \
    case '\x32': case '\x37': case '\x3c': case '\x3d': case '\x3f'
#endif
115
/* Case labels for the lowercase hex letters 'a'..'f', and for all the
   lowercase letters 'a'..'z', each shifted by the constant N.  N is 0 for
   the lowercase letters themselves and 'A' - 'a' for their uppercase
   counterparts.  The lists have no trailing ':'; the user supplies it.  */

#define _C_CTYPE_LOWER_A_THRU_F_N(N) \
   case 'a' + (N): case 'b' + (N): case 'c' + (N): \
   case 'd' + (N): case 'e' + (N): case 'f' + (N)
#define _C_CTYPE_LOWER_N(N) \
   _C_CTYPE_LOWER_A_THRU_F_N(N): \
   case 'g' + (N): case 'h' + (N): case 'i' + (N): case 'j' + (N): \
   case 'k' + (N): case 'l' + (N): case 'm' + (N): \
   case 'n' + (N): case 'o' + (N): case 'p' + (N): case 'q' + (N): \
   case 'r' + (N): case 's' + (N): case 't' + (N): \
   case 'u' + (N): case 'v' + (N): case 'w' + (N): case 'x' + (N): \
   case 'y' + (N): case 'z' + (N)
128
129/* Cases for hex letters, digits, lower, punct, and upper. */
130
/* The twelve hex letters: 'A'..'F' (the lowercase ones shifted by
   'A' - 'a') and 'a'..'f'.  */
#define _C_CTYPE_A_THRU_F \
   _C_CTYPE_LOWER_A_THRU_F_N ('A' - 'a'): \
   _C_CTYPE_LOWER_A_THRU_F_N (0)
/* The ten decimal digits.  */
#define _C_CTYPE_DIGIT \
   case '0': case '1': case '2': case '3': case '4': \
   case '5': case '6': case '7': case '8': case '9'
/* The 26 lowercase letters: _C_CTYPE_LOWER_N with a zero offset.  */
#define _C_CTYPE_LOWER _C_CTYPE_LOWER_N (0)
/* The 32 punctuation characters, i.e. the printable characters that are
   neither the space, nor digits, nor letters.  */
#define _C_CTYPE_PUNCT \
   case '!': case '"': case '#': case '$': case '%': case '&': \
   case '\'': case '(': case ')': case '*': case '+': case ',': \
   case '-': case '.': case '/': \
   case ':': case ';': case '<': case '=': case '>': case '?': \
   case '@': \
   case '[': case '\\': case ']': case '^': case '_': case '`': \
   case '{': case '|': case '}': case '~'
/* The 26 uppercase letters: each lowercase letter shifted by the distance
   from 'a' to 'A'.  */
#define _C_CTYPE_UPPER _C_CTYPE_LOWER_N ('A' - 'a')
149
150
151/* Function definitions. */
152
153/* Unlike the functions in <ctype.h>, which require an argument in the range
154 of the 'unsigned char' type, the functions here operate on values that are
155 in the 'unsigned char' range or in the 'char' range. In other words,
156 when you have a 'char' value, you need to cast it before using it as
157 argument to a <ctype.h> function:
158
159 const char *s = ...;
160 if (isalpha ((unsigned char) *s)) ...
161
162 but you don't need to cast it for the functions defined in this file:
163
164 const char *s = ...;
165 if (c_isalpha (*s)) ...
166 */
167
168C_CTYPE_INLINE bool
169c_isalnum (int c)
170{
171 switch (c)
172 {
173 _C_CTYPE_DIGIT:
174 _C_CTYPE_LOWER:
175 _C_CTYPE_UPPER:
176 return true;
177 default:
178 return false;
179 }
180}
181
182C_CTYPE_INLINE bool
183c_isalpha (int c)
184{
185 switch (c)
186 {
187 _C_CTYPE_LOWER:
188 _C_CTYPE_UPPER:
189 return true;
190 default:
191 return false;
192 }
193}
194
195/* The function isascii is not locale dependent.
196 Its use in EBCDIC is questionable. */
197C_CTYPE_INLINE bool
198c_isascii (int c)
199{
200 switch (c)
201 {
202 case ' ':
203 _C_CTYPE_CNTRL:
204 _C_CTYPE_DIGIT:
205 _C_CTYPE_LOWER:
206 _C_CTYPE_PUNCT:
207 _C_CTYPE_UPPER:
208 return true;
209 default:
210 return false;
211 }
212}
213
214C_CTYPE_INLINE bool
215c_isblank (int c)
216{
217 return c == ' ' || c == '\t';
218}
219
220C_CTYPE_INLINE bool
221c_iscntrl (int c)
222{
223 switch (c)
224 {
225 _C_CTYPE_CNTRL:
226 return true;
227 default:
228 return false;
229 }
230}
231
232C_CTYPE_INLINE bool
233c_isdigit (int c)
234{
235 switch (c)
236 {
237 _C_CTYPE_DIGIT:
238 return true;
239 default:
240 return false;
241 }
242}
243
244C_CTYPE_INLINE bool
245c_isgraph (int c)
246{
247 switch (c)
248 {
249 _C_CTYPE_DIGIT:
250 _C_CTYPE_LOWER:
251 _C_CTYPE_PUNCT:
252 _C_CTYPE_UPPER:
253 return true;
254 default:
255 return false;
256 }
257}
258
259C_CTYPE_INLINE bool
260c_islower (int c)
261{
262 switch (c)
263 {
264 _C_CTYPE_LOWER:
265 return true;
266 default:
267 return false;
268 }
269}
270
271C_CTYPE_INLINE bool
272c_isprint (int c)
273{
274 switch (c)
275 {
276 case ' ':
277 _C_CTYPE_DIGIT:
278 _C_CTYPE_LOWER:
279 _C_CTYPE_PUNCT:
280 _C_CTYPE_UPPER:
281 return true;
282 default:
283 return false;
284 }
285}
286
287C_CTYPE_INLINE bool
288c_ispunct (int c)
289{
290 switch (c)
291 {
292 _C_CTYPE_PUNCT:
293 return true;
294 default:
295 return false;
296 }
297}
298
299C_CTYPE_INLINE bool
300c_isspace (int c)
301{
302 switch (c)
303 {
304 case ' ': case '\t': case '\n': case '\v': case '\f': case '\r':
305 return true;
306 default:
307 return false;
308 }
309}
310
311C_CTYPE_INLINE bool
312c_isupper (int c)
313{
314 switch (c)
315 {
316 _C_CTYPE_UPPER:
317 return true;
318 default:
319 return false;
320 }
321}
322
323C_CTYPE_INLINE bool
324c_isxdigit (int c)
325{
326 switch (c)
327 {
328 _C_CTYPE_DIGIT:
329 _C_CTYPE_A_THRU_F:
330 return true;
331 default:
332 return false;
333 }
334}
335
336C_CTYPE_INLINE int
337c_tolower (int c)
338{
339 switch (c)
340 {
341 _C_CTYPE_UPPER:
342 return c - 'A' + 'a';
343 default:
344 return c;
345 }
346}
347
348C_CTYPE_INLINE int
349c_toupper (int c)
350{
351 switch (c)
352 {
353 _C_CTYPE_LOWER:
354 return c - 'a' + 'A';
355 default:
356 return c;
357 }
358}
359
360#ifdef __cplusplus
361}
362#endif
363
364_GL_INLINE_HEADER_END
365
366#endif /* C_CTYPE_H */
367