1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 1999-2015, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: uinvchar.h |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:2 |
14 | * |
15 | * created on: 2004sep14 |
16 | * created by: Markus W. Scherer |
17 | * |
18 | * Definitions for handling invariant characters, moved here from putil.c |
19 | * for better modularization. |
20 | */ |
21 | |
22 | #ifndef __UINVCHAR_H__ |
23 | #define __UINVCHAR_H__ |
24 | |
25 | #include "unicode/utypes.h" |
26 | #ifdef __cplusplus |
27 | #include "unicode/unistr.h" |
28 | #endif |
29 | |
30 | /** |
31 | * Check if a char string only contains invariant characters. |
32 | * See utypes.h for details. |
33 | * |
34 | * @param s Input string pointer. |
35 | * @param length Length of the string, can be -1 if NUL-terminated. |
36 | * @return true if s contains only invariant characters. |
37 | * |
38 | * @internal (ICU 2.8) |
39 | */ |
40 | U_CAPI UBool U_EXPORT2 |
41 | uprv_isInvariantString(const char *s, int32_t length); |
42 | |
43 | /** |
44 | * Check if a Unicode string only contains invariant characters. |
45 | * See utypes.h for details. |
46 | * |
47 | * @param s Input string pointer. |
48 | * @param length Length of the string, can be -1 if NUL-terminated. |
49 | * @return true if s contains only invariant characters. |
50 | * |
51 | * @internal (ICU 2.8) |
52 | */ |
53 | U_CAPI UBool U_EXPORT2 |
54 | uprv_isInvariantUString(const UChar *s, int32_t length); |
55 | |
/**
 * \def U_UPPER_ORDINAL
 * Maps an uppercase invariant character to its ordinal number, counted
 * from 'A' (so 'A' yields 0).
 *
 * The argument is not validated. In the EBCDIC variant the argument
 * expression appears several times in the expansion, so it should be free
 * of side effects.
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define U_UPPER_ORDINAL(ch) ((ch) - 'A')
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define U_UPPER_ORDINAL(ch) \
        (((ch) < 'J') ? ((ch) - 'A') \
                      : (((ch) < 'S') ? ((ch) - 'J' + 9) \
                                      : ((ch) - 'S' + 18)))
#else
#   error Unknown charset family!
#endif
70 | |
71 | #ifdef __cplusplus |
72 | |
73 | U_NAMESPACE_BEGIN |
74 | |
/**
 * Validating counterpart of U_UPPER_ORDINAL(x).
 *
 * @param c Character code to map.
 * @return 0..25 when c is one of A..Z; otherwise a value outside 0..25.
 */
inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
    // The distance from 'A' is the ordinal; anything that is not A..Z
    // lands below 0 or above 25.
    const int32_t ordinal = c - 'A';
    return ordinal;
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges
    // A-I (9 letters), J-R (9), S-Z (8), with gaps in between.
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c >= 'S') {
        return c - 'S' + 18;                    // S-Z --> 18..25 (above Z: > 25)
    }
    if (c >= 'J') {
        return (c <= 'R') ? (c - 'J' + 9) : -1; // J-R --> 9..17, gap R..S --> -1
    }
    return (c <= 'I') ? (c - 'A') : -1;         // A-I --> 0..8, gap I..J --> -1
#else
#   error Unknown charset family!
#endif
}
94 | |
/**
 * Lowercase, validating counterpart of U_UPPER_ORDINAL(x).
 *
 * @param c Character code to map.
 * @return 0..25 when c is one of a..z; otherwise a value outside 0..25.
 */
inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
    // The distance from 'a' is the ordinal; anything that is not a..z
    // lands below 0 or above 25.
    const int32_t ordinal = c - 'a';
    return ordinal;
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges
    // a-i (9 letters), j-r (9), s-z (8), with gaps in between.
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c >= 's') {
        return c - 's' + 18;                    // s-z --> 18..25 (above z: > 25)
    }
    if (c >= 'j') {
        return (c <= 'r') ? (c - 'j' + 9) : -1; // j-r --> 9..17, gap r..s --> -1
    }
    return (c <= 'i') ? (c - 'a') : -1;         // a-i --> 0..8, gap i..j --> -1
#else
#   error Unknown charset family!
#endif
}
112 | |
113 | U_NAMESPACE_END |
114 | |
115 | #endif |
116 | |
117 | /** |
118 | * Returns true if c == '@' is possible. |
119 | * The @ sign is variant, and the @ sign used on one |
120 | * EBCDIC machine won't be compiled the same way on other EBCDIC based machines. |
121 | * @internal |
122 | */ |
123 | U_CFUNC UBool |
124 | uprv_isEbcdicAtSign(char c); |
125 | |
/**
 * \def uprv_isAtSign
 * Reports whether c == '@' is possible.
 * On ASCII platforms this is an exact comparison against '@'; on EBCDIC
 * platforms the decision is delegated to uprv_isEbcdicAtSign().
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_isAtSign(ch) ((ch) == '@')
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_isAtSign(ch) uprv_isEbcdicAtSign(ch)
#else
#   error Unknown charset family!
#endif
139 | |
140 | /** |
141 | * Compare two EBCDIC invariant-character strings in ASCII order. |
142 | * @internal |
143 | */ |
144 | U_CAPI int32_t U_EXPORT2 |
145 | uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); |
146 | |
/**
 * \def uprv_compareInvCharsAsAscii
 * Compares two strings of invariant characters in ASCII order.
 * On ASCII platforms this expands to uprv_strcmp(); on EBCDIC platforms it
 * expands to uprv_compareInvEbcdicAsAscii().
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_compareInvCharsAsAscii(left, right) uprv_strcmp(left, right)
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_compareInvCharsAsAscii(left, right) uprv_compareInvEbcdicAsAscii(left, right)
#else
#   error Unknown charset family!
#endif
159 | |
160 | /** |
161 | * Converts an EBCDIC invariant character to ASCII. |
162 | * @internal |
163 | */ |
164 | U_CAPI char U_EXPORT2 |
165 | uprv_ebcdicToAscii(char c); |
166 | |
/**
 * \def uprv_invCharToAscii
 * Converts an invariant character to ASCII.
 * On ASCII platforms the argument is passed through unchanged; on EBCDIC
 * platforms the conversion is done by uprv_ebcdicToAscii().
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_invCharToAscii(ch) (ch)
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_invCharToAscii(ch) uprv_ebcdicToAscii(ch)
#else
#   error Unknown charset family!
#endif
179 | |
180 | /** |
181 | * Converts an EBCDIC invariant character to lowercase ASCII. |
182 | * @internal |
183 | */ |
184 | U_CAPI char U_EXPORT2 |
185 | uprv_ebcdicToLowercaseAscii(char c); |
186 | |
/**
 * \def uprv_invCharToLowercaseAscii
 * Converts an invariant character to lowercase ASCII.
 *
 * This is an object-like macro (no parameter list): the name itself expands
 * to uprv_asciitolower on ASCII platforms and to uprv_ebcdicToLowercaseAscii
 * on EBCDIC platforms.
 * @internal
 */
#if U_CHARSET_FAMILY == U_ASCII_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_asciitolower
#elif U_CHARSET_FAMILY == U_EBCDIC_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
#else
#   error Unknown charset family!
#endif
199 | |
200 | /** |
201 | * Copy EBCDIC to ASCII |
202 | * @internal |
203 | * @see uprv_strncpy |
204 | */ |
205 | U_CAPI uint8_t* U_EXPORT2 |
206 | uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); |
207 | |
208 | |
209 | /** |
210 | * Copy ASCII to EBCDIC |
211 | * @internal |
212 | * @see uprv_strncpy |
213 | */ |
214 | U_CAPI uint8_t* U_EXPORT2 |
215 | uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); |
216 | |
217 | |
218 | |
219 | #endif |
220 | |