1 | /* |
2 | * |
3 | * Copyright (c) 2004 |
4 | * John Maddock |
5 | * |
6 | * Use, modification and distribution are subject to the |
7 | * Boost Software License, Version 1.0. (See accompanying file |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
9 | * |
10 | */ |
11 | |
12 | /* |
13 | * LOCATION: see http://www.boost.org for most recent version. |
14 | * FILE: wc_regex_traits.cpp |
15 | * VERSION: see <boost/version.hpp> |
16 | * DESCRIPTION: Implements out of line members for c_regex_traits<wchar_t> |
17 | */ |
18 | |
19 | |
20 | #define BOOST_REGEX_SOURCE |
21 | |
22 | #include <boost/detail/workaround.hpp> |
23 | #include <memory> |
24 | #include <string> |
25 | #include "internals.hpp" |
26 | |
// Workaround section: only compiled when the DLL C++ runtime (_DLL_CPPLIB) is in
// use with a native wchar_t, the build is not pure-CLR (_M_CEE_PURE), the standard
// library is neither STLport nor Rogue Wave, and BOOST_MSVC is below 1600.
27 | #if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \ |
28 | && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\ |
29 | && BOOST_WORKAROUND(BOOST_MSVC, <1600) |
30 | // |
31 | // This is a horrible workaround, but without declaring these symbols extern we get |
32 | // duplicate symbol errors when linking if the application is built without |
33 | // /Zc:wchar_t |
34 | // |
// Use the runtime's own decoration macro for the declarations below: prefer
// _CRTIMP2_PURE when the headers define it, otherwise fall back to _CRTIMP2.
35 | #ifdef _CRTIMP2_PURE |
36 | # define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE |
37 | #else |
38 | # define BOOST_REGEX_STDLIB_DECL _CRTIMP2 |
39 | #endif |
40 | |
41 | namespace std{ |
42 | |
// The string support classes for unsigned short; only for BOOST_MSVC >= 1400.
43 | #if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) |
44 | template class BOOST_REGEX_STDLIB_DECL allocator<unsigned short>; |
45 | template class BOOST_REGEX_STDLIB_DECL _String_val<unsigned short, allocator<unsigned short> >; |
46 | template class BOOST_REGEX_STDLIB_DECL basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >; |
47 | #endif |
48 | |
// char_traits<unsigned short>::length is declared as a specialization for a
// narrower compiler range (presumably above 1300 and up to 1400 -- confirm
// against the BOOST_TESTED_AT documentation).
49 | #if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400)) |
50 | template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits<unsigned short>::length(unsigned short const*); |
51 | #endif |
52 | |
// Comparison operators on basic_string<unsigned short>: operator== in its three
// string/pointer combinations, then operator< and operator> for two strings.
53 | template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( |
54 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&, |
55 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&); |
56 | template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( |
57 | const unsigned short *, |
58 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&); |
59 | template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( |
60 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&, |
61 | const unsigned short *); |
62 | template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<( |
63 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&, |
64 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&); |
65 | template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>( |
66 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&, |
67 | const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&); |
68 | } |
69 | #endif |
70 | |
71 | #include <boost/regex/config.hpp> |
72 | #include <boost/detail/workaround.hpp> |
73 | |
74 | #if !BOOST_WORKAROUND(__BORLANDC__, < 0x560) |
75 | |
76 | #include <boost/regex/v4/c_regex_traits.hpp> |
77 | #ifndef BOOST_NO_WREGEX |
78 | #include <boost/regex/v4/primary_transform.hpp> |
79 | #include <boost/regex/v4/regex_traits_defaults.hpp> |
80 | |
// When BOOST_NO_STDC_NAMESPACE is defined, pull the global ::wcstol into
// namespace std so that the std::wcstol call in c_regex_traits<wchar_t>::value
// resolves (presumably the C library names are only in the global namespace
// on such platforms).
81 | #if defined(BOOST_NO_STDC_NAMESPACE) |
82 | namespace std{ |
83 | using ::wcstol; |
84 | } |
85 | #endif |
86 | |
87 | namespace boost{ |
88 | |
89 | c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2) |
90 | { |
91 | std::size_t r; |
92 | std::size_t s = 10; |
93 | std::wstring src(p1, p2); |
94 | std::wstring result(s, L' '); |
95 | while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) |
96 | { |
97 | #if defined(_CPPLIB_VER) |
98 | // |
99 | // A bug in VC11 and 12 causes the program to hang if we pass a null-string |
100 | // to std::strxfrm, but only for certain locales :-( |
101 | // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware). |
102 | // |
103 | if(r == INT_MAX) |
104 | { |
105 | result.erase(); |
106 | result.insert(result.begin(), static_cast<wchar_t>(0)); |
107 | return result; |
108 | } |
109 | #endif |
110 | result.append(r - s + 3, L' '); |
111 | s = result.size(); |
112 | } |
113 | result.erase(r); |
114 | return result; |
115 | } |
116 | |
117 | c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2) |
118 | { |
119 | static wchar_t s_delim; |
120 | static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim); |
121 | std::wstring result; |
122 | // |
123 | // What we do here depends upon the format of the sort key returned by |
124 | // sort key returned by this->transform: |
125 | // |
126 | switch(s_collate_type) |
127 | { |
128 | case ::boost::BOOST_REGEX_DETAIL_NS::sort_C: |
129 | case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown: |
130 | // the best we can do is translate to lower case, then get a regular sort key: |
131 | { |
132 | result.assign(p1, p2); |
133 | for(std::wstring::size_type i = 0; i < result.size(); ++i) |
134 | result[i] = (std::towlower)(result[i]); |
135 | result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size()); |
136 | break; |
137 | } |
138 | case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed: |
139 | { |
140 | // get a regular sort key, and then truncate it: |
141 | result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size()); |
142 | result.erase(s_delim); |
143 | break; |
144 | } |
145 | case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim: |
146 | // get a regular sort key, and then truncate everything after the delim: |
147 | result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size()); |
148 | if(result.size() && (result[0] == s_delim)) |
149 | break; |
150 | std::size_t i; |
151 | for(i = 0; i < result.size(); ++i) |
152 | { |
153 | if(result[i] == s_delim) |
154 | break; |
155 | } |
156 | result.erase(i); |
157 | break; |
158 | } |
159 | if(result.empty()) |
160 | result = std::wstring(1, char(0)); |
161 | return result; |
162 | } |
163 | |
164 | c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2) |
165 | { |
166 | static const char_class_type masks[] = |
167 | { |
168 | 0, |
169 | char_class_alnum, |
170 | char_class_alpha, |
171 | char_class_blank, |
172 | char_class_cntrl, |
173 | char_class_digit, |
174 | char_class_digit, |
175 | char_class_graph, |
176 | char_class_horizontal, |
177 | char_class_lower, |
178 | char_class_lower, |
179 | char_class_print, |
180 | char_class_punct, |
181 | char_class_space, |
182 | char_class_space, |
183 | char_class_upper, |
184 | char_class_unicode, |
185 | char_class_upper, |
186 | char_class_vertical, |
187 | char_class_alnum | char_class_word, |
188 | char_class_alnum | char_class_word, |
189 | char_class_xdigit, |
190 | }; |
191 | |
192 | int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2); |
193 | if(idx < 0) |
194 | { |
195 | std::wstring s(p1, p2); |
196 | for(std::wstring::size_type i = 0; i < s.size(); ++i) |
197 | s[i] = (std::towlower)(s[i]); |
198 | idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); |
199 | } |
200 | BOOST_ASSERT(idx+1 < static_cast<int>(sizeof(masks) / sizeof(masks[0]))); |
201 | return masks[idx+1]; |
202 | } |
203 | |
204 | bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask) |
205 | { |
206 | return |
207 | ((mask & char_class_space) && (std::iswspace)(c)) |
208 | || ((mask & char_class_print) && (std::iswprint)(c)) |
209 | || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) |
210 | || ((mask & char_class_upper) && (std::iswupper)(c)) |
211 | || ((mask & char_class_lower) && (std::iswlower)(c)) |
212 | || ((mask & char_class_alpha) && (std::iswalpha)(c)) |
213 | || ((mask & char_class_digit) && (std::iswdigit)(c)) |
214 | || ((mask & char_class_punct) && (std::iswpunct)(c)) |
215 | || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) |
216 | || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c)) |
217 | || ((mask & char_class_word) && (c == '_')) |
218 | || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff))) |
219 | || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v'))) |
220 | || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v')); |
221 | } |
222 | |
223 | c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2) |
224 | { |
225 | #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ |
226 | && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ |
227 | && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) |
228 | std::string name(p1, p2); |
229 | #else |
230 | std::string name; |
231 | const wchar_t* p0 = p1; |
232 | while(p0 != p2) |
233 | name.append(1, char(*p0++)); |
234 | #endif |
235 | name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name); |
236 | #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ |
237 | && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ |
238 | && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) |
239 | if(name.size()) |
240 | return string_type(name.begin(), name.end()); |
241 | #else |
242 | if(name.size()) |
243 | { |
244 | string_type result; |
245 | typedef std::string::const_iterator iter; |
246 | iter b = name.begin(); |
247 | iter e = name.end(); |
248 | while(b != e) |
249 | result.append(1, wchar_t(*b++)); |
250 | return result; |
251 | } |
252 | #endif |
253 | if(p2 - p1 == 1) |
254 | return string_type(1, *p1); |
255 | return string_type(); |
256 | } |
257 | |
258 | int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix) |
259 | { |
260 | #ifdef __BORLANDC__ |
261 | // workaround for broken wcstol: |
262 | if((std::iswxdigit)(c) == 0) |
263 | return -1; |
264 | #endif |
265 | wchar_t b[2] = { c, '\0', }; |
266 | wchar_t* ep; |
267 | int result = std::wcstol(b, &ep, radix); |
268 | if(ep == b) |
269 | return -1; |
270 | return result; |
271 | } |
272 | |
273 | #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T |
274 | c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform(const unsigned short* p1, const unsigned short* p2) |
275 | { |
276 | std::wstring result = c_regex_traits<wchar_t>::transform((const wchar_t*)p1, (const wchar_t*)p2); |
277 | return string_type(result.begin(), result.end()); |
278 | } |
279 | |
280 | c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform_primary(const unsigned short* p1, const unsigned short* p2) |
281 | { |
282 | std::wstring result = c_regex_traits<wchar_t>::transform_primary((const wchar_t*)p1, (const wchar_t*)p2); |
283 | return string_type(result.begin(), result.end()); |
284 | } |
285 | |
286 | c_regex_traits<unsigned short>::char_class_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_classname(const unsigned short* p1, const unsigned short* p2) |
287 | { |
288 | return c_regex_traits<wchar_t>::lookup_classname((const wchar_t*)p1, (const wchar_t*)p2); |
289 | } |
290 | |
291 | c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_collatename(const unsigned short* p1, const unsigned short* p2) |
292 | { |
293 | std::wstring result = c_regex_traits<wchar_t>::lookup_collatename((const wchar_t*)p1, (const wchar_t*)p2); |
294 | return string_type(result.begin(), result.end()); |
295 | } |
296 | |
297 | bool BOOST_REGEX_CALL c_regex_traits<unsigned short>::isctype(unsigned short c, char_class_type m) |
298 | { |
299 | return c_regex_traits<wchar_t>::isctype(c, m); |
300 | } |
301 | |
302 | int BOOST_REGEX_CALL c_regex_traits<unsigned short>::value(unsigned short c, int radix) |
303 | { |
304 | return c_regex_traits<wchar_t>::value(c, radix); |
305 | } |
306 | |
307 | #endif |
308 | |
309 | } |
310 | |
311 | #endif // BOOST_NO_WREGEX |
312 | |
313 | #endif // __BORLANDC__ |
314 | |
315 | |