1/*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE: wc_regex_traits.cpp
15 * VERSION: see <boost/version.hpp>
16 * DESCRIPTION: Implements out of line members for c_regex_traits<wchar_t>
17 */
18
19
20#define BOOST_REGEX_SOURCE
21
22#include <boost/detail/workaround.hpp>
23#include <memory>
24#include <string>
25#include "internals.hpp"
26
27#if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \
28 && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\
29 && BOOST_WORKAROUND(BOOST_MSVC, <1600)
30//
31// This is a horrible workaround, but without declaring these symbols extern we get
32// duplicate symbol errors when linking if the application is built without
33// /Zc:wchar_t
34//
// The guard above enables this section only when all of the following hold:
// _DLL_CPPLIB and _NATIVE_WCHAR_T_DEFINED are defined, _M_CEE_PURE is not,
// none of the STLport / Rogue Wave identification macros is defined, and
// BOOST_MSVC is below 1600.
//
// BOOST_REGEX_STDLIB_DECL is the decoration applied to every declaration in
// this section: _CRTIMP2_PURE when that macro exists, otherwise _CRTIMP2.
//
35#ifdef _CRTIMP2_PURE
36# define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE
37#else
38# define BOOST_REGEX_STDLIB_DECL _CRTIMP2
39#endif
40
41namespace std{
42
// Class templates instantiated for unsigned short (BOOST_MSVC >= 1400 only):
43#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400)
44template class BOOST_REGEX_STDLIB_DECL allocator<unsigned short>;
45template class BOOST_REGEX_STDLIB_DECL _String_val<unsigned short, allocator<unsigned short> >;
46template class BOOST_REGEX_STDLIB_DECL basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >;
47#endif
48
// Declaration of the char_traits<unsigned short>::length specialisation;
// NOTE(review): the version window depends on how BOOST_TESTED_AT expands - confirm.
49#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400))
50template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits<unsigned short>::length(unsigned short const*);
51#endif
52
// Comparison operators for basic_string<unsigned short>:
// operator== (string/string, pointer/string, string/pointer), operator< and operator>.
53template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==(
54 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
55 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
56template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==(
57 const unsigned short *,
58 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
59template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==(
60 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
61 const unsigned short *);
62template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<(
63 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
64 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
65template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>(
66 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
67 const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
68} // namespace std
69#endif // unsigned short string workaround for BOOST_MSVC < 1600
70
71#include <boost/regex/config.hpp>
72#include <boost/detail/workaround.hpp>
73
74#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560)
75
76#include <boost/regex/v4/c_regex_traits.hpp>
77#ifndef BOOST_NO_WREGEX
78#include <boost/regex/v4/primary_transform.hpp>
79#include <boost/regex/v4/regex_traits_defaults.hpp>
80
#if defined(BOOST_NO_STDC_NAMESPACE)
// When BOOST_NO_STDC_NAMESPACE is defined, make the global ::wcstol
// reachable under the qualified name std::wcstol.
namespace std { using ::wcstol; }
#endif
86
87namespace boost{
88
89c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
90{
91 std::size_t r;
92 std::size_t s = 10;
93 std::wstring src(p1, p2);
94 std::wstring result(s, L' ');
95 while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
96 {
97#if defined(_CPPLIB_VER)
98 //
99 // A bug in VC11 and 12 causes the program to hang if we pass a null-string
100 // to std::strxfrm, but only for certain locales :-(
101 // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
102 //
103 if(r == INT_MAX)
104 {
105 result.erase();
106 result.insert(result.begin(), static_cast<wchar_t>(0));
107 return result;
108 }
109#endif
110 result.append(r - s + 3, L' ');
111 s = result.size();
112 }
113 result.erase(r);
114 return result;
115}
116
117c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
118{
119 static wchar_t s_delim;
120 static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
121 std::wstring result;
122 //
123 // What we do here depends upon the format of the sort key returned by
124 // sort key returned by this->transform:
125 //
126 switch(s_collate_type)
127 {
128 case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
129 case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
130 // the best we can do is translate to lower case, then get a regular sort key:
131 {
132 result.assign(p1, p2);
133 for(std::wstring::size_type i = 0; i < result.size(); ++i)
134 result[i] = (std::towlower)(result[i]);
135 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
136 break;
137 }
138 case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
139 {
140 // get a regular sort key, and then truncate it:
141 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
142 result.erase(s_delim);
143 break;
144 }
145 case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
146 // get a regular sort key, and then truncate everything after the delim:
147 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
148 if(result.size() && (result[0] == s_delim))
149 break;
150 std::size_t i;
151 for(i = 0; i < result.size(); ++i)
152 {
153 if(result[i] == s_delim)
154 break;
155 }
156 result.erase(i);
157 break;
158 }
159 if(result.empty())
160 result = std::wstring(1, char(0));
161 return result;
162}
163
164c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
165{
166 static const char_class_type masks[] =
167 {
168 0,
169 char_class_alnum,
170 char_class_alpha,
171 char_class_blank,
172 char_class_cntrl,
173 char_class_digit,
174 char_class_digit,
175 char_class_graph,
176 char_class_horizontal,
177 char_class_lower,
178 char_class_lower,
179 char_class_print,
180 char_class_punct,
181 char_class_space,
182 char_class_space,
183 char_class_upper,
184 char_class_unicode,
185 char_class_upper,
186 char_class_vertical,
187 char_class_alnum | char_class_word,
188 char_class_alnum | char_class_word,
189 char_class_xdigit,
190 };
191
192 int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
193 if(idx < 0)
194 {
195 std::wstring s(p1, p2);
196 for(std::wstring::size_type i = 0; i < s.size(); ++i)
197 s[i] = (std::towlower)(s[i]);
198 idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
199 }
200 BOOST_ASSERT(idx+1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
201 return masks[idx+1];
202}
203
204bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
205{
206 return
207 ((mask & char_class_space) && (std::iswspace)(c))
208 || ((mask & char_class_print) && (std::iswprint)(c))
209 || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
210 || ((mask & char_class_upper) && (std::iswupper)(c))
211 || ((mask & char_class_lower) && (std::iswlower)(c))
212 || ((mask & char_class_alpha) && (std::iswalpha)(c))
213 || ((mask & char_class_digit) && (std::iswdigit)(c))
214 || ((mask & char_class_punct) && (std::iswpunct)(c))
215 || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
216 || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
217 || ((mask & char_class_word) && (c == '_'))
218 || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
219 || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
220 || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
221}
222
223c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
224{
225#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
226 && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
227 && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
228 std::string name(p1, p2);
229#else
230 std::string name;
231 const wchar_t* p0 = p1;
232 while(p0 != p2)
233 name.append(1, char(*p0++));
234#endif
235 name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
236#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
237 && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
238 && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
239 if(name.size())
240 return string_type(name.begin(), name.end());
241#else
242 if(name.size())
243 {
244 string_type result;
245 typedef std::string::const_iterator iter;
246 iter b = name.begin();
247 iter e = name.end();
248 while(b != e)
249 result.append(1, wchar_t(*b++));
250 return result;
251 }
252#endif
253 if(p2 - p1 == 1)
254 return string_type(1, *p1);
255 return string_type();
256}
257
258int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix)
259{
260#ifdef __BORLANDC__
261 // workaround for broken wcstol:
262 if((std::iswxdigit)(c) == 0)
263 return -1;
264#endif
265 wchar_t b[2] = { c, '\0', };
266 wchar_t* ep;
267 int result = std::wcstol(b, &ep, radix);
268 if(ep == b)
269 return -1;
270 return result;
271}
272
273#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
274c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform(const unsigned short* p1, const unsigned short* p2)
275{
276 std::wstring result = c_regex_traits<wchar_t>::transform((const wchar_t*)p1, (const wchar_t*)p2);
277 return string_type(result.begin(), result.end());
278}
279
280c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform_primary(const unsigned short* p1, const unsigned short* p2)
281{
282 std::wstring result = c_regex_traits<wchar_t>::transform_primary((const wchar_t*)p1, (const wchar_t*)p2);
283 return string_type(result.begin(), result.end());
284}
285
286c_regex_traits<unsigned short>::char_class_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_classname(const unsigned short* p1, const unsigned short* p2)
287{
288 return c_regex_traits<wchar_t>::lookup_classname((const wchar_t*)p1, (const wchar_t*)p2);
289}
290
291c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_collatename(const unsigned short* p1, const unsigned short* p2)
292{
293 std::wstring result = c_regex_traits<wchar_t>::lookup_collatename((const wchar_t*)p1, (const wchar_t*)p2);
294 return string_type(result.begin(), result.end());
295}
296
297bool BOOST_REGEX_CALL c_regex_traits<unsigned short>::isctype(unsigned short c, char_class_type m)
298{
299 return c_regex_traits<wchar_t>::isctype(c, m);
300}
301
302int BOOST_REGEX_CALL c_regex_traits<unsigned short>::value(unsigned short c, int radix)
303{
304 return c_regex_traits<wchar_t>::value(c, radix);
305}
306
307#endif
308
309}
310
311#endif // BOOST_NO_WREGEX
312
313#endif // __BORLANDC__
314
315