string_conversion.cc source code [breakpad/common/string_conversion.cc]

1	// Copyright (c) 2006, Google Inc.
2	// All rights reserved.
3	//
4	// Redistribution and use in source and binary forms, with or without
5	// modification, are permitted provided that the following conditions are
6	// met:
7	//
8	//     * Redistributions of source code must retain the above copyright
9	// notice, this list of conditions and the following disclaimer.
10	//     * Redistributions in binary form must reproduce the above
11	// copyright notice, this list of conditions and the following disclaimer
12	// in the documentation and/or other materials provided with the
13	// distribution.
14	//     * Neither the name of Google Inc. nor the names of its
15	// contributors may be used to endorse or promote products derived from
16	// this software without specific prior written permission.
17	//
18	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30	#include <string.h>
31
32	#include "common/convert_UTF.h"
33	#include "common/scoped_ptr.h"
34	#include "common/string_conversion.h"
35	#include "common/using_std_string.h"
36
37	namespace google_breakpad {
38
39	using std::vector;
40
41	void UTF8ToUTF16(const char* in, vector<uint16_t>* out) {
42	size_t source_length = strlen(in);
43	const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
44	const UTF8* source_end_ptr = source_ptr + source_length;
45	// Erase the contents and zero fill to the expected size
46	out->clear();
47	out->insert(out->begin(), source_length, `0`);
48	uint16_t* target_ptr = &(*out)[`0`];
49	uint16_t* target_end_ptr = target_ptr + out->capacity();
50	ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
51	&target_ptr, target_end_ptr,
52	strictConversion);
53
54	// Resize to be the size of the # of converted characters + NULL
55	out->resize(result == conversionOK ? target_ptr - &(*out)[`0`] + `1`: `0`);
56	}
57
58	int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[`2`]) {
59	const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
60	const UTF8* source_end_ptr = source_ptr + `1`;
61	uint16_t* target_ptr = out;
62	uint16_t* target_end_ptr = target_ptr + `2`;
63	out[`0`] = out[`1`] = `0`;
64
65	// Process one character at a time
66	while (`1`) {
67	ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
68	&target_ptr, target_end_ptr,
69	strictConversion);
70
71	if (result == conversionOK)
72	return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in));
73
74	// Add another character to the input stream and try again
75	source_ptr = reinterpret_cast<const UTF8*>(in);
76	++source_end_ptr;
77
78	if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length)
79	break;
80	}
81
82	return `0`;
83	}
84
85	void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) {
86	size_t source_length = wcslen(in);
87	const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in);
88	const UTF32* source_end_ptr = source_ptr + source_length;
89	// Erase the contents and zero fill to the expected size
90	out->clear();
91	out->insert(out->begin(), source_length, `0`);
92	uint16_t* target_ptr = &(*out)[`0`];
93	uint16_t* target_end_ptr = target_ptr + out->capacity();
94	ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
95	&target_ptr, target_end_ptr,
96	strictConversion);
97
98	// Resize to be the size of the # of converted characters + NULL
99	out->resize(result == conversionOK ? target_ptr - &(*out)[`0`] + `1`: `0`);
100	}
101
102	void UTF32ToUTF16Char(wchar_t in, uint16_t out[`2`]) {
103	const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in);
104	const UTF32* source_end_ptr = source_ptr + `1`;
105	uint16_t* target_ptr = out;
106	uint16_t* target_end_ptr = target_ptr + `2`;
107	out[`0`] = out[`1`] = `0`;
108	ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
109	&target_ptr, target_end_ptr,
110	strictConversion);
111
112	if (result != conversionOK) {
113	out[`0`] = out[`1`] = `0`;
114	}
115	}
116
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value << 8);
  return static_cast<uint16_t>(former_low_byte | former_high_byte);
}
120
121	string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) {
122	const UTF16* source_ptr = &in [`0`];
123	scoped_array<uint16_t> source_buffer;
124
125	// If we're to swap, we need to make a local copy and swap each byte pair
126	if (swap) {
127	int idx = `0`;
128	source_buffer.reset(new uint16_t[in.size()]);
129	UTF16* source_buffer_ptr = source_buffer.get();
130	for (vector<uint16_t>::const_iterator it = in.begin();
131	it != in.end(); ++it, ++idx)
132	source_buffer_ptr[idx] = Swap(*it);
133
134	source_ptr = source_buffer.get();
135	}
136
137	// The maximum expansion would be 4x the size of the input string.
138	const UTF16* source_end_ptr = source_ptr + in.size();
139	size_t target_capacity = in.size() * `4`;
140	scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
141	UTF8* target_ptr = target_buffer.get();
142	UTF8* target_end_ptr = target_ptr + target_capacity;
143	ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
144	&target_ptr, target_end_ptr,
145	strictConversion);
146
147	if (result == conversionOK) {
148	const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get());
149	return targetPtr;
150	}
151
152	return "";
153	}
154
155	} // namespace google_breakpad
156

Browse the source code of breakpad/common/string_conversion.cc