1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30#include <string.h>
31
32#include "common/convert_UTF.h"
33#include "common/scoped_ptr.h"
34#include "common/string_conversion.h"
35#include "common/using_std_string.h"
36
37namespace google_breakpad {
38
39using std::vector;
40
41void UTF8ToUTF16(const char* in, vector<uint16_t>* out) {
42 size_t source_length = strlen(in);
43 const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
44 const UTF8* source_end_ptr = source_ptr + source_length;
45 // Erase the contents and zero fill to the expected size
46 out->clear();
47 out->insert(out->begin(), source_length, 0);
48 uint16_t* target_ptr = &(*out)[0];
49 uint16_t* target_end_ptr = target_ptr + out->capacity();
50 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
51 &target_ptr, target_end_ptr,
52 strictConversion);
53
54 // Resize to be the size of the # of converted characters + NULL
55 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
56}
57
58int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) {
59 const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
60 const UTF8* source_end_ptr = source_ptr + 1;
61 uint16_t* target_ptr = out;
62 uint16_t* target_end_ptr = target_ptr + 2;
63 out[0] = out[1] = 0;
64
65 // Process one character at a time
66 while (1) {
67 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
68 &target_ptr, target_end_ptr,
69 strictConversion);
70
71 if (result == conversionOK)
72 return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in));
73
74 // Add another character to the input stream and try again
75 source_ptr = reinterpret_cast<const UTF8*>(in);
76 ++source_end_ptr;
77
78 if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length)
79 break;
80 }
81
82 return 0;
83}
84
85void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) {
86 size_t source_length = wcslen(in);
87 const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in);
88 const UTF32* source_end_ptr = source_ptr + source_length;
89 // Erase the contents and zero fill to the expected size
90 out->clear();
91 out->insert(out->begin(), source_length, 0);
92 uint16_t* target_ptr = &(*out)[0];
93 uint16_t* target_end_ptr = target_ptr + out->capacity();
94 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
95 &target_ptr, target_end_ptr,
96 strictConversion);
97
98 // Resize to be the size of the # of converted characters + NULL
99 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
100}
101
102void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
103 const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in);
104 const UTF32* source_end_ptr = source_ptr + 1;
105 uint16_t* target_ptr = out;
106 uint16_t* target_end_ptr = target_ptr + 2;
107 out[0] = out[1] = 0;
108 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
109 &target_ptr, target_end_ptr,
110 strictConversion);
111
112 if (result != conversionOK) {
113 out[0] = out[1] = 0;
114 }
115}
116
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value << 8);
  return static_cast<uint16_t>(former_low_byte | former_high_byte);
}
120
121string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) {
122 const UTF16* source_ptr = &in[0];
123 scoped_array<uint16_t> source_buffer;
124
125 // If we're to swap, we need to make a local copy and swap each byte pair
126 if (swap) {
127 int idx = 0;
128 source_buffer.reset(new uint16_t[in.size()]);
129 UTF16* source_buffer_ptr = source_buffer.get();
130 for (vector<uint16_t>::const_iterator it = in.begin();
131 it != in.end(); ++it, ++idx)
132 source_buffer_ptr[idx] = Swap(*it);
133
134 source_ptr = source_buffer.get();
135 }
136
137 // The maximum expansion would be 4x the size of the input string.
138 const UTF16* source_end_ptr = source_ptr + in.size();
139 size_t target_capacity = in.size() * 4;
140 scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
141 UTF8* target_ptr = target_buffer.get();
142 UTF8* target_end_ptr = target_ptr + target_capacity;
143 ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
144 &target_ptr, target_end_ptr,
145 strictConversion);
146
147 if (result == conversionOK) {
148 const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get());
149 return targetPtr;
150 }
151
152 return "";
153}
154
155} // namespace google_breakpad
156