1 | // Copyright (c) 2006, Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | |
30 | #include <string.h> |
31 | |
32 | #include "common/convert_UTF.h" |
33 | #include "common/scoped_ptr.h" |
34 | #include "common/string_conversion.h" |
35 | #include "common/using_std_string.h" |
36 | |
37 | namespace google_breakpad { |
38 | |
39 | using std::vector; |
40 | |
41 | void UTF8ToUTF16(const char* in, vector<uint16_t>* out) { |
42 | size_t source_length = strlen(in); |
43 | const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in); |
44 | const UTF8* source_end_ptr = source_ptr + source_length; |
45 | // Erase the contents and zero fill to the expected size |
46 | out->clear(); |
47 | out->insert(out->begin(), source_length, 0); |
48 | uint16_t* target_ptr = &(*out)[0]; |
49 | uint16_t* target_end_ptr = target_ptr + out->capacity(); |
50 | ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, |
51 | &target_ptr, target_end_ptr, |
52 | strictConversion); |
53 | |
54 | // Resize to be the size of the # of converted characters + NULL |
55 | out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); |
56 | } |
57 | |
58 | int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) { |
59 | const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in); |
60 | const UTF8* source_end_ptr = source_ptr + 1; |
61 | uint16_t* target_ptr = out; |
62 | uint16_t* target_end_ptr = target_ptr + 2; |
63 | out[0] = out[1] = 0; |
64 | |
65 | // Process one character at a time |
66 | while (1) { |
67 | ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr, |
68 | &target_ptr, target_end_ptr, |
69 | strictConversion); |
70 | |
71 | if (result == conversionOK) |
72 | return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in)); |
73 | |
74 | // Add another character to the input stream and try again |
75 | source_ptr = reinterpret_cast<const UTF8*>(in); |
76 | ++source_end_ptr; |
77 | |
78 | if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length) |
79 | break; |
80 | } |
81 | |
82 | return 0; |
83 | } |
84 | |
85 | void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) { |
86 | size_t source_length = wcslen(in); |
87 | const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in); |
88 | const UTF32* source_end_ptr = source_ptr + source_length; |
89 | // Erase the contents and zero fill to the expected size |
90 | out->clear(); |
91 | out->insert(out->begin(), source_length, 0); |
92 | uint16_t* target_ptr = &(*out)[0]; |
93 | uint16_t* target_end_ptr = target_ptr + out->capacity(); |
94 | ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, |
95 | &target_ptr, target_end_ptr, |
96 | strictConversion); |
97 | |
98 | // Resize to be the size of the # of converted characters + NULL |
99 | out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0); |
100 | } |
101 | |
102 | void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) { |
103 | const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in); |
104 | const UTF32* source_end_ptr = source_ptr + 1; |
105 | uint16_t* target_ptr = out; |
106 | uint16_t* target_end_ptr = target_ptr + 2; |
107 | out[0] = out[1] = 0; |
108 | ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr, |
109 | &target_ptr, target_end_ptr, |
110 | strictConversion); |
111 | |
112 | if (result != conversionOK) { |
113 | out[0] = out[1] = 0; |
114 | } |
115 | } |
116 | |
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value & 0xFF);
  return static_cast<uint16_t>((former_low_byte << 8) | former_high_byte);
}
120 | |
121 | string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) { |
122 | const UTF16* source_ptr = &in[0]; |
123 | scoped_array<uint16_t> source_buffer; |
124 | |
125 | // If we're to swap, we need to make a local copy and swap each byte pair |
126 | if (swap) { |
127 | int idx = 0; |
128 | source_buffer.reset(new uint16_t[in.size()]); |
129 | UTF16* source_buffer_ptr = source_buffer.get(); |
130 | for (vector<uint16_t>::const_iterator it = in.begin(); |
131 | it != in.end(); ++it, ++idx) |
132 | source_buffer_ptr[idx] = Swap(*it); |
133 | |
134 | source_ptr = source_buffer.get(); |
135 | } |
136 | |
137 | // The maximum expansion would be 4x the size of the input string. |
138 | const UTF16* source_end_ptr = source_ptr + in.size(); |
139 | size_t target_capacity = in.size() * 4; |
140 | scoped_array<UTF8> target_buffer(new UTF8[target_capacity]); |
141 | UTF8* target_ptr = target_buffer.get(); |
142 | UTF8* target_end_ptr = target_ptr + target_capacity; |
143 | ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr, |
144 | &target_ptr, target_end_ptr, |
145 | strictConversion); |
146 | |
147 | if (result == conversionOK) { |
148 | const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get()); |
149 | return targetPtr; |
150 | } |
151 | |
152 | return "" ; |
153 | } |
154 | |
155 | } // namespace google_breakpad |
156 | |