1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: laszlocsomor@google.com (Laszlo Csomor)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34
35// Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well
36// as for the supporting utility functions.
37//
38// These functions convert the input path to an absolute Windows path
39// with "\\?\" prefix, then pass that to _wopen/_wmkdir/_waccess/etc.
40// (declared in <io.h>) respectively. This allows working with files/directories
41// whose paths are longer than MAX_PATH (260 chars).
42//
// This file is only used on Windows; it is empty on other platforms.
44
45#if defined(_WIN32) && !defined(_XBOX_ONE)
46
// Comment this out to fall back to using the ANSI versions (open, mkdir, ...)
// instead of the Unicode ones (_wopen, _wmkdir, ...). Doing so can be useful
// when debugging test failures that may be caused by the long path support.
50#define SUPPORT_LONGPATHS
51
52#include <google/protobuf/io/io_win32.h>
53
#include <ctype.h>
#include <direct.h>
#include <errno.h>
#include <fcntl.h>
#include <io.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <wctype.h>
62
63#ifndef WIN32_LEAN_AND_MEAN
64#define WIN32_LEAN_AND_MEAN 1
65#endif
66
67#include <windows.h>
68
69#include <memory>
70#include <sstream>
71#include <string>
72#include <vector>
73
74namespace google {
75namespace protobuf {
76namespace io {
77namespace win32 {
78namespace {
79
80using std::string;
81using std::wstring;
82
// Character classification helper, specialized per character type so that the
// templated path predicates in this file work on both narrow and wide strings.
// Only the `char` and `wchar_t` specializations are defined.
template <typename char_type>
struct CharTraits {
  static bool is_alpha(char_type ch);
};

template <>
struct CharTraits<char> {
  // isalpha() is only defined for arguments representable as unsigned char
  // (or EOF). Plain `char` may be signed, so a byte >= 0x80 (e.g. any UTF-8
  // lead byte) would be passed as a negative value. Convert through
  // unsigned char first.
  static bool is_alpha(char ch) {
    return isalpha(static_cast<unsigned char>(ch)) != 0;
  }
};

template <>
struct CharTraits<wchar_t> {
  static bool is_alpha(wchar_t ch) { return iswalpha(ch) != 0; }
};
97
// Tells whether `s` is a null pointer or points at an empty
// (immediately terminated) string.
template <typename char_type>
bool null_or_empty(const char_type* s) {
  if (s == nullptr) {
    return true;
  }
  return s[0] == 0;
}
102
103// Returns true if the path starts with a drive letter, e.g. "c:".
104// Note that this won't check for the "\" after the drive letter, so this also
105// returns true for "c:foo" (which is "c:\${PWD}\foo").
106// This check requires that a path not have a longpath prefix ("\\?\").
107template <typename char_type>
108bool has_drive_letter(const char_type* ch) {
109 return CharTraits<char_type>::is_alpha(ch[0]) && ch[1] == ':';
110}
111
// Tells whether the path begins with the longpath prefix ("\\?\").
// Characters are compared left to right and the scan stops at the first
// mismatch, so a string shorter than the prefix is never read past its
// terminator.
template <typename char_type>
bool has_longpath_prefix(const char_type* path) {
  static const char kPrefix[] = {'\\', '\\', '?', '\\'};
  for (int i = 0; i < 4; ++i) {
    if (path[i] != kPrefix[i]) {
      return false;
    }
  }
  return true;
}
118
// Tells whether `c` is a path separator: either a forward slash or a
// backslash.
template <typename char_type>
bool is_separator(char_type c) {
  switch (c) {
    case '/':
    case '\\':
      return true;
    default:
      return false;
  }
}
123
// Tells whether the path begins with a full drive specifier, i.e. a drive
// letter, a colon and a separator (e.g. "c:\" or "c:/").
template <typename char_type>
bool is_path_absolute(const char_type* path) {
  if (!has_drive_letter(path)) {
    return false;
  }
  return is_separator(path[2]);
}
129
// Tells whether the path has a drive letter that is NOT followed by a
// separator, e.g. "c:" or "c:foo". Such a path is relative to the current
// directory of that drive.
template <typename char_type>
bool is_drive_relative(const char_type* path) {
  if (!has_drive_letter(path)) {
    return false;
  }
  const char_type after_colon = path[2];
  return after_colon == 0 || !is_separator(after_colon);
}
134
135wstring join_paths(const wstring& path1, const wstring& path2) {
136 if (path1.empty() || is_path_absolute(path2.c_str()) ||
137 has_longpath_prefix(path2.c_str())) {
138 return path2;
139 }
140 if (path2.empty()) {
141 return path1;
142 }
143
144 if (is_separator(path1[path1.size() - 1])) {
145 return is_separator(path2[0]) ? (path1 + path2.substr(1))
146 : (path1 + path2);
147 } else {
148 return is_separator(path2[0]) ? (path1 + path2)
149 : (path1 + L'\\' + path2);
150 }
151}
152
153wstring normalize(wstring path) {
154 if (has_longpath_prefix(path.c_str())) {
155 path = path.substr(4);
156 }
157
158 static const wstring dot(L".");
159 static const wstring dotdot(L"..");
160 const WCHAR* p = path.c_str();
161
162 std::vector<wstring> segments;
163 int segment_start = -1;
164 // Find the path segments in `path` (separated by "/").
165 for (int i = 0;; ++i) {
166 if (!is_separator(p[i]) && p[i] != L'\0') {
167 // The current character does not end a segment, so start one unless it's
168 // already started.
169 if (segment_start < 0) {
170 segment_start = i;
171 }
172 } else if (segment_start >= 0 && i > segment_start) {
173 // The current character is "/" or "\0", so this ends a segment.
174 // Add that to `segments` if there's anything to add; handle "." and "..".
175 wstring segment(p, segment_start, i - segment_start);
176 segment_start = -1;
177 if (segment == dotdot) {
178 if (!segments.empty() &&
179 (!has_drive_letter(segments[0].c_str()) || segments.size() > 1)) {
180 segments.pop_back();
181 }
182 } else if (segment != dot && !segment.empty()) {
183 segments.push_back(segment);
184 }
185 }
186 if (p[i] == L'\0') {
187 break;
188 }
189 }
190
191 // Handle the case when `path` is just a drive specifier (or some degenerate
192 // form of it, e.g. "c:\..").
193 if (segments.size() == 1 && segments[0].size() == 2 &&
194 has_drive_letter(segments[0].c_str())) {
195 return segments[0] + L'\\';
196 }
197
198 // Join all segments.
199 bool first = true;
200 std::wstringstream result;
201 for (int i = 0; i < segments.size(); ++i) {
202 if (!first) {
203 result << L'\\';
204 }
205 first = false;
206 result << segments[i];
207 }
208 // Preserve trailing separator if the input contained it.
209 if (!path.empty() && is_separator(p[path.size() - 1])) {
210 result << L'\\';
211 }
212 return result.str();
213}
214
215bool as_windows_path(const char* path, wstring* result) {
216 if (null_or_empty(path)) {
217 result->clear();
218 return true;
219 }
220 wstring wpath;
221 if (!strings::utf8_to_wcs(path, &wpath)) {
222 return false;
223 }
224 if (has_longpath_prefix(wpath.c_str())) {
225 *result = wpath;
226 return true;
227 }
228 if (is_separator(path[0]) || is_drive_relative(path)) {
229 return false;
230 }
231
232
233 if (!is_path_absolute(wpath.c_str())) {
234 int size = ::GetCurrentDirectoryW(0, nullptr);
235 if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
236 return false;
237 }
238 std::unique_ptr<WCHAR[]> wcwd(new WCHAR[size]);
239 ::GetCurrentDirectoryW(size, wcwd.get());
240 wpath = join_paths(wcwd.get(), wpath);
241 }
242 wpath = normalize(wpath);
243 if (!has_longpath_prefix(wpath.c_str())) {
244 // Add the "\\?\" prefix unconditionally. This way we prevent the Win32 API
245 // from processing the path and "helpfully" removing trailing dots from the
246 // path, for example.
247 // See https://github.com/bazelbuild/bazel/issues/2935
248 wpath = wstring(L"\\\\?\\") + wpath;
249 }
250 *result = wpath;
251 return true;
252}
253
254} // namespace
255
256int open(const char* path, int flags, int mode) {
257#ifdef SUPPORT_LONGPATHS
258 wstring wpath;
259 if (!as_windows_path(path, &wpath)) {
260 errno = ENOENT;
261 return -1;
262 }
263 return ::_wopen(wpath.c_str(), flags, mode);
264#else
265 return ::_open(path, flags, mode);
266#endif
267}
268
269int mkdir(const char* path, int /*_mode*/) {
270#ifdef SUPPORT_LONGPATHS
271 wstring wpath;
272 if (!as_windows_path(path, &wpath)) {
273 errno = ENOENT;
274 return -1;
275 }
276 return ::_wmkdir(wpath.c_str());
277#else // not SUPPORT_LONGPATHS
278 return ::_mkdir(path);
279#endif // not SUPPORT_LONGPATHS
280}
281
282int access(const char* path, int mode) {
283#ifdef SUPPORT_LONGPATHS
284 wstring wpath;
285 if (!as_windows_path(path, &wpath)) {
286 errno = ENOENT;
287 return -1;
288 }
289 return ::_waccess(wpath.c_str(), mode);
290#else
291 return ::_access(path, mode);
292#endif
293}
294
295int chdir(const char* path) {
296#ifdef SUPPORT_LONGPATHS
297 wstring wpath;
298 if (!as_windows_path(path, &wpath)) {
299 errno = ENOENT;
300 return -1;
301 }
302 return ::_wchdir(wpath.c_str());
303#else
304 return ::_chdir(path);
305#endif
306}
307
308int stat(const char* path, struct _stat* buffer) {
309#ifdef SUPPORT_LONGPATHS
310 wstring wpath;
311 if (!as_windows_path(path, &wpath)) {
312 errno = ENOENT;
313 return -1;
314 }
315 return ::_wstat(wpath.c_str(), buffer);
316#else // not SUPPORT_LONGPATHS
317 return ::_stat(path, buffer);
318#endif // not SUPPORT_LONGPATHS
319}
320
// Long-path-aware replacement for fopen().
//
// With SUPPORT_LONGPATHS defined:
//   - a null or empty `path` or `mode` fails with errno = EINVAL (the CRT
//     would otherwise invoke its invalid-parameter handler for these),
//   - a `path` that cannot be converted by as_windows_path() fails with
//     errno = ENOENT,
//   - a `mode` that cannot be converted from UTF-8 fails with errno = EINVAL,
//   - otherwise the converted path and mode are handed to _wfopen().
// Without SUPPORT_LONGPATHS this forwards to the global fopen().
//
// Returns the opened stream, or nullptr on failure.
FILE* fopen(const char* path, const char* mode) {
#ifdef SUPPORT_LONGPATHS
  if (null_or_empty(path) || null_or_empty(mode)) {
    errno = EINVAL;
    return nullptr;
  }
  wstring wpath;
  if (!as_windows_path(path, &wpath)) {
    errno = ENOENT;
    return nullptr;
  }
  wstring wmode;
  if (!strings::utf8_to_wcs(mode, &wmode)) {
    errno = EINVAL;
    return nullptr;
  }
  return ::_wfopen(wpath.c_str(), wmode.c_str());
#else
  return ::fopen(path, mode);
#endif
}
342
343int close(int fd) { return ::_close(fd); }
344
345int dup(int fd) { return ::_dup(fd); }
346
347int dup2(int fd1, int fd2) { return ::_dup2(fd1, fd2); }
348
349int read(int fd, void* buffer, size_t size) {
350 return ::_read(fd, buffer, size);
351}
352
353int setmode(int fd, int mode) { return ::_setmode(fd, mode); }
354
355int write(int fd, const void* buffer, size_t size) {
356 return ::_write(fd, buffer, size);
357}
358
359wstring testonly_utf8_to_winpath(const char* path) {
360 wstring wpath;
361 return as_windows_path(path, &wpath) ? wpath : wstring();
362}
363
364ExpandWildcardsResult ExpandWildcards(
365 const string& path, std::function<void(const string&)> consume) {
366 if (path.find_first_of("*?") == string::npos) {
367 // There are no wildcards in the path, we don't need to expand it.
368 consume(path);
369 return ExpandWildcardsResult::kSuccess;
370 }
371
372 wstring wpath;
373 if (!as_windows_path(path.c_str(), &wpath)) {
374 return ExpandWildcardsResult::kErrorInputPathConversion;
375 }
376
377 static const wstring kDot = L".";
378 static const wstring kDotDot = L"..";
379 WIN32_FIND_DATAW metadata;
380 HANDLE handle = ::FindFirstFileW(wpath.c_str(), &metadata);
381 if (handle == INVALID_HANDLE_VALUE) {
382 // The pattern does not match any files (or directories).
383 return ExpandWildcardsResult::kErrorNoMatchingFile;
384 }
385
386 string::size_type pos = path.find_last_of("\\/");
387 string dirname;
388 if (pos != string::npos) {
389 dirname = path.substr(0, pos + 1);
390 }
391
392 ExpandWildcardsResult matched = ExpandWildcardsResult::kErrorNoMatchingFile;
393 do {
394 // Ignore ".", "..", and directories.
395 if ((metadata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0 &&
396 kDot != metadata.cFileName && kDotDot != metadata.cFileName) {
397 matched = ExpandWildcardsResult::kSuccess;
398 string filename;
399 if (!strings::wcs_to_utf8(metadata.cFileName, &filename)) {
400 matched = ExpandWildcardsResult::kErrorOutputPathConversion;
401 break;
402 }
403
404 if (dirname.empty()) {
405 consume(filename);
406 } else {
407 consume(dirname + filename);
408 }
409 }
410 } while (::FindNextFileW(handle, &metadata));
411 FindClose(handle);
412 return matched;
413}
414
415namespace strings {
416
417bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) {
418 if (null_or_empty(s)) {
419 out->clear();
420 return true;
421 }
422 BOOL usedDefaultChar = FALSE;
423 SetLastError(0);
424 int size = WideCharToMultiByte(
425 outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0, nullptr,
426 outUtf8 ? nullptr : &usedDefaultChar);
427 if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
428 || usedDefaultChar) {
429 return false;
430 }
431 std::unique_ptr<CHAR[]> astr(new CHAR[size]);
432 WideCharToMultiByte(
433 outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, nullptr, nullptr);
434 out->assign(astr.get());
435 return true;
436}
437
438bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) {
439 if (null_or_empty(s)) {
440 out->clear();
441 return true;
442 }
443
444 SetLastError(0);
445 int size =
446 MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0);
447 if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
448 return false;
449 }
450 std::unique_ptr<WCHAR[]> wstr(new WCHAR[size]);
451 MultiByteToWideChar(
452 inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, wstr.get(), size + 1);
453 out->assign(wstr.get());
454 return true;
455}
456
457bool utf8_to_wcs(const char* input, wstring* out) {
458 return mbs_to_wcs(input, out, true);
459}
460
461bool wcs_to_utf8(const wchar_t* input, string* out) {
462 return wcs_to_mbs(input, out, true);
463}
464
465} // namespace strings
466} // namespace win32
467} // namespace io
468} // namespace protobuf
469} // namespace google
470
471#endif // defined(_WIN32)
472