1 | // |
2 | // Copyright (c) Microsoft. All rights reserved. |
3 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. |
4 | // |
5 | |
6 | #include "standardpch.h" |
7 | #include "verbmerge.h" |
8 | #include "simpletimer.h" |
9 | #include "logging.h" |
10 | |
11 | // Do reads/writes in large 256MB chunks. |
12 | #define BUFFER_SIZE 0x10000000 |
13 | |
14 | // MergePathStrings: take two file system path components, compose them together, and return the merged pathname string. |
15 | // The caller must delete the returned string with delete[]. |
16 | // |
17 | // static |
18 | LPWSTR verbMerge::MergePathStrings(LPCWSTR dir, LPCWSTR file) |
19 | { |
20 | size_t dirlen = wcslen(dir); |
21 | size_t filelen = wcslen(file); |
22 | size_t newlen = dirlen + 1 /* slash */ + filelen + 1 /* null */; |
23 | LPWSTR newpath = new WCHAR[newlen]; |
24 | wcscpy(newpath, dir); |
25 | wcscat(newpath, DIRECTORY_SEPARATOR_STR_W); |
26 | wcscat(newpath, file); |
27 | return newpath; |
28 | } |
29 | |
30 | char* verbMerge::ConvertWideCharToMultiByte(LPCWSTR wstr) |
31 | { |
32 | unsigned int codePage = CP_UTF8; |
33 | int sizeNeeded = WideCharToMultiByte(codePage, 0, wstr, -1, NULL, 0, NULL, NULL); |
34 | char* encodedStr = new char[sizeNeeded]; |
35 | WideCharToMultiByte(codePage, 0, wstr, -1, encodedStr, sizeNeeded, NULL, NULL); |
36 | return encodedStr; |
37 | } |
38 | |
39 | // AppendFile: append the file named by 'fileName' to the output file referred to by 'hFileOut'. The 'hFileOut' |
40 | // handle is assumed to be open, and the file position is assumed to be at the correct spot for writing, to append. |
41 | // |
42 | // 'buffer' is memory that can be used to do reading/buffering. |
43 | // |
44 | // static |
45 | int verbMerge::AppendFile(HANDLE hFileOut, LPCWSTR fileName, unsigned char* buffer, size_t bufferSize) |
46 | { |
47 | int result = 0; // default to zero == success |
48 | |
49 | char* fileNameAsChar = ConvertWideCharToMultiByte(fileName); |
50 | LogInfo("Appending file '%s'" , fileNameAsChar); |
51 | |
52 | HANDLE hFileIn = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, |
53 | FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL); |
54 | if (hFileIn == INVALID_HANDLE_VALUE) |
55 | { |
56 | // If you use a relative path, you can get GetLastError()==3, if the absolute path is longer |
57 | // than MAX_PATH. |
58 | LogError("Failed to open input file '%s'. GetLastError()=%u" , fileNameAsChar, GetLastError()); |
59 | return -1; |
60 | } |
61 | |
62 | LARGE_INTEGER fileSize; |
63 | if (GetFileSizeEx(hFileIn, &fileSize) == 0) |
64 | { |
65 | LogError("GetFileSizeEx on '%s' failed. GetLastError()=%u" , fileNameAsChar, GetLastError()); |
66 | result = -1; |
67 | goto CLEAN_UP; |
68 | } |
69 | |
70 | for (LONGLONG offset = 0; offset < fileSize.QuadPart; offset += bufferSize) |
71 | { |
72 | DWORD bytesRead = -1; |
73 | BOOL res = ReadFile(hFileIn, buffer, (DWORD)bufferSize, &bytesRead, nullptr); |
74 | if (!res) |
75 | { |
76 | LogError("Failed to read '%s' from offset %lld. GetLastError()=%u" , fileNameAsChar, offset, GetLastError()); |
77 | result = -1; |
78 | goto CLEAN_UP; |
79 | } |
80 | DWORD bytesWritten = -1; |
81 | BOOL res2 = WriteFile(hFileOut, buffer, bytesRead, &bytesWritten, nullptr); |
82 | if (!res2) |
83 | { |
84 | LogError("Failed to write output file at offset %lld. GetLastError()=%u" , offset, GetLastError()); |
85 | result = -1; |
86 | goto CLEAN_UP; |
87 | } |
88 | if (bytesRead != bytesWritten) |
89 | { |
90 | LogError("Failed to read/write matching bytes %u!=%u" , bytesRead, bytesWritten); |
91 | result = -1; |
92 | goto CLEAN_UP; |
93 | } |
94 | } |
95 | |
96 | CLEAN_UP: |
97 | |
98 | delete[] fileNameAsChar; |
99 | |
100 | if (CloseHandle(hFileIn) == 0) |
101 | { |
102 | LogError("CloseHandle failed. GetLastError()=%u" , GetLastError()); |
103 | result = -1; |
104 | } |
105 | |
106 | return result; |
107 | } |
108 | |
109 | // Return true if this is a directory |
110 | // |
111 | // static |
112 | bool verbMerge::DirectoryFilterDirectories(WIN32_FIND_DATAW* findData) |
113 | { |
114 | if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) |
115 | { |
116 | // It's a directory. See if we want to exclude it because of other reasons, such as: |
117 | // 1. reparse points: avoid the possibility of loops |
118 | // 2. system directories |
119 | // 3. hidden directories |
120 | // 4. "." or ".." |
121 | |
122 | #ifndef FEATURE_PAL // FILE_ATTRIBUTE_REPARSE_POINT is not defined in the PAL |
123 | if ((findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0) |
124 | return false; |
125 | #endif // !FEATURE_PAL |
126 | if ((findData->dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) != 0) |
127 | return false; |
128 | if ((findData->dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) != 0) |
129 | return false; |
130 | |
131 | if (wcscmp(findData->cFileName, W("." )) == 0) |
132 | return false; |
133 | if (wcscmp(findData->cFileName, W(".." )) == 0) |
134 | return false; |
135 | |
136 | return true; |
137 | } |
138 | |
139 | return false; |
140 | } |
141 | |
142 | // Return true if this is a file. |
143 | // |
144 | // static |
145 | bool verbMerge::DirectoryFilterFile(WIN32_FIND_DATAW* findData) |
146 | { |
147 | if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) |
148 | { |
149 | // This is not a directory, so it must be a file. |
150 | return true; |
151 | } |
152 | |
153 | return false; |
154 | } |
155 | |
156 | // static |
157 | int __cdecl verbMerge::WIN32_FIND_DATAW_qsort_helper(const void* p1, const void* p2) |
158 | { |
159 | const WIN32_FIND_DATAW* file1 = (WIN32_FIND_DATAW*)p1; |
160 | const WIN32_FIND_DATAW* file2 = (WIN32_FIND_DATAW*)p2; |
161 | return wcscmp(file1->cFileName, file2->cFileName); |
162 | } |
163 | |
164 | // Enumerate a directory for the files specified by "searchPattern". For each element in the directory, |
165 | // pass it to the filter function. If the filter returns true, we keep it, otherwise we ignore it. Return |
166 | // an array of information for the files that we kept, sorted by filename. |
167 | // |
168 | // Returns 0 on success, non-zero on failure. |
169 | // If success, fileArray and elemCount are set. |
170 | // |
171 | // static |
172 | int verbMerge::FilterDirectory(LPCWSTR searchPattern, |
173 | DirectoryFilterFunction_t filter, |
174 | /* out */ WIN32_FIND_DATAW** ppFileArray, |
175 | int* pElemCount) |
176 | { |
177 | // First, build up a list, then create an array and sort it after we know how many elements there are. |
178 | struct findDataList |
179 | { |
180 | findDataList(WIN32_FIND_DATAW* newFindData, findDataList* newNext) : findData(*newFindData), next(newNext) |
181 | { |
182 | } |
183 | |
184 | static void DeleteList(findDataList* root) |
185 | { |
186 | for (findDataList* loop = root; loop != nullptr;) |
187 | { |
188 | findDataList* tmp = loop; |
189 | loop = loop->next; |
190 | delete tmp; |
191 | } |
192 | } |
193 | |
194 | WIN32_FIND_DATAW findData; |
195 | findDataList* next; |
196 | }; |
197 | |
198 | WIN32_FIND_DATAW* retArray = nullptr; |
199 | findDataList* first = nullptr; |
200 | |
201 | int result = 0; // default to zero == success |
202 | int elemCount = 0; |
203 | |
204 | // NOTE: this function only works on Windows 7 and later. |
205 | WIN32_FIND_DATAW findData; |
206 | HANDLE hSearch; |
207 | #ifdef FEATURE_PAL |
208 | // PAL doesn't have FindFirstFileEx(). So just use FindFirstFile(). The only reason we use |
209 | // the Ex version is potentially better performance (don't populate short name; use large fetch), |
210 | // not functionality. |
211 | hSearch = FindFirstFileW(searchPattern, &findData); |
212 | #else // !FEATURE_PAL |
213 | hSearch = FindFirstFileExW(searchPattern, |
214 | FindExInfoBasic, // We don't care about the short names |
215 | &findData, |
216 | FindExSearchNameMatch, // standard name matching |
217 | NULL, FIND_FIRST_EX_LARGE_FETCH); |
218 | #endif // !FEATURE_PAL |
219 | |
220 | if (hSearch == INVALID_HANDLE_VALUE) |
221 | { |
222 | DWORD lastErr = GetLastError(); |
223 | if (lastErr == ERROR_FILE_NOT_FOUND) |
224 | { |
225 | // This is ok; there was just nothing matching the pattern. |
226 | } |
227 | else |
228 | { |
229 | LogError("Failed to find pattern '%s'. GetLastError()=%u" , searchPattern, GetLastError()); |
230 | } |
231 | goto CLEAN_UP; |
232 | } |
233 | |
234 | while (true) |
235 | { |
236 | // Do something with findData... |
237 | |
238 | if (filter(&findData)) |
239 | { |
240 | // Prepend it to the list. |
241 | first = new findDataList(&findData, first); |
242 | ++elemCount; |
243 | } |
244 | |
245 | BOOL ok = FindNextFileW(hSearch, &findData); |
246 | if (!ok) |
247 | { |
248 | DWORD err = GetLastError(); |
249 | if (err != ERROR_NO_MORE_FILES) |
250 | { |
251 | LogError("Failed to find next file. GetLastError()=%u" , GetLastError()); |
252 | result = -1; |
253 | goto CLEAN_UP; |
254 | } |
255 | break; |
256 | } |
257 | } |
258 | |
259 | // Now sort the list. Create an array to put everything in. |
260 | |
261 | int i; |
262 | |
263 | retArray = new WIN32_FIND_DATAW[elemCount]; |
264 | i = 0; |
265 | for (findDataList* tmp = first; tmp != nullptr; tmp = tmp->next) |
266 | { |
267 | retArray[i++] = tmp->findData; |
268 | } |
269 | |
270 | qsort(retArray, elemCount, sizeof(retArray[0]), WIN32_FIND_DATAW_qsort_helper); |
271 | |
272 | CLEAN_UP: |
273 | |
274 | findDataList::DeleteList(first); |
275 | |
276 | if ((hSearch != INVALID_HANDLE_VALUE) && !FindClose(hSearch)) |
277 | { |
278 | LogError("Failed to close search handle. GetLastError()=%u" , GetLastError()); |
279 | delete[] retArray; |
280 | return -1; |
281 | } |
282 | |
283 | *ppFileArray = retArray; |
284 | *pElemCount = elemCount; |
285 | return result; |
286 | } |
287 | |
288 | // Append all files in the given directory matching the file pattern. |
289 | // |
290 | // static |
291 | int verbMerge::AppendAllInDir(HANDLE hFileOut, |
292 | LPCWSTR dir, |
293 | LPCWSTR file, |
294 | unsigned char* buffer, |
295 | size_t bufferSize, |
296 | bool recursive, |
297 | /* out */ LONGLONG* size) |
298 | { |
299 | int result = 0; // default to zero == success |
300 | LONGLONG totalSize = 0; |
301 | |
302 | LPWSTR searchPattern = MergePathStrings(dir, file); |
303 | |
304 | _WIN32_FIND_DATAW* fileArray = nullptr; |
305 | int elemCount = 0; |
306 | result = FilterDirectory(searchPattern, DirectoryFilterFile, &fileArray, &elemCount); |
307 | if (result != 0) |
308 | { |
309 | goto CLEAN_UP; |
310 | } |
311 | |
312 | for (int i = 0; i < elemCount; i++) |
313 | { |
314 | const _WIN32_FIND_DATAW& findData = fileArray[i]; |
315 | LPWSTR fileFullPath = MergePathStrings(dir, findData.cFileName); |
316 | |
317 | if (wcslen(fileFullPath) > MAX_PATH) // This path is too long, use \\?\ to access it. |
318 | { |
319 | assert(wcscmp(dir, W("." )) != 0 && "can't access the relative path with UNC" ); |
320 | LPWSTR newBuffer = new WCHAR[wcslen(fileFullPath) + 30]; |
321 | wcscpy(newBuffer, W("\\\\?\\" )); |
322 | if (*fileFullPath == '\\') // It is UNC path, use \\?\UNC\serverName to access it. |
323 | { |
324 | LPWSTR serverName = fileFullPath; |
325 | wcscat(newBuffer, W("UNC\\" )); |
326 | while (*serverName == '\\') |
327 | { |
328 | serverName++; |
329 | } |
330 | wcscat(newBuffer, serverName); |
331 | } |
332 | else |
333 | { |
334 | wcscat(newBuffer, fileFullPath); |
335 | } |
336 | delete[] fileFullPath; |
337 | |
338 | fileFullPath = newBuffer; |
339 | } |
340 | |
341 | // Is it zero length? If so, skip it. |
342 | if ((findData.nFileSizeLow == 0) && (findData.nFileSizeHigh == 0)) |
343 | { |
344 | char* fileFullPathAsChar = ConvertWideCharToMultiByte(fileFullPath); |
345 | LogInfo("Skipping zero-length file '%s'" , fileFullPathAsChar); |
346 | delete[] fileFullPathAsChar; |
347 | } |
348 | else |
349 | { |
350 | result = AppendFile(hFileOut, fileFullPath, buffer, bufferSize); |
351 | if (result != 0) |
352 | { |
353 | // Error was already logged. |
354 | delete[] fileFullPath; |
355 | goto CLEAN_UP; |
356 | } |
357 | } |
358 | |
359 | delete[] fileFullPath; |
360 | totalSize += ((LONGLONG)findData.nFileSizeHigh << 32) + (LONGLONG)findData.nFileSizeLow; |
361 | } |
362 | |
363 | // If we need to recurse, then search the directory again for directories, and recursively search each one. |
364 | if (recursive) |
365 | { |
366 | delete[] searchPattern; |
367 | delete[] fileArray; |
368 | |
369 | searchPattern = MergePathStrings(dir, W("*" )); |
370 | fileArray = nullptr; |
371 | elemCount = 0; |
372 | result = FilterDirectory(searchPattern, DirectoryFilterDirectories, &fileArray, &elemCount); |
373 | if (result != 0) |
374 | { |
375 | goto CLEAN_UP; |
376 | } |
377 | |
378 | LONGLONG dirSize = 0; |
379 | for (int i = 0; i < elemCount; i++) |
380 | { |
381 | const _WIN32_FIND_DATAW& findData = fileArray[i]; |
382 | |
383 | LPWSTR fileFullPath = MergePathStrings(dir, findData.cFileName); |
384 | result = AppendAllInDir(hFileOut, fileFullPath, file, buffer, bufferSize, recursive, &dirSize); |
385 | delete[] fileFullPath; |
386 | if (result != 0) |
387 | { |
388 | // Error was already logged. |
389 | goto CLEAN_UP; |
390 | } |
391 | |
392 | totalSize += dirSize; |
393 | } |
394 | } |
395 | |
396 | CLEAN_UP: |
397 | |
398 | delete[] searchPattern; |
399 | delete[] fileArray; |
400 | |
401 | if (result == 0) |
402 | { |
403 | *size = totalSize; |
404 | } |
405 | |
406 | return result; |
407 | } |
408 | |
409 | // Merge a set of .MC files into an output .MCH file. The .MC files to merge are given as a pattern, one of: |
410 | // 1. *.mc -- simple pattern. Assumes current directory. |
411 | // 2. foo\bar\*.mc -- simple pattern with relative directory. |
412 | // 3. c:\foo\bar\baz\*.mc -- simple pattern with full path. |
413 | // If no pattern is given, then the last component of the path is expected to be a directory name, and the pattern is |
414 | // assumed to be "*" (that is, all files). |
415 | // |
416 | // If "recursive" is true, then the pattern is searched for in the specified directory (or implicit current directory) |
417 | // and all sub-directories, recursively. |
418 | // |
419 | // static |
420 | int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive) |
421 | { |
422 | int result = 0; // default to zero == success |
423 | SimpleTimer st1; |
424 | |
425 | LogInfo("Merging files matching '%s' into '%s'" , pattern, nameOfOutputFile); |
426 | |
427 | int nameLength = (int)strlen(nameOfOutputFile) + 1; |
428 | LPWSTR nameOfOutputFileAsWchar = new WCHAR[nameLength]; |
429 | MultiByteToWideChar(CP_ACP, 0, nameOfOutputFile, -1, nameOfOutputFileAsWchar, nameLength); |
430 | |
431 | int patternLength = (int)strlen(pattern) + 1; |
432 | LPWSTR patternAsWchar = new WCHAR[patternLength]; |
433 | MultiByteToWideChar(CP_ACP, 0, pattern, -1, patternAsWchar, patternLength); |
434 | |
435 | HANDLE hFileOut = CreateFileW(nameOfOutputFileAsWchar, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, |
436 | FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL); |
437 | if (hFileOut == INVALID_HANDLE_VALUE) |
438 | { |
439 | LogError("Failed to open output file '%s'. GetLastError()=%u" , nameOfOutputFile, GetLastError()); |
440 | return -1; |
441 | } |
442 | |
443 | // Create a buffer we can use for all the copies. |
444 | unsigned char* buffer = new unsigned char[BUFFER_SIZE]; |
445 | LPCWSTR dir = nullptr; |
446 | LPCWSTR file = nullptr; |
447 | |
448 | LPWSTR lastSlash = wcsrchr(patternAsWchar, DIRECTORY_SEPARATOR_CHAR_A); |
449 | if (lastSlash == NULL) |
450 | { |
451 | // The user may have passed a relative path without a slash, or the current directory. |
452 | // If there is a wildcard, we use it as the file pattern. If there isn't, we assume it's a relative directory |
453 | // name and use it as a directory, with "*" as the file pattern. |
454 | LPCWSTR wildcard = wcschr(patternAsWchar, '*'); |
455 | if (wildcard == NULL) |
456 | { |
457 | file = W("*" ); |
458 | dir = patternAsWchar; |
459 | } |
460 | else |
461 | { |
462 | file = patternAsWchar; |
463 | dir = W("." ); |
464 | } |
465 | } |
466 | else |
467 | { |
468 | dir = patternAsWchar; |
469 | LPCWSTR wildcard = wcschr(lastSlash, '*'); |
470 | if (wildcard == NULL) |
471 | { |
472 | file = W("*" ); |
473 | |
474 | // Minor canonicalization: if there is a trailing last slash, strip it (probably should do this in a |
475 | // loop...) |
476 | if (*(lastSlash + 1) == '\0') |
477 | { |
478 | *lastSlash = '\0'; |
479 | } |
480 | } |
481 | else |
482 | { |
483 | // ok, we found a wildcard after the last slash, so assume there is a pattern. Strip it at the last slash. |
484 | *lastSlash = '\0'; |
485 | file = lastSlash + 1; |
486 | } |
487 | } |
488 | |
489 | LONGLONG totalSize = 0; |
490 | LONGLONG dirSize = 0; |
491 | |
492 | st1.Start(); |
493 | |
494 | result = AppendAllInDir(hFileOut, dir, file, buffer, BUFFER_SIZE, recursive, &dirSize); |
495 | if (result != 0) |
496 | { |
497 | goto CLEAN_UP; |
498 | } |
499 | totalSize += dirSize; |
500 | |
501 | st1.Stop(); |
502 | |
503 | LogInfo("Read/Wrote %lld MB @ %4.2f MB/s." , totalSize / (1000 * 1000), |
504 | (((double)totalSize) / (1000 * 1000)) / |
505 | st1.GetSeconds()); // yes yes.. http://en.wikipedia.org/wiki/Megabyte_per_second#Megabyte_per_second |
506 | |
507 | CLEAN_UP: |
508 | |
509 | delete[] patternAsWchar; |
510 | delete[] buffer; |
511 | |
512 | if (CloseHandle(hFileOut) == 0) |
513 | { |
514 | LogError("CloseHandle failed. GetLastError()=%u" , GetLastError()); |
515 | result = -1; |
516 | } |
517 | |
518 | if (result != 0) |
519 | { |
520 | // There was a failure. Delete the output file, to avoid leaving some half-created file. |
521 | BOOL ok = DeleteFileW(nameOfOutputFileAsWchar); |
522 | if (!ok) |
523 | { |
524 | LogError("Failed to delete file after MCS /merge failed. GetLastError()=%u" , GetLastError()); |
525 | } |
526 | } |
527 | delete[] nameOfOutputFileAsWchar; |
528 | |
529 | return result; |
530 | } |
531 | |