1//
2// Copyright (c) Microsoft. All rights reserved.
3// Licensed under the MIT license. See LICENSE file in the project root for full license information.
4//
5
6#include "standardpch.h"
7#include "verbmerge.h"
8#include "simpletimer.h"
9#include "logging.h"
10
// Size, in bytes, of the scratch buffer used for file reads/writes: 256MB, so I/O is done in large chunks.
#define BUFFER_SIZE (256 * 1024 * 1024)
13
14// MergePathStrings: take two file system path components, compose them together, and return the merged pathname string.
15// The caller must delete the returned string with delete[].
16//
17// static
18LPWSTR verbMerge::MergePathStrings(LPCWSTR dir, LPCWSTR file)
19{
20 size_t dirlen = wcslen(dir);
21 size_t filelen = wcslen(file);
22 size_t newlen = dirlen + 1 /* slash */ + filelen + 1 /* null */;
23 LPWSTR newpath = new WCHAR[newlen];
24 wcscpy(newpath, dir);
25 wcscat(newpath, DIRECTORY_SEPARATOR_STR_W);
26 wcscat(newpath, file);
27 return newpath;
28}
29
30char* verbMerge::ConvertWideCharToMultiByte(LPCWSTR wstr)
31{
32 unsigned int codePage = CP_UTF8;
33 int sizeNeeded = WideCharToMultiByte(codePage, 0, wstr, -1, NULL, 0, NULL, NULL);
34 char* encodedStr = new char[sizeNeeded];
35 WideCharToMultiByte(codePage, 0, wstr, -1, encodedStr, sizeNeeded, NULL, NULL);
36 return encodedStr;
37}
38
39// AppendFile: append the file named by 'fileName' to the output file referred to by 'hFileOut'. The 'hFileOut'
40// handle is assumed to be open, and the file position is assumed to be at the correct spot for writing, to append.
41//
42// 'buffer' is memory that can be used to do reading/buffering.
43//
44// static
45int verbMerge::AppendFile(HANDLE hFileOut, LPCWSTR fileName, unsigned char* buffer, size_t bufferSize)
46{
47 int result = 0; // default to zero == success
48
49 char* fileNameAsChar = ConvertWideCharToMultiByte(fileName);
50 LogInfo("Appending file '%s'", fileNameAsChar);
51
52 HANDLE hFileIn = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
53 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL);
54 if (hFileIn == INVALID_HANDLE_VALUE)
55 {
56 // If you use a relative path, you can get GetLastError()==3, if the absolute path is longer
57 // than MAX_PATH.
58 LogError("Failed to open input file '%s'. GetLastError()=%u", fileNameAsChar, GetLastError());
59 return -1;
60 }
61
62 LARGE_INTEGER fileSize;
63 if (GetFileSizeEx(hFileIn, &fileSize) == 0)
64 {
65 LogError("GetFileSizeEx on '%s' failed. GetLastError()=%u", fileNameAsChar, GetLastError());
66 result = -1;
67 goto CLEAN_UP;
68 }
69
70 for (LONGLONG offset = 0; offset < fileSize.QuadPart; offset += bufferSize)
71 {
72 DWORD bytesRead = -1;
73 BOOL res = ReadFile(hFileIn, buffer, (DWORD)bufferSize, &bytesRead, nullptr);
74 if (!res)
75 {
76 LogError("Failed to read '%s' from offset %lld. GetLastError()=%u", fileNameAsChar, offset, GetLastError());
77 result = -1;
78 goto CLEAN_UP;
79 }
80 DWORD bytesWritten = -1;
81 BOOL res2 = WriteFile(hFileOut, buffer, bytesRead, &bytesWritten, nullptr);
82 if (!res2)
83 {
84 LogError("Failed to write output file at offset %lld. GetLastError()=%u", offset, GetLastError());
85 result = -1;
86 goto CLEAN_UP;
87 }
88 if (bytesRead != bytesWritten)
89 {
90 LogError("Failed to read/write matching bytes %u!=%u", bytesRead, bytesWritten);
91 result = -1;
92 goto CLEAN_UP;
93 }
94 }
95
96CLEAN_UP:
97
98 delete[] fileNameAsChar;
99
100 if (CloseHandle(hFileIn) == 0)
101 {
102 LogError("CloseHandle failed. GetLastError()=%u", GetLastError());
103 result = -1;
104 }
105
106 return result;
107}
108
109// Return true if this is a directory
110//
111// static
112bool verbMerge::DirectoryFilterDirectories(WIN32_FIND_DATAW* findData)
113{
114 if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
115 {
116// It's a directory. See if we want to exclude it because of other reasons, such as:
117// 1. reparse points: avoid the possibility of loops
118// 2. system directories
119// 3. hidden directories
120// 4. "." or ".."
121
122#ifndef FEATURE_PAL // FILE_ATTRIBUTE_REPARSE_POINT is not defined in the PAL
123 if ((findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0)
124 return false;
125#endif // !FEATURE_PAL
126 if ((findData->dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) != 0)
127 return false;
128 if ((findData->dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) != 0)
129 return false;
130
131 if (wcscmp(findData->cFileName, W(".")) == 0)
132 return false;
133 if (wcscmp(findData->cFileName, W("..")) == 0)
134 return false;
135
136 return true;
137 }
138
139 return false;
140}
141
142// Return true if this is a file.
143//
144// static
145bool verbMerge::DirectoryFilterFile(WIN32_FIND_DATAW* findData)
146{
147 if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0)
148 {
149 // This is not a directory, so it must be a file.
150 return true;
151 }
152
153 return false;
154}
155
156// static
157int __cdecl verbMerge::WIN32_FIND_DATAW_qsort_helper(const void* p1, const void* p2)
158{
159 const WIN32_FIND_DATAW* file1 = (WIN32_FIND_DATAW*)p1;
160 const WIN32_FIND_DATAW* file2 = (WIN32_FIND_DATAW*)p2;
161 return wcscmp(file1->cFileName, file2->cFileName);
162}
163
164// Enumerate a directory for the files specified by "searchPattern". For each element in the directory,
165// pass it to the filter function. If the filter returns true, we keep it, otherwise we ignore it. Return
166// an array of information for the files that we kept, sorted by filename.
167//
168// Returns 0 on success, non-zero on failure.
169// If success, fileArray and elemCount are set.
170//
171// static
172int verbMerge::FilterDirectory(LPCWSTR searchPattern,
173 DirectoryFilterFunction_t filter,
174 /* out */ WIN32_FIND_DATAW** ppFileArray,
175 int* pElemCount)
176{
177 // First, build up a list, then create an array and sort it after we know how many elements there are.
178 struct findDataList
179 {
180 findDataList(WIN32_FIND_DATAW* newFindData, findDataList* newNext) : findData(*newFindData), next(newNext)
181 {
182 }
183
184 static void DeleteList(findDataList* root)
185 {
186 for (findDataList* loop = root; loop != nullptr;)
187 {
188 findDataList* tmp = loop;
189 loop = loop->next;
190 delete tmp;
191 }
192 }
193
194 WIN32_FIND_DATAW findData;
195 findDataList* next;
196 };
197
198 WIN32_FIND_DATAW* retArray = nullptr;
199 findDataList* first = nullptr;
200
201 int result = 0; // default to zero == success
202 int elemCount = 0;
203
204 // NOTE: this function only works on Windows 7 and later.
205 WIN32_FIND_DATAW findData;
206 HANDLE hSearch;
207#ifdef FEATURE_PAL
208 // PAL doesn't have FindFirstFileEx(). So just use FindFirstFile(). The only reason we use
209 // the Ex version is potentially better performance (don't populate short name; use large fetch),
210 // not functionality.
211 hSearch = FindFirstFileW(searchPattern, &findData);
212#else // !FEATURE_PAL
213 hSearch = FindFirstFileExW(searchPattern,
214 FindExInfoBasic, // We don't care about the short names
215 &findData,
216 FindExSearchNameMatch, // standard name matching
217 NULL, FIND_FIRST_EX_LARGE_FETCH);
218#endif // !FEATURE_PAL
219
220 if (hSearch == INVALID_HANDLE_VALUE)
221 {
222 DWORD lastErr = GetLastError();
223 if (lastErr == ERROR_FILE_NOT_FOUND)
224 {
225 // This is ok; there was just nothing matching the pattern.
226 }
227 else
228 {
229 LogError("Failed to find pattern '%s'. GetLastError()=%u", searchPattern, GetLastError());
230 }
231 goto CLEAN_UP;
232 }
233
234 while (true)
235 {
236 // Do something with findData...
237
238 if (filter(&findData))
239 {
240 // Prepend it to the list.
241 first = new findDataList(&findData, first);
242 ++elemCount;
243 }
244
245 BOOL ok = FindNextFileW(hSearch, &findData);
246 if (!ok)
247 {
248 DWORD err = GetLastError();
249 if (err != ERROR_NO_MORE_FILES)
250 {
251 LogError("Failed to find next file. GetLastError()=%u", GetLastError());
252 result = -1;
253 goto CLEAN_UP;
254 }
255 break;
256 }
257 }
258
259 // Now sort the list. Create an array to put everything in.
260
261 int i;
262
263 retArray = new WIN32_FIND_DATAW[elemCount];
264 i = 0;
265 for (findDataList* tmp = first; tmp != nullptr; tmp = tmp->next)
266 {
267 retArray[i++] = tmp->findData;
268 }
269
270 qsort(retArray, elemCount, sizeof(retArray[0]), WIN32_FIND_DATAW_qsort_helper);
271
272CLEAN_UP:
273
274 findDataList::DeleteList(first);
275
276 if ((hSearch != INVALID_HANDLE_VALUE) && !FindClose(hSearch))
277 {
278 LogError("Failed to close search handle. GetLastError()=%u", GetLastError());
279 delete[] retArray;
280 return -1;
281 }
282
283 *ppFileArray = retArray;
284 *pElemCount = elemCount;
285 return result;
286}
287
288// Append all files in the given directory matching the file pattern.
289//
290// static
291int verbMerge::AppendAllInDir(HANDLE hFileOut,
292 LPCWSTR dir,
293 LPCWSTR file,
294 unsigned char* buffer,
295 size_t bufferSize,
296 bool recursive,
297 /* out */ LONGLONG* size)
298{
299 int result = 0; // default to zero == success
300 LONGLONG totalSize = 0;
301
302 LPWSTR searchPattern = MergePathStrings(dir, file);
303
304 _WIN32_FIND_DATAW* fileArray = nullptr;
305 int elemCount = 0;
306 result = FilterDirectory(searchPattern, DirectoryFilterFile, &fileArray, &elemCount);
307 if (result != 0)
308 {
309 goto CLEAN_UP;
310 }
311
312 for (int i = 0; i < elemCount; i++)
313 {
314 const _WIN32_FIND_DATAW& findData = fileArray[i];
315 LPWSTR fileFullPath = MergePathStrings(dir, findData.cFileName);
316
317 if (wcslen(fileFullPath) > MAX_PATH) // This path is too long, use \\?\ to access it.
318 {
319 assert(wcscmp(dir, W(".")) != 0 && "can't access the relative path with UNC");
320 LPWSTR newBuffer = new WCHAR[wcslen(fileFullPath) + 30];
321 wcscpy(newBuffer, W("\\\\?\\"));
322 if (*fileFullPath == '\\') // It is UNC path, use \\?\UNC\serverName to access it.
323 {
324 LPWSTR serverName = fileFullPath;
325 wcscat(newBuffer, W("UNC\\"));
326 while (*serverName == '\\')
327 {
328 serverName++;
329 }
330 wcscat(newBuffer, serverName);
331 }
332 else
333 {
334 wcscat(newBuffer, fileFullPath);
335 }
336 delete[] fileFullPath;
337
338 fileFullPath = newBuffer;
339 }
340
341 // Is it zero length? If so, skip it.
342 if ((findData.nFileSizeLow == 0) && (findData.nFileSizeHigh == 0))
343 {
344 char* fileFullPathAsChar = ConvertWideCharToMultiByte(fileFullPath);
345 LogInfo("Skipping zero-length file '%s'", fileFullPathAsChar);
346 delete[] fileFullPathAsChar;
347 }
348 else
349 {
350 result = AppendFile(hFileOut, fileFullPath, buffer, bufferSize);
351 if (result != 0)
352 {
353 // Error was already logged.
354 delete[] fileFullPath;
355 goto CLEAN_UP;
356 }
357 }
358
359 delete[] fileFullPath;
360 totalSize += ((LONGLONG)findData.nFileSizeHigh << 32) + (LONGLONG)findData.nFileSizeLow;
361 }
362
363 // If we need to recurse, then search the directory again for directories, and recursively search each one.
364 if (recursive)
365 {
366 delete[] searchPattern;
367 delete[] fileArray;
368
369 searchPattern = MergePathStrings(dir, W("*"));
370 fileArray = nullptr;
371 elemCount = 0;
372 result = FilterDirectory(searchPattern, DirectoryFilterDirectories, &fileArray, &elemCount);
373 if (result != 0)
374 {
375 goto CLEAN_UP;
376 }
377
378 LONGLONG dirSize = 0;
379 for (int i = 0; i < elemCount; i++)
380 {
381 const _WIN32_FIND_DATAW& findData = fileArray[i];
382
383 LPWSTR fileFullPath = MergePathStrings(dir, findData.cFileName);
384 result = AppendAllInDir(hFileOut, fileFullPath, file, buffer, bufferSize, recursive, &dirSize);
385 delete[] fileFullPath;
386 if (result != 0)
387 {
388 // Error was already logged.
389 goto CLEAN_UP;
390 }
391
392 totalSize += dirSize;
393 }
394 }
395
396CLEAN_UP:
397
398 delete[] searchPattern;
399 delete[] fileArray;
400
401 if (result == 0)
402 {
403 *size = totalSize;
404 }
405
406 return result;
407}
408
409// Merge a set of .MC files into an output .MCH file. The .MC files to merge are given as a pattern, one of:
410// 1. *.mc -- simple pattern. Assumes current directory.
411// 2. foo\bar\*.mc -- simple pattern with relative directory.
412// 3. c:\foo\bar\baz\*.mc -- simple pattern with full path.
413// If no pattern is given, then the last component of the path is expected to be a directory name, and the pattern is
414// assumed to be "*" (that is, all files).
415//
416// If "recursive" is true, then the pattern is searched for in the specified directory (or implicit current directory)
417// and all sub-directories, recursively.
418//
419// static
420int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive)
421{
422 int result = 0; // default to zero == success
423 SimpleTimer st1;
424
425 LogInfo("Merging files matching '%s' into '%s'", pattern, nameOfOutputFile);
426
427 int nameLength = (int)strlen(nameOfOutputFile) + 1;
428 LPWSTR nameOfOutputFileAsWchar = new WCHAR[nameLength];
429 MultiByteToWideChar(CP_ACP, 0, nameOfOutputFile, -1, nameOfOutputFileAsWchar, nameLength);
430
431 int patternLength = (int)strlen(pattern) + 1;
432 LPWSTR patternAsWchar = new WCHAR[patternLength];
433 MultiByteToWideChar(CP_ACP, 0, pattern, -1, patternAsWchar, patternLength);
434
435 HANDLE hFileOut = CreateFileW(nameOfOutputFileAsWchar, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS,
436 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL);
437 if (hFileOut == INVALID_HANDLE_VALUE)
438 {
439 LogError("Failed to open output file '%s'. GetLastError()=%u", nameOfOutputFile, GetLastError());
440 return -1;
441 }
442
443 // Create a buffer we can use for all the copies.
444 unsigned char* buffer = new unsigned char[BUFFER_SIZE];
445 LPCWSTR dir = nullptr;
446 LPCWSTR file = nullptr;
447
448 LPWSTR lastSlash = wcsrchr(patternAsWchar, DIRECTORY_SEPARATOR_CHAR_A);
449 if (lastSlash == NULL)
450 {
451 // The user may have passed a relative path without a slash, or the current directory.
452 // If there is a wildcard, we use it as the file pattern. If there isn't, we assume it's a relative directory
453 // name and use it as a directory, with "*" as the file pattern.
454 LPCWSTR wildcard = wcschr(patternAsWchar, '*');
455 if (wildcard == NULL)
456 {
457 file = W("*");
458 dir = patternAsWchar;
459 }
460 else
461 {
462 file = patternAsWchar;
463 dir = W(".");
464 }
465 }
466 else
467 {
468 dir = patternAsWchar;
469 LPCWSTR wildcard = wcschr(lastSlash, '*');
470 if (wildcard == NULL)
471 {
472 file = W("*");
473
474 // Minor canonicalization: if there is a trailing last slash, strip it (probably should do this in a
475 // loop...)
476 if (*(lastSlash + 1) == '\0')
477 {
478 *lastSlash = '\0';
479 }
480 }
481 else
482 {
483 // ok, we found a wildcard after the last slash, so assume there is a pattern. Strip it at the last slash.
484 *lastSlash = '\0';
485 file = lastSlash + 1;
486 }
487 }
488
489 LONGLONG totalSize = 0;
490 LONGLONG dirSize = 0;
491
492 st1.Start();
493
494 result = AppendAllInDir(hFileOut, dir, file, buffer, BUFFER_SIZE, recursive, &dirSize);
495 if (result != 0)
496 {
497 goto CLEAN_UP;
498 }
499 totalSize += dirSize;
500
501 st1.Stop();
502
503 LogInfo("Read/Wrote %lld MB @ %4.2f MB/s.", totalSize / (1000 * 1000),
504 (((double)totalSize) / (1000 * 1000)) /
505 st1.GetSeconds()); // yes yes.. http://en.wikipedia.org/wiki/Megabyte_per_second#Megabyte_per_second
506
507CLEAN_UP:
508
509 delete[] patternAsWchar;
510 delete[] buffer;
511
512 if (CloseHandle(hFileOut) == 0)
513 {
514 LogError("CloseHandle failed. GetLastError()=%u", GetLastError());
515 result = -1;
516 }
517
518 if (result != 0)
519 {
520 // There was a failure. Delete the output file, to avoid leaving some half-created file.
521 BOOL ok = DeleteFileW(nameOfOutputFileAsWchar);
522 if (!ok)
523 {
524 LogError("Failed to delete file after MCS /merge failed. GetLastError()=%u", GetLastError());
525 }
526 }
527 delete[] nameOfOutputFileAsWchar;
528
529 return result;
530}
531