1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ****************************************************************************** |
5 | * |
6 | * Copyright (C) 1999-2013, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************/ |
10 | |
11 | |
12 | /*---------------------------------------------------------------------------- |
13 | * |
14 | * Memory mapped file wrappers for use by the ICU Data Implementation |
15 | * All of the platform-specific implementation for mapping data files |
16 | * is here. The rest of the ICU Data implementation uses only the |
17 | * wrapper functions. |
18 | * |
19 | *----------------------------------------------------------------------------*/ |
20 | /* Defines _XOPEN_SOURCE for access to POSIX functions. |
21 | * Must be before any other #includes. */ |
22 | #include "uposixdefs.h" |
23 | |
24 | #include "unicode/putil.h" |
25 | #include "unicode/ustring.h" |
26 | #include "udatamem.h" |
27 | #include "umapfile.h" |
28 | |
29 | /* memory-mapping base definitions ------------------------------------------ */ |
30 | |
31 | #if MAP_IMPLEMENTATION==MAP_WIN32 |
32 | #ifndef WIN32_LEAN_AND_MEAN |
33 | # define WIN32_LEAN_AND_MEAN |
34 | #endif |
35 | # define VC_EXTRALEAN |
36 | # define NOUSER |
37 | # define NOSERVICE |
38 | # define NOIME |
39 | # define NOMCX |
40 | |
41 | # if U_PLATFORM_HAS_WINUWP_API == 1 |
42 | // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications |
43 | // to use, even though UWP apps are allowed to call and use them. Temporarily change the |
44 | // WINAPI family partition below to Desktop, so that function declarations are visible for UWP. |
45 | # include <winapifamily.h> |
46 | # if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM)) |
47 | # pragma push_macro("WINAPI_PARTITION_DESKTOP") |
48 | # undef WINAPI_PARTITION_DESKTOP |
49 | # define WINAPI_PARTITION_DESKTOP 1 |
50 | # define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE |
51 | # endif |
52 | # endif |
53 | |
54 | # include <windows.h> |
55 | |
56 | # if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE) |
57 | # pragma pop_macro("WINAPI_PARTITION_DESKTOP") |
58 | # endif |
59 | |
60 | # include "cmemory.h" |
61 | |
62 | typedef HANDLE MemoryMap; |
63 | |
64 | # define IS_MAP(map) ((map)!=nullptr) |
65 | |
66 | #elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL |
67 | typedef size_t MemoryMap; |
68 | |
69 | # define IS_MAP(map) ((map)!=0) |
70 | |
71 | # include <unistd.h> |
72 | # include <sys/mman.h> |
73 | # include <sys/stat.h> |
74 | # include <fcntl.h> |
75 | |
76 | # ifndef MAP_FAILED |
77 | # define MAP_FAILED ((void*)-1) |
78 | # endif |
79 | |
80 | # if MAP_IMPLEMENTATION==MAP_390DLL |
81 | /* No memory mapping for 390 batch mode. Fake it using dll loading. */ |
82 | # include <dll.h> |
83 | # include "cstring.h" |
84 | # include "cmemory.h" |
85 | # include "unicode/udata.h" |
86 | # define LIB_PREFIX "lib" |
87 | # define LIB_SUFFIX ".dll" |
88 | /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ |
89 | # define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" |
90 | # endif |
91 | #elif MAP_IMPLEMENTATION==MAP_STDIO |
92 | # include <stdio.h> |
93 | # include "cmemory.h" |
94 | |
95 | typedef void *MemoryMap; |
96 | |
97 | # define IS_MAP(map) ((map)!=nullptr) |
98 | #endif |
99 | |
100 | /*----------------------------------------------------------------------------* |
101 | * * |
102 | * Memory Mapped File support. Platform dependent implementation of * |
103 | * functions used by the rest of the implementation.* |
104 | * * |
105 | *----------------------------------------------------------------------------*/ |
106 | #if MAP_IMPLEMENTATION==MAP_NONE |
107 | U_CFUNC UBool |
108 | uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { |
109 | if (U_FAILURE(*status)) { |
110 | return false; |
111 | } |
112 | UDataMemory_init(pData); /* Clear the output struct. */ |
113 | return false; /* no file access */ |
114 | } |
115 | |
116 | U_CFUNC void uprv_unmapFile(UDataMemory *pData) { |
117 | /* nothing to do */ |
118 | } |
119 | #elif MAP_IMPLEMENTATION==MAP_WIN32 |
120 | U_CFUNC UBool |
121 | uprv_mapFile( |
122 | UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ |
123 | /* Output only; any original contents are cleared. */ |
124 | const char *path, /* File path to be opened/mapped. */ |
125 | UErrorCode *status /* Error status, used to report out-of-memory errors. */ |
126 | ) |
127 | { |
128 | if (U_FAILURE(*status)) { |
129 | return false; |
130 | } |
131 | |
132 | HANDLE map = nullptr; |
133 | HANDLE file = INVALID_HANDLE_VALUE; |
134 | |
135 | UDataMemory_init(pData); /* Clear the output struct. */ |
136 | |
137 | /* open the input file */ |
138 | #if U_PLATFORM_HAS_WINUWP_API == 0 |
139 | // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from |
140 | // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page. |
141 | // This means that we can't call the *W version of API below, whereas in the UWP code-path |
142 | // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters. |
143 | file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr, |
144 | OPEN_EXISTING, |
145 | FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr); |
146 | #else |
147 | // Convert from UTF-8 string to UTF-16 string. |
148 | wchar_t utf16Path[MAX_PATH]; |
149 | int32_t pathUtf16Len = 0; |
150 | u_strFromUTF8(reinterpret_cast<char16_t*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status); |
151 | |
152 | if (U_FAILURE(*status)) { |
153 | return false; |
154 | } |
155 | if (*status == U_STRING_NOT_TERMINATED_WARNING) { |
156 | // Report back an error instead of a warning. |
157 | *status = U_BUFFER_OVERFLOW_ERROR; |
158 | return false; |
159 | } |
160 | |
161 | file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr, |
162 | OPEN_EXISTING, |
163 | FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr); |
164 | #endif |
165 | if (file == INVALID_HANDLE_VALUE) { |
166 | // If we failed to open the file due to an out-of-memory error, then we want |
167 | // to report that error back to the caller. |
168 | if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { |
169 | *status = U_MEMORY_ALLOCATION_ERROR; |
170 | } |
171 | return false; |
172 | } |
173 | |
174 | // Note: We use nullptr/nullptr for lpAttributes parameter below. |
175 | // This means our handle cannot be inherited and we will get the default security descriptor. |
176 | /* create an unnamed Windows file-mapping object for the specified file */ |
177 | map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr); |
178 | |
179 | CloseHandle(file); |
180 | if (map == nullptr) { |
181 | // If we failed to create the mapping due to an out-of-memory error, then |
182 | // we want to report that error back to the caller. |
183 | if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { |
184 | *status = U_MEMORY_ALLOCATION_ERROR; |
185 | } |
186 | return false; |
187 | } |
188 | |
189 | /* map a view of the file into our address space */ |
190 | pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0)); |
191 | if (pData->pHeader == nullptr) { |
192 | CloseHandle(map); |
193 | return false; |
194 | } |
195 | pData->map = map; |
196 | return true; |
197 | } |
198 | |
199 | U_CFUNC void |
200 | uprv_unmapFile(UDataMemory *pData) { |
201 | if (pData != nullptr && pData->map != nullptr) { |
202 | UnmapViewOfFile(pData->pHeader); |
203 | CloseHandle(pData->map); |
204 | pData->pHeader = nullptr; |
205 | pData->map = nullptr; |
206 | } |
207 | } |
208 | |
209 | |
210 | |
211 | #elif MAP_IMPLEMENTATION==MAP_POSIX |
212 | U_CFUNC UBool |
213 | uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { |
214 | int fd; |
215 | int length; |
216 | struct stat mystat; |
217 | void *data; |
218 | |
219 | if (U_FAILURE(*status)) { |
220 | return false; |
221 | } |
222 | |
223 | UDataMemory_init(pData); /* Clear the output struct. */ |
224 | |
225 | /* determine the length of the file */ |
226 | if(stat(path, &mystat)!=0 || mystat.st_size<=0) { |
227 | return false; |
228 | } |
229 | length=mystat.st_size; |
230 | |
231 | /* open the file */ |
232 | fd=open(path, O_RDONLY); |
233 | if(fd==-1) { |
234 | return false; |
235 | } |
236 | |
237 | /* get a view of the mapping */ |
238 | #if U_PLATFORM != U_PF_HPUX |
239 | data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); |
240 | #else |
241 | data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); |
242 | #endif |
243 | close(fd); /* no longer needed */ |
244 | if(data==MAP_FAILED) { |
245 | // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? |
246 | return false; |
247 | } |
248 | |
249 | pData->map = (char *)data + length; |
250 | pData->pHeader=(const DataHeader *)data; |
251 | pData->mapAddr = data; |
252 | #if U_PLATFORM == U_PF_IPHONE |
253 | posix_madvise(data, length, POSIX_MADV_RANDOM); |
254 | #endif |
255 | return true; |
256 | } |
257 | |
258 | U_CFUNC void |
259 | uprv_unmapFile(UDataMemory *pData) { |
260 | if(pData!=nullptr && pData->map!=nullptr) { |
261 | size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; |
262 | if(munmap(pData->mapAddr, dataLen)==-1) { |
263 | } |
264 | pData->pHeader=nullptr; |
265 | pData->map=0; |
266 | pData->mapAddr=nullptr; |
267 | } |
268 | } |
269 | |
270 | |
271 | |
272 | #elif MAP_IMPLEMENTATION==MAP_STDIO |
/*
 * Derived from the filestrm.c/T_FileStream_size() implementation.
 *
 * Returns the size of the file behind `f` in bytes, determined by seeking to
 * the end of the stream. The stream's current position is restored before
 * returning, so the caller can keep reading where it was.
 *
 * Returns -1 when the position cannot be queried or changed, or when the size
 * does not fit into an int32_t.
 */
static int32_t
umap_fsize(FILE *f) {
    /* ftell() returns long; keep it as long so that the saved offset is not truncated. */
    long savedPos = ftell(f);
    long endPos;

    if (savedPos < 0 || fseek(f, 0, SEEK_END) != 0) {
        return -1;
    }
    endPos = ftell(f);

    /* Always try to go back to where the caller was, even if ftell() failed. */
    if (fseek(f, savedPos, SEEK_SET) != 0) {
        return -1;
    }
    if (endPos < 0 || endPos > INT32_MAX) {
        return -1;
    }
    return (int32_t)endPos;
}
286 | |
287 | U_CFUNC UBool |
288 | uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { |
289 | FILE *file; |
290 | int32_t fileLength; |
291 | void *p; |
292 | |
293 | if (U_FAILURE(*status)) { |
294 | return false; |
295 | } |
296 | |
297 | UDataMemory_init(pData); /* Clear the output struct. */ |
298 | /* open the input file */ |
299 | file=fopen(path, "rb" ); |
300 | if(file==nullptr) { |
301 | return false; |
302 | } |
303 | |
304 | /* get the file length */ |
305 | fileLength=umap_fsize(file); |
306 | if(ferror(file) || fileLength<=20) { |
307 | fclose(file); |
308 | return false; |
309 | } |
310 | |
311 | /* allocate the memory to hold the file data */ |
312 | p=uprv_malloc(fileLength); |
313 | if(p==nullptr) { |
314 | fclose(file); |
315 | *status = U_MEMORY_ALLOCATION_ERROR; |
316 | return false; |
317 | } |
318 | |
319 | /* read the file */ |
320 | if(fileLength!=fread(p, 1, fileLength, file)) { |
321 | uprv_free(p); |
322 | fclose(file); |
323 | return false; |
324 | } |
325 | |
326 | fclose(file); |
327 | pData->map=p; |
328 | pData->pHeader=(const DataHeader *)p; |
329 | pData->mapAddr=p; |
330 | return true; |
331 | } |
332 | |
333 | U_CFUNC void |
334 | uprv_unmapFile(UDataMemory *pData) { |
335 | if(pData!=nullptr && pData->map!=nullptr) { |
336 | uprv_free(pData->map); |
337 | pData->map = nullptr; |
338 | pData->mapAddr = nullptr; |
339 | pData->pHeader = nullptr; |
340 | } |
341 | } |
342 | |
343 | |
344 | #elif MAP_IMPLEMENTATION==MAP_390DLL |
345 | /* 390 specific Library Loading. |
346 | * This is the only platform left that dynamically loads an ICU Data Library. |
347 | * All other platforms use .data files when dynamic loading is required, but |
 *   this turns out to be awkward to support in 390 batch mode.
349 | * |
350 | * The idea here is to hide the fact that 390 is using dll loading from the |
351 | * rest of ICU, and make it look like there is file loading happening. |
352 | * |
353 | */ |
354 | |
/*
 * Copies the NUL-terminated string `src` into `dest`, terminator included,
 * and returns a pointer to the terminating NUL that was written to `dest`.
 *
 * No bounds are checked: `dest` must have room for the whole string.
 */
static char *strcpy_returnEnd(char *dest, const char *src)
{
    for (;;) {
        const char ch = *src++;
        *dest = ch;
        if (ch == 0) {
            return dest;    /* points at the terminator just stored */
        }
        ++dest;
    }
}
363 | |
364 | /*------------------------------------------------------------------------------ |
365 | * |
366 | * computeDirPath given a user-supplied path of an item to be opened, |
367 | * compute and return |
368 | * - the full directory path to be used |
369 | * when opening the file. |
370 | * - Pointer to null at end of above returned path |
371 | * |
372 | * Parameters: |
373 | * path: input path. Buffer is not altered. |
374 | * pathBuffer: Output buffer. Any contents are overwritten. |
375 | * |
376 | * Returns: |
377 | * Pointer to null termination in returned pathBuffer. |
378 | * |
379 | * TODO: This works the way ICU historically has, but the |
380 | * whole data fallback search path is so complicated that |
381 | * probably almost no one will ever really understand it, |
382 | * the potential for confusion is large. (It's not just |
383 | * this one function, but the whole scheme.) |
384 | * |
385 | *------------------------------------------------------------------------------*/ |
386 | static char *uprv_computeDirPath(const char *path, char *pathBuffer) |
387 | { |
388 | char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ |
389 | int32_t pathLen; /* Length of the returned directory path */ |
390 | |
391 | finalSlash = 0; |
392 | if (path != 0) { |
393 | finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); |
394 | } |
395 | |
396 | *pathBuffer = 0; |
397 | if (finalSlash == 0) { |
398 | /* No user-supplied path. |
399 | * Copy the ICU_DATA path to the path buffer and return that*/ |
400 | const char *icuDataDir; |
401 | icuDataDir=u_getDataDirectory(); |
402 | if(icuDataDir!=nullptr && *icuDataDir!=0) { |
403 | return strcpy_returnEnd(pathBuffer, icuDataDir); |
404 | } else { |
405 | /* there is no icuDataDir either. Just return the empty pathBuffer. */ |
406 | return pathBuffer; |
407 | } |
408 | } |
409 | |
410 | /* User supplied path did contain a directory portion. |
411 | * Copy it to the output path buffer */ |
412 | pathLen = (int32_t)(finalSlash - path + 1); |
413 | uprv_memcpy(pathBuffer, path, pathLen); |
414 | *(pathBuffer+pathLen) = 0; |
415 | return pathBuffer+pathLen; |
416 | } |
417 | |
418 | |
419 | # define DATA_TYPE "dat" |
420 | |
421 | U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { |
422 | const char *inBasename; |
423 | char *basename; |
424 | char pathBuffer[1024]; |
425 | const DataHeader *pHeader; |
426 | dllhandle *handle; |
427 | void *val=0; |
428 | |
429 | if (U_FAILURE(*status)) { |
430 | return false; |
431 | } |
432 | |
433 | inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); |
434 | if(inBasename==nullptr) { |
435 | inBasename = path; |
436 | } else { |
437 | inBasename++; |
438 | } |
439 | basename=uprv_computeDirPath(path, pathBuffer); |
440 | if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat" ) != 0) { |
441 | /* must mmap file... for build */ |
442 | int fd; |
443 | int length; |
444 | struct stat mystat; |
445 | void *data; |
446 | UDataMemory_init(pData); /* Clear the output struct. */ |
447 | |
448 | /* determine the length of the file */ |
449 | if(stat(path, &mystat)!=0 || mystat.st_size<=0) { |
450 | return false; |
451 | } |
452 | length=mystat.st_size; |
453 | |
454 | /* open the file */ |
455 | fd=open(path, O_RDONLY); |
456 | if(fd==-1) { |
457 | return false; |
458 | } |
459 | |
460 | /* get a view of the mapping */ |
461 | data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); |
462 | close(fd); /* no longer needed */ |
463 | if(data==MAP_FAILED) { |
464 | // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? |
465 | return false; |
466 | } |
467 | pData->map = (char *)data + length; |
468 | pData->pHeader=(const DataHeader *)data; |
469 | pData->mapAddr = data; |
470 | return true; |
471 | } |
472 | |
473 | # ifdef OS390BATCH |
474 | /* ### hack: we still need to get u_getDataDirectory() fixed |
475 | for OS/390 (batch mode - always return "//"? ) |
476 | and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) |
477 | This is probably due to the strange file system on OS/390. It's more like |
478 | a database with short entry names than a typical file system. */ |
479 | /* U_ICUDATA_NAME should always have the correct name */ |
480 | /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ |
481 | /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ |
482 | /* PROJECT!!!!! */ |
483 | uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA" ); |
484 | # else |
485 | /* set up the library name */ |
486 | uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); |
487 | # endif |
488 | |
489 | # ifdef UDATA_DEBUG |
490 | fprintf(stderr, "dllload: %s " , pathBuffer); |
491 | # endif |
492 | |
493 | handle=dllload(pathBuffer); |
494 | |
495 | # ifdef UDATA_DEBUG |
496 | fprintf(stderr, " -> %08X\n" , handle ); |
497 | # endif |
498 | |
499 | if(handle != nullptr) { |
500 | /* we have a data DLL - what kind of lookup do we need here? */ |
501 | /* try to find the Table of Contents */ |
502 | UDataMemory_init(pData); /* Clear the output struct. */ |
503 | val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); |
504 | if(val == 0) { |
505 | /* failed... so keep looking */ |
506 | return false; |
507 | } |
508 | # ifdef UDATA_DEBUG |
509 | fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n" , handle, U_ICUDATA_ENTRY_NAME, val); |
510 | # endif |
511 | |
512 | pData->pHeader=(const DataHeader *)val; |
513 | return true; |
514 | } else { |
515 | return false; /* no handle */ |
516 | } |
517 | } |
518 | |
519 | U_CFUNC void uprv_unmapFile(UDataMemory *pData) { |
520 | if(pData!=nullptr && pData->map!=nullptr) { |
521 | uprv_free(pData->map); |
522 | pData->map = nullptr; |
523 | pData->mapAddr = nullptr; |
524 | pData->pHeader = nullptr; |
525 | } |
526 | } |
527 | |
528 | #else |
529 | # error MAP_IMPLEMENTATION is set incorrectly |
530 | #endif |
531 | |