1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ****************************************************************************** |
5 | * |
6 | * Copyright (C) 1997-2016, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ****************************************************************************** |
10 | * |
11 | * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) |
12 | * |
13 | * Date Name Description |
14 | * 04/14/97 aliu Creation. |
15 | * 04/24/97 aliu Added getDefaultDataDirectory() and |
16 | * getDefaultLocaleID(). |
17 | * 04/28/97 aliu Rewritten to assume Unix and apply general methods |
18 | * for assumed case. Non-UNIX platforms must be |
19 | * special-cased. Rewrote numeric methods dealing |
20 | * with NaN and Infinity to be platform independent |
21 | * over all IEEE 754 platforms. |
22 | * 05/13/97 aliu Restored sign of timezone |
23 | * (semantics are hours West of GMT) |
24 | * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, |
25 | * nextDouble.. |
26 | * 07/22/98 stephen Added remainder, max, min, trunc |
27 | * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity |
28 | * 08/24/98 stephen Added longBitsFromDouble |
29 | * 09/08/98 stephen Minor changes for Mac Port |
30 | * 03/02/99 stephen Removed openFile(). Added AS400 support. |
31 | * Fixed EBCDIC tables |
32 | * 04/15/99 stephen Converted to C. |
33 | * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). |
34 | * 08/04/99 jeffrey R. Added OS/2 changes |
35 | * 11/15/99 helena Integrated S/390 IEEE support. |
36 | * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID |
37 | * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage |
38 | * 01/03/08 Steven L. Fake Time Support |
39 | ****************************************************************************** |
40 | */ |
41 | |
42 | // Defines _XOPEN_SOURCE for access to POSIX functions. |
43 | // Must be before any other #includes. |
44 | #include "uposixdefs.h" |
45 | |
46 | // First, the platform type. Need this for U_PLATFORM. |
47 | #include "unicode/platform.h" |
48 | |
49 | #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ |
50 | /* tzset isn't defined in strict ANSI on MinGW. */ |
51 | #undef __STRICT_ANSI__ |
52 | #endif |
53 | |
/*
 * Cygwin with GCC requires time.h to be included after the block above that disables strict ANSI mode.
 */
57 | #include <time.h> |
58 | |
59 | #if !U_PLATFORM_USES_ONLY_WIN32_API |
60 | #include <sys/time.h> |
61 | #endif |
62 | |
63 | /* include the rest of the ICU headers */ |
64 | #include "unicode/putil.h" |
65 | #include "unicode/ustring.h" |
66 | #include "putilimp.h" |
67 | #include "uassert.h" |
68 | #include "umutex.h" |
69 | #include "cmemory.h" |
70 | #include "cstring.h" |
71 | #include "locmap.h" |
72 | #include "ucln_cmn.h" |
73 | #include "charstr.h" |
74 | |
75 | /* Include standard headers. */ |
76 | #include <stdio.h> |
77 | #include <stdlib.h> |
78 | #include <string.h> |
79 | #include <math.h> |
80 | #include <locale.h> |
81 | #include <float.h> |
82 | |
83 | #ifndef U_COMMON_IMPLEMENTATION |
84 | #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu |
85 | #endif |
86 | |
87 | |
88 | /* include system headers */ |
89 | #if U_PLATFORM_USES_ONLY_WIN32_API |
90 | /* |
91 | * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. |
92 | * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) |
93 | * to use native APIs as much as possible? |
94 | */ |
95 | #ifndef WIN32_LEAN_AND_MEAN |
96 | # define WIN32_LEAN_AND_MEAN |
97 | #endif |
98 | # define VC_EXTRALEAN |
99 | # define NOUSER |
100 | # define NOSERVICE |
101 | # define NOIME |
102 | # define NOMCX |
103 | # include <windows.h> |
104 | # include "unicode/uloc.h" |
105 | # include "wintz.h" |
106 | #elif U_PLATFORM == U_PF_OS400 |
107 | # include <float.h> |
108 | # include <qusec.h> /* error code structure */ |
109 | # include <qusrjobi.h> |
110 | # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ |
111 | # include <mih/testptr.h> /* For uprv_maximumPtr */ |
112 | #elif U_PLATFORM == U_PF_OS390 |
113 | # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ |
114 | #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS |
115 | # include <limits.h> |
116 | # include <unistd.h> |
117 | # if U_PLATFORM == U_PF_SOLARIS |
118 | # ifndef _XPG4_2 |
119 | # define _XPG4_2 |
120 | # endif |
121 | # elif U_PLATFORM == U_PF_ANDROID |
122 | # include <sys/system_properties.h> |
123 | # include <dlfcn.h> |
124 | # endif |
125 | #elif U_PLATFORM == U_PF_QNX |
126 | # include <sys/neutrino.h> |
127 | #endif |
128 | |
129 | |
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
135 | |
136 | #if U_HAVE_NL_LANGINFO_CODESET |
137 | #include <langinfo.h> |
138 | #endif |
139 | |
/**
 * Simple things (presence of functions, etc) should just go in configure.in and be added to
 * icucfg.h via autoheader.
 *
 * Fallback feature switches used when the build did not provide them:
 *  - POSIX platforms: HAVE_DLFCN_H, HAVE_DLOPEN and HAVE_GETTIMEOFDAY default to 1
 *    unless already defined; OS/400 is the exception and forces the two dl* switches to 0.
 *  - Non-POSIX platforms: all three switches are forced to 0.
 */
#if U_PLATFORM_IMPLEMENTS_POSIX
#   if U_PLATFORM == U_PF_OS400
    /* OS/400 implements POSIX but is configured here without dlfcn.h/dlopen. */
#    define HAVE_DLFCN_H 0
#    define HAVE_DLOPEN 0
#   else
    /* Respect values supplied by the build; otherwise assume the feature exists. */
#   ifndef HAVE_DLFCN_H
#    define HAVE_DLFCN_H 1
#   endif
#   ifndef HAVE_DLOPEN
#    define HAVE_DLOPEN 1
#   endif
#   endif
#   ifndef HAVE_GETTIMEOFDAY
#    define HAVE_GETTIMEOFDAY 1
#   endif
#else
    /* Non-POSIX platform: none of these facilities are assumed. */
#   define HAVE_DLFCN_H 0
#   define HAVE_DLOPEN 0
#   define HAVE_GETTIMEOFDAY 0
#endif
164 | |
165 | U_NAMESPACE_USE |
166 | |
167 | /* Define the extension for data files, again... */ |
168 | #define DATA_TYPE "dat" |
169 | |
170 | /* Leave this copyright notice here! */ |
171 | static const char copyright[] = U_COPYRIGHT_STRING; |
172 | |
173 | /* floating point implementations ------------------------------------------- */ |
174 | |
/* We return QNAN rather than SNAN*/
/* Mask for the most significant bit of a 32-bit word. The non-IEEE code paths in this
   file apply it to the top 32 bits of a double to test or clear the sign. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a 64-bit integer on a double so that a double constant can be written as
   its raw bit pattern and so that a double's bits can be inspected as an integer. */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern with every exponent bit and the top mantissa bit set: the NaN value
   handed out by uprv_getNaN(). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern with every exponent bit set and a zero mantissa: positive infinity,
   handed out by uprv_getInfinity() and used as the comparison point in
   uprv_isNaN()/uprv_isInfinite(). */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
185 | |
186 | /*--------------------------------------------------------------------------- |
187 | Platform utilities |
188 | Our general strategy is to assume we're on a POSIX platform. Platforms which |
189 | are non-POSIX must declare themselves so. The default POSIX implementation |
190 | will sometimes work for non-POSIX platforms as well (e.g., the NaN-related |
191 | functions). |
192 | ---------------------------------------------------------------------------*/ |
193 | |
/* U_POSIX_LOCALE is defined to 1 on every platform except Win32-only builds and
   OS/400, where it is explicitly left undefined.
   NOTE(review): the code that tests this switch is outside this section; presumably
   it selects the POSIX-style default-locale lookup - confirm against the rest of the file. */
#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
#   undef U_POSIX_LOCALE
#else
#   define U_POSIX_LOCALE 1
#endif
199 | |
200 | /* |
201 | WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble |
202 | can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). |
203 | */ |
204 | #if !IEEE_754 |
/*
 * Returns a pointer to the n most significant bytes of the double at d.
 * On big-endian targets those bytes come first in memory, so the address of
 * the double itself is returned; otherwise they are the last n bytes of it.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char* const firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    /* Step to one past the end of the double, then back up n bytes. */
    return firstByte + sizeof(double) - n;
#endif
}
214 | |
/*
 * Returns a pointer to the n least significant bytes of the double at d.
 * On big-endian targets those are the last n bytes of the object; otherwise
 * they start at the address of the double itself.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char* const firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Step to one past the end of the double, then back up n bytes. */
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
224 | #endif /* !IEEE_754 */ |
225 | |
226 | #if IEEE_754 |
227 | static UBool |
228 | u_signBit(double d) { |
229 | uint8_t hiByte; |
230 | #if U_IS_BIG_ENDIAN |
231 | hiByte = *(uint8_t *)&d; |
232 | #else |
233 | hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); |
234 | #endif |
235 | return (hiByte & 0x80) != 0; |
236 | } |
237 | #endif |
238 | |
239 | |
240 | |
241 | #if defined (U_DEBUG_FAKETIME) |
/* Override the clock to test things without having to move the system clock.
 * Assumes POSIX gettimeofday() will function
 *
 * State of the fake clock. All three variables are written once, under a mutex,
 * on the first call to getUTCtime_fake(). Times are in milliseconds.
 */
UDate fakeClock_t0 = 0; /** Time to start the clock from (parsed from U_FAKETIME_START) **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time), added to every real reading **/
UBool fakeClock_set = false; /** True if fake clock has spun up **/
248 | |
249 | static UDate getUTCtime_real() { |
250 | struct timeval posixTime; |
251 | gettimeofday(&posixTime, nullptr); |
252 | return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); |
253 | } |
254 | |
255 | static UDate getUTCtime_fake() { |
256 | static UMutex fakeClockMutex; |
257 | umtx_lock(&fakeClockMutex); |
258 | if(!fakeClock_set) { |
259 | UDate real = getUTCtime_real(); |
260 | const char *fake_start = getenv("U_FAKETIME_START" ); |
261 | if((fake_start!=nullptr) && (fake_start[0]!=0)) { |
262 | sscanf(fake_start,"%lf" ,&fakeClock_t0); |
263 | fakeClock_dt = fakeClock_t0 - real; |
264 | fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" |
265 | "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n" , |
266 | fakeClock_t0, fake_start, fakeClock_dt, real); |
267 | } else { |
268 | fakeClock_dt = 0; |
269 | fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" |
270 | "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n" ); |
271 | } |
272 | fakeClock_set = true; |
273 | } |
274 | umtx_unlock(&fakeClockMutex); |
275 | |
276 | return getUTCtime_real() + fakeClock_dt; |
277 | } |
278 | #endif |
279 | |
280 | #if U_PLATFORM_USES_ONLY_WIN32_API |
/* Overlays a 64-bit integer on a Win32 FILETIME so that the value filled in by
   GetSystemTimeAsFileTime() can be used in integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS  INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND   10000
289 | |
290 | #endif |
291 | |
292 | /*--------------------------------------------------------------------------- |
293 | Universal Implementations |
294 | These are designed to work on all platforms. Try these, and if they |
295 | don't work on your platform, then special case your platform with new |
296 | implementations. |
297 | ---------------------------------------------------------------------------*/ |
298 | |
299 | U_CAPI UDate U_EXPORT2 |
300 | uprv_getUTCtime() |
301 | { |
302 | #if defined(U_DEBUG_FAKETIME) |
303 | return getUTCtime_fake(); /* Hook for overriding the clock */ |
304 | #else |
305 | return uprv_getRawUTCtime(); |
306 | #endif |
307 | } |
308 | |
309 | /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ |
310 | U_CAPI UDate U_EXPORT2 |
311 | uprv_getRawUTCtime() |
312 | { |
313 | #if U_PLATFORM_USES_ONLY_WIN32_API |
314 | |
315 | FileTimeConversion winTime; |
316 | GetSystemTimeAsFileTime(&winTime.fileTime); |
317 | return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); |
318 | #else |
319 | |
320 | #if HAVE_GETTIMEOFDAY |
321 | struct timeval posixTime; |
322 | gettimeofday(&posixTime, nullptr); |
323 | return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); |
324 | #else |
325 | time_t epochtime; |
326 | time(&epochtime); |
327 | return (UDate)epochtime * U_MILLIS_PER_SECOND; |
328 | #endif |
329 | |
330 | #endif |
331 | } |
332 | |
333 | /*----------------------------------------------------------------------------- |
334 | IEEE 754 |
335 | These methods detect and return NaN and infinity values for doubles |
336 | conforming to IEEE 754. Platforms which support this standard include X86, |
337 | Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. |
338 | If this doesn't work on your platform, you have non-IEEE floating-point, and |
339 | will need to code your own versions. A naive implementation is to return 0.0 |
340 | for getNaN and getInfinity, and false for isNaN and isInfinite. |
341 | ---------------------------------------------------------------------------*/ |
342 | |
343 | U_CAPI UBool U_EXPORT2 |
344 | uprv_isNaN(double number) |
345 | { |
346 | #if IEEE_754 |
347 | BitPatternConversion convertedNumber; |
348 | convertedNumber.d64 = number; |
349 | /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ |
350 | return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); |
351 | |
352 | #elif U_PLATFORM == U_PF_OS390 |
353 | uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, |
354 | sizeof(uint32_t)); |
355 | uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, |
356 | sizeof(uint32_t)); |
357 | |
358 | return ((highBits & 0x7F080000L) == 0x7F080000L) && |
359 | (lowBits == 0x00000000L); |
360 | |
361 | #else |
362 | /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ |
363 | /* you'll need to replace this default implementation with what's correct*/ |
364 | /* for your platform.*/ |
365 | return number != number; |
366 | #endif |
367 | } |
368 | |
369 | U_CAPI UBool U_EXPORT2 |
370 | uprv_isInfinite(double number) |
371 | { |
372 | #if IEEE_754 |
373 | BitPatternConversion convertedNumber; |
374 | convertedNumber.d64 = number; |
375 | /* Infinity is exactly 0x7FF0000000000000U. */ |
376 | return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); |
377 | #elif U_PLATFORM == U_PF_OS390 |
378 | uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, |
379 | sizeof(uint32_t)); |
380 | uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, |
381 | sizeof(uint32_t)); |
382 | |
383 | return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); |
384 | |
385 | #else |
386 | /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ |
387 | /* value, you'll need to replace this default implementation with what's*/ |
388 | /* correct for your platform.*/ |
389 | return number == (2.0 * number); |
390 | #endif |
391 | } |
392 | |
393 | U_CAPI UBool U_EXPORT2 |
394 | uprv_isPositiveInfinity(double number) |
395 | { |
396 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
397 | return (UBool)(number > 0 && uprv_isInfinite(number)); |
398 | #else |
399 | return uprv_isInfinite(number); |
400 | #endif |
401 | } |
402 | |
403 | U_CAPI UBool U_EXPORT2 |
404 | uprv_isNegativeInfinity(double number) |
405 | { |
406 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
407 | return (UBool)(number < 0 && uprv_isInfinite(number)); |
408 | |
409 | #else |
410 | uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, |
411 | sizeof(uint32_t)); |
412 | return((highBits & SIGN) && uprv_isInfinite(number)); |
413 | |
414 | #endif |
415 | } |
416 | |
417 | U_CAPI double U_EXPORT2 |
418 | uprv_getNaN() |
419 | { |
420 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
421 | return gNan.d64; |
422 | #else |
423 | /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ |
424 | /* you'll need to replace this default implementation with what's correct*/ |
425 | /* for your platform.*/ |
426 | return 0.0; |
427 | #endif |
428 | } |
429 | |
430 | U_CAPI double U_EXPORT2 |
431 | uprv_getInfinity() |
432 | { |
433 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
434 | return gInf.d64; |
435 | #else |
436 | /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ |
437 | /* value, you'll need to replace this default implementation with what's*/ |
438 | /* correct for your platform.*/ |
439 | return 0.0; |
440 | #endif |
441 | } |
442 | |
443 | U_CAPI double U_EXPORT2 |
444 | uprv_floor(double x) |
445 | { |
446 | return floor(x); |
447 | } |
448 | |
449 | U_CAPI double U_EXPORT2 |
450 | uprv_ceil(double x) |
451 | { |
452 | return ceil(x); |
453 | } |
454 | |
455 | U_CAPI double U_EXPORT2 |
456 | uprv_round(double x) |
457 | { |
458 | return uprv_floor(x + 0.5); |
459 | } |
460 | |
461 | U_CAPI double U_EXPORT2 |
462 | uprv_fabs(double x) |
463 | { |
464 | return fabs(x); |
465 | } |
466 | |
467 | U_CAPI double U_EXPORT2 |
468 | uprv_modf(double x, double* y) |
469 | { |
470 | return modf(x, y); |
471 | } |
472 | |
473 | U_CAPI double U_EXPORT2 |
474 | uprv_fmod(double x, double y) |
475 | { |
476 | return fmod(x, y); |
477 | } |
478 | |
479 | U_CAPI double U_EXPORT2 |
480 | uprv_pow(double x, double y) |
481 | { |
482 | /* This is declared as "double pow(double x, double y)" */ |
483 | return pow(x, y); |
484 | } |
485 | |
486 | U_CAPI double U_EXPORT2 |
487 | uprv_pow10(int32_t x) |
488 | { |
489 | return pow(10.0, (double)x); |
490 | } |
491 | |
492 | U_CAPI double U_EXPORT2 |
493 | uprv_fmax(double x, double y) |
494 | { |
495 | #if IEEE_754 |
496 | /* first handle NaN*/ |
497 | if(uprv_isNaN(x) || uprv_isNaN(y)) |
498 | return uprv_getNaN(); |
499 | |
500 | /* check for -0 and 0*/ |
501 | if(x == 0.0 && y == 0.0 && u_signBit(x)) |
502 | return y; |
503 | |
504 | #endif |
505 | |
506 | /* this should work for all flt point w/o NaN and Inf special cases */ |
507 | return (x > y ? x : y); |
508 | } |
509 | |
510 | U_CAPI double U_EXPORT2 |
511 | uprv_fmin(double x, double y) |
512 | { |
513 | #if IEEE_754 |
514 | /* first handle NaN*/ |
515 | if(uprv_isNaN(x) || uprv_isNaN(y)) |
516 | return uprv_getNaN(); |
517 | |
518 | /* check for -0 and 0*/ |
519 | if(x == 0.0 && y == 0.0 && u_signBit(y)) |
520 | return y; |
521 | |
522 | #endif |
523 | |
524 | /* this should work for all flt point w/o NaN and Inf special cases */ |
525 | return (x > y ? y : x); |
526 | } |
527 | |
528 | U_CAPI UBool U_EXPORT2 |
529 | uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) { |
530 | // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow. |
531 | // This function could be optimized by calling one of those primitives. |
532 | auto a64 = static_cast<int64_t>(a); |
533 | auto b64 = static_cast<int64_t>(b); |
534 | int64_t res64 = a64 + b64; |
535 | *res = static_cast<int32_t>(res64); |
536 | return res64 != *res; |
537 | } |
538 | |
539 | U_CAPI UBool U_EXPORT2 |
540 | uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) { |
541 | // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow. |
542 | // This function could be optimized by calling one of those primitives. |
543 | auto a64 = static_cast<int64_t>(a); |
544 | auto b64 = static_cast<int64_t>(b); |
545 | int64_t res64 = a64 * b64; |
546 | *res = static_cast<int32_t>(res64); |
547 | return res64 != *res; |
548 | } |
549 | |
550 | /** |
551 | * Truncates the given double. |
552 | * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 |
553 | * This is different than calling floor() or ceil(): |
554 | * floor(3.3) = 3, floor(-3.3) = -4 |
555 | * ceil(3.3) = 4, ceil(-3.3) = -3 |
556 | */ |
557 | U_CAPI double U_EXPORT2 |
558 | uprv_trunc(double d) |
559 | { |
560 | #if IEEE_754 |
561 | /* handle error cases*/ |
562 | if(uprv_isNaN(d)) |
563 | return uprv_getNaN(); |
564 | if(uprv_isInfinite(d)) |
565 | return uprv_getInfinity(); |
566 | |
567 | if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ |
568 | return ceil(d); |
569 | else |
570 | return floor(d); |
571 | |
572 | #else |
573 | return d >= 0 ? floor(d) : ceil(d); |
574 | |
575 | #endif |
576 | } |
577 | |
578 | /** |
579 | * Return the largest positive number that can be represented by an integer |
580 | * type of arbitrary bit length. |
581 | */ |
582 | U_CAPI double U_EXPORT2 |
583 | uprv_maxMantissa() |
584 | { |
585 | return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; |
586 | } |
587 | |
588 | U_CAPI double U_EXPORT2 |
589 | uprv_log(double d) |
590 | { |
591 | return log(d); |
592 | } |
593 | |
594 | U_CAPI void * U_EXPORT2 |
595 | uprv_maximumPtr(void * base) |
596 | { |
597 | #if U_PLATFORM == U_PF_OS400 |
598 | /* |
599 | * With the provided function we should never be out of range of a given segment |
600 | * (a traditional/typical segment that is). Our segments have 5 bytes for the |
601 | * id and 3 bytes for the offset. The key is that the casting takes care of |
602 | * only retrieving the offset portion minus x1000. Hence, the smallest offset |
603 | * seen in a program is x001000 and when casted to an int would be 0. |
604 | * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. |
605 | * |
606 | * Currently, 16MB is the current addressing limitation on i5/OS if the activation is |
607 | * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). |
608 | * This function determines the activation based on the pointer that is passed in and |
609 | * calculates the appropriate maximum available size for |
610 | * each pointer type (TERASPACE and non-TERASPACE) |
611 | * |
612 | * Unlike other operating systems, the pointer model isn't determined at |
613 | * compile time on i5/OS. |
614 | */ |
615 | if ((base != nullptr) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { |
616 | /* if it is a TERASPACE pointer the max is 2GB - 4k */ |
617 | return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); |
618 | } |
619 | /* otherwise 16MB since nullptr ptr is not checkable or the ptr is not TERASPACE */ |
620 | return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); |
621 | |
622 | #else |
623 | return U_MAX_PTR(base); |
624 | #endif |
625 | } |
626 | |
627 | /*--------------------------------------------------------------------------- |
628 | Platform-specific Implementations |
629 | Try these, and if they don't work on your platform, then special case your |
630 | platform with new implementations. |
631 | ---------------------------------------------------------------------------*/ |
632 | |
633 | /* Generic time zone layer -------------------------------------------------- */ |
634 | |
635 | /* Time zone utilities */ |
636 | U_CAPI void U_EXPORT2 |
637 | uprv_tzset() |
638 | { |
639 | #if defined(U_TZSET) |
640 | U_TZSET(); |
641 | #else |
642 | /* no initialization*/ |
643 | #endif |
644 | } |
645 | |
646 | U_CAPI int32_t U_EXPORT2 |
647 | uprv_timezone() |
648 | { |
649 | #ifdef U_TIMEZONE |
650 | return U_TIMEZONE; |
651 | #else |
652 | time_t t, t1, t2; |
653 | struct tm tmrec; |
654 | int32_t tdiff = 0; |
655 | |
656 | time(&t); |
657 | uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); |
658 | #if U_PLATFORM != U_PF_IPHONE |
659 | UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ |
660 | #endif |
661 | t1 = mktime(&tmrec); /* local time in seconds*/ |
662 | uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); |
663 | t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ |
664 | tdiff = t2 - t1; |
665 | |
666 | #if U_PLATFORM != U_PF_IPHONE |
667 | /* imitate NT behaviour, which returns same timezone offset to GMT for |
668 | winter and summer. |
669 | This does not work on all platforms. For instance, on glibc on Linux |
670 | and on Mac OS 10.5, tdiff calculated above remains the same |
671 | regardless of whether DST is in effect or not. iOS is another |
672 | platform where this does not work. Linux + glibc and Mac OS 10.5 |
673 | have U_TIMEZONE defined so that this code is not reached. |
674 | */ |
675 | if (dst_checked) |
676 | tdiff += 3600; |
677 | #endif |
678 | return tdiff; |
679 | #endif |
680 | } |
681 | |
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
   some platforms need to have it declared here. */

/* Only IRIX and Darwin-based platforms get the explicit declaration below, and
   only when the platform configuration defines U_TZNAME at all. */
#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
/* RS6000 and others reject char **tzname. */
extern U_IMPORT char *U_TZNAME[];
#endif
689 | |
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
/* These platforms are likely to use Olson timezone IDs. */
/* common targets of the symbolic link at TZDEFAULT are:
 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
 * To avoid checking lots of paths, just check that the target path
 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
 */

/* Enables the code that inspects the TZDEFAULT symbolic link. */
#define CHECK_LOCALTIME_LINK 1
#if U_PLATFORM_IS_DARWIN_BASED
/* NOTE(review): TZDEFAULT and TZDIR are not defined in this branch; presumably
   they come from <tzfile.h> - confirm. */
#include <tzfile.h>
#define TZZONEINFO (TZDIR "/")
#elif U_PLATFORM == U_PF_SOLARIS
#define TZDEFAULT "/etc/localtime"
#define TZZONEINFO "/usr/share/lib/zoneinfo/"
#define TZ_ENV_CHECK "localtime"
#else
#define TZDEFAULT "/etc/localtime"
#define TZZONEINFO "/usr/share/zoneinfo/"
#endif
/* Path component expected immediately before the <olsonID> in the link target. */
#define TZZONEINFOTAIL "/zoneinfo/"
#if U_HAVE_DIRENT_H
#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
   symlinked to /etc/localtime, which makes searchForTZFile return
   'localtime' when it's the first match. */
#define TZFILE_SKIP2 "localtime"
/* Enables the directory-scanning fallback; it needs <dirent.h>. */
#define SEARCH_TZFILE
#include <dirent.h> /* Needed to search through system timezone files */
#endif
/* NOTE(review): the users of these two variables are outside this section;
   presumably they cache the detected default time zone ID - confirm. */
static char gTimeZoneBuffer[PATH_MAX];
static const char *gTimeZoneBufferPtr = nullptr;
#endif
727 | |
728 | #if !U_PLATFORM_USES_ONLY_WIN32_API |
/* Character-class helpers for ASCII decimal digits.
   The argument and the whole expansion are parenthesized so that compound
   arguments (e.g. a conditional expression) are classified correctly.
   Note that ch is evaluated twice; do not pass an expression with side effects. */
#define isNonDigit(ch) ((ch) < '0' || '9' < (ch))
#define isDigit(ch) ('0' <= (ch) && (ch) <= '9')
731 | static UBool isValidOlsonID(const char *id) { |
732 | int32_t idx = 0; |
733 | int32_t idxMax = 0; |
734 | |
735 | /* Determine if this is something like Iceland (Olson ID) |
736 | or AST4ADT (non-Olson ID) */ |
737 | while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { |
738 | idx++; |
739 | } |
740 | |
741 | /* Allow at maximum 2 numbers at the end of the id to support zone id's |
742 | like GMT+11. */ |
743 | idxMax = idx + 2; |
744 | while (id[idx] && isDigit(id[idx]) && idx < idxMax) { |
745 | idx++; |
746 | } |
747 | |
748 | /* If we went through the whole string, then it might be okay. |
749 | The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", |
750 | "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. |
751 | The rest of the time it could be an Olson ID. George */ |
752 | return (UBool)(id[idx] == 0 |
753 | || uprv_strcmp(id, "PST8PDT" ) == 0 |
754 | || uprv_strcmp(id, "MST7MDT" ) == 0 |
755 | || uprv_strcmp(id, "CST6CDT" ) == 0 |
756 | || uprv_strcmp(id, "EST5EDT" ) == 0); |
757 | } |
758 | |
759 | /* On some Unix-like OS, 'posix' subdirectory in |
760 | /usr/share/zoneinfo replicates the top-level contents. 'right' |
761 | subdirectory has the same set of files, but individual files |
762 | are different from those in the top-level directory or 'posix' |
763 | because 'right' has files for TAI (Int'l Atomic Time) while 'posix' |
764 | has files for UTC. |
765 | When the first match for /etc/localtime is in either of them |
766 | (usually in posix because 'right' has different file contents), |
767 | or TZ environment variable points to one of them, createTimeZone |
768 | fails because, say, 'posix/America/New_York' is not an Olson |
769 | timezone id ('America/New_York' is). So, we have to skip |
770 | 'posix/' and 'right/' at the beginning. */ |
771 | static void skipZoneIDPrefix(const char** id) { |
772 | if (uprv_strncmp(*id, "posix/" , 6) == 0 |
773 | || uprv_strncmp(*id, "right/" , 6) == 0) |
774 | { |
775 | *id += 6; |
776 | } |
777 | } |
778 | #endif |
779 | |
780 | #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API |
781 | |
/* Converts an offset given in hours (possibly fractional) to whole seconds.
   The argument is parenthesized so that compound expressions such as
   CONVERT_HOURS_TO_SECONDS(2 + 3) are scaled as a whole. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))

/* One row of the table that maps an abbreviated zone description to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;  /* Zone offset in seconds; compared against the offset passed to remapShortTimeZone() */
    int32_t daylightType;   /* One of U_DAYLIGHT_NONE (0), U_DAYLIGHT_JUNE (1, daylight time in June) or U_DAYLIGHT_DECEMBER (2, daylight time in December) */
    const char *stdID;      /* Abbreviation used for standard time */
    const char *dstID;      /* Abbreviation used for daylight time */
    const char *olsonID;    /* Olson ID that the row maps to */
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping::daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
792 | |
/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.

Columns, in OffsetZoneMapping field order:
  offsetSeconds, daylightType, stdID, dstID, olsonID
The table is scanned front to back and the first row whose first four columns
all match wins, so the order of rows that share an offset matters.
NOTE(review): offsets appear to be seconds west of GMT (e.g. 18000 for
US/Eastern) - confirm against the caller.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
863 | |
864 | /*#define DEBUG_TZNAME*/ |
865 | |
866 | static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) |
867 | { |
868 | int32_t idx; |
869 | #ifdef DEBUG_TZNAME |
870 | fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n" , getenv("TZ" ), stdID, dstID, daylightType, offset); |
871 | #endif |
872 | for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) |
873 | { |
874 | if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds |
875 | && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType |
876 | && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 |
877 | && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) |
878 | { |
879 | return OFFSET_ZONE_MAPPINGS[idx].olsonID; |
880 | } |
881 | } |
882 | return nullptr; |
883 | } |
884 | #endif |
885 | |
886 | #ifdef SEARCH_TZFILE |
887 | #define MAX_READ_SIZE 512 |
888 | |
/*
 * State shared across the compareBinaryFiles() calls made during one
 * searchForTZFile() run, so that the default TZ file is opened, measured
 * and read only once.
 */
typedef struct DefaultTZInfo {
    /* Contents of the default TZ file; allocated by compareBinaryFiles(), nullptr until then. */
    char* defaultTZBuffer;
    /* Size of the default TZ file as reported by ftell(); 0 is treated as "not measured yet". */
    int64_t defaultTZFileSize;
    /* Open handle on the default TZ file; opened lazily by compareBinaryFiles(). */
    FILE* defaultTZFilePtr;
    /* Initialized to false by uprv_tzname(); not read by the code visible in this section. */
    UBool defaultTZstatus;
    /* Offset into defaultTZBuffer of the next chunk to compare; reset to 0 at the start of each comparison. */
    int32_t defaultTZPosition;
} DefaultTZInfo;
896 | |
897 | /* |
898 | * This method compares the two files given to see if they are a match. |
899 | * It is currently use to compare two TZ files. |
900 | */ |
901 | static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { |
902 | FILE* file; |
903 | int64_t sizeFile; |
904 | int64_t sizeFileLeft; |
905 | int32_t sizeFileRead; |
906 | int32_t sizeFileToRead; |
907 | char bufferFile[MAX_READ_SIZE]; |
908 | UBool result = true; |
909 | |
910 | if (tzInfo->defaultTZFilePtr == nullptr) { |
911 | tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r" ); |
912 | } |
913 | file = fopen(TZFileName, "r" ); |
914 | |
915 | tzInfo->defaultTZPosition = 0; /* reset position to begin search */ |
916 | |
917 | if (file != nullptr && tzInfo->defaultTZFilePtr != nullptr) { |
918 | /* First check that the file size are equal. */ |
919 | if (tzInfo->defaultTZFileSize == 0) { |
920 | fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); |
921 | tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); |
922 | } |
923 | fseek(file, 0, SEEK_END); |
924 | sizeFile = ftell(file); |
925 | sizeFileLeft = sizeFile; |
926 | |
927 | if (sizeFile != tzInfo->defaultTZFileSize) { |
928 | result = false; |
929 | } else { |
930 | /* Store the data from the files in separate buffers and |
931 | * compare each byte to determine equality. |
932 | */ |
933 | if (tzInfo->defaultTZBuffer == nullptr) { |
934 | rewind(tzInfo->defaultTZFilePtr); |
935 | tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); |
936 | sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); |
937 | } |
938 | rewind(file); |
939 | while(sizeFileLeft > 0) { |
940 | uprv_memset(bufferFile, 0, MAX_READ_SIZE); |
941 | sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; |
942 | |
943 | sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); |
944 | if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { |
945 | result = false; |
946 | break; |
947 | } |
948 | sizeFileLeft -= sizeFileRead; |
949 | tzInfo->defaultTZPosition += sizeFileRead; |
950 | } |
951 | } |
952 | } else { |
953 | result = false; |
954 | } |
955 | |
956 | if (file != nullptr) { |
957 | fclose(file); |
958 | } |
959 | |
960 | return result; |
961 | } |
962 | |
963 | |
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
#define SKIP1 "."
#define SKIP2 ".."
/* Forward declaration; searchForTZFile() registers this as the library cleanup hook. */
static UBool U_CALLCONV putil_cleanup();
/* Storage for the zone ID returned by searchForTZFile(); created lazily there
 * and deleted by putil_cleanup().
 */
static CharString *gSearchTZFileResult = nullptr;
969 | |
970 | /* |
971 | * This method recursively traverses the directory given for a matching TZ file and returns the first match. |
972 | * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. |
973 | */ |
974 | static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { |
975 | DIR* dirp = nullptr; |
976 | struct dirent* dirEntry = nullptr; |
977 | char* result = nullptr; |
978 | UErrorCode status = U_ZERO_ERROR; |
979 | |
980 | /* Save the current path */ |
981 | CharString curpath(path, -1, status); |
982 | if (U_FAILURE(status)) { |
983 | goto cleanupAndReturn; |
984 | } |
985 | |
986 | dirp = opendir(path); |
987 | if (dirp == nullptr) { |
988 | goto cleanupAndReturn; |
989 | } |
990 | |
991 | if (gSearchTZFileResult == nullptr) { |
992 | gSearchTZFileResult = new CharString; |
993 | if (gSearchTZFileResult == nullptr) { |
994 | goto cleanupAndReturn; |
995 | } |
996 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
997 | } |
998 | |
999 | /* Check each entry in the directory. */ |
1000 | while((dirEntry = readdir(dirp)) != nullptr) { |
1001 | const char* dirName = dirEntry->d_name; |
1002 | if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0 |
1003 | && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { |
1004 | /* Create a newpath with the new entry to test each entry in the directory. */ |
1005 | CharString newpath(curpath, status); |
1006 | newpath.append(dirName, -1, status); |
1007 | if (U_FAILURE(status)) { |
1008 | break; |
1009 | } |
1010 | |
1011 | DIR* subDirp = nullptr; |
1012 | if ((subDirp = opendir(newpath.data())) != nullptr) { |
1013 | /* If this new path is a directory, make a recursive call with the newpath. */ |
1014 | closedir(subDirp); |
1015 | newpath.append('/', status); |
1016 | if (U_FAILURE(status)) { |
1017 | break; |
1018 | } |
1019 | result = searchForTZFile(newpath.data(), tzInfo); |
1020 | /* |
1021 | Have to get out here. Otherwise, we'd keep looking |
1022 | and return the first match in the top-level directory |
1023 | if there's a match in the top-level. If not, this function |
1024 | would return nullptr and set gTimeZoneBufferPtr to nullptr in initDefault(). |
1025 | It worked without this in most cases because we have a fallback of calling |
1026 | localtime_r to figure out the default timezone. |
1027 | */ |
1028 | if (result != nullptr) |
1029 | break; |
1030 | } else { |
1031 | if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) { |
1032 | int32_t amountToSkip = sizeof(TZZONEINFO) - 1; |
1033 | if (amountToSkip > newpath.length()) { |
1034 | amountToSkip = newpath.length(); |
1035 | } |
1036 | const char* zoneid = newpath.data() + amountToSkip; |
1037 | skipZoneIDPrefix(&zoneid); |
1038 | gSearchTZFileResult->clear(); |
1039 | gSearchTZFileResult->append(zoneid, -1, status); |
1040 | if (U_FAILURE(status)) { |
1041 | break; |
1042 | } |
1043 | result = gSearchTZFileResult->data(); |
1044 | /* Get out after the first one found. */ |
1045 | break; |
1046 | } |
1047 | } |
1048 | } |
1049 | } |
1050 | |
1051 | cleanupAndReturn: |
1052 | if (dirp) { |
1053 | closedir(dirp); |
1054 | } |
1055 | return result; |
1056 | } |
1057 | #endif |
1058 | |
1059 | #if U_PLATFORM == U_PF_ANDROID |
1060 | typedef int(system_property_read_callback)(const prop_info* info, |
1061 | void (*callback)(void* cookie, |
1062 | const char* name, |
1063 | const char* value, |
1064 | uint32_t serial), |
1065 | void* cookie); |
1066 | typedef int(system_property_get)(const char*, char*); |
1067 | |
1068 | static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' }; |
1069 | |
1070 | static void u_property_read(void* cookie, const char* name, const char* value, |
1071 | uint32_t serial) { |
1072 | uprv_strcpy((char* )cookie, value); |
1073 | } |
1074 | #endif |
1075 | |
1076 | U_CAPI void U_EXPORT2 |
1077 | uprv_tzname_clear_cache() |
1078 | { |
1079 | #if U_PLATFORM == U_PF_ANDROID |
1080 | /* Android's timezone is stored in system property. */ |
1081 | gAndroidTimeZone[0] = '\0'; |
1082 | void* libc = dlopen("libc.so" , RTLD_NOLOAD); |
1083 | if (libc) { |
1084 | /* Android API 26+ has new API to get system property and old API |
1085 | * (__system_property_get) is deprecated */ |
1086 | system_property_read_callback* property_read_callback = |
1087 | (system_property_read_callback*)dlsym( |
1088 | libc, "__system_property_read_callback" ); |
1089 | if (property_read_callback) { |
1090 | const prop_info* info = |
1091 | __system_property_find("persist.sys.timezone" ); |
1092 | if (info) { |
1093 | property_read_callback(info, &u_property_read, gAndroidTimeZone); |
1094 | } |
1095 | } else { |
1096 | system_property_get* property_get = |
1097 | (system_property_get*)dlsym(libc, "__system_property_get" ); |
1098 | if (property_get) { |
1099 | property_get("persist.sys.timezone" , gAndroidTimeZone); |
1100 | } |
1101 | } |
1102 | dlclose(libc); |
1103 | } |
1104 | #endif |
1105 | |
1106 | #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) |
1107 | gTimeZoneBufferPtr = nullptr; |
1108 | #endif |
1109 | } |
1110 | |
/*
 * Returns a best-effort identifier for the host's default time zone.
 *
 * Which strategies are compiled in depends on the platform macros; they are
 * tried in this order:
 *   1. Windows-only builds: uprv_detectWindowsTimeZone(); if that fails and
 *      U_TZNAME is not defined, a heap-allocated empty string is returned.
 *   2. Other platforms: the TZ environment variable (gAndroidTimeZone on
 *      Android), if it passes isValidOlsonID().
 *   3. With CHECK_LOCALTIME_LINK: the target of the TZDEFAULT link, or, with
 *      SEARCH_TZFILE, a zoneinfo file whose contents match TZDEFAULT. The
 *      result is cached in gTimeZoneBufferPtr.
 *   4. With U_TZNAME: the platform abbreviations remapped to an Olson ID via
 *      remapShortTimeZone(), falling back to U_TZNAME[n].
 *   5. Otherwise an empty string.
 *
 * @param n Index into U_TZNAME, used only by the final U_TZNAME fallback.
 * @return The detected zone ID or abbreviation; may be an empty string.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = nullptr;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != nullptr) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location.
    // If we don't have a name already, then tzname wouldn't be any
    // better, so just fall back.
    return uprv_strdup("" );
#endif // !U_TZNAME

#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != nullptr) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
#if U_PLATFORM == U_PF_ANDROID
    tzid = gAndroidTimeZone;
#else
    tzid = getenv("TZ" );
#endif
    if (tzid != nullptr && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == nullptr) {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        char *ret = realpath(TZDEFAULT, gTimeZoneBuffer);
        if (ret != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
            /* TZDEFAULT resolved to a different path: take what follows TZZONEINFOTAIL as the ID. */
            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
            const char *tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
            if (tzZoneInfoTailPtr != nullptr) {
                tzZoneInfoTailPtr += tzZoneInfoTailLen;
                skipZoneIDPrefix(&tzZoneInfoTailPtr);
                if (isValidOlsonID(tzZoneInfoTailPtr)) {
                    return (gTimeZoneBufferPtr = tzZoneInfoTailPtr);
                }
            }
        } else {
            /* realpath() failed or resolved to TZDEFAULT itself, so the link name gives no hint. */
#if defined(SEARCH_TZFILE)
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != nullptr) {
                tzInfo->defaultTZBuffer = nullptr;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = nullptr;
                tzInfo->defaultTZstatus = false;
                tzInfo->defaultTZPosition = 0;

                /* Look for a zoneinfo file with the same contents as TZDEFAULT. */
                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != nullptr) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != nullptr) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            if (gTimeZoneBufferPtr != nullptr && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already resolved the zone; reuse the cached pointer. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs. */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        /* December is checked first, so a zone reporting DST at both probes is classified as December. */
        if(decemberSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_JUNE;
        } else {
            daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != nullptr) {
            return tzid;
        }
    }
    /* No table entry matched: return the raw platform abbreviation. */
    return U_TZNAME[n];
#endif
#else
    return "" ;
#endif
}
1260 | |
1261 | /* Get and set the ICU data directory --------------------------------------- */ |
1262 | |
/* Guards the one-time run of dataDirectoryInitFn() from u_getDataDirectory(). */
static icu::UInitOnce gDataDirInitOnce {};
/* Current ICU data directory. Either an empty string literal or a heap copy
 * made by u_setDataDirectory(); only a non-empty value is ever freed.
 */
static char *gDataDirectory = nullptr;

/* Guards the one-time run of TimeZoneDataDirInitFn(). */
UInitOnce gTimeZoneFilesInitOnce {};
/* Time zone files directory; allocated in TimeZoneDataDirInitFn() and deleted in putil_cleanup(). */
static CharString *gTimeZoneFilesDirectory = nullptr;

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
static const char *gCorrectedPOSIXLocale = nullptr; /* Sometimes heap allocated */
/* True when gCorrectedPOSIXLocale points to heap memory that putil_cleanup() must free. */
static bool gCorrectedPOSIXLocaleHeapAllocated = false;
#endif
1273 | |
1274 | static UBool U_CALLCONV putil_cleanup() |
1275 | { |
1276 | if (gDataDirectory && *gDataDirectory) { |
1277 | uprv_free(gDataDirectory); |
1278 | } |
1279 | gDataDirectory = nullptr; |
1280 | gDataDirInitOnce.reset(); |
1281 | |
1282 | delete gTimeZoneFilesDirectory; |
1283 | gTimeZoneFilesDirectory = nullptr; |
1284 | gTimeZoneFilesInitOnce.reset(); |
1285 | |
1286 | #ifdef SEARCH_TZFILE |
1287 | delete gSearchTZFileResult; |
1288 | gSearchTZFileResult = nullptr; |
1289 | #endif |
1290 | |
1291 | #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API |
1292 | if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { |
1293 | uprv_free(const_cast<char *>(gCorrectedPOSIXLocale)); |
1294 | gCorrectedPOSIXLocale = nullptr; |
1295 | gCorrectedPOSIXLocaleHeapAllocated = false; |
1296 | } |
1297 | #endif |
1298 | return true; |
1299 | } |
1300 | |
1301 | /* |
1302 | * Set the data directory. |
1303 | * Make a copy of the passed string, and set the global data dir to point to it. |
1304 | */ |
1305 | U_CAPI void U_EXPORT2 |
1306 | u_setDataDirectory(const char *directory) { |
1307 | char *newDataDir; |
1308 | int32_t length; |
1309 | |
1310 | if(directory==nullptr || *directory==0) { |
1311 | /* A small optimization to prevent the malloc and copy when the |
1312 | shared library is used, and this is a way to make sure that nullptr |
1313 | is never returned. |
1314 | */ |
1315 | newDataDir = (char *)"" ; |
1316 | } |
1317 | else { |
1318 | length=(int32_t)uprv_strlen(directory); |
1319 | newDataDir = (char *)uprv_malloc(length + 2); |
1320 | /* Exit out if newDataDir could not be created. */ |
1321 | if (newDataDir == nullptr) { |
1322 | return; |
1323 | } |
1324 | uprv_strcpy(newDataDir, directory); |
1325 | |
1326 | #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
1327 | { |
1328 | char *p; |
1329 | while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != nullptr) { |
1330 | *p = U_FILE_SEP_CHAR; |
1331 | } |
1332 | } |
1333 | #endif |
1334 | } |
1335 | |
1336 | if (gDataDirectory && *gDataDirectory) { |
1337 | uprv_free(gDataDirectory); |
1338 | } |
1339 | gDataDirectory = newDataDir; |
1340 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
1341 | } |
1342 | |
1343 | U_CAPI UBool U_EXPORT2 |
1344 | uprv_pathIsAbsolute(const char *path) |
1345 | { |
1346 | if(!path || !*path) { |
1347 | return false; |
1348 | } |
1349 | |
1350 | if(*path == U_FILE_SEP_CHAR) { |
1351 | return true; |
1352 | } |
1353 | |
1354 | #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
1355 | if(*path == U_FILE_ALT_SEP_CHAR) { |
1356 | return true; |
1357 | } |
1358 | #endif |
1359 | |
1360 | #if U_PLATFORM_USES_ONLY_WIN32_API |
1361 | if( (((path[0] >= 'A') && (path[0] <= 'Z')) || |
1362 | ((path[0] >= 'a') && (path[0] <= 'z'))) && |
1363 | path[1] == ':' ) { |
1364 | return true; |
1365 | } |
1366 | #endif |
1367 | |
1368 | return false; |
1369 | } |
1370 | |
1371 | /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR |
1372 | (needed for some Darwin ICU build environments) */ |
1373 | #if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR |
1374 | # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) |
1375 | # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" |
1376 | # endif |
1377 | #endif |
1378 | |
1379 | #if defined(ICU_DATA_DIR_WINDOWS) |
1380 | // Helper function to get the ICU Data Directory under the Windows directory location. |
1381 | static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) |
1382 | { |
1383 | wchar_t windowsPath[MAX_PATH]; |
1384 | char windowsPathUtf8[MAX_PATH]; |
1385 | |
1386 | UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath)); |
1387 | if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) { |
1388 | // Convert UTF-16 to a UTF-8 string. |
1389 | UErrorCode status = U_ZERO_ERROR; |
1390 | int32_t windowsPathUtf8Len = 0; |
1391 | u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)), |
1392 | &windowsPathUtf8Len, reinterpret_cast<const char16_t*>(windowsPath), -1, &status); |
1393 | |
1394 | if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && |
1395 | (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) { |
1396 | // Ensure it always has a separator, so we can append the ICU data path. |
1397 | if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) { |
1398 | windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR; |
1399 | windowsPathUtf8[windowsPathUtf8Len] = '\0'; |
1400 | } |
1401 | // Check if the concatenated string will fit. |
1402 | if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) { |
1403 | uprv_strcpy(directoryBuffer, windowsPathUtf8); |
1404 | uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS); |
1405 | return true; |
1406 | } |
1407 | } |
1408 | } |
1409 | |
1410 | return false; |
1411 | } |
1412 | #endif |
1413 | |
/*
 * One-time initializer for gDataDirectory, run through umtx_initOnce() from
 * u_getDataDirectory().
 *
 * Depending on the build configuration the directory is taken from, in order:
 * the ICU_DATA environment variable, the compile-time ICU_DATA_DIR or
 * U_ICU_DATA_DEFAULT_DIR (optionally prefixed with the value of the
 * ICU_DATA_DIR_PREFIX_ENV_VAR environment variable), and, when
 * ICU_DATA_DIR_WINDOWS is defined, the directory under the Windows system
 * directory, which replaces any earlier choice if it can be built. The result
 * is handed to u_setDataDirectory(), which makes its own copy.
 */
static void U_CALLCONV dataDirectoryInitFn() {
    /* If we already have the directory, then return immediately. Will happen if user called
     * u_setDataDirectory().
     */
    if (gDataDirectory) {
        return;
    }

    const char *path = nullptr;
#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
    char datadir_path_buffer[PATH_MAX];
#endif

    /*
    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
    override ICU's data with the ICU_DATA environment variable. This prevents
    problems where multiple custom copies of ICU's specific version of data
    are installed on a system. Either the application must define the data
    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
    ICU, set the data with udata_setCommonData or trust that all of the
    required data is contained in ICU's data library that contains
    the entry point defined by U_ICUDATA_ENTRY_POINT.

    There may also be some platforms where environment variables
    are not allowed.
    */
#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
    /* First try to get the environment variable */
#     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
        path=getenv("ICU_DATA" );
#     endif
#   endif

    /* ICU_DATA_DIR may be set as a compile option.
     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
     * and is used only when data is built in archive mode eliminating the need
     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
     * set their own path.
     */
#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
    if(path==nullptr || *path==0) {
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
# endif
# ifdef ICU_DATA_DIR
        path=ICU_DATA_DIR;
# else
        path=U_ICU_DATA_DEFAULT_DIR;
# endif
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        if (prefix != nullptr) {
            /* snprintf() truncates silently if prefix + path exceeds PATH_MAX. */
            snprintf(datadir_path_buffer, sizeof(datadir_path_buffer), "%s%s" , prefix, path);
            path=datadir_path_buffer;
        }
# endif
    }
#endif

#if defined(ICU_DATA_DIR_WINDOWS)
    /* When available, the directory under the Windows directory replaces whatever was chosen above. */
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        path = datadir_path_buffer;
    }
#endif

    if(path==nullptr) {
        /* It looks really bad, set it to something. */
        path = "" ;
    }

    /* u_setDataDirectory() copies the string, so pointing at a local buffer is fine. */
    u_setDataDirectory(path);
    return;
}
1488 | |
/*
 * Returns the ICU data directory, running dataDirectoryInitFn() through
 * umtx_initOnce() on first use.
 *
 * @return The current value of gDataDirectory.
 */
U_CAPI const char * U_EXPORT2
u_getDataDirectory() {
    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
    return gDataDirectory;
}
1494 | |
1495 | static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { |
1496 | if (U_FAILURE(status)) { |
1497 | return; |
1498 | } |
1499 | gTimeZoneFilesDirectory->clear(); |
1500 | gTimeZoneFilesDirectory->append(path, status); |
1501 | #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
1502 | char *p = gTimeZoneFilesDirectory->data(); |
1503 | while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != nullptr) { |
1504 | *p = U_FILE_SEP_CHAR; |
1505 | } |
1506 | #endif |
1507 | } |
1508 | |
1509 | #define TO_STRING(x) TO_STRING_2(x) |
1510 | #define TO_STRING_2(x) #x |
1511 | |
/*
 * One-time initializer for gTimeZoneFilesDirectory, run through
 * umtx_initOnce().
 *
 * Allocates the CharString and fills it from, depending on the build
 * configuration: the Windows data directory (UWP builds with
 * ICU_DATA_DIR_WINDOWS), the ICU_TIMEZONE_FILES_DIR environment variable, or
 * the compile-time U_TIMEZONE_FILES_DIR; an empty string is used when none
 * applies. When ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR is defined and set in
 * the environment, its value is prepended.
 *
 * @param status Set to U_MEMORY_ALLOCATION_ERROR if the CharString cannot be
 *               allocated; otherwise receives any error from setTimeZoneFilesDir().
 */
static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
    U_ASSERT(gTimeZoneFilesDirectory == nullptr);
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    gTimeZoneFilesDirectory = new CharString();
    if (gTimeZoneFilesDirectory == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const char *dir = "" ;

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    char timezonefilesdir_path_buffer[PATH_MAX];
    const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
#endif

#if U_PLATFORM_HAS_WINUWP_API == 1
// The UWP version does not support the environment variable setting.

# if defined(ICU_DATA_DIR_WINDOWS)
    // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        dir = datadir_path_buffer;
    }
# endif

#else
    /* getenv() returns nullptr when the variable is unset; handled below. */
    dir = getenv("ICU_TIMEZONE_FILES_DIR" );
#endif // U_PLATFORM_HAS_WINUWP_API

#if defined(U_TIMEZONE_FILES_DIR)
    if (dir == nullptr) {
        // Build time configuration setting.
        dir = TO_STRING(U_TIMEZONE_FILES_DIR);
    }
#endif

    if (dir == nullptr) {
        dir = "" ;
    }

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    if (prefix != nullptr) {
        /* snprintf() truncates silently if prefix + dir exceeds PATH_MAX. */
        snprintf(timezonefilesdir_path_buffer, sizeof(timezonefilesdir_path_buffer), "%s%s" , prefix, dir);
        dir = timezonefilesdir_path_buffer;
    }
#endif

    /* The value is copied into gTimeZoneFilesDirectory, so local buffers may go out of scope. */
    setTimeZoneFilesDir(dir, status);
}
1563 | |
1564 | |
1565 | U_CAPI const char * U_EXPORT2 |
1566 | u_getTimeZoneFilesDirectory(UErrorCode *status) { |
1567 | umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); |
1568 | return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "" ; |
1569 | } |
1570 | |
/*
 * Sets the directory that holds the time zone files.
 *
 * @param path   The new directory; it is copied into gTimeZoneFilesDirectory.
 * @param status In/out error code; must not be nullptr. Nothing is changed
 *               when it holds a failure after initialization.
 */
U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
    /* Make sure gTimeZoneFilesDirectory exists before it is overwritten. */
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
    setTimeZoneFilesDir(path, *status);

    // Note: this function does some extra churn, first setting based on the
    //       environment, then immediately replacing with the value passed in.
    //       The logic is simpler that way, and performance shouldn't be an issue.
}
1580 | |
1581 | |
1582 | #if U_POSIX_LOCALE |
1583 | /* A helper function used by uprv_getPOSIXIDForDefaultLocale and |
1584 | * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for |
1585 | * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. |
1586 | */ |
1587 | static const char *uprv_getPOSIXIDForCategory(int category) |
1588 | { |
1589 | const char* posixID = nullptr; |
1590 | if (category == LC_MESSAGES || category == LC_CTYPE) { |
1591 | /* |
1592 | * On Solaris two different calls to setlocale can result in |
1593 | * different values. Only get this value once. |
1594 | * |
1595 | * We must check this first because an application can set this. |
1596 | * |
1597 | * LC_ALL can't be used because it's platform dependent. The LANG |
1598 | * environment variable seems to affect LC_CTYPE variable by default. |
1599 | * Here is what setlocale(LC_ALL, nullptr) can return. |
1600 | * HPUX can return 'C C C C C C C' |
1601 | * Solaris can return /en_US/C/C/C/C/C on the second try. |
1602 | * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... |
1603 | * |
1604 | * The default codepage detection also needs to use LC_CTYPE. |
1605 | * |
1606 | * Do not call setlocale(LC_*, "")! Using an empty string instead |
1607 | * of nullptr, will modify the libc behavior. |
1608 | */ |
1609 | posixID = setlocale(category, nullptr); |
1610 | if ((posixID == 0) |
1611 | || (uprv_strcmp("C" , posixID) == 0) |
1612 | || (uprv_strcmp("POSIX" , posixID) == 0)) |
1613 | { |
1614 | /* Maybe we got some garbage. Try something more reasonable */ |
1615 | posixID = getenv("LC_ALL" ); |
1616 | /* Solaris speaks POSIX - See IEEE Std 1003.1-2008 |
1617 | * This is needed to properly handle empty env. variables |
1618 | */ |
1619 | #if U_PLATFORM == U_PF_SOLARIS |
1620 | if ((posixID == 0) || (posixID[0] == '\0')) { |
1621 | posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE" ); |
1622 | if ((posixID == 0) || (posixID[0] == '\0')) { |
1623 | #else |
1624 | if (posixID == 0) { |
1625 | posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE" ); |
1626 | if (posixID == 0) { |
1627 | #endif |
1628 | posixID = getenv("LANG" ); |
1629 | } |
1630 | } |
1631 | } |
1632 | } |
1633 | if ((posixID==0) |
1634 | || (uprv_strcmp("C" , posixID) == 0) |
1635 | || (uprv_strcmp("POSIX" , posixID) == 0)) |
1636 | { |
1637 | /* Nothing worked. Give it a nice POSIX default value. */ |
1638 | posixID = "en_US_POSIX" ; |
1639 | // Note: this test will not catch 'C.UTF-8', |
1640 | // that will be handled in uprv_getDefaultLocaleID(). |
1641 | // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage() |
1642 | // caller which expects to see "en_US_POSIX" in many branches. |
1643 | } |
1644 | return posixID; |
1645 | } |
1646 | |
1647 | /* Return just the POSIX id for the default locale, whatever happens to be in |
1648 | * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. |
1649 | */ |
1650 | static const char *uprv_getPOSIXIDForDefaultLocale() |
1651 | { |
1652 | static const char* posixID = nullptr; |
1653 | if (posixID == 0) { |
1654 | posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); |
1655 | } |
1656 | return posixID; |
1657 | } |
1658 | |
1659 | #if !U_CHARSET_IS_UTF8 |
1660 | /* Return just the POSIX id for the default codepage, whatever happens to be in |
1661 | * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. |
1662 | */ |
1663 | static const char *uprv_getPOSIXIDForDefaultCodepage() |
1664 | { |
1665 | static const char* posixID = nullptr; |
1666 | if (posixID == 0) { |
1667 | posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); |
1668 | } |
1669 | return posixID; |
1670 | } |
1671 | #endif |
1672 | #endif |
1673 | |
1674 | /* NOTE: The caller should handle thread safety */ |
1675 | U_CAPI const char* U_EXPORT2 |
1676 | uprv_getDefaultLocaleID() |
1677 | { |
1678 | #if U_POSIX_LOCALE |
1679 | /* |
1680 | Note that: (a '!' means the ID is improper somehow) |
1681 | LC_ALL ----> default_loc codepage |
1682 | -------------------------------------------------------- |
1683 | ab.CD ab CD |
1684 | ab@CD ab__CD - |
1685 | ab@CD.EF ab__CD EF |
1686 | |
1687 | ab_CD.EF@GH ab_CD_GH EF |
1688 | |
1689 | Some 'improper' ways to do the same as above: |
1690 | ! ab_CD@GH.EF ab_CD_GH EF |
1691 | ! ab_CD.EF@GH.IJ ab_CD_GH EF |
1692 | ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF |
1693 | |
1694 | _CD@GH _CD_GH - |
1695 | _CD.EF@GH _CD_GH EF |
1696 | |
1697 | The variant cannot have dots in it. |
1698 | The 'rightmost' variant (@xxx) wins. |
1699 | The leftmost codepage (.xxx) wins. |
1700 | */ |
1701 | const char* posixID = uprv_getPOSIXIDForDefaultLocale(); |
1702 | |
1703 | /* Format: (no spaces) |
1704 | ll [ _CC ] [ . MM ] [ @ VV] |
1705 | |
1706 | l = lang, C = ctry, M = charmap, V = variant |
1707 | */ |
1708 | |
1709 | if (gCorrectedPOSIXLocale != nullptr) { |
1710 | return gCorrectedPOSIXLocale; |
1711 | } |
1712 | |
1713 | // Copy the ID into owned memory. |
1714 | // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination |
1715 | char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); |
1716 | if (correctedPOSIXLocale == nullptr) { |
1717 | return nullptr; |
1718 | } |
1719 | uprv_strcpy(correctedPOSIXLocale, posixID); |
1720 | |
1721 | char *limit; |
1722 | if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { |
1723 | *limit = 0; |
1724 | } |
1725 | if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { |
1726 | *limit = 0; |
1727 | } |
1728 | |
1729 | if ((uprv_strcmp("C" , correctedPOSIXLocale) == 0) // no @ variant |
1730 | || (uprv_strcmp("POSIX" , correctedPOSIXLocale) == 0)) { |
1731 | // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. |
1732 | // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) |
1733 | uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX" ); |
1734 | } |
1735 | |
1736 | /* Note that we scan the *uncorrected* ID. */ |
1737 | const char *p; |
1738 | if ((p = uprv_strrchr(posixID, '@')) != nullptr) { |
1739 | p++; |
1740 | |
1741 | /* Take care of any special cases here.. */ |
1742 | if (!uprv_strcmp(p, "nynorsk" )) { |
1743 | p = "NY" ; |
1744 | /* Don't worry about no__NY. In practice, it won't appear. */ |
1745 | } |
1746 | |
1747 | if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) { |
1748 | uprv_strcat(correctedPOSIXLocale, "__" ); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ |
1749 | } |
1750 | else { |
1751 | uprv_strcat(correctedPOSIXLocale, "_" ); /* aa_CC@b -> aa_CC_b */ |
1752 | } |
1753 | |
1754 | const char *q; |
1755 | if ((q = uprv_strchr(p, '.')) != nullptr) { |
1756 | /* How big will the resulting string be? */ |
1757 | int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); |
1758 | uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset |
1759 | correctedPOSIXLocale[len] = 0; |
1760 | } |
1761 | else { |
1762 | /* Anything following the @ sign */ |
1763 | uprv_strcat(correctedPOSIXLocale, p); |
1764 | } |
1765 | |
1766 | /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? |
1767 | * How about 'russian' -> 'ru'? |
1768 | * Many of the other locales using ISO codes will be handled by the |
1769 | * canonicalization functions in uloc_getDefault. |
1770 | */ |
1771 | } |
1772 | |
1773 | if (gCorrectedPOSIXLocale == nullptr) { |
1774 | gCorrectedPOSIXLocale = correctedPOSIXLocale; |
1775 | gCorrectedPOSIXLocaleHeapAllocated = true; |
1776 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
1777 | correctedPOSIXLocale = nullptr; |
1778 | } |
1779 | posixID = gCorrectedPOSIXLocale; |
1780 | |
1781 | if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */ |
1782 | uprv_free(correctedPOSIXLocale); |
1783 | } |
1784 | |
1785 | return posixID; |
1786 | |
1787 | #elif U_PLATFORM_USES_ONLY_WIN32_API |
1788 | #define POSIX_LOCALE_CAPACITY 64 |
1789 | UErrorCode status = U_ZERO_ERROR; |
1790 | char *correctedPOSIXLocale = nullptr; |
1791 | |
1792 | // If we have already figured this out just use the cached value |
1793 | if (gCorrectedPOSIXLocale != nullptr) { |
1794 | return gCorrectedPOSIXLocale; |
1795 | } |
1796 | |
1797 | // No cached value, need to determine the current value |
1798 | static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; |
1799 | int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); |
1800 | |
1801 | // Now we should have a Windows locale name that needs converted to the POSIX style. |
1802 | if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. |
1803 | { |
1804 | // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) |
1805 | char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; |
1806 | |
1807 | int32_t i; |
1808 | for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) |
1809 | { |
1810 | if (windowsLocale[i] == '_') |
1811 | { |
1812 | modifiedWindowsLocale[i] = '-'; |
1813 | } |
1814 | else |
1815 | { |
1816 | modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]); |
1817 | } |
1818 | |
1819 | if (modifiedWindowsLocale[i] == '\0') |
1820 | { |
1821 | break; |
1822 | } |
1823 | } |
1824 | |
1825 | if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) |
1826 | { |
1827 | // Ran out of room, can't really happen, maybe we'll be lucky about a matching |
1828 | // locale when tags are dropped |
1829 | modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; |
1830 | } |
1831 | |
1832 | // Now normalize the resulting name |
1833 | correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); |
1834 | /* TODO: Should we just exit on memory allocation failure? */ |
1835 | if (correctedPOSIXLocale) |
1836 | { |
1837 | int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); |
1838 | if (U_SUCCESS(status)) |
1839 | { |
1840 | *(correctedPOSIXLocale + posixLen) = 0; |
1841 | gCorrectedPOSIXLocale = correctedPOSIXLocale; |
1842 | gCorrectedPOSIXLocaleHeapAllocated = true; |
1843 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
1844 | } |
1845 | else |
1846 | { |
1847 | uprv_free(correctedPOSIXLocale); |
1848 | } |
1849 | } |
1850 | } |
1851 | |
1852 | // If unable to find a locale we can agree upon, use en-US by default |
1853 | if (gCorrectedPOSIXLocale == nullptr) { |
1854 | gCorrectedPOSIXLocale = "en_US" ; |
1855 | } |
1856 | return gCorrectedPOSIXLocale; |
1857 | |
1858 | #elif U_PLATFORM == U_PF_OS400 |
1859 | /* locales are process scoped and are by definition thread safe */ |
1860 | static char correctedLocale[64]; |
1861 | const char *localeID = getenv("LC_ALL" ); |
1862 | char *p; |
1863 | |
1864 | if (localeID == nullptr) |
1865 | localeID = getenv("LANG" ); |
1866 | if (localeID == nullptr) |
1867 | localeID = setlocale(LC_ALL, nullptr); |
1868 | /* Make sure we have something... */ |
1869 | if (localeID == nullptr) |
1870 | return "en_US_POSIX" ; |
1871 | |
1872 | /* Extract the locale name from the path. */ |
1873 | if((p = uprv_strrchr(localeID, '/')) != nullptr) |
1874 | { |
1875 | /* Increment p to start of locale name. */ |
1876 | p++; |
1877 | localeID = p; |
1878 | } |
1879 | |
1880 | /* Copy to work location. */ |
1881 | uprv_strcpy(correctedLocale, localeID); |
1882 | |
1883 | /* Strip off the '.locale' extension. */ |
1884 | if((p = uprv_strchr(correctedLocale, '.')) != nullptr) { |
1885 | *p = 0; |
1886 | } |
1887 | |
1888 | /* Upper case the locale name. */ |
1889 | T_CString_toUpperCase(correctedLocale); |
1890 | |
1891 | /* See if we are using the POSIX locale. Any of the |
1892 | * following are equivalent and use the same QLGPGCMA |
1893 | * (POSIX) locale. |
1894 | * QLGPGCMA2 means UCS2 |
1895 | * QLGPGCMA_4 means UTF-32 |
1896 | * QLGPGCMA_8 means UTF-8 |
1897 | */ |
1898 | if ((uprv_strcmp("C" , correctedLocale) == 0) || |
1899 | (uprv_strcmp("POSIX" , correctedLocale) == 0) || |
1900 | (uprv_strncmp("QLGPGCMA" , correctedLocale, 8) == 0)) |
1901 | { |
1902 | uprv_strcpy(correctedLocale, "en_US_POSIX" ); |
1903 | } |
1904 | else |
1905 | { |
1906 | int16_t LocaleLen; |
1907 | |
1908 | /* Lower case the lang portion. */ |
1909 | for(p = correctedLocale; *p != 0 && *p != '_'; p++) |
1910 | { |
1911 | *p = uprv_tolower(*p); |
1912 | } |
1913 | |
1914 | /* Adjust for Euro. After '_E' add 'URO'. */ |
1915 | LocaleLen = uprv_strlen(correctedLocale); |
1916 | if (correctedLocale[LocaleLen - 2] == '_' && |
1917 | correctedLocale[LocaleLen - 1] == 'E') |
1918 | { |
1919 | uprv_strcat(correctedLocale, "URO" ); |
1920 | } |
1921 | |
1922 | /* If using Lotus-based locale then convert to |
1923 | * equivalent non Lotus. |
1924 | */ |
1925 | else if (correctedLocale[LocaleLen - 2] == '_' && |
1926 | correctedLocale[LocaleLen - 1] == 'L') |
1927 | { |
1928 | correctedLocale[LocaleLen - 2] = 0; |
1929 | } |
1930 | |
1931 | /* There are separate simplified and traditional |
1932 | * locales called zh_HK_S and zh_HK_T. |
1933 | */ |
1934 | else if (uprv_strncmp(correctedLocale, "zh_HK" , 5) == 0) |
1935 | { |
1936 | uprv_strcpy(correctedLocale, "zh_HK" ); |
1937 | } |
1938 | |
1939 | /* A special zh_CN_GBK locale... |
1940 | */ |
1941 | else if (uprv_strcmp(correctedLocale, "zh_CN_GBK" ) == 0) |
1942 | { |
1943 | uprv_strcpy(correctedLocale, "zh_CN" ); |
1944 | } |
1945 | |
1946 | } |
1947 | |
1948 | return correctedLocale; |
1949 | #endif |
1950 | |
1951 | } |
1952 | |
1953 | #if !U_CHARSET_IS_UTF8 |
1954 | #if U_POSIX_LOCALE |
1955 | /* |
1956 | Due to various platform differences, one platform may specify a charset, |
1957 | when they really mean a different charset. Remap the names so that they are |
1958 | compatible with ICU. Only conflicting/ambiguous aliases should be resolved |
1959 | here. Before adding anything to this function, please consider adding unique |
1960 | names to the ICU alias table in the data directory. |
1961 | */ |
1962 | static const char* |
1963 | remapPlatformDependentCodepage(const char *locale, const char *name) { |
1964 | if (locale != nullptr && *locale == 0) { |
1965 | /* Make sure that an empty locale is handled the same way. */ |
1966 | locale = nullptr; |
1967 | } |
1968 | if (name == nullptr) { |
1969 | return nullptr; |
1970 | } |
1971 | #if U_PLATFORM == U_PF_AIX |
1972 | if (uprv_strcmp(name, "IBM-943" ) == 0) { |
1973 | /* Use the ASCII compatible ibm-943 */ |
1974 | name = "Shift-JIS" ; |
1975 | } |
1976 | else if (uprv_strcmp(name, "IBM-1252" ) == 0) { |
1977 | /* Use the windows-1252 that contains the Euro */ |
1978 | name = "IBM-5348" ; |
1979 | } |
1980 | #elif U_PLATFORM == U_PF_SOLARIS |
1981 | if (locale != nullptr && uprv_strcmp(name, "EUC" ) == 0) { |
1982 | /* Solaris underspecifies the "EUC" name. */ |
1983 | if (uprv_strcmp(locale, "zh_CN" ) == 0) { |
1984 | name = "EUC-CN" ; |
1985 | } |
1986 | else if (uprv_strcmp(locale, "zh_TW" ) == 0) { |
1987 | name = "EUC-TW" ; |
1988 | } |
1989 | else if (uprv_strcmp(locale, "ko_KR" ) == 0) { |
1990 | name = "EUC-KR" ; |
1991 | } |
1992 | } |
1993 | else if (uprv_strcmp(name, "eucJP" ) == 0) { |
1994 | /* |
1995 | ibm-954 is the best match. |
1996 | ibm-33722 is the default for eucJP (similar to Windows). |
1997 | */ |
1998 | name = "eucjis" ; |
1999 | } |
2000 | else if (uprv_strcmp(name, "646" ) == 0) { |
2001 | /* |
2002 | * The default codepage given by Solaris is 646 but the C library routines treat it as if it was |
2003 | * ISO-8859-1 instead of US-ASCII(646). |
2004 | */ |
2005 | name = "ISO-8859-1" ; |
2006 | } |
2007 | #elif U_PLATFORM_IS_DARWIN_BASED |
2008 | if (locale == nullptr && *name == 0) { |
2009 | /* |
2010 | No locale was specified, and an empty name was passed in. |
2011 | This usually indicates that nl_langinfo didn't return valid information. |
2012 | Mac OS X uses UTF-8 by default (especially the locale data and console). |
2013 | */ |
2014 | name = "UTF-8" ; |
2015 | } |
2016 | else if (uprv_strcmp(name, "CP949" ) == 0) { |
2017 | /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ |
2018 | name = "EUC-KR" ; |
2019 | } |
2020 | else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX" ) != 0 && uprv_strcmp(name, "US-ASCII" ) == 0) { |
2021 | /* |
2022 | * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. |
2023 | */ |
2024 | name = "UTF-8" ; |
2025 | } |
2026 | #elif U_PLATFORM == U_PF_BSD |
2027 | if (uprv_strcmp(name, "CP949" ) == 0) { |
2028 | /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ |
2029 | name = "EUC-KR" ; |
2030 | } |
2031 | #elif U_PLATFORM == U_PF_HPUX |
2032 | if (locale != nullptr && uprv_strcmp(locale, "zh_HK" ) == 0 && uprv_strcmp(name, "big5" ) == 0) { |
2033 | /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ |
2034 | /* zh_TW.big5 is not the same charset as zh_HK.big5! */ |
2035 | name = "hkbig5" ; |
2036 | } |
2037 | else if (uprv_strcmp(name, "eucJP" ) == 0) { |
2038 | /* |
2039 | ibm-1350 is the best match, but unavailable. |
2040 | ibm-954 is mostly a superset of ibm-1350. |
2041 | ibm-33722 is the default for eucJP (similar to Windows). |
2042 | */ |
2043 | name = "eucjis" ; |
2044 | } |
2045 | #elif U_PLATFORM == U_PF_LINUX |
2046 | if (locale != nullptr && uprv_strcmp(name, "euc" ) == 0) { |
2047 | /* Linux underspecifies the "EUC" name. */ |
2048 | if (uprv_strcmp(locale, "korean" ) == 0) { |
2049 | name = "EUC-KR" ; |
2050 | } |
2051 | else if (uprv_strcmp(locale, "japanese" ) == 0) { |
2052 | /* See comment below about eucJP */ |
2053 | name = "eucjis" ; |
2054 | } |
2055 | } |
2056 | else if (uprv_strcmp(name, "eucjp" ) == 0) { |
2057 | /* |
2058 | ibm-1350 is the best match, but unavailable. |
2059 | ibm-954 is mostly a superset of ibm-1350. |
2060 | ibm-33722 is the default for eucJP (similar to Windows). |
2061 | */ |
2062 | name = "eucjis" ; |
2063 | } |
2064 | else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX" ) != 0 && |
2065 | (uprv_strcmp(name, "ANSI_X3.4-1968" ) == 0 || uprv_strcmp(name, "US-ASCII" ) == 0)) { |
2066 | /* |
2067 | * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. |
2068 | */ |
2069 | name = "UTF-8" ; |
2070 | } |
2071 | /* |
2072 | * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of |
2073 | * it by falling back to 'US-ASCII' when nullptr is returned from this |
2074 | * function. So, we don't have to worry about it here. |
2075 | */ |
2076 | #endif |
2077 | /* return nullptr when "" is passed in */ |
2078 | if (*name == 0) { |
2079 | name = nullptr; |
2080 | } |
2081 | return name; |
2082 | } |
2083 | |
2084 | static const char* |
2085 | getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) |
2086 | { |
2087 | char localeBuf[100]; |
2088 | const char *name = nullptr; |
2089 | char *variant = nullptr; |
2090 | |
2091 | if (localeName != nullptr && (name = (uprv_strchr(localeName, '.'))) != nullptr) { |
2092 | size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); |
2093 | uprv_strncpy(localeBuf, localeName, localeCapacity); |
2094 | localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */ |
2095 | name = uprv_strncpy(buffer, name+1, buffCapacity); |
2096 | buffer[buffCapacity-1] = 0; /* ensure NUL termination */ |
2097 | if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != nullptr) { |
2098 | *variant = 0; |
2099 | } |
2100 | name = remapPlatformDependentCodepage(localeBuf, name); |
2101 | } |
2102 | return name; |
2103 | } |
2104 | #endif |
2105 | |
2106 | static const char* |
2107 | int_getDefaultCodepage() |
2108 | { |
2109 | #if U_PLATFORM == U_PF_OS400 |
2110 | uint32_t ccsid = 37; /* Default to ibm-37 */ |
2111 | static char codepage[64]; |
2112 | Qwc_JOBI0400_t jobinfo; |
2113 | Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ |
2114 | |
2115 | EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400" , |
2116 | "* " , " " , &error); |
2117 | |
2118 | if (error.Bytes_Available == 0) { |
2119 | if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { |
2120 | ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; |
2121 | } |
2122 | else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { |
2123 | ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; |
2124 | } |
2125 | /* else use the default */ |
2126 | } |
2127 | snprintf(codepage, sizeof(codepage), "ibm-%d" , ccsid); |
2128 | return codepage; |
2129 | |
2130 | #elif U_PLATFORM == U_PF_OS390 |
2131 | static char codepage[64]; |
2132 | |
2133 | strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); |
2134 | strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); |
2135 | codepage[63] = 0; /* NUL terminate */ |
2136 | |
2137 | return codepage; |
2138 | |
2139 | #elif U_PLATFORM_USES_ONLY_WIN32_API |
2140 | static char codepage[64]; |
2141 | DWORD codepageNumber = 0; |
2142 | |
2143 | #if U_PLATFORM_HAS_WINUWP_API == 1 |
2144 | // UWP doesn't have a direct API to get the default ACP as Microsoft would rather |
2145 | // have folks use Unicode than a "system" code page, however this is the same |
2146 | // codepage as the system default locale codepage. (FWIW, the system locale is |
2147 | // ONLY used for codepage, it should never be used for anything else) |
2148 | GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, |
2149 | (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); |
2150 | #else |
2151 | // Win32 apps can call GetACP |
2152 | codepageNumber = GetACP(); |
2153 | #endif |
2154 | // Special case for UTF-8 |
2155 | if (codepageNumber == 65001) |
2156 | { |
2157 | return "UTF-8" ; |
2158 | } |
2159 | // Windows codepages can look like windows-1252, so format the found number |
2160 | // the numbers are eclectic, however all valid system code pages, besides UTF-8 |
2161 | // are between 3 and 19999 |
2162 | if (codepageNumber > 0 && codepageNumber < 20000) |
2163 | { |
2164 | snprintf(codepage, sizeof(codepage), "windows-%ld" , codepageNumber); |
2165 | return codepage; |
2166 | } |
2167 | // If the codepage number call failed then return UTF-8 |
2168 | return "UTF-8" ; |
2169 | |
2170 | #elif U_POSIX_LOCALE |
2171 | static char codesetName[100]; |
2172 | const char *localeName = nullptr; |
2173 | const char *name = nullptr; |
2174 | |
2175 | localeName = uprv_getPOSIXIDForDefaultCodepage(); |
2176 | uprv_memset(codesetName, 0, sizeof(codesetName)); |
2177 | /* On Solaris nl_langinfo returns C locale values unless setlocale |
2178 | * was called earlier. |
2179 | */ |
2180 | #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) |
2181 | /* When available, check nl_langinfo first because it usually gives more |
2182 | useful names. It depends on LC_CTYPE. |
2183 | nl_langinfo may use the same buffer as setlocale. */ |
2184 | { |
2185 | const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); |
2186 | #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED |
2187 | /* |
2188 | * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 |
2189 | * instead of ASCII. |
2190 | */ |
2191 | if (uprv_strcmp(localeName, "en_US_POSIX" ) != 0) { |
2192 | codeset = remapPlatformDependentCodepage(localeName, codeset); |
2193 | } else |
2194 | #endif |
2195 | { |
2196 | codeset = remapPlatformDependentCodepage(nullptr, codeset); |
2197 | } |
2198 | |
2199 | if (codeset != nullptr) { |
2200 | uprv_strncpy(codesetName, codeset, sizeof(codesetName)); |
2201 | codesetName[sizeof(codesetName)-1] = 0; |
2202 | return codesetName; |
2203 | } |
2204 | } |
2205 | #endif |
2206 | |
2207 | /* Use setlocale in a nice way, and then check some environment variables. |
2208 | Maybe the application used setlocale already. |
2209 | */ |
2210 | uprv_memset(codesetName, 0, sizeof(codesetName)); |
2211 | name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); |
2212 | if (name) { |
2213 | /* if we can find the codeset name from setlocale, return that. */ |
2214 | return name; |
2215 | } |
2216 | |
2217 | if (*codesetName == 0) |
2218 | { |
2219 | /* Everything failed. Return US ASCII (ISO 646). */ |
2220 | (void)uprv_strcpy(codesetName, "US-ASCII" ); |
2221 | } |
2222 | return codesetName; |
2223 | #else |
2224 | return "US-ASCII" ; |
2225 | #endif |
2226 | } |
2227 | |
2228 | |
2229 | U_CAPI const char* U_EXPORT2 |
2230 | uprv_getDefaultCodepage() |
2231 | { |
2232 | static char const *name = nullptr; |
2233 | umtx_lock(nullptr); |
2234 | if (name == nullptr) { |
2235 | name = int_getDefaultCodepage(); |
2236 | } |
2237 | umtx_unlock(nullptr); |
2238 | return name; |
2239 | } |
2240 | #endif /* !U_CHARSET_IS_UTF8 */ |
2241 | |
2242 | |
2243 | /* end of platform-specific implementation -------------- */ |
2244 | |
2245 | /* version handling --------------------------------------------------------- */ |
2246 | |
2247 | U_CAPI void U_EXPORT2 |
2248 | u_versionFromString(UVersionInfo versionArray, const char *versionString) { |
2249 | char *end; |
2250 | uint16_t part=0; |
2251 | |
2252 | if(versionArray==nullptr) { |
2253 | return; |
2254 | } |
2255 | |
2256 | if(versionString!=nullptr) { |
2257 | for(;;) { |
2258 | versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); |
2259 | if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { |
2260 | break; |
2261 | } |
2262 | versionString=end+1; |
2263 | } |
2264 | } |
2265 | |
2266 | while(part<U_MAX_VERSION_LENGTH) { |
2267 | versionArray[part++]=0; |
2268 | } |
2269 | } |
2270 | |
2271 | U_CAPI void U_EXPORT2 |
2272 | u_versionFromUString(UVersionInfo versionArray, const char16_t *versionString) { |
2273 | if(versionArray!=nullptr && versionString!=nullptr) { |
2274 | char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; |
2275 | int32_t len = u_strlen(versionString); |
2276 | if(len>U_MAX_VERSION_STRING_LENGTH) { |
2277 | len = U_MAX_VERSION_STRING_LENGTH; |
2278 | } |
2279 | u_UCharsToChars(versionString, versionChars, len); |
2280 | versionChars[len]=0; |
2281 | u_versionFromString(versionArray, versionChars); |
2282 | } |
2283 | } |
2284 | |
2285 | U_CAPI void U_EXPORT2 |
2286 | u_versionToString(const UVersionInfo versionArray, char *versionString) { |
2287 | uint16_t count, part; |
2288 | uint8_t field; |
2289 | |
2290 | if(versionString==nullptr) { |
2291 | return; |
2292 | } |
2293 | |
2294 | if(versionArray==nullptr) { |
2295 | versionString[0]=0; |
2296 | return; |
2297 | } |
2298 | |
2299 | /* count how many fields need to be written */ |
2300 | for(count=4; count>0 && versionArray[count-1]==0; --count) { |
2301 | } |
2302 | |
2303 | if(count <= 1) { |
2304 | count = 2; |
2305 | } |
2306 | |
2307 | /* write the first part */ |
2308 | /* write the decimal field value */ |
2309 | field=versionArray[0]; |
2310 | if(field>=100) { |
2311 | *versionString++=(char)('0'+field/100); |
2312 | field%=100; |
2313 | } |
2314 | if(field>=10) { |
2315 | *versionString++=(char)('0'+field/10); |
2316 | field%=10; |
2317 | } |
2318 | *versionString++=(char)('0'+field); |
2319 | |
2320 | /* write the following parts */ |
2321 | for(part=1; part<count; ++part) { |
2322 | /* write a dot first */ |
2323 | *versionString++=U_VERSION_DELIMITER; |
2324 | |
2325 | /* write the decimal field value */ |
2326 | field=versionArray[part]; |
2327 | if(field>=100) { |
2328 | *versionString++=(char)('0'+field/100); |
2329 | field%=100; |
2330 | } |
2331 | if(field>=10) { |
2332 | *versionString++=(char)('0'+field/10); |
2333 | field%=10; |
2334 | } |
2335 | *versionString++=(char)('0'+field); |
2336 | } |
2337 | |
2338 | /* NUL-terminate */ |
2339 | *versionString=0; |
2340 | } |
2341 | |
2342 | U_CAPI void U_EXPORT2 |
2343 | u_getVersion(UVersionInfo versionArray) { |
2344 | (void)copyright; // Suppress unused variable warning from clang. |
2345 | u_versionFromString(versionArray, U_ICU_VERSION); |
2346 | } |
2347 | |
2348 | /** |
2349 | * icucfg.h dependent code |
2350 | */ |
2351 | |
2352 | #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API |
2353 | |
2354 | #if HAVE_DLFCN_H |
2355 | #ifdef __MVS__ |
2356 | #ifndef __SUSV3 |
2357 | #define __SUSV3 1 |
2358 | #endif |
2359 | #endif |
2360 | #include <dlfcn.h> |
2361 | #endif /* HAVE_DLFCN_H */ |
2362 | |
2363 | U_CAPI void * U_EXPORT2 |
2364 | uprv_dl_open(const char *libName, UErrorCode *status) { |
2365 | void *ret = nullptr; |
2366 | if(U_FAILURE(*status)) return ret; |
2367 | ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); |
2368 | if(ret==nullptr) { |
2369 | #ifdef U_TRACE_DYLOAD |
2370 | printf("dlerror on dlopen(%s): %s\n" , libName, dlerror()); |
2371 | #endif |
2372 | *status = U_MISSING_RESOURCE_ERROR; |
2373 | } |
2374 | return ret; |
2375 | } |
2376 | |
2377 | U_CAPI void U_EXPORT2 |
2378 | uprv_dl_close(void *lib, UErrorCode *status) { |
2379 | if(U_FAILURE(*status)) return; |
2380 | dlclose(lib); |
2381 | } |
2382 | |
2383 | U_CAPI UVoidFunction* U_EXPORT2 |
2384 | uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { |
2385 | union { |
2386 | UVoidFunction *fp; |
2387 | void *vp; |
2388 | } uret; |
2389 | uret.fp = nullptr; |
2390 | if(U_FAILURE(*status)) return uret.fp; |
2391 | uret.vp = dlsym(lib, sym); |
2392 | if(uret.vp == nullptr) { |
2393 | #ifdef U_TRACE_DYLOAD |
2394 | printf("dlerror on dlsym(%p,%s): %s\n" , lib,sym, dlerror()); |
2395 | #endif |
2396 | *status = U_MISSING_RESOURCE_ERROR; |
2397 | } |
2398 | return uret.fp; |
2399 | } |
2400 | |
2401 | #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API |
2402 | |
/* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2405 | |
2406 | U_CAPI void * U_EXPORT2 |
2407 | uprv_dl_open(const char *libName, UErrorCode *status) { |
2408 | HMODULE lib = nullptr; |
2409 | |
2410 | if(U_FAILURE(*status)) return nullptr; |
2411 | |
2412 | lib = LoadLibraryA(libName); |
2413 | |
2414 | if(lib==nullptr) { |
2415 | *status = U_MISSING_RESOURCE_ERROR; |
2416 | } |
2417 | |
2418 | return (void*)lib; |
2419 | } |
2420 | |
2421 | U_CAPI void U_EXPORT2 |
2422 | uprv_dl_close(void *lib, UErrorCode *status) { |
2423 | HMODULE handle = (HMODULE)lib; |
2424 | if(U_FAILURE(*status)) return; |
2425 | |
2426 | FreeLibrary(handle); |
2427 | |
2428 | return; |
2429 | } |
2430 | |
2431 | U_CAPI UVoidFunction* U_EXPORT2 |
2432 | uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { |
2433 | HMODULE handle = (HMODULE)lib; |
2434 | UVoidFunction* addr = nullptr; |
2435 | |
2436 | if(U_FAILURE(*status) || lib==nullptr) return nullptr; |
2437 | |
2438 | addr = (UVoidFunction*)GetProcAddress(handle, sym); |
2439 | |
2440 | if(addr==nullptr) { |
2441 | DWORD lastError = GetLastError(); |
2442 | if(lastError == ERROR_PROC_NOT_FOUND) { |
2443 | *status = U_MISSING_RESOURCE_ERROR; |
2444 | } else { |
2445 | *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ |
2446 | } |
2447 | } |
2448 | |
2449 | return addr; |
2450 | } |
2451 | |
2452 | #else |
2453 | |
2454 | /* No dynamic loading, null (nonexistent) implementation. */ |
2455 | |
2456 | U_CAPI void * U_EXPORT2 |
2457 | uprv_dl_open(const char *libName, UErrorCode *status) { |
2458 | (void)libName; |
2459 | if(U_FAILURE(*status)) return nullptr; |
2460 | *status = U_UNSUPPORTED_ERROR; |
2461 | return nullptr; |
2462 | } |
2463 | |
2464 | U_CAPI void U_EXPORT2 |
2465 | uprv_dl_close(void *lib, UErrorCode *status) { |
2466 | (void)lib; |
2467 | if(U_FAILURE(*status)) return; |
2468 | *status = U_UNSUPPORTED_ERROR; |
2469 | return; |
2470 | } |
2471 | |
2472 | U_CAPI UVoidFunction* U_EXPORT2 |
2473 | uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { |
2474 | (void)lib; |
2475 | (void)sym; |
2476 | if(U_SUCCESS(*status)) { |
2477 | *status = U_UNSUPPORTED_ERROR; |
2478 | } |
2479 | return (UVoidFunction*)nullptr; |
2480 | } |
2481 | |
2482 | #endif |
2483 | |
2484 | /* |
2485 | * Hey, Emacs, please set the following: |
2486 | * |
2487 | * Local Variables: |
2488 | * indent-tabs-mode: nil |
2489 | * End: |
2490 | * |
2491 | */ |
2492 | |