1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ****************************************************************************** |
5 | * |
6 | * Copyright (C) 1997-2016, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ****************************************************************************** |
10 | * |
11 | * FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) |
12 | * |
13 | * Date Name Description |
14 | * 04/14/97 aliu Creation. |
15 | * 04/24/97 aliu Added getDefaultDataDirectory() and |
16 | * getDefaultLocaleID(). |
17 | * 04/28/97 aliu Rewritten to assume Unix and apply general methods |
18 | * for assumed case. Non-UNIX platforms must be |
19 | * special-cased. Rewrote numeric methods dealing |
20 | * with NaN and Infinity to be platform independent |
21 | * over all IEEE 754 platforms. |
22 | * 05/13/97 aliu Restored sign of timezone |
23 | * (semantics are hours West of GMT) |
24 | * 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, |
25 | * nextDouble.. |
26 | * 07/22/98 stephen Added remainder, max, min, trunc |
27 | * 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity |
28 | * 08/24/98 stephen Added longBitsFromDouble |
29 | * 09/08/98 stephen Minor changes for Mac Port |
30 | * 03/02/99 stephen Removed openFile(). Added AS400 support. |
31 | * Fixed EBCDIC tables |
32 | * 04/15/99 stephen Converted to C. |
33 | * 06/28/99 stephen Removed mutex locking in u_isBigEndian(). |
34 | * 08/04/99 jeffrey R. Added OS/2 changes |
35 | * 11/15/99 helena Integrated S/390 IEEE support. |
36 | * 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID |
37 | * 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage |
38 | * 01/03/08 Steven L. Fake Time Support |
39 | ****************************************************************************** |
40 | */ |
41 | |
42 | // Defines _XOPEN_SOURCE for access to POSIX functions. |
43 | // Must be before any other #includes. |
44 | #include "uposixdefs.h" |
45 | |
46 | // First, the platform type. Need this for U_PLATFORM. |
47 | #include "unicode/platform.h" |
48 | |
49 | #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ |
50 | /* tzset isn't defined in strict ANSI on MinGW. */ |
51 | #undef __STRICT_ANSI__ |
52 | #endif |
53 | |
/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
 */
57 | #include <time.h> |
58 | |
59 | #if !U_PLATFORM_USES_ONLY_WIN32_API |
60 | #include <sys/time.h> |
61 | #endif |
62 | |
63 | /* include the rest of the ICU headers */ |
64 | #include "unicode/putil.h" |
65 | #include "unicode/ustring.h" |
66 | #include "putilimp.h" |
67 | #include "uassert.h" |
68 | #include "umutex.h" |
69 | #include "cmemory.h" |
70 | #include "cstring.h" |
71 | #include "locmap.h" |
72 | #include "ucln_cmn.h" |
73 | #include "charstr.h" |
74 | |
75 | /* Include standard headers. */ |
76 | #include <stdio.h> |
77 | #include <stdlib.h> |
78 | #include <string.h> |
79 | #include <math.h> |
80 | #include <locale.h> |
81 | #include <float.h> |
82 | |
83 | #ifndef U_COMMON_IMPLEMENTATION |
84 | #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu |
85 | #endif |
86 | |
87 | |
88 | /* include system headers */ |
89 | #if U_PLATFORM_USES_ONLY_WIN32_API |
90 | /* |
91 | * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. |
92 | * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) |
93 | * to use native APIs as much as possible? |
94 | */ |
95 | #ifndef WIN32_LEAN_AND_MEAN |
96 | # define WIN32_LEAN_AND_MEAN |
97 | #endif |
98 | # define VC_EXTRALEAN |
99 | # define NOUSER |
100 | # define NOSERVICE |
101 | # define NOIME |
102 | # define NOMCX |
103 | # include <windows.h> |
104 | # include "unicode/uloc.h" |
105 | # include "wintz.h" |
106 | #elif U_PLATFORM == U_PF_OS400 |
107 | # include <float.h> |
108 | # include <qusec.h> /* error code structure */ |
109 | # include <qusrjobi.h> |
110 | # include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ |
111 | # include <mih/testptr.h> /* For uprv_maximumPtr */ |
112 | #elif U_PLATFORM == U_PF_OS390 |
113 | # include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ |
114 | #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS |
115 | # include <limits.h> |
116 | # include <unistd.h> |
117 | # if U_PLATFORM == U_PF_SOLARIS |
118 | # ifndef _XPG4_2 |
119 | # define _XPG4_2 |
120 | # endif |
121 | # endif |
122 | #elif U_PLATFORM == U_PF_QNX |
123 | # include <sys/neutrino.h> |
124 | #endif |
125 | |
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
131 | |
132 | #if U_HAVE_NL_LANGINFO_CODESET |
133 | #include <langinfo.h> |
134 | #endif |
135 | |
136 | /** |
137 | * Simple things (presence of functions, etc) should just go in configure.in and be added to |
138 | * icucfg.h via autoheader. |
139 | */ |
140 | #if U_PLATFORM_IMPLEMENTS_POSIX |
141 | # if U_PLATFORM == U_PF_OS400 |
142 | # define HAVE_DLFCN_H 0 |
143 | # define HAVE_DLOPEN 0 |
144 | # else |
145 | # ifndef HAVE_DLFCN_H |
146 | # define HAVE_DLFCN_H 1 |
147 | # endif |
148 | # ifndef HAVE_DLOPEN |
149 | # define HAVE_DLOPEN 1 |
150 | # endif |
151 | # endif |
152 | # ifndef HAVE_GETTIMEOFDAY |
153 | # define HAVE_GETTIMEOFDAY 1 |
154 | # endif |
155 | #else |
156 | # define HAVE_DLFCN_H 0 |
157 | # define HAVE_DLOPEN 0 |
158 | # define HAVE_GETTIMEOFDAY 0 |
159 | #endif |
160 | |
161 | U_NAMESPACE_USE |
162 | |
163 | /* Define the extension for data files, again... */ |
164 | #define DATA_TYPE "dat" |
165 | |
166 | /* Leave this copyright notice here! */ |
167 | static const char copyright[] = U_COPYRIGHT_STRING; |
168 | |
169 | /* floating point implementations ------------------------------------------- */ |
170 | |
171 | /* We return QNAN rather than SNAN*/ |
172 | #define SIGN 0x80000000U |
173 | |
174 | /* Make it easy to define certain types of constants */ |
175 | typedef union { |
176 | int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ |
177 | double d64; |
178 | } BitPatternConversion; |
179 | static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; |
180 | static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; |
181 | |
182 | /*--------------------------------------------------------------------------- |
183 | Platform utilities |
184 | Our general strategy is to assume we're on a POSIX platform. Platforms which |
185 | are non-POSIX must declare themselves so. The default POSIX implementation |
186 | will sometimes work for non-POSIX platforms as well (e.g., the NaN-related |
187 | functions). |
188 | ---------------------------------------------------------------------------*/ |
189 | |
190 | #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 |
191 | # undef U_POSIX_LOCALE |
192 | #else |
193 | # define U_POSIX_LOCALE 1 |
194 | #endif |
195 | |
196 | /* |
197 | WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble |
198 | can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). |
199 | */ |
200 | #if !IEEE_754 |
201 | static char* |
202 | u_topNBytesOfDouble(double* d, int n) |
203 | { |
204 | #if U_IS_BIG_ENDIAN |
205 | return (char*)d; |
206 | #else |
207 | return (char*)(d + 1) - n; |
208 | #endif |
209 | } |
210 | |
211 | static char* |
212 | u_bottomNBytesOfDouble(double* d, int n) |
213 | { |
214 | #if U_IS_BIG_ENDIAN |
215 | return (char*)(d + 1) - n; |
216 | #else |
217 | return (char*)d; |
218 | #endif |
219 | } |
220 | #endif /* !IEEE_754 */ |
221 | |
222 | #if IEEE_754 |
223 | static UBool |
224 | u_signBit(double d) { |
225 | uint8_t hiByte; |
226 | #if U_IS_BIG_ENDIAN |
227 | hiByte = *(uint8_t *)&d; |
228 | #else |
229 | hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); |
230 | #endif |
231 | return (hiByte & 0x80) != 0; |
232 | } |
233 | #endif |
234 | |
235 | |
236 | |
237 | #if defined (U_DEBUG_FAKETIME) |
238 | /* Override the clock to test things without having to move the system clock. |
239 | * Assumes POSIX gettimeofday() will function |
240 | */ |
241 | UDate fakeClock_t0 = 0; /** Time to start the clock from **/ |
242 | UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ |
243 | UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ |
244 | |
245 | static UDate getUTCtime_real() { |
246 | struct timeval posixTime; |
247 | gettimeofday(&posixTime, NULL); |
248 | return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); |
249 | } |
250 | |
251 | static UDate getUTCtime_fake() { |
252 | static UMutex fakeClockMutex; |
253 | umtx_lock(&fakeClockMutex); |
254 | if(!fakeClock_set) { |
255 | UDate real = getUTCtime_real(); |
256 | const char *fake_start = getenv("U_FAKETIME_START" ); |
257 | if((fake_start!=NULL) && (fake_start[0]!=0)) { |
258 | sscanf(fake_start,"%lf" ,&fakeClock_t0); |
259 | fakeClock_dt = fakeClock_t0 - real; |
260 | fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" |
261 | "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n" , |
262 | fakeClock_t0, fake_start, fakeClock_dt, real); |
263 | } else { |
264 | fakeClock_dt = 0; |
265 | fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" |
266 | "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n" ); |
267 | } |
268 | fakeClock_set = TRUE; |
269 | } |
270 | umtx_unlock(&fakeClockMutex); |
271 | |
272 | return getUTCtime_real() + fakeClock_dt; |
273 | } |
274 | #endif |
275 | |
276 | #if U_PLATFORM_USES_ONLY_WIN32_API |
277 | typedef union { |
278 | int64_t int64; |
279 | FILETIME fileTime; |
280 | } FileTimeConversion; /* This is like a ULARGE_INTEGER */ |
281 | |
282 | /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ |
283 | #define EPOCH_BIAS INT64_C(116444736000000000) |
284 | #define HECTONANOSECOND_PER_MILLISECOND 10000 |
285 | |
286 | #endif |
287 | |
288 | /*--------------------------------------------------------------------------- |
289 | Universal Implementations |
290 | These are designed to work on all platforms. Try these, and if they |
291 | don't work on your platform, then special case your platform with new |
292 | implementations. |
293 | ---------------------------------------------------------------------------*/ |
294 | |
295 | U_CAPI UDate U_EXPORT2 |
296 | uprv_getUTCtime() |
297 | { |
298 | #if defined(U_DEBUG_FAKETIME) |
299 | return getUTCtime_fake(); /* Hook for overriding the clock */ |
300 | #else |
301 | return uprv_getRawUTCtime(); |
302 | #endif |
303 | } |
304 | |
305 | /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ |
306 | U_CAPI UDate U_EXPORT2 |
307 | uprv_getRawUTCtime() |
308 | { |
309 | #if U_PLATFORM_USES_ONLY_WIN32_API |
310 | |
311 | FileTimeConversion winTime; |
312 | GetSystemTimeAsFileTime(&winTime.fileTime); |
313 | return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); |
314 | #else |
315 | |
316 | #if HAVE_GETTIMEOFDAY |
317 | struct timeval posixTime; |
318 | gettimeofday(&posixTime, NULL); |
319 | return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); |
320 | #else |
321 | time_t epochtime; |
322 | time(&epochtime); |
323 | return (UDate)epochtime * U_MILLIS_PER_SECOND; |
324 | #endif |
325 | |
326 | #endif |
327 | } |
328 | |
329 | /*----------------------------------------------------------------------------- |
330 | IEEE 754 |
331 | These methods detect and return NaN and infinity values for doubles |
332 | conforming to IEEE 754. Platforms which support this standard include X86, |
333 | Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. |
334 | If this doesn't work on your platform, you have non-IEEE floating-point, and |
335 | will need to code your own versions. A naive implementation is to return 0.0 |
336 | for getNaN and getInfinity, and false for isNaN and isInfinite. |
337 | ---------------------------------------------------------------------------*/ |
338 | |
339 | U_CAPI UBool U_EXPORT2 |
340 | uprv_isNaN(double number) |
341 | { |
342 | #if IEEE_754 |
343 | BitPatternConversion convertedNumber; |
344 | convertedNumber.d64 = number; |
345 | /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ |
346 | return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); |
347 | |
348 | #elif U_PLATFORM == U_PF_OS390 |
349 | uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, |
350 | sizeof(uint32_t)); |
351 | uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, |
352 | sizeof(uint32_t)); |
353 | |
354 | return ((highBits & 0x7F080000L) == 0x7F080000L) && |
355 | (lowBits == 0x00000000L); |
356 | |
357 | #else |
358 | /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ |
359 | /* you'll need to replace this default implementation with what's correct*/ |
360 | /* for your platform.*/ |
361 | return number != number; |
362 | #endif |
363 | } |
364 | |
365 | U_CAPI UBool U_EXPORT2 |
366 | uprv_isInfinite(double number) |
367 | { |
368 | #if IEEE_754 |
369 | BitPatternConversion convertedNumber; |
370 | convertedNumber.d64 = number; |
371 | /* Infinity is exactly 0x7FF0000000000000U. */ |
372 | return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); |
373 | #elif U_PLATFORM == U_PF_OS390 |
374 | uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, |
375 | sizeof(uint32_t)); |
376 | uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, |
377 | sizeof(uint32_t)); |
378 | |
379 | return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); |
380 | |
381 | #else |
382 | /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ |
383 | /* value, you'll need to replace this default implementation with what's*/ |
384 | /* correct for your platform.*/ |
385 | return number == (2.0 * number); |
386 | #endif |
387 | } |
388 | |
389 | U_CAPI UBool U_EXPORT2 |
390 | uprv_isPositiveInfinity(double number) |
391 | { |
392 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
393 | return (UBool)(number > 0 && uprv_isInfinite(number)); |
394 | #else |
395 | return uprv_isInfinite(number); |
396 | #endif |
397 | } |
398 | |
399 | U_CAPI UBool U_EXPORT2 |
400 | uprv_isNegativeInfinity(double number) |
401 | { |
402 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
403 | return (UBool)(number < 0 && uprv_isInfinite(number)); |
404 | |
405 | #else |
406 | uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, |
407 | sizeof(uint32_t)); |
408 | return((highBits & SIGN) && uprv_isInfinite(number)); |
409 | |
410 | #endif |
411 | } |
412 | |
413 | U_CAPI double U_EXPORT2 |
414 | uprv_getNaN() |
415 | { |
416 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
417 | return gNan.d64; |
418 | #else |
419 | /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ |
420 | /* you'll need to replace this default implementation with what's correct*/ |
421 | /* for your platform.*/ |
422 | return 0.0; |
423 | #endif |
424 | } |
425 | |
426 | U_CAPI double U_EXPORT2 |
427 | uprv_getInfinity() |
428 | { |
429 | #if IEEE_754 || U_PLATFORM == U_PF_OS390 |
430 | return gInf.d64; |
431 | #else |
432 | /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ |
433 | /* value, you'll need to replace this default implementation with what's*/ |
434 | /* correct for your platform.*/ |
435 | return 0.0; |
436 | #endif |
437 | } |
438 | |
439 | U_CAPI double U_EXPORT2 |
440 | uprv_floor(double x) |
441 | { |
442 | return floor(x); |
443 | } |
444 | |
445 | U_CAPI double U_EXPORT2 |
446 | uprv_ceil(double x) |
447 | { |
448 | return ceil(x); |
449 | } |
450 | |
451 | U_CAPI double U_EXPORT2 |
452 | uprv_round(double x) |
453 | { |
454 | return uprv_floor(x + 0.5); |
455 | } |
456 | |
457 | U_CAPI double U_EXPORT2 |
458 | uprv_fabs(double x) |
459 | { |
460 | return fabs(x); |
461 | } |
462 | |
463 | U_CAPI double U_EXPORT2 |
464 | uprv_modf(double x, double* y) |
465 | { |
466 | return modf(x, y); |
467 | } |
468 | |
469 | U_CAPI double U_EXPORT2 |
470 | uprv_fmod(double x, double y) |
471 | { |
472 | return fmod(x, y); |
473 | } |
474 | |
475 | U_CAPI double U_EXPORT2 |
476 | uprv_pow(double x, double y) |
477 | { |
478 | /* This is declared as "double pow(double x, double y)" */ |
479 | return pow(x, y); |
480 | } |
481 | |
482 | U_CAPI double U_EXPORT2 |
483 | uprv_pow10(int32_t x) |
484 | { |
485 | return pow(10.0, (double)x); |
486 | } |
487 | |
488 | U_CAPI double U_EXPORT2 |
489 | uprv_fmax(double x, double y) |
490 | { |
491 | #if IEEE_754 |
492 | /* first handle NaN*/ |
493 | if(uprv_isNaN(x) || uprv_isNaN(y)) |
494 | return uprv_getNaN(); |
495 | |
496 | /* check for -0 and 0*/ |
497 | if(x == 0.0 && y == 0.0 && u_signBit(x)) |
498 | return y; |
499 | |
500 | #endif |
501 | |
502 | /* this should work for all flt point w/o NaN and Inf special cases */ |
503 | return (x > y ? x : y); |
504 | } |
505 | |
506 | U_CAPI double U_EXPORT2 |
507 | uprv_fmin(double x, double y) |
508 | { |
509 | #if IEEE_754 |
510 | /* first handle NaN*/ |
511 | if(uprv_isNaN(x) || uprv_isNaN(y)) |
512 | return uprv_getNaN(); |
513 | |
514 | /* check for -0 and 0*/ |
515 | if(x == 0.0 && y == 0.0 && u_signBit(y)) |
516 | return y; |
517 | |
518 | #endif |
519 | |
520 | /* this should work for all flt point w/o NaN and Inf special cases */ |
521 | return (x > y ? y : x); |
522 | } |
523 | |
524 | U_CAPI UBool U_EXPORT2 |
525 | uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) { |
526 | // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow. |
527 | // This function could be optimized by calling one of those primitives. |
528 | auto a64 = static_cast<int64_t>(a); |
529 | auto b64 = static_cast<int64_t>(b); |
530 | int64_t res64 = a64 + b64; |
531 | *res = static_cast<int32_t>(res64); |
532 | return res64 != *res; |
533 | } |
534 | |
535 | U_CAPI UBool U_EXPORT2 |
536 | uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) { |
537 | // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow. |
538 | // This function could be optimized by calling one of those primitives. |
539 | auto a64 = static_cast<int64_t>(a); |
540 | auto b64 = static_cast<int64_t>(b); |
541 | int64_t res64 = a64 * b64; |
542 | *res = static_cast<int32_t>(res64); |
543 | return res64 != *res; |
544 | } |
545 | |
546 | /** |
547 | * Truncates the given double. |
548 | * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 |
549 | * This is different than calling floor() or ceil(): |
550 | * floor(3.3) = 3, floor(-3.3) = -4 |
551 | * ceil(3.3) = 4, ceil(-3.3) = -3 |
552 | */ |
553 | U_CAPI double U_EXPORT2 |
554 | uprv_trunc(double d) |
555 | { |
556 | #if IEEE_754 |
557 | /* handle error cases*/ |
558 | if(uprv_isNaN(d)) |
559 | return uprv_getNaN(); |
560 | if(uprv_isInfinite(d)) |
561 | return uprv_getInfinity(); |
562 | |
563 | if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ |
564 | return ceil(d); |
565 | else |
566 | return floor(d); |
567 | |
568 | #else |
569 | return d >= 0 ? floor(d) : ceil(d); |
570 | |
571 | #endif |
572 | } |
573 | |
574 | /** |
575 | * Return the largest positive number that can be represented by an integer |
576 | * type of arbitrary bit length. |
577 | */ |
578 | U_CAPI double U_EXPORT2 |
579 | uprv_maxMantissa(void) |
580 | { |
581 | return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; |
582 | } |
583 | |
584 | U_CAPI double U_EXPORT2 |
585 | uprv_log(double d) |
586 | { |
587 | return log(d); |
588 | } |
589 | |
590 | U_CAPI void * U_EXPORT2 |
591 | uprv_maximumPtr(void * base) |
592 | { |
593 | #if U_PLATFORM == U_PF_OS400 |
594 | /* |
595 | * With the provided function we should never be out of range of a given segment |
596 | * (a traditional/typical segment that is). Our segments have 5 bytes for the |
597 | * id and 3 bytes for the offset. The key is that the casting takes care of |
598 | * only retrieving the offset portion minus x1000. Hence, the smallest offset |
599 | * seen in a program is x001000 and when casted to an int would be 0. |
600 | * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. |
601 | * |
602 | * Currently, 16MB is the current addressing limitation on i5/OS if the activation is |
603 | * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). |
604 | * This function determines the activation based on the pointer that is passed in and |
605 | * calculates the appropriate maximum available size for |
606 | * each pointer type (TERASPACE and non-TERASPACE) |
607 | * |
608 | * Unlike other operating systems, the pointer model isn't determined at |
609 | * compile time on i5/OS. |
610 | */ |
611 | if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { |
612 | /* if it is a TERASPACE pointer the max is 2GB - 4k */ |
613 | return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); |
614 | } |
615 | /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ |
616 | return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); |
617 | |
618 | #else |
619 | return U_MAX_PTR(base); |
620 | #endif |
621 | } |
622 | |
623 | /*--------------------------------------------------------------------------- |
624 | Platform-specific Implementations |
625 | Try these, and if they don't work on your platform, then special case your |
626 | platform with new implementations. |
627 | ---------------------------------------------------------------------------*/ |
628 | |
629 | /* Generic time zone layer -------------------------------------------------- */ |
630 | |
631 | /* Time zone utilities */ |
632 | U_CAPI void U_EXPORT2 |
633 | uprv_tzset() |
634 | { |
635 | #if defined(U_TZSET) |
636 | U_TZSET(); |
637 | #else |
638 | /* no initialization*/ |
639 | #endif |
640 | } |
641 | |
642 | U_CAPI int32_t U_EXPORT2 |
643 | uprv_timezone() |
644 | { |
645 | #ifdef U_TIMEZONE |
646 | return U_TIMEZONE; |
647 | #else |
648 | time_t t, t1, t2; |
649 | struct tm tmrec; |
650 | int32_t tdiff = 0; |
651 | |
652 | time(&t); |
653 | uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); |
654 | #if U_PLATFORM != U_PF_IPHONE |
655 | UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ |
656 | #endif |
657 | t1 = mktime(&tmrec); /* local time in seconds*/ |
658 | uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); |
659 | t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ |
660 | tdiff = t2 - t1; |
661 | |
662 | #if U_PLATFORM != U_PF_IPHONE |
663 | /* imitate NT behaviour, which returns same timezone offset to GMT for |
664 | winter and summer. |
665 | This does not work on all platforms. For instance, on glibc on Linux |
666 | and on Mac OS 10.5, tdiff calculated above remains the same |
667 | regardless of whether DST is in effect or not. iOS is another |
668 | platform where this does not work. Linux + glibc and Mac OS 10.5 |
669 | have U_TIMEZONE defined so that this code is not reached. |
670 | */ |
671 | if (dst_checked) |
672 | tdiff += 3600; |
673 | #endif |
674 | return tdiff; |
675 | #endif |
676 | } |
677 | |
678 | /* Note that U_TZNAME does *not* have to be tzname, but if it is, |
679 | some platforms need to have it declared here. */ |
680 | |
681 | #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED) |
682 | /* RS6000 and others reject char **tzname. */ |
683 | extern U_IMPORT char *U_TZNAME[]; |
684 | #endif |
685 | |
686 | #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) |
687 | /* These platforms are likely to use Olson timezone IDs. */ |
688 | /* common targets of the symbolic link at TZDEFAULT are: |
689 | * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12 |
690 | * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12 |
691 | * "/usr/share/lib/zoneinfo/<olsonID>" Solaris |
692 | * "../usr/share/lib/zoneinfo/<olsonID>" Solaris |
693 | * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13 |
694 | * To avoid checking lots of paths, just check that the target path |
695 | * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid. |
696 | */ |
697 | |
698 | #define CHECK_LOCALTIME_LINK 1 |
699 | #if U_PLATFORM_IS_DARWIN_BASED |
700 | #include <tzfile.h> |
701 | #define TZZONEINFO (TZDIR "/") |
702 | #elif U_PLATFORM == U_PF_SOLARIS |
703 | #define TZDEFAULT "/etc/localtime" |
704 | #define TZZONEINFO "/usr/share/lib/zoneinfo/" |
705 | #define TZ_ENV_CHECK "localtime" |
706 | #else |
707 | #define TZDEFAULT "/etc/localtime" |
708 | #define TZZONEINFO "/usr/share/zoneinfo/" |
709 | #endif |
710 | #define TZZONEINFOTAIL "/zoneinfo/" |
711 | #if U_HAVE_DIRENT_H |
712 | #define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ |
713 | /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo |
714 | symlinked to /etc/localtime, which makes searchForTZFile return |
715 | 'localtime' when it's the first match. */ |
716 | #define TZFILE_SKIP2 "localtime" |
717 | #define SEARCH_TZFILE |
718 | #include <dirent.h> /* Needed to search through system timezone files */ |
719 | #endif |
720 | static char gTimeZoneBuffer[PATH_MAX]; |
721 | static char *gTimeZoneBufferPtr = NULL; |
722 | #endif |
723 | |
724 | #if !U_PLATFORM_USES_ONLY_WIN32_API |
725 | #define isNonDigit(ch) (ch < '0' || '9' < ch) |
726 | static UBool isValidOlsonID(const char *id) { |
727 | int32_t idx = 0; |
728 | |
729 | /* Determine if this is something like Iceland (Olson ID) |
730 | or AST4ADT (non-Olson ID) */ |
731 | while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { |
732 | idx++; |
733 | } |
734 | |
735 | /* If we went through the whole string, then it might be okay. |
736 | The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", |
737 | "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. |
738 | The rest of the time it could be an Olson ID. George */ |
739 | return (UBool)(id[idx] == 0 |
740 | || uprv_strcmp(id, "PST8PDT" ) == 0 |
741 | || uprv_strcmp(id, "MST7MDT" ) == 0 |
742 | || uprv_strcmp(id, "CST6CDT" ) == 0 |
743 | || uprv_strcmp(id, "EST5EDT" ) == 0); |
744 | } |
745 | |
746 | /* On some Unix-like OS, 'posix' subdirectory in |
747 | /usr/share/zoneinfo replicates the top-level contents. 'right' |
748 | subdirectory has the same set of files, but individual files |
749 | are different from those in the top-level directory or 'posix' |
750 | because 'right' has files for TAI (Int'l Atomic Time) while 'posix' |
751 | has files for UTC. |
752 | When the first match for /etc/localtime is in either of them |
753 | (usually in posix because 'right' has different file contents), |
754 | or TZ environment variable points to one of them, createTimeZone |
755 | fails because, say, 'posix/America/New_York' is not an Olson |
756 | timezone id ('America/New_York' is). So, we have to skip |
757 | 'posix/' and 'right/' at the beginning. */ |
758 | static void skipZoneIDPrefix(const char** id) { |
759 | if (uprv_strncmp(*id, "posix/" , 6) == 0 |
760 | || uprv_strncmp(*id, "right/" , 6) == 0) |
761 | { |
762 | *id += 6; |
763 | } |
764 | } |
765 | #endif |
766 | |
767 | #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API |
768 | |
769 | #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) |
770 | typedef struct OffsetZoneMapping { |
771 | int32_t offsetSeconds; |
772 | int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ |
773 | const char *stdID; |
774 | const char *dstID; |
775 | const char *olsonID; |
776 | } OffsetZoneMapping; |
777 | |
778 | enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; |
779 | |
780 | /* |
781 | This list tries to disambiguate a set of abbreviated timezone IDs and offsets |
782 | and maps it to an Olson ID. |
783 | Before adding anything to this list, take a look at |
784 | icu/source/tools/tzcode/tz.alias |
785 | Sometimes no daylight savings (0) is important to define due to aliases. |
786 | This list can be tested with icu/source/test/compat/tzone.pl |
787 | More values could be added to daylightType to increase precision. |
788 | */ |
789 | static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { |
790 | {-45900, 2, "CHAST" , "CHADT" , "Pacific/Chatham" }, |
791 | {-43200, 1, "PETT" , "PETST" , "Asia/Kamchatka" }, |
792 | {-43200, 2, "NZST" , "NZDT" , "Pacific/Auckland" }, |
793 | {-43200, 1, "ANAT" , "ANAST" , "Asia/Anadyr" }, |
794 | {-39600, 1, "MAGT" , "MAGST" , "Asia/Magadan" }, |
795 | {-37800, 2, "LHST" , "LHST" , "Australia/Lord_Howe" }, |
796 | {-36000, 2, "EST" , "EST" , "Australia/Sydney" }, |
797 | {-36000, 1, "SAKT" , "SAKST" , "Asia/Sakhalin" }, |
798 | {-36000, 1, "VLAT" , "VLAST" , "Asia/Vladivostok" }, |
799 | {-34200, 2, "CST" , "CST" , "Australia/South" }, |
800 | {-32400, 1, "YAKT" , "YAKST" , "Asia/Yakutsk" }, |
801 | {-32400, 1, "CHOT" , "CHOST" , "Asia/Choibalsan" }, |
802 | {-31500, 2, "CWST" , "CWST" , "Australia/Eucla" }, |
803 | {-28800, 1, "IRKT" , "IRKST" , "Asia/Irkutsk" }, |
804 | {-28800, 1, "ULAT" , "ULAST" , "Asia/Ulaanbaatar" }, |
805 | {-28800, 2, "WST" , "WST" , "Australia/West" }, |
806 | {-25200, 1, "HOVT" , "HOVST" , "Asia/Hovd" }, |
807 | {-25200, 1, "KRAT" , "KRAST" , "Asia/Krasnoyarsk" }, |
808 | {-21600, 1, "NOVT" , "NOVST" , "Asia/Novosibirsk" }, |
809 | {-21600, 1, "OMST" , "OMSST" , "Asia/Omsk" }, |
810 | {-18000, 1, "YEKT" , "YEKST" , "Asia/Yekaterinburg" }, |
811 | {-14400, 1, "SAMT" , "SAMST" , "Europe/Samara" }, |
812 | {-14400, 1, "AMT" , "AMST" , "Asia/Yerevan" }, |
813 | {-14400, 1, "AZT" , "AZST" , "Asia/Baku" }, |
814 | {-10800, 1, "AST" , "ADT" , "Asia/Baghdad" }, |
815 | {-10800, 1, "MSK" , "MSD" , "Europe/Moscow" }, |
816 | {-10800, 1, "VOLT" , "VOLST" , "Europe/Volgograd" }, |
817 | {-7200, 0, "EET" , "CEST" , "Africa/Tripoli" }, |
818 | {-7200, 1, "EET" , "EEST" , "Europe/Athens" }, /* Conflicts with Africa/Cairo */ |
819 | {-7200, 1, "IST" , "IDT" , "Asia/Jerusalem" }, |
820 | {-3600, 0, "CET" , "WEST" , "Africa/Algiers" }, |
821 | {-3600, 2, "WAT" , "WAST" , "Africa/Windhoek" }, |
822 | {0, 1, "GMT" , "IST" , "Europe/Dublin" }, |
823 | {0, 1, "GMT" , "BST" , "Europe/London" }, |
824 | {0, 0, "WET" , "WEST" , "Africa/Casablanca" }, |
825 | {0, 0, "WET" , "WET" , "Africa/El_Aaiun" }, |
826 | {3600, 1, "AZOT" , "AZOST" , "Atlantic/Azores" }, |
827 | {3600, 1, "EGT" , "EGST" , "America/Scoresbysund" }, |
828 | {10800, 1, "PMST" , "PMDT" , "America/Miquelon" }, |
829 | {10800, 2, "UYT" , "UYST" , "America/Montevideo" }, |
830 | {10800, 1, "WGT" , "WGST" , "America/Godthab" }, |
831 | {10800, 2, "BRT" , "BRST" , "Brazil/East" }, |
832 | {12600, 1, "NST" , "NDT" , "America/St_Johns" }, |
833 | {14400, 1, "AST" , "ADT" , "Canada/Atlantic" }, |
834 | {14400, 2, "AMT" , "AMST" , "America/Cuiaba" }, |
835 | {14400, 2, "CLT" , "CLST" , "Chile/Continental" }, |
836 | {14400, 2, "FKT" , "FKST" , "Atlantic/Stanley" }, |
837 | {14400, 2, "PYT" , "PYST" , "America/Asuncion" }, |
838 | {18000, 1, "CST" , "CDT" , "America/Havana" }, |
839 | {18000, 1, "EST" , "EDT" , "US/Eastern" }, /* Conflicts with America/Grand_Turk */ |
840 | {21600, 2, "EAST" , "EASST" , "Chile/EasterIsland" }, |
841 | {21600, 0, "CST" , "MDT" , "Canada/Saskatchewan" }, |
842 | {21600, 0, "CST" , "CDT" , "America/Guatemala" }, |
843 | {21600, 1, "CST" , "CDT" , "US/Central" }, /* Conflicts with Mexico/General */ |
844 | {25200, 1, "MST" , "MDT" , "US/Mountain" }, /* Conflicts with Mexico/BajaSur */ |
845 | {28800, 0, "PST" , "PST" , "Pacific/Pitcairn" }, |
846 | {28800, 1, "PST" , "PDT" , "US/Pacific" }, /* Conflicts with Mexico/BajaNorte */ |
847 | {32400, 1, "AKST" , "AKDT" , "US/Alaska" }, |
848 | {36000, 1, "HAST" , "HADT" , "US/Aleutian" } |
849 | }; |
850 | |
851 | /*#define DEBUG_TZNAME*/ |
852 | |
853 | static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) |
854 | { |
855 | int32_t idx; |
856 | #ifdef DEBUG_TZNAME |
857 | fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n" , getenv("TZ" ), stdID, dstID, daylightType, offset); |
858 | #endif |
859 | for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) |
860 | { |
861 | if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds |
862 | && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType |
863 | && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 |
864 | && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) |
865 | { |
866 | return OFFSET_ZONE_MAPPINGS[idx].olsonID; |
867 | } |
868 | } |
869 | return NULL; |
870 | } |
871 | #endif |
872 | |
873 | #ifdef SEARCH_TZFILE |
874 | #define MAX_READ_SIZE 512 |
875 | |
/*
 * State carried across compareBinaryFiles() calls so that the default TZ file
 * is opened, measured and read only once while many candidate files are tested.
 * uprv_tzname() allocates it, zero-initializes the fields, and releases the
 * buffer and file pointer afterwards.
 */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default TZ file; NULL until compareBinaryFiles() first needs it */
    int64_t defaultTZFileSize;  /* size of the default TZ file as reported by ftell(); 0 means "not measured yet" */
    FILE* defaultTZFilePtr;     /* stream for the default TZ file; opened on the first compareBinaryFiles() call */
    UBool defaultTZstatus;      /* set to FALSE by uprv_tzname(); not read by the code visible in this section */
    int32_t defaultTZPosition;  /* offset into defaultTZBuffer of the next chunk to compare; reset to 0 per comparison */
} DefaultTZInfo;
883 | |
884 | /* |
885 | * This method compares the two files given to see if they are a match. |
886 | * It is currently use to compare two TZ files. |
887 | */ |
888 | static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { |
889 | FILE* file; |
890 | int64_t sizeFile; |
891 | int64_t sizeFileLeft; |
892 | int32_t sizeFileRead; |
893 | int32_t sizeFileToRead; |
894 | char bufferFile[MAX_READ_SIZE]; |
895 | UBool result = TRUE; |
896 | |
897 | if (tzInfo->defaultTZFilePtr == NULL) { |
898 | tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r" ); |
899 | } |
900 | file = fopen(TZFileName, "r" ); |
901 | |
902 | tzInfo->defaultTZPosition = 0; /* reset position to begin search */ |
903 | |
904 | if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { |
905 | /* First check that the file size are equal. */ |
906 | if (tzInfo->defaultTZFileSize == 0) { |
907 | fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); |
908 | tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); |
909 | } |
910 | fseek(file, 0, SEEK_END); |
911 | sizeFile = ftell(file); |
912 | sizeFileLeft = sizeFile; |
913 | |
914 | if (sizeFile != tzInfo->defaultTZFileSize) { |
915 | result = FALSE; |
916 | } else { |
917 | /* Store the data from the files in seperate buffers and |
918 | * compare each byte to determine equality. |
919 | */ |
920 | if (tzInfo->defaultTZBuffer == NULL) { |
921 | rewind(tzInfo->defaultTZFilePtr); |
922 | tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); |
923 | sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); |
924 | } |
925 | rewind(file); |
926 | while(sizeFileLeft > 0) { |
927 | uprv_memset(bufferFile, 0, MAX_READ_SIZE); |
928 | sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; |
929 | |
930 | sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); |
931 | if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { |
932 | result = FALSE; |
933 | break; |
934 | } |
935 | sizeFileLeft -= sizeFileRead; |
936 | tzInfo->defaultTZPosition += sizeFileRead; |
937 | } |
938 | } |
939 | } else { |
940 | result = FALSE; |
941 | } |
942 | |
943 | if (file != NULL) { |
944 | fclose(file); |
945 | } |
946 | |
947 | return result; |
948 | } |
949 | |
950 | |
951 | /* dirent also lists two entries: "." and ".." that we can safely ignore. */ |
952 | #define SKIP1 "." |
953 | #define SKIP2 ".." |
954 | static UBool U_CALLCONV putil_cleanup(void); |
955 | static CharString *gSearchTZFileResult = NULL; |
956 | |
/*
 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
 *
 * path   - directory to scan. Entry names are appended to it directly and a '/' is
 *          only added before recursing, so it presumably must already end with a
 *          separator - TODO confirm against the TZZONEINFO definition.
 * tzInfo - cached state for the default TZ file, passed through to compareBinaryFiles().
 *
 * Returns a pointer into gSearchTZFileResult (owned by that global, not by the caller)
 * holding the matched path with the zoneinfo prefix stripped, or NULL when nothing
 * matched, the directory could not be opened, or an allocation/append failed.
 */
static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
    DIR* dirp = NULL;
    struct dirent* dirEntry = NULL;
    char* result = NULL;
    UErrorCode status = U_ZERO_ERROR;

    /* Save the current path */
    CharString curpath(path, -1, status);
    if (U_FAILURE(status)) {
        goto cleanupAndReturn;
    }

    dirp = opendir(path);
    if (dirp == NULL) {
        goto cleanupAndReturn;
    }

    /* Lazily create the global result holder and register its cleanup callback. */
    if (gSearchTZFileResult == NULL) {
        gSearchTZFileResult = new CharString;
        if (gSearchTZFileResult == NULL) {
            goto cleanupAndReturn;
        }
        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    }

    /* Check each entry in the directory. */
    while((dirEntry = readdir(dirp)) != NULL) {
        const char* dirName = dirEntry->d_name;
        /* Ignore ".", ".." and the two file names excluded via TZFILE_SKIP / TZFILE_SKIP2. */
        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
            && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
            /* Create a newpath with the new entry to test each entry in the directory. */
            CharString newpath(curpath, status);
            newpath.append(dirName, -1, status);
            if (U_FAILURE(status)) {
                break;
            }

            /* opendir() doubles as the "is this a directory?" test. */
            DIR* subDirp = NULL;
            if ((subDirp = opendir(newpath.data())) != NULL) {
                /* If this new path is a directory, make a recursive call with the newpath. */
                closedir(subDirp);
                newpath.append('/', status);
                if (U_FAILURE(status)) {
                    break;
                }
                result = searchForTZFile(newpath.data(), tzInfo);
                /*
                 Have to get out here. Otherwise, we'd keep looking
                 and return the first match in the top-level directory
                 if there's a match in the top-level. If not, this function
                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
                 It worked without this in most cases because we have a fallback of calling
                 localtime_r to figure out the default timezone.
                */
                if (result != NULL)
                    break;
            } else {
                if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
                    /* Drop the leading TZZONEINFO prefix (clamped to the path length). */
                    int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
                    if (amountToSkip > newpath.length()) {
                        amountToSkip = newpath.length();
                    }
                    const char* zoneid = newpath.data() + amountToSkip;
                    skipZoneIDPrefix(&zoneid);
                    /* Copy into the global holder: newpath goes out of scope at the end of this iteration. */
                    gSearchTZFileResult->clear();
                    gSearchTZFileResult->append(zoneid, -1, status);
                    if (U_FAILURE(status)) {
                        break;
                    }
                    result = gSearchTZFileResult->data();
                    /* Get out after the first one found. */
                    break;
                }
            }
        }
    }

  cleanupAndReturn:
    if (dirp) {
        closedir(dirp);
    }
    return result;
}
1044 | #endif |
1045 | |
/*
 * Forgets the time zone ID cached by uprv_tzname() so that the next call
 * detects it again. Only has an effect when CHECK_LOCALTIME_LINK is defined
 * and DEBUG_SKIP_LOCALTIME_LINK is not; otherwise this is a no-op.
 */
U_CAPI void U_EXPORT2
uprv_tzname_clear_cache()
{
#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    gTimeZoneBufferPtr = NULL;
#endif
}
1053 | |
/*
 * Returns the best available identifier for the host's default time zone.
 *
 * Win32-only builds:
 *   1. the result of uprv_detectWindowsTimeZone(), if non-NULL;
 *   2. otherwise a heap copy of U_TZNAME[n] when U_TZNAME is defined, or a heap
 *      copy of "" when it is not.
 *
 * All other builds, in order:
 *   1. the TZ environment variable, if it passes isValidOlsonID()
 *      (skipped entirely when DEBUG_TZNAME is defined);
 *   2. with CHECK_LOCALTIME_LINK: the cached gTimeZoneBufferPtr, or the target of the
 *      TZDEFAULT symlink after TZZONEINFOTAIL, or (with SEARCH_TZFILE, when readlink
 *      fails) the zoneinfo file whose contents equal TZDEFAULT;
 *   3. with U_TZNAME: an Olson ID remapped from the abbreviations via
 *      remapShortTimeZone(), falling back to the raw U_TZNAME[n];
 *   4. otherwise "".
 *
 * n is only used as an index into U_TZNAME.
 * Threading is the caller's responsibility: the localtime-link path reads and
 * writes the globals gTimeZoneBuffer / gTimeZoneBufferPtr without locking.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != NULL) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location.
    // If we don't have a name already, then tzname wouldn't be any
    // better, so just fall back.
    return uprv_strdup("" );
#endif // !U_TZNAME

#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != NULL) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
    tzid = getenv("TZ" );
    if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == NULL) {
        /*
         This is a trick to look at the name of the link to get the Olson ID
         because the tzfile contents is underspecified.
         This isn't guaranteed to work because it may not be a symlink.
         */
        /* readlink() does not NUL-terminate; one byte is reserved and the terminator is written below. */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
            gTimeZoneBuffer[ret] = 0;
            char *  tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);

            if (tzZoneInfoTailPtr != NULL
                && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
            {
                /* Cache and return the part of the link target that follows TZZONEINFOTAIL. */
                return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
            }
            /* NOTE(review): when the link target does not contain a valid ID nothing is
             * cached and control falls through to the U_TZNAME handling below. */
        } else {
#if defined(SEARCH_TZFILE)
            /* TZDEFAULT is not a readable symlink: look for a zoneinfo file with identical contents. */
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already resolved the zone; reuse it. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs. */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        /* December is tested first, so a zone reporting DST at both probes is classified as December. */
        if(decemberSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_JUNE;
        } else {
            daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    /* No table entry matched: hand back the raw abbreviation. */
    return U_TZNAME[n];
#endif
#else
    return "" ;
#endif
}
1199 | |
1200 | /* Get and set the ICU data directory --------------------------------------- */ |
1201 | |
1202 | static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; |
1203 | static char *gDataDirectory = NULL; |
1204 | |
1205 | UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; |
1206 | static CharString *gTimeZoneFilesDirectory = NULL; |
1207 | |
1208 | #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API |
1209 | static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ |
1210 | static bool gCorrectedPOSIXLocaleHeapAllocated = false; |
1211 | #endif |
1212 | |
1213 | static UBool U_CALLCONV putil_cleanup(void) |
1214 | { |
1215 | if (gDataDirectory && *gDataDirectory) { |
1216 | uprv_free(gDataDirectory); |
1217 | } |
1218 | gDataDirectory = NULL; |
1219 | gDataDirInitOnce.reset(); |
1220 | |
1221 | delete gTimeZoneFilesDirectory; |
1222 | gTimeZoneFilesDirectory = NULL; |
1223 | gTimeZoneFilesInitOnce.reset(); |
1224 | |
1225 | #ifdef SEARCH_TZFILE |
1226 | delete gSearchTZFileResult; |
1227 | gSearchTZFileResult = NULL; |
1228 | #endif |
1229 | |
1230 | #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API |
1231 | if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { |
1232 | uprv_free(const_cast<char *>(gCorrectedPOSIXLocale)); |
1233 | gCorrectedPOSIXLocale = NULL; |
1234 | gCorrectedPOSIXLocaleHeapAllocated = false; |
1235 | } |
1236 | #endif |
1237 | return TRUE; |
1238 | } |
1239 | |
1240 | /* |
1241 | * Set the data directory. |
1242 | * Make a copy of the passed string, and set the global data dir to point to it. |
1243 | */ |
1244 | U_CAPI void U_EXPORT2 |
1245 | u_setDataDirectory(const char *directory) { |
1246 | char *newDataDir; |
1247 | int32_t length; |
1248 | |
1249 | if(directory==NULL || *directory==0) { |
1250 | /* A small optimization to prevent the malloc and copy when the |
1251 | shared library is used, and this is a way to make sure that NULL |
1252 | is never returned. |
1253 | */ |
1254 | newDataDir = (char *)"" ; |
1255 | } |
1256 | else { |
1257 | length=(int32_t)uprv_strlen(directory); |
1258 | newDataDir = (char *)uprv_malloc(length + 2); |
1259 | /* Exit out if newDataDir could not be created. */ |
1260 | if (newDataDir == NULL) { |
1261 | return; |
1262 | } |
1263 | uprv_strcpy(newDataDir, directory); |
1264 | |
1265 | #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
1266 | { |
1267 | char *p; |
1268 | while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) { |
1269 | *p = U_FILE_SEP_CHAR; |
1270 | } |
1271 | } |
1272 | #endif |
1273 | } |
1274 | |
1275 | if (gDataDirectory && *gDataDirectory) { |
1276 | uprv_free(gDataDirectory); |
1277 | } |
1278 | gDataDirectory = newDataDir; |
1279 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
1280 | } |
1281 | |
1282 | U_CAPI UBool U_EXPORT2 |
1283 | uprv_pathIsAbsolute(const char *path) |
1284 | { |
1285 | if(!path || !*path) { |
1286 | return FALSE; |
1287 | } |
1288 | |
1289 | if(*path == U_FILE_SEP_CHAR) { |
1290 | return TRUE; |
1291 | } |
1292 | |
1293 | #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
1294 | if(*path == U_FILE_ALT_SEP_CHAR) { |
1295 | return TRUE; |
1296 | } |
1297 | #endif |
1298 | |
1299 | #if U_PLATFORM_USES_ONLY_WIN32_API |
1300 | if( (((path[0] >= 'A') && (path[0] <= 'Z')) || |
1301 | ((path[0] >= 'a') && (path[0] <= 'z'))) && |
1302 | path[1] == ':' ) { |
1303 | return TRUE; |
1304 | } |
1305 | #endif |
1306 | |
1307 | return FALSE; |
1308 | } |
1309 | |
1310 | /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR |
1311 | (needed for some Darwin ICU build environments) */ |
1312 | #if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR |
1313 | # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) |
1314 | # define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" |
1315 | # endif |
1316 | #endif |
1317 | |
1318 | #if defined(ICU_DATA_DIR_WINDOWS) |
// Helper function to get the ICU Data Directory under the Windows directory location.
//
// Builds "<Windows directory><separator>ICU_DATA_DIR_WINDOWS" as a UTF-8 string in
// directoryBuffer (capacity bufferLength chars, including the terminator).
// Returns TRUE on success. Returns FALSE, leaving directoryBuffer unwritten, when the
// Windows directory cannot be obtained, cannot be converted, or the result does not fit.
static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
{
    wchar_t windowsPath[MAX_PATH];
    char windowsPathUtf8[MAX_PATH];

    UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
    // Reject failure (0) and results that leave no slack in windowsPath.
    if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
        // Convert UTF-16 to a UTF-8 string.
        UErrorCode status = U_ZERO_ERROR;
        int32_t windowsPathUtf8Len = 0;
        u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
            &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);

        // Require a terminated result with at least one spare char for the separator added below.
        if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
            (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
            // Ensure it always has a separator, so we can append the ICU data path.
            // NOTE(review): indexes [windowsPathUtf8Len - 1]; presumably the length is never 0
            // here because the source path was non-empty - confirm.
            if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
                windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
                windowsPathUtf8[windowsPathUtf8Len] = '\0';
            }
            // Check if the concatenated string will fit.
            // (UPRV_LENGTHOF of the literal counts its terminating NUL.)
            if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
                uprv_strcpy(directoryBuffer, windowsPathUtf8);
                uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
                return TRUE;
            }
        }
    }

    return FALSE;
}
1351 | #endif |
1352 | |
/*
 * One-time initializer for gDataDirectory, run through gDataDirInitOnce by
 * u_getDataDirectory(). Picks a path from, in order:
 *   1. the ICU_DATA environment variable (unless disabled at build time),
 *   2. ICU_DATA_DIR or U_ICU_DATA_DEFAULT_DIR, optionally prefixed with the value
 *      of the ICU_DATA_DIR_PREFIX_ENV_VAR environment variable,
 *   3. "" as a last resort,
 * and stores a copy via u_setDataDirectory(). When ICU_DATA_DIR_WINDOWS is defined
 * and the Windows directory lookup succeeds, that path replaces whatever was
 * chosen by the earlier steps.
 */
static void U_CALLCONV dataDirectoryInitFn() {
    /* If we already have the directory, then return immediately. Will happen if user called
     * u_setDataDirectory().
     */
    if (gDataDirectory) {
        return;
    }

    const char *path = NULL;
#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
    /* Scratch space for "<prefix><path>"; must outlive the u_setDataDirectory() call below. */
    char datadir_path_buffer[PATH_MAX];
#endif

    /*
    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
    override ICU's data with the ICU_DATA environment variable. This prevents
    problems where multiple custom copies of ICU's specific version of data
    are installed on a system. Either the application must define the data
    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
    ICU, set the data with udata_setCommonData or trust that all of the
    required data is contained in ICU's data library that contains
    the entry point defined by U_ICUDATA_ENTRY_POINT.

    There may also be some platforms where environment variables
    are not allowed.
    */
#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
    /* First try to get the environment variable */
#     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
        path=getenv("ICU_DATA" );
#     endif
#   endif

    /* ICU_DATA_DIR may be set as a compile option.
     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
     * and is used only when data is built in archive mode eliminating the need
     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
     * set their own path.
     */
#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
    if(path==NULL || *path==0) {
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
# endif
# ifdef ICU_DATA_DIR
        path=ICU_DATA_DIR;
# else
        path=U_ICU_DATA_DEFAULT_DIR;
# endif
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        if (prefix != NULL) {
            /* snprintf truncates silently if prefix + path exceeds PATH_MAX. */
            snprintf(datadir_path_buffer, PATH_MAX, "%s%s" , prefix, path);
            path=datadir_path_buffer;
        }
# endif
    }
#endif

#if defined(ICU_DATA_DIR_WINDOWS)
    /* NOTE(review): this declares datadir_path_buffer a second time in the same scope if
     * ICU_DATA_DIR_PREFIX_ENV_VAR is also defined - presumably the two are never combined; confirm. */
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        path = datadir_path_buffer;
    }
#endif

    if(path==NULL) {
        /* It looks really bad, set it to something. */
        path = "" ;
    }

    /* u_setDataDirectory() stores its own copy, so pointing at a stack buffer is fine. */
    u_setDataDirectory(path);
    return;
}
1427 | |
/*
 * Returns the ICU data directory, running dataDirectoryInitFn() on first use
 * (guarded by gDataDirInitOnce). The returned string is the global
 * gDataDirectory itself, not a copy.
 */
U_CAPI const char * U_EXPORT2
u_getDataDirectory(void) {
    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
    return gDataDirectory;
}
1433 | |
1434 | static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { |
1435 | if (U_FAILURE(status)) { |
1436 | return; |
1437 | } |
1438 | gTimeZoneFilesDirectory->clear(); |
1439 | gTimeZoneFilesDirectory->append(path, status); |
1440 | #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) |
1441 | char *p = gTimeZoneFilesDirectory->data(); |
1442 | while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) { |
1443 | *p = U_FILE_SEP_CHAR; |
1444 | } |
1445 | #endif |
1446 | } |
1447 | |
1448 | #define TO_STRING(x) TO_STRING_2(x) |
1449 | #define TO_STRING_2(x) #x |
1450 | |
/*
 * One-time initializer for gTimeZoneFilesDirectory (run through
 * gTimeZoneFilesInitOnce). Allocates the CharString and fills it with:
 *   - on UWP builds: the ICU data directory under the Windows directory when
 *     ICU_DATA_DIR_WINDOWS is defined and the lookup succeeds, else "";
 *   - elsewhere: the ICU_TIMEZONE_FILES_DIR environment variable;
 *   - if that yields NULL and U_TIMEZONE_FILES_DIR is defined: its stringified value;
 *   - otherwise "".
 * Sets status to U_MEMORY_ALLOCATION_ERROR if the CharString cannot be allocated.
 */
static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
    U_ASSERT(gTimeZoneFilesDirectory == NULL);
    /* Registered before the allocation so putil_cleanup() also re-arms the init-once guard. */
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    gTimeZoneFilesDirectory = new CharString();
    if (gTimeZoneFilesDirectory == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const char *dir = "" ;

#if U_PLATFORM_HAS_WINUWP_API == 1
// The UWP version does not support the environment variable setting.

# if defined(ICU_DATA_DIR_WINDOWS)
    // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        dir = datadir_path_buffer;
    }
# endif

#else
    /* May be NULL when the variable is unset; handled below. */
    dir = getenv("ICU_TIMEZONE_FILES_DIR" );
#endif // U_PLATFORM_HAS_WINUWP_API

#if defined(U_TIMEZONE_FILES_DIR)
    if (dir == NULL) {
        // Build time configuration setting.
        dir = TO_STRING(U_TIMEZONE_FILES_DIR);
    }
#endif

    if (dir == NULL) {
        dir = "" ;
    }

    /* Copies dir into the global, so the stack buffer above need not outlive this call. */
    setTimeZoneFilesDir(dir, status);
}
1490 | |
1491 | |
1492 | U_CAPI const char * U_EXPORT2 |
1493 | u_getTimeZoneFilesDirectory(UErrorCode *status) { |
1494 | umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); |
1495 | return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "" ; |
1496 | } |
1497 | |
/*
 * Overrides the directory used for time zone data files with path.
 * The default initialization runs first (so the global exists), then
 * setTimeZoneFilesDir() replaces the value; nothing is changed when *status
 * holds a failure. status must not be NULL.
 */
U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
    setTimeZoneFilesDir(path, *status);

    // Note: this function does some extra churn, first setting based on the
    //       environment, then immediately replacing with the value passed in.
    //       The logic is simpler that way, and performance shouldn't be an issue.
}
1507 | |
1508 | |
1509 | #if U_POSIX_LOCALE |
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * Lookup order: setlocale(category, NULL); if that is NULL, "C" or "POSIX",
 * the environment variables LC_ALL, then LC_MESSAGES/LC_CTYPE (matching the
 * category), then LANG. A final result of NULL, "C" or "POSIX" - which includes
 * any unsupported category - is mapped to "en_US_POSIX", so the return value
 * is never NULL. The returned pointer is not owned by the caller.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C" , posixID) == 0)
            || (uprv_strcmp("POSIX" , posixID) == 0))
        {
            /* Maybe we got some garbage.  Try something more reasonable */
            posixID = getenv("LC_ALL" );
            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
             * This is needed to properly handle empty env. variables
             */
            /* The two preprocessor branches below open the same pair of nested
             * if-blocks; only the "is this variable unset?" test differs. */
#if U_PLATFORM == U_PF_SOLARIS
            if ((posixID == 0) || (posixID[0] == '\0')) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE" );
                if ((posixID == 0) || (posixID[0] == '\0')) {
#else
            if (posixID == 0) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE" );
                if (posixID == 0) {
#endif
                    posixID = getenv("LANG" );
                }
            }
        }
    }
    if ((posixID==0)
        || (uprv_strcmp("C" , posixID) == 0)
        || (uprv_strcmp("POSIX" , posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX" ;
        // Note: this test will not catch 'C.UTF-8',
        // that will be handled in uprv_getDefaultLocaleID().
        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
        // caller which expects to see "en_US_POSIX" in many branches.
    }
    return posixID;
}
1573 | |
1574 | /* Return just the POSIX id for the default locale, whatever happens to be in |
1575 | * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. |
1576 | */ |
1577 | static const char *uprv_getPOSIXIDForDefaultLocale(void) |
1578 | { |
1579 | static const char* posixID = NULL; |
1580 | if (posixID == 0) { |
1581 | posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); |
1582 | } |
1583 | return posixID; |
1584 | } |
1585 | |
1586 | #if !U_CHARSET_IS_UTF8 |
1587 | /* Return just the POSIX id for the default codepage, whatever happens to be in |
1588 | * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. |
1589 | */ |
1590 | static const char *uprv_getPOSIXIDForDefaultCodepage(void) |
1591 | { |
1592 | static const char* posixID = NULL; |
1593 | if (posixID == 0) { |
1594 | posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); |
1595 | } |
1596 | return posixID; |
1597 | } |
1598 | #endif |
1599 | #endif |
1600 | |
1601 | /* NOTE: The caller should handle thread safety */ |
1602 | U_CAPI const char* U_EXPORT2 |
1603 | uprv_getDefaultLocaleID() |
1604 | { |
1605 | #if U_POSIX_LOCALE |
1606 | /* |
1607 | Note that: (a '!' means the ID is improper somehow) |
1608 | LC_ALL ----> default_loc codepage |
1609 | -------------------------------------------------------- |
1610 | ab.CD ab CD |
1611 | ab@CD ab__CD - |
1612 | ab@CD.EF ab__CD EF |
1613 | |
1614 | ab_CD.EF@GH ab_CD_GH EF |
1615 | |
1616 | Some 'improper' ways to do the same as above: |
1617 | ! ab_CD@GH.EF ab_CD_GH EF |
1618 | ! ab_CD.EF@GH.IJ ab_CD_GH EF |
1619 | ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF |
1620 | |
1621 | _CD@GH _CD_GH - |
1622 | _CD.EF@GH _CD_GH EF |
1623 | |
1624 | The variant cannot have dots in it. |
1625 | The 'rightmost' variant (@xxx) wins. |
1626 | The leftmost codepage (.xxx) wins. |
1627 | */ |
1628 | const char* posixID = uprv_getPOSIXIDForDefaultLocale(); |
1629 | |
1630 | /* Format: (no spaces) |
1631 | ll [ _CC ] [ . MM ] [ @ VV] |
1632 | |
1633 | l = lang, C = ctry, M = charmap, V = variant |
1634 | */ |
1635 | |
1636 | if (gCorrectedPOSIXLocale != nullptr) { |
1637 | return gCorrectedPOSIXLocale; |
1638 | } |
1639 | |
1640 | // Copy the ID into owned memory. |
1641 | // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination |
1642 | char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); |
1643 | if (correctedPOSIXLocale == nullptr) { |
1644 | return nullptr; |
1645 | } |
1646 | uprv_strcpy(correctedPOSIXLocale, posixID); |
1647 | |
1648 | char *limit; |
1649 | if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { |
1650 | *limit = 0; |
1651 | } |
1652 | if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { |
1653 | *limit = 0; |
1654 | } |
1655 | |
1656 | if ((uprv_strcmp("C" , correctedPOSIXLocale) == 0) // no @ variant |
1657 | || (uprv_strcmp("POSIX" , correctedPOSIXLocale) == 0)) { |
1658 | // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. |
1659 | // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) |
1660 | uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX" ); |
1661 | } |
1662 | |
1663 | /* Note that we scan the *uncorrected* ID. */ |
1664 | const char *p; |
1665 | if ((p = uprv_strrchr(posixID, '@')) != nullptr) { |
1666 | p++; |
1667 | |
1668 | /* Take care of any special cases here.. */ |
1669 | if (!uprv_strcmp(p, "nynorsk" )) { |
1670 | p = "NY" ; |
1671 | /* Don't worry about no__NY. In practice, it won't appear. */ |
1672 | } |
1673 | |
1674 | if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) { |
1675 | uprv_strcat(correctedPOSIXLocale, "__" ); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ |
1676 | } |
1677 | else { |
1678 | uprv_strcat(correctedPOSIXLocale, "_" ); /* aa_CC@b -> aa_CC_b */ |
1679 | } |
1680 | |
1681 | const char *q; |
1682 | if ((q = uprv_strchr(p, '.')) != nullptr) { |
1683 | /* How big will the resulting string be? */ |
1684 | int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); |
1685 | uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset |
1686 | correctedPOSIXLocale[len] = 0; |
1687 | } |
1688 | else { |
1689 | /* Anything following the @ sign */ |
1690 | uprv_strcat(correctedPOSIXLocale, p); |
1691 | } |
1692 | |
1693 | /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? |
1694 | * How about 'russian' -> 'ru'? |
1695 | * Many of the other locales using ISO codes will be handled by the |
1696 | * canonicalization functions in uloc_getDefault. |
1697 | */ |
1698 | } |
1699 | |
1700 | if (gCorrectedPOSIXLocale == nullptr) { |
1701 | gCorrectedPOSIXLocale = correctedPOSIXLocale; |
1702 | gCorrectedPOSIXLocaleHeapAllocated = true; |
1703 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
1704 | correctedPOSIXLocale = nullptr; |
1705 | } |
1706 | posixID = gCorrectedPOSIXLocale; |
1707 | |
1708 | if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */ |
1709 | uprv_free(correctedPOSIXLocale); |
1710 | } |
1711 | |
1712 | return posixID; |
1713 | |
1714 | #elif U_PLATFORM_USES_ONLY_WIN32_API |
1715 | #define POSIX_LOCALE_CAPACITY 64 |
1716 | UErrorCode status = U_ZERO_ERROR; |
1717 | char *correctedPOSIXLocale = nullptr; |
1718 | |
1719 | // If we have already figured this out just use the cached value |
1720 | if (gCorrectedPOSIXLocale != nullptr) { |
1721 | return gCorrectedPOSIXLocale; |
1722 | } |
1723 | |
1724 | // No cached value, need to determine the current value |
1725 | static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; |
1726 | int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); |
1727 | |
1728 | // Now we should have a Windows locale name that needs converted to the POSIX style. |
1729 | if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. |
1730 | { |
1731 | // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) |
1732 | char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; |
1733 | |
1734 | int32_t i; |
1735 | for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) |
1736 | { |
1737 | if (windowsLocale[i] == '_') |
1738 | { |
1739 | modifiedWindowsLocale[i] = '-'; |
1740 | } |
1741 | else |
1742 | { |
1743 | modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]); |
1744 | } |
1745 | |
1746 | if (modifiedWindowsLocale[i] == '\0') |
1747 | { |
1748 | break; |
1749 | } |
1750 | } |
1751 | |
1752 | if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) |
1753 | { |
1754 | // Ran out of room, can't really happen, maybe we'll be lucky about a matching |
1755 | // locale when tags are dropped |
1756 | modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; |
1757 | } |
1758 | |
1759 | // Now normalize the resulting name |
1760 | correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); |
1761 | /* TODO: Should we just exit on memory allocation failure? */ |
1762 | if (correctedPOSIXLocale) |
1763 | { |
1764 | int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); |
1765 | if (U_SUCCESS(status)) |
1766 | { |
1767 | *(correctedPOSIXLocale + posixLen) = 0; |
1768 | gCorrectedPOSIXLocale = correctedPOSIXLocale; |
1769 | gCorrectedPOSIXLocaleHeapAllocated = true; |
1770 | ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); |
1771 | } |
1772 | else |
1773 | { |
1774 | uprv_free(correctedPOSIXLocale); |
1775 | } |
1776 | } |
1777 | } |
1778 | |
1779 | // If unable to find a locale we can agree upon, use en-US by default |
1780 | if (gCorrectedPOSIXLocale == nullptr) { |
1781 | gCorrectedPOSIXLocale = "en_US" ; |
1782 | } |
1783 | return gCorrectedPOSIXLocale; |
1784 | |
1785 | #elif U_PLATFORM == U_PF_OS400 |
1786 | /* locales are process scoped and are by definition thread safe */ |
1787 | static char correctedLocale[64]; |
1788 | const char *localeID = getenv("LC_ALL" ); |
1789 | char *p; |
1790 | |
1791 | if (localeID == NULL) |
1792 | localeID = getenv("LANG" ); |
1793 | if (localeID == NULL) |
1794 | localeID = setlocale(LC_ALL, NULL); |
1795 | /* Make sure we have something... */ |
1796 | if (localeID == NULL) |
1797 | return "en_US_POSIX" ; |
1798 | |
1799 | /* Extract the locale name from the path. */ |
1800 | if((p = uprv_strrchr(localeID, '/')) != NULL) |
1801 | { |
1802 | /* Increment p to start of locale name. */ |
1803 | p++; |
1804 | localeID = p; |
1805 | } |
1806 | |
1807 | /* Copy to work location. */ |
1808 | uprv_strcpy(correctedLocale, localeID); |
1809 | |
1810 | /* Strip off the '.locale' extension. */ |
1811 | if((p = uprv_strchr(correctedLocale, '.')) != NULL) { |
1812 | *p = 0; |
1813 | } |
1814 | |
1815 | /* Upper case the locale name. */ |
1816 | T_CString_toUpperCase(correctedLocale); |
1817 | |
1818 | /* See if we are using the POSIX locale. Any of the |
1819 | * following are equivalent and use the same QLGPGCMA |
1820 | * (POSIX) locale. |
1821 | * QLGPGCMA2 means UCS2 |
1822 | * QLGPGCMA_4 means UTF-32 |
1823 | * QLGPGCMA_8 means UTF-8 |
1824 | */ |
1825 | if ((uprv_strcmp("C" , correctedLocale) == 0) || |
1826 | (uprv_strcmp("POSIX" , correctedLocale) == 0) || |
1827 | (uprv_strncmp("QLGPGCMA" , correctedLocale, 8) == 0)) |
1828 | { |
1829 | uprv_strcpy(correctedLocale, "en_US_POSIX" ); |
1830 | } |
1831 | else |
1832 | { |
1833 | int16_t LocaleLen; |
1834 | |
1835 | /* Lower case the lang portion. */ |
1836 | for(p = correctedLocale; *p != 0 && *p != '_'; p++) |
1837 | { |
1838 | *p = uprv_tolower(*p); |
1839 | } |
1840 | |
1841 | /* Adjust for Euro. After '_E' add 'URO'. */ |
1842 | LocaleLen = uprv_strlen(correctedLocale); |
1843 | if (correctedLocale[LocaleLen - 2] == '_' && |
1844 | correctedLocale[LocaleLen - 1] == 'E') |
1845 | { |
1846 | uprv_strcat(correctedLocale, "URO" ); |
1847 | } |
1848 | |
1849 | /* If using Lotus-based locale then convert to |
1850 | * equivalent non Lotus. |
1851 | */ |
1852 | else if (correctedLocale[LocaleLen - 2] == '_' && |
1853 | correctedLocale[LocaleLen - 1] == 'L') |
1854 | { |
1855 | correctedLocale[LocaleLen - 2] = 0; |
1856 | } |
1857 | |
1858 | /* There are separate simplified and traditional |
1859 | * locales called zh_HK_S and zh_HK_T. |
1860 | */ |
1861 | else if (uprv_strncmp(correctedLocale, "zh_HK" , 5) == 0) |
1862 | { |
1863 | uprv_strcpy(correctedLocale, "zh_HK" ); |
1864 | } |
1865 | |
1866 | /* A special zh_CN_GBK locale... |
1867 | */ |
1868 | else if (uprv_strcmp(correctedLocale, "zh_CN_GBK" ) == 0) |
1869 | { |
1870 | uprv_strcpy(correctedLocale, "zh_CN" ); |
1871 | } |
1872 | |
1873 | } |
1874 | |
1875 | return correctedLocale; |
1876 | #endif |
1877 | |
1878 | } |
1879 | |
1880 | #if !U_CHARSET_IS_UTF8 |
1881 | #if U_POSIX_LOCALE |
1882 | /* |
1883 | Due to various platform differences, one platform may specify a charset, |
1884 | when they really mean a different charset. Remap the names so that they are |
1885 | compatible with ICU. Only conflicting/ambiguous aliases should be resolved |
1886 | here. Before adding anything to this function, please consider adding unique |
1887 | names to the ICU alias table in the data directory. |
1888 | */ |
1889 | static const char* |
1890 | remapPlatformDependentCodepage(const char *locale, const char *name) { |
1891 | if (locale != NULL && *locale == 0) { |
1892 | /* Make sure that an empty locale is handled the same way. */ |
1893 | locale = NULL; |
1894 | } |
1895 | if (name == NULL) { |
1896 | return NULL; |
1897 | } |
1898 | #if U_PLATFORM == U_PF_AIX |
1899 | if (uprv_strcmp(name, "IBM-943" ) == 0) { |
1900 | /* Use the ASCII compatible ibm-943 */ |
1901 | name = "Shift-JIS" ; |
1902 | } |
1903 | else if (uprv_strcmp(name, "IBM-1252" ) == 0) { |
1904 | /* Use the windows-1252 that contains the Euro */ |
1905 | name = "IBM-5348" ; |
1906 | } |
1907 | #elif U_PLATFORM == U_PF_SOLARIS |
1908 | if (locale != NULL && uprv_strcmp(name, "EUC" ) == 0) { |
1909 | /* Solaris underspecifies the "EUC" name. */ |
1910 | if (uprv_strcmp(locale, "zh_CN" ) == 0) { |
1911 | name = "EUC-CN" ; |
1912 | } |
1913 | else if (uprv_strcmp(locale, "zh_TW" ) == 0) { |
1914 | name = "EUC-TW" ; |
1915 | } |
1916 | else if (uprv_strcmp(locale, "ko_KR" ) == 0) { |
1917 | name = "EUC-KR" ; |
1918 | } |
1919 | } |
1920 | else if (uprv_strcmp(name, "eucJP" ) == 0) { |
1921 | /* |
1922 | ibm-954 is the best match. |
1923 | ibm-33722 is the default for eucJP (similar to Windows). |
1924 | */ |
1925 | name = "eucjis" ; |
1926 | } |
1927 | else if (uprv_strcmp(name, "646" ) == 0) { |
1928 | /* |
1929 | * The default codepage given by Solaris is 646 but the C library routines treat it as if it was |
1930 | * ISO-8859-1 instead of US-ASCII(646). |
1931 | */ |
1932 | name = "ISO-8859-1" ; |
1933 | } |
1934 | #elif U_PLATFORM_IS_DARWIN_BASED |
1935 | if (locale == NULL && *name == 0) { |
1936 | /* |
1937 | No locale was specified, and an empty name was passed in. |
1938 | This usually indicates that nl_langinfo didn't return valid information. |
1939 | Mac OS X uses UTF-8 by default (especially the locale data and console). |
1940 | */ |
1941 | name = "UTF-8" ; |
1942 | } |
1943 | else if (uprv_strcmp(name, "CP949" ) == 0) { |
1944 | /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ |
1945 | name = "EUC-KR" ; |
1946 | } |
1947 | else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX" ) != 0 && uprv_strcmp(name, "US-ASCII" ) == 0) { |
1948 | /* |
1949 | * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. |
1950 | */ |
1951 | name = "UTF-8" ; |
1952 | } |
1953 | #elif U_PLATFORM == U_PF_BSD |
1954 | if (uprv_strcmp(name, "CP949" ) == 0) { |
1955 | /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ |
1956 | name = "EUC-KR" ; |
1957 | } |
1958 | #elif U_PLATFORM == U_PF_HPUX |
1959 | if (locale != NULL && uprv_strcmp(locale, "zh_HK" ) == 0 && uprv_strcmp(name, "big5" ) == 0) { |
1960 | /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ |
1961 | /* zh_TW.big5 is not the same charset as zh_HK.big5! */ |
1962 | name = "hkbig5" ; |
1963 | } |
1964 | else if (uprv_strcmp(name, "eucJP" ) == 0) { |
1965 | /* |
1966 | ibm-1350 is the best match, but unavailable. |
1967 | ibm-954 is mostly a superset of ibm-1350. |
1968 | ibm-33722 is the default for eucJP (similar to Windows). |
1969 | */ |
1970 | name = "eucjis" ; |
1971 | } |
1972 | #elif U_PLATFORM == U_PF_LINUX |
1973 | if (locale != NULL && uprv_strcmp(name, "euc" ) == 0) { |
1974 | /* Linux underspecifies the "EUC" name. */ |
1975 | if (uprv_strcmp(locale, "korean" ) == 0) { |
1976 | name = "EUC-KR" ; |
1977 | } |
1978 | else if (uprv_strcmp(locale, "japanese" ) == 0) { |
1979 | /* See comment below about eucJP */ |
1980 | name = "eucjis" ; |
1981 | } |
1982 | } |
1983 | else if (uprv_strcmp(name, "eucjp" ) == 0) { |
1984 | /* |
1985 | ibm-1350 is the best match, but unavailable. |
1986 | ibm-954 is mostly a superset of ibm-1350. |
1987 | ibm-33722 is the default for eucJP (similar to Windows). |
1988 | */ |
1989 | name = "eucjis" ; |
1990 | } |
1991 | else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX" ) != 0 && |
1992 | (uprv_strcmp(name, "ANSI_X3.4-1968" ) == 0 || uprv_strcmp(name, "US-ASCII" ) == 0)) { |
1993 | /* |
1994 | * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. |
1995 | */ |
1996 | name = "UTF-8" ; |
1997 | } |
1998 | /* |
1999 | * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of |
2000 | * it by falling back to 'US-ASCII' when NULL is returned from this |
2001 | * function. So, we don't have to worry about it here. |
2002 | */ |
2003 | #endif |
2004 | /* return NULL when "" is passed in */ |
2005 | if (*name == 0) { |
2006 | name = NULL; |
2007 | } |
2008 | return name; |
2009 | } |
2010 | |
2011 | static const char* |
2012 | getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) |
2013 | { |
2014 | char localeBuf[100]; |
2015 | const char *name = NULL; |
2016 | char *variant = NULL; |
2017 | |
2018 | if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { |
2019 | size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); |
2020 | uprv_strncpy(localeBuf, localeName, localeCapacity); |
2021 | localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ |
2022 | name = uprv_strncpy(buffer, name+1, buffCapacity); |
2023 | buffer[buffCapacity-1] = 0; /* ensure NULL termination */ |
2024 | if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { |
2025 | *variant = 0; |
2026 | } |
2027 | name = remapPlatformDependentCodepage(localeBuf, name); |
2028 | } |
2029 | return name; |
2030 | } |
2031 | #endif |
2032 | |
2033 | static const char* |
2034 | int_getDefaultCodepage() |
2035 | { |
2036 | #if U_PLATFORM == U_PF_OS400 |
2037 | uint32_t ccsid = 37; /* Default to ibm-37 */ |
2038 | static char codepage[64]; |
2039 | Qwc_JOBI0400_t jobinfo; |
2040 | Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ |
2041 | |
2042 | EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400" , |
2043 | "* " , " " , &error); |
2044 | |
2045 | if (error.Bytes_Available == 0) { |
2046 | if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { |
2047 | ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; |
2048 | } |
2049 | else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { |
2050 | ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; |
2051 | } |
2052 | /* else use the default */ |
2053 | } |
2054 | sprintf(codepage,"ibm-%d" , ccsid); |
2055 | return codepage; |
2056 | |
2057 | #elif U_PLATFORM == U_PF_OS390 |
2058 | static char codepage[64]; |
2059 | |
2060 | strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); |
2061 | strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); |
2062 | codepage[63] = 0; /* NULL terminate */ |
2063 | |
2064 | return codepage; |
2065 | |
2066 | #elif U_PLATFORM_USES_ONLY_WIN32_API |
2067 | static char codepage[64]; |
2068 | DWORD codepageNumber = 0; |
2069 | |
2070 | #if U_PLATFORM_HAS_WINUWP_API == 1 |
2071 | // UWP doesn't have a direct API to get the default ACP as Microsoft would rather |
2072 | // have folks use Unicode than a "system" code page, however this is the same |
2073 | // codepage as the system default locale codepage. (FWIW, the system locale is |
2074 | // ONLY used for codepage, it should never be used for anything else) |
2075 | GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, |
2076 | (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); |
2077 | #else |
2078 | // Win32 apps can call GetACP |
2079 | codepageNumber = GetACP(); |
2080 | #endif |
2081 | // Special case for UTF-8 |
2082 | if (codepageNumber == 65001) |
2083 | { |
2084 | return "UTF-8" ; |
2085 | } |
2086 | // Windows codepages can look like windows-1252, so format the found number |
2087 | // the numbers are eclectic, however all valid system code pages, besides UTF-8 |
2088 | // are between 3 and 19999 |
2089 | if (codepageNumber > 0 && codepageNumber < 20000) |
2090 | { |
2091 | sprintf(codepage, "windows-%ld" , codepageNumber); |
2092 | return codepage; |
2093 | } |
2094 | // If the codepage number call failed then return UTF-8 |
2095 | return "UTF-8" ; |
2096 | |
2097 | #elif U_POSIX_LOCALE |
2098 | static char codesetName[100]; |
2099 | const char *localeName = NULL; |
2100 | const char *name = NULL; |
2101 | |
2102 | localeName = uprv_getPOSIXIDForDefaultCodepage(); |
2103 | uprv_memset(codesetName, 0, sizeof(codesetName)); |
2104 | /* On Solaris nl_langinfo returns C locale values unless setlocale |
2105 | * was called earlier. |
2106 | */ |
2107 | #if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) |
2108 | /* When available, check nl_langinfo first because it usually gives more |
2109 | useful names. It depends on LC_CTYPE. |
2110 | nl_langinfo may use the same buffer as setlocale. */ |
2111 | { |
2112 | const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); |
2113 | #if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED |
2114 | /* |
2115 | * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 |
2116 | * instead of ASCII. |
2117 | */ |
2118 | if (uprv_strcmp(localeName, "en_US_POSIX" ) != 0) { |
2119 | codeset = remapPlatformDependentCodepage(localeName, codeset); |
2120 | } else |
2121 | #endif |
2122 | { |
2123 | codeset = remapPlatformDependentCodepage(NULL, codeset); |
2124 | } |
2125 | |
2126 | if (codeset != NULL) { |
2127 | uprv_strncpy(codesetName, codeset, sizeof(codesetName)); |
2128 | codesetName[sizeof(codesetName)-1] = 0; |
2129 | return codesetName; |
2130 | } |
2131 | } |
2132 | #endif |
2133 | |
2134 | /* Use setlocale in a nice way, and then check some environment variables. |
2135 | Maybe the application used setlocale already. |
2136 | */ |
2137 | uprv_memset(codesetName, 0, sizeof(codesetName)); |
2138 | name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); |
2139 | if (name) { |
2140 | /* if we can find the codeset name from setlocale, return that. */ |
2141 | return name; |
2142 | } |
2143 | |
2144 | if (*codesetName == 0) |
2145 | { |
2146 | /* Everything failed. Return US ASCII (ISO 646). */ |
2147 | (void)uprv_strcpy(codesetName, "US-ASCII" ); |
2148 | } |
2149 | return codesetName; |
2150 | #else |
2151 | return "US-ASCII" ; |
2152 | #endif |
2153 | } |
2154 | |
2155 | |
2156 | U_CAPI const char* U_EXPORT2 |
2157 | uprv_getDefaultCodepage() |
2158 | { |
2159 | static char const *name = NULL; |
2160 | umtx_lock(NULL); |
2161 | if (name == NULL) { |
2162 | name = int_getDefaultCodepage(); |
2163 | } |
2164 | umtx_unlock(NULL); |
2165 | return name; |
2166 | } |
2167 | #endif /* !U_CHARSET_IS_UTF8 */ |
2168 | |
2169 | |
2170 | /* end of platform-specific implementation -------------- */ |
2171 | |
2172 | /* version handling --------------------------------------------------------- */ |
2173 | |
2174 | U_CAPI void U_EXPORT2 |
2175 | u_versionFromString(UVersionInfo versionArray, const char *versionString) { |
2176 | char *end; |
2177 | uint16_t part=0; |
2178 | |
2179 | if(versionArray==NULL) { |
2180 | return; |
2181 | } |
2182 | |
2183 | if(versionString!=NULL) { |
2184 | for(;;) { |
2185 | versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); |
2186 | if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { |
2187 | break; |
2188 | } |
2189 | versionString=end+1; |
2190 | } |
2191 | } |
2192 | |
2193 | while(part<U_MAX_VERSION_LENGTH) { |
2194 | versionArray[part++]=0; |
2195 | } |
2196 | } |
2197 | |
2198 | U_CAPI void U_EXPORT2 |
2199 | u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { |
2200 | if(versionArray!=NULL && versionString!=NULL) { |
2201 | char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; |
2202 | int32_t len = u_strlen(versionString); |
2203 | if(len>U_MAX_VERSION_STRING_LENGTH) { |
2204 | len = U_MAX_VERSION_STRING_LENGTH; |
2205 | } |
2206 | u_UCharsToChars(versionString, versionChars, len); |
2207 | versionChars[len]=0; |
2208 | u_versionFromString(versionArray, versionChars); |
2209 | } |
2210 | } |
2211 | |
2212 | U_CAPI void U_EXPORT2 |
2213 | u_versionToString(const UVersionInfo versionArray, char *versionString) { |
2214 | uint16_t count, part; |
2215 | uint8_t field; |
2216 | |
2217 | if(versionString==NULL) { |
2218 | return; |
2219 | } |
2220 | |
2221 | if(versionArray==NULL) { |
2222 | versionString[0]=0; |
2223 | return; |
2224 | } |
2225 | |
2226 | /* count how many fields need to be written */ |
2227 | for(count=4; count>0 && versionArray[count-1]==0; --count) { |
2228 | } |
2229 | |
2230 | if(count <= 1) { |
2231 | count = 2; |
2232 | } |
2233 | |
2234 | /* write the first part */ |
2235 | /* write the decimal field value */ |
2236 | field=versionArray[0]; |
2237 | if(field>=100) { |
2238 | *versionString++=(char)('0'+field/100); |
2239 | field%=100; |
2240 | } |
2241 | if(field>=10) { |
2242 | *versionString++=(char)('0'+field/10); |
2243 | field%=10; |
2244 | } |
2245 | *versionString++=(char)('0'+field); |
2246 | |
2247 | /* write the following parts */ |
2248 | for(part=1; part<count; ++part) { |
2249 | /* write a dot first */ |
2250 | *versionString++=U_VERSION_DELIMITER; |
2251 | |
2252 | /* write the decimal field value */ |
2253 | field=versionArray[part]; |
2254 | if(field>=100) { |
2255 | *versionString++=(char)('0'+field/100); |
2256 | field%=100; |
2257 | } |
2258 | if(field>=10) { |
2259 | *versionString++=(char)('0'+field/10); |
2260 | field%=10; |
2261 | } |
2262 | *versionString++=(char)('0'+field); |
2263 | } |
2264 | |
2265 | /* NUL-terminate */ |
2266 | *versionString=0; |
2267 | } |
2268 | |
2269 | U_CAPI void U_EXPORT2 |
2270 | u_getVersion(UVersionInfo versionArray) { |
2271 | (void)copyright; // Suppress unused variable warning from clang. |
2272 | u_versionFromString(versionArray, U_ICU_VERSION); |
2273 | } |
2274 | |
2275 | /** |
2276 | * icucfg.h dependent code |
2277 | */ |
2278 | |
2279 | #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API |
2280 | |
2281 | #if HAVE_DLFCN_H |
2282 | #ifdef __MVS__ |
2283 | #ifndef __SUSV3 |
2284 | #define __SUSV3 1 |
2285 | #endif |
2286 | #endif |
2287 | #include <dlfcn.h> |
2288 | #endif /* HAVE_DLFCN_H */ |
2289 | |
2290 | U_INTERNAL void * U_EXPORT2 |
2291 | uprv_dl_open(const char *libName, UErrorCode *status) { |
2292 | void *ret = NULL; |
2293 | if(U_FAILURE(*status)) return ret; |
2294 | ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); |
2295 | if(ret==NULL) { |
2296 | #ifdef U_TRACE_DYLOAD |
2297 | printf("dlerror on dlopen(%s): %s\n" , libName, dlerror()); |
2298 | #endif |
2299 | *status = U_MISSING_RESOURCE_ERROR; |
2300 | } |
2301 | return ret; |
2302 | } |
2303 | |
2304 | U_INTERNAL void U_EXPORT2 |
2305 | uprv_dl_close(void *lib, UErrorCode *status) { |
2306 | if(U_FAILURE(*status)) return; |
2307 | dlclose(lib); |
2308 | } |
2309 | |
2310 | U_INTERNAL UVoidFunction* U_EXPORT2 |
2311 | uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { |
2312 | union { |
2313 | UVoidFunction *fp; |
2314 | void *vp; |
2315 | } uret; |
2316 | uret.fp = NULL; |
2317 | if(U_FAILURE(*status)) return uret.fp; |
2318 | uret.vp = dlsym(lib, sym); |
2319 | if(uret.vp == NULL) { |
2320 | #ifdef U_TRACE_DYLOAD |
2321 | printf("dlerror on dlsym(%p,%s): %s\n" , lib,sym, dlerror()); |
2322 | #endif |
2323 | *status = U_MISSING_RESOURCE_ERROR; |
2324 | } |
2325 | return uret.fp; |
2326 | } |
2327 | |
2328 | #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API |
2329 | |
2330 | /* Windows API implementation. */ |
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2332 | |
2333 | U_INTERNAL void * U_EXPORT2 |
2334 | uprv_dl_open(const char *libName, UErrorCode *status) { |
2335 | HMODULE lib = NULL; |
2336 | |
2337 | if(U_FAILURE(*status)) return NULL; |
2338 | |
2339 | lib = LoadLibraryA(libName); |
2340 | |
2341 | if(lib==NULL) { |
2342 | *status = U_MISSING_RESOURCE_ERROR; |
2343 | } |
2344 | |
2345 | return (void*)lib; |
2346 | } |
2347 | |
2348 | U_INTERNAL void U_EXPORT2 |
2349 | uprv_dl_close(void *lib, UErrorCode *status) { |
2350 | HMODULE handle = (HMODULE)lib; |
2351 | if(U_FAILURE(*status)) return; |
2352 | |
2353 | FreeLibrary(handle); |
2354 | |
2355 | return; |
2356 | } |
2357 | |
2358 | U_INTERNAL UVoidFunction* U_EXPORT2 |
2359 | uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { |
2360 | HMODULE handle = (HMODULE)lib; |
2361 | UVoidFunction* addr = NULL; |
2362 | |
2363 | if(U_FAILURE(*status) || lib==NULL) return NULL; |
2364 | |
2365 | addr = (UVoidFunction*)GetProcAddress(handle, sym); |
2366 | |
2367 | if(addr==NULL) { |
2368 | DWORD lastError = GetLastError(); |
2369 | if(lastError == ERROR_PROC_NOT_FOUND) { |
2370 | *status = U_MISSING_RESOURCE_ERROR; |
2371 | } else { |
2372 | *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ |
2373 | } |
2374 | } |
2375 | |
2376 | return addr; |
2377 | } |
2378 | |
2379 | #else |
2380 | |
2381 | /* No dynamic loading, null (nonexistent) implementation. */ |
2382 | |
2383 | U_INTERNAL void * U_EXPORT2 |
2384 | uprv_dl_open(const char *libName, UErrorCode *status) { |
2385 | (void)libName; |
2386 | if(U_FAILURE(*status)) return NULL; |
2387 | *status = U_UNSUPPORTED_ERROR; |
2388 | return NULL; |
2389 | } |
2390 | |
2391 | U_INTERNAL void U_EXPORT2 |
2392 | uprv_dl_close(void *lib, UErrorCode *status) { |
2393 | (void)lib; |
2394 | if(U_FAILURE(*status)) return; |
2395 | *status = U_UNSUPPORTED_ERROR; |
2396 | return; |
2397 | } |
2398 | |
2399 | U_INTERNAL UVoidFunction* U_EXPORT2 |
2400 | uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { |
2401 | (void)lib; |
2402 | (void)sym; |
2403 | if(U_SUCCESS(*status)) { |
2404 | *status = U_UNSUPPORTED_ERROR; |
2405 | } |
2406 | return (UVoidFunction*)NULL; |
2407 | } |
2408 | |
2409 | #endif |
2410 | |
2411 | /* |
2412 | * Hey, Emacs, please set the following: |
2413 | * |
2414 | * Local Variables: |
2415 | * indent-tabs-mode: nil |
2416 | * End: |
2417 | * |
2418 | */ |
2419 | |