1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 1997-2016, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10*
11* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12*
13* Date Name Description
14* 04/14/97 aliu Creation.
15* 04/24/97 aliu Added getDefaultDataDirectory() and
16* getDefaultLocaleID().
17* 04/28/97 aliu Rewritten to assume Unix and apply general methods
18* for assumed case. Non-UNIX platforms must be
19* special-cased. Rewrote numeric methods dealing
20* with NaN and Infinity to be platform independent
21* over all IEEE 754 platforms.
22* 05/13/97 aliu Restored sign of timezone
23* (semantics are hours West of GMT)
24* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25* nextDouble..
26* 07/22/98 stephen Added remainder, max, min, trunc
27* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28* 08/24/98 stephen Added longBitsFromDouble
29* 09/08/98 stephen Minor changes for Mac Port
30* 03/02/99 stephen Removed openFile(). Added AS400 support.
31* Fixed EBCDIC tables
32* 04/15/99 stephen Converted to C.
33* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34* 08/04/99 jeffrey R. Added OS/2 changes
35* 11/15/99 helena Integrated S/390 IEEE support.
36* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38* 01/03/08 Steven L. Fake Time Support
39******************************************************************************
40*/
41
42// Defines _XOPEN_SOURCE for access to POSIX functions.
43// Must be before any other #includes.
44#include "uposixdefs.h"
45
46// First, the platform type. Need this for U_PLATFORM.
47#include "unicode/platform.h"
48
49#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50/* tzset isn't defined in strict ANSI on MinGW. */
51#undef __STRICT_ANSI__
52#endif
53
/*
 * Cygwin with GCC requires time.h to be included after the statement above
 * that disables strict ANSI mode.
 */
57#include <time.h>
58
59#if !U_PLATFORM_USES_ONLY_WIN32_API
60#include <sys/time.h>
61#endif
62
63/* include the rest of the ICU headers */
64#include "unicode/putil.h"
65#include "unicode/ustring.h"
66#include "putilimp.h"
67#include "uassert.h"
68#include "umutex.h"
69#include "cmemory.h"
70#include "cstring.h"
71#include "locmap.h"
72#include "ucln_cmn.h"
73#include "charstr.h"
74
75/* Include standard headers. */
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <math.h>
80#include <locale.h>
81#include <float.h>
82
83#ifndef U_COMMON_IMPLEMENTATION
84#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
85#endif
86
87
88/* include system headers */
89#if U_PLATFORM_USES_ONLY_WIN32_API
90 /*
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
94 */
95#ifndef WIN32_LEAN_AND_MEAN
96# define WIN32_LEAN_AND_MEAN
97#endif
98# define VC_EXTRALEAN
99# define NOUSER
100# define NOSERVICE
101# define NOIME
102# define NOMCX
103# include <windows.h>
104# include "unicode/uloc.h"
105# include "wintz.h"
106#elif U_PLATFORM == U_PF_OS400
107# include <float.h>
108# include <qusec.h> /* error code structure */
109# include <qusrjobi.h>
110# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
111# include <mih/testptr.h> /* For uprv_maximumPtr */
112#elif U_PLATFORM == U_PF_OS390
113# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115# include <limits.h>
116# include <unistd.h>
117# if U_PLATFORM == U_PF_SOLARIS
118# ifndef _XPG4_2
119# define _XPG4_2
120# endif
121# endif
122#elif U_PLATFORM == U_PF_QNX
123# include <sys/neutrino.h>
124#endif
125
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
131
132#if U_HAVE_NL_LANGINFO_CODESET
133#include <langinfo.h>
134#endif
135
136/**
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
139 */
140#if U_PLATFORM_IMPLEMENTS_POSIX
141# if U_PLATFORM == U_PF_OS400
142# define HAVE_DLFCN_H 0
143# define HAVE_DLOPEN 0
144# else
145# ifndef HAVE_DLFCN_H
146# define HAVE_DLFCN_H 1
147# endif
148# ifndef HAVE_DLOPEN
149# define HAVE_DLOPEN 1
150# endif
151# endif
152# ifndef HAVE_GETTIMEOFDAY
153# define HAVE_GETTIMEOFDAY 1
154# endif
155#else
156# define HAVE_DLFCN_H 0
157# define HAVE_DLOPEN 0
158# define HAVE_GETTIMEOFDAY 0
159#endif
160
161U_NAMESPACE_USE
162
/* Define the extension for data files, again... */
#define DATA_TYPE "dat"

/* Leave this copyright notice here! */
static const char copyright[] = U_COPYRIGHT_STRING;

/* floating point implementations ------------------------------------------- */

/* We return QNAN rather than SNAN*/
/* SIGN masks the top bit of a 32-bit word; the non-IEEE code paths below apply
   it to the most significant 32 bits of a double to isolate the sign bit. */
#define SIGN 0x80000000U

/* Make it easy to define certain types of constants */
/* Overlays a double with a 64-bit integer so that a floating-point constant can
   be spelled as a raw bit pattern, and so that a double's bits can be inspected. */
typedef union {
    int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */
    double d64;
} BitPatternConversion;
/* Bit pattern returned by uprv_getNaN(): exponent all ones plus the top mantissa bit. */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };
/* Bit pattern returned by uprv_getInfinity(): exponent all ones, mantissa zero. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
181
182/*---------------------------------------------------------------------------
183 Platform utilities
184 Our general strategy is to assume we're on a POSIX platform. Platforms which
185 are non-POSIX must declare themselves so. The default POSIX implementation
186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
187 functions).
188 ---------------------------------------------------------------------------*/
189
190#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
191# undef U_POSIX_LOCALE
192#else
193# define U_POSIX_LOCALE 1
194#endif
195
196/*
197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
199*/
200#if !IEEE_754
/*
 * Returns a pointer to the first of the n most significant bytes of *d.
 * On big-endian targets those bytes come first in memory; otherwise they are
 * the last n bytes of the object.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
210
/*
 * Returns a pointer to the first of the n least significant bytes of *d.
 * On big-endian targets those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
220#endif /* !IEEE_754 */
221
222#if IEEE_754
223static UBool
224u_signBit(double d) {
225 uint8_t hiByte;
226#if U_IS_BIG_ENDIAN
227 hiByte = *(uint8_t *)&d;
228#else
229 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
230#endif
231 return (hiByte & 0x80) != 0;
232}
233#endif
234
235
236
237#if defined (U_DEBUG_FAKETIME)
238/* Override the clock to test things without having to move the system clock.
239 * Assumes POSIX gettimeofday() will function
240 */
/* State of the fake clock; written by getUTCtime_fake() the first time it runs. */
UDate fakeClock_t0 = 0; /** Time to start the clock from (parsed from U_FAKETIME_START) **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time), added to every real reading **/
UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
244
245static UDate getUTCtime_real() {
246 struct timeval posixTime;
247 gettimeofday(&posixTime, NULL);
248 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
249}
250
251static UDate getUTCtime_fake() {
252 static UMutex fakeClockMutex;
253 umtx_lock(&fakeClockMutex);
254 if(!fakeClock_set) {
255 UDate real = getUTCtime_real();
256 const char *fake_start = getenv("U_FAKETIME_START");
257 if((fake_start!=NULL) && (fake_start[0]!=0)) {
258 sscanf(fake_start,"%lf",&fakeClock_t0);
259 fakeClock_dt = fakeClock_t0 - real;
260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262 fakeClock_t0, fake_start, fakeClock_dt, real);
263 } else {
264 fakeClock_dt = 0;
265 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
267 }
268 fakeClock_set = TRUE;
269 }
270 umtx_unlock(&fakeClockMutex);
271
272 return getUTCtime_real() + fakeClock_dt;
273}
274#endif
275
276#if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a Win32 FILETIME with a 64-bit integer so the value filled in by
   GetSystemTimeAsFileTime() can be used in integer arithmetic. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion; /* This is like a ULARGE_INTEGER */

/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
#define EPOCH_BIAS INT64_C(116444736000000000)
/* Number of 100-nanosecond units in one millisecond. */
#define HECTONANOSECOND_PER_MILLISECOND 10000
285
286#endif
287
288/*---------------------------------------------------------------------------
289 Universal Implementations
290 These are designed to work on all platforms. Try these, and if they
291 don't work on your platform, then special case your platform with new
292 implementations.
293---------------------------------------------------------------------------*/
294
295U_CAPI UDate U_EXPORT2
296uprv_getUTCtime()
297{
298#if defined(U_DEBUG_FAKETIME)
299 return getUTCtime_fake(); /* Hook for overriding the clock */
300#else
301 return uprv_getRawUTCtime();
302#endif
303}
304
305/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306U_CAPI UDate U_EXPORT2
307uprv_getRawUTCtime()
308{
309#if U_PLATFORM_USES_ONLY_WIN32_API
310
311 FileTimeConversion winTime;
312 GetSystemTimeAsFileTime(&winTime.fileTime);
313 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
314#else
315
316#if HAVE_GETTIMEOFDAY
317 struct timeval posixTime;
318 gettimeofday(&posixTime, NULL);
319 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
320#else
321 time_t epochtime;
322 time(&epochtime);
323 return (UDate)epochtime * U_MILLIS_PER_SECOND;
324#endif
325
326#endif
327}
328
329/*-----------------------------------------------------------------------------
330 IEEE 754
331 These methods detect and return NaN and infinity values for doubles
332 conforming to IEEE 754. Platforms which support this standard include X86,
333 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
334 If this doesn't work on your platform, you have non-IEEE floating-point, and
335 will need to code your own versions. A naive implementation is to return 0.0
336 for getNaN and getInfinity, and false for isNaN and isInfinite.
337 ---------------------------------------------------------------------------*/
338
339U_CAPI UBool U_EXPORT2
340uprv_isNaN(double number)
341{
342#if IEEE_754
343 BitPatternConversion convertedNumber;
344 convertedNumber.d64 = number;
345 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
346 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
347
348#elif U_PLATFORM == U_PF_OS390
349 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
350 sizeof(uint32_t));
351 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
352 sizeof(uint32_t));
353
354 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
355 (lowBits == 0x00000000L);
356
357#else
358 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
359 /* you'll need to replace this default implementation with what's correct*/
360 /* for your platform.*/
361 return number != number;
362#endif
363}
364
365U_CAPI UBool U_EXPORT2
366uprv_isInfinite(double number)
367{
368#if IEEE_754
369 BitPatternConversion convertedNumber;
370 convertedNumber.d64 = number;
371 /* Infinity is exactly 0x7FF0000000000000U. */
372 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
373#elif U_PLATFORM == U_PF_OS390
374 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
375 sizeof(uint32_t));
376 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
377 sizeof(uint32_t));
378
379 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
380
381#else
382 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
383 /* value, you'll need to replace this default implementation with what's*/
384 /* correct for your platform.*/
385 return number == (2.0 * number);
386#endif
387}
388
389U_CAPI UBool U_EXPORT2
390uprv_isPositiveInfinity(double number)
391{
392#if IEEE_754 || U_PLATFORM == U_PF_OS390
393 return (UBool)(number > 0 && uprv_isInfinite(number));
394#else
395 return uprv_isInfinite(number);
396#endif
397}
398
399U_CAPI UBool U_EXPORT2
400uprv_isNegativeInfinity(double number)
401{
402#if IEEE_754 || U_PLATFORM == U_PF_OS390
403 return (UBool)(number < 0 && uprv_isInfinite(number));
404
405#else
406 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
407 sizeof(uint32_t));
408 return((highBits & SIGN) && uprv_isInfinite(number));
409
410#endif
411}
412
413U_CAPI double U_EXPORT2
414uprv_getNaN()
415{
416#if IEEE_754 || U_PLATFORM == U_PF_OS390
417 return gNan.d64;
418#else
419 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
420 /* you'll need to replace this default implementation with what's correct*/
421 /* for your platform.*/
422 return 0.0;
423#endif
424}
425
426U_CAPI double U_EXPORT2
427uprv_getInfinity()
428{
429#if IEEE_754 || U_PLATFORM == U_PF_OS390
430 return gInf.d64;
431#else
432 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
433 /* value, you'll need to replace this default implementation with what's*/
434 /* correct for your platform.*/
435 return 0.0;
436#endif
437}
438
439U_CAPI double U_EXPORT2
440uprv_floor(double x)
441{
442 return floor(x);
443}
444
445U_CAPI double U_EXPORT2
446uprv_ceil(double x)
447{
448 return ceil(x);
449}
450
451U_CAPI double U_EXPORT2
452uprv_round(double x)
453{
454 return uprv_floor(x + 0.5);
455}
456
457U_CAPI double U_EXPORT2
458uprv_fabs(double x)
459{
460 return fabs(x);
461}
462
463U_CAPI double U_EXPORT2
464uprv_modf(double x, double* y)
465{
466 return modf(x, y);
467}
468
469U_CAPI double U_EXPORT2
470uprv_fmod(double x, double y)
471{
472 return fmod(x, y);
473}
474
475U_CAPI double U_EXPORT2
476uprv_pow(double x, double y)
477{
478 /* This is declared as "double pow(double x, double y)" */
479 return pow(x, y);
480}
481
482U_CAPI double U_EXPORT2
483uprv_pow10(int32_t x)
484{
485 return pow(10.0, (double)x);
486}
487
488U_CAPI double U_EXPORT2
489uprv_fmax(double x, double y)
490{
491#if IEEE_754
492 /* first handle NaN*/
493 if(uprv_isNaN(x) || uprv_isNaN(y))
494 return uprv_getNaN();
495
496 /* check for -0 and 0*/
497 if(x == 0.0 && y == 0.0 && u_signBit(x))
498 return y;
499
500#endif
501
502 /* this should work for all flt point w/o NaN and Inf special cases */
503 return (x > y ? x : y);
504}
505
506U_CAPI double U_EXPORT2
507uprv_fmin(double x, double y)
508{
509#if IEEE_754
510 /* first handle NaN*/
511 if(uprv_isNaN(x) || uprv_isNaN(y))
512 return uprv_getNaN();
513
514 /* check for -0 and 0*/
515 if(x == 0.0 && y == 0.0 && u_signBit(y))
516 return y;
517
518#endif
519
520 /* this should work for all flt point w/o NaN and Inf special cases */
521 return (x > y ? y : x);
522}
523
524U_CAPI UBool U_EXPORT2
525uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
526 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
527 // This function could be optimized by calling one of those primitives.
528 auto a64 = static_cast<int64_t>(a);
529 auto b64 = static_cast<int64_t>(b);
530 int64_t res64 = a64 + b64;
531 *res = static_cast<int32_t>(res64);
532 return res64 != *res;
533}
534
535U_CAPI UBool U_EXPORT2
536uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
537 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
538 // This function could be optimized by calling one of those primitives.
539 auto a64 = static_cast<int64_t>(a);
540 auto b64 = static_cast<int64_t>(b);
541 int64_t res64 = a64 * b64;
542 *res = static_cast<int32_t>(res64);
543 return res64 != *res;
544}
545
546/**
547 * Truncates the given double.
548 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
549 * This is different than calling floor() or ceil():
550 * floor(3.3) = 3, floor(-3.3) = -4
551 * ceil(3.3) = 4, ceil(-3.3) = -3
552 */
553U_CAPI double U_EXPORT2
554uprv_trunc(double d)
555{
556#if IEEE_754
557 /* handle error cases*/
558 if(uprv_isNaN(d))
559 return uprv_getNaN();
560 if(uprv_isInfinite(d))
561 return uprv_getInfinity();
562
563 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
564 return ceil(d);
565 else
566 return floor(d);
567
568#else
569 return d >= 0 ? floor(d) : ceil(d);
570
571#endif
572}
573
574/**
575 * Return the largest positive number that can be represented by an integer
576 * type of arbitrary bit length.
577 */
578U_CAPI double U_EXPORT2
579uprv_maxMantissa(void)
580{
581 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
582}
583
584U_CAPI double U_EXPORT2
585uprv_log(double d)
586{
587 return log(d);
588}
589
590U_CAPI void * U_EXPORT2
591uprv_maximumPtr(void * base)
592{
593#if U_PLATFORM == U_PF_OS400
594 /*
595 * With the provided function we should never be out of range of a given segment
596 * (a traditional/typical segment that is). Our segments have 5 bytes for the
597 * id and 3 bytes for the offset. The key is that the casting takes care of
598 * only retrieving the offset portion minus x1000. Hence, the smallest offset
599 * seen in a program is x001000 and when casted to an int would be 0.
600 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
601 *
602 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
603 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
604 * This function determines the activation based on the pointer that is passed in and
605 * calculates the appropriate maximum available size for
606 * each pointer type (TERASPACE and non-TERASPACE)
607 *
608 * Unlike other operating systems, the pointer model isn't determined at
609 * compile time on i5/OS.
610 */
611 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
612 /* if it is a TERASPACE pointer the max is 2GB - 4k */
613 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
614 }
615 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
616 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
617
618#else
619 return U_MAX_PTR(base);
620#endif
621}
622
623/*---------------------------------------------------------------------------
624 Platform-specific Implementations
625 Try these, and if they don't work on your platform, then special case your
626 platform with new implementations.
627 ---------------------------------------------------------------------------*/
628
629/* Generic time zone layer -------------------------------------------------- */
630
631/* Time zone utilities */
632U_CAPI void U_EXPORT2
633uprv_tzset()
634{
635#if defined(U_TZSET)
636 U_TZSET();
637#else
638 /* no initialization*/
639#endif
640}
641
642U_CAPI int32_t U_EXPORT2
643uprv_timezone()
644{
645#ifdef U_TIMEZONE
646 return U_TIMEZONE;
647#else
648 time_t t, t1, t2;
649 struct tm tmrec;
650 int32_t tdiff = 0;
651
652 time(&t);
653 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
654#if U_PLATFORM != U_PF_IPHONE
655 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
656#endif
657 t1 = mktime(&tmrec); /* local time in seconds*/
658 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
659 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
660 tdiff = t2 - t1;
661
662#if U_PLATFORM != U_PF_IPHONE
663 /* imitate NT behaviour, which returns same timezone offset to GMT for
664 winter and summer.
665 This does not work on all platforms. For instance, on glibc on Linux
666 and on Mac OS 10.5, tdiff calculated above remains the same
667 regardless of whether DST is in effect or not. iOS is another
668 platform where this does not work. Linux + glibc and Mac OS 10.5
669 have U_TIMEZONE defined so that this code is not reached.
670 */
671 if (dst_checked)
672 tdiff += 3600;
673#endif
674 return tdiff;
675#endif
676}
677
678/* Note that U_TZNAME does *not* have to be tzname, but if it is,
679 some platforms need to have it declared here. */
680
681#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
682/* RS6000 and others reject char **tzname. */
683extern U_IMPORT char *U_TZNAME[];
684#endif
685
686#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
687/* These platforms are likely to use Olson timezone IDs. */
688/* common targets of the symbolic link at TZDEFAULT are:
689 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
690 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
691 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
692 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
693 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
694 * To avoid checking lots of paths, just check that the target path
695 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
696 */
697
698#define CHECK_LOCALTIME_LINK 1
699#if U_PLATFORM_IS_DARWIN_BASED
700#include <tzfile.h>
701#define TZZONEINFO (TZDIR "/")
702#elif U_PLATFORM == U_PF_SOLARIS
703#define TZDEFAULT "/etc/localtime"
704#define TZZONEINFO "/usr/share/lib/zoneinfo/"
705#define TZ_ENV_CHECK "localtime"
706#else
707#define TZDEFAULT "/etc/localtime"
708#define TZZONEINFO "/usr/share/zoneinfo/"
709#endif
710#define TZZONEINFOTAIL "/zoneinfo/"
711#if U_HAVE_DIRENT_H
712#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
713/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
714 symlinked to /etc/localtime, which makes searchForTZFile return
715 'localtime' when it's the first match. */
716#define TZFILE_SKIP2 "localtime"
717#define SEARCH_TZFILE
718#include <dirent.h> /* Needed to search through system timezone files */
719#endif
/* PATH_MAX-sized storage plus a pointer that starts out NULL.
   NOTE(review): both are presumably filled in by the default-time-zone lookup
   later in this file - confirm against their users. */
static char gTimeZoneBuffer[PATH_MAX];
static char *gTimeZoneBufferPtr = NULL;
722#endif
723
724#if !U_PLATFORM_USES_ONLY_WIN32_API
725#define isNonDigit(ch) (ch < '0' || '9' < ch)
726static UBool isValidOlsonID(const char *id) {
727 int32_t idx = 0;
728
729 /* Determine if this is something like Iceland (Olson ID)
730 or AST4ADT (non-Olson ID) */
731 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
732 idx++;
733 }
734
735 /* If we went through the whole string, then it might be okay.
736 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
737 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
738 The rest of the time it could be an Olson ID. George */
739 return (UBool)(id[idx] == 0
740 || uprv_strcmp(id, "PST8PDT") == 0
741 || uprv_strcmp(id, "MST7MDT") == 0
742 || uprv_strcmp(id, "CST6CDT") == 0
743 || uprv_strcmp(id, "EST5EDT") == 0);
744}
745
746/* On some Unix-like OS, 'posix' subdirectory in
747 /usr/share/zoneinfo replicates the top-level contents. 'right'
748 subdirectory has the same set of files, but individual files
749 are different from those in the top-level directory or 'posix'
750 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
751 has files for UTC.
752 When the first match for /etc/localtime is in either of them
753 (usually in posix because 'right' has different file contents),
754 or TZ environment variable points to one of them, createTimeZone
755 fails because, say, 'posix/America/New_York' is not an Olson
756 timezone id ('America/New_York' is). So, we have to skip
757 'posix/' and 'right/' at the beginning. */
758static void skipZoneIDPrefix(const char** id) {
759 if (uprv_strncmp(*id, "posix/", 6) == 0
760 || uprv_strncmp(*id, "right/", 6) == 0)
761 {
762 *id += 6;
763 }
764}
765#endif
766
767#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
768
/* Converts an offset in hours to whole seconds (truncated to int32_t). */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* One row of the table that maps an abbreviated zone description onto an
   Olson ID; remapShortTimeZone() matches on the first four fields. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds; /* offset in seconds, compared against the caller's offset */
    int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
    const char *stdID; /* standard-time abbreviation */
    const char *dstID; /* daylight-time abbreviation */
    const char *olsonID; /* Olson ID returned on a match */
} OffsetZoneMapping;

/* Values stored in OffsetZoneMapping::daylightType. */
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
779
/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (0) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.

Each row is {offsetSeconds, daylightType, stdID, dstID, olsonID}.
remapShortTimeZone() scans the rows in order and returns the first row whose
first four fields all match.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, 2, "CST", "CST", "Australia/South"},
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, 2, "WST", "WST", "Australia/West"},
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
    {0, 1, "GMT", "IST", "Europe/Dublin"},
    {0, 1, "GMT", "BST", "Europe/London"},
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
    {10800, 1, "WGT", "WGST", "America/Godthab"},
    {10800, 2, "BRT", "BRST", "Brazil/East"},
    {12600, 1, "NST", "NDT", "America/St_Johns"},
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
    {18000, 1, "CST", "CDT", "America/Havana"},
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
    {21600, 0, "CST", "CDT", "America/Guatemala"},
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
};
850
851/*#define DEBUG_TZNAME*/
852
853static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
854{
855 int32_t idx;
856#ifdef DEBUG_TZNAME
857 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
858#endif
859 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
860 {
861 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
862 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
863 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
864 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
865 {
866 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
867 }
868 }
869 return NULL;
870}
871#endif
872
873#ifdef SEARCH_TZFILE
/* Size in bytes of the chunk buffer compareBinaryFiles() reads candidate files into. */
#define MAX_READ_SIZE 512

/* State about the default time zone file that compareBinaryFiles() keeps
   between calls so the file is opened, measured and read only once. */
typedef struct DefaultTZInfo {
    char* defaultTZBuffer;      /* contents of the default TZ file; allocated on first need */
    int64_t defaultTZFileSize;  /* size of the default TZ file; 0 means not measured yet */
    FILE* defaultTZFilePtr;     /* stream for the default TZ file; opened on first use */
    UBool defaultTZstatus;      /* NOTE(review): not touched by compareBinaryFiles() - confirm its meaning with the caller */
    int32_t defaultTZPosition;  /* offset into defaultTZBuffer reached by the current comparison */
} DefaultTZInfo;
883
884/*
885 * This method compares the two files given to see if they are a match.
886 * It is currently use to compare two TZ files.
887 */
888static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
889 FILE* file;
890 int64_t sizeFile;
891 int64_t sizeFileLeft;
892 int32_t sizeFileRead;
893 int32_t sizeFileToRead;
894 char bufferFile[MAX_READ_SIZE];
895 UBool result = TRUE;
896
897 if (tzInfo->defaultTZFilePtr == NULL) {
898 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
899 }
900 file = fopen(TZFileName, "r");
901
902 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
903
904 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
905 /* First check that the file size are equal. */
906 if (tzInfo->defaultTZFileSize == 0) {
907 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
908 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
909 }
910 fseek(file, 0, SEEK_END);
911 sizeFile = ftell(file);
912 sizeFileLeft = sizeFile;
913
914 if (sizeFile != tzInfo->defaultTZFileSize) {
915 result = FALSE;
916 } else {
917 /* Store the data from the files in seperate buffers and
918 * compare each byte to determine equality.
919 */
920 if (tzInfo->defaultTZBuffer == NULL) {
921 rewind(tzInfo->defaultTZFilePtr);
922 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
923 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
924 }
925 rewind(file);
926 while(sizeFileLeft > 0) {
927 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
928 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
929
930 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
931 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
932 result = FALSE;
933 break;
934 }
935 sizeFileLeft -= sizeFileRead;
936 tzInfo->defaultTZPosition += sizeFileRead;
937 }
938 }
939 } else {
940 result = FALSE;
941 }
942
943 if (file != NULL) {
944 fclose(file);
945 }
946
947 return result;
948}
949
950
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
#define SKIP1 "."
#define SKIP2 ".."
/* Forward declaration: searchForTZFile() registers this as the cleanup
   callback for UCLN_COMMON_PUTIL when it first allocates gSearchTZFileResult. */
static UBool U_CALLCONV putil_cleanup(void);
/* Result holder for searchForTZFile(); allocated on first use by that function. */
static CharString *gSearchTZFileResult = NULL;
956
957/*
958 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
959 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
960 */
961static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
962 DIR* dirp = NULL;
963 struct dirent* dirEntry = NULL;
964 char* result = NULL;
965 UErrorCode status = U_ZERO_ERROR;
966
967 /* Save the current path */
968 CharString curpath(path, -1, status);
969 if (U_FAILURE(status)) {
970 goto cleanupAndReturn;
971 }
972
973 dirp = opendir(path);
974 if (dirp == NULL) {
975 goto cleanupAndReturn;
976 }
977
978 if (gSearchTZFileResult == NULL) {
979 gSearchTZFileResult = new CharString;
980 if (gSearchTZFileResult == NULL) {
981 goto cleanupAndReturn;
982 }
983 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
984 }
985
986 /* Check each entry in the directory. */
987 while((dirEntry = readdir(dirp)) != NULL) {
988 const char* dirName = dirEntry->d_name;
989 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
990 && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
991 /* Create a newpath with the new entry to test each entry in the directory. */
992 CharString newpath(curpath, status);
993 newpath.append(dirName, -1, status);
994 if (U_FAILURE(status)) {
995 break;
996 }
997
998 DIR* subDirp = NULL;
999 if ((subDirp = opendir(newpath.data())) != NULL) {
1000 /* If this new path is a directory, make a recursive call with the newpath. */
1001 closedir(subDirp);
1002 newpath.append('/', status);
1003 if (U_FAILURE(status)) {
1004 break;
1005 }
1006 result = searchForTZFile(newpath.data(), tzInfo);
1007 /*
1008 Have to get out here. Otherwise, we'd keep looking
1009 and return the first match in the top-level directory
1010 if there's a match in the top-level. If not, this function
1011 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1012 It worked without this in most cases because we have a fallback of calling
1013 localtime_r to figure out the default timezone.
1014 */
1015 if (result != NULL)
1016 break;
1017 } else {
1018 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1019 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1020 if (amountToSkip > newpath.length()) {
1021 amountToSkip = newpath.length();
1022 }
1023 const char* zoneid = newpath.data() + amountToSkip;
1024 skipZoneIDPrefix(&zoneid);
1025 gSearchTZFileResult->clear();
1026 gSearchTZFileResult->append(zoneid, -1, status);
1027 if (U_FAILURE(status)) {
1028 break;
1029 }
1030 result = gSearchTZFileResult->data();
1031 /* Get out after the first one found. */
1032 break;
1033 }
1034 }
1035 }
1036 }
1037
1038 cleanupAndReturn:
1039 if (dirp) {
1040 closedir(dirp);
1041 }
1042 return result;
1043}
1044#endif
1045
1046U_CAPI void U_EXPORT2
1047uprv_tzname_clear_cache()
1048{
1049#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1050 gTimeZoneBufferPtr = NULL;
1051#endif
1052}
1053
/*
 * Returns an identifier for the host's default time zone.
 *
 * n - index into U_TZNAME; only used by the last-resort fallbacks that return
 *     U_TZNAME[n] (or a copy of it on Windows).
 *
 * Sources are tried in this order, depending on the build configuration:
 *
 * Platforms using only the Win32 API:
 *   1. uprv_detectWindowsTimeZone(), when it returns non-NULL.
 *   2. Without U_TZNAME: a uprv_strdup()'d empty string.
 *   3. With U_TZNAME: a uprv_strdup()'d copy of U_TZNAME[n].
 *
 * All other platforms:
 *   1. The TZ environment variable, when isValidOlsonID() accepts it (and, on
 *      Solaris, it differs from TZ_ENV_CHECK). A leading ':' is skipped and
 *      skipZoneIDPrefix() is applied.
 *   2. With CHECK_LOCALTIME_LINK: the cached gTimeZoneBufferPtr when set; otherwise
 *      the target of the TZDEFAULT symlink (the part after TZZONEINFOTAIL), or,
 *      when readlink() fails and SEARCH_TZFILE is defined, the result of
 *      searchForTZFile().
 *   3. With U_TZNAME: the Olson ID that remapShortTimeZone() derives from the
 *      abbreviations, the daylight saving pattern and the UTC offset; failing
 *      that, U_TZNAME[n].
 *   4. Otherwise the empty string literal.
 *
 * The function reads and writes file-scope state without locking; callers must
 * handle threading.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != NULL) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location.
    // If we don't have a name already, then tzname wouldn't be any
    // better, so just fall back.
    return uprv_strdup("");
#endif // !U_TZNAME

#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != NULL) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
    tzid = getenv("TZ");
    if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == NULL) {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        /* readlink() does not NUL-terminate; one byte is reserved for the terminator written below. */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
            gTimeZoneBuffer[ret] = 0;
            char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);

            /* The zone ID is whatever follows TZZONEINFOTAIL in the link target. */
            if (tzZoneInfoTailPtr != NULL
                && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
            {
                return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
            }
        } else {
#if defined(SEARCH_TZFILE)
            /* TZDEFAULT is not a readable symlink: look for a zoneinfo file with identical contents. */
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            /* NOTE(review): when the search result fails isValidOlsonID() it is not
             * returned here, but gTimeZoneBufferPtr keeps it, so a later call returns
             * it through the cached branch below without validation - confirm this
             * is intended. */
            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already determined the ID. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs. */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        /* DST in effect at the December solstice is checked first, then DST at the June solstice. */
        if(decemberSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_JUNE;
        } else {
            daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    /* No mapping found: hand back the raw abbreviation. */
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1199
1200/* Get and set the ICU data directory --------------------------------------- */
1201
/* Guards the one-time run of dataDirectoryInitFn(); reset by putil_cleanup(). */
static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
/* Current ICU data directory: NULL until set, then either the "" literal or a
 * heap copy made by u_setDataDirectory(). */
static char *gDataDirectory = NULL;

/* Guards the one-time run of TimeZoneDataDirInitFn(); reset by putil_cleanup(). */
UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
/* Time zone files directory; allocated by TimeZoneDataDirInitFn() and deleted by putil_cleanup(). */
static CharString *gTimeZoneFilesDirectory = NULL;

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
 /* Cached result of uprv_getDefaultLocaleID(). */
 static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
 /* True when gCorrectedPOSIXLocale points to uprv_malloc()'d memory that putil_cleanup() must free. */
 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
#endif
1212
1213static UBool U_CALLCONV putil_cleanup(void)
1214{
1215 if (gDataDirectory && *gDataDirectory) {
1216 uprv_free(gDataDirectory);
1217 }
1218 gDataDirectory = NULL;
1219 gDataDirInitOnce.reset();
1220
1221 delete gTimeZoneFilesDirectory;
1222 gTimeZoneFilesDirectory = NULL;
1223 gTimeZoneFilesInitOnce.reset();
1224
1225#ifdef SEARCH_TZFILE
1226 delete gSearchTZFileResult;
1227 gSearchTZFileResult = NULL;
1228#endif
1229
1230#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1231 if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
1232 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
1233 gCorrectedPOSIXLocale = NULL;
1234 gCorrectedPOSIXLocaleHeapAllocated = false;
1235 }
1236#endif
1237 return TRUE;
1238}
1239
1240/*
1241 * Set the data directory.
1242 * Make a copy of the passed string, and set the global data dir to point to it.
1243 */
1244U_CAPI void U_EXPORT2
1245u_setDataDirectory(const char *directory) {
1246 char *newDataDir;
1247 int32_t length;
1248
1249 if(directory==NULL || *directory==0) {
1250 /* A small optimization to prevent the malloc and copy when the
1251 shared library is used, and this is a way to make sure that NULL
1252 is never returned.
1253 */
1254 newDataDir = (char *)"";
1255 }
1256 else {
1257 length=(int32_t)uprv_strlen(directory);
1258 newDataDir = (char *)uprv_malloc(length + 2);
1259 /* Exit out if newDataDir could not be created. */
1260 if (newDataDir == NULL) {
1261 return;
1262 }
1263 uprv_strcpy(newDataDir, directory);
1264
1265#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1266 {
1267 char *p;
1268 while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
1269 *p = U_FILE_SEP_CHAR;
1270 }
1271 }
1272#endif
1273 }
1274
1275 if (gDataDirectory && *gDataDirectory) {
1276 uprv_free(gDataDirectory);
1277 }
1278 gDataDirectory = newDataDir;
1279 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1280}
1281
1282U_CAPI UBool U_EXPORT2
1283uprv_pathIsAbsolute(const char *path)
1284{
1285 if(!path || !*path) {
1286 return FALSE;
1287 }
1288
1289 if(*path == U_FILE_SEP_CHAR) {
1290 return TRUE;
1291 }
1292
1293#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1294 if(*path == U_FILE_ALT_SEP_CHAR) {
1295 return TRUE;
1296 }
1297#endif
1298
1299#if U_PLATFORM_USES_ONLY_WIN32_API
1300 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1301 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1302 path[1] == ':' ) {
1303 return TRUE;
1304 }
1305#endif
1306
1307 return FALSE;
1308}
1309
1310/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1311 (needed for some Darwin ICU build environments) */
1312#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
1313# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1314# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1315# endif
1316#endif
1317
1318#if defined(ICU_DATA_DIR_WINDOWS)
1319// Helper function to get the ICU Data Directory under the Windows directory location.
1320static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1321{
1322 wchar_t windowsPath[MAX_PATH];
1323 char windowsPathUtf8[MAX_PATH];
1324
1325 UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1326 if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1327 // Convert UTF-16 to a UTF-8 string.
1328 UErrorCode status = U_ZERO_ERROR;
1329 int32_t windowsPathUtf8Len = 0;
1330 u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1331 &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
1332
1333 if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1334 (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1335 // Ensure it always has a separator, so we can append the ICU data path.
1336 if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1337 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1338 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1339 }
1340 // Check if the concatenated string will fit.
1341 if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1342 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1343 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1344 return TRUE;
1345 }
1346 }
1347 }
1348
1349 return FALSE;
1350}
1351#endif
1352
/*
 * One-time initializer for the ICU data directory; u_getDataDirectory() runs it
 * through gDataDirInitOnce. It picks a path and passes it to u_setDataDirectory().
 *
 * Candidates, in the order the code considers them:
 *   1. Nothing is done when gDataDirectory is already set.
 *   2. The ICU_DATA environment variable, unless ICU_NO_USER_DATA_OVERRIDE is
 *      defined, file I/O is configured out, or the platform uses the UWP API.
 *   3. If that is unset or empty and a compile-time directory exists:
 *      ICU_DATA_DIR, else U_ICU_DATA_DEFAULT_DIR, prefixed with the value of the
 *      environment variable named by ICU_DATA_DIR_PREFIX_ENV_VAR when that macro
 *      is defined and the variable is set.
 *   4. With ICU_DATA_DIR_WINDOWS defined: the directory under the Windows
 *      directory, which replaces any earlier choice when the lookup succeeds.
 *   5. The empty string when nothing above produced a path.
 */
static void U_CALLCONV dataDirectoryInitFn() {
    /* If we already have the directory, then return immediately. Will happen if user called
     * u_setDataDirectory().
     */
    if (gDataDirectory) {
        return;
    }

    const char *path = NULL;
#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
    /* Holds "<prefix><path>" when a prefix environment variable is in effect. */
    char datadir_path_buffer[PATH_MAX];
#endif

    /*
    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
    override ICU's data with the ICU_DATA environment variable. This prevents
    problems where multiple custom copies of ICU's specific version of data
    are installed on a system. Either the application must define the data
    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
    ICU, set the data with udata_setCommonData or trust that all of the
    required data is contained in ICU's data library that contains
    the entry point defined by U_ICUDATA_ENTRY_POINT.

    There may also be some platforms where environment variables
    are not allowed.
    */
#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
    /* First try to get the environment variable */
#     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
        path=getenv("ICU_DATA");
#     endif
#   endif

    /* ICU_DATA_DIR may be set as a compile option.
     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
     * and is used only when data is built in archive mode eliminating the need
     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
     * set their own path.
     */
#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
    if(path==NULL || *path==0) {
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
# endif
# ifdef ICU_DATA_DIR
        path=ICU_DATA_DIR;
# else
        path=U_ICU_DATA_DEFAULT_DIR;
# endif
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        if (prefix != NULL) {
            /* snprintf() truncates silently if prefix + path exceed PATH_MAX. */
            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
            path=datadir_path_buffer;
        }
# endif
    }
#endif

#if defined(ICU_DATA_DIR_WINDOWS)
    /* This choice is made last and therefore overrides anything selected above. */
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        path = datadir_path_buffer;
    }
#endif

    if(path==NULL) {
        /* It looks really bad, set it to something. */
        path = "";
    }

    /* u_setDataDirectory() stores its own copy, so pointing at the local buffer is fine. */
    u_setDataDirectory(path);
    return;
}
1427
1428U_CAPI const char * U_EXPORT2
1429u_getDataDirectory(void) {
1430 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1431 return gDataDirectory;
1432}
1433
1434static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1435 if (U_FAILURE(status)) {
1436 return;
1437 }
1438 gTimeZoneFilesDirectory->clear();
1439 gTimeZoneFilesDirectory->append(path, status);
1440#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1441 char *p = gTimeZoneFilesDirectory->data();
1442 while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
1443 *p = U_FILE_SEP_CHAR;
1444 }
1445#endif
1446}
1447
1448#define TO_STRING(x) TO_STRING_2(x)
1449#define TO_STRING_2(x) #x
1450
1451static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1452 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1453 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1454 gTimeZoneFilesDirectory = new CharString();
1455 if (gTimeZoneFilesDirectory == NULL) {
1456 status = U_MEMORY_ALLOCATION_ERROR;
1457 return;
1458 }
1459
1460 const char *dir = "";
1461
1462#if U_PLATFORM_HAS_WINUWP_API == 1
1463// The UWP version does not support the environment variable setting.
1464
1465# if defined(ICU_DATA_DIR_WINDOWS)
1466 // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
1467 char datadir_path_buffer[MAX_PATH];
1468 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1469 dir = datadir_path_buffer;
1470 }
1471# endif
1472
1473#else
1474 dir = getenv("ICU_TIMEZONE_FILES_DIR");
1475#endif // U_PLATFORM_HAS_WINUWP_API
1476
1477#if defined(U_TIMEZONE_FILES_DIR)
1478 if (dir == NULL) {
1479 // Build time configuration setting.
1480 dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1481 }
1482#endif
1483
1484 if (dir == NULL) {
1485 dir = "";
1486 }
1487
1488 setTimeZoneFilesDir(dir, status);
1489}
1490
1491
1492U_CAPI const char * U_EXPORT2
1493u_getTimeZoneFilesDirectory(UErrorCode *status) {
1494 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1495 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1496}
1497
1498U_CAPI void U_EXPORT2
1499u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1500 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1501 setTimeZoneFilesDir(path, *status);
1502
1503 // Note: this function does some extra churn, first setting based on the
1504 // environment, then immediately replacing with the value passed in.
1505 // The logic is simpler that way, and performance shouldn't be an issue.
1506}
1507
1508
1509#if U_POSIX_LOCALE
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
 *
 * Lookup order for a supported category:
 *   1. setlocale(category, NULL);
 *   2. if that is NULL, "C" or "POSIX": the LC_ALL environment variable, then
 *      the variable named after the category (LC_MESSAGES or LC_CTYPE), then LANG.
 *      On Solaris an empty variable is treated like an unset one.
 * If the result is still NULL, "C" or "POSIX" - which is always the case for an
 * unsupported category - "en_US_POSIX" is returned.
 *
 * The returned pointer refers to storage owned by the C library, the
 * environment, or a string literal; the caller must not free it.
 */
static const char *uprv_getPOSIXIDForCategory(int category)
{
    const char* posixID = NULL;
    if (category == LC_MESSAGES || category == LC_CTYPE) {
        /*
        * On Solaris two different calls to setlocale can result in
        * different values. Only get this value once.
        *
        * We must check this first because an application can set this.
        *
        * LC_ALL can't be used because it's platform dependent. The LANG
        * environment variable seems to affect LC_CTYPE variable by default.
        * Here is what setlocale(LC_ALL, NULL) can return.
        * HPUX can return 'C C C C C C C'
        * Solaris can return /en_US/C/C/C/C/C on the second try.
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
        *
        * The default codepage detection also needs to use LC_CTYPE.
        *
        * Do not call setlocale(LC_*, "")! Using an empty string instead
        * of NULL, will modify the libc behavior.
        */
        posixID = setlocale(category, NULL);
        if ((posixID == 0)
            || (uprv_strcmp("C", posixID) == 0)
            || (uprv_strcmp("POSIX", posixID) == 0))
        {
            /* Maybe we got some garbage. Try something more reasonable */
            posixID = getenv("LC_ALL");
            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
             * This is needed to properly handle empty env. variables
             */
            /* The two preprocessor branches below open the same pair of nested
             * blocks; they differ only in whether an empty string counts as unset. */
#if U_PLATFORM == U_PF_SOLARIS
            if ((posixID == 0) || (posixID[0] == '\0')) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if ((posixID == 0) || (posixID[0] == '\0')) {
#else
            if (posixID == 0) {
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                if (posixID == 0) {
#endif
                    posixID = getenv("LANG");
                }
            }
        }
    }
    if ((posixID==0)
        || (uprv_strcmp("C", posixID) == 0)
        || (uprv_strcmp("POSIX", posixID) == 0))
    {
        /* Nothing worked.  Give it a nice POSIX default value. */
        posixID = "en_US_POSIX";
        // Note: this test will not catch 'C.UTF-8',
        // that will be handled in uprv_getDefaultLocaleID().
        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
        // caller which expects to see "en_US_POSIX" in many branches.
    }
    return posixID;
}
1573
1574/* Return just the POSIX id for the default locale, whatever happens to be in
1575 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1576 */
1577static const char *uprv_getPOSIXIDForDefaultLocale(void)
1578{
1579 static const char* posixID = NULL;
1580 if (posixID == 0) {
1581 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1582 }
1583 return posixID;
1584}
1585
1586#if !U_CHARSET_IS_UTF8
1587/* Return just the POSIX id for the default codepage, whatever happens to be in
1588 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1589 */
1590static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1591{
1592 static const char* posixID = NULL;
1593 if (posixID == 0) {
1594 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1595 }
1596 return posixID;
1597}
1598#endif
1599#endif
1600
1601/* NOTE: The caller should handle thread safety */
1602U_CAPI const char* U_EXPORT2
1603uprv_getDefaultLocaleID()
1604{
1605#if U_POSIX_LOCALE
1606/*
1607 Note that: (a '!' means the ID is improper somehow)
1608 LC_ALL ----> default_loc codepage
1609--------------------------------------------------------
1610 ab.CD ab CD
1611 ab@CD ab__CD -
1612 ab@CD.EF ab__CD EF
1613
1614 ab_CD.EF@GH ab_CD_GH EF
1615
1616Some 'improper' ways to do the same as above:
1617 ! ab_CD@GH.EF ab_CD_GH EF
1618 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1619 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1620
1621 _CD@GH _CD_GH -
1622 _CD.EF@GH _CD_GH EF
1623
1624The variant cannot have dots in it.
1625The 'rightmost' variant (@xxx) wins.
1626The leftmost codepage (.xxx) wins.
1627*/
1628 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1629
1630 /* Format: (no spaces)
1631 ll [ _CC ] [ . MM ] [ @ VV]
1632
1633 l = lang, C = ctry, M = charmap, V = variant
1634 */
1635
1636 if (gCorrectedPOSIXLocale != nullptr) {
1637 return gCorrectedPOSIXLocale;
1638 }
1639
1640 // Copy the ID into owned memory.
1641 // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1642 char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1643 if (correctedPOSIXLocale == nullptr) {
1644 return nullptr;
1645 }
1646 uprv_strcpy(correctedPOSIXLocale, posixID);
1647
1648 char *limit;
1649 if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1650 *limit = 0;
1651 }
1652 if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1653 *limit = 0;
1654 }
1655
1656 if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1657 || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1658 // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1659 // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1660 uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1661 }
1662
1663 /* Note that we scan the *uncorrected* ID. */
1664 const char *p;
1665 if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1666 p++;
1667
1668 /* Take care of any special cases here.. */
1669 if (!uprv_strcmp(p, "nynorsk")) {
1670 p = "NY";
1671 /* Don't worry about no__NY. In practice, it won't appear. */
1672 }
1673
1674 if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1675 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1676 }
1677 else {
1678 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1679 }
1680
1681 const char *q;
1682 if ((q = uprv_strchr(p, '.')) != nullptr) {
1683 /* How big will the resulting string be? */
1684 int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1685 uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1686 correctedPOSIXLocale[len] = 0;
1687 }
1688 else {
1689 /* Anything following the @ sign */
1690 uprv_strcat(correctedPOSIXLocale, p);
1691 }
1692
1693 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1694 * How about 'russian' -> 'ru'?
1695 * Many of the other locales using ISO codes will be handled by the
1696 * canonicalization functions in uloc_getDefault.
1697 */
1698 }
1699
1700 if (gCorrectedPOSIXLocale == nullptr) {
1701 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1702 gCorrectedPOSIXLocaleHeapAllocated = true;
1703 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1704 correctedPOSIXLocale = nullptr;
1705 }
1706 posixID = gCorrectedPOSIXLocale;
1707
1708 if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */
1709 uprv_free(correctedPOSIXLocale);
1710 }
1711
1712 return posixID;
1713
1714#elif U_PLATFORM_USES_ONLY_WIN32_API
1715#define POSIX_LOCALE_CAPACITY 64
1716 UErrorCode status = U_ZERO_ERROR;
1717 char *correctedPOSIXLocale = nullptr;
1718
1719 // If we have already figured this out just use the cached value
1720 if (gCorrectedPOSIXLocale != nullptr) {
1721 return gCorrectedPOSIXLocale;
1722 }
1723
1724 // No cached value, need to determine the current value
1725 static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1726 int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1727
1728 // Now we should have a Windows locale name that needs converted to the POSIX style.
1729 if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1730 {
1731 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1732 char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1733
1734 int32_t i;
1735 for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1736 {
1737 if (windowsLocale[i] == '_')
1738 {
1739 modifiedWindowsLocale[i] = '-';
1740 }
1741 else
1742 {
1743 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1744 }
1745
1746 if (modifiedWindowsLocale[i] == '\0')
1747 {
1748 break;
1749 }
1750 }
1751
1752 if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1753 {
1754 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1755 // locale when tags are dropped
1756 modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1757 }
1758
1759 // Now normalize the resulting name
1760 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1761 /* TODO: Should we just exit on memory allocation failure? */
1762 if (correctedPOSIXLocale)
1763 {
1764 int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1765 if (U_SUCCESS(status))
1766 {
1767 *(correctedPOSIXLocale + posixLen) = 0;
1768 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1769 gCorrectedPOSIXLocaleHeapAllocated = true;
1770 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1771 }
1772 else
1773 {
1774 uprv_free(correctedPOSIXLocale);
1775 }
1776 }
1777 }
1778
1779 // If unable to find a locale we can agree upon, use en-US by default
1780 if (gCorrectedPOSIXLocale == nullptr) {
1781 gCorrectedPOSIXLocale = "en_US";
1782 }
1783 return gCorrectedPOSIXLocale;
1784
1785#elif U_PLATFORM == U_PF_OS400
1786 /* locales are process scoped and are by definition thread safe */
1787 static char correctedLocale[64];
1788 const char *localeID = getenv("LC_ALL");
1789 char *p;
1790
1791 if (localeID == NULL)
1792 localeID = getenv("LANG");
1793 if (localeID == NULL)
1794 localeID = setlocale(LC_ALL, NULL);
1795 /* Make sure we have something... */
1796 if (localeID == NULL)
1797 return "en_US_POSIX";
1798
1799 /* Extract the locale name from the path. */
1800 if((p = uprv_strrchr(localeID, '/')) != NULL)
1801 {
1802 /* Increment p to start of locale name. */
1803 p++;
1804 localeID = p;
1805 }
1806
1807 /* Copy to work location. */
1808 uprv_strcpy(correctedLocale, localeID);
1809
1810 /* Strip off the '.locale' extension. */
1811 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1812 *p = 0;
1813 }
1814
1815 /* Upper case the locale name. */
1816 T_CString_toUpperCase(correctedLocale);
1817
1818 /* See if we are using the POSIX locale. Any of the
1819 * following are equivalent and use the same QLGPGCMA
1820 * (POSIX) locale.
1821 * QLGPGCMA2 means UCS2
1822 * QLGPGCMA_4 means UTF-32
1823 * QLGPGCMA_8 means UTF-8
1824 */
1825 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1826 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1827 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1828 {
1829 uprv_strcpy(correctedLocale, "en_US_POSIX");
1830 }
1831 else
1832 {
1833 int16_t LocaleLen;
1834
1835 /* Lower case the lang portion. */
1836 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1837 {
1838 *p = uprv_tolower(*p);
1839 }
1840
1841 /* Adjust for Euro. After '_E' add 'URO'. */
1842 LocaleLen = uprv_strlen(correctedLocale);
1843 if (correctedLocale[LocaleLen - 2] == '_' &&
1844 correctedLocale[LocaleLen - 1] == 'E')
1845 {
1846 uprv_strcat(correctedLocale, "URO");
1847 }
1848
1849 /* If using Lotus-based locale then convert to
1850 * equivalent non Lotus.
1851 */
1852 else if (correctedLocale[LocaleLen - 2] == '_' &&
1853 correctedLocale[LocaleLen - 1] == 'L')
1854 {
1855 correctedLocale[LocaleLen - 2] = 0;
1856 }
1857
1858 /* There are separate simplified and traditional
1859 * locales called zh_HK_S and zh_HK_T.
1860 */
1861 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1862 {
1863 uprv_strcpy(correctedLocale, "zh_HK");
1864 }
1865
1866 /* A special zh_CN_GBK locale...
1867 */
1868 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1869 {
1870 uprv_strcpy(correctedLocale, "zh_CN");
1871 }
1872
1873 }
1874
1875 return correctedLocale;
1876#endif
1877
1878}
1879
1880#if !U_CHARSET_IS_UTF8
1881#if U_POSIX_LOCALE
1882/*
1883Due to various platform differences, one platform may specify a charset,
1884when they really mean a different charset. Remap the names so that they are
1885compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1886here. Before adding anything to this function, please consider adding unique
1887names to the ICU alias table in the data directory.
1888*/
1889static const char*
1890remapPlatformDependentCodepage(const char *locale, const char *name) {
1891 if (locale != NULL && *locale == 0) {
1892 /* Make sure that an empty locale is handled the same way. */
1893 locale = NULL;
1894 }
1895 if (name == NULL) {
1896 return NULL;
1897 }
1898#if U_PLATFORM == U_PF_AIX
1899 if (uprv_strcmp(name, "IBM-943") == 0) {
1900 /* Use the ASCII compatible ibm-943 */
1901 name = "Shift-JIS";
1902 }
1903 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1904 /* Use the windows-1252 that contains the Euro */
1905 name = "IBM-5348";
1906 }
1907#elif U_PLATFORM == U_PF_SOLARIS
1908 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1909 /* Solaris underspecifies the "EUC" name. */
1910 if (uprv_strcmp(locale, "zh_CN") == 0) {
1911 name = "EUC-CN";
1912 }
1913 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1914 name = "EUC-TW";
1915 }
1916 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1917 name = "EUC-KR";
1918 }
1919 }
1920 else if (uprv_strcmp(name, "eucJP") == 0) {
1921 /*
1922 ibm-954 is the best match.
1923 ibm-33722 is the default for eucJP (similar to Windows).
1924 */
1925 name = "eucjis";
1926 }
1927 else if (uprv_strcmp(name, "646") == 0) {
1928 /*
1929 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1930 * ISO-8859-1 instead of US-ASCII(646).
1931 */
1932 name = "ISO-8859-1";
1933 }
1934#elif U_PLATFORM_IS_DARWIN_BASED
1935 if (locale == NULL && *name == 0) {
1936 /*
1937 No locale was specified, and an empty name was passed in.
1938 This usually indicates that nl_langinfo didn't return valid information.
1939 Mac OS X uses UTF-8 by default (especially the locale data and console).
1940 */
1941 name = "UTF-8";
1942 }
1943 else if (uprv_strcmp(name, "CP949") == 0) {
1944 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1945 name = "EUC-KR";
1946 }
1947 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1948 /*
1949 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1950 */
1951 name = "UTF-8";
1952 }
1953#elif U_PLATFORM == U_PF_BSD
1954 if (uprv_strcmp(name, "CP949") == 0) {
1955 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1956 name = "EUC-KR";
1957 }
1958#elif U_PLATFORM == U_PF_HPUX
1959 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1960 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1961 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1962 name = "hkbig5";
1963 }
1964 else if (uprv_strcmp(name, "eucJP") == 0) {
1965 /*
1966 ibm-1350 is the best match, but unavailable.
1967 ibm-954 is mostly a superset of ibm-1350.
1968 ibm-33722 is the default for eucJP (similar to Windows).
1969 */
1970 name = "eucjis";
1971 }
1972#elif U_PLATFORM == U_PF_LINUX
1973 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1974 /* Linux underspecifies the "EUC" name. */
1975 if (uprv_strcmp(locale, "korean") == 0) {
1976 name = "EUC-KR";
1977 }
1978 else if (uprv_strcmp(locale, "japanese") == 0) {
1979 /* See comment below about eucJP */
1980 name = "eucjis";
1981 }
1982 }
1983 else if (uprv_strcmp(name, "eucjp") == 0) {
1984 /*
1985 ibm-1350 is the best match, but unavailable.
1986 ibm-954 is mostly a superset of ibm-1350.
1987 ibm-33722 is the default for eucJP (similar to Windows).
1988 */
1989 name = "eucjis";
1990 }
1991 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
1992 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
1993 /*
1994 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1995 */
1996 name = "UTF-8";
1997 }
1998 /*
1999 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2000 * it by falling back to 'US-ASCII' when NULL is returned from this
2001 * function. So, we don't have to worry about it here.
2002 */
2003#endif
2004 /* return NULL when "" is passed in */
2005 if (*name == 0) {
2006 name = NULL;
2007 }
2008 return name;
2009}
2010
2011static const char*
2012getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2013{
2014 char localeBuf[100];
2015 const char *name = NULL;
2016 char *variant = NULL;
2017
2018 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2019 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2020 uprv_strncpy(localeBuf, localeName, localeCapacity);
2021 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2022 name = uprv_strncpy(buffer, name+1, buffCapacity);
2023 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
2024 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
2025 *variant = 0;
2026 }
2027 name = remapPlatformDependentCodepage(localeBuf, name);
2028 }
2029 return name;
2030}
2031#endif
2032
2033static const char*
2034int_getDefaultCodepage()
2035{
2036#if U_PLATFORM == U_PF_OS400
2037 uint32_t ccsid = 37; /* Default to ibm-37 */
2038 static char codepage[64];
2039 Qwc_JOBI0400_t jobinfo;
2040 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2041
2042 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2043 "* ", " ", &error);
2044
2045 if (error.Bytes_Available == 0) {
2046 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2047 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2048 }
2049 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2050 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2051 }
2052 /* else use the default */
2053 }
2054 sprintf(codepage,"ibm-%d", ccsid);
2055 return codepage;
2056
2057#elif U_PLATFORM == U_PF_OS390
2058 static char codepage[64];
2059
2060 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2061 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2062 codepage[63] = 0; /* NULL terminate */
2063
2064 return codepage;
2065
2066#elif U_PLATFORM_USES_ONLY_WIN32_API
2067 static char codepage[64];
2068 DWORD codepageNumber = 0;
2069
2070#if U_PLATFORM_HAS_WINUWP_API == 1
2071 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2072 // have folks use Unicode than a "system" code page, however this is the same
2073 // codepage as the system default locale codepage. (FWIW, the system locale is
2074 // ONLY used for codepage, it should never be used for anything else)
2075 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2076 (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2077#else
2078 // Win32 apps can call GetACP
2079 codepageNumber = GetACP();
2080#endif
2081 // Special case for UTF-8
2082 if (codepageNumber == 65001)
2083 {
2084 return "UTF-8";
2085 }
2086 // Windows codepages can look like windows-1252, so format the found number
2087 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2088 // are between 3 and 19999
2089 if (codepageNumber > 0 && codepageNumber < 20000)
2090 {
2091 sprintf(codepage, "windows-%ld", codepageNumber);
2092 return codepage;
2093 }
2094 // If the codepage number call failed then return UTF-8
2095 return "UTF-8";
2096
2097#elif U_POSIX_LOCALE
2098 static char codesetName[100];
2099 const char *localeName = NULL;
2100 const char *name = NULL;
2101
2102 localeName = uprv_getPOSIXIDForDefaultCodepage();
2103 uprv_memset(codesetName, 0, sizeof(codesetName));
2104 /* On Solaris nl_langinfo returns C locale values unless setlocale
2105 * was called earlier.
2106 */
2107#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2108 /* When available, check nl_langinfo first because it usually gives more
2109 useful names. It depends on LC_CTYPE.
2110 nl_langinfo may use the same buffer as setlocale. */
2111 {
2112 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2113#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2114 /*
2115 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2116 * instead of ASCII.
2117 */
2118 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2119 codeset = remapPlatformDependentCodepage(localeName, codeset);
2120 } else
2121#endif
2122 {
2123 codeset = remapPlatformDependentCodepage(NULL, codeset);
2124 }
2125
2126 if (codeset != NULL) {
2127 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2128 codesetName[sizeof(codesetName)-1] = 0;
2129 return codesetName;
2130 }
2131 }
2132#endif
2133
2134 /* Use setlocale in a nice way, and then check some environment variables.
2135 Maybe the application used setlocale already.
2136 */
2137 uprv_memset(codesetName, 0, sizeof(codesetName));
2138 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2139 if (name) {
2140 /* if we can find the codeset name from setlocale, return that. */
2141 return name;
2142 }
2143
2144 if (*codesetName == 0)
2145 {
2146 /* Everything failed. Return US ASCII (ISO 646). */
2147 (void)uprv_strcpy(codesetName, "US-ASCII");
2148 }
2149 return codesetName;
2150#else
2151 return "US-ASCII";
2152#endif
2153}
2154
2155
2156U_CAPI const char* U_EXPORT2
2157uprv_getDefaultCodepage()
2158{
2159 static char const *name = NULL;
2160 umtx_lock(NULL);
2161 if (name == NULL) {
2162 name = int_getDefaultCodepage();
2163 }
2164 umtx_unlock(NULL);
2165 return name;
2166}
2167#endif /* !U_CHARSET_IS_UTF8 */
2168
2169
2170/* end of platform-specific implementation -------------- */
2171
2172/* version handling --------------------------------------------------------- */
2173
2174U_CAPI void U_EXPORT2
2175u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2176 char *end;
2177 uint16_t part=0;
2178
2179 if(versionArray==NULL) {
2180 return;
2181 }
2182
2183 if(versionString!=NULL) {
2184 for(;;) {
2185 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2186 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2187 break;
2188 }
2189 versionString=end+1;
2190 }
2191 }
2192
2193 while(part<U_MAX_VERSION_LENGTH) {
2194 versionArray[part++]=0;
2195 }
2196}
2197
2198U_CAPI void U_EXPORT2
2199u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2200 if(versionArray!=NULL && versionString!=NULL) {
2201 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2202 int32_t len = u_strlen(versionString);
2203 if(len>U_MAX_VERSION_STRING_LENGTH) {
2204 len = U_MAX_VERSION_STRING_LENGTH;
2205 }
2206 u_UCharsToChars(versionString, versionChars, len);
2207 versionChars[len]=0;
2208 u_versionFromString(versionArray, versionChars);
2209 }
2210}
2211
2212U_CAPI void U_EXPORT2
2213u_versionToString(const UVersionInfo versionArray, char *versionString) {
2214 uint16_t count, part;
2215 uint8_t field;
2216
2217 if(versionString==NULL) {
2218 return;
2219 }
2220
2221 if(versionArray==NULL) {
2222 versionString[0]=0;
2223 return;
2224 }
2225
2226 /* count how many fields need to be written */
2227 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2228 }
2229
2230 if(count <= 1) {
2231 count = 2;
2232 }
2233
2234 /* write the first part */
2235 /* write the decimal field value */
2236 field=versionArray[0];
2237 if(field>=100) {
2238 *versionString++=(char)('0'+field/100);
2239 field%=100;
2240 }
2241 if(field>=10) {
2242 *versionString++=(char)('0'+field/10);
2243 field%=10;
2244 }
2245 *versionString++=(char)('0'+field);
2246
2247 /* write the following parts */
2248 for(part=1; part<count; ++part) {
2249 /* write a dot first */
2250 *versionString++=U_VERSION_DELIMITER;
2251
2252 /* write the decimal field value */
2253 field=versionArray[part];
2254 if(field>=100) {
2255 *versionString++=(char)('0'+field/100);
2256 field%=100;
2257 }
2258 if(field>=10) {
2259 *versionString++=(char)('0'+field/10);
2260 field%=10;
2261 }
2262 *versionString++=(char)('0'+field);
2263 }
2264
2265 /* NUL-terminate */
2266 *versionString=0;
2267}
2268
2269U_CAPI void U_EXPORT2
2270u_getVersion(UVersionInfo versionArray) {
2271 (void)copyright; // Suppress unused variable warning from clang.
2272 u_versionFromString(versionArray, U_ICU_VERSION);
2273}
2274
2275/**
2276 * icucfg.h dependent code
2277 */
2278
2279#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2280
2281#if HAVE_DLFCN_H
2282#ifdef __MVS__
2283#ifndef __SUSV3
2284#define __SUSV3 1
2285#endif
2286#endif
2287#include <dlfcn.h>
2288#endif /* HAVE_DLFCN_H */
2289
2290U_INTERNAL void * U_EXPORT2
2291uprv_dl_open(const char *libName, UErrorCode *status) {
2292 void *ret = NULL;
2293 if(U_FAILURE(*status)) return ret;
2294 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2295 if(ret==NULL) {
2296#ifdef U_TRACE_DYLOAD
2297 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2298#endif
2299 *status = U_MISSING_RESOURCE_ERROR;
2300 }
2301 return ret;
2302}
2303
2304U_INTERNAL void U_EXPORT2
2305uprv_dl_close(void *lib, UErrorCode *status) {
2306 if(U_FAILURE(*status)) return;
2307 dlclose(lib);
2308}
2309
2310U_INTERNAL UVoidFunction* U_EXPORT2
2311uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2312 union {
2313 UVoidFunction *fp;
2314 void *vp;
2315 } uret;
2316 uret.fp = NULL;
2317 if(U_FAILURE(*status)) return uret.fp;
2318 uret.vp = dlsym(lib, sym);
2319 if(uret.vp == NULL) {
2320#ifdef U_TRACE_DYLOAD
2321 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2322#endif
2323 *status = U_MISSING_RESOURCE_ERROR;
2324 }
2325 return uret.fp;
2326}
2327
2328#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2329
2330/* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2332
2333U_INTERNAL void * U_EXPORT2
2334uprv_dl_open(const char *libName, UErrorCode *status) {
2335 HMODULE lib = NULL;
2336
2337 if(U_FAILURE(*status)) return NULL;
2338
2339 lib = LoadLibraryA(libName);
2340
2341 if(lib==NULL) {
2342 *status = U_MISSING_RESOURCE_ERROR;
2343 }
2344
2345 return (void*)lib;
2346}
2347
2348U_INTERNAL void U_EXPORT2
2349uprv_dl_close(void *lib, UErrorCode *status) {
2350 HMODULE handle = (HMODULE)lib;
2351 if(U_FAILURE(*status)) return;
2352
2353 FreeLibrary(handle);
2354
2355 return;
2356}
2357
2358U_INTERNAL UVoidFunction* U_EXPORT2
2359uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2360 HMODULE handle = (HMODULE)lib;
2361 UVoidFunction* addr = NULL;
2362
2363 if(U_FAILURE(*status) || lib==NULL) return NULL;
2364
2365 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2366
2367 if(addr==NULL) {
2368 DWORD lastError = GetLastError();
2369 if(lastError == ERROR_PROC_NOT_FOUND) {
2370 *status = U_MISSING_RESOURCE_ERROR;
2371 } else {
2372 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2373 }
2374 }
2375
2376 return addr;
2377}
2378
2379#else
2380
2381/* No dynamic loading, null (nonexistent) implementation. */
2382
2383U_INTERNAL void * U_EXPORT2
2384uprv_dl_open(const char *libName, UErrorCode *status) {
2385 (void)libName;
2386 if(U_FAILURE(*status)) return NULL;
2387 *status = U_UNSUPPORTED_ERROR;
2388 return NULL;
2389}
2390
2391U_INTERNAL void U_EXPORT2
2392uprv_dl_close(void *lib, UErrorCode *status) {
2393 (void)lib;
2394 if(U_FAILURE(*status)) return;
2395 *status = U_UNSUPPORTED_ERROR;
2396 return;
2397}
2398
2399U_INTERNAL UVoidFunction* U_EXPORT2
2400uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2401 (void)lib;
2402 (void)sym;
2403 if(U_SUCCESS(*status)) {
2404 *status = U_UNSUPPORTED_ERROR;
2405 }
2406 return (UVoidFunction*)NULL;
2407}
2408
2409#endif
2410
2411/*
2412 * Hey, Emacs, please set the following:
2413 *
2414 * Local Variables:
2415 * indent-tabs-mode: nil
2416 * End:
2417 *
2418 */
2419