1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 1997-2016, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10*
11* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12*
13* Date Name Description
14* 04/14/97 aliu Creation.
15* 04/24/97 aliu Added getDefaultDataDirectory() and
16* getDefaultLocaleID().
17* 04/28/97 aliu Rewritten to assume Unix and apply general methods
18* for assumed case. Non-UNIX platforms must be
19* special-cased. Rewrote numeric methods dealing
20* with NaN and Infinity to be platform independent
21* over all IEEE 754 platforms.
22* 05/13/97 aliu Restored sign of timezone
23* (semantics are hours West of GMT)
24* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25* nextDouble..
26* 07/22/98 stephen Added remainder, max, min, trunc
27* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
28* 08/24/98 stephen Added longBitsFromDouble
29* 09/08/98 stephen Minor changes for Mac Port
30* 03/02/99 stephen Removed openFile(). Added AS400 support.
31* Fixed EBCDIC tables
32* 04/15/99 stephen Converted to C.
33* 06/28/99 stephen Removed mutex locking in u_isBigEndian().
34* 08/04/99 jeffrey R. Added OS/2 changes
35* 11/15/99 helena Integrated S/390 IEEE support.
36* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID
37* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage
38* 01/03/08 Steven L. Fake Time Support
39******************************************************************************
40*/
41
42// Defines _XOPEN_SOURCE for access to POSIX functions.
43// Must be before any other #includes.
44#include "uposixdefs.h"
45
46// First, the platform type. Need this for U_PLATFORM.
47#include "unicode/platform.h"
48
49#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50/* tzset isn't defined in strict ANSI on MinGW. */
51#undef __STRICT_ANSI__
52#endif
53
54/*
 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
56 */
57#include <time.h>
58
59#if !U_PLATFORM_USES_ONLY_WIN32_API
60#include <sys/time.h>
61#endif
62
63/* include the rest of the ICU headers */
64#include "unicode/putil.h"
65#include "unicode/ustring.h"
66#include "putilimp.h"
67#include "uassert.h"
68#include "umutex.h"
69#include "cmemory.h"
70#include "cstring.h"
71#include "locmap.h"
72#include "ucln_cmn.h"
73#include "charstr.h"
74
75/* Include standard headers. */
76#include <stdio.h>
77#include <stdlib.h>
78#include <string.h>
79#include <math.h>
80#include <locale.h>
81#include <float.h>
82
83#ifndef U_COMMON_IMPLEMENTATION
84#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see http://userguide.icu-project.org/howtouseicu
85#endif
86
87
88/* include system headers */
89#if U_PLATFORM_USES_ONLY_WIN32_API
90 /*
91 * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92 * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93 * to use native APIs as much as possible?
94 */
95#ifndef WIN32_LEAN_AND_MEAN
96# define WIN32_LEAN_AND_MEAN
97#endif
98# define VC_EXTRALEAN
99# define NOUSER
100# define NOSERVICE
101# define NOIME
102# define NOMCX
103# include <windows.h>
104# include "unicode/uloc.h"
105# include "wintz.h"
106#elif U_PLATFORM == U_PF_OS400
107# include <float.h>
108# include <qusec.h> /* error code structure */
109# include <qusrjobi.h>
110# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */
111# include <mih/testptr.h> /* For uprv_maximumPtr */
112#elif U_PLATFORM == U_PF_OS390
113# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115# include <limits.h>
116# include <unistd.h>
117# if U_PLATFORM == U_PF_SOLARIS
118# ifndef _XPG4_2
119# define _XPG4_2
120# endif
121# endif
122#elif U_PLATFORM == U_PF_QNX
123# include <sys/neutrino.h>
124#endif
125
126/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
129 *
130 */
131
132#if U_HAVE_NL_LANGINFO_CODESET
133#include <langinfo.h>
134#endif
135
136/**
137 * Simple things (presence of functions, etc) should just go in configure.in and be added to
138 * icucfg.h via autoheader.
139 */
140#if U_PLATFORM_IMPLEMENTS_POSIX
141# if U_PLATFORM == U_PF_OS400
142# define HAVE_DLFCN_H 0
143# define HAVE_DLOPEN 0
144# else
145# ifndef HAVE_DLFCN_H
146# define HAVE_DLFCN_H 1
147# endif
148# ifndef HAVE_DLOPEN
149# define HAVE_DLOPEN 1
150# endif
151# endif
152# ifndef HAVE_GETTIMEOFDAY
153# define HAVE_GETTIMEOFDAY 1
154# endif
155#else
156# define HAVE_DLFCN_H 0
157# define HAVE_DLOPEN 0
158# define HAVE_GETTIMEOFDAY 0
159#endif
160
161U_NAMESPACE_USE
162
163/* Define the extension for data files, again... */
164#define DATA_TYPE "dat"
165
166/* Leave this copyright notice here! */
167static const char copyright[] = U_COPYRIGHT_STRING;
168
169/* floating point implementations ------------------------------------------- */
170
/* Mask selecting the sign bit of a 32-bit word. */
#define SIGN 0x80000000U

/*
 * Overlay of a 64-bit integer and a double, used to spell floating-point
 * constants as raw bit patterns.
 * The integer member has to stay first: a brace initializer sets the first
 * member of a union (a C89 feature), and the constants below rely on that.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* The NaN we hand out is a quiet NaN (QNAN) rather than a signalling one (SNAN). */
static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) };

/* Bit pattern of positive infinity. */
static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) };
181
182/*---------------------------------------------------------------------------
183 Platform utilities
184 Our general strategy is to assume we're on a POSIX platform. Platforms which
185 are non-POSIX must declare themselves so. The default POSIX implementation
186 will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
187 functions).
188 ---------------------------------------------------------------------------*/
189
190#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
191# undef U_POSIX_LOCALE
192#else
193# define U_POSIX_LOCALE 1
194#endif
195
196/*
197 WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
198 can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
199*/
200#if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian builds those are the first bytes of the object; otherwise
 * they are the last n bytes.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    (void)n;
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
210
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian builds those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    (void)n;
    return firstByte;
#endif
}
220#endif /* !IEEE_754 */
221
222#if IEEE_754
223static UBool
224u_signBit(double d) {
225 uint8_t hiByte;
226#if U_IS_BIG_ENDIAN
227 hiByte = *(uint8_t *)&d;
228#else
229 hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
230#endif
231 return (hiByte & 0x80) != 0;
232}
233#endif
234
235
236
237#if defined (U_DEBUG_FAKETIME)
238/* Override the clock to test things without having to move the system clock.
239 * Assumes POSIX gettimeofday() will function
240 */
241UDate fakeClock_t0 = 0; /** Time to start the clock from **/
242UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
243UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
244
245static UDate getUTCtime_real() {
246 struct timeval posixTime;
247 gettimeofday(&posixTime, NULL);
248 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
249}
250
251static UDate getUTCtime_fake() {
252 static UMutex fakeClockMutex;
253 umtx_lock(&fakeClockMutex);
254 if(!fakeClock_set) {
255 UDate real = getUTCtime_real();
256 const char *fake_start = getenv("U_FAKETIME_START");
257 if((fake_start!=NULL) && (fake_start[0]!=0)) {
258 sscanf(fake_start,"%lf",&fakeClock_t0);
259 fakeClock_dt = fakeClock_t0 - real;
260 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
261 "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
262 fakeClock_t0, fake_start, fakeClock_dt, real);
263 } else {
264 fakeClock_dt = 0;
265 fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
266 "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
267 }
268 fakeClock_set = TRUE;
269 }
270 umtx_unlock(&fakeClockMutex);
271
272 return getUTCtime_real() + fakeClock_dt;
273}
274#endif
275
276#if U_PLATFORM_USES_ONLY_WIN32_API
277typedef union {
278 int64_t int64;
279 FILETIME fileTime;
280} FileTimeConversion; /* This is like a ULARGE_INTEGER */
281
282/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
283#define EPOCH_BIAS INT64_C(116444736000000000)
284#define HECTONANOSECOND_PER_MILLISECOND 10000
285
286#endif
287
288/*---------------------------------------------------------------------------
289 Universal Implementations
290 These are designed to work on all platforms. Try these, and if they
291 don't work on your platform, then special case your platform with new
292 implementations.
293---------------------------------------------------------------------------*/
294
295U_CAPI UDate U_EXPORT2
296uprv_getUTCtime()
297{
298#if defined(U_DEBUG_FAKETIME)
299 return getUTCtime_fake(); /* Hook for overriding the clock */
300#else
301 return uprv_getRawUTCtime();
302#endif
303}
304
305/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
306U_CAPI UDate U_EXPORT2
307uprv_getRawUTCtime()
308{
309#if U_PLATFORM_USES_ONLY_WIN32_API
310
311 FileTimeConversion winTime;
312 GetSystemTimeAsFileTime(&winTime.fileTime);
313 return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
314#else
315
316#if HAVE_GETTIMEOFDAY
317 struct timeval posixTime;
318 gettimeofday(&posixTime, NULL);
319 return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
320#else
321 time_t epochtime;
322 time(&epochtime);
323 return (UDate)epochtime * U_MILLIS_PER_SECOND;
324#endif
325
326#endif
327}
328
329/*-----------------------------------------------------------------------------
330 IEEE 754
331 These methods detect and return NaN and infinity values for doubles
332 conforming to IEEE 754. Platforms which support this standard include X86,
333 Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
334 If this doesn't work on your platform, you have non-IEEE floating-point, and
335 will need to code your own versions. A naive implementation is to return 0.0
336 for getNaN and getInfinity, and false for isNaN and isInfinite.
337 ---------------------------------------------------------------------------*/
338
339U_CAPI UBool U_EXPORT2
340uprv_isNaN(double number)
341{
342#if IEEE_754
343 BitPatternConversion convertedNumber;
344 convertedNumber.d64 = number;
345 /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
346 return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
347
348#elif U_PLATFORM == U_PF_OS390
349 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
350 sizeof(uint32_t));
351 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
352 sizeof(uint32_t));
353
354 return ((highBits & 0x7F080000L) == 0x7F080000L) &&
355 (lowBits == 0x00000000L);
356
357#else
358 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
359 /* you'll need to replace this default implementation with what's correct*/
360 /* for your platform.*/
361 return number != number;
362#endif
363}
364
365U_CAPI UBool U_EXPORT2
366uprv_isInfinite(double number)
367{
368#if IEEE_754
369 BitPatternConversion convertedNumber;
370 convertedNumber.d64 = number;
371 /* Infinity is exactly 0x7FF0000000000000U. */
372 return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
373#elif U_PLATFORM == U_PF_OS390
374 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
375 sizeof(uint32_t));
376 uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number,
377 sizeof(uint32_t));
378
379 return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
380
381#else
382 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
383 /* value, you'll need to replace this default implementation with what's*/
384 /* correct for your platform.*/
385 return number == (2.0 * number);
386#endif
387}
388
389U_CAPI UBool U_EXPORT2
390uprv_isPositiveInfinity(double number)
391{
392#if IEEE_754 || U_PLATFORM == U_PF_OS390
393 return (UBool)(number > 0 && uprv_isInfinite(number));
394#else
395 return uprv_isInfinite(number);
396#endif
397}
398
399U_CAPI UBool U_EXPORT2
400uprv_isNegativeInfinity(double number)
401{
402#if IEEE_754 || U_PLATFORM == U_PF_OS390
403 return (UBool)(number < 0 && uprv_isInfinite(number));
404
405#else
406 uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
407 sizeof(uint32_t));
408 return((highBits & SIGN) && uprv_isInfinite(number));
409
410#endif
411}
412
413U_CAPI double U_EXPORT2
414uprv_getNaN()
415{
416#if IEEE_754 || U_PLATFORM == U_PF_OS390
417 return gNan.d64;
418#else
419 /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
420 /* you'll need to replace this default implementation with what's correct*/
421 /* for your platform.*/
422 return 0.0;
423#endif
424}
425
426U_CAPI double U_EXPORT2
427uprv_getInfinity()
428{
429#if IEEE_754 || U_PLATFORM == U_PF_OS390
430 return gInf.d64;
431#else
432 /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
433 /* value, you'll need to replace this default implementation with what's*/
434 /* correct for your platform.*/
435 return 0.0;
436#endif
437}
438
439U_CAPI double U_EXPORT2
440uprv_floor(double x)
441{
442 return floor(x);
443}
444
445U_CAPI double U_EXPORT2
446uprv_ceil(double x)
447{
448 return ceil(x);
449}
450
451U_CAPI double U_EXPORT2
452uprv_round(double x)
453{
454 return uprv_floor(x + 0.5);
455}
456
457U_CAPI double U_EXPORT2
458uprv_fabs(double x)
459{
460 return fabs(x);
461}
462
463U_CAPI double U_EXPORT2
464uprv_modf(double x, double* y)
465{
466 return modf(x, y);
467}
468
469U_CAPI double U_EXPORT2
470uprv_fmod(double x, double y)
471{
472 return fmod(x, y);
473}
474
475U_CAPI double U_EXPORT2
476uprv_pow(double x, double y)
477{
478 /* This is declared as "double pow(double x, double y)" */
479 return pow(x, y);
480}
481
482U_CAPI double U_EXPORT2
483uprv_pow10(int32_t x)
484{
485 return pow(10.0, (double)x);
486}
487
488U_CAPI double U_EXPORT2
489uprv_fmax(double x, double y)
490{
491#if IEEE_754
492 /* first handle NaN*/
493 if(uprv_isNaN(x) || uprv_isNaN(y))
494 return uprv_getNaN();
495
496 /* check for -0 and 0*/
497 if(x == 0.0 && y == 0.0 && u_signBit(x))
498 return y;
499
500#endif
501
502 /* this should work for all flt point w/o NaN and Inf special cases */
503 return (x > y ? x : y);
504}
505
506U_CAPI double U_EXPORT2
507uprv_fmin(double x, double y)
508{
509#if IEEE_754
510 /* first handle NaN*/
511 if(uprv_isNaN(x) || uprv_isNaN(y))
512 return uprv_getNaN();
513
514 /* check for -0 and 0*/
515 if(x == 0.0 && y == 0.0 && u_signBit(y))
516 return y;
517
518#endif
519
520 /* this should work for all flt point w/o NaN and Inf special cases */
521 return (x > y ? y : x);
522}
523
524U_CAPI UBool U_EXPORT2
525uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
526 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
527 // This function could be optimized by calling one of those primitives.
528 auto a64 = static_cast<int64_t>(a);
529 auto b64 = static_cast<int64_t>(b);
530 int64_t res64 = a64 + b64;
531 *res = static_cast<int32_t>(res64);
532 return res64 != *res;
533}
534
535U_CAPI UBool U_EXPORT2
536uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
537 // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
538 // This function could be optimized by calling one of those primitives.
539 auto a64 = static_cast<int64_t>(a);
540 auto b64 = static_cast<int64_t>(b);
541 int64_t res64 = a64 * b64;
542 *res = static_cast<int32_t>(res64);
543 return res64 != *res;
544}
545
546/**
547 * Truncates the given double.
548 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
549 * This is different than calling floor() or ceil():
550 * floor(3.3) = 3, floor(-3.3) = -4
551 * ceil(3.3) = 4, ceil(-3.3) = -3
552 */
553U_CAPI double U_EXPORT2
554uprv_trunc(double d)
555{
556#if IEEE_754
557 /* handle error cases*/
558 if(uprv_isNaN(d))
559 return uprv_getNaN();
560 if(uprv_isInfinite(d))
561 return uprv_getInfinity();
562
563 if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */
564 return ceil(d);
565 else
566 return floor(d);
567
568#else
569 return d >= 0 ? floor(d) : ceil(d);
570
571#endif
572}
573
574/**
575 * Return the largest positive number that can be represented by an integer
576 * type of arbitrary bit length.
577 */
578U_CAPI double U_EXPORT2
579uprv_maxMantissa(void)
580{
581 return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
582}
583
584U_CAPI double U_EXPORT2
585uprv_log(double d)
586{
587 return log(d);
588}
589
590U_CAPI void * U_EXPORT2
591uprv_maximumPtr(void * base)
592{
593#if U_PLATFORM == U_PF_OS400
594 /*
595 * With the provided function we should never be out of range of a given segment
596 * (a traditional/typical segment that is). Our segments have 5 bytes for the
597 * id and 3 bytes for the offset. The key is that the casting takes care of
598 * only retrieving the offset portion minus x1000. Hence, the smallest offset
599 * seen in a program is x001000 and when casted to an int would be 0.
600 * That's why we can only add 0xffefff. Otherwise, we would exceed the segment.
601 *
602 * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
603 * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information).
604 * This function determines the activation based on the pointer that is passed in and
605 * calculates the appropriate maximum available size for
606 * each pointer type (TERASPACE and non-TERASPACE)
607 *
608 * Unlike other operating systems, the pointer model isn't determined at
609 * compile time on i5/OS.
610 */
611 if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
612 /* if it is a TERASPACE pointer the max is 2GB - 4k */
613 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
614 }
615 /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
616 return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
617
618#else
619 return U_MAX_PTR(base);
620#endif
621}
622
623/*---------------------------------------------------------------------------
624 Platform-specific Implementations
625 Try these, and if they don't work on your platform, then special case your
626 platform with new implementations.
627 ---------------------------------------------------------------------------*/
628
629/* Generic time zone layer -------------------------------------------------- */
630
631/* Time zone utilities */
632U_CAPI void U_EXPORT2
633uprv_tzset()
634{
635#if defined(U_TZSET)
636 U_TZSET();
637#else
638 /* no initialization*/
639#endif
640}
641
642U_CAPI int32_t U_EXPORT2
643uprv_timezone()
644{
645#ifdef U_TIMEZONE
646 return U_TIMEZONE;
647#else
648 time_t t, t1, t2;
649 struct tm tmrec;
650 int32_t tdiff = 0;
651
652 time(&t);
653 uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
654#if U_PLATFORM != U_PF_IPHONE
655 UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
656#endif
657 t1 = mktime(&tmrec); /* local time in seconds*/
658 uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
659 t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/
660 tdiff = t2 - t1;
661
662#if U_PLATFORM != U_PF_IPHONE
663 /* imitate NT behaviour, which returns same timezone offset to GMT for
664 winter and summer.
665 This does not work on all platforms. For instance, on glibc on Linux
666 and on Mac OS 10.5, tdiff calculated above remains the same
667 regardless of whether DST is in effect or not. iOS is another
668 platform where this does not work. Linux + glibc and Mac OS 10.5
669 have U_TIMEZONE defined so that this code is not reached.
670 */
671 if (dst_checked)
672 tdiff += 3600;
673#endif
674 return tdiff;
675#endif
676}
677
678/* Note that U_TZNAME does *not* have to be tzname, but if it is,
679 some platforms need to have it declared here. */
680
681#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
682/* RS6000 and others reject char **tzname. */
683extern U_IMPORT char *U_TZNAME[];
684#endif
685
686#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
687/* These platforms are likely to use Olson timezone IDs. */
688/* common targets of the symbolic link at TZDEFAULT are:
689 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
690 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
691 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
692 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
693 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
694 * To avoid checking lots of paths, just check that the target path
695 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
696 */
697
698#define CHECK_LOCALTIME_LINK 1
699#if U_PLATFORM_IS_DARWIN_BASED
700#include <tzfile.h>
701#define TZZONEINFO (TZDIR "/")
702#elif U_PLATFORM == U_PF_SOLARIS
703#define TZDEFAULT "/etc/localtime"
704#define TZZONEINFO "/usr/share/lib/zoneinfo/"
705#define TZ_ENV_CHECK "localtime"
706#else
707#define TZDEFAULT "/etc/localtime"
708#define TZZONEINFO "/usr/share/zoneinfo/"
709#endif
710#define TZZONEINFOTAIL "/zoneinfo/"
711#if U_HAVE_DIRENT_H
712#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */
713/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
714 symlinked to /etc/localtime, which makes searchForTZFile return
715 'localtime' when it's the first match. */
716#define TZFILE_SKIP2 "localtime"
717#define SEARCH_TZFILE
718#include <dirent.h> /* Needed to search through system timezone files */
719#endif
/* Path-sized scratch storage. NOTE(review): presumably holds the detected
   default time zone ID; the code that fills it is not in this part of the file. */
static char gTimeZoneBuffer[PATH_MAX];

/* Starts out NULL. NOTE(review): presumably points at the detected time zone
   ID once one has been found; confirm against the code that assigns it. */
static char *gTimeZoneBufferPtr = NULL;
722#endif
723
724#if !U_PLATFORM_USES_ONLY_WIN32_API
725#define isNonDigit(ch) (ch < '0' || '9' < ch)
726static UBool isValidOlsonID(const char *id) {
727 int32_t idx = 0;
728
729 /* Determine if this is something like Iceland (Olson ID)
730 or AST4ADT (non-Olson ID) */
731 while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
732 idx++;
733 }
734
735 /* If we went through the whole string, then it might be okay.
736 The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
737 "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
738 The rest of the time it could be an Olson ID. George */
739 return (UBool)(id[idx] == 0
740 || uprv_strcmp(id, "PST8PDT") == 0
741 || uprv_strcmp(id, "MST7MDT") == 0
742 || uprv_strcmp(id, "CST6CDT") == 0
743 || uprv_strcmp(id, "EST5EDT") == 0);
744}
745
746/* On some Unix-like OS, 'posix' subdirectory in
747 /usr/share/zoneinfo replicates the top-level contents. 'right'
748 subdirectory has the same set of files, but individual files
749 are different from those in the top-level directory or 'posix'
750 because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
751 has files for UTC.
752 When the first match for /etc/localtime is in either of them
753 (usually in posix because 'right' has different file contents),
754 or TZ environment variable points to one of them, createTimeZone
755 fails because, say, 'posix/America/New_York' is not an Olson
756 timezone id ('America/New_York' is). So, we have to skip
757 'posix/' and 'right/' at the beginning. */
758static void skipZoneIDPrefix(const char** id) {
759 if (uprv_strncmp(*id, "posix/", 6) == 0
760 || uprv_strncmp(*id, "right/", 6) == 0)
761 {
762 *id += 6;
763 }
764}
765#endif
766
767#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
768
/* Converts an offset in hours to whole seconds. */
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)

/* Values stored in OffsetZoneMapping::daylightType. */
enum {
    U_DAYLIGHT_NONE     = 0,   /* no daylight saving time */
    U_DAYLIGHT_JUNE     = 1,   /* daylight saving time in June */
    U_DAYLIGHT_DECEMBER = 2    /* daylight saving time in December */
};

/* One row tying an offset and a pair of abbreviations to an Olson ID. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;   /* zone offset, in seconds */
    int32_t daylightType;    /* U_DAYLIGHT_NONE, U_DAYLIGHT_JUNE or U_DAYLIGHT_DECEMBER */
    const char *stdID;       /* abbreviation used for standard time */
    const char *dstID;       /* abbreviation used for daylight time */
    const char *olsonID;     /* Olson ID this combination maps to */
} OffsetZoneMapping;
779
780/*
781This list tries to disambiguate a set of abbreviated timezone IDs and offsets
782and maps it to an Olson ID.
783Before adding anything to this list, take a look at
784icu/source/tools/tzcode/tz.alias
785Sometimes no daylight savings (0) is important to define due to aliases.
786This list can be tested with icu/source/test/compat/tzone.pl
787More values could be added to daylightType to increase precision.
788*/
789static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
790 {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
791 {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
792 {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
793 {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
794 {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
795 {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
796 {-36000, 2, "EST", "EST", "Australia/Sydney"},
797 {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
798 {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
799 {-34200, 2, "CST", "CST", "Australia/South"},
800 {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
801 {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
802 {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
803 {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
804 {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
805 {-28800, 2, "WST", "WST", "Australia/West"},
806 {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
807 {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
808 {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
809 {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
810 {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
811 {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
812 {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
813 {-14400, 1, "AZT", "AZST", "Asia/Baku"},
814 {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
815 {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
816 {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
817 {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
818 {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
819 {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
820 {-3600, 0, "CET", "WEST", "Africa/Algiers"},
821 {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
822 {0, 1, "GMT", "IST", "Europe/Dublin"},
823 {0, 1, "GMT", "BST", "Europe/London"},
824 {0, 0, "WET", "WEST", "Africa/Casablanca"},
825 {0, 0, "WET", "WET", "Africa/El_Aaiun"},
826 {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
827 {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
828 {10800, 1, "PMST", "PMDT", "America/Miquelon"},
829 {10800, 2, "UYT", "UYST", "America/Montevideo"},
830 {10800, 1, "WGT", "WGST", "America/Godthab"},
831 {10800, 2, "BRT", "BRST", "Brazil/East"},
832 {12600, 1, "NST", "NDT", "America/St_Johns"},
833 {14400, 1, "AST", "ADT", "Canada/Atlantic"},
834 {14400, 2, "AMT", "AMST", "America/Cuiaba"},
835 {14400, 2, "CLT", "CLST", "Chile/Continental"},
836 {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
837 {14400, 2, "PYT", "PYST", "America/Asuncion"},
838 {18000, 1, "CST", "CDT", "America/Havana"},
839 {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
840 {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
841 {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
842 {21600, 0, "CST", "CDT", "America/Guatemala"},
843 {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
844 {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
845 {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
846 {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
847 {32400, 1, "AKST", "AKDT", "US/Alaska"},
848 {36000, 1, "HAST", "HADT", "US/Aleutian"}
849};
850
851/*#define DEBUG_TZNAME*/
852
853static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
854{
855 int32_t idx;
856#ifdef DEBUG_TZNAME
857 fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
858#endif
859 for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
860 {
861 if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
862 && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
863 && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
864 && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
865 {
866 return OFFSET_ZONE_MAPPINGS[idx].olsonID;
867 }
868 }
869 return NULL;
870}
871#endif
872
873#ifdef SEARCH_TZFILE
874#define MAX_READ_SIZE 512
875
876typedef struct DefaultTZInfo {
877 char* defaultTZBuffer;
878 int64_t defaultTZFileSize;
879 FILE* defaultTZFilePtr;
880 UBool defaultTZstatus;
881 int32_t defaultTZPosition;
882} DefaultTZInfo;
883
884/*
885 * This method compares the two files given to see if they are a match.
886 * It is currently use to compare two TZ files.
887 */
888static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
889 FILE* file;
890 int64_t sizeFile;
891 int64_t sizeFileLeft;
892 int32_t sizeFileRead;
893 int32_t sizeFileToRead;
894 char bufferFile[MAX_READ_SIZE];
895 UBool result = TRUE;
896
897 if (tzInfo->defaultTZFilePtr == NULL) {
898 tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
899 }
900 file = fopen(TZFileName, "r");
901
902 tzInfo->defaultTZPosition = 0; /* reset position to begin search */
903
904 if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
905 /* First check that the file size are equal. */
906 if (tzInfo->defaultTZFileSize == 0) {
907 fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
908 tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
909 }
910 fseek(file, 0, SEEK_END);
911 sizeFile = ftell(file);
912 sizeFileLeft = sizeFile;
913
914 if (sizeFile != tzInfo->defaultTZFileSize) {
915 result = FALSE;
916 } else {
917 /* Store the data from the files in seperate buffers and
918 * compare each byte to determine equality.
919 */
920 if (tzInfo->defaultTZBuffer == NULL) {
921 rewind(tzInfo->defaultTZFilePtr);
922 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
923 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
924 }
925 rewind(file);
926 while(sizeFileLeft > 0) {
927 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
928 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
929
930 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
931 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
932 result = FALSE;
933 break;
934 }
935 sizeFileLeft -= sizeFileRead;
936 tzInfo->defaultTZPosition += sizeFileRead;
937 }
938 }
939 } else {
940 result = FALSE;
941 }
942
943 if (file != NULL) {
944 fclose(file);
945 }
946
947 return result;
948}
949
950
951/* dirent also lists two entries: "." and ".." that we can safely ignore. */
952#define SKIP1 "."
953#define SKIP2 ".."
954static UBool U_CALLCONV putil_cleanup(void);
955static CharString *gSearchTZFileResult = NULL;
956
957/*
958 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
959 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
960 */
961static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
962 DIR* dirp = NULL;
963 struct dirent* dirEntry = NULL;
964 char* result = NULL;
965 UErrorCode status = U_ZERO_ERROR;
966
967 /* Save the current path */
968 CharString curpath(path, -1, status);
969 if (U_FAILURE(status)) {
970 goto cleanupAndReturn;
971 }
972
973 dirp = opendir(path);
974 if (dirp == NULL) {
975 goto cleanupAndReturn;
976 }
977
978 if (gSearchTZFileResult == NULL) {
979 gSearchTZFileResult = new CharString;
980 if (gSearchTZFileResult == NULL) {
981 goto cleanupAndReturn;
982 }
983 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
984 }
985
986 /* Check each entry in the directory. */
987 while((dirEntry = readdir(dirp)) != NULL) {
988 const char* dirName = dirEntry->d_name;
989 if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
990 && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
991 /* Create a newpath with the new entry to test each entry in the directory. */
992 CharString newpath(curpath, status);
993 newpath.append(dirName, -1, status);
994 if (U_FAILURE(status)) {
995 break;
996 }
997
998 DIR* subDirp = NULL;
999 if ((subDirp = opendir(newpath.data())) != NULL) {
1000 /* If this new path is a directory, make a recursive call with the newpath. */
1001 closedir(subDirp);
1002 newpath.append('/', status);
1003 if (U_FAILURE(status)) {
1004 break;
1005 }
1006 result = searchForTZFile(newpath.data(), tzInfo);
1007 /*
1008 Have to get out here. Otherwise, we'd keep looking
1009 and return the first match in the top-level directory
1010 if there's a match in the top-level. If not, this function
1011 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1012 It worked without this in most cases because we have a fallback of calling
1013 localtime_r to figure out the default timezone.
1014 */
1015 if (result != NULL)
1016 break;
1017 } else {
1018 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1019 int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1020 if (amountToSkip > newpath.length()) {
1021 amountToSkip = newpath.length();
1022 }
1023 const char* zoneid = newpath.data() + amountToSkip;
1024 skipZoneIDPrefix(&zoneid);
1025 gSearchTZFileResult->clear();
1026 gSearchTZFileResult->append(zoneid, -1, status);
1027 if (U_FAILURE(status)) {
1028 break;
1029 }
1030 result = gSearchTZFileResult->data();
1031 /* Get out after the first one found. */
1032 break;
1033 }
1034 }
1035 }
1036 }
1037
1038 cleanupAndReturn:
1039 if (dirp) {
1040 closedir(dirp);
1041 }
1042 return result;
1043}
1044#endif
1045
1046U_CAPI void U_EXPORT2
1047uprv_tzname_clear_cache()
1048{
1049#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1050 gTimeZoneBufferPtr = NULL;
1051#endif
1052}
1053
/*
 * Returns a best-effort name for the host's default time zone.
 *
 * The strategies below are tried in order; which ones exist depends on the
 * build configuration:
 *  - Win32-only builds: uprv_detectWindowsTimeZone(); if that yields nothing
 *    and U_TZNAME is not defined, a heap-allocated empty string.
 *  - Otherwise: the TZ environment variable, when it passes isValidOlsonID()
 *    (a leading ':' and any zone ID prefix are skipped).
 *  - With CHECK_LOCALTIME_LINK: the target of the TZDEFAULT symlink, using the
 *    part after TZZONEINFOTAIL; if readlink() fails and SEARCH_TZFILE is
 *    defined, a search below TZZONEINFO for a file matching TZDEFAULT.
 *    The answer is cached in gTimeZoneBufferPtr (see uprv_tzname_clear_cache()).
 *  - With U_TZNAME: the abbreviations remapped through remapShortTimeZone(),
 *    and finally the raw U_TZNAME[n].
 *  - Without U_TZNAME: the empty string.
 *
 * @param n Index into U_TZNAME, used only by the last-resort return statements.
 * @return The detected ID. Per the comments below, the Win32-only paths hand
 *         back memory that the caller frees; the other paths return pointers
 *         into environment, static or global storage.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();

    if (tzid != NULL) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location.
    // If we don't have a name already, then tzname wouldn't be any
    // better, so just fall back.
    return uprv_strdup("");
#endif // !U_TZNAME

#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != NULL) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
    tzid = getenv("TZ");
    if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* When TZ equals localtime on Solaris, check the /etc/localtime file. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (tzid[0] == ':') {
            tzid++;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr == NULL) {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        /* readlink() does not NUL-terminate, hence the size-1 limit and the explicit terminator below. */
        int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (0 < ret) {
            int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
            gTimeZoneBuffer[ret] = 0;
            char *  tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);

            /* Only the part of the link target after TZZONEINFOTAIL is treated as the ID. */
            if (tzZoneInfoTailPtr != NULL
                && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
            {
                return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
            }
        } else {
            /* readlink() failed (for example TZDEFAULT is a regular file): compare it against the zoneinfo files instead. */
#if defined(SEARCH_TZFILE)
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Free previously allocated memory */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            /* NOTE(review): when the search result fails isValidOlsonID(), gTimeZoneBufferPtr
               stays set, so a later call returns it through the cached branch below even
               though this call falls through - confirm whether that is intended. */
            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
    else {
        /* A previous call already determined (and cached) the answer. */
        return gTimeZoneBufferPtr;
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        int daylightType;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs. */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        if(decemberSol.tm_isdst > 0) {
          daylightType = U_DAYLIGHT_DECEMBER;
        } else if(juneSol.tm_isdst > 0) {
          daylightType = U_DAYLIGHT_JUNE;
        } else {
          daylightType = U_DAYLIGHT_NONE;
        }
        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    /* No remapping was found: hand back the raw abbreviation selected by n. */
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1199
/* Get and set the ICU data directory --------------------------------------- */

/* Guards the one-time run of dataDirectoryInitFn() from u_getDataDirectory(). */
static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
/* Current data directory: either heap memory owned by this file or the literal "" (never freed). */
static char *gDataDirectory = NULL;

/* Guards the one-time run of TimeZoneDataDirInitFn().
 * NOTE(review): unlike gDataDirInitOnce this is not declared static - confirm
 * whether external linkage is needed. */
UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
/* Time zone files directory; allocated in TimeZoneDataDirInitFn(), deleted in putil_cleanup(). */
static CharString *gTimeZoneFilesDirectory = NULL;

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
    /* Cached result of uprv_getDefaultLocaleID(). */
    static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
    /* True when gCorrectedPOSIXLocale points to heap memory that putil_cleanup() must free. */
    static bool gCorrectedPOSIXLocaleHeapAllocated = false;
#endif
1212
1213static UBool U_CALLCONV putil_cleanup(void)
1214{
1215 if (gDataDirectory && *gDataDirectory) {
1216 uprv_free(gDataDirectory);
1217 }
1218 gDataDirectory = NULL;
1219 gDataDirInitOnce.reset();
1220
1221 delete gTimeZoneFilesDirectory;
1222 gTimeZoneFilesDirectory = NULL;
1223 gTimeZoneFilesInitOnce.reset();
1224
1225#ifdef SEARCH_TZFILE
1226 delete gSearchTZFileResult;
1227 gSearchTZFileResult = NULL;
1228#endif
1229
1230#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1231 if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
1232 uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
1233 gCorrectedPOSIXLocale = NULL;
1234 gCorrectedPOSIXLocaleHeapAllocated = false;
1235 }
1236#endif
1237 return TRUE;
1238}
1239
1240/*
1241 * Set the data directory.
1242 * Make a copy of the passed string, and set the global data dir to point to it.
1243 */
1244U_CAPI void U_EXPORT2
1245u_setDataDirectory(const char *directory) {
1246 char *newDataDir;
1247 int32_t length;
1248
1249 if(directory==NULL || *directory==0) {
1250 /* A small optimization to prevent the malloc and copy when the
1251 shared library is used, and this is a way to make sure that NULL
1252 is never returned.
1253 */
1254 newDataDir = (char *)"";
1255 }
1256 else {
1257 length=(int32_t)uprv_strlen(directory);
1258 newDataDir = (char *)uprv_malloc(length + 2);
1259 /* Exit out if newDataDir could not be created. */
1260 if (newDataDir == NULL) {
1261 return;
1262 }
1263 uprv_strcpy(newDataDir, directory);
1264
1265#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1266 {
1267 char *p;
1268 while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
1269 *p = U_FILE_SEP_CHAR;
1270 }
1271 }
1272#endif
1273 }
1274
1275 if (gDataDirectory && *gDataDirectory) {
1276 uprv_free(gDataDirectory);
1277 }
1278 gDataDirectory = newDataDir;
1279 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1280}
1281
1282U_CAPI UBool U_EXPORT2
1283uprv_pathIsAbsolute(const char *path)
1284{
1285 if(!path || !*path) {
1286 return FALSE;
1287 }
1288
1289 if(*path == U_FILE_SEP_CHAR) {
1290 return TRUE;
1291 }
1292
1293#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1294 if(*path == U_FILE_ALT_SEP_CHAR) {
1295 return TRUE;
1296 }
1297#endif
1298
1299#if U_PLATFORM_USES_ONLY_WIN32_API
1300 if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1301 ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1302 path[1] == ':' ) {
1303 return TRUE;
1304 }
1305#endif
1306
1307 return FALSE;
1308}
1309
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
   (needed for some Darwin ICU build environments).
   When the build did not define it, simulator targets fall back to the
   IPHONE_SIMULATOR_ROOT environment variable. dataDirectoryInitFn() reads the
   variable named here and prepends its value to the compiled-in data directory. */
#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
# endif
#endif
1317
1318#if defined(ICU_DATA_DIR_WINDOWS)
1319// Helper function to get the ICU Data Directory under the Windows directory location.
1320static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1321{
1322 wchar_t windowsPath[MAX_PATH];
1323 char windowsPathUtf8[MAX_PATH];
1324
1325 UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1326 if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1327 // Convert UTF-16 to a UTF-8 string.
1328 UErrorCode status = U_ZERO_ERROR;
1329 int32_t windowsPathUtf8Len = 0;
1330 u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1331 &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
1332
1333 if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1334 (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1335 // Ensure it always has a separator, so we can append the ICU data path.
1336 if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1337 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1338 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1339 }
1340 // Check if the concatenated string will fit.
1341 if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1342 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1343 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1344 return TRUE;
1345 }
1346 }
1347 }
1348
1349 return FALSE;
1350}
1351#endif
1352
1353static void U_CALLCONV dataDirectoryInitFn() {
1354 /* If we already have the directory, then return immediately. Will happen if user called
1355 * u_setDataDirectory().
1356 */
1357 if (gDataDirectory) {
1358 return;
1359 }
1360
1361 const char *path = NULL;
1362#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1363 char datadir_path_buffer[PATH_MAX];
1364#endif
1365
1366 /*
1367 When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1368 override ICU's data with the ICU_DATA environment variable. This prevents
1369 problems where multiple custom copies of ICU's specific version of data
1370 are installed on a system. Either the application must define the data
1371 directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1372 ICU, set the data with udata_setCommonData or trust that all of the
1373 required data is contained in ICU's data library that contains
1374 the entry point defined by U_ICUDATA_ENTRY_POINT.
1375
1376 There may also be some platforms where environment variables
1377 are not allowed.
1378 */
1379# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1380 /* First try to get the environment variable */
1381# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv
1382 path=getenv("ICU_DATA");
1383# endif
1384# endif
1385
1386 /* ICU_DATA_DIR may be set as a compile option.
1387 * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1388 * and is used only when data is built in archive mode eliminating the need
1389 * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1390 * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1391 * set their own path.
1392 */
1393#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1394 if(path==NULL || *path==0) {
1395# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1396 const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1397# endif
1398# ifdef ICU_DATA_DIR
1399 path=ICU_DATA_DIR;
1400# else
1401 path=U_ICU_DATA_DEFAULT_DIR;
1402# endif
1403# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1404 if (prefix != NULL) {
1405 snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1406 path=datadir_path_buffer;
1407 }
1408# endif
1409 }
1410#endif
1411
1412#if defined(ICU_DATA_DIR_WINDOWS)
1413 char datadir_path_buffer[MAX_PATH];
1414 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1415 path = datadir_path_buffer;
1416 }
1417#endif
1418
1419 if(path==NULL) {
1420 /* It looks really bad, set it to something. */
1421 path = "";
1422 }
1423
1424 u_setDataDirectory(path);
1425 return;
1426}
1427
1428U_CAPI const char * U_EXPORT2
1429u_getDataDirectory(void) {
1430 umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1431 return gDataDirectory;
1432}
1433
1434static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1435 if (U_FAILURE(status)) {
1436 return;
1437 }
1438 gTimeZoneFilesDirectory->clear();
1439 gTimeZoneFilesDirectory->append(path, status);
1440#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1441 char *p = gTimeZoneFilesDirectory->data();
1442 while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
1443 *p = U_FILE_SEP_CHAR;
1444 }
1445#endif
1446}
1447
/* Two-level stringification: the extra level lets a macro argument be expanded
 * before '#' turns it into a string literal. Used on U_TIMEZONE_FILES_DIR below. */
#define TO_STRING(x) TO_STRING_2(x)
#define TO_STRING_2(x) #x
1450
1451static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1452 U_ASSERT(gTimeZoneFilesDirectory == NULL);
1453 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1454 gTimeZoneFilesDirectory = new CharString();
1455 if (gTimeZoneFilesDirectory == NULL) {
1456 status = U_MEMORY_ALLOCATION_ERROR;
1457 return;
1458 }
1459
1460 const char *dir = "";
1461
1462#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1463 char timezonefilesdir_path_buffer[PATH_MAX];
1464 const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
1465#endif
1466
1467#if U_PLATFORM_HAS_WINUWP_API == 1
1468// The UWP version does not support the environment variable setting.
1469
1470# if defined(ICU_DATA_DIR_WINDOWS)
1471 // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
1472 char datadir_path_buffer[MAX_PATH];
1473 if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1474 dir = datadir_path_buffer;
1475 }
1476# endif
1477
1478#else
1479 dir = getenv("ICU_TIMEZONE_FILES_DIR");
1480#endif // U_PLATFORM_HAS_WINUWP_API
1481
1482#if defined(U_TIMEZONE_FILES_DIR)
1483 if (dir == NULL) {
1484 // Build time configuration setting.
1485 dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1486 }
1487#endif
1488
1489 if (dir == NULL) {
1490 dir = "";
1491 }
1492
1493#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1494 if (prefix != NULL) {
1495 snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", prefix, dir);
1496 dir = timezonefilesdir_path_buffer;
1497 }
1498#endif
1499
1500 setTimeZoneFilesDir(dir, status);
1501}
1502
1503
1504U_CAPI const char * U_EXPORT2
1505u_getTimeZoneFilesDirectory(UErrorCode *status) {
1506 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1507 return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1508}
1509
1510U_CAPI void U_EXPORT2
1511u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1512 umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1513 setTimeZoneFilesDir(path, *status);
1514
1515 // Note: this function does some extra churn, first setting based on the
1516 // environment, then immediately replacing with the value passed in.
1517 // The logic is simpler that way, and performance shouldn't be an issue.
1518}
1519
1520
1521#if U_POSIX_LOCALE
1522/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1523 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1524 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1525 */
1526static const char *uprv_getPOSIXIDForCategory(int category)
1527{
1528 const char* posixID = NULL;
1529 if (category == LC_MESSAGES || category == LC_CTYPE) {
1530 /*
1531 * On Solaris two different calls to setlocale can result in
1532 * different values. Only get this value once.
1533 *
1534 * We must check this first because an application can set this.
1535 *
1536 * LC_ALL can't be used because it's platform dependent. The LANG
1537 * environment variable seems to affect LC_CTYPE variable by default.
1538 * Here is what setlocale(LC_ALL, NULL) can return.
1539 * HPUX can return 'C C C C C C C'
1540 * Solaris can return /en_US/C/C/C/C/C on the second try.
1541 * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1542 *
1543 * The default codepage detection also needs to use LC_CTYPE.
1544 *
1545 * Do not call setlocale(LC_*, "")! Using an empty string instead
1546 * of NULL, will modify the libc behavior.
1547 */
1548 posixID = setlocale(category, NULL);
1549 if ((posixID == 0)
1550 || (uprv_strcmp("C", posixID) == 0)
1551 || (uprv_strcmp("POSIX", posixID) == 0))
1552 {
1553 /* Maybe we got some garbage. Try something more reasonable */
1554 posixID = getenv("LC_ALL");
1555 /* Solaris speaks POSIX - See IEEE Std 1003.1-2008
1556 * This is needed to properly handle empty env. variables
1557 */
1558#if U_PLATFORM == U_PF_SOLARIS
1559 if ((posixID == 0) || (posixID[0] == '\0')) {
1560 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1561 if ((posixID == 0) || (posixID[0] == '\0')) {
1562#else
1563 if (posixID == 0) {
1564 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1565 if (posixID == 0) {
1566#endif
1567 posixID = getenv("LANG");
1568 }
1569 }
1570 }
1571 }
1572 if ((posixID==0)
1573 || (uprv_strcmp("C", posixID) == 0)
1574 || (uprv_strcmp("POSIX", posixID) == 0))
1575 {
1576 /* Nothing worked. Give it a nice POSIX default value. */
1577 posixID = "en_US_POSIX";
1578 // Note: this test will not catch 'C.UTF-8',
1579 // that will be handled in uprv_getDefaultLocaleID().
1580 // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1581 // caller which expects to see "en_US_POSIX" in many branches.
1582 }
1583 return posixID;
1584}
1585
1586/* Return just the POSIX id for the default locale, whatever happens to be in
1587 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1588 */
1589static const char *uprv_getPOSIXIDForDefaultLocale(void)
1590{
1591 static const char* posixID = NULL;
1592 if (posixID == 0) {
1593 posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1594 }
1595 return posixID;
1596}
1597
1598#if !U_CHARSET_IS_UTF8
1599/* Return just the POSIX id for the default codepage, whatever happens to be in
1600 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1601 */
1602static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1603{
1604 static const char* posixID = NULL;
1605 if (posixID == 0) {
1606 posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1607 }
1608 return posixID;
1609}
1610#endif
1611#endif
1612
1613/* NOTE: The caller should handle thread safety */
1614U_CAPI const char* U_EXPORT2
1615uprv_getDefaultLocaleID()
1616{
1617#if U_POSIX_LOCALE
1618/*
1619 Note that: (a '!' means the ID is improper somehow)
1620 LC_ALL ----> default_loc codepage
1621--------------------------------------------------------
1622 ab.CD ab CD
1623 ab@CD ab__CD -
1624 ab@CD.EF ab__CD EF
1625
1626 ab_CD.EF@GH ab_CD_GH EF
1627
1628Some 'improper' ways to do the same as above:
1629 ! ab_CD@GH.EF ab_CD_GH EF
1630 ! ab_CD.EF@GH.IJ ab_CD_GH EF
1631 ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF
1632
1633 _CD@GH _CD_GH -
1634 _CD.EF@GH _CD_GH EF
1635
1636The variant cannot have dots in it.
1637The 'rightmost' variant (@xxx) wins.
1638The leftmost codepage (.xxx) wins.
1639*/
1640 const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1641
1642 /* Format: (no spaces)
1643 ll [ _CC ] [ . MM ] [ @ VV]
1644
1645 l = lang, C = ctry, M = charmap, V = variant
1646 */
1647
1648 if (gCorrectedPOSIXLocale != nullptr) {
1649 return gCorrectedPOSIXLocale;
1650 }
1651
1652 // Copy the ID into owned memory.
1653 // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1654 char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1655 if (correctedPOSIXLocale == nullptr) {
1656 return nullptr;
1657 }
1658 uprv_strcpy(correctedPOSIXLocale, posixID);
1659
1660 char *limit;
1661 if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1662 *limit = 0;
1663 }
1664 if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1665 *limit = 0;
1666 }
1667
1668 if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1669 || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1670 // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1671 // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1672 uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1673 }
1674
1675 /* Note that we scan the *uncorrected* ID. */
1676 const char *p;
1677 if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1678 p++;
1679
1680 /* Take care of any special cases here.. */
1681 if (!uprv_strcmp(p, "nynorsk")) {
1682 p = "NY";
1683 /* Don't worry about no__NY. In practice, it won't appear. */
1684 }
1685
1686 if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1687 uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1688 }
1689 else {
1690 uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1691 }
1692
1693 const char *q;
1694 if ((q = uprv_strchr(p, '.')) != nullptr) {
1695 /* How big will the resulting string be? */
1696 int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1697 uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1698 correctedPOSIXLocale[len] = 0;
1699 }
1700 else {
1701 /* Anything following the @ sign */
1702 uprv_strcat(correctedPOSIXLocale, p);
1703 }
1704
1705 /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1706 * How about 'russian' -> 'ru'?
1707 * Many of the other locales using ISO codes will be handled by the
1708 * canonicalization functions in uloc_getDefault.
1709 */
1710 }
1711
1712 if (gCorrectedPOSIXLocale == nullptr) {
1713 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1714 gCorrectedPOSIXLocaleHeapAllocated = true;
1715 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1716 correctedPOSIXLocale = nullptr;
1717 }
1718 posixID = gCorrectedPOSIXLocale;
1719
1720 if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */
1721 uprv_free(correctedPOSIXLocale);
1722 }
1723
1724 return posixID;
1725
1726#elif U_PLATFORM_USES_ONLY_WIN32_API
1727#define POSIX_LOCALE_CAPACITY 64
1728 UErrorCode status = U_ZERO_ERROR;
1729 char *correctedPOSIXLocale = nullptr;
1730
1731 // If we have already figured this out just use the cached value
1732 if (gCorrectedPOSIXLocale != nullptr) {
1733 return gCorrectedPOSIXLocale;
1734 }
1735
1736 // No cached value, need to determine the current value
1737 static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1738 int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1739
1740 // Now we should have a Windows locale name that needs converted to the POSIX style.
1741 if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1742 {
1743 // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1744 char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1745
1746 int32_t i;
1747 for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1748 {
1749 if (windowsLocale[i] == '_')
1750 {
1751 modifiedWindowsLocale[i] = '-';
1752 }
1753 else
1754 {
1755 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1756 }
1757
1758 if (modifiedWindowsLocale[i] == '\0')
1759 {
1760 break;
1761 }
1762 }
1763
1764 if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1765 {
1766 // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1767 // locale when tags are dropped
1768 modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1769 }
1770
1771 // Now normalize the resulting name
1772 correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1773 /* TODO: Should we just exit on memory allocation failure? */
1774 if (correctedPOSIXLocale)
1775 {
1776 int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1777 if (U_SUCCESS(status))
1778 {
1779 *(correctedPOSIXLocale + posixLen) = 0;
1780 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1781 gCorrectedPOSIXLocaleHeapAllocated = true;
1782 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1783 }
1784 else
1785 {
1786 uprv_free(correctedPOSIXLocale);
1787 }
1788 }
1789 }
1790
1791 // If unable to find a locale we can agree upon, use en-US by default
1792 if (gCorrectedPOSIXLocale == nullptr) {
1793 gCorrectedPOSIXLocale = "en_US";
1794 }
1795 return gCorrectedPOSIXLocale;
1796
1797#elif U_PLATFORM == U_PF_OS400
1798 /* locales are process scoped and are by definition thread safe */
1799 static char correctedLocale[64];
1800 const char *localeID = getenv("LC_ALL");
1801 char *p;
1802
1803 if (localeID == NULL)
1804 localeID = getenv("LANG");
1805 if (localeID == NULL)
1806 localeID = setlocale(LC_ALL, NULL);
1807 /* Make sure we have something... */
1808 if (localeID == NULL)
1809 return "en_US_POSIX";
1810
1811 /* Extract the locale name from the path. */
1812 if((p = uprv_strrchr(localeID, '/')) != NULL)
1813 {
1814 /* Increment p to start of locale name. */
1815 p++;
1816 localeID = p;
1817 }
1818
1819 /* Copy to work location. */
1820 uprv_strcpy(correctedLocale, localeID);
1821
1822 /* Strip off the '.locale' extension. */
1823 if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1824 *p = 0;
1825 }
1826
1827 /* Upper case the locale name. */
1828 T_CString_toUpperCase(correctedLocale);
1829
1830 /* See if we are using the POSIX locale. Any of the
1831 * following are equivalent and use the same QLGPGCMA
1832 * (POSIX) locale.
1833 * QLGPGCMA2 means UCS2
1834 * QLGPGCMA_4 means UTF-32
1835 * QLGPGCMA_8 means UTF-8
1836 */
1837 if ((uprv_strcmp("C", correctedLocale) == 0) ||
1838 (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1839 (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1840 {
1841 uprv_strcpy(correctedLocale, "en_US_POSIX");
1842 }
1843 else
1844 {
1845 int16_t LocaleLen;
1846
1847 /* Lower case the lang portion. */
1848 for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1849 {
1850 *p = uprv_tolower(*p);
1851 }
1852
1853 /* Adjust for Euro. After '_E' add 'URO'. */
1854 LocaleLen = uprv_strlen(correctedLocale);
1855 if (correctedLocale[LocaleLen - 2] == '_' &&
1856 correctedLocale[LocaleLen - 1] == 'E')
1857 {
1858 uprv_strcat(correctedLocale, "URO");
1859 }
1860
1861 /* If using Lotus-based locale then convert to
1862 * equivalent non Lotus.
1863 */
1864 else if (correctedLocale[LocaleLen - 2] == '_' &&
1865 correctedLocale[LocaleLen - 1] == 'L')
1866 {
1867 correctedLocale[LocaleLen - 2] = 0;
1868 }
1869
1870 /* There are separate simplified and traditional
1871 * locales called zh_HK_S and zh_HK_T.
1872 */
1873 else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1874 {
1875 uprv_strcpy(correctedLocale, "zh_HK");
1876 }
1877
1878 /* A special zh_CN_GBK locale...
1879 */
1880 else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1881 {
1882 uprv_strcpy(correctedLocale, "zh_CN");
1883 }
1884
1885 }
1886
1887 return correctedLocale;
1888#endif
1889
1890}
1891
1892#if !U_CHARSET_IS_UTF8
1893#if U_POSIX_LOCALE
1894/*
1895Due to various platform differences, one platform may specify a charset,
1896when they really mean a different charset. Remap the names so that they are
1897compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1898here. Before adding anything to this function, please consider adding unique
1899names to the ICU alias table in the data directory.
1900*/
1901static const char*
1902remapPlatformDependentCodepage(const char *locale, const char *name) {
1903 if (locale != NULL && *locale == 0) {
1904 /* Make sure that an empty locale is handled the same way. */
1905 locale = NULL;
1906 }
1907 if (name == NULL) {
1908 return NULL;
1909 }
1910#if U_PLATFORM == U_PF_AIX
1911 if (uprv_strcmp(name, "IBM-943") == 0) {
1912 /* Use the ASCII compatible ibm-943 */
1913 name = "Shift-JIS";
1914 }
1915 else if (uprv_strcmp(name, "IBM-1252") == 0) {
1916 /* Use the windows-1252 that contains the Euro */
1917 name = "IBM-5348";
1918 }
1919#elif U_PLATFORM == U_PF_SOLARIS
1920 if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1921 /* Solaris underspecifies the "EUC" name. */
1922 if (uprv_strcmp(locale, "zh_CN") == 0) {
1923 name = "EUC-CN";
1924 }
1925 else if (uprv_strcmp(locale, "zh_TW") == 0) {
1926 name = "EUC-TW";
1927 }
1928 else if (uprv_strcmp(locale, "ko_KR") == 0) {
1929 name = "EUC-KR";
1930 }
1931 }
1932 else if (uprv_strcmp(name, "eucJP") == 0) {
1933 /*
1934 ibm-954 is the best match.
1935 ibm-33722 is the default for eucJP (similar to Windows).
1936 */
1937 name = "eucjis";
1938 }
1939 else if (uprv_strcmp(name, "646") == 0) {
1940 /*
1941 * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
1942 * ISO-8859-1 instead of US-ASCII(646).
1943 */
1944 name = "ISO-8859-1";
1945 }
1946#elif U_PLATFORM_IS_DARWIN_BASED
1947 if (locale == NULL && *name == 0) {
1948 /*
1949 No locale was specified, and an empty name was passed in.
1950 This usually indicates that nl_langinfo didn't return valid information.
1951 Mac OS X uses UTF-8 by default (especially the locale data and console).
1952 */
1953 name = "UTF-8";
1954 }
1955 else if (uprv_strcmp(name, "CP949") == 0) {
1956 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1957 name = "EUC-KR";
1958 }
1959 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
1960 /*
1961 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
1962 */
1963 name = "UTF-8";
1964 }
1965#elif U_PLATFORM == U_PF_BSD
1966 if (uprv_strcmp(name, "CP949") == 0) {
1967 /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
1968 name = "EUC-KR";
1969 }
1970#elif U_PLATFORM == U_PF_HPUX
1971 if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
1972 /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
1973 /* zh_TW.big5 is not the same charset as zh_HK.big5! */
1974 name = "hkbig5";
1975 }
1976 else if (uprv_strcmp(name, "eucJP") == 0) {
1977 /*
1978 ibm-1350 is the best match, but unavailable.
1979 ibm-954 is mostly a superset of ibm-1350.
1980 ibm-33722 is the default for eucJP (similar to Windows).
1981 */
1982 name = "eucjis";
1983 }
1984#elif U_PLATFORM == U_PF_LINUX
1985 if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
1986 /* Linux underspecifies the "EUC" name. */
1987 if (uprv_strcmp(locale, "korean") == 0) {
1988 name = "EUC-KR";
1989 }
1990 else if (uprv_strcmp(locale, "japanese") == 0) {
1991 /* See comment below about eucJP */
1992 name = "eucjis";
1993 }
1994 }
1995 else if (uprv_strcmp(name, "eucjp") == 0) {
1996 /*
1997 ibm-1350 is the best match, but unavailable.
1998 ibm-954 is mostly a superset of ibm-1350.
1999 ibm-33722 is the default for eucJP (similar to Windows).
2000 */
2001 name = "eucjis";
2002 }
2003 else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2004 (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2005 /*
2006 * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2007 */
2008 name = "UTF-8";
2009 }
2010 /*
2011 * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2012 * it by falling back to 'US-ASCII' when NULL is returned from this
2013 * function. So, we don't have to worry about it here.
2014 */
2015#endif
2016 /* return NULL when "" is passed in */
2017 if (*name == 0) {
2018 name = NULL;
2019 }
2020 return name;
2021}
2022
2023static const char*
2024getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2025{
2026 char localeBuf[100];
2027 const char *name = NULL;
2028 char *variant = NULL;
2029
2030 if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2031 size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2032 uprv_strncpy(localeBuf, localeName, localeCapacity);
2033 localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2034 name = uprv_strncpy(buffer, name+1, buffCapacity);
2035 buffer[buffCapacity-1] = 0; /* ensure NULL termination */
2036 if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
2037 *variant = 0;
2038 }
2039 name = remapPlatformDependentCodepage(localeBuf, name);
2040 }
2041 return name;
2042}
2043#endif
2044
2045static const char*
2046int_getDefaultCodepage()
2047{
2048#if U_PLATFORM == U_PF_OS400
2049 uint32_t ccsid = 37; /* Default to ibm-37 */
2050 static char codepage[64];
2051 Qwc_JOBI0400_t jobinfo;
2052 Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2053
2054 EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2055 "* ", " ", &error);
2056
2057 if (error.Bytes_Available == 0) {
2058 if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2059 ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2060 }
2061 else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2062 ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2063 }
2064 /* else use the default */
2065 }
2066 sprintf(codepage,"ibm-%d", ccsid);
2067 return codepage;
2068
2069#elif U_PLATFORM == U_PF_OS390
2070 static char codepage[64];
2071
2072 strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2073 strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2074 codepage[63] = 0; /* NULL terminate */
2075
2076 return codepage;
2077
2078#elif U_PLATFORM_USES_ONLY_WIN32_API
2079 static char codepage[64];
2080 DWORD codepageNumber = 0;
2081
2082#if U_PLATFORM_HAS_WINUWP_API == 1
2083 // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2084 // have folks use Unicode than a "system" code page, however this is the same
2085 // codepage as the system default locale codepage. (FWIW, the system locale is
2086 // ONLY used for codepage, it should never be used for anything else)
2087 GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2088 (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2089#else
2090 // Win32 apps can call GetACP
2091 codepageNumber = GetACP();
2092#endif
2093 // Special case for UTF-8
2094 if (codepageNumber == 65001)
2095 {
2096 return "UTF-8";
2097 }
2098 // Windows codepages can look like windows-1252, so format the found number
2099 // the numbers are eclectic, however all valid system code pages, besides UTF-8
2100 // are between 3 and 19999
2101 if (codepageNumber > 0 && codepageNumber < 20000)
2102 {
2103 sprintf(codepage, "windows-%ld", codepageNumber);
2104 return codepage;
2105 }
2106 // If the codepage number call failed then return UTF-8
2107 return "UTF-8";
2108
2109#elif U_POSIX_LOCALE
2110 static char codesetName[100];
2111 const char *localeName = NULL;
2112 const char *name = NULL;
2113
2114 localeName = uprv_getPOSIXIDForDefaultCodepage();
2115 uprv_memset(codesetName, 0, sizeof(codesetName));
2116 /* On Solaris nl_langinfo returns C locale values unless setlocale
2117 * was called earlier.
2118 */
2119#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2120 /* When available, check nl_langinfo first because it usually gives more
2121 useful names. It depends on LC_CTYPE.
2122 nl_langinfo may use the same buffer as setlocale. */
2123 {
2124 const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2125#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2126 /*
2127 * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2128 * instead of ASCII.
2129 */
2130 if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2131 codeset = remapPlatformDependentCodepage(localeName, codeset);
2132 } else
2133#endif
2134 {
2135 codeset = remapPlatformDependentCodepage(NULL, codeset);
2136 }
2137
2138 if (codeset != NULL) {
2139 uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2140 codesetName[sizeof(codesetName)-1] = 0;
2141 return codesetName;
2142 }
2143 }
2144#endif
2145
2146 /* Use setlocale in a nice way, and then check some environment variables.
2147 Maybe the application used setlocale already.
2148 */
2149 uprv_memset(codesetName, 0, sizeof(codesetName));
2150 name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2151 if (name) {
2152 /* if we can find the codeset name from setlocale, return that. */
2153 return name;
2154 }
2155
2156 if (*codesetName == 0)
2157 {
2158 /* Everything failed. Return US ASCII (ISO 646). */
2159 (void)uprv_strcpy(codesetName, "US-ASCII");
2160 }
2161 return codesetName;
2162#else
2163 return "US-ASCII";
2164#endif
2165}
2166
2167
2168U_CAPI const char* U_EXPORT2
2169uprv_getDefaultCodepage()
2170{
2171 static char const *name = NULL;
2172 umtx_lock(NULL);
2173 if (name == NULL) {
2174 name = int_getDefaultCodepage();
2175 }
2176 umtx_unlock(NULL);
2177 return name;
2178}
2179#endif /* !U_CHARSET_IS_UTF8 */
2180
2181
2182/* end of platform-specific implementation -------------- */
2183
2184/* version handling --------------------------------------------------------- */
2185
2186U_CAPI void U_EXPORT2
2187u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2188 char *end;
2189 uint16_t part=0;
2190
2191 if(versionArray==NULL) {
2192 return;
2193 }
2194
2195 if(versionString!=NULL) {
2196 for(;;) {
2197 versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2198 if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2199 break;
2200 }
2201 versionString=end+1;
2202 }
2203 }
2204
2205 while(part<U_MAX_VERSION_LENGTH) {
2206 versionArray[part++]=0;
2207 }
2208}
2209
2210U_CAPI void U_EXPORT2
2211u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2212 if(versionArray!=NULL && versionString!=NULL) {
2213 char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2214 int32_t len = u_strlen(versionString);
2215 if(len>U_MAX_VERSION_STRING_LENGTH) {
2216 len = U_MAX_VERSION_STRING_LENGTH;
2217 }
2218 u_UCharsToChars(versionString, versionChars, len);
2219 versionChars[len]=0;
2220 u_versionFromString(versionArray, versionChars);
2221 }
2222}
2223
2224U_CAPI void U_EXPORT2
2225u_versionToString(const UVersionInfo versionArray, char *versionString) {
2226 uint16_t count, part;
2227 uint8_t field;
2228
2229 if(versionString==NULL) {
2230 return;
2231 }
2232
2233 if(versionArray==NULL) {
2234 versionString[0]=0;
2235 return;
2236 }
2237
2238 /* count how many fields need to be written */
2239 for(count=4; count>0 && versionArray[count-1]==0; --count) {
2240 }
2241
2242 if(count <= 1) {
2243 count = 2;
2244 }
2245
2246 /* write the first part */
2247 /* write the decimal field value */
2248 field=versionArray[0];
2249 if(field>=100) {
2250 *versionString++=(char)('0'+field/100);
2251 field%=100;
2252 }
2253 if(field>=10) {
2254 *versionString++=(char)('0'+field/10);
2255 field%=10;
2256 }
2257 *versionString++=(char)('0'+field);
2258
2259 /* write the following parts */
2260 for(part=1; part<count; ++part) {
2261 /* write a dot first */
2262 *versionString++=U_VERSION_DELIMITER;
2263
2264 /* write the decimal field value */
2265 field=versionArray[part];
2266 if(field>=100) {
2267 *versionString++=(char)('0'+field/100);
2268 field%=100;
2269 }
2270 if(field>=10) {
2271 *versionString++=(char)('0'+field/10);
2272 field%=10;
2273 }
2274 *versionString++=(char)('0'+field);
2275 }
2276
2277 /* NUL-terminate */
2278 *versionString=0;
2279}
2280
2281U_CAPI void U_EXPORT2
2282u_getVersion(UVersionInfo versionArray) {
2283 (void)copyright; // Suppress unused variable warning from clang.
2284 u_versionFromString(versionArray, U_ICU_VERSION);
2285}
2286
2287/**
2288 * icucfg.h dependent code
2289 */
2290
2291#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2292
2293#if HAVE_DLFCN_H
2294#ifdef __MVS__
2295#ifndef __SUSV3
2296#define __SUSV3 1
2297#endif
2298#endif
2299#include <dlfcn.h>
2300#endif /* HAVE_DLFCN_H */
2301
2302U_INTERNAL void * U_EXPORT2
2303uprv_dl_open(const char *libName, UErrorCode *status) {
2304 void *ret = NULL;
2305 if(U_FAILURE(*status)) return ret;
2306 ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2307 if(ret==NULL) {
2308#ifdef U_TRACE_DYLOAD
2309 printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2310#endif
2311 *status = U_MISSING_RESOURCE_ERROR;
2312 }
2313 return ret;
2314}
2315
2316U_INTERNAL void U_EXPORT2
2317uprv_dl_close(void *lib, UErrorCode *status) {
2318 if(U_FAILURE(*status)) return;
2319 dlclose(lib);
2320}
2321
2322U_INTERNAL UVoidFunction* U_EXPORT2
2323uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2324 union {
2325 UVoidFunction *fp;
2326 void *vp;
2327 } uret;
2328 uret.fp = NULL;
2329 if(U_FAILURE(*status)) return uret.fp;
2330 uret.vp = dlsym(lib, sym);
2331 if(uret.vp == NULL) {
2332#ifdef U_TRACE_DYLOAD
2333 printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2334#endif
2335 *status = U_MISSING_RESOURCE_ERROR;
2336 }
2337 return uret.fp;
2338}
2339
2340#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2341
2342/* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2344
2345U_INTERNAL void * U_EXPORT2
2346uprv_dl_open(const char *libName, UErrorCode *status) {
2347 HMODULE lib = NULL;
2348
2349 if(U_FAILURE(*status)) return NULL;
2350
2351 lib = LoadLibraryA(libName);
2352
2353 if(lib==NULL) {
2354 *status = U_MISSING_RESOURCE_ERROR;
2355 }
2356
2357 return (void*)lib;
2358}
2359
2360U_INTERNAL void U_EXPORT2
2361uprv_dl_close(void *lib, UErrorCode *status) {
2362 HMODULE handle = (HMODULE)lib;
2363 if(U_FAILURE(*status)) return;
2364
2365 FreeLibrary(handle);
2366
2367 return;
2368}
2369
2370U_INTERNAL UVoidFunction* U_EXPORT2
2371uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2372 HMODULE handle = (HMODULE)lib;
2373 UVoidFunction* addr = NULL;
2374
2375 if(U_FAILURE(*status) || lib==NULL) return NULL;
2376
2377 addr = (UVoidFunction*)GetProcAddress(handle, sym);
2378
2379 if(addr==NULL) {
2380 DWORD lastError = GetLastError();
2381 if(lastError == ERROR_PROC_NOT_FOUND) {
2382 *status = U_MISSING_RESOURCE_ERROR;
2383 } else {
2384 *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2385 }
2386 }
2387
2388 return addr;
2389}
2390
2391#else
2392
2393/* No dynamic loading, null (nonexistent) implementation. */
2394
2395U_INTERNAL void * U_EXPORT2
2396uprv_dl_open(const char *libName, UErrorCode *status) {
2397 (void)libName;
2398 if(U_FAILURE(*status)) return NULL;
2399 *status = U_UNSUPPORTED_ERROR;
2400 return NULL;
2401}
2402
2403U_INTERNAL void U_EXPORT2
2404uprv_dl_close(void *lib, UErrorCode *status) {
2405 (void)lib;
2406 if(U_FAILURE(*status)) return;
2407 *status = U_UNSUPPORTED_ERROR;
2408 return;
2409}
2410
2411U_INTERNAL UVoidFunction* U_EXPORT2
2412uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2413 (void)lib;
2414 (void)sym;
2415 if(U_SUCCESS(*status)) {
2416 *status = U_UNSUPPORTED_ERROR;
2417 }
2418 return (UVoidFunction*)NULL;
2419}
2420
2421#endif
2422
2423/*
2424 * Hey, Emacs, please set the following:
2425 *
2426 * Local Variables:
2427 * indent-tabs-mode: nil
2428 * End:
2429 *
2430 */
2431