1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * Copyright (C) 2016, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* |
8 | * dayperiodrules.cpp |
9 | * |
10 | * created on: 2016-01-20 |
11 | * created by: kazede |
12 | */ |
13 | |
14 | #include "dayperiodrules.h" |
15 | |
16 | #include "unicode/ures.h" |
17 | #include "charstr.h" |
18 | #include "cstring.h" |
19 | #include "ucln_in.h" |
20 | #include "uhash.h" |
21 | #include "umutex.h" |
22 | #include "uresimp.h" |
23 | |
24 | |
25 | U_NAMESPACE_BEGIN |
26 | |
27 | namespace { |
28 | |
29 | struct DayPeriodRulesData : public UMemory { |
30 | DayPeriodRulesData() : localeToRuleSetNumMap(NULL), rules(NULL), maxRuleSetNum(0) {} |
31 | |
32 | UHashtable *localeToRuleSetNumMap; |
33 | DayPeriodRules *rules; |
34 | int32_t maxRuleSetNum; |
35 | } *data = NULL; |
36 | |
// Kinds of cutoff that can appear in a day period definition.
// The non-negative values are used as bit positions (1 << type) in
// DayPeriodRulesDataSink::cutoffs[], so they must stay small and distinct.
enum CutoffType {
    CUTOFF_TYPE_UNKNOWN = -1,
    CUTOFF_TYPE_BEFORE  = 0,
    CUTOFF_TYPE_AFTER   = 1,  // TODO: AFTER is deprecated in CLDR 29. Remove.
    CUTOFF_TYPE_FROM    = 2,
    CUTOFF_TYPE_AT      = 3
};
44 | |
45 | } // namespace |
46 | |
47 | struct DayPeriodRulesDataSink : public ResourceSink { |
48 | DayPeriodRulesDataSink() { |
49 | for (int32_t i = 0; i < UPRV_LENGTHOF(cutoffs); ++i) { cutoffs[i] = 0; } |
50 | } |
51 | virtual ~DayPeriodRulesDataSink(); |
52 | |
53 | virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { |
54 | ResourceTable dayPeriodData = value.getTable(errorCode); |
55 | if (U_FAILURE(errorCode)) { return; } |
56 | |
57 | for (int32_t i = 0; dayPeriodData.getKeyAndValue(i, key, value); ++i) { |
58 | if (uprv_strcmp(key, "locales" ) == 0) { |
59 | ResourceTable locales = value.getTable(errorCode); |
60 | if (U_FAILURE(errorCode)) { return; } |
61 | |
62 | for (int32_t j = 0; locales.getKeyAndValue(j, key, value); ++j) { |
63 | UnicodeString setNum_str = value.getUnicodeString(errorCode); |
64 | int32_t setNum = parseSetNum(setNum_str, errorCode); |
65 | uhash_puti(data->localeToRuleSetNumMap, const_cast<char *>(key), setNum, &errorCode); |
66 | } |
67 | } else if (uprv_strcmp(key, "rules" ) == 0) { |
68 | // Allocate one more than needed to skip [0]. See comment in parseSetNum(). |
69 | data->rules = new DayPeriodRules[data->maxRuleSetNum + 1]; |
70 | if (data->rules == NULL) { |
71 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
72 | return; |
73 | } |
74 | ResourceTable rules = value.getTable(errorCode); |
75 | processRules(rules, key, value, errorCode); |
76 | if (U_FAILURE(errorCode)) { return; } |
77 | } |
78 | } |
79 | } |
80 | |
81 | void processRules(const ResourceTable &rules, const char *key, |
82 | ResourceValue &value, UErrorCode &errorCode) { |
83 | if (U_FAILURE(errorCode)) { return; } |
84 | |
85 | for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) { |
86 | ruleSetNum = parseSetNum(key, errorCode); |
87 | ResourceTable ruleSet = value.getTable(errorCode); |
88 | if (U_FAILURE(errorCode)) { return; } |
89 | |
90 | for (int32_t j = 0; ruleSet.getKeyAndValue(j, key, value); ++j) { |
91 | period = DayPeriodRules::getDayPeriodFromString(key); |
92 | if (period == DayPeriodRules::DAYPERIOD_UNKNOWN) { |
93 | errorCode = U_INVALID_FORMAT_ERROR; |
94 | return; |
95 | } |
96 | ResourceTable periodDefinition = value.getTable(errorCode); |
97 | if (U_FAILURE(errorCode)) { return; } |
98 | |
99 | for (int32_t k = 0; periodDefinition.getKeyAndValue(k, key, value); ++k) { |
100 | if (value.getType() == URES_STRING) { |
101 | // Key-value pairs (e.g. before{6:00}). |
102 | CutoffType type = getCutoffTypeFromString(key); |
103 | addCutoff(type, value.getUnicodeString(errorCode), errorCode); |
104 | if (U_FAILURE(errorCode)) { return; } |
105 | } else { |
106 | // Arrays (e.g. before{6:00, 24:00}). |
107 | cutoffType = getCutoffTypeFromString(key); |
108 | ResourceArray cutoffArray = value.getArray(errorCode); |
109 | if (U_FAILURE(errorCode)) { return; } |
110 | |
111 | int32_t length = cutoffArray.getSize(); |
112 | for (int32_t l = 0; l < length; ++l) { |
113 | cutoffArray.getValue(l, value); |
114 | addCutoff(cutoffType, value.getUnicodeString(errorCode), errorCode); |
115 | if (U_FAILURE(errorCode)) { return; } |
116 | } |
117 | } |
118 | } |
119 | setDayPeriodForHoursFromCutoffs(errorCode); |
120 | for (int32_t k = 0; k < UPRV_LENGTHOF(cutoffs); ++k) { |
121 | cutoffs[k] = 0; |
122 | } |
123 | } |
124 | |
125 | if (!data->rules[ruleSetNum].allHoursAreSet()) { |
126 | errorCode = U_INVALID_FORMAT_ERROR; |
127 | return; |
128 | } |
129 | } |
130 | } |
131 | |
132 | // Members. |
133 | int32_t cutoffs[25]; // [0] thru [24]: 24 is allowed in "before 24". |
134 | |
135 | // "Path" to data. |
136 | int32_t ruleSetNum; |
137 | DayPeriodRules::DayPeriod period; |
138 | CutoffType cutoffType; |
139 | |
140 | // Helpers. |
141 | static int32_t parseSetNum(const UnicodeString &setNumStr, UErrorCode &errorCode) { |
142 | CharString cs; |
143 | cs.appendInvariantChars(setNumStr, errorCode); |
144 | return parseSetNum(cs.data(), errorCode); |
145 | } |
146 | |
147 | static int32_t parseSetNum(const char *setNumStr, UErrorCode &errorCode) { |
148 | if (U_FAILURE(errorCode)) { return -1; } |
149 | |
150 | if (uprv_strncmp(setNumStr, "set" , 3) != 0) { |
151 | errorCode = U_INVALID_FORMAT_ERROR; |
152 | return -1; |
153 | } |
154 | |
155 | int32_t i = 3; |
156 | int32_t setNum = 0; |
157 | while (setNumStr[i] != 0) { |
158 | int32_t digit = setNumStr[i] - '0'; |
159 | if (digit < 0 || 9 < digit) { |
160 | errorCode = U_INVALID_FORMAT_ERROR; |
161 | return -1; |
162 | } |
163 | setNum = 10 * setNum + digit; |
164 | ++i; |
165 | } |
166 | |
167 | // Rule set number must not be zero. (0 is used to indicate "not found" by hashmap.) |
168 | // Currently ICU data conveniently starts numbering rule sets from 1. |
169 | if (setNum == 0) { |
170 | errorCode = U_INVALID_FORMAT_ERROR; |
171 | return -1; |
172 | } else { |
173 | return setNum; |
174 | } |
175 | } |
176 | |
177 | void addCutoff(CutoffType type, const UnicodeString &hour_str, UErrorCode &errorCode) { |
178 | if (U_FAILURE(errorCode)) { return; } |
179 | |
180 | if (type == CUTOFF_TYPE_UNKNOWN) { |
181 | errorCode = U_INVALID_FORMAT_ERROR; |
182 | return; |
183 | } |
184 | |
185 | int32_t hour = parseHour(hour_str, errorCode); |
186 | if (U_FAILURE(errorCode)) { return; } |
187 | |
188 | cutoffs[hour] |= 1 << type; |
189 | } |
190 | |
191 | // Translate the cutoffs[] array to day period rules. |
192 | void setDayPeriodForHoursFromCutoffs(UErrorCode &errorCode) { |
193 | DayPeriodRules &rule = data->rules[ruleSetNum]; |
194 | |
195 | for (int32_t startHour = 0; startHour <= 24; ++startHour) { |
196 | // AT cutoffs must be either midnight or noon. |
197 | if (cutoffs[startHour] & (1 << CUTOFF_TYPE_AT)) { |
198 | if (startHour == 0 && period == DayPeriodRules::DAYPERIOD_MIDNIGHT) { |
199 | rule.fHasMidnight = TRUE; |
200 | } else if (startHour == 12 && period == DayPeriodRules::DAYPERIOD_NOON) { |
201 | rule.fHasNoon = TRUE; |
202 | } else { |
203 | errorCode = U_INVALID_FORMAT_ERROR; // Bad data. |
204 | return; |
205 | } |
206 | } |
207 | |
208 | // FROM/AFTER and BEFORE must come in a pair. |
209 | if (cutoffs[startHour] & (1 << CUTOFF_TYPE_FROM) || |
210 | cutoffs[startHour] & (1 << CUTOFF_TYPE_AFTER)) { |
211 | for (int32_t hour = startHour + 1;; ++hour) { |
212 | if (hour == startHour) { |
213 | // We've gone around the array once and can't find a BEFORE. |
214 | errorCode = U_INVALID_FORMAT_ERROR; |
215 | return; |
216 | } |
217 | if (hour == 25) { hour = 0; } |
218 | if (cutoffs[hour] & (1 << CUTOFF_TYPE_BEFORE)) { |
219 | rule.add(startHour, hour, period); |
220 | break; |
221 | } |
222 | } |
223 | } |
224 | } |
225 | } |
226 | |
227 | // Translate "before" to CUTOFF_TYPE_BEFORE, for example. |
228 | static CutoffType getCutoffTypeFromString(const char *type_str) { |
229 | if (uprv_strcmp(type_str, "from" ) == 0) { |
230 | return CUTOFF_TYPE_FROM; |
231 | } else if (uprv_strcmp(type_str, "before" ) == 0) { |
232 | return CUTOFF_TYPE_BEFORE; |
233 | } else if (uprv_strcmp(type_str, "after" ) == 0) { |
234 | return CUTOFF_TYPE_AFTER; |
235 | } else if (uprv_strcmp(type_str, "at" ) == 0) { |
236 | return CUTOFF_TYPE_AT; |
237 | } else { |
238 | return CUTOFF_TYPE_UNKNOWN; |
239 | } |
240 | } |
241 | |
242 | // Gets the numerical value of the hour from the Unicode string. |
243 | static int32_t parseHour(const UnicodeString &time, UErrorCode &errorCode) { |
244 | if (U_FAILURE(errorCode)) { |
245 | return 0; |
246 | } |
247 | |
248 | int32_t hourLimit = time.length() - 3; |
249 | // `time` must look like "x:00" or "xx:00". |
250 | // If length is wrong or `time` doesn't end with ":00", error out. |
251 | if ((hourLimit != 1 && hourLimit != 2) || |
252 | time[hourLimit] != 0x3A || time[hourLimit + 1] != 0x30 || |
253 | time[hourLimit + 2] != 0x30) { |
254 | errorCode = U_INVALID_FORMAT_ERROR; |
255 | return 0; |
256 | } |
257 | |
258 | // If `time` doesn't begin with a number in [0, 24], error out. |
259 | // Note: "24:00" is possible in "before 24:00". |
260 | int32_t hour = time[0] - 0x30; |
261 | if (hour < 0 || 9 < hour) { |
262 | errorCode = U_INVALID_FORMAT_ERROR; |
263 | return 0; |
264 | } |
265 | if (hourLimit == 2) { |
266 | int32_t hourDigit2 = time[1] - 0x30; |
267 | if (hourDigit2 < 0 || 9 < hourDigit2) { |
268 | errorCode = U_INVALID_FORMAT_ERROR; |
269 | return 0; |
270 | } |
271 | hour = hour * 10 + hourDigit2; |
272 | if (hour > 24) { |
273 | errorCode = U_INVALID_FORMAT_ERROR; |
274 | return 0; |
275 | } |
276 | } |
277 | |
278 | return hour; |
279 | } |
280 | }; // struct DayPeriodRulesDataSink |
281 | |
282 | struct DayPeriodRulesCountSink : public ResourceSink { |
283 | virtual ~DayPeriodRulesCountSink(); |
284 | |
285 | virtual void put(const char *key, ResourceValue &value, UBool, UErrorCode &errorCode) { |
286 | ResourceTable rules = value.getTable(errorCode); |
287 | if (U_FAILURE(errorCode)) { return; } |
288 | |
289 | for (int32_t i = 0; rules.getKeyAndValue(i, key, value); ++i) { |
290 | int32_t setNum = DayPeriodRulesDataSink::parseSetNum(key, errorCode); |
291 | if (setNum > data->maxRuleSetNum) { |
292 | data->maxRuleSetNum = setNum; |
293 | } |
294 | } |
295 | } |
296 | }; |
297 | |
298 | // Out-of-line virtual destructors. |
299 | DayPeriodRulesDataSink::~DayPeriodRulesDataSink() {} |
300 | DayPeriodRulesCountSink::~DayPeriodRulesCountSink() {} |
301 | |
302 | namespace { |
303 | |
304 | UInitOnce initOnce = U_INITONCE_INITIALIZER; |
305 | |
306 | U_CFUNC UBool U_CALLCONV dayPeriodRulesCleanup() { |
307 | delete[] data->rules; |
308 | uhash_close(data->localeToRuleSetNumMap); |
309 | delete data; |
310 | data = NULL; |
311 | return TRUE; |
312 | } |
313 | |
314 | } // namespace |
315 | |
316 | void U_CALLCONV DayPeriodRules::load(UErrorCode &errorCode) { |
317 | if (U_FAILURE(errorCode)) { |
318 | return; |
319 | } |
320 | |
321 | data = new DayPeriodRulesData(); |
322 | data->localeToRuleSetNumMap = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); |
323 | LocalUResourceBundlePointer rb_dayPeriods(ures_openDirect(NULL, "dayPeriods" , &errorCode)); |
324 | |
325 | // Get the largest rule set number (so we allocate enough objects). |
326 | DayPeriodRulesCountSink countSink; |
327 | ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "rules" , countSink, errorCode); |
328 | |
329 | // Populate rules. |
330 | DayPeriodRulesDataSink sink; |
331 | ures_getAllItemsWithFallback(rb_dayPeriods.getAlias(), "" , sink, errorCode); |
332 | |
333 | ucln_i18n_registerCleanup(UCLN_I18N_DAYPERIODRULES, dayPeriodRulesCleanup); |
334 | } |
335 | |
336 | const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCode &errorCode) { |
337 | umtx_initOnce(initOnce, DayPeriodRules::load, errorCode); |
338 | |
339 | // If the entire day period rules data doesn't conform to spec (even if the part we want |
340 | // does), return NULL. |
341 | if(U_FAILURE(errorCode)) { return NULL; } |
342 | |
343 | const char *localeCode = locale.getBaseName(); |
344 | char name[ULOC_FULLNAME_CAPACITY]; |
345 | char parentName[ULOC_FULLNAME_CAPACITY]; |
346 | |
347 | if (uprv_strlen(localeCode) < ULOC_FULLNAME_CAPACITY) { |
348 | uprv_strcpy(name, localeCode); |
349 | |
350 | // Treat empty string as root. |
351 | if (*name == '\0') { |
352 | uprv_strcpy(name, "root" ); |
353 | } |
354 | } else { |
355 | errorCode = U_BUFFER_OVERFLOW_ERROR; |
356 | return NULL; |
357 | } |
358 | |
359 | int32_t ruleSetNum = 0; // NB there is no rule set 0 and 0 is returned upon lookup failure. |
360 | while (*name != '\0') { |
361 | ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name); |
362 | if (ruleSetNum == 0) { |
363 | // name and parentName can't be the same pointer, so fill in parent then copy to child. |
364 | uloc_getParent(name, parentName, ULOC_FULLNAME_CAPACITY, &errorCode); |
365 | if (*parentName == '\0') { |
366 | // Saves a lookup in the hash table. |
367 | break; |
368 | } |
369 | uprv_strcpy(name, parentName); |
370 | } else { |
371 | break; |
372 | } |
373 | } |
374 | |
375 | if (ruleSetNum <= 0 || data->rules[ruleSetNum].getDayPeriodForHour(0) == DAYPERIOD_UNKNOWN) { |
376 | // If day period for hour 0 is UNKNOWN then day period for all hours are UNKNOWN. |
377 | // Data doesn't exist even with fallback. |
378 | return NULL; |
379 | } else { |
380 | return &data->rules[ruleSetNum]; |
381 | } |
382 | } |
383 | |
384 | DayPeriodRules::DayPeriodRules() : fHasMidnight(FALSE), fHasNoon(FALSE) { |
385 | for (int32_t i = 0; i < 24; ++i) { |
386 | fDayPeriodForHour[i] = DayPeriodRules::DAYPERIOD_UNKNOWN; |
387 | } |
388 | } |
389 | |
390 | double DayPeriodRules::getMidPointForDayPeriod( |
391 | DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { |
392 | if (U_FAILURE(errorCode)) { return -1; } |
393 | |
394 | int32_t startHour = getStartHourForDayPeriod(dayPeriod, errorCode); |
395 | int32_t endHour = getEndHourForDayPeriod(dayPeriod, errorCode); |
396 | // Can't obtain startHour or endHour; bail out. |
397 | if (U_FAILURE(errorCode)) { return -1; } |
398 | |
399 | double midPoint = (startHour + endHour) / 2.0; |
400 | |
401 | if (startHour > endHour) { |
402 | // dayPeriod wraps around midnight. Shift midPoint by 12 hours, in the direction that |
403 | // lands it in [0, 24). |
404 | midPoint += 12; |
405 | if (midPoint >= 24) { |
406 | midPoint -= 24; |
407 | } |
408 | } |
409 | |
410 | return midPoint; |
411 | } |
412 | |
413 | int32_t DayPeriodRules::getStartHourForDayPeriod( |
414 | DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { |
415 | if (U_FAILURE(errorCode)) { return -1; } |
416 | |
417 | if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; } |
418 | if (dayPeriod == DAYPERIOD_NOON) { return 12; } |
419 | |
420 | if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) { |
421 | // dayPeriod wraps around midnight. Start hour is later than end hour. |
422 | for (int32_t i = 22; i >= 1; --i) { |
423 | if (fDayPeriodForHour[i] != dayPeriod) { |
424 | return (i + 1); |
425 | } |
426 | } |
427 | } else { |
428 | for (int32_t i = 0; i <= 23; ++i) { |
429 | if (fDayPeriodForHour[i] == dayPeriod) { |
430 | return i; |
431 | } |
432 | } |
433 | } |
434 | |
435 | // dayPeriod doesn't exist in rule set; set error and exit. |
436 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
437 | return -1; |
438 | } |
439 | |
440 | int32_t DayPeriodRules::getEndHourForDayPeriod( |
441 | DayPeriodRules::DayPeriod dayPeriod, UErrorCode &errorCode) const { |
442 | if (U_FAILURE(errorCode)) { return -1; } |
443 | |
444 | if (dayPeriod == DAYPERIOD_MIDNIGHT) { return 0; } |
445 | if (dayPeriod == DAYPERIOD_NOON) { return 12; } |
446 | |
447 | if (fDayPeriodForHour[0] == dayPeriod && fDayPeriodForHour[23] == dayPeriod) { |
448 | // dayPeriod wraps around midnight. End hour is before start hour. |
449 | for (int32_t i = 1; i <= 22; ++i) { |
450 | if (fDayPeriodForHour[i] != dayPeriod) { |
451 | // i o'clock is when a new period starts, therefore when the old period ends. |
452 | return i; |
453 | } |
454 | } |
455 | } else { |
456 | for (int32_t i = 23; i >= 0; --i) { |
457 | if (fDayPeriodForHour[i] == dayPeriod) { |
458 | return (i + 1); |
459 | } |
460 | } |
461 | } |
462 | |
463 | // dayPeriod doesn't exist in rule set; set error and exit. |
464 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
465 | return -1; |
466 | } |
467 | |
468 | DayPeriodRules::DayPeriod DayPeriodRules::getDayPeriodFromString(const char *type_str) { |
469 | if (uprv_strcmp(type_str, "midnight" ) == 0) { |
470 | return DAYPERIOD_MIDNIGHT; |
471 | } else if (uprv_strcmp(type_str, "noon" ) == 0) { |
472 | return DAYPERIOD_NOON; |
473 | } else if (uprv_strcmp(type_str, "morning1" ) == 0) { |
474 | return DAYPERIOD_MORNING1; |
475 | } else if (uprv_strcmp(type_str, "afternoon1" ) == 0) { |
476 | return DAYPERIOD_AFTERNOON1; |
477 | } else if (uprv_strcmp(type_str, "evening1" ) == 0) { |
478 | return DAYPERIOD_EVENING1; |
479 | } else if (uprv_strcmp(type_str, "night1" ) == 0) { |
480 | return DAYPERIOD_NIGHT1; |
481 | } else if (uprv_strcmp(type_str, "morning2" ) == 0) { |
482 | return DAYPERIOD_MORNING2; |
483 | } else if (uprv_strcmp(type_str, "afternoon2" ) == 0) { |
484 | return DAYPERIOD_AFTERNOON2; |
485 | } else if (uprv_strcmp(type_str, "evening2" ) == 0) { |
486 | return DAYPERIOD_EVENING2; |
487 | } else if (uprv_strcmp(type_str, "night2" ) == 0) { |
488 | return DAYPERIOD_NIGHT2; |
489 | } else if (uprv_strcmp(type_str, "am" ) == 0) { |
490 | return DAYPERIOD_AM; |
491 | } else if (uprv_strcmp(type_str, "pm" ) == 0) { |
492 | return DAYPERIOD_PM; |
493 | } else { |
494 | return DAYPERIOD_UNKNOWN; |
495 | } |
496 | } |
497 | |
498 | void DayPeriodRules::add(int32_t startHour, int32_t limitHour, DayPeriod period) { |
499 | for (int32_t i = startHour; i != limitHour; ++i) { |
500 | if (i == 24) { i = 0; } |
501 | fDayPeriodForHour[i] = period; |
502 | } |
503 | } |
504 | |
505 | UBool DayPeriodRules::allHoursAreSet() { |
506 | for (int32_t i = 0; i < 24; ++i) { |
507 | if (fDayPeriodForHour[i] == DAYPERIOD_UNKNOWN) { return FALSE; } |
508 | } |
509 | |
510 | return TRUE; |
511 | } |
512 | |
513 | |
514 | |
515 | U_NAMESPACE_END |
516 | |