1 | /** |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
19 | #include "orc/OrcFile.hh" |
20 | #include "Timezone.hh" |
21 | |
22 | #include <errno.h> |
23 | #include <map> |
24 | #include <sstream> |
25 | #include <stdint.h> |
26 | #include <stdlib.h> |
27 | #include <string.h> |
28 | #include <time.h> |
29 | |
30 | namespace orc { |
31 | |
// Default location of the timezone files. getTimezoneDirectory() falls back
// to this path when the TZDIR environment variable is not set.
static const char DEFAULT_TZDIR[] = "/usr/share/zoneinfo" ;

// Location of a symlink to the local timezone. getLocalTimezone() loads
// this path through getTimezoneByFilename().
static const char LOCAL_TIMEZONE[] = "/etc/localtime" ;
37 | |
// The three ways a future rule can name the day on which a transition
// happens (see the grammar documented on FutureRuleImpl).
enum TransitionKind {
  // "J<day>": day of the year that never counts February 29
  TRANSITION_JULIAN,
  // "<day>": day of the year that does count February 29
  TRANSITION_DAY,
  // "M<month>.<week>.<day of week>"
  TRANSITION_MONTH
};
43 | |
// Number of months in a year; also the inner dimension of DAYS_PER_MONTH.
static const int64_t MONTHS_PER_YEAR = 12;
/**
 * The number of days in each month in non-leap and leap years.
 * Indexed as [leap][month]: row 0 is a non-leap year, row 1 a leap year,
 * and the month index is zero based (0 is January).
 */
static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] =
  {{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
   {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}};
// Number of days in a week; used to wrap day-of-week arithmetic.
static const int64_t DAYS_PER_WEEK = 7;

// Leap years and day of the week repeat every 400 years, which makes it
// a good cycle length. The expression counts 303 years of 365 days and
// 97 years of 366 days. SECONDS_PER_DAY is not defined in this file;
// presumably it comes from an included header.
static const int64_t SECONDS_PER_400_YEARS =
  SECONDS_PER_DAY * (365 * (300 + 3) + 366 * (100 - 3));
57 | |
/**
 * Decide whether a year has a February 29 under the Gregorian rules.
 * @param year the year to test
 * @return true when the year is divisible by 4 and is either not a
 *         century year or is divisible by 400
 */
bool isLeap(int64_t year) {
  if (year % 4 != 0) {
    return false;
  }
  if (year % 100 != 0) {
    return true;
  }
  return year % 400 == 0;
}
64 | |
/**
 * Locate the entry of a sorted array that is the closest to the target
 * without exceeding it.
 * @param array the values to search, in ascending order
 * @param target the value to look for
 * @return -1 if the array is empty or target < array[0]; otherwise an
 *         index i such that array[i] <= target and either i is the last
 *         index or target < array[i+1]
 */
int64_t binarySearch(const std::vector<int64_t> &array, int64_t target) {
  if (array.empty()) {
    return -1;
  }
  uint64_t low = 0;
  uint64_t high = array.size() - 1;
  uint64_t probe = (low + high) / 2;
  while (low < high && array[probe] != target) {
    if (array[probe] < target) {
      low = probe + 1;
    } else {
      // the bounds are unsigned, so never step below zero
      high = (probe == 0) ? 0 : probe - 1;
    }
    probe = (low + high) / 2;
  }
  const int64_t landed = static_cast<int64_t>(probe);
  // the probe can end one slot above the answer
  return (target < array[probe]) ? landed - 1 : landed;
}
95 | |
96 | struct Transition { |
97 | TransitionKind kind; |
98 | int64_t day; |
99 | int64_t week; |
100 | int64_t month; |
101 | int64_t time; |
102 | |
103 | std::string toString() const { |
104 | std::stringstream buffer; |
105 | switch (kind) { |
106 | case TRANSITION_JULIAN: |
107 | buffer << "julian " << day; |
108 | break; |
109 | case TRANSITION_DAY: |
110 | buffer << "day " << day; |
111 | break; |
112 | case TRANSITION_MONTH: |
113 | buffer << "month " << month << " week " << week << " day " << day; |
114 | break; |
115 | } |
116 | buffer << " at " << (time / (60 * 60)) << ":" << ((time / 60) % 60) |
117 | << ":" << (time % 60); |
118 | return buffer.str(); |
119 | } |
120 | |
121 | /** |
122 | * Get the transition time for the given year. |
123 | * @param year the year |
124 | * @return the number of seconds past local Jan 1 00:00:00 that the |
125 | * transition happens. |
126 | */ |
127 | int64_t getTime(int64_t year) const { |
128 | int64_t result = time; |
129 | switch (kind) { |
130 | case TRANSITION_JULIAN: |
131 | result += SECONDS_PER_DAY * day; |
132 | if (day > 60 && isLeap(year)) { |
133 | result += SECONDS_PER_DAY; |
134 | } |
135 | break; |
136 | case TRANSITION_DAY: |
137 | result += SECONDS_PER_DAY * day; |
138 | break; |
139 | case TRANSITION_MONTH: { |
140 | bool inLeap = isLeap(year); |
141 | int64_t adjustedMonth = (month + 9) % 12 + 1; |
142 | int64_t adjustedYear = (month <= 2) ? (year - 1) : year; |
143 | int64_t adjustedCentury = adjustedYear / 100; |
144 | int64_t adjustedRemainder = adjustedYear % 100; |
145 | |
146 | // day of the week of the first day of month |
147 | int64_t dayOfWeek = ((26 * adjustedMonth - 2) / 10 + |
148 | 1 + adjustedRemainder + adjustedRemainder / 4 + |
149 | adjustedCentury / 4 - 2 * adjustedCentury) % 7; |
150 | if (dayOfWeek < 0) { |
151 | dayOfWeek += DAYS_PER_WEEK; |
152 | } |
153 | |
154 | int64_t d = day - dayOfWeek; |
155 | if (d < 0) { |
156 | d += DAYS_PER_WEEK; |
157 | } |
158 | for (int w = 1; w < week; ++w) { |
159 | if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - 1]) { |
160 | break; |
161 | } |
162 | d += DAYS_PER_WEEK; |
163 | } |
164 | result += d * SECONDS_PER_DAY; |
165 | |
166 | // Add in the time for the month |
167 | for(int m=0; m < month - 1; ++m) { |
168 | result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY; |
169 | } |
170 | break; |
171 | } |
172 | } |
173 | return result; |
174 | } |
175 | }; |
176 | |
177 | /** |
178 | * The current rule for finding timezone variants arbitrarily far in |
179 | * the future. They are based on a string representation that |
180 | * specifies the standard name and offset. For timezones with |
181 | * daylight savings, the string specifies the daylight variant name |
182 | * and offset and the rules for switching between them. |
183 | * |
184 | * rule = <standard name><standard offset><daylight>? |
185 | * name = string with no numbers or '+', '-', or ',' |
186 | * offset = [-+]?hh(:mm(:ss)?)? |
187 | * daylight = <name><offset>,<start day>(/<offset>)?,<end day>(/<offset>)? |
188 | * day = J<day without 2/29>|<day with 2/29>|M<month>.<week>.<day of week> |
189 | */ |
190 | class FutureRuleImpl: public FutureRule { |
191 | std::string ruleString; |
192 | TimezoneVariant standard; |
193 | bool hasDst; |
194 | TimezoneVariant dst; |
195 | Transition start; |
196 | Transition end; |
197 | |
198 | // expanded time_t offsets of transitions |
199 | std::vector<int64_t> offsets; |
200 | |
201 | // Is the epoch (1 Jan 1970 00:00) in standard time? |
202 | // This code assumes that the transition dates fall in the same order |
203 | // each year. Hopefully no timezone regions decide to move across the |
204 | // equator, which is about what it would take. |
205 | bool startInStd; |
206 | |
207 | void computeOffsets() { |
208 | if (!hasDst) { |
209 | startInStd = true; |
210 | offsets.resize(1); |
211 | } else { |
212 | // Insert a transition for the epoch and two per a year for the next |
213 | // 400 years. We assume that the all even positions are in standard |
214 | // time if and only if startInStd and the odd ones are the reverse. |
215 | offsets.resize(400 * 2 + 1); |
216 | startInStd = start.getTime(1970) < end.getTime(1970); |
217 | int64_t base = 0; |
218 | for(int64_t year = 1970; year < 1970 + 400; ++year) { |
219 | if (startInStd) { |
220 | offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = |
221 | base + start.getTime(year) - standard.gmtOffset; |
222 | offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = |
223 | base + end.getTime(year) - dst.gmtOffset; |
224 | } else { |
225 | offsets[static_cast<uint64_t>(year - 1970) * 2 + 1] = |
226 | base + end.getTime(year) - dst.gmtOffset; |
227 | offsets[static_cast<uint64_t>(year - 1970) * 2 + 2] = |
228 | base + start.getTime(year) - standard.gmtOffset; |
229 | } |
230 | base += (isLeap(year) ? 366 : 365) * SECONDS_PER_DAY; |
231 | } |
232 | } |
233 | offsets[0] = 0; |
234 | } |
235 | |
236 | public: |
237 | virtual ~FutureRuleImpl() override; |
238 | bool isDefined() const override; |
239 | const TimezoneVariant& getVariant(int64_t clk) const override; |
240 | void print(std::ostream& out) const override; |
241 | |
242 | friend class FutureRuleParser; |
243 | }; |
244 | |
245 | FutureRule::~FutureRule() { |
246 | // PASS |
247 | } |
248 | |
249 | FutureRuleImpl::~FutureRuleImpl() { |
250 | // PASS |
251 | } |
252 | |
253 | bool FutureRuleImpl::isDefined() const { |
254 | return ruleString.size() > 0; |
255 | } |
256 | |
257 | const TimezoneVariant& FutureRuleImpl::getVariant(int64_t clk) const { |
258 | if (!hasDst) { |
259 | return standard; |
260 | } else { |
261 | int64_t adjusted = clk % SECONDS_PER_400_YEARS; |
262 | if (adjusted < 0) { |
263 | adjusted += SECONDS_PER_400_YEARS; |
264 | } |
265 | int64_t idx = binarySearch(offsets, adjusted); |
266 | if (startInStd == (idx % 2 == 0)) { |
267 | return standard; |
268 | } else { |
269 | return dst; |
270 | } |
271 | } |
272 | } |
273 | |
274 | void FutureRuleImpl::print(std::ostream& out) const { |
275 | if (isDefined()) { |
276 | out << " Future rule: " << ruleString << "\n" ; |
277 | out << " standard " << standard.toString() << "\n" ; |
278 | if (hasDst) { |
279 | out << " dst " << dst.toString() << "\n" ; |
280 | out << " start " << start.toString() << "\n" ; |
281 | out << " end " << end.toString() << "\n" ; |
282 | } |
283 | } |
284 | } |
285 | |
286 | /** |
287 | * A parser for the future rule strings. |
288 | */ |
289 | class FutureRuleParser { |
290 | public: |
291 | FutureRuleParser(const std::string& str, |
292 | FutureRuleImpl* rule |
293 | ): ruleString(str), |
294 | length(str.size()), |
295 | position(0), |
296 | output(*rule) { |
297 | output.ruleString = str; |
298 | if (position != length) { |
299 | parseName(output.standard.name); |
300 | output.standard.gmtOffset = -parseOffset(); |
301 | output.standard.isDst = false; |
302 | output.hasDst = position < length; |
303 | if (output.hasDst) { |
304 | parseName(output.dst.name); |
305 | output.dst.isDst = true; |
306 | if (ruleString[position] != ',') { |
307 | output.dst.gmtOffset = -parseOffset(); |
308 | } else { |
309 | output.dst.gmtOffset = output.standard.gmtOffset + 60 * 60; |
310 | } |
311 | parseTransition(output.start); |
312 | parseTransition(output.end); |
313 | } |
314 | if (position != length) { |
315 | throwError("Extra text" ); |
316 | } |
317 | output.computeOffsets(); |
318 | } |
319 | } |
320 | |
321 | private: |
322 | |
323 | const std::string& ruleString; |
324 | size_t length; |
325 | size_t position; |
326 | FutureRuleImpl &output; |
327 | |
328 | void throwError(const char *msg) { |
329 | std::stringstream buffer; |
330 | buffer << msg << " at " << position << " in '" << ruleString << "'" ; |
331 | throw TimezoneError(buffer.str()); |
332 | } |
333 | |
334 | /** |
335 | * Parse the names of the form: |
336 | * ([^-+0-9,]+|<[^>]+>) |
337 | * and set the output string. |
338 | */ |
339 | void parseName(std::string& result) { |
340 | if (position == length) { |
341 | throwError("name required" ); |
342 | } |
343 | size_t start = position; |
344 | if (ruleString[position] == '<') { |
345 | while (position < length && ruleString[position] != '>') { |
346 | position += 1; |
347 | } |
348 | if (position == length) { |
349 | throwError("missing close '>'" ); |
350 | } |
351 | position +=1; |
352 | } else { |
353 | while (position < length) { |
354 | char ch = ruleString[position]; |
355 | if (isdigit(ch) || ch == '-' || ch == '+' || ch == ',') { |
356 | break; |
357 | } |
358 | position += 1; |
359 | } |
360 | } |
361 | if (position == start) { |
362 | throwError("empty string not allowed" ); |
363 | } |
364 | result = ruleString.substr(start, position - start); |
365 | } |
366 | |
367 | /** |
368 | * Parse an integer of the form [0-9]+ and return it. |
369 | */ |
370 | int64_t parseNumber() { |
371 | if (position >= length) { |
372 | throwError("missing number" ); |
373 | } |
374 | int64_t result = 0; |
375 | while (position < length) { |
376 | char ch = ruleString[position]; |
377 | if (isdigit(ch)) { |
378 | result = result * 10 + (ch - '0'); |
379 | position += 1; |
380 | } else { |
381 | break; |
382 | } |
383 | } |
384 | return result; |
385 | } |
386 | |
387 | /** |
388 | * Parse the offsets of the form: |
389 | * [-+]?[0-9]+(:[0-9]+(:[0-9]+)?)? |
390 | * and convert it into a number of seconds. |
391 | */ |
392 | int64_t parseOffset() { |
393 | int64_t scale = 3600; |
394 | bool isNegative = false; |
395 | if (position < length) { |
396 | char ch = ruleString[position]; |
397 | isNegative = ch == '-'; |
398 | if (ch == '-' || ch == '+') { |
399 | position += 1; |
400 | } |
401 | } |
402 | int64_t result = parseNumber() * scale; |
403 | while (position < length && scale > 1 && ruleString[position] == ':') { |
404 | scale /= 60; |
405 | position += 1; |
406 | result += parseNumber() * scale; |
407 | } |
408 | if (isNegative) { |
409 | result = -result; |
410 | } |
411 | return result; |
412 | } |
413 | |
414 | /** |
415 | * Parse a transition of the following form: |
416 | * ,(J<number>|<number>|M<number>.<number>.<number>)(/<offset>)? |
417 | */ |
418 | void parseTransition(Transition& transition) { |
419 | if (length - position < 2 || ruleString[position] != ',') { |
420 | throwError("missing transition" ); |
421 | } |
422 | position += 1; |
423 | char ch = ruleString[position]; |
424 | if (ch == 'J') { |
425 | transition.kind = TRANSITION_JULIAN; |
426 | position += 1; |
427 | transition.day = parseNumber(); |
428 | } else if (ch == 'M') { |
429 | transition.kind = TRANSITION_MONTH; |
430 | position += 1; |
431 | transition.month = parseNumber(); |
432 | if (position == length || ruleString[position] != '.') { |
433 | throwError("missing first ." ); |
434 | } |
435 | position += 1; |
436 | transition.week = parseNumber(); |
437 | if (position == length || ruleString[position] != '.') { |
438 | throwError("missing second ." ); |
439 | } |
440 | position += 1; |
441 | transition.day = parseNumber(); |
442 | } else { |
443 | transition.kind = TRANSITION_DAY; |
444 | transition.day = parseNumber(); |
445 | } |
446 | if (position < length && ruleString[position] == '/') { |
447 | position += 1; |
448 | transition.time = parseOffset(); |
449 | } else { |
450 | transition.time = 2 * 60 * 60; |
451 | } |
452 | } |
453 | }; |
454 | |
455 | /** |
456 | * Parse the POSIX TZ string. |
457 | */ |
458 | std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) { |
459 | std::shared_ptr<FutureRule> result(new FutureRuleImpl()); |
460 | FutureRuleParser parser(ruleString, |
461 | dynamic_cast<FutureRuleImpl*>(result.get())); |
462 | return result; |
463 | } |
464 | |
465 | std::string TimezoneVariant::toString() const { |
466 | std::stringstream buffer; |
467 | buffer << name << " " << gmtOffset; |
468 | if (isDst) { |
469 | buffer << " (dst)" ; |
470 | } |
471 | return buffer.str(); |
472 | } |
473 | |
/**
 * An abstraction of the differences between versions.
 * parseZoneFile() uses it to learn how wide each stored time is and how
 * to read the text that follows the data.
 */
class VersionParser {
public:
  virtual ~VersionParser();

  /**
   * Get the version number.
   */
  virtual uint64_t getVersion() const = 0;

  /**
   * Get the number of bytes used to store each time value.
   */
  virtual uint64_t getTimeSize() const = 0;

  /**
   * Parse the time at the given location.
   * @param ptr the first of getTimeSize() bytes holding the value
   */
  virtual int64_t parseTime(const unsigned char* ptr) const = 0;

  /**
   * Parse the future string
   * @param ptr the start of the file contents
   * @param offset where the text begins, relative to ptr
   * @param length the number of bytes available from offset
   */
  virtual std::string parseFutureString(const unsigned char *ptr,
                                        uint64_t offset,
                                        uint64_t length) const = 0;
};
503 | |
504 | VersionParser::~VersionParser() { |
505 | // PASS |
506 | } |
507 | |
/**
 * Read a big-endian unsigned 32 bit integer.
 * @param ptr the first of the four bytes to read
 * @return the decoded value
 */
static uint32_t decode32(const unsigned char* ptr) {
  // Widen each byte before shifting: an unsigned char promotes to a
  // signed int, and shifting a byte >= 0x80 left by 24 would overflow it.
  return (static_cast<uint32_t>(ptr[0]) << 24) |
         (static_cast<uint32_t>(ptr[1]) << 16) |
         (static_cast<uint32_t>(ptr[2]) << 8) |
         static_cast<uint32_t>(ptr[3]);
}
514 | |
515 | class Version1Parser: public VersionParser { |
516 | public: |
517 | virtual ~Version1Parser() override; |
518 | |
519 | virtual uint64_t getVersion() const override { |
520 | return 1; |
521 | } |
522 | |
523 | /** |
524 | * Get the number of bytes |
525 | */ |
526 | virtual uint64_t getTimeSize() const override { |
527 | return 4; |
528 | } |
529 | |
530 | /** |
531 | * Parse the time at the given location. |
532 | */ |
533 | virtual int64_t parseTime(const unsigned char* ptr) const override { |
534 | // sign extend from 32 bits |
535 | return static_cast<int32_t>(decode32(ptr)); |
536 | } |
537 | |
538 | virtual std::string parseFutureString(const unsigned char *, |
539 | uint64_t, |
540 | uint64_t) const override { |
541 | return "" ; |
542 | } |
543 | }; |
544 | |
545 | Version1Parser::~Version1Parser() { |
546 | // PASS |
547 | } |
548 | |
549 | class Version2Parser: public VersionParser { |
550 | public: |
551 | virtual ~Version2Parser() override; |
552 | |
553 | virtual uint64_t getVersion() const override { |
554 | return 2; |
555 | } |
556 | |
557 | /** |
558 | * Get the number of bytes |
559 | */ |
560 | virtual uint64_t getTimeSize() const override { |
561 | return 8; |
562 | } |
563 | |
564 | /** |
565 | * Parse the time at the given location. |
566 | */ |
567 | virtual int64_t parseTime(const unsigned char* ptr) const override { |
568 | return static_cast<int64_t>(decode32(ptr)) << 32 | decode32(ptr + 4); |
569 | } |
570 | |
571 | virtual std::string parseFutureString(const unsigned char *ptr, |
572 | uint64_t offset, |
573 | uint64_t length) const override { |
574 | return std::string(reinterpret_cast<const char*>(ptr) + offset + 1, |
575 | length - 2); |
576 | } |
577 | }; |
578 | |
579 | Version2Parser::~Version2Parser() { |
580 | // PASS |
581 | } |
582 | |
/**
 * A Timezone whose variants and transitions are parsed from the bytes
 * of a timezone file (see parseZoneFile).
 */
class TimezoneImpl: public Timezone {
public:
  /**
   * Parse the given bytes as a timezone file.
   * @param name the filename, kept for error messages and print()
   * @param bytes the contents of the file
   * NOTE(review): bytes is taken by value, so the vector is copied for
   * every construction.
   */
  TimezoneImpl(const std::string& name,
               const std::vector<unsigned char> bytes);
  virtual ~TimezoneImpl() override;

  /**
   * Get the variant for the given time (time_t).
   */
  const TimezoneVariant& getVariant(int64_t clk) const override;

  /**
   * Write a description of the timezone to the stream.
   */
  void print(std::ostream&) const override;

  /**
   * Get the version of the layout that was parsed.
   */
  uint64_t getVersion() const override {
    return version;
  }

  /**
   * Get the ORC epoch time in this timezone.
   */
  int64_t getEpoch() const override {
    return epoch;
  }

  /**
   * Returns clk plus the gmtOffset of the variant in effect at clk.
   */
  int64_t convertToUTC(int64_t clk) const override {
    return clk + getVariant(clk).gmtOffset;
  }

private:
  // Fill in the already sized variants vector from the variant records
  // and the name table.
  void parseTimeVariants(const unsigned char* ptr,
                         uint64_t variantOffset,
                         uint64_t variantCount,
                         uint64_t nameOffset,
                         uint64_t nameCount);
  // Parse the section of the file that starts at sectionOffset.
  void parseZoneFile(const unsigned char* ptr,
                     uint64_t sectionOffset,
                     uint64_t fileLength,
                     const VersionParser& version);
  // filename
  std::string filename;

  // the version of the file
  uint64_t version;

  // the list of variants for this timezone
  std::vector<TimezoneVariant> variants;

  // the list of the times where the local rules change
  std::vector<int64_t> transitions;

  // the variant that starts at this transition.
  std::vector<uint64_t> currentVariant;

  // the variant before the first transition
  uint64_t ancientVariant;

  // the rule for future times
  std::shared_ptr<FutureRule> futureRule;

  // the last explicit transition after which we use the future rule
  int64_t lastTransition;

  // The ORC epoch time in this timezone.
  int64_t epoch;
};
645 | |
// The cache of loaded timezones and the mutex that getTimezoneByFilename()
// holds while reading or updating it. Both need dynamic initialization
// and destruction, so the clang warnings about that are silenced for
// these two declarations. The DIAGNOSTIC_* macros are not defined in
// this file; presumably they come from an included header.
DIAGNOSTIC_PUSH
#ifdef __clang__
  DIAGNOSTIC_IGNORE("-Wglobal-constructors" )
  DIAGNOSTIC_IGNORE("-Wexit-time-destructors" )
#endif
// guards timezoneCache
static std::mutex timezone_mutex;
// loaded timezones, keyed by the filename they were read from
static std::map<std::string, std::shared_ptr<Timezone> > timezoneCache;
DIAGNOSTIC_POP
654 | |
655 | Timezone::~Timezone() { |
656 | // PASS |
657 | } |
658 | |
659 | TimezoneImpl::TimezoneImpl(const std::string& _filename, |
660 | const std::vector<unsigned char> buffer |
661 | ): filename(_filename) { |
662 | parseZoneFile(&buffer[0], 0, buffer.size(), Version1Parser()); |
663 | // Build the literal for the ORC epoch |
664 | // 2015 Jan 1 00:00:00 |
665 | tm epochStruct; |
666 | epochStruct.tm_sec = 0; |
667 | epochStruct.tm_min = 0; |
668 | epochStruct.tm_hour = 0; |
669 | epochStruct.tm_mday = 1; |
670 | epochStruct.tm_mon = 0; |
671 | epochStruct.tm_year = 2015 - 1900; |
672 | epochStruct.tm_isdst = 0; |
673 | time_t utcEpoch = timegm(&epochStruct); |
674 | epoch = utcEpoch - getVariant(utcEpoch).gmtOffset; |
675 | } |
676 | |
677 | const char* getTimezoneDirectory() { |
678 | const char *dir = getenv("TZDIR" ); |
679 | if (!dir) { |
680 | dir = DEFAULT_TZDIR; |
681 | } |
682 | return dir; |
683 | } |
684 | |
685 | /** |
686 | * Get a timezone by absolute filename. |
687 | * Results are cached. |
688 | */ |
689 | const Timezone& getTimezoneByFilename(const std::string& filename) { |
690 | // ORC-110 |
691 | std::lock_guard<std::mutex> timezone_lock(timezone_mutex); |
692 | std::map<std::string, std::shared_ptr<Timezone> >::iterator itr = |
693 | timezoneCache.find(filename); |
694 | if (itr != timezoneCache.end()) { |
695 | return *(itr->second).get(); |
696 | } |
697 | try { |
698 | ORC_UNIQUE_PTR<InputStream> file = readFile(filename); |
699 | size_t size = static_cast<size_t>(file->getLength()); |
700 | std::vector<unsigned char> buffer(size); |
701 | file->read(&buffer[0], size, 0); |
702 | timezoneCache[filename] = std::shared_ptr<Timezone>(new TimezoneImpl(filename, buffer)); |
703 | } catch(ParseError& err) { |
704 | throw TimezoneError(err.what()); |
705 | } |
706 | return *timezoneCache[filename].get(); |
707 | } |
708 | |
709 | /** |
710 | * Get the local timezone. |
711 | */ |
712 | const Timezone& getLocalTimezone() { |
713 | return getTimezoneByFilename(LOCAL_TIMEZONE); |
714 | } |
715 | |
716 | /** |
717 | * Get a timezone by name (eg. America/Los_Angeles). |
718 | * Results are cached. |
719 | */ |
720 | const Timezone& getTimezoneByName(const std::string& zone) { |
721 | std::string filename(getTimezoneDirectory()); |
722 | filename += "/" ; |
723 | filename += zone; |
724 | return getTimezoneByFilename(filename); |
725 | } |
726 | |
727 | /** |
728 | * Parse a set of bytes as a timezone file as if they came from filename. |
729 | */ |
730 | std::unique_ptr<Timezone> getTimezone(const std::string& filename, |
731 | const std::vector<unsigned char>& b){ |
732 | return std::unique_ptr<Timezone>(new TimezoneImpl(filename, b)); |
733 | } |
734 | |
735 | TimezoneImpl::~TimezoneImpl() { |
736 | // PASS |
737 | } |
738 | |
739 | void TimezoneImpl::parseTimeVariants(const unsigned char* ptr, |
740 | uint64_t variantOffset, |
741 | uint64_t variantCount, |
742 | uint64_t nameOffset, |
743 | uint64_t nameCount) { |
744 | for(uint64_t variant=0; variant < variantCount; ++variant) { |
745 | variants[variant].gmtOffset = |
746 | static_cast<int32_t>(decode32(ptr + variantOffset + 6 * variant)); |
747 | variants[variant].isDst = ptr[variantOffset + 6 * variant + 4]; |
748 | uint nameStart = ptr[variantOffset + 6 * variant + 5]; |
749 | if (nameStart >= nameCount) { |
750 | std::stringstream buffer; |
751 | buffer << "name out of range in variant " << variant |
752 | << " - " << nameStart << " >= " << nameCount; |
753 | throw TimezoneError(buffer.str()); |
754 | } |
755 | variants[variant].name = std::string(reinterpret_cast<const char*>(ptr) |
756 | + nameOffset + nameStart); |
757 | } |
758 | } |
759 | |
760 | /** |
761 | * Parse the zone file to get the bits we need. |
762 | * There are two versions of the timezone file: |
763 | * |
764 | * Version 1(version = 0x00): |
765 | * Magic(version) |
766 | * Header |
767 | * TransitionTimes(4 byte) |
768 | * TransitionRules |
769 | * Rules |
770 | * LeapSeconds(4 byte) |
771 | * IsStd |
772 | * IsGmt |
773 | * |
774 | * Version2: |
775 | * Version1(0x32) = a version 1 copy of the data for old clients |
776 | * Magic(0x32) |
777 | * Header |
778 | * TransitionTimes(8 byte) |
779 | * TransitionRules |
780 | * Rules |
781 | * LeapSeconds(8 byte) |
782 | * IsStd |
783 | * IsGmt |
784 | * FutureString |
785 | */ |
786 | void TimezoneImpl::parseZoneFile(const unsigned char *ptr, |
787 | uint64_t sectionOffset, |
788 | uint64_t fileLength, |
789 | const VersionParser& versionParser) { |
790 | const uint64_t magicOffset = sectionOffset + 0; |
791 | const uint64_t = magicOffset + 20; |
792 | |
793 | // check for validity before we start parsing |
794 | if (fileLength < headerOffset + 6 * 4 || |
795 | strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif" , 4) |
796 | != 0) { |
797 | std::stringstream buffer; |
798 | buffer << "non-tzfile " << filename; |
799 | throw TimezoneError(buffer.str()); |
800 | } |
801 | |
802 | const uint64_t isGmtCount = decode32(ptr + headerOffset + 0); |
803 | const uint64_t isStdCount = decode32(ptr + headerOffset + 4); |
804 | const uint64_t leapCount = decode32(ptr + headerOffset + 8); |
805 | const uint64_t timeCount = decode32(ptr + headerOffset + 12); |
806 | const uint64_t variantCount = decode32(ptr + headerOffset + 16); |
807 | const uint64_t nameCount = decode32(ptr + headerOffset + 20); |
808 | |
809 | const uint64_t timeOffset = headerOffset + 24; |
810 | const uint64_t timeVariantOffset = |
811 | timeOffset + versionParser.getTimeSize() * timeCount; |
812 | const uint64_t variantOffset = timeVariantOffset + timeCount; |
813 | const uint64_t nameOffset = variantOffset + variantCount * 6; |
814 | const uint64_t sectionLength = nameOffset + nameCount |
815 | + (versionParser.getTimeSize() + 4) * leapCount |
816 | + isGmtCount + isStdCount; |
817 | |
818 | if (sectionLength > fileLength) { |
819 | std::stringstream buffer; |
820 | buffer << "tzfile too short " << filename |
821 | << " needs " << sectionLength << " and has " << fileLength; |
822 | throw TimezoneError(buffer.str()); |
823 | } |
824 | |
825 | // if it is version 2, skip over the old layout and read the new one. |
826 | if (sectionOffset == 0 && ptr[magicOffset + 4] != 0) { |
827 | parseZoneFile(ptr, sectionLength, fileLength, Version2Parser()); |
828 | return; |
829 | } |
830 | version = versionParser.getVersion(); |
831 | variants.resize(variantCount); |
832 | transitions.resize(timeCount); |
833 | currentVariant.resize(timeCount); |
834 | parseTimeVariants(ptr, variantOffset, variantCount, nameOffset, |
835 | nameCount); |
836 | bool foundAncient = false; |
837 | for(uint64_t t=0; t < timeCount; ++t) { |
838 | transitions[t] = |
839 | versionParser.parseTime(ptr + timeOffset + |
840 | t * versionParser.getTimeSize()); |
841 | currentVariant[t] = ptr[timeVariantOffset + t]; |
842 | if (currentVariant[t] >= variantCount) { |
843 | std::stringstream buffer; |
844 | buffer << "tzfile rule out of range " << filename |
845 | << " references rule " << currentVariant[t] |
846 | << " of " << variantCount; |
847 | throw TimezoneError(buffer.str()); |
848 | } |
849 | // find the oldest standard time and use that as the ancient value |
850 | if (!foundAncient && |
851 | !variants[currentVariant[t]].isDst) { |
852 | foundAncient = true; |
853 | ancientVariant = currentVariant[t]; |
854 | } |
855 | } |
856 | if (!foundAncient) { |
857 | ancientVariant = 0; |
858 | } |
859 | futureRule = parseFutureRule(versionParser.parseFutureString |
860 | (ptr, sectionLength, |
861 | fileLength - sectionLength)); |
862 | |
863 | // find the lower bound for applying the future rule |
864 | if (futureRule->isDefined()) { |
865 | if (timeCount > 0) { |
866 | lastTransition = transitions[timeCount - 1]; |
867 | } else { |
868 | lastTransition = INT64_MIN; |
869 | } |
870 | } else { |
871 | lastTransition = INT64_MAX; |
872 | } |
873 | } |
874 | |
875 | const TimezoneVariant& TimezoneImpl::getVariant(int64_t clk) const { |
876 | // if it is after the last explicit entry in the table, |
877 | // use the future rule to get an answer |
878 | if (clk > lastTransition) { |
879 | return futureRule->getVariant(clk); |
880 | } else { |
881 | int64_t transition = binarySearch(transitions, clk); |
882 | uint64_t idx; |
883 | if (transition < 0) { |
884 | idx = ancientVariant; |
885 | } else { |
886 | idx = currentVariant[static_cast<size_t>(transition)]; |
887 | } |
888 | return variants[idx]; |
889 | } |
890 | } |
891 | |
892 | void TimezoneImpl::print(std::ostream& out) const { |
893 | out << "Timezone file: " << filename << "\n" ; |
894 | out << " Version: " << version << "\n" ; |
895 | futureRule->print(out); |
896 | for(uint64_t r=0; r < variants.size(); ++r) { |
897 | out << " Variant " << r << ": " |
898 | << variants[r].toString() << "\n" ; |
899 | } |
900 | for(uint64_t t=0; t < transitions.size(); ++t) { |
901 | tm timeStruct; |
902 | tm* result = nullptr; |
903 | char buffer[25]; |
904 | if (sizeof(time_t) >= 8) { |
905 | time_t val = transitions[t]; |
906 | result = gmtime_r(&val, &timeStruct); |
907 | if (result) { |
908 | strftime(buffer, sizeof(buffer), "%F %H:%M:%S" , &timeStruct); |
909 | } |
910 | } |
911 | std::cout << " Transition: " << (result == nullptr ? "null" : buffer) |
912 | << " (" << transitions[t] << ") -> " |
913 | << variants[currentVariant[t]].name |
914 | << "\n" ; |
915 | } |
916 | } |
917 | |
// Create an error that carries the given message.
TimezoneError::TimezoneError(const std::string& what
                             ): std::runtime_error(what) {
  // PASS
}

// Copy an error; the std::runtime_error base is copied from other.
TimezoneError::TimezoneError(const TimezoneError& other
                             ): std::runtime_error(other) {
  // PASS
}

// Nothing to release. ORC_NOEXCEPT is not defined in this file;
// presumably it expands to an exception specification.
TimezoneError::~TimezoneError() ORC_NOEXCEPT {
  // PASS
}
931 | |
932 | } |
933 | |