Timezone.cc source code [ClickHouse/contrib/orc/c++/src/Timezone.cc]

1	/**
2	* Licensed to the Apache Software Foundation (ASF) under one
3	* or more contributor license agreements. See the NOTICE file
4	* distributed with this work for additional information
5	* regarding copyright ownership. The ASF licenses this file
6	* to you under the Apache License, Version 2.0 (the
7	* "License"); you may not use this file except in compliance
8	* with the License. You may obtain a copy of the License at
9	*
10	* http://www.apache.org/licenses/LICENSE-2.0
11	*
12	* Unless required by applicable law or agreed to in writing, software
13	* distributed under the License is distributed on an "AS IS" BASIS,
14	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15	* See the License for the specific language governing permissions and
16	* limitations under the License.
17	*/
18
19	#include "orc/OrcFile.hh"
20	#include "Timezone.hh"
21
#include <algorithm>
#include <ctype.h>
#include <errno.h>
#include <map>
#include <sstream>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
29
30	namespace orc {
31
// Directory that holds the timezone files unless TZDIR overrides it.
static const char DEFAULT_TZDIR[] = "/usr/share/zoneinfo";

// File (normally a symlink) that describes the local timezone.
static const char LOCAL_TIMEZONE[] = "/etc/localtime";

// How a transition date is written in a future rule:
// "J<n>", a bare "<n>", or "M<month>.<week>.<day>".
enum TransitionKind {
  TRANSITION_JULIAN,
  TRANSITION_DAY,
  TRANSITION_MONTH
};

static const int64_t MONTHS_PER_YEAR = 12;

/**
 * Month lengths in days. Row 0 is used for non-leap years and row 1 for
 * leap years; columns run from January to December.
 */
static const int64_t DAYS_PER_MONTH[2][MONTHS_PER_YEAR] = {
  {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
  {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
};

static const int64_t DAYS_PER_WEEK = 7;
52
53	// Leap years and day of the week repeat every 400 years, which makes it
54	// a good cycle length.
55	static const int64_t SECONDS_PER_400_YEARS =
56	SECONDS_PER_DAY * (`365` * (`300` + `3`) + `366` * (`100` - `3`));
57
/**
 * Gregorian leap-year test.
 * @param year the year to test
 * @return true when the year is divisible by 4 and is either not a
 *         century year or is divisible by 400
 */
bool isLeap(int64_t year) {
  if (year % 4 != 0) {
    return false;
  }
  if (year % 100 != 0) {
    return true;
  }
  return year % 400 == 0;
}
64
/**
 * Find the position that is the closest and less than or equal to the
 * target in a sorted array.
 *
 * When several elements equal the target, the index of the last of them
 * is returned.
 *
 * @param array values sorted in ascending order
 * @param target the value to look for
 * @return -1 if the array is empty or target < array[0]; otherwise the
 *         largest i such that array[i] <= target
 */
int64_t binarySearch(const std::vector<int64_t> &array, int64_t target) {
  // upper_bound yields the first element strictly greater than target;
  // the element just before it (if any) is the answer.
  std::vector<int64_t>::const_iterator firstGreater =
      std::upper_bound(array.begin(), array.end(), target);
  return static_cast<int64_t>(firstGreater - array.begin()) - 1;
}
95
96	struct Transition {
97	TransitionKind kind;
98	int64_t day;
99	int64_t week;
100	int64_t month;
101	int64_t time;
102
103	std::string toString() const {
104	std::stringstream buffer;
105	switch (kind) {
106	case TRANSITION_JULIAN:
107	buffer << "julian " << day;
108	break;
109	case TRANSITION_DAY:
110	buffer << "day " << day;
111	break;
112	case TRANSITION_MONTH:
113	buffer << "month " << month << " week " << week << " day " << day;
114	break;
115	}
116	buffer << " at " << (time / (`60` * `60`)) << ":" << ((time / `60`) % `60`)
117	<< ":" << (time % `60`);
118	return buffer.str();
119	}
120
121	/**
122	* Get the transition time for the given year.
123	* @param year the year
124	* @return the number of seconds past local Jan 1 00:00:00 that the
125	* transition happens.
126	*/
127	int64_t getTime(int64_t year) const {
128	int64_t result = time;
129	switch (kind) {
130	case TRANSITION_JULIAN:
131	result += SECONDS_PER_DAY * day;
132	if (day > `60` && isLeap(year)) {
133	result += SECONDS_PER_DAY;
134	}
135	break;
136	case TRANSITION_DAY:
137	result += SECONDS_PER_DAY * day;
138	break;
139	case TRANSITION_MONTH: {
140	bool inLeap = isLeap(year);
141	int64_t adjustedMonth = (month + `9`) % `12` + `1`;
142	int64_t adjustedYear = (month <= `2`) ? (year - `1`) : year;
143	int64_t adjustedCentury = adjustedYear / `100`;
144	int64_t adjustedRemainder = adjustedYear % `100`;
145
146	// day of the week of the first day of month
147	int64_t dayOfWeek = ((`26` * adjustedMonth - `2`) / `10` +
148	`1` + adjustedRemainder + adjustedRemainder / `4` +
149	adjustedCentury / `4` - `2` * adjustedCentury) % `7`;
150	if (dayOfWeek < `0`) {
151	dayOfWeek += DAYS_PER_WEEK;
152	}
153
154	int64_t d = day - dayOfWeek;
155	if (d < `0`) {
156	d += DAYS_PER_WEEK;
157	}
158	for (int w = `1`; w < week; ++w) {
159	if (d + DAYS_PER_WEEK >= DAYS_PER_MONTH[inLeap][month - `1`]) {
160	break;
161	}
162	d += DAYS_PER_WEEK;
163	}
164	result += d * SECONDS_PER_DAY;
165
166	// Add in the time for the month
167	for(int m=`0`; m < month - `1`; ++m) {
168	result += DAYS_PER_MONTH[inLeap][m] * SECONDS_PER_DAY;
169	}
170	break;
171	}
172	}
173	return result;
174	}
175	};
176
177	/**
178	* The current rule for finding timezone variants arbitrarily far in
179	* the future. They are based on a string representation that
180	* specifies the standard name and offset. For timezones with
181	* daylight savings, the string specifies the daylight variant name
182	* and offset and the rules for switching between them.
183	*
184	* rule = <standard name><standard offset><daylight>?
185	* name = string with no numbers or '+', '-', or ','
186	* offset = [-+]?hh(:mm(:ss)?)?
187	* daylight = <name><offset>,<start day>(/<offset>)?,<end day>(/<offset>)?
188	* day = J<day without 2/29>\|<day with 2/29>\|M<month>.<week>.<day of week>
189	*/
190	class FutureRuleImpl: public FutureRule {
191	std::string ruleString;
192	TimezoneVariant standard;
193	bool hasDst;
194	TimezoneVariant dst;
195	Transition start;
196	Transition end;
197
198	// expanded time_t offsets of transitions
199	std::vector<int64_t> offsets;
200
201	// Is the epoch (1 Jan 1970 00:00) in standard time?
202	// This code assumes that the transition dates fall in the same order
203	// each year. Hopefully no timezone regions decide to move across the
204	// equator, which is about what it would take.
205	bool startInStd;
206
207	void computeOffsets() {
208	if (!hasDst) {
209	startInStd = true;
210	offsets.resize(`1`);
211	} else {
212	// Insert a transition for the epoch and two per a year for the next
213	// 400 years. We assume that the all even positions are in standard
214	// time if and only if startInStd and the odd ones are the reverse.
215	offsets.resize(`400` * `2` + `1`);
216	startInStd = start.getTime(`1970`) < end.getTime(`1970`);
217	int64_t base = `0`;
218	for(int64_t year = `1970`; year < `1970` + `400`; ++year) {
219	if (startInStd) {
220	offsets [static_cast<uint64_t>(year - `1970`) * `2` + `1`] =
221	base + start.getTime(year) - standard.gmtOffset;
222	offsets [static_cast<uint64_t>(year - `1970`) * `2` + `2`] =
223	base + end.getTime(year) - dst.gmtOffset;
224	} else {
225	offsets [static_cast<uint64_t>(year - `1970`) * `2` + `1`] =
226	base + end.getTime(year) - dst.gmtOffset;
227	offsets [static_cast<uint64_t>(year - `1970`) * `2` + `2`] =
228	base + start.getTime(year) - standard.gmtOffset;
229	}
230	base += (isLeap(year) ? `366` : `365`) * SECONDS_PER_DAY;
231	}
232	}
233	offsets [`0`] = `0`;
234	}
235
236	public:
237	virtual ~FutureRuleImpl() override;
238	bool isDefined() const override;
239	const TimezoneVariant& getVariant(int64_t clk) const override;
240	void print(std::ostream& out) const override;
241
242	friend class FutureRuleParser;
243	};
244
245	FutureRule::~FutureRule() {
246	// PASS
247	}
248
249	FutureRuleImpl::~FutureRuleImpl() {
250	// PASS
251	}
252
253	bool FutureRuleImpl::isDefined() const {
254	return ruleString.size() > `0`;
255	}
256
257	const TimezoneVariant& FutureRuleImpl::getVariant(int64_t clk) const {
258	if (!hasDst) {
259	return standard;
260	} else {
261	int64_t adjusted = clk % SECONDS_PER_400_YEARS;
262	if (adjusted < `0`) {
263	adjusted += SECONDS_PER_400_YEARS;
264	}
265	int64_t idx = binarySearch(offsets, adjusted);
266	if (startInStd == (idx % `2` == `0`)) {
267	return standard;
268	} else {
269	return dst;
270	}
271	}
272	}
273
274	void FutureRuleImpl::print(std::ostream& out) const {
275	if (isDefined()) {
276	out << " Future rule: " << ruleString << "\n";
277	out << " standard " << standard.toString() << "\n";
278	if (hasDst) {
279	out << " dst " << dst.toString() << "\n";
280	out << " start " << start.toString() << "\n";
281	out << " end " << end.toString() << "\n";
282	}
283	}
284	}
285
286	/**
287	* A parser for the future rule strings.
288	*/
289	class FutureRuleParser {
290	public:
291	FutureRuleParser(const std::string& str,
292	FutureRuleImpl* rule
293	): ruleString(str),
294	length(str.size()),
295	position(`0`),
296	output(*rule) {
297	output.ruleString = str;
298	if (position != length) {
299	parseName(output.standard.name);
300	output.standard.gmtOffset = -parseOffset();
301	output.standard.isDst = false;
302	output.hasDst = position < length;
303	if (output.hasDst) {
304	parseName(output.dst.name);
305	output.dst.isDst = true;
306	if (ruleString [position] != `','`) {
307	output.dst.gmtOffset = -parseOffset();
308	} else {
309	output.dst.gmtOffset = output.standard.gmtOffset + `60` * `60`;
310	}
311	parseTransition(output.start);
312	parseTransition(output.end);
313	}
314	if (position != length) {
315	throwError("Extra text");
316	}
317	output.computeOffsets();
318	}
319	}
320
321	private:
322
323	const std::string& ruleString;
324	size_t length;
325	size_t position;
326	FutureRuleImpl &output;
327
328	void throwError(const char *msg) {
329	std::stringstream buffer;
330	buffer << msg << " at " << position << " in '" << ruleString << "'";
331	throw TimezoneError (buffer.str());
332	}
333
334	/**
335	* Parse the names of the form:
336	* ([^-+0-9,]+\|<[^>]+>)
337	* and set the output string.
338	*/
339	void parseName(std::string& result) {
340	if (position == length) {
341	throwError("name required");
342	}
343	size_t start = position;
344	if (ruleString [position] == `'<'`) {
345	while (position < length && ruleString [position] != `'>'`) {
346	position += `1`;
347	}
348	if (position == length) {
349	throwError("missing close '>'");
350	}
351	position +=`1`;
352	} else {
353	while (position < length) {
354	char ch = ruleString [position];
355	if (isdigit(ch) \|\| ch == `'-'` \|\| ch == `'+'` \|\| ch == `','`) {
356	break;
357	}
358	position += `1`;
359	}
360	}
361	if (position == start) {
362	throwError("empty string not allowed");
363	}
364	result = ruleString.substr(start, position - start);
365	}
366
367	/**
368	* Parse an integer of the form [0-9]+ and return it.
369	*/
370	int64_t parseNumber() {
371	if (position >= length) {
372	throwError("missing number");
373	}
374	int64_t result = `0`;
375	while (position < length) {
376	char ch = ruleString [position];
377	if (isdigit(ch)) {
378	result = result * `10` + (ch - `'0'`);
379	position += `1`;
380	} else {
381	break;
382	}
383	}
384	return result;
385	}
386
387	/**
388	* Parse the offsets of the form:
389	* [-+]?[0-9]+(:[0-9]+(:[0-9]+)?)?
390	* and convert it into a number of seconds.
391	*/
392	int64_t parseOffset() {
393	int64_t scale = `3600`;
394	bool isNegative = false;
395	if (position < length) {
396	char ch = ruleString [position];
397	isNegative = ch == `'-'`;
398	if (ch == `'-'` \|\| ch == `'+'`) {
399	position += `1`;
400	}
401	}
402	int64_t result = parseNumber() * scale;
403	while (position < length && scale > `1` && ruleString [position] == `':'`) {
404	scale /= `60`;
405	position += `1`;
406	result += parseNumber() * scale;
407	}
408	if (isNegative) {
409	result = -result;
410	}
411	return result;
412	}
413
414	/**
415	* Parse a transition of the following form:
416	* ,(J<number>\|<number>\|M<number>.<number>.<number>)(/<offset>)?
417	*/
418	void parseTransition(Transition& transition) {
419	if (length - position < `2` \|\| ruleString [position] != `','`) {
420	throwError("missing transition");
421	}
422	position += `1`;
423	char ch = ruleString [position];
424	if (ch == `'J'`) {
425	transition.kind = TRANSITION_JULIAN;
426	position += `1`;
427	transition.day = parseNumber();
428	} else if (ch == `'M'`) {
429	transition.kind = TRANSITION_MONTH;
430	position += `1`;
431	transition.month = parseNumber();
432	if (position == length \|\| ruleString [position] != `'.'`) {
433	throwError("missing first .");
434	}
435	position += `1`;
436	transition.week = parseNumber();
437	if (position == length \|\| ruleString [position] != `'.'`) {
438	throwError("missing second .");
439	}
440	position += `1`;
441	transition.day = parseNumber();
442	} else {
443	transition.kind = TRANSITION_DAY;
444	transition.day = parseNumber();
445	}
446	if (position < length && ruleString [position] == `'/'`) {
447	position += `1`;
448	transition.time = parseOffset();
449	} else {
450	transition.time = `2` * `60` * `60`;
451	}
452	}
453	};
454
455	/**
456	* Parse the POSIX TZ string.
457	*/
458	std::shared_ptr<FutureRule> parseFutureRule(const std::string& ruleString) {
459	std::shared_ptr<FutureRule> result(new FutureRuleImpl ());
460	FutureRuleParser parser(ruleString,
461	dynamic_cast<FutureRuleImpl*>(result.get()));
462	return result;
463	}
464
465	std::string TimezoneVariant::toString() const {
466	std::stringstream buffer;
467	buffer << name << " " << gmtOffset;
468	if (isDst) {
469	buffer << " (dst)";
470	}
471	return buffer.str();
472	}
473
/**
 * An abstraction of the differences between versions.
 */
class VersionParser {
 public:
  virtual ~VersionParser();

  /**
   * Get the version number.
   */
  virtual uint64_t getVersion() const = 0;

  /**
   * Get the number of bytes used to store one time value.
   */
  virtual uint64_t getTimeSize() const = 0;

  /**
   * Parse the time at the given location.
   */
  virtual int64_t parseTime(const unsigned char* ptr) const = 0;

  /**
   * Parse the future string
   * @param ptr the start of the file contents
   * @param offset where the trailing text starts within ptr
   * @param length the number of bytes of trailing text
   */
  virtual std::string parseFutureString(const unsigned char *ptr,
                                        uint64_t offset,
                                        uint64_t length) const = 0;
};

VersionParser::~VersionParser() {
  // PASS
}

/**
 * Read a big-endian 32 bit unsigned value.
 * Each byte is widened before it is shifted so the shift is done on an
 * unsigned value and never reaches into a sign bit.
 */
static uint32_t decode32(const unsigned char* ptr) {
  return (static_cast<uint32_t>(ptr[0]) << 24) |
    (static_cast<uint32_t>(ptr[1]) << 16) |
    (static_cast<uint32_t>(ptr[2]) << 8) |
    static_cast<uint32_t>(ptr[3]);
}

/**
 * The version 1 layout: 4 byte times and no future string.
 */
class Version1Parser: public VersionParser {
 public:
  virtual ~Version1Parser() override;

  virtual uint64_t getVersion() const override {
    return 1;
  }

  /**
   * Get the number of bytes
   */
  virtual uint64_t getTimeSize() const override {
    return 4;
  }

  /**
   * Parse the time at the given location.
   */
  virtual int64_t parseTime(const unsigned char* ptr) const override {
    // sign extend from 32 bits
    return static_cast<int32_t>(decode32(ptr));
  }

  /**
   * Version 1 has no future string, so this is always empty.
   */
  virtual std::string parseFutureString(const unsigned char *,
                                        uint64_t,
                                        uint64_t) const override {
    return "";
  }
};

Version1Parser::~Version1Parser() {
  // PASS
}

/**
 * The version 2 layout: 8 byte times followed by a future string.
 */
class Version2Parser: public VersionParser {
 public:
  virtual ~Version2Parser() override;

  virtual uint64_t getVersion() const override {
    return 2;
  }

  /**
   * Get the number of bytes
   */
  virtual uint64_t getTimeSize() const override {
    return 8;
  }

  /**
   * Parse the time at the given location.
   */
  virtual int64_t parseTime(const unsigned char* ptr) const override {
    // assemble the big-endian value unsigned, then reinterpret as signed
    const uint64_t high = decode32(ptr);
    const uint64_t low = decode32(ptr + 4);
    return static_cast<int64_t>((high << 32) | low);
  }

  /**
   * Return the trailing text without its first and last byte.
   * Fewer than two bytes of trailing text yields an empty string.
   */
  virtual std::string parseFutureString(const unsigned char *ptr,
                                        uint64_t offset,
                                        uint64_t length) const override {
    if (length < 2) {
      return std::string();
    }
    return std::string(reinterpret_cast<const char*>(ptr) + offset + 1,
                       length - 2);
  }
};

Version2Parser::~Version2Parser() {
  // PASS
}
582
583	class TimezoneImpl: public Timezone {
584	public:
585	TimezoneImpl(const std::string& name,
586	const std::vector<unsigned char> bytes);
587	virtual ~TimezoneImpl() override;
588
589	/**
590	* Get the variant for the given time (time_t).
591	*/
592	const TimezoneVariant& getVariant(int64_t clk) const override;
593
594	void print(std::ostream&) const override;
595
596	uint64_t getVersion() const override {
597	return version;
598	}
599
600	int64_t getEpoch() const override {
601	return epoch;
602	}
603
604	int64_t convertToUTC(int64_t clk) const override {
605	return clk + getVariant(clk).gmtOffset;
606	}
607
608	private:
609	void parseTimeVariants(const unsigned char* ptr,
610	uint64_t variantOffset,
611	uint64_t variantCount,
612	uint64_t nameOffset,
613	uint64_t nameCount);
614	void parseZoneFile(const unsigned char* ptr,
615	uint64_t sectionOffset,
616	uint64_t fileLength,
617	const VersionParser& version);
618	// filename
619	std::string filename;
620
621	// the version of the file
622	uint64_t version;
623
624	// the list of variants for this timezone
625	std::vector<TimezoneVariant> variants;
626
627	// the list of the times where the local rules change
628	std::vector<int64_t> transitions;
629
630	// the variant that starts at this transition.
631	std::vector<uint64_t> currentVariant;
632
633	// the variant before the first transition
634	uint64_t ancientVariant;
635
636	// the rule for future times
637	std::shared_ptr<FutureRule> futureRule;
638
639	// the last explicit transition after which we use the future rule
640	int64_t lastTransition;
641
642	// The ORC epoch time in this timezone.
643	int64_t epoch;
644	};
645
646	DIAGNOSTIC_PUSH
647	#ifdef __clang__
648	DIAGNOSTIC_IGNORE("-Wglobal-constructors")
649	DIAGNOSTIC_IGNORE("-Wexit-time-destructors")
650	#endif
651	static std::mutex timezone_mutex;
652	static std::map<std::string, std::shared_ptr<Timezone> > timezoneCache;
653	DIAGNOSTIC_POP
654
655	Timezone::~Timezone() {
656	// PASS
657	}
658
659	TimezoneImpl::TimezoneImpl(const std::string& _filename,
660	const std::vector<unsigned char> buffer
661	): filename (_filename) {
662	parseZoneFile(&buffer [`0`], `0`, buffer.size(), Version1Parser ());
663	// Build the literal for the ORC epoch
664	// 2015 Jan 1 00:00:00
665	tm epochStruct;
666	epochStruct.tm_sec = `0`;
667	epochStruct.tm_min = `0`;
668	epochStruct.tm_hour = `0`;
669	epochStruct.tm_mday = `1`;
670	epochStruct.tm_mon = `0`;
671	epochStruct.tm_year = `2015` - `1900`;
672	epochStruct.tm_isdst = `0`;
673	time_t utcEpoch = timegm(&epochStruct);
674	epoch = utcEpoch - getVariant(utcEpoch).gmtOffset;
675	}
676
677	const char* getTimezoneDirectory() {
678	const char *dir = getenv("TZDIR");
679	if (!dir) {
680	dir = DEFAULT_TZDIR;
681	}
682	return dir;
683	}
684
685	/**
686	* Get a timezone by absolute filename.
687	* Results are cached.
688	*/
689	const Timezone& getTimezoneByFilename(const std::string& filename) {
690	// ORC-110
691	std::lock_guard<std::mutex> timezone_lock(timezone_mutex);
692	std::map<std::string, std::shared_ptr<Timezone> >::iterator itr =
693	timezoneCache.find(filename);
694	if (itr != timezoneCache.end()) {
695	return *(itr ->second).get();
696	}
697	try {
698	ORC_UNIQUE_PTR<InputStream> file = readFile(filename);
699	size_t size = static_cast<size_t>(file ->getLength());
700	std::vector<unsigned char> buffer(size);
701	file ->read(&buffer [`0`], size, `0`);
702	timezoneCache [filename] = std::shared_ptr<Timezone>(new TimezoneImpl (filename, buffer));
703	} catch(ParseError& err) {
704	throw TimezoneError (err.what());
705	}
706	return *timezoneCache [filename].get();
707	}
708
709	/**
710	* Get the local timezone.
711	*/
712	const Timezone& getLocalTimezone() {
713	return getTimezoneByFilename(LOCAL_TIMEZONE);
714	}
715
716	/**
717	* Get a timezone by name (eg. America/Los_Angeles).
718	* Results are cached.
719	*/
720	const Timezone& getTimezoneByName(const std::string& zone) {
721	std::string filename(getTimezoneDirectory());
722	filename += "/";
723	filename += zone;
724	return getTimezoneByFilename(filename);
725	}
726
727	/**
728	* Parse a set of bytes as a timezone file as if they came from filename.
729	*/
730	std::unique_ptr<Timezone> getTimezone(const std::string& filename,
731	const std::vector<unsigned char>& b){
732	return std::unique_ptr<Timezone>(new TimezoneImpl (filename, b));
733	}
734
735	TimezoneImpl::~TimezoneImpl() {
736	// PASS
737	}
738
739	void TimezoneImpl::parseTimeVariants(const unsigned char* ptr,
740	uint64_t variantOffset,
741	uint64_t variantCount,
742	uint64_t nameOffset,
743	uint64_t nameCount) {
744	for(uint64_t variant=`0`; variant < variantCount; ++variant) {
745	variants [variant].gmtOffset =
746	static_cast<int32_t>(decode32(ptr + variantOffset + `6` * variant));
747	variants [variant].isDst = ptr[variantOffset + `6` * variant + `4`];
748	uint nameStart = ptr[variantOffset + `6` * variant + `5`];
749	if (nameStart >= nameCount) {
750	std::stringstream buffer;
751	buffer << "name out of range in variant " << variant
752	<< " - " << nameStart << " >= " << nameCount;
753	throw TimezoneError (buffer.str());
754	}
755	variants [variant].name = std::string (reinterpret_cast<const char*>(ptr)
756	+ nameOffset + nameStart);
757	}
758	}
759
760	/**
761	* Parse the zone file to get the bits we need.
762	* There are two versions of the timezone file:
763	*
764	* Version 1(version = 0x00):
765	* Magic(version)
766	* Header
767	* TransitionTimes(4 byte)
768	* TransitionRules
769	* Rules
770	* LeapSeconds(4 byte)
771	* IsStd
772	* IsGmt
773	*
774	* Version2:
775	* Version1(0x32) = a version 1 copy of the data for old clients
776	* Magic(0x32)
777	* Header
778	* TransitionTimes(8 byte)
779	* TransitionRules
780	* Rules
781	* LeapSeconds(8 byte)
782	* IsStd
783	* IsGmt
784	* FutureString
785	*/
786	void TimezoneImpl::parseZoneFile(const unsigned char *ptr,
787	uint64_t sectionOffset,
788	uint64_t fileLength,
789	const VersionParser& versionParser) {
790	const uint64_t magicOffset = sectionOffset + `0`;
791	const uint64_t headerOffset = magicOffset + `20`;
792
793	// check for validity before we start parsing
794	if (fileLength < headerOffset + `6` * `4` \|\|
795	strncmp(reinterpret_cast<const char*>(ptr) + magicOffset, "TZif", `4`)
796	!= `0`) {
797	std::stringstream buffer;
798	buffer << "non-tzfile " << filename;
799	throw TimezoneError (buffer.str());
800	}
801
802	const uint64_t isGmtCount = decode32(ptr + headerOffset + `0`);
803	const uint64_t isStdCount = decode32(ptr + headerOffset + `4`);
804	const uint64_t leapCount = decode32(ptr + headerOffset + `8`);
805	const uint64_t timeCount = decode32(ptr + headerOffset + `12`);
806	const uint64_t variantCount = decode32(ptr + headerOffset + `16`);
807	const uint64_t nameCount = decode32(ptr + headerOffset + `20`);
808
809	const uint64_t timeOffset = headerOffset + `24`;
810	const uint64_t timeVariantOffset =
811	timeOffset + versionParser.getTimeSize() * timeCount;
812	const uint64_t variantOffset = timeVariantOffset + timeCount;
813	const uint64_t nameOffset = variantOffset + variantCount * `6`;
814	const uint64_t sectionLength = nameOffset + nameCount
815	+ (versionParser.getTimeSize() + `4`) * leapCount
816	+ isGmtCount + isStdCount;
817
818	if (sectionLength > fileLength) {
819	std::stringstream buffer;
820	buffer << "tzfile too short " << filename
821	<< " needs " << sectionLength << " and has " << fileLength;
822	throw TimezoneError (buffer.str());
823	}
824
825	// if it is version 2, skip over the old layout and read the new one.
826	if (sectionOffset == `0` && ptr[magicOffset + `4`] != `0`) {
827	parseZoneFile(ptr, sectionLength, fileLength, Version2Parser ());
828	return;
829	}
830	version = versionParser.getVersion();
831	variants.resize(variantCount);
832	transitions.resize(timeCount);
833	currentVariant.resize(timeCount);
834	parseTimeVariants(ptr, variantOffset, variantCount, nameOffset,
835	nameCount);
836	bool foundAncient = false;
837	for(uint64_t t=`0`; t < timeCount; ++t) {
838	transitions [t] =
839	versionParser.parseTime(ptr + timeOffset +
840	t * versionParser.getTimeSize());
841	currentVariant [t] = ptr[timeVariantOffset + t];
842	if (currentVariant [t] >= variantCount) {
843	std::stringstream buffer;
844	buffer << "tzfile rule out of range " << filename
845	<< " references rule " << currentVariant [t]
846	<< " of " << variantCount;
847	throw TimezoneError (buffer.str());
848	}
849	// find the oldest standard time and use that as the ancient value
850	if (!foundAncient &&
851	!variants [currentVariant [t]].isDst) {
852	foundAncient = true;
853	ancientVariant = currentVariant [t];
854	}
855	}
856	if (!foundAncient) {
857	ancientVariant = `0`;
858	}
859	futureRule = parseFutureRule(versionParser.parseFutureString
860	(ptr, sectionLength,
861	fileLength - sectionLength));
862
863	// find the lower bound for applying the future rule
864	if (futureRule ->isDefined()) {
865	if (timeCount > `0`) {
866	lastTransition = transitions [timeCount - `1`];
867	} else {
868	lastTransition = INT64_MIN;
869	}
870	} else {
871	lastTransition = INT64_MAX;
872	}
873	}
874
875	const TimezoneVariant& TimezoneImpl::getVariant(int64_t clk) const {
876	// if it is after the last explicit entry in the table,
877	// use the future rule to get an answer
878	if (clk > lastTransition) {
879	return futureRule ->getVariant(clk);
880	} else {
881	int64_t transition = binarySearch(transitions, clk);
882	uint64_t idx;
883	if (transition < `0`) {
884	idx = ancientVariant;
885	} else {
886	idx = currentVariant [static_cast<size_t>(transition)];
887	}
888	return variants [idx];
889	}
890	}
891
892	void TimezoneImpl::print(std::ostream& out) const {
893	out << "Timezone file: " << filename << "\n";
894	out << " Version: " << version << "\n";
895	futureRule ->print(out);
896	for(uint64_t r=`0`; r < variants.size(); ++r) {
897	out << " Variant " << r << ": "
898	<< variants [r].toString() << "\n";
899	}
900	for(uint64_t t=`0`; t < transitions.size(); ++t) {
901	tm timeStruct;
902	tm* result = nullptr;
903	char buffer[`25`];
904	if (sizeof(time_t) >= `8`) {
905	time_t val = transitions [t];
906	result = gmtime_r(&val, &timeStruct);
907	if (result) {
908	strftime(buffer, sizeof(buffer), "%F %H:%M:%S", &timeStruct);
909	}
910	}
911	std::cout << " Transition: " << (result == nullptr ? "null" : buffer)
912	<< " (" << transitions [t] << ") -> "
913	<< variants [currentVariant [t]].name
914	<< "\n";
915	}
916	}
917
918	TimezoneError::TimezoneError(const std::string& what
919	): std::runtime_error (what) {
920	// PASS
921	}
922
923	TimezoneError::TimezoneError(const TimezoneError& other
924	): std::runtime_error (other) {
925	// PASS
926	}
927
928	TimezoneError::~TimezoneError() ORC_NOEXCEPT {
929	// PASS
930	}
931
932	}
933

Browse the source code of ClickHouse/contrib/orc/c++/src/Timezone.cc