1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * tzparser.c |
4 | * Functions for parsing timezone offset files |
5 | * |
6 | * Note: this code is invoked from the check_hook for the GUC variable |
7 | * timezone_abbreviations. Therefore, it should report problems using |
8 | * GUC_check_errmsg() and related functions, and try to avoid throwing |
9 | * elog(ERROR). This is not completely bulletproof at present --- in |
10 | * particular out-of-memory will throw an error. Could probably fix with |
11 | * PG_TRY if necessary. |
12 | * |
13 | * |
14 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
15 | * Portions Copyright (c) 1994, Regents of the University of California |
16 | * |
17 | * IDENTIFICATION |
18 | * src/backend/utils/misc/tzparser.c |
19 | * |
20 | *------------------------------------------------------------------------- |
21 | */ |
22 | |
#include "postgres.h"

#include <ctype.h>
#include <limits.h>

#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/tzparser.h"
32 | |
33 | |
/* Delimiter set passed to strtok() when splitting a line into tokens */
#define WHITESPACE " \t\n\r"

/* Forward declarations for the file-local helpers defined below */
static bool validateTzEntry(tzEntry *tzentry);
static bool splitTzLine(const char *filename, int lineno,
						char *line, tzEntry *tzentry);
static int	addToArray(tzEntry **base, int *arraysize, int n,
					   tzEntry *entry, bool override);
static int	ParseTzFile(const char *filename, int depth,
						tzEntry **base, int *arraysize, int n);
43 | |
44 | |
45 | /* |
46 | * Apply additional validation checks to a tzEntry |
47 | * |
48 | * Returns true if OK, else false |
49 | */ |
50 | static bool |
51 | validateTzEntry(tzEntry *tzentry) |
52 | { |
53 | unsigned char *p; |
54 | |
55 | /* |
56 | * Check restrictions imposed by datetkntbl storage format (see |
57 | * datetime.c) |
58 | */ |
59 | if (strlen(tzentry->abbrev) > TOKMAXLEN) |
60 | { |
61 | GUC_check_errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d" , |
62 | tzentry->abbrev, TOKMAXLEN, |
63 | tzentry->filename, tzentry->lineno); |
64 | return false; |
65 | } |
66 | |
67 | /* |
68 | * Sanity-check the offset: shouldn't exceed 14 hours |
69 | */ |
70 | if (tzentry->offset > 14 * 60 * 60 || |
71 | tzentry->offset < -14 * 60 * 60) |
72 | { |
73 | GUC_check_errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d" , |
74 | tzentry->offset, |
75 | tzentry->filename, tzentry->lineno); |
76 | return false; |
77 | } |
78 | |
79 | /* |
80 | * Convert abbrev to lowercase (must match datetime.c's conversion) |
81 | */ |
82 | for (p = (unsigned char *) tzentry->abbrev; *p; p++) |
83 | *p = pg_tolower(*p); |
84 | |
85 | return true; |
86 | } |
87 | |
88 | /* |
89 | * Attempt to parse the line as a timezone abbrev spec |
90 | * |
91 | * Valid formats are: |
92 | * name zone |
93 | * name offset dst |
94 | * |
95 | * Returns true if OK, else false; data is stored in *tzentry |
96 | */ |
97 | static bool |
98 | splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry) |
99 | { |
100 | char *abbrev; |
101 | char *offset; |
102 | char *offset_endptr; |
103 | char *remain; |
104 | char *is_dst; |
105 | |
106 | tzentry->lineno = lineno; |
107 | tzentry->filename = filename; |
108 | |
109 | abbrev = strtok(line, WHITESPACE); |
110 | if (!abbrev) |
111 | { |
112 | GUC_check_errmsg("missing time zone abbreviation in time zone file \"%s\", line %d" , |
113 | filename, lineno); |
114 | return false; |
115 | } |
116 | tzentry->abbrev = pstrdup(abbrev); |
117 | |
118 | offset = strtok(NULL, WHITESPACE); |
119 | if (!offset) |
120 | { |
121 | GUC_check_errmsg("missing time zone offset in time zone file \"%s\", line %d" , |
122 | filename, lineno); |
123 | return false; |
124 | } |
125 | |
126 | /* We assume zone names don't begin with a digit or sign */ |
127 | if (isdigit((unsigned char) *offset) || *offset == '+' || *offset == '-') |
128 | { |
129 | tzentry->zone = NULL; |
130 | tzentry->offset = strtol(offset, &offset_endptr, 10); |
131 | if (offset_endptr == offset || *offset_endptr != '\0') |
132 | { |
133 | GUC_check_errmsg("invalid number for time zone offset in time zone file \"%s\", line %d" , |
134 | filename, lineno); |
135 | return false; |
136 | } |
137 | |
138 | is_dst = strtok(NULL, WHITESPACE); |
139 | if (is_dst && pg_strcasecmp(is_dst, "D" ) == 0) |
140 | { |
141 | tzentry->is_dst = true; |
142 | remain = strtok(NULL, WHITESPACE); |
143 | } |
144 | else |
145 | { |
146 | /* there was no 'D' dst specifier */ |
147 | tzentry->is_dst = false; |
148 | remain = is_dst; |
149 | } |
150 | } |
151 | else |
152 | { |
153 | /* |
154 | * Assume entry is a zone name. We do not try to validate it by |
155 | * looking up the zone, because that would force loading of a lot of |
156 | * zones that probably will never be used in the current session. |
157 | */ |
158 | tzentry->zone = pstrdup(offset); |
159 | tzentry->offset = 0; |
160 | tzentry->is_dst = false; |
161 | remain = strtok(NULL, WHITESPACE); |
162 | } |
163 | |
164 | if (!remain) /* no more non-whitespace chars */ |
165 | return true; |
166 | |
167 | if (remain[0] != '#') /* must be a comment */ |
168 | { |
169 | GUC_check_errmsg("invalid syntax in time zone file \"%s\", line %d" , |
170 | filename, lineno); |
171 | return false; |
172 | } |
173 | return true; |
174 | } |
175 | |
176 | /* |
177 | * Insert entry into sorted array |
178 | * |
179 | * *base: base address of array (changeable if must enlarge array) |
180 | * *arraysize: allocated length of array (changeable if must enlarge array) |
181 | * n: current number of valid elements in array |
182 | * entry: new data to insert |
183 | * override: true if OK to override |
184 | * |
185 | * Returns the new array length (new value for n), or -1 if error |
186 | */ |
187 | static int |
188 | addToArray(tzEntry **base, int *arraysize, int n, |
189 | tzEntry *entry, bool override) |
190 | { |
191 | tzEntry *arrayptr; |
192 | int low; |
193 | int high; |
194 | |
195 | /* |
196 | * Search the array for a duplicate; as a useful side effect, the array is |
197 | * maintained in sorted order. We use strcmp() to ensure we match the |
198 | * sort order datetime.c expects. |
199 | */ |
200 | arrayptr = *base; |
201 | low = 0; |
202 | high = n - 1; |
203 | while (low <= high) |
204 | { |
205 | int mid = (low + high) >> 1; |
206 | tzEntry *midptr = arrayptr + mid; |
207 | int cmp; |
208 | |
209 | cmp = strcmp(entry->abbrev, midptr->abbrev); |
210 | if (cmp < 0) |
211 | high = mid - 1; |
212 | else if (cmp > 0) |
213 | low = mid + 1; |
214 | else |
215 | { |
216 | /* |
217 | * Found a duplicate entry; complain unless it's the same. |
218 | */ |
219 | if ((midptr->zone == NULL && entry->zone == NULL && |
220 | midptr->offset == entry->offset && |
221 | midptr->is_dst == entry->is_dst) || |
222 | (midptr->zone != NULL && entry->zone != NULL && |
223 | strcmp(midptr->zone, entry->zone) == 0)) |
224 | { |
225 | /* return unchanged array */ |
226 | return n; |
227 | } |
228 | if (override) |
229 | { |
230 | /* same abbrev but something is different, override */ |
231 | midptr->zone = entry->zone; |
232 | midptr->offset = entry->offset; |
233 | midptr->is_dst = entry->is_dst; |
234 | return n; |
235 | } |
236 | /* same abbrev but something is different, complain */ |
237 | GUC_check_errmsg("time zone abbreviation \"%s\" is multiply defined" , |
238 | entry->abbrev); |
239 | GUC_check_errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d." , |
240 | midptr->filename, midptr->lineno, |
241 | entry->filename, entry->lineno); |
242 | return -1; |
243 | } |
244 | } |
245 | |
246 | /* |
247 | * No match, insert at position "low". |
248 | */ |
249 | if (n >= *arraysize) |
250 | { |
251 | *arraysize *= 2; |
252 | *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry)); |
253 | } |
254 | |
255 | arrayptr = *base + low; |
256 | |
257 | memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry)); |
258 | |
259 | memcpy(arrayptr, entry, sizeof(tzEntry)); |
260 | |
261 | return n + 1; |
262 | } |
263 | |
264 | /* |
265 | * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE |
266 | * |
267 | * filename: user-specified file name (does not include path) |
268 | * depth: current recursion depth |
269 | * *base: array for results (changeable if must enlarge array) |
270 | * *arraysize: allocated length of array (changeable if must enlarge array) |
271 | * n: current number of valid elements in array |
272 | * |
273 | * Returns the new array length (new value for n), or -1 if error |
274 | */ |
275 | static int |
276 | ParseTzFile(const char *filename, int depth, |
277 | tzEntry **base, int *arraysize, int n) |
278 | { |
279 | char share_path[MAXPGPATH]; |
280 | char file_path[MAXPGPATH]; |
281 | FILE *tzFile; |
282 | char tzbuf[1024]; |
283 | char *line; |
284 | tzEntry tzentry; |
285 | int lineno = 0; |
286 | bool override = false; |
287 | const char *p; |
288 | |
289 | /* |
290 | * We enforce that the filename is all alpha characters. This may be |
291 | * overly restrictive, but we don't want to allow access to anything |
292 | * outside the timezonesets directory, so for instance '/' *must* be |
293 | * rejected. |
294 | */ |
295 | for (p = filename; *p; p++) |
296 | { |
297 | if (!isalpha((unsigned char) *p)) |
298 | { |
299 | /* at level 0, just use guc.c's regular "invalid value" message */ |
300 | if (depth > 0) |
301 | GUC_check_errmsg("invalid time zone file name \"%s\"" , |
302 | filename); |
303 | return -1; |
304 | } |
305 | } |
306 | |
307 | /* |
308 | * The maximal recursion depth is a pretty arbitrary setting. It is hard |
309 | * to imagine that someone needs more than 3 levels so stick with this |
310 | * conservative setting until someone complains. |
311 | */ |
312 | if (depth > 3) |
313 | { |
314 | GUC_check_errmsg("time zone file recursion limit exceeded in file \"%s\"" , |
315 | filename); |
316 | return -1; |
317 | } |
318 | |
319 | get_share_path(my_exec_path, share_path); |
320 | snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s" , |
321 | share_path, filename); |
322 | tzFile = AllocateFile(file_path, "r" ); |
323 | if (!tzFile) |
324 | { |
325 | /* |
326 | * Check to see if the problem is not the filename but the directory. |
327 | * This is worth troubling over because if the installation share/ |
328 | * directory is missing or unreadable, this is likely to be the first |
329 | * place we notice a problem during postmaster startup. |
330 | */ |
331 | int save_errno = errno; |
332 | DIR *tzdir; |
333 | |
334 | snprintf(file_path, sizeof(file_path), "%s/timezonesets" , |
335 | share_path); |
336 | tzdir = AllocateDir(file_path); |
337 | if (tzdir == NULL) |
338 | { |
339 | GUC_check_errmsg("could not open directory \"%s\": %m" , |
340 | file_path); |
341 | GUC_check_errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location." , |
342 | my_exec_path); |
343 | return -1; |
344 | } |
345 | FreeDir(tzdir); |
346 | errno = save_errno; |
347 | |
348 | /* |
349 | * otherwise, if file doesn't exist and it's level 0, guc.c's |
350 | * complaint is enough |
351 | */ |
352 | if (errno != ENOENT || depth > 0) |
353 | GUC_check_errmsg("could not read time zone file \"%s\": %m" , |
354 | filename); |
355 | |
356 | return -1; |
357 | } |
358 | |
359 | while (!feof(tzFile)) |
360 | { |
361 | lineno++; |
362 | if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL) |
363 | { |
364 | if (ferror(tzFile)) |
365 | { |
366 | GUC_check_errmsg("could not read time zone file \"%s\": %m" , |
367 | filename); |
368 | return -1; |
369 | } |
370 | /* else we're at EOF after all */ |
371 | break; |
372 | } |
373 | if (strlen(tzbuf) == sizeof(tzbuf) - 1) |
374 | { |
375 | /* the line is too long for tzbuf */ |
376 | GUC_check_errmsg("line is too long in time zone file \"%s\", line %d" , |
377 | filename, lineno); |
378 | return -1; |
379 | } |
380 | |
381 | /* skip over whitespace */ |
382 | line = tzbuf; |
383 | while (*line && isspace((unsigned char) *line)) |
384 | line++; |
385 | |
386 | if (*line == '\0') /* empty line */ |
387 | continue; |
388 | if (*line == '#') /* comment line */ |
389 | continue; |
390 | |
391 | if (pg_strncasecmp(line, "@INCLUDE" , strlen("@INCLUDE" )) == 0) |
392 | { |
393 | /* pstrdup so we can use filename in result data structure */ |
394 | char *includeFile = pstrdup(line + strlen("@INCLUDE" )); |
395 | |
396 | includeFile = strtok(includeFile, WHITESPACE); |
397 | if (!includeFile || !*includeFile) |
398 | { |
399 | GUC_check_errmsg("@INCLUDE without file name in time zone file \"%s\", line %d" , |
400 | filename, lineno); |
401 | return -1; |
402 | } |
403 | n = ParseTzFile(includeFile, depth + 1, |
404 | base, arraysize, n); |
405 | if (n < 0) |
406 | return -1; |
407 | continue; |
408 | } |
409 | |
410 | if (pg_strncasecmp(line, "@OVERRIDE" , strlen("@OVERRIDE" )) == 0) |
411 | { |
412 | override = true; |
413 | continue; |
414 | } |
415 | |
416 | if (!splitTzLine(filename, lineno, line, &tzentry)) |
417 | return -1; |
418 | if (!validateTzEntry(&tzentry)) |
419 | return -1; |
420 | n = addToArray(base, arraysize, n, &tzentry, override); |
421 | if (n < 0) |
422 | return -1; |
423 | } |
424 | |
425 | FreeFile(tzFile); |
426 | |
427 | return n; |
428 | } |
429 | |
430 | /* |
431 | * load_tzoffsets --- read and parse the specified timezone offset file |
432 | * |
433 | * On success, return a filled-in TimeZoneAbbrevTable, which must have been |
434 | * malloc'd not palloc'd. On failure, return NULL, using GUC_check_errmsg |
435 | * and friends to give details of the problem. |
436 | */ |
437 | TimeZoneAbbrevTable * |
438 | load_tzoffsets(const char *filename) |
439 | { |
440 | TimeZoneAbbrevTable *result = NULL; |
441 | MemoryContext tmpContext; |
442 | MemoryContext oldContext; |
443 | tzEntry *array; |
444 | int arraysize; |
445 | int n; |
446 | |
447 | /* |
448 | * Create a temp memory context to work in. This makes it easy to clean |
449 | * up afterwards. |
450 | */ |
451 | tmpContext = AllocSetContextCreate(CurrentMemoryContext, |
452 | "TZParserMemory" , |
453 | ALLOCSET_SMALL_SIZES); |
454 | oldContext = MemoryContextSwitchTo(tmpContext); |
455 | |
456 | /* Initialize array at a reasonable size */ |
457 | arraysize = 128; |
458 | array = (tzEntry *) palloc(arraysize * sizeof(tzEntry)); |
459 | |
460 | /* Parse the file(s) */ |
461 | n = ParseTzFile(filename, 0, &array, &arraysize, 0); |
462 | |
463 | /* If no errors so far, let datetime.c allocate memory & convert format */ |
464 | if (n >= 0) |
465 | { |
466 | result = ConvertTimeZoneAbbrevs(array, n); |
467 | if (!result) |
468 | GUC_check_errmsg("out of memory" ); |
469 | } |
470 | |
471 | /* Clean up */ |
472 | MemoryContextSwitchTo(oldContext); |
473 | MemoryContextDelete(tmpContext); |
474 | |
475 | return result; |
476 | } |
477 | |