1 | /*-------------------------------------------------------------------- |
2 | * Symbols referenced in this file: |
3 | * - ScanKeywords |
4 | * - NumScanKeywords |
5 | * - ScanKeywordLookup |
6 | *-------------------------------------------------------------------- |
7 | */ |
8 | |
9 | /*------------------------------------------------------------------------- |
10 | * |
11 | * keywords.c |
12 | * lexical token lookup for key words in PostgreSQL |
13 | * |
14 | * |
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
16 | * Portions Copyright (c) 1994, Regents of the University of California |
17 | * |
18 | * |
19 | * IDENTIFICATION |
20 | * src/common/keywords.c |
21 | * |
22 | *------------------------------------------------------------------------- |
23 | */ |
24 | #include "pg_functions.hpp" |
25 | #include <string.h> |
26 | #include <string> |
27 | #include <memory> |
28 | |
29 | #include "parser/gramparse.hpp" |
30 | |
31 | #include "parser/kwlist.hpp" |
32 | |
33 | namespace duckdb_libpgquery { |
34 | |
35 | /* |
36 | * ScanKeywordLookup - see if a given word is a keyword |
37 | * |
 * The table to be searched is passed explicitly, so that this can be used
 * to search keyword lists other than the standard keyword list.
40 | * |
41 | * Returns a pointer to the PGScanKeyword table entry, or NULL if no match. |
42 | * |
43 | * The match is done case-insensitively. Note that we deliberately use a |
44 | * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', |
45 | * even if we are in a locale where tolower() would produce more or different |
46 | * translations. This is to conform to the SQL99 spec, which says that |
47 | * keywords are to be matched in this way even though non-keyword identifiers |
48 | * receive a different case-normalization mapping. |
49 | */ |
50 | const PGScanKeyword *ScanKeywordLookup(const char *text, const PGScanKeyword *keywords, int num_keywords) { |
51 | int len, i; |
52 | const PGScanKeyword *low; |
53 | const PGScanKeyword *high; |
54 | |
55 | len = strlen(s: text); |
56 | auto data = std::unique_ptr<char[]>(new char[len + 1]); |
57 | auto word = data.get(); |
58 | /* We assume all keywords are shorter than NAMEDATALEN. */ |
59 | |
60 | /* |
61 | * Apply an ASCII-only downcasing. We must not use tolower() since it may |
62 | * produce the wrong translation in some locales (eg, Turkish). |
63 | */ |
64 | for (i = 0; i < len; i++) { |
65 | char ch = text[i]; |
66 | |
67 | if (ch >= 'A' && ch <= 'Z') |
68 | ch += 'a' - 'A'; |
69 | word[i] = ch; |
70 | } |
71 | word[len] = '\0'; |
72 | |
73 | /* |
74 | * Now do a binary search using plain strcmp() comparison. |
75 | */ |
76 | low = keywords; |
77 | high = keywords + (num_keywords - 1); |
78 | while (low <= high) { |
79 | const PGScanKeyword *middle; |
80 | int difference; |
81 | |
82 | middle = low + (high - low) / 2; |
83 | difference = strcmp(s1: middle->name, s2: word); |
84 | if (difference == 0) |
85 | return middle; |
86 | else if (difference < 0) |
87 | low = middle + 1; |
88 | else |
89 | high = middle - 1; |
90 | } |
91 | |
92 | return NULL; |
93 | } |
94 | } |