1/*--------------------------------------------------------------------
2 * Symbols referenced in this file:
3 * - ScanKeywords
4 * - NumScanKeywords
5 * - ScanKeywordLookup
6 *--------------------------------------------------------------------
7 */
8
9/*-------------------------------------------------------------------------
10 *
11 * keywords.c
12 * lexical token lookup for key words in PostgreSQL
13 *
14 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
16 * Portions Copyright (c) 1994, Regents of the University of California
17 *
18 *
19 * IDENTIFICATION
20 * src/common/keywords.c
21 *
22 *-------------------------------------------------------------------------
23 */
24#include "pg_functions.hpp"
25#include <string.h>
26#include <string>
27#include <memory>
28
29#include "parser/gramparse.hpp"
30
31#include "parser/kwlist.hpp"
32
33namespace duckdb_libpgquery {
34
35/*
36 * ScanKeywordLookup - see if a given word is a keyword
37 *
38 * The table to be searched is passed explicitly, so that this can be used
39 * to search keyword lists other than the standard list appearing above.
40 *
41 * Returns a pointer to the PGScanKeyword table entry, or NULL if no match.
42 *
43 * The match is done case-insensitively. Note that we deliberately use a
44 * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
45 * even if we are in a locale where tolower() would produce more or different
46 * translations. This is to conform to the SQL99 spec, which says that
47 * keywords are to be matched in this way even though non-keyword identifiers
48 * receive a different case-normalization mapping.
49 */
50const PGScanKeyword *ScanKeywordLookup(const char *text, const PGScanKeyword *keywords, int num_keywords) {
51 int len, i;
52 const PGScanKeyword *low;
53 const PGScanKeyword *high;
54
55 len = strlen(s: text);
56 auto data = std::unique_ptr<char[]>(new char[len + 1]);
57 auto word = data.get();
58 /* We assume all keywords are shorter than NAMEDATALEN. */
59
60 /*
61 * Apply an ASCII-only downcasing. We must not use tolower() since it may
62 * produce the wrong translation in some locales (eg, Turkish).
63 */
64 for (i = 0; i < len; i++) {
65 char ch = text[i];
66
67 if (ch >= 'A' && ch <= 'Z')
68 ch += 'a' - 'A';
69 word[i] = ch;
70 }
71 word[len] = '\0';
72
73 /*
74 * Now do a binary search using plain strcmp() comparison.
75 */
76 low = keywords;
77 high = keywords + (num_keywords - 1);
78 while (low <= high) {
79 const PGScanKeyword *middle;
80 int difference;
81
82 middle = low + (high - low) / 2;
83 difference = strcmp(s1: middle->name, s2: word);
84 if (difference == 0)
85 return middle;
86 else if (difference < 0)
87 low = middle + 1;
88 else
89 high = middle - 1;
90 }
91
92 return NULL;
93}
94}