1/*-------------------------------------------------------------------------
2 *
3 * kwlookup.c
4 * Key word lookup for PostgreSQL
5 *
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 *
11 * IDENTIFICATION
12 * src/common/kwlookup.c
13 *
14 *-------------------------------------------------------------------------
15 */
16#include "c.h"
17
18#include "common/kwlookup.h"
19
20
21/*
22 * ScanKeywordLookup - see if a given word is a keyword
23 *
24 * The list of keywords to be matched against is passed as a ScanKeywordList.
25 *
26 * Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
27 * Callers typically use the keyword number to index into information
28 * arrays, but that is no concern of this code.
29 *
30 * The match is done case-insensitively. Note that we deliberately use a
31 * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
32 * even if we are in a locale where tolower() would produce more or different
33 * translations. This is to conform to the SQL99 spec, which says that
34 * keywords are to be matched in this way even though non-keyword identifiers
35 * receive a different case-normalization mapping.
36 */
37int
38ScanKeywordLookup(const char *str,
39 const ScanKeywordList *keywords)
40{
41 size_t len;
42 int h;
43 const char *kw;
44
45 /*
46 * Reject immediately if too long to be any keyword. This saves useless
47 * hashing and downcasing work on long strings.
48 */
49 len = strlen(str);
50 if (len > keywords->max_kw_len)
51 return -1;
52
53 /*
54 * Compute the hash function. We assume it was generated to produce
55 * case-insensitive results. Since it's a perfect hash, we need only
56 * match to the specific keyword it identifies.
57 */
58 h = keywords->hash(str, len);
59
60 /* An out-of-range result implies no match */
61 if (h < 0 || h >= keywords->num_keywords)
62 return -1;
63
64 /*
65 * Compare character-by-character to see if we have a match, applying an
66 * ASCII-only downcasing to the input characters. We must not use
67 * tolower() since it may produce the wrong translation in some locales
68 * (eg, Turkish).
69 */
70 kw = GetScanKeyword(h, keywords);
71 while (*str != '\0')
72 {
73 char ch = *str++;
74
75 if (ch >= 'A' && ch <= 'Z')
76 ch += 'a' - 'A';
77 if (ch != *kw++)
78 return -1;
79 }
80 if (*kw != '\0')
81 return -1;
82
83 /* Success! */
84 return h;
85}
86