| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * kwlookup.c |
| 4 | * Key word lookup for PostgreSQL |
| 5 | * |
| 6 | * |
| 7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 8 | * Portions Copyright (c) 1994, Regents of the University of California |
| 9 | * |
| 10 | * |
| 11 | * IDENTIFICATION |
| 12 | * src/common/kwlookup.c |
| 13 | * |
| 14 | *------------------------------------------------------------------------- |
| 15 | */ |
| 16 | #include "c.h" |
| 17 | |
| 18 | #include "common/kwlookup.h" |
| 19 | |
| 20 | |
| 21 | /* |
| 22 | * ScanKeywordLookup - see if a given word is a keyword |
| 23 | * |
| 24 | * The list of keywords to be matched against is passed as a ScanKeywordList. |
| 25 | * |
| 26 | * Returns the keyword number (0..N-1) of the keyword, or -1 if no match. |
| 27 | * Callers typically use the keyword number to index into information |
| 28 | * arrays, but that is no concern of this code. |
| 29 | * |
| 30 | * The match is done case-insensitively. Note that we deliberately use a |
| 31 | * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', |
| 32 | * even if we are in a locale where tolower() would produce more or different |
| 33 | * translations. This is to conform to the SQL99 spec, which says that |
| 34 | * keywords are to be matched in this way even though non-keyword identifiers |
| 35 | * receive a different case-normalization mapping. |
| 36 | */ |
| 37 | int |
| 38 | ScanKeywordLookup(const char *str, |
| 39 | const ScanKeywordList *keywords) |
| 40 | { |
| 41 | size_t len; |
| 42 | int h; |
| 43 | const char *kw; |
| 44 | |
| 45 | /* |
| 46 | * Reject immediately if too long to be any keyword. This saves useless |
| 47 | * hashing and downcasing work on long strings. |
| 48 | */ |
| 49 | len = strlen(str); |
| 50 | if (len > keywords->max_kw_len) |
| 51 | return -1; |
| 52 | |
| 53 | /* |
| 54 | * Compute the hash function. We assume it was generated to produce |
| 55 | * case-insensitive results. Since it's a perfect hash, we need only |
| 56 | * match to the specific keyword it identifies. |
| 57 | */ |
| 58 | h = keywords->hash(str, len); |
| 59 | |
| 60 | /* An out-of-range result implies no match */ |
| 61 | if (h < 0 || h >= keywords->num_keywords) |
| 62 | return -1; |
| 63 | |
| 64 | /* |
| 65 | * Compare character-by-character to see if we have a match, applying an |
| 66 | * ASCII-only downcasing to the input characters. We must not use |
| 67 | * tolower() since it may produce the wrong translation in some locales |
| 68 | * (eg, Turkish). |
| 69 | */ |
| 70 | kw = GetScanKeyword(h, keywords); |
| 71 | while (*str != '\0') |
| 72 | { |
| 73 | char ch = *str++; |
| 74 | |
| 75 | if (ch >= 'A' && ch <= 'Z') |
| 76 | ch += 'a' - 'A'; |
| 77 | if (ch != *kw++) |
| 78 | return -1; |
| 79 | } |
| 80 | if (*kw != '\0') |
| 81 | return -1; |
| 82 | |
| 83 | /* Success! */ |
| 84 | return h; |
| 85 | } |
| 86 | |