| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * ts_utils.c |
| 4 | * various support functions |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * |
| 9 | * IDENTIFICATION |
| 10 | * src/backend/tsearch/ts_utils.c |
| 11 | * |
| 12 | *------------------------------------------------------------------------- |
| 13 | */ |
| 14 | |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include <ctype.h> |
| 18 | |
| 19 | #include "miscadmin.h" |
| 20 | #include "tsearch/ts_locale.h" |
| 21 | #include "tsearch/ts_utils.h" |
| 22 | |
| 23 | |
| 24 | /* |
| 25 | * Given the base name and extension of a tsearch config file, return |
| 26 | * its full path name. The base name is assumed to be user-supplied, |
| 27 | * and is checked to prevent pathname attacks. The extension is assumed |
| 28 | * to be safe. |
| 29 | * |
| 30 | * The result is a palloc'd string. |
| 31 | */ |
| 32 | char * |
| 33 | get_tsearch_config_filename(const char *basename, |
| 34 | const char *extension) |
| 35 | { |
| 36 | char sharepath[MAXPGPATH]; |
| 37 | char *result; |
| 38 | |
| 39 | /* |
| 40 | * We limit the basename to contain a-z, 0-9, and underscores. This may |
| 41 | * be overly restrictive, but we don't want to allow access to anything |
| 42 | * outside the tsearch_data directory, so for instance '/' *must* be |
| 43 | * rejected, and on some platforms '\' and ':' are risky as well. Allowing |
| 44 | * uppercase might result in incompatible behavior between case-sensitive |
| 45 | * and case-insensitive filesystems, and non-ASCII characters create other |
| 46 | * interesting risks, so on the whole a tight policy seems best. |
| 47 | */ |
| 48 | if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_" ) != strlen(basename)) |
| 49 | ereport(ERROR, |
| 50 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 51 | errmsg("invalid text search configuration file name \"%s\"" , |
| 52 | basename))); |
| 53 | |
| 54 | get_share_path(my_exec_path, sharepath); |
| 55 | result = palloc(MAXPGPATH); |
| 56 | snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s" , |
| 57 | sharepath, basename, extension); |
| 58 | |
| 59 | return result; |
| 60 | } |
| 61 | |
| 62 | /* |
| 63 | * Reads a stop-word file. Each word is run through 'wordop' |
| 64 | * function, if given. wordop may either modify the input in-place, |
| 65 | * or palloc a new version. |
| 66 | */ |
| 67 | void |
| 68 | readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) |
| 69 | { |
| 70 | char **stop = NULL; |
| 71 | |
| 72 | s->len = 0; |
| 73 | if (fname && *fname) |
| 74 | { |
| 75 | char *filename = get_tsearch_config_filename(fname, "stop" ); |
| 76 | tsearch_readline_state trst; |
| 77 | char *line; |
| 78 | int reallen = 0; |
| 79 | |
| 80 | if (!tsearch_readline_begin(&trst, filename)) |
| 81 | ereport(ERROR, |
| 82 | (errcode(ERRCODE_CONFIG_FILE_ERROR), |
| 83 | errmsg("could not open stop-word file \"%s\": %m" , |
| 84 | filename))); |
| 85 | |
| 86 | while ((line = tsearch_readline(&trst)) != NULL) |
| 87 | { |
| 88 | char *pbuf = line; |
| 89 | |
| 90 | /* Trim trailing space */ |
| 91 | while (*pbuf && !t_isspace(pbuf)) |
| 92 | pbuf += pg_mblen(pbuf); |
| 93 | *pbuf = '\0'; |
| 94 | |
| 95 | /* Skip empty lines */ |
| 96 | if (*line == '\0') |
| 97 | { |
| 98 | pfree(line); |
| 99 | continue; |
| 100 | } |
| 101 | |
| 102 | if (s->len >= reallen) |
| 103 | { |
| 104 | if (reallen == 0) |
| 105 | { |
| 106 | reallen = 64; |
| 107 | stop = (char **) palloc(sizeof(char *) * reallen); |
| 108 | } |
| 109 | else |
| 110 | { |
| 111 | reallen *= 2; |
| 112 | stop = (char **) repalloc((void *) stop, |
| 113 | sizeof(char *) * reallen); |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | if (wordop) |
| 118 | { |
| 119 | stop[s->len] = wordop(line); |
| 120 | if (stop[s->len] != line) |
| 121 | pfree(line); |
| 122 | } |
| 123 | else |
| 124 | stop[s->len] = line; |
| 125 | |
| 126 | (s->len)++; |
| 127 | } |
| 128 | |
| 129 | tsearch_readline_end(&trst); |
| 130 | pfree(filename); |
| 131 | } |
| 132 | |
| 133 | s->stop = stop; |
| 134 | |
| 135 | /* Sort to allow binary searching */ |
| 136 | if (s->stop && s->len > 0) |
| 137 | qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp); |
| 138 | } |
| 139 | |
| 140 | bool |
| 141 | searchstoplist(StopList *s, char *key) |
| 142 | { |
| 143 | return (s->stop && s->len > 0 && |
| 144 | bsearch(&key, s->stop, s->len, |
| 145 | sizeof(char *), pg_qsort_strcmp)) ? true : false; |
| 146 | } |
| 147 | |