| 1 | /*------------------------------------------------------------------------- | 
|---|
| 2 | * | 
|---|
| 3 | * ts_utils.c | 
|---|
| 4 | *		various support functions | 
|---|
| 5 | * | 
|---|
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | 
|---|
| 7 | * | 
|---|
| 8 | * | 
|---|
| 9 | * IDENTIFICATION | 
|---|
| 10 | *	  src/backend/tsearch/ts_utils.c | 
|---|
| 11 | * | 
|---|
| 12 | *------------------------------------------------------------------------- | 
|---|
| 13 | */ | 
|---|
| 14 |  | 
|---|
| 15 | #include "postgres.h" | 
|---|
| 16 |  | 
|---|
| 17 | #include <ctype.h> | 
|---|
| 18 |  | 
|---|
| 19 | #include "miscadmin.h" | 
|---|
| 20 | #include "tsearch/ts_locale.h" | 
|---|
| 21 | #include "tsearch/ts_utils.h" | 
|---|
| 22 |  | 
|---|
| 23 |  | 
|---|
| 24 | /* | 
|---|
| 25 | * Given the base name and extension of a tsearch config file, return | 
|---|
| 26 | * its full path name.  The base name is assumed to be user-supplied, | 
|---|
| 27 | * and is checked to prevent pathname attacks.  The extension is assumed | 
|---|
| 28 | * to be safe. | 
|---|
| 29 | * | 
|---|
| 30 | * The result is a palloc'd string. | 
|---|
| 31 | */ | 
|---|
| 32 | char * | 
|---|
| 33 | get_tsearch_config_filename(const char *basename, | 
|---|
| 34 | const char *extension) | 
|---|
| 35 | { | 
|---|
| 36 | char		sharepath[MAXPGPATH]; | 
|---|
| 37 | char	   *result; | 
|---|
| 38 |  | 
|---|
| 39 | /* | 
|---|
| 40 | * We limit the basename to contain a-z, 0-9, and underscores.  This may | 
|---|
| 41 | * be overly restrictive, but we don't want to allow access to anything | 
|---|
| 42 | * outside the tsearch_data directory, so for instance '/' *must* be | 
|---|
| 43 | * rejected, and on some platforms '\' and ':' are risky as well. Allowing | 
|---|
| 44 | * uppercase might result in incompatible behavior between case-sensitive | 
|---|
| 45 | * and case-insensitive filesystems, and non-ASCII characters create other | 
|---|
| 46 | * interesting risks, so on the whole a tight policy seems best. | 
|---|
| 47 | */ | 
|---|
| 48 | if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename)) | 
|---|
| 49 | ereport(ERROR, | 
|---|
| 50 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), | 
|---|
| 51 | errmsg( "invalid text search configuration file name \"%s\"", | 
|---|
| 52 | basename))); | 
|---|
| 53 |  | 
|---|
| 54 | get_share_path(my_exec_path, sharepath); | 
|---|
| 55 | result = palloc(MAXPGPATH); | 
|---|
| 56 | snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s", | 
|---|
| 57 | sharepath, basename, extension); | 
|---|
| 58 |  | 
|---|
| 59 | return result; | 
|---|
| 60 | } | 
|---|
| 61 |  | 
|---|
| 62 | /* | 
|---|
| 63 | * Reads a stop-word file. Each word is run through 'wordop' | 
|---|
| 64 | * function, if given.  wordop may either modify the input in-place, | 
|---|
| 65 | * or palloc a new version. | 
|---|
| 66 | */ | 
|---|
| 67 | void | 
|---|
| 68 | readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) | 
|---|
| 69 | { | 
|---|
| 70 | char	  **stop = NULL; | 
|---|
| 71 |  | 
|---|
| 72 | s->len = 0; | 
|---|
| 73 | if (fname && *fname) | 
|---|
| 74 | { | 
|---|
| 75 | char	   *filename = get_tsearch_config_filename(fname, "stop"); | 
|---|
| 76 | tsearch_readline_state trst; | 
|---|
| 77 | char	   *line; | 
|---|
| 78 | int			reallen = 0; | 
|---|
| 79 |  | 
|---|
| 80 | if (!tsearch_readline_begin(&trst, filename)) | 
|---|
| 81 | ereport(ERROR, | 
|---|
| 82 | (errcode(ERRCODE_CONFIG_FILE_ERROR), | 
|---|
| 83 | errmsg( "could not open stop-word file \"%s\": %m", | 
|---|
| 84 | filename))); | 
|---|
| 85 |  | 
|---|
| 86 | while ((line = tsearch_readline(&trst)) != NULL) | 
|---|
| 87 | { | 
|---|
| 88 | char	   *pbuf = line; | 
|---|
| 89 |  | 
|---|
| 90 | /* Trim trailing space */ | 
|---|
| 91 | while (*pbuf && !t_isspace(pbuf)) | 
|---|
| 92 | pbuf += pg_mblen(pbuf); | 
|---|
| 93 | *pbuf = '\0'; | 
|---|
| 94 |  | 
|---|
| 95 | /* Skip empty lines */ | 
|---|
| 96 | if (*line == '\0') | 
|---|
| 97 | { | 
|---|
| 98 | pfree(line); | 
|---|
| 99 | continue; | 
|---|
| 100 | } | 
|---|
| 101 |  | 
|---|
| 102 | if (s->len >= reallen) | 
|---|
| 103 | { | 
|---|
| 104 | if (reallen == 0) | 
|---|
| 105 | { | 
|---|
| 106 | reallen = 64; | 
|---|
| 107 | stop = (char **) palloc(sizeof(char *) * reallen); | 
|---|
| 108 | } | 
|---|
| 109 | else | 
|---|
| 110 | { | 
|---|
| 111 | reallen *= 2; | 
|---|
| 112 | stop = (char **) repalloc((void *) stop, | 
|---|
| 113 | sizeof(char *) * reallen); | 
|---|
| 114 | } | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | if (wordop) | 
|---|
| 118 | { | 
|---|
| 119 | stop[s->len] = wordop(line); | 
|---|
| 120 | if (stop[s->len] != line) | 
|---|
| 121 | pfree(line); | 
|---|
| 122 | } | 
|---|
| 123 | else | 
|---|
| 124 | stop[s->len] = line; | 
|---|
| 125 |  | 
|---|
| 126 | (s->len)++; | 
|---|
| 127 | } | 
|---|
| 128 |  | 
|---|
| 129 | tsearch_readline_end(&trst); | 
|---|
| 130 | pfree(filename); | 
|---|
| 131 | } | 
|---|
| 132 |  | 
|---|
| 133 | s->stop = stop; | 
|---|
| 134 |  | 
|---|
| 135 | /* Sort to allow binary searching */ | 
|---|
| 136 | if (s->stop && s->len > 0) | 
|---|
| 137 | qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp); | 
|---|
| 138 | } | 
|---|
| 139 |  | 
|---|
| 140 | bool | 
|---|
| 141 | searchstoplist(StopList *s, char *key) | 
|---|
| 142 | { | 
|---|
| 143 | return (s->stop && s->len > 0 && | 
|---|
| 144 | bsearch(&key, s->stop, s->len, | 
|---|
| 145 | sizeof(char *), pg_qsort_strcmp)) ? true : false; | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|