1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * ts_utils.c |
4 | * various support functions |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/tsearch/ts_utils.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres.h" |
16 | |
17 | #include <ctype.h> |
18 | |
19 | #include "miscadmin.h" |
20 | #include "tsearch/ts_locale.h" |
21 | #include "tsearch/ts_utils.h" |
22 | |
23 | |
24 | /* |
25 | * Given the base name and extension of a tsearch config file, return |
26 | * its full path name. The base name is assumed to be user-supplied, |
27 | * and is checked to prevent pathname attacks. The extension is assumed |
28 | * to be safe. |
29 | * |
30 | * The result is a palloc'd string. |
31 | */ |
32 | char * |
33 | get_tsearch_config_filename(const char *basename, |
34 | const char *extension) |
35 | { |
36 | char sharepath[MAXPGPATH]; |
37 | char *result; |
38 | |
39 | /* |
40 | * We limit the basename to contain a-z, 0-9, and underscores. This may |
41 | * be overly restrictive, but we don't want to allow access to anything |
42 | * outside the tsearch_data directory, so for instance '/' *must* be |
43 | * rejected, and on some platforms '\' and ':' are risky as well. Allowing |
44 | * uppercase might result in incompatible behavior between case-sensitive |
45 | * and case-insensitive filesystems, and non-ASCII characters create other |
46 | * interesting risks, so on the whole a tight policy seems best. |
47 | */ |
48 | if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_" ) != strlen(basename)) |
49 | ereport(ERROR, |
50 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
51 | errmsg("invalid text search configuration file name \"%s\"" , |
52 | basename))); |
53 | |
54 | get_share_path(my_exec_path, sharepath); |
55 | result = palloc(MAXPGPATH); |
56 | snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s" , |
57 | sharepath, basename, extension); |
58 | |
59 | return result; |
60 | } |
61 | |
62 | /* |
63 | * Reads a stop-word file. Each word is run through 'wordop' |
64 | * function, if given. wordop may either modify the input in-place, |
65 | * or palloc a new version. |
66 | */ |
67 | void |
68 | readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) |
69 | { |
70 | char **stop = NULL; |
71 | |
72 | s->len = 0; |
73 | if (fname && *fname) |
74 | { |
75 | char *filename = get_tsearch_config_filename(fname, "stop" ); |
76 | tsearch_readline_state trst; |
77 | char *line; |
78 | int reallen = 0; |
79 | |
80 | if (!tsearch_readline_begin(&trst, filename)) |
81 | ereport(ERROR, |
82 | (errcode(ERRCODE_CONFIG_FILE_ERROR), |
83 | errmsg("could not open stop-word file \"%s\": %m" , |
84 | filename))); |
85 | |
86 | while ((line = tsearch_readline(&trst)) != NULL) |
87 | { |
88 | char *pbuf = line; |
89 | |
90 | /* Trim trailing space */ |
91 | while (*pbuf && !t_isspace(pbuf)) |
92 | pbuf += pg_mblen(pbuf); |
93 | *pbuf = '\0'; |
94 | |
95 | /* Skip empty lines */ |
96 | if (*line == '\0') |
97 | { |
98 | pfree(line); |
99 | continue; |
100 | } |
101 | |
102 | if (s->len >= reallen) |
103 | { |
104 | if (reallen == 0) |
105 | { |
106 | reallen = 64; |
107 | stop = (char **) palloc(sizeof(char *) * reallen); |
108 | } |
109 | else |
110 | { |
111 | reallen *= 2; |
112 | stop = (char **) repalloc((void *) stop, |
113 | sizeof(char *) * reallen); |
114 | } |
115 | } |
116 | |
117 | if (wordop) |
118 | { |
119 | stop[s->len] = wordop(line); |
120 | if (stop[s->len] != line) |
121 | pfree(line); |
122 | } |
123 | else |
124 | stop[s->len] = line; |
125 | |
126 | (s->len)++; |
127 | } |
128 | |
129 | tsearch_readline_end(&trst); |
130 | pfree(filename); |
131 | } |
132 | |
133 | s->stop = stop; |
134 | |
135 | /* Sort to allow binary searching */ |
136 | if (s->stop && s->len > 0) |
137 | qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp); |
138 | } |
139 | |
140 | bool |
141 | searchstoplist(StopList *s, char *key) |
142 | { |
143 | return (s->stop && s->len > 0 && |
144 | bsearch(&key, s->stop, s->len, |
145 | sizeof(char *), pg_qsort_strcmp)) ? true : false; |
146 | } |
147 | |