1/*-------------------------------------------------------------------------
2 *
3 * ts_utils.c
4 * various support functions
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/ts_utils.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres.h"
16
17#include <ctype.h>
18
19#include "miscadmin.h"
20#include "tsearch/ts_locale.h"
21#include "tsearch/ts_utils.h"
22
23
24/*
25 * Given the base name and extension of a tsearch config file, return
26 * its full path name. The base name is assumed to be user-supplied,
27 * and is checked to prevent pathname attacks. The extension is assumed
28 * to be safe.
29 *
30 * The result is a palloc'd string.
31 */
32char *
33get_tsearch_config_filename(const char *basename,
34 const char *extension)
35{
36 char sharepath[MAXPGPATH];
37 char *result;
38
39 /*
40 * We limit the basename to contain a-z, 0-9, and underscores. This may
41 * be overly restrictive, but we don't want to allow access to anything
42 * outside the tsearch_data directory, so for instance '/' *must* be
43 * rejected, and on some platforms '\' and ':' are risky as well. Allowing
44 * uppercase might result in incompatible behavior between case-sensitive
45 * and case-insensitive filesystems, and non-ASCII characters create other
46 * interesting risks, so on the whole a tight policy seems best.
47 */
48 if (strspn(basename, "abcdefghijklmnopqrstuvwxyz0123456789_") != strlen(basename))
49 ereport(ERROR,
50 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
51 errmsg("invalid text search configuration file name \"%s\"",
52 basename)));
53
54 get_share_path(my_exec_path, sharepath);
55 result = palloc(MAXPGPATH);
56 snprintf(result, MAXPGPATH, "%s/tsearch_data/%s.%s",
57 sharepath, basename, extension);
58
59 return result;
60}
61
62/*
63 * Reads a stop-word file. Each word is run through 'wordop'
64 * function, if given. wordop may either modify the input in-place,
65 * or palloc a new version.
66 */
67void
68readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *))
69{
70 char **stop = NULL;
71
72 s->len = 0;
73 if (fname && *fname)
74 {
75 char *filename = get_tsearch_config_filename(fname, "stop");
76 tsearch_readline_state trst;
77 char *line;
78 int reallen = 0;
79
80 if (!tsearch_readline_begin(&trst, filename))
81 ereport(ERROR,
82 (errcode(ERRCODE_CONFIG_FILE_ERROR),
83 errmsg("could not open stop-word file \"%s\": %m",
84 filename)));
85
86 while ((line = tsearch_readline(&trst)) != NULL)
87 {
88 char *pbuf = line;
89
90 /* Trim trailing space */
91 while (*pbuf && !t_isspace(pbuf))
92 pbuf += pg_mblen(pbuf);
93 *pbuf = '\0';
94
95 /* Skip empty lines */
96 if (*line == '\0')
97 {
98 pfree(line);
99 continue;
100 }
101
102 if (s->len >= reallen)
103 {
104 if (reallen == 0)
105 {
106 reallen = 64;
107 stop = (char **) palloc(sizeof(char *) * reallen);
108 }
109 else
110 {
111 reallen *= 2;
112 stop = (char **) repalloc((void *) stop,
113 sizeof(char *) * reallen);
114 }
115 }
116
117 if (wordop)
118 {
119 stop[s->len] = wordop(line);
120 if (stop[s->len] != line)
121 pfree(line);
122 }
123 else
124 stop[s->len] = line;
125
126 (s->len)++;
127 }
128
129 tsearch_readline_end(&trst);
130 pfree(filename);
131 }
132
133 s->stop = stop;
134
135 /* Sort to allow binary searching */
136 if (s->stop && s->len > 0)
137 qsort(s->stop, s->len, sizeof(char *), pg_qsort_strcmp);
138}
139
140bool
141searchstoplist(StopList *s, char *key)
142{
143 return (s->stop && s->len > 0 &&
144 bsearch(&key, s->stop, s->len,
145 sizeof(char *), pg_qsort_strcmp)) ? true : false;
146}
147