1/*-------------------------------------------------------------------------
2 *
3 * wparser.c
4 * Standard interface to word parser
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/wparser.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "funcapi.h"
17#include "catalog/namespace.h"
18#include "catalog/pg_type.h"
19#include "commands/defrem.h"
20#include "tsearch/ts_cache.h"
21#include "tsearch/ts_utils.h"
22#include "utils/builtins.h"
23#include "utils/jsonapi.h"
24#include "utils/varlena.h"
25
26
27/******sql-level interface******/
28
29typedef struct
30{
31 int cur;
32 LexDescr *list;
33} TSTokenTypeStorage;
34
35/* state for ts_headline_json_* */
36typedef struct HeadlineJsonState
37{
38 HeadlineParsedText *prs;
39 TSConfigCacheEntry *cfg;
40 TSParserCacheEntry *prsobj;
41 TSQuery query;
42 List *prsoptions;
43 bool transformed;
44} HeadlineJsonState;
45
46static text *headline_json_value(void *_state, char *elem_value, int elem_len);
47
48static void
49tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
50{
51 TupleDesc tupdesc;
52 MemoryContext oldcontext;
53 TSTokenTypeStorage *st;
54 TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
55
56 if (!OidIsValid(prs->lextypeOid))
57 elog(ERROR, "method lextype isn't defined for text search parser %u",
58 prsid);
59
60 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
61
62 st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage));
63 st->cur = 0;
64 /* lextype takes one dummy argument */
65 st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
66 (Datum) 0));
67 funcctx->user_fctx = (void *) st;
68
69 tupdesc = CreateTemplateTupleDesc(3);
70 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
71 INT4OID, -1, 0);
72 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias",
73 TEXTOID, -1, 0);
74 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description",
75 TEXTOID, -1, 0);
76
77 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
78 MemoryContextSwitchTo(oldcontext);
79}
80
81static Datum
82tt_process_call(FuncCallContext *funcctx)
83{
84 TSTokenTypeStorage *st;
85
86 st = (TSTokenTypeStorage *) funcctx->user_fctx;
87 if (st->list && st->list[st->cur].lexid)
88 {
89 Datum result;
90 char *values[3];
91 char txtid[16];
92 HeapTuple tuple;
93
94 sprintf(txtid, "%d", st->list[st->cur].lexid);
95 values[0] = txtid;
96 values[1] = st->list[st->cur].alias;
97 values[2] = st->list[st->cur].descr;
98
99 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
100 result = HeapTupleGetDatum(tuple);
101
102 pfree(values[1]);
103 pfree(values[2]);
104 st->cur++;
105 return result;
106 }
107 if (st->list)
108 pfree(st->list);
109 pfree(st);
110 return (Datum) 0;
111}
112
113Datum
114ts_token_type_byid(PG_FUNCTION_ARGS)
115{
116 FuncCallContext *funcctx;
117 Datum result;
118
119 if (SRF_IS_FIRSTCALL())
120 {
121 funcctx = SRF_FIRSTCALL_INIT();
122 tt_setup_firstcall(funcctx, PG_GETARG_OID(0));
123 }
124
125 funcctx = SRF_PERCALL_SETUP();
126
127 if ((result = tt_process_call(funcctx)) != (Datum) 0)
128 SRF_RETURN_NEXT(funcctx, result);
129 SRF_RETURN_DONE(funcctx);
130}
131
132Datum
133ts_token_type_byname(PG_FUNCTION_ARGS)
134{
135 FuncCallContext *funcctx;
136 Datum result;
137
138 if (SRF_IS_FIRSTCALL())
139 {
140 text *prsname = PG_GETARG_TEXT_PP(0);
141 Oid prsId;
142
143 funcctx = SRF_FIRSTCALL_INIT();
144 prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
145 tt_setup_firstcall(funcctx, prsId);
146 }
147
148 funcctx = SRF_PERCALL_SETUP();
149
150 if ((result = tt_process_call(funcctx)) != (Datum) 0)
151 SRF_RETURN_NEXT(funcctx, result);
152 SRF_RETURN_DONE(funcctx);
153}
154
/*
 * One token produced by the parser, as saved for the ts_parse_* functions.
 */
typedef struct LexemeEntry
{
    int         type;           /* token type code returned by prstoken */
    char       *lexeme;         /* NUL-terminated copy of the token text */
} LexemeEntry;
160
161typedef struct
162{
163 int cur;
164 int len;
165 LexemeEntry *list;
166} PrsStorage;
167
168
169static void
170prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt)
171{
172 TupleDesc tupdesc;
173 MemoryContext oldcontext;
174 PrsStorage *st;
175 TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid);
176 char *lex = NULL;
177 int llen = 0,
178 type = 0;
179 void *prsdata;
180
181 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
182
183 st = (PrsStorage *) palloc(sizeof(PrsStorage));
184 st->cur = 0;
185 st->len = 16;
186 st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len);
187
188 prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart,
189 PointerGetDatum(VARDATA_ANY(txt)),
190 Int32GetDatum(VARSIZE_ANY_EXHDR(txt))));
191
192 while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken,
193 PointerGetDatum(prsdata),
194 PointerGetDatum(&lex),
195 PointerGetDatum(&llen)))) != 0)
196 {
197 if (st->cur >= st->len)
198 {
199 st->len = 2 * st->len;
200 st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len);
201 }
202 st->list[st->cur].lexeme = palloc(llen + 1);
203 memcpy(st->list[st->cur].lexeme, lex, llen);
204 st->list[st->cur].lexeme[llen] = '\0';
205 st->list[st->cur].type = type;
206 st->cur++;
207 }
208
209 FunctionCall1(&prs->prsend, PointerGetDatum(prsdata));
210
211 st->len = st->cur;
212 st->cur = 0;
213
214 funcctx->user_fctx = (void *) st;
215 tupdesc = CreateTemplateTupleDesc(2);
216 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid",
217 INT4OID, -1, 0);
218 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token",
219 TEXTOID, -1, 0);
220
221 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
222 MemoryContextSwitchTo(oldcontext);
223}
224
225static Datum
226prs_process_call(FuncCallContext *funcctx)
227{
228 PrsStorage *st;
229
230 st = (PrsStorage *) funcctx->user_fctx;
231 if (st->cur < st->len)
232 {
233 Datum result;
234 char *values[2];
235 char tid[16];
236 HeapTuple tuple;
237
238 values[0] = tid;
239 sprintf(tid, "%d", st->list[st->cur].type);
240 values[1] = st->list[st->cur].lexeme;
241 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
242 result = HeapTupleGetDatum(tuple);
243
244 pfree(values[1]);
245 st->cur++;
246 return result;
247 }
248 else
249 {
250 if (st->list)
251 pfree(st->list);
252 pfree(st);
253 }
254 return (Datum) 0;
255}
256
257Datum
258ts_parse_byid(PG_FUNCTION_ARGS)
259{
260 FuncCallContext *funcctx;
261 Datum result;
262
263 if (SRF_IS_FIRSTCALL())
264 {
265 text *txt = PG_GETARG_TEXT_PP(1);
266
267 funcctx = SRF_FIRSTCALL_INIT();
268 prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
269 PG_FREE_IF_COPY(txt, 1);
270 }
271
272 funcctx = SRF_PERCALL_SETUP();
273
274 if ((result = prs_process_call(funcctx)) != (Datum) 0)
275 SRF_RETURN_NEXT(funcctx, result);
276 SRF_RETURN_DONE(funcctx);
277}
278
279Datum
280ts_parse_byname(PG_FUNCTION_ARGS)
281{
282 FuncCallContext *funcctx;
283 Datum result;
284
285 if (SRF_IS_FIRSTCALL())
286 {
287 text *prsname = PG_GETARG_TEXT_PP(0);
288 text *txt = PG_GETARG_TEXT_PP(1);
289 Oid prsId;
290
291 funcctx = SRF_FIRSTCALL_INIT();
292 prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false);
293 prs_setup_firstcall(funcctx, prsId, txt);
294 }
295
296 funcctx = SRF_PERCALL_SETUP();
297
298 if ((result = prs_process_call(funcctx)) != (Datum) 0)
299 SRF_RETURN_NEXT(funcctx, result);
300 SRF_RETURN_DONE(funcctx);
301}
302
303Datum
304ts_headline_byid_opt(PG_FUNCTION_ARGS)
305{
306 Oid tsconfig = PG_GETARG_OID(0);
307 text *in = PG_GETARG_TEXT_PP(1);
308 TSQuery query = PG_GETARG_TSQUERY(2);
309 text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL;
310 HeadlineParsedText prs;
311 List *prsoptions;
312 text *out;
313 TSConfigCacheEntry *cfg;
314 TSParserCacheEntry *prsobj;
315
316 cfg = lookup_ts_config_cache(tsconfig);
317 prsobj = lookup_ts_parser_cache(cfg->prsId);
318
319 if (!OidIsValid(prsobj->headlineOid))
320 ereport(ERROR,
321 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
322 errmsg("text search parser does not support headline creation")));
323
324 memset(&prs, 0, sizeof(HeadlineParsedText));
325 prs.lenwords = 32;
326 prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
327
328 hlparsetext(cfg->cfgId, &prs, query,
329 VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
330
331 if (opt)
332 prsoptions = deserialize_deflist(PointerGetDatum(opt));
333 else
334 prsoptions = NIL;
335
336 FunctionCall3(&(prsobj->prsheadline),
337 PointerGetDatum(&prs),
338 PointerGetDatum(prsoptions),
339 PointerGetDatum(query));
340
341 out = generateHeadline(&prs);
342
343 PG_FREE_IF_COPY(in, 1);
344 PG_FREE_IF_COPY(query, 2);
345 if (opt)
346 PG_FREE_IF_COPY(opt, 3);
347 pfree(prs.words);
348 pfree(prs.startsel);
349 pfree(prs.stopsel);
350
351 PG_RETURN_POINTER(out);
352}
353
354Datum
355ts_headline_byid(PG_FUNCTION_ARGS)
356{
357 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
358 PG_GETARG_DATUM(0),
359 PG_GETARG_DATUM(1),
360 PG_GETARG_DATUM(2)));
361}
362
363Datum
364ts_headline(PG_FUNCTION_ARGS)
365{
366 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
367 ObjectIdGetDatum(getTSCurrentConfig(true)),
368 PG_GETARG_DATUM(0),
369 PG_GETARG_DATUM(1)));
370}
371
372Datum
373ts_headline_opt(PG_FUNCTION_ARGS)
374{
375 PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
376 ObjectIdGetDatum(getTSCurrentConfig(true)),
377 PG_GETARG_DATUM(0),
378 PG_GETARG_DATUM(1),
379 PG_GETARG_DATUM(2)));
380}
381
382Datum
383ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
384{
385 Oid tsconfig = PG_GETARG_OID(0);
386 Jsonb *jb = PG_GETARG_JSONB_P(1);
387 TSQuery query = PG_GETARG_TSQUERY(2);
388 text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
389 Jsonb *out;
390 JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
391 HeadlineParsedText prs;
392 HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
393
394 memset(&prs, 0, sizeof(HeadlineParsedText));
395 prs.lenwords = 32;
396 prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
397
398 state->prs = &prs;
399 state->cfg = lookup_ts_config_cache(tsconfig);
400 state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
401 state->query = query;
402 if (opt)
403 state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
404 else
405 state->prsoptions = NIL;
406
407 if (!OidIsValid(state->prsobj->headlineOid))
408 ereport(ERROR,
409 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
410 errmsg("text search parser does not support headline creation")));
411
412 out = transform_jsonb_string_values(jb, state, action);
413
414 PG_FREE_IF_COPY(jb, 1);
415 PG_FREE_IF_COPY(query, 2);
416 if (opt)
417 PG_FREE_IF_COPY(opt, 3);
418
419 pfree(prs.words);
420
421 if (state->transformed)
422 {
423 pfree(prs.startsel);
424 pfree(prs.stopsel);
425 }
426
427 PG_RETURN_JSONB_P(out);
428}
429
430Datum
431ts_headline_jsonb(PG_FUNCTION_ARGS)
432{
433 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
434 ObjectIdGetDatum(getTSCurrentConfig(true)),
435 PG_GETARG_DATUM(0),
436 PG_GETARG_DATUM(1)));
437}
438
439Datum
440ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
441{
442 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
443 PG_GETARG_DATUM(0),
444 PG_GETARG_DATUM(1),
445 PG_GETARG_DATUM(2)));
446}
447
448Datum
449ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
450{
451 PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
452 ObjectIdGetDatum(getTSCurrentConfig(true)),
453 PG_GETARG_DATUM(0),
454 PG_GETARG_DATUM(1),
455 PG_GETARG_DATUM(2)));
456}
457
458Datum
459ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
460{
461 Oid tsconfig = PG_GETARG_OID(0);
462 text *json = PG_GETARG_TEXT_P(1);
463 TSQuery query = PG_GETARG_TSQUERY(2);
464 text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
465 text *out;
466 JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
467
468 HeadlineParsedText prs;
469 HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
470
471 memset(&prs, 0, sizeof(HeadlineParsedText));
472 prs.lenwords = 32;
473 prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
474
475 state->prs = &prs;
476 state->cfg = lookup_ts_config_cache(tsconfig);
477 state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
478 state->query = query;
479 if (opt)
480 state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
481 else
482 state->prsoptions = NIL;
483
484 if (!OidIsValid(state->prsobj->headlineOid))
485 ereport(ERROR,
486 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
487 errmsg("text search parser does not support headline creation")));
488
489 out = transform_json_string_values(json, state, action);
490
491 PG_FREE_IF_COPY(json, 1);
492 PG_FREE_IF_COPY(query, 2);
493 if (opt)
494 PG_FREE_IF_COPY(opt, 3);
495 pfree(prs.words);
496
497 if (state->transformed)
498 {
499 pfree(prs.startsel);
500 pfree(prs.stopsel);
501 }
502
503 PG_RETURN_TEXT_P(out);
504}
505
506Datum
507ts_headline_json(PG_FUNCTION_ARGS)
508{
509 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
510 ObjectIdGetDatum(getTSCurrentConfig(true)),
511 PG_GETARG_DATUM(0),
512 PG_GETARG_DATUM(1)));
513}
514
515Datum
516ts_headline_json_byid(PG_FUNCTION_ARGS)
517{
518 PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
519 PG_GETARG_DATUM(0),
520 PG_GETARG_DATUM(1),
521 PG_GETARG_DATUM(2)));
522}
523
524Datum
525ts_headline_json_opt(PG_FUNCTION_ARGS)
526{
527 PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
528 ObjectIdGetDatum(getTSCurrentConfig(true)),
529 PG_GETARG_DATUM(0),
530 PG_GETARG_DATUM(1),
531 PG_GETARG_DATUM(2)));
532}
533
534
535/*
536 * Return headline in text from, generated from a json(b) element
537 */
538static text *
539headline_json_value(void *_state, char *elem_value, int elem_len)
540{
541 HeadlineJsonState *state = (HeadlineJsonState *) _state;
542
543 HeadlineParsedText *prs = state->prs;
544 TSConfigCacheEntry *cfg = state->cfg;
545 TSParserCacheEntry *prsobj = state->prsobj;
546 TSQuery query = state->query;
547 List *prsoptions = state->prsoptions;
548
549 prs->curwords = 0;
550 hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
551 FunctionCall3(&(prsobj->prsheadline),
552 PointerGetDatum(prs),
553 PointerGetDatum(prsoptions),
554 PointerGetDatum(query));
555
556 state->transformed = true;
557 return generateHeadline(prs);
558}
559