1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * wparser.c |
4 | * Standard interface to word parser |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/tsearch/wparser.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | #include "postgres.h" |
15 | |
16 | #include "funcapi.h" |
17 | #include "catalog/namespace.h" |
18 | #include "catalog/pg_type.h" |
19 | #include "commands/defrem.h" |
20 | #include "tsearch/ts_cache.h" |
21 | #include "tsearch/ts_utils.h" |
22 | #include "utils/builtins.h" |
23 | #include "utils/jsonapi.h" |
24 | #include "utils/varlena.h" |
25 | |
26 | |
27 | /******sql-level interface******/ |
28 | |
29 | typedef struct |
30 | { |
31 | int cur; |
32 | LexDescr *list; |
33 | } TSTokenTypeStorage; |
34 | |
35 | /* state for ts_headline_json_* */ |
36 | typedef struct HeadlineJsonState |
37 | { |
38 | HeadlineParsedText *prs; |
39 | TSConfigCacheEntry *cfg; |
40 | TSParserCacheEntry *prsobj; |
41 | TSQuery query; |
42 | List *prsoptions; |
43 | bool transformed; |
44 | } HeadlineJsonState; |
45 | |
46 | static text *headline_json_value(void *_state, char *elem_value, int elem_len); |
47 | |
48 | static void |
49 | tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid) |
50 | { |
51 | TupleDesc tupdesc; |
52 | MemoryContext oldcontext; |
53 | TSTokenTypeStorage *st; |
54 | TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid); |
55 | |
56 | if (!OidIsValid(prs->lextypeOid)) |
57 | elog(ERROR, "method lextype isn't defined for text search parser %u" , |
58 | prsid); |
59 | |
60 | oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
61 | |
62 | st = (TSTokenTypeStorage *) palloc(sizeof(TSTokenTypeStorage)); |
63 | st->cur = 0; |
64 | /* lextype takes one dummy argument */ |
65 | st->list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid, |
66 | (Datum) 0)); |
67 | funcctx->user_fctx = (void *) st; |
68 | |
69 | tupdesc = CreateTemplateTupleDesc(3); |
70 | TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid" , |
71 | INT4OID, -1, 0); |
72 | TupleDescInitEntry(tupdesc, (AttrNumber) 2, "alias" , |
73 | TEXTOID, -1, 0); |
74 | TupleDescInitEntry(tupdesc, (AttrNumber) 3, "description" , |
75 | TEXTOID, -1, 0); |
76 | |
77 | funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); |
78 | MemoryContextSwitchTo(oldcontext); |
79 | } |
80 | |
81 | static Datum |
82 | tt_process_call(FuncCallContext *funcctx) |
83 | { |
84 | TSTokenTypeStorage *st; |
85 | |
86 | st = (TSTokenTypeStorage *) funcctx->user_fctx; |
87 | if (st->list && st->list[st->cur].lexid) |
88 | { |
89 | Datum result; |
90 | char *values[3]; |
91 | char txtid[16]; |
92 | HeapTuple tuple; |
93 | |
94 | sprintf(txtid, "%d" , st->list[st->cur].lexid); |
95 | values[0] = txtid; |
96 | values[1] = st->list[st->cur].alias; |
97 | values[2] = st->list[st->cur].descr; |
98 | |
99 | tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); |
100 | result = HeapTupleGetDatum(tuple); |
101 | |
102 | pfree(values[1]); |
103 | pfree(values[2]); |
104 | st->cur++; |
105 | return result; |
106 | } |
107 | if (st->list) |
108 | pfree(st->list); |
109 | pfree(st); |
110 | return (Datum) 0; |
111 | } |
112 | |
113 | Datum |
114 | ts_token_type_byid(PG_FUNCTION_ARGS) |
115 | { |
116 | FuncCallContext *funcctx; |
117 | Datum result; |
118 | |
119 | if (SRF_IS_FIRSTCALL()) |
120 | { |
121 | funcctx = SRF_FIRSTCALL_INIT(); |
122 | tt_setup_firstcall(funcctx, PG_GETARG_OID(0)); |
123 | } |
124 | |
125 | funcctx = SRF_PERCALL_SETUP(); |
126 | |
127 | if ((result = tt_process_call(funcctx)) != (Datum) 0) |
128 | SRF_RETURN_NEXT(funcctx, result); |
129 | SRF_RETURN_DONE(funcctx); |
130 | } |
131 | |
132 | Datum |
133 | ts_token_type_byname(PG_FUNCTION_ARGS) |
134 | { |
135 | FuncCallContext *funcctx; |
136 | Datum result; |
137 | |
138 | if (SRF_IS_FIRSTCALL()) |
139 | { |
140 | text *prsname = PG_GETARG_TEXT_PP(0); |
141 | Oid prsId; |
142 | |
143 | funcctx = SRF_FIRSTCALL_INIT(); |
144 | prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false); |
145 | tt_setup_firstcall(funcctx, prsId); |
146 | } |
147 | |
148 | funcctx = SRF_PERCALL_SETUP(); |
149 | |
150 | if ((result = tt_process_call(funcctx)) != (Datum) 0) |
151 | SRF_RETURN_NEXT(funcctx, result); |
152 | SRF_RETURN_DONE(funcctx); |
153 | } |
154 | |
/*
 * One token collected by prs_setup_firstcall(): the type code reported by
 * the parser and a NUL-terminated copy of the token text.
 */
typedef struct
{
	int			type;			/* token type returned by the prstoken method */
	char	   *lexeme;			/* palloc'd, NUL-terminated copy of the token */
} LexemeEntry;

/*
 * Iteration state of the ts_parse_* functions.
 *
 * While tokens are being collected, cur is the number of entries filled and
 * len the allocated capacity of list.  Once collection is done, len is the
 * number of tokens and cur the index of the next one to return.
 */
typedef struct
{
	int			cur;
	int			len;
	LexemeEntry *list;
} PrsStorage;
167 | |
168 | |
169 | static void |
170 | prs_setup_firstcall(FuncCallContext *funcctx, Oid prsid, text *txt) |
171 | { |
172 | TupleDesc tupdesc; |
173 | MemoryContext oldcontext; |
174 | PrsStorage *st; |
175 | TSParserCacheEntry *prs = lookup_ts_parser_cache(prsid); |
176 | char *lex = NULL; |
177 | int llen = 0, |
178 | type = 0; |
179 | void *prsdata; |
180 | |
181 | oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); |
182 | |
183 | st = (PrsStorage *) palloc(sizeof(PrsStorage)); |
184 | st->cur = 0; |
185 | st->len = 16; |
186 | st->list = (LexemeEntry *) palloc(sizeof(LexemeEntry) * st->len); |
187 | |
188 | prsdata = (void *) DatumGetPointer(FunctionCall2(&prs->prsstart, |
189 | PointerGetDatum(VARDATA_ANY(txt)), |
190 | Int32GetDatum(VARSIZE_ANY_EXHDR(txt)))); |
191 | |
192 | while ((type = DatumGetInt32(FunctionCall3(&prs->prstoken, |
193 | PointerGetDatum(prsdata), |
194 | PointerGetDatum(&lex), |
195 | PointerGetDatum(&llen)))) != 0) |
196 | { |
197 | if (st->cur >= st->len) |
198 | { |
199 | st->len = 2 * st->len; |
200 | st->list = (LexemeEntry *) repalloc(st->list, sizeof(LexemeEntry) * st->len); |
201 | } |
202 | st->list[st->cur].lexeme = palloc(llen + 1); |
203 | memcpy(st->list[st->cur].lexeme, lex, llen); |
204 | st->list[st->cur].lexeme[llen] = '\0'; |
205 | st->list[st->cur].type = type; |
206 | st->cur++; |
207 | } |
208 | |
209 | FunctionCall1(&prs->prsend, PointerGetDatum(prsdata)); |
210 | |
211 | st->len = st->cur; |
212 | st->cur = 0; |
213 | |
214 | funcctx->user_fctx = (void *) st; |
215 | tupdesc = CreateTemplateTupleDesc(2); |
216 | TupleDescInitEntry(tupdesc, (AttrNumber) 1, "tokid" , |
217 | INT4OID, -1, 0); |
218 | TupleDescInitEntry(tupdesc, (AttrNumber) 2, "token" , |
219 | TEXTOID, -1, 0); |
220 | |
221 | funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); |
222 | MemoryContextSwitchTo(oldcontext); |
223 | } |
224 | |
225 | static Datum |
226 | prs_process_call(FuncCallContext *funcctx) |
227 | { |
228 | PrsStorage *st; |
229 | |
230 | st = (PrsStorage *) funcctx->user_fctx; |
231 | if (st->cur < st->len) |
232 | { |
233 | Datum result; |
234 | char *values[2]; |
235 | char tid[16]; |
236 | HeapTuple tuple; |
237 | |
238 | values[0] = tid; |
239 | sprintf(tid, "%d" , st->list[st->cur].type); |
240 | values[1] = st->list[st->cur].lexeme; |
241 | tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); |
242 | result = HeapTupleGetDatum(tuple); |
243 | |
244 | pfree(values[1]); |
245 | st->cur++; |
246 | return result; |
247 | } |
248 | else |
249 | { |
250 | if (st->list) |
251 | pfree(st->list); |
252 | pfree(st); |
253 | } |
254 | return (Datum) 0; |
255 | } |
256 | |
257 | Datum |
258 | ts_parse_byid(PG_FUNCTION_ARGS) |
259 | { |
260 | FuncCallContext *funcctx; |
261 | Datum result; |
262 | |
263 | if (SRF_IS_FIRSTCALL()) |
264 | { |
265 | text *txt = PG_GETARG_TEXT_PP(1); |
266 | |
267 | funcctx = SRF_FIRSTCALL_INIT(); |
268 | prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt); |
269 | PG_FREE_IF_COPY(txt, 1); |
270 | } |
271 | |
272 | funcctx = SRF_PERCALL_SETUP(); |
273 | |
274 | if ((result = prs_process_call(funcctx)) != (Datum) 0) |
275 | SRF_RETURN_NEXT(funcctx, result); |
276 | SRF_RETURN_DONE(funcctx); |
277 | } |
278 | |
279 | Datum |
280 | ts_parse_byname(PG_FUNCTION_ARGS) |
281 | { |
282 | FuncCallContext *funcctx; |
283 | Datum result; |
284 | |
285 | if (SRF_IS_FIRSTCALL()) |
286 | { |
287 | text *prsname = PG_GETARG_TEXT_PP(0); |
288 | text *txt = PG_GETARG_TEXT_PP(1); |
289 | Oid prsId; |
290 | |
291 | funcctx = SRF_FIRSTCALL_INIT(); |
292 | prsId = get_ts_parser_oid(textToQualifiedNameList(prsname), false); |
293 | prs_setup_firstcall(funcctx, prsId, txt); |
294 | } |
295 | |
296 | funcctx = SRF_PERCALL_SETUP(); |
297 | |
298 | if ((result = prs_process_call(funcctx)) != (Datum) 0) |
299 | SRF_RETURN_NEXT(funcctx, result); |
300 | SRF_RETURN_DONE(funcctx); |
301 | } |
302 | |
303 | Datum |
304 | ts_headline_byid_opt(PG_FUNCTION_ARGS) |
305 | { |
306 | Oid tsconfig = PG_GETARG_OID(0); |
307 | text *in = PG_GETARG_TEXT_PP(1); |
308 | TSQuery query = PG_GETARG_TSQUERY(2); |
309 | text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_PP(3) : NULL; |
310 | HeadlineParsedText prs; |
311 | List *prsoptions; |
312 | text *out; |
313 | TSConfigCacheEntry *cfg; |
314 | TSParserCacheEntry *prsobj; |
315 | |
316 | cfg = lookup_ts_config_cache(tsconfig); |
317 | prsobj = lookup_ts_parser_cache(cfg->prsId); |
318 | |
319 | if (!OidIsValid(prsobj->headlineOid)) |
320 | ereport(ERROR, |
321 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
322 | errmsg("text search parser does not support headline creation" ))); |
323 | |
324 | memset(&prs, 0, sizeof(HeadlineParsedText)); |
325 | prs.lenwords = 32; |
326 | prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); |
327 | |
328 | hlparsetext(cfg->cfgId, &prs, query, |
329 | VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in)); |
330 | |
331 | if (opt) |
332 | prsoptions = deserialize_deflist(PointerGetDatum(opt)); |
333 | else |
334 | prsoptions = NIL; |
335 | |
336 | FunctionCall3(&(prsobj->prsheadline), |
337 | PointerGetDatum(&prs), |
338 | PointerGetDatum(prsoptions), |
339 | PointerGetDatum(query)); |
340 | |
341 | out = generateHeadline(&prs); |
342 | |
343 | PG_FREE_IF_COPY(in, 1); |
344 | PG_FREE_IF_COPY(query, 2); |
345 | if (opt) |
346 | PG_FREE_IF_COPY(opt, 3); |
347 | pfree(prs.words); |
348 | pfree(prs.startsel); |
349 | pfree(prs.stopsel); |
350 | |
351 | PG_RETURN_POINTER(out); |
352 | } |
353 | |
354 | Datum |
355 | ts_headline_byid(PG_FUNCTION_ARGS) |
356 | { |
357 | PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, |
358 | PG_GETARG_DATUM(0), |
359 | PG_GETARG_DATUM(1), |
360 | PG_GETARG_DATUM(2))); |
361 | } |
362 | |
363 | Datum |
364 | ts_headline(PG_FUNCTION_ARGS) |
365 | { |
366 | PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt, |
367 | ObjectIdGetDatum(getTSCurrentConfig(true)), |
368 | PG_GETARG_DATUM(0), |
369 | PG_GETARG_DATUM(1))); |
370 | } |
371 | |
372 | Datum |
373 | ts_headline_opt(PG_FUNCTION_ARGS) |
374 | { |
375 | PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt, |
376 | ObjectIdGetDatum(getTSCurrentConfig(true)), |
377 | PG_GETARG_DATUM(0), |
378 | PG_GETARG_DATUM(1), |
379 | PG_GETARG_DATUM(2))); |
380 | } |
381 | |
382 | Datum |
383 | ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS) |
384 | { |
385 | Oid tsconfig = PG_GETARG_OID(0); |
386 | Jsonb *jb = PG_GETARG_JSONB_P(1); |
387 | TSQuery query = PG_GETARG_TSQUERY(2); |
388 | text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; |
389 | Jsonb *out; |
390 | JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; |
391 | HeadlineParsedText prs; |
392 | HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); |
393 | |
394 | memset(&prs, 0, sizeof(HeadlineParsedText)); |
395 | prs.lenwords = 32; |
396 | prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); |
397 | |
398 | state->prs = &prs; |
399 | state->cfg = lookup_ts_config_cache(tsconfig); |
400 | state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); |
401 | state->query = query; |
402 | if (opt) |
403 | state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); |
404 | else |
405 | state->prsoptions = NIL; |
406 | |
407 | if (!OidIsValid(state->prsobj->headlineOid)) |
408 | ereport(ERROR, |
409 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
410 | errmsg("text search parser does not support headline creation" ))); |
411 | |
412 | out = transform_jsonb_string_values(jb, state, action); |
413 | |
414 | PG_FREE_IF_COPY(jb, 1); |
415 | PG_FREE_IF_COPY(query, 2); |
416 | if (opt) |
417 | PG_FREE_IF_COPY(opt, 3); |
418 | |
419 | pfree(prs.words); |
420 | |
421 | if (state->transformed) |
422 | { |
423 | pfree(prs.startsel); |
424 | pfree(prs.stopsel); |
425 | } |
426 | |
427 | PG_RETURN_JSONB_P(out); |
428 | } |
429 | |
430 | Datum |
431 | ts_headline_jsonb(PG_FUNCTION_ARGS) |
432 | { |
433 | PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, |
434 | ObjectIdGetDatum(getTSCurrentConfig(true)), |
435 | PG_GETARG_DATUM(0), |
436 | PG_GETARG_DATUM(1))); |
437 | } |
438 | |
439 | Datum |
440 | ts_headline_jsonb_byid(PG_FUNCTION_ARGS) |
441 | { |
442 | PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt, |
443 | PG_GETARG_DATUM(0), |
444 | PG_GETARG_DATUM(1), |
445 | PG_GETARG_DATUM(2))); |
446 | } |
447 | |
448 | Datum |
449 | ts_headline_jsonb_opt(PG_FUNCTION_ARGS) |
450 | { |
451 | PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt, |
452 | ObjectIdGetDatum(getTSCurrentConfig(true)), |
453 | PG_GETARG_DATUM(0), |
454 | PG_GETARG_DATUM(1), |
455 | PG_GETARG_DATUM(2))); |
456 | } |
457 | |
458 | Datum |
459 | ts_headline_json_byid_opt(PG_FUNCTION_ARGS) |
460 | { |
461 | Oid tsconfig = PG_GETARG_OID(0); |
462 | text *json = PG_GETARG_TEXT_P(1); |
463 | TSQuery query = PG_GETARG_TSQUERY(2); |
464 | text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL; |
465 | text *out; |
466 | JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value; |
467 | |
468 | HeadlineParsedText prs; |
469 | HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState)); |
470 | |
471 | memset(&prs, 0, sizeof(HeadlineParsedText)); |
472 | prs.lenwords = 32; |
473 | prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords); |
474 | |
475 | state->prs = &prs; |
476 | state->cfg = lookup_ts_config_cache(tsconfig); |
477 | state->prsobj = lookup_ts_parser_cache(state->cfg->prsId); |
478 | state->query = query; |
479 | if (opt) |
480 | state->prsoptions = deserialize_deflist(PointerGetDatum(opt)); |
481 | else |
482 | state->prsoptions = NIL; |
483 | |
484 | if (!OidIsValid(state->prsobj->headlineOid)) |
485 | ereport(ERROR, |
486 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
487 | errmsg("text search parser does not support headline creation" ))); |
488 | |
489 | out = transform_json_string_values(json, state, action); |
490 | |
491 | PG_FREE_IF_COPY(json, 1); |
492 | PG_FREE_IF_COPY(query, 2); |
493 | if (opt) |
494 | PG_FREE_IF_COPY(opt, 3); |
495 | pfree(prs.words); |
496 | |
497 | if (state->transformed) |
498 | { |
499 | pfree(prs.startsel); |
500 | pfree(prs.stopsel); |
501 | } |
502 | |
503 | PG_RETURN_TEXT_P(out); |
504 | } |
505 | |
506 | Datum |
507 | ts_headline_json(PG_FUNCTION_ARGS) |
508 | { |
509 | PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, |
510 | ObjectIdGetDatum(getTSCurrentConfig(true)), |
511 | PG_GETARG_DATUM(0), |
512 | PG_GETARG_DATUM(1))); |
513 | } |
514 | |
515 | Datum |
516 | ts_headline_json_byid(PG_FUNCTION_ARGS) |
517 | { |
518 | PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt, |
519 | PG_GETARG_DATUM(0), |
520 | PG_GETARG_DATUM(1), |
521 | PG_GETARG_DATUM(2))); |
522 | } |
523 | |
524 | Datum |
525 | ts_headline_json_opt(PG_FUNCTION_ARGS) |
526 | { |
527 | PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt, |
528 | ObjectIdGetDatum(getTSCurrentConfig(true)), |
529 | PG_GETARG_DATUM(0), |
530 | PG_GETARG_DATUM(1), |
531 | PG_GETARG_DATUM(2))); |
532 | } |
533 | |
534 | |
535 | /* |
536 | * Return headline in text from, generated from a json(b) element |
537 | */ |
538 | static text * |
539 | headline_json_value(void *_state, char *elem_value, int elem_len) |
540 | { |
541 | HeadlineJsonState *state = (HeadlineJsonState *) _state; |
542 | |
543 | HeadlineParsedText *prs = state->prs; |
544 | TSConfigCacheEntry *cfg = state->cfg; |
545 | TSParserCacheEntry *prsobj = state->prsobj; |
546 | TSQuery query = state->query; |
547 | List *prsoptions = state->prsoptions; |
548 | |
549 | prs->curwords = 0; |
550 | hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len); |
551 | FunctionCall3(&(prsobj->prsheadline), |
552 | PointerGetDatum(prs), |
553 | PointerGetDatum(prsoptions), |
554 | PointerGetDatum(query)); |
555 | |
556 | state->transformed = true; |
557 | return generateHeadline(prs); |
558 | } |
559 | |