| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * tsginidx.c |
| 4 | * GIN support functions for tsvector_ops |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * |
| 9 | * IDENTIFICATION |
| 10 | * src/backend/utils/adt/tsginidx.c |
| 11 | * |
| 12 | *------------------------------------------------------------------------- |
| 13 | */ |
| 14 | #include "postgres.h" |
| 15 | |
| 16 | #include "access/gin.h" |
| 17 | #include "access/stratnum.h" |
| 18 | #include "miscadmin.h" |
| 19 | #include "tsearch/ts_type.h" |
| 20 | #include "tsearch/ts_utils.h" |
| 21 | #include "utils/builtins.h" |
| 22 | |
| 23 | |
| 24 | Datum |
| 25 | gin_cmp_tslexeme(PG_FUNCTION_ARGS) |
| 26 | { |
| 27 | text *a = PG_GETARG_TEXT_PP(0); |
| 28 | text *b = PG_GETARG_TEXT_PP(1); |
| 29 | int cmp; |
| 30 | |
| 31 | cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), |
| 32 | VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), |
| 33 | false); |
| 34 | |
| 35 | PG_FREE_IF_COPY(a, 0); |
| 36 | PG_FREE_IF_COPY(b, 1); |
| 37 | PG_RETURN_INT32(cmp); |
| 38 | } |
| 39 | |
| 40 | Datum |
| 41 | gin_cmp_prefix(PG_FUNCTION_ARGS) |
| 42 | { |
| 43 | text *a = PG_GETARG_TEXT_PP(0); |
| 44 | text *b = PG_GETARG_TEXT_PP(1); |
| 45 | |
| 46 | #ifdef NOT_USED |
| 47 | StrategyNumber strategy = PG_GETARG_UINT16(2); |
| 48 | Pointer extra_data = PG_GETARG_POINTER(3); |
| 49 | #endif |
| 50 | int cmp; |
| 51 | |
| 52 | cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), |
| 53 | VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), |
| 54 | true); |
| 55 | |
| 56 | if (cmp < 0) |
| 57 | cmp = 1; /* prevent continue scan */ |
| 58 | |
| 59 | PG_FREE_IF_COPY(a, 0); |
| 60 | PG_FREE_IF_COPY(b, 1); |
| 61 | PG_RETURN_INT32(cmp); |
| 62 | } |
| 63 | |
| 64 | Datum |
| 65 | (PG_FUNCTION_ARGS) |
| 66 | { |
| 67 | TSVector vector = PG_GETARG_TSVECTOR(0); |
| 68 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| 69 | Datum *entries = NULL; |
| 70 | |
| 71 | *nentries = vector->size; |
| 72 | if (vector->size > 0) |
| 73 | { |
| 74 | int i; |
| 75 | WordEntry *we = ARRPTR(vector); |
| 76 | |
| 77 | entries = (Datum *) palloc(sizeof(Datum) * vector->size); |
| 78 | |
| 79 | for (i = 0; i < vector->size; i++) |
| 80 | { |
| 81 | text *txt; |
| 82 | |
| 83 | txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len); |
| 84 | entries[i] = PointerGetDatum(txt); |
| 85 | |
| 86 | we++; |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | PG_FREE_IF_COPY(vector, 0); |
| 91 | PG_RETURN_POINTER(entries); |
| 92 | } |
| 93 | |
| 94 | Datum |
| 95 | (PG_FUNCTION_ARGS) |
| 96 | { |
| 97 | TSQuery query = PG_GETARG_TSQUERY(0); |
| 98 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| 99 | |
| 100 | /* StrategyNumber strategy = PG_GETARG_UINT16(2); */ |
| 101 | bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3); |
| 102 | Pointer ** = (Pointer **) PG_GETARG_POINTER(4); |
| 103 | |
| 104 | /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ |
| 105 | int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| 106 | Datum *entries = NULL; |
| 107 | |
| 108 | *nentries = 0; |
| 109 | |
| 110 | if (query->size > 0) |
| 111 | { |
| 112 | QueryItem *item = GETQUERY(query); |
| 113 | int32 i, |
| 114 | j; |
| 115 | bool *partialmatch; |
| 116 | int *map_item_operand; |
| 117 | |
| 118 | /* |
| 119 | * If the query doesn't have any required positive matches (for |
| 120 | * instance, it's something like '! foo'), we have to do a full index |
| 121 | * scan. |
| 122 | */ |
| 123 | if (tsquery_requires_match(item)) |
| 124 | *searchMode = GIN_SEARCH_MODE_DEFAULT; |
| 125 | else |
| 126 | *searchMode = GIN_SEARCH_MODE_ALL; |
| 127 | |
| 128 | /* count number of VAL items */ |
| 129 | j = 0; |
| 130 | for (i = 0; i < query->size; i++) |
| 131 | { |
| 132 | if (item[i].type == QI_VAL) |
| 133 | j++; |
| 134 | } |
| 135 | *nentries = j; |
| 136 | |
| 137 | entries = (Datum *) palloc(sizeof(Datum) * j); |
| 138 | partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j); |
| 139 | |
| 140 | /* |
| 141 | * Make map to convert item's number to corresponding operand's (the |
| 142 | * same, entry's) number. Entry's number is used in check array in |
| 143 | * consistent method. We use the same map for each entry. |
| 144 | */ |
| 145 | *extra_data = (Pointer *) palloc(sizeof(Pointer) * j); |
| 146 | map_item_operand = (int *) palloc0(sizeof(int) * query->size); |
| 147 | |
| 148 | /* Now rescan the VAL items and fill in the arrays */ |
| 149 | j = 0; |
| 150 | for (i = 0; i < query->size; i++) |
| 151 | { |
| 152 | if (item[i].type == QI_VAL) |
| 153 | { |
| 154 | QueryOperand *val = &item[i].qoperand; |
| 155 | text *txt; |
| 156 | |
| 157 | txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, |
| 158 | val->length); |
| 159 | entries[j] = PointerGetDatum(txt); |
| 160 | partialmatch[j] = val->prefix; |
| 161 | (*extra_data)[j] = (Pointer) map_item_operand; |
| 162 | map_item_operand[i] = j; |
| 163 | j++; |
| 164 | } |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | PG_FREE_IF_COPY(query, 0); |
| 169 | |
| 170 | PG_RETURN_POINTER(entries); |
| 171 | } |
| 172 | |
| 173 | typedef struct |
| 174 | { |
| 175 | QueryItem *first_item; |
| 176 | GinTernaryValue *check; |
| 177 | int *map_item_operand; |
| 178 | bool *need_recheck; |
| 179 | } GinChkVal; |
| 180 | |
| 181 | static GinTernaryValue |
| 182 | checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data) |
| 183 | { |
| 184 | int j; |
| 185 | |
| 186 | /* |
| 187 | * if any val requiring a weight is used or caller needs position |
| 188 | * information then set recheck flag |
| 189 | */ |
| 190 | if (val->weight != 0 || data != NULL) |
| 191 | *(gcv->need_recheck) = true; |
| 192 | |
| 193 | /* convert item's number to corresponding entry's (operand's) number */ |
| 194 | j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; |
| 195 | |
| 196 | /* return presence of current entry in indexed value */ |
| 197 | return gcv->check[j]; |
| 198 | } |
| 199 | |
| 200 | /* |
| 201 | * Wrapper of check condition function for TS_execute. |
| 202 | */ |
| 203 | static bool |
| 204 | checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) |
| 205 | { |
| 206 | return checkcondition_gin_internal((GinChkVal *) checkval, |
| 207 | val, |
| 208 | data) != GIN_FALSE; |
| 209 | } |
| 210 | |
| 211 | /* |
| 212 | * Evaluate tsquery boolean expression using ternary logic. |
| 213 | */ |
| 214 | static GinTernaryValue |
| 215 | TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase) |
| 216 | { |
| 217 | GinTernaryValue val1, |
| 218 | val2, |
| 219 | result; |
| 220 | |
| 221 | /* since this function recurses, it could be driven to stack overflow */ |
| 222 | check_stack_depth(); |
| 223 | |
| 224 | if (curitem->type == QI_VAL) |
| 225 | return |
| 226 | checkcondition_gin_internal(gcv, |
| 227 | (QueryOperand *) curitem, |
| 228 | NULL /* don't have position info */ ); |
| 229 | |
| 230 | switch (curitem->qoperator.oper) |
| 231 | { |
| 232 | case OP_NOT: |
| 233 | /* In phrase search, always return MAYBE since we lack positions */ |
| 234 | if (in_phrase) |
| 235 | return GIN_MAYBE; |
| 236 | result = TS_execute_ternary(gcv, curitem + 1, in_phrase); |
| 237 | if (result == GIN_MAYBE) |
| 238 | return result; |
| 239 | return !result; |
| 240 | |
| 241 | case OP_PHRASE: |
| 242 | |
| 243 | /* |
| 244 | * GIN doesn't contain any information about positions, so treat |
| 245 | * OP_PHRASE as OP_AND with recheck requirement |
| 246 | */ |
| 247 | *(gcv->need_recheck) = true; |
| 248 | /* Pass down in_phrase == true in case there's a NOT below */ |
| 249 | in_phrase = true; |
| 250 | |
| 251 | /* FALL THRU */ |
| 252 | |
| 253 | case OP_AND: |
| 254 | val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, |
| 255 | in_phrase); |
| 256 | if (val1 == GIN_FALSE) |
| 257 | return GIN_FALSE; |
| 258 | val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); |
| 259 | if (val2 == GIN_FALSE) |
| 260 | return GIN_FALSE; |
| 261 | if (val1 == GIN_TRUE && val2 == GIN_TRUE) |
| 262 | return GIN_TRUE; |
| 263 | else |
| 264 | return GIN_MAYBE; |
| 265 | |
| 266 | case OP_OR: |
| 267 | val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, |
| 268 | in_phrase); |
| 269 | if (val1 == GIN_TRUE) |
| 270 | return GIN_TRUE; |
| 271 | val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); |
| 272 | if (val2 == GIN_TRUE) |
| 273 | return GIN_TRUE; |
| 274 | if (val1 == GIN_FALSE && val2 == GIN_FALSE) |
| 275 | return GIN_FALSE; |
| 276 | else |
| 277 | return GIN_MAYBE; |
| 278 | |
| 279 | default: |
| 280 | elog(ERROR, "unrecognized operator: %d" , curitem->qoperator.oper); |
| 281 | } |
| 282 | |
| 283 | /* not reachable, but keep compiler quiet */ |
| 284 | return false; |
| 285 | } |
| 286 | |
| 287 | Datum |
| 288 | gin_tsquery_consistent(PG_FUNCTION_ARGS) |
| 289 | { |
| 290 | bool *check = (bool *) PG_GETARG_POINTER(0); |
| 291 | |
| 292 | /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ |
| 293 | TSQuery query = PG_GETARG_TSQUERY(2); |
| 294 | |
| 295 | /* int32 nkeys = PG_GETARG_INT32(3); */ |
| 296 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
| 297 | bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| 298 | bool res = false; |
| 299 | |
| 300 | /* Initially assume query doesn't require recheck */ |
| 301 | *recheck = false; |
| 302 | |
| 303 | if (query->size > 0) |
| 304 | { |
| 305 | GinChkVal gcv; |
| 306 | |
| 307 | /* |
| 308 | * check-parameter array has one entry for each value (operand) in the |
| 309 | * query. |
| 310 | */ |
| 311 | gcv.first_item = GETQUERY(query); |
| 312 | StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool), |
| 313 | "sizes of GinTernaryValue and bool are not equal" ); |
| 314 | gcv.check = (GinTernaryValue *) check; |
| 315 | gcv.map_item_operand = (int *) (extra_data[0]); |
| 316 | gcv.need_recheck = recheck; |
| 317 | |
| 318 | res = TS_execute(GETQUERY(query), |
| 319 | &gcv, |
| 320 | TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, |
| 321 | checkcondition_gin); |
| 322 | } |
| 323 | |
| 324 | PG_RETURN_BOOL(res); |
| 325 | } |
| 326 | |
| 327 | Datum |
| 328 | gin_tsquery_triconsistent(PG_FUNCTION_ARGS) |
| 329 | { |
| 330 | GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| 331 | |
| 332 | /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ |
| 333 | TSQuery query = PG_GETARG_TSQUERY(2); |
| 334 | |
| 335 | /* int32 nkeys = PG_GETARG_INT32(3); */ |
| 336 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
| 337 | GinTernaryValue res = GIN_FALSE; |
| 338 | bool recheck; |
| 339 | |
| 340 | /* Initially assume query doesn't require recheck */ |
| 341 | recheck = false; |
| 342 | |
| 343 | if (query->size > 0) |
| 344 | { |
| 345 | GinChkVal gcv; |
| 346 | |
| 347 | /* |
| 348 | * check-parameter array has one entry for each value (operand) in the |
| 349 | * query. |
| 350 | */ |
| 351 | gcv.first_item = GETQUERY(query); |
| 352 | gcv.check = check; |
| 353 | gcv.map_item_operand = (int *) (extra_data[0]); |
| 354 | gcv.need_recheck = &recheck; |
| 355 | |
| 356 | res = TS_execute_ternary(&gcv, GETQUERY(query), false); |
| 357 | |
| 358 | if (res == GIN_TRUE && recheck) |
| 359 | res = GIN_MAYBE; |
| 360 | } |
| 361 | |
| 362 | PG_RETURN_GIN_TERNARY_VALUE(res); |
| 363 | } |
| 364 | |
| 365 | /* |
| 366 | * Formerly, gin_extract_tsvector had only two arguments. Now it has three, |
| 367 | * but we still need a pg_proc entry with two args to support reloading |
| 368 | * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility |
| 369 | * function should go away eventually. (Note: you might say "hey, but the |
| 370 | * code above is only *using* two args, so let's just declare it that way". |
| 371 | * If you try that you'll find the opr_sanity regression test complains.) |
| 372 | */ |
| 373 | Datum |
| 374 | (PG_FUNCTION_ARGS) |
| 375 | { |
| 376 | if (PG_NARGS() < 3) /* should not happen */ |
| 377 | elog(ERROR, "gin_extract_tsvector requires three arguments" ); |
| 378 | return gin_extract_tsvector(fcinfo); |
| 379 | } |
| 380 | |
| 381 | /* |
| 382 | * Likewise, we need a stub version of gin_extract_tsquery declared with |
| 383 | * only five arguments. |
| 384 | */ |
| 385 | Datum |
| 386 | (PG_FUNCTION_ARGS) |
| 387 | { |
| 388 | if (PG_NARGS() < 7) /* should not happen */ |
| 389 | elog(ERROR, "gin_extract_tsquery requires seven arguments" ); |
| 390 | return gin_extract_tsquery(fcinfo); |
| 391 | } |
| 392 | |
| 393 | /* |
| 394 | * Likewise, we need a stub version of gin_tsquery_consistent declared with |
| 395 | * only six arguments. |
| 396 | */ |
| 397 | Datum |
| 398 | gin_tsquery_consistent_6args(PG_FUNCTION_ARGS) |
| 399 | { |
| 400 | if (PG_NARGS() < 8) /* should not happen */ |
| 401 | elog(ERROR, "gin_tsquery_consistent requires eight arguments" ); |
| 402 | return gin_tsquery_consistent(fcinfo); |
| 403 | } |
| 404 | |
| 405 | /* |
| 406 | * Likewise, a stub version of gin_extract_tsquery declared with argument |
| 407 | * types that are no longer considered appropriate. |
| 408 | */ |
| 409 | Datum |
| 410 | (PG_FUNCTION_ARGS) |
| 411 | { |
| 412 | return gin_extract_tsquery(fcinfo); |
| 413 | } |
| 414 | |
| 415 | /* |
| 416 | * Likewise, a stub version of gin_tsquery_consistent declared with argument |
| 417 | * types that are no longer considered appropriate. |
| 418 | */ |
| 419 | Datum |
| 420 | gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS) |
| 421 | { |
| 422 | return gin_tsquery_consistent(fcinfo); |
| 423 | } |
| 424 | |