1 | |
2 | |
3 | #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) |
4 | |
/*
** Keep NDEBUG in step with SQLITE_DEBUG. After this point NDEBUG is
** defined if and only if SQLITE_DEBUG is not defined.
*/
#ifdef SQLITE_DEBUG
# undef NDEBUG
#else
# ifndef NDEBUG
#  define NDEBUG 1
# endif
#endif
11 | |
12 | #line 1 "fts5.h" |
13 | /* |
14 | ** 2014 May 31 |
15 | ** |
16 | ** The author disclaims copyright to this source code. In place of |
17 | ** a legal notice, here is a blessing: |
18 | ** |
19 | ** May you do good and not evil. |
20 | ** May you find forgiveness for yourself and forgive others. |
21 | ** May you share freely, never taking more than you give. |
22 | ** |
23 | ****************************************************************************** |
24 | ** |
25 | ** Interfaces to extend FTS5. Using the interfaces defined in this file, |
26 | ** FTS5 may be extended with: |
27 | ** |
28 | ** * custom tokenizers, and |
29 | ** * custom auxiliary functions. |
30 | */ |
31 | |
32 | |
33 | #ifndef _FTS5_H |
34 | #define _FTS5_H |
35 | |
36 | #include "sqlite3.h" |
37 | |
38 | #ifdef __cplusplus |
39 | extern "C" { |
40 | #endif |
41 | |
42 | /************************************************************************* |
43 | ** CUSTOM AUXILIARY FUNCTIONS |
44 | ** |
45 | ** Virtual table implementations may overload SQL functions by implementing |
46 | ** the sqlite3_module.xFindFunction() method. |
47 | */ |
48 | |
49 | typedef struct Fts5ExtensionApi Fts5ExtensionApi; |
50 | typedef struct Fts5Context Fts5Context; |
51 | typedef struct Fts5PhraseIter Fts5PhraseIter; |
52 | |
53 | typedef void (*fts5_extension_function)( |
54 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
55 | Fts5Context *pFts, /* First arg to pass to pApi functions */ |
56 | sqlite3_context *pCtx, /* Context for returning result/error */ |
57 | int nVal, /* Number of values in apVal[] array */ |
58 | sqlite3_value **apVal /* Array of trailing arguments */ |
59 | ); |
60 | |
/*
** Iterator state for the xPhraseFirst()/xPhraseNext() and
** xPhraseFirstColumn()/xPhraseNextColumn() extension API methods.
** Applications should not modify the members directly; the object is
** only meant to be passed to those methods.
*/
struct Fts5PhraseIter {
  const unsigned char *a;         /* Iterator state - do not modify */
  const unsigned char *b;         /* Iterator state - do not modify */
};
65 | |
66 | /* |
67 | ** EXTENSION API FUNCTIONS |
68 | ** |
69 | ** xUserData(pFts): |
70 | ** Return a copy of the context pointer the extension function was |
71 | ** registered with. |
72 | ** |
** xColumnTotalSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the FTS5 table. Or, if iCol is
**   non-negative but less than the number of columns in the table, set
**   *pnToken to the total number of tokens in column iCol, considering
**   all rows in the FTS5 table.
79 | ** |
80 | ** If parameter iCol is greater than or equal to the number of columns |
81 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. |
82 | ** an OOM condition or IO error), an appropriate SQLite error code is |
83 | ** returned. |
84 | ** |
85 | ** xColumnCount(pFts): |
86 | ** Return the number of columns in the table. |
87 | ** |
88 | ** xColumnSize(pFts, iCol, pnToken): |
89 | ** If parameter iCol is less than zero, set output variable *pnToken |
90 | ** to the total number of tokens in the current row. Or, if iCol is |
91 | ** non-negative but less than the number of columns in the table, set |
92 | ** *pnToken to the number of tokens in column iCol of the current row. |
93 | ** |
94 | ** If parameter iCol is greater than or equal to the number of columns |
95 | ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g. |
96 | ** an OOM condition or IO error), an appropriate SQLite error code is |
97 | ** returned. |
98 | ** |
99 | ** This function may be quite inefficient if used with an FTS5 table |
100 | ** created with the "columnsize=0" option. |
101 | ** |
102 | ** xColumnText: |
103 | ** This function attempts to retrieve the text of column iCol of the |
104 | ** current document. If successful, (*pz) is set to point to a buffer |
105 | ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes |
106 | ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise, |
107 | ** if an error occurs, an SQLite error code is returned and the final values |
108 | ** of (*pz) and (*pn) are undefined. |
109 | ** |
110 | ** xPhraseCount: |
111 | ** Returns the number of phrases in the current query expression. |
112 | ** |
113 | ** xPhraseSize: |
114 | ** Returns the number of tokens in phrase iPhrase of the query. Phrases |
115 | ** are numbered starting from zero. |
116 | ** |
** xInstCount:
**   Set *pnInst to the total number of occurrences of all phrases within
**   the query within the current row. Return SQLITE_OK if successful, or
**   an error code (e.g. SQLITE_NOMEM) if an error occurs.
121 | ** |
122 | ** This API can be quite slow if used with an FTS5 table created with the |
123 | ** "detail=none" or "detail=column" option. If the FTS5 table is created |
124 | ** with either "detail=none" or "detail=column" and "content=" option |
125 | ** (i.e. if it is a contentless table), then this API always returns 0. |
126 | ** |
127 | ** xInst: |
128 | ** Query for the details of phrase match iIdx within the current row. |
129 | ** Phrase matches are numbered starting from zero, so the iIdx argument |
130 | ** should be greater than or equal to zero and smaller than the value |
131 | ** output by xInstCount(). |
132 | ** |
**   Usually, output parameter *piPhrase is set to the phrase number, *piCol
**   to the column in which it occurs and *piOff the token offset of the
**   first token of the phrase. Returns SQLITE_OK if successful, or an error
**   code (e.g. SQLITE_NOMEM) if an error occurs.
137 | ** |
138 | ** This API can be quite slow if used with an FTS5 table created with the |
139 | ** "detail=none" or "detail=column" option. |
140 | ** |
141 | ** xRowid: |
142 | ** Returns the rowid of the current row. |
143 | ** |
144 | ** xTokenize: |
145 | ** Tokenize text using the tokenizer belonging to the FTS5 table. |
146 | ** |
147 | ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): |
148 | ** This API function is used to query the FTS table for phrase iPhrase |
149 | ** of the current query. Specifically, a query equivalent to: |
150 | ** |
151 | ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid |
152 | ** |
153 | ** with $p set to a phrase equivalent to the phrase iPhrase of the |
154 | ** current query is executed. Any column filter that applies to |
155 | ** phrase iPhrase of the current query is included in $p. For each |
156 | ** row visited, the callback function passed as the fourth argument |
157 | ** is invoked. The context and API objects passed to the callback |
158 | ** function may be used to access the properties of each matched row. |
**   Invoking Api.xUserData() returns a copy of the pointer passed as
**   the third argument (pUserData).
161 | ** |
162 | ** If the callback function returns any value other than SQLITE_OK, the |
163 | ** query is abandoned and the xQueryPhrase function returns immediately. |
164 | ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. |
165 | ** Otherwise, the error code is propagated upwards. |
166 | ** |
167 | ** If the query runs to completion without incident, SQLITE_OK is returned. |
168 | ** Or, if some error occurs before the query completes or is aborted by |
169 | ** the callback, an SQLite error code is returned. |
170 | ** |
171 | ** |
172 | ** xSetAuxdata(pFts5, pAux, xDelete) |
173 | ** |
174 | ** Save the pointer passed as the second argument as the extension function's |
175 | ** "auxiliary data". The pointer may then be retrieved by the current or any |
176 | ** future invocation of the same fts5 extension function made as part of |
177 | ** the same MATCH query using the xGetAuxdata() API. |
178 | ** |
179 | ** Each extension function is allocated a single auxiliary data slot for |
180 | ** each FTS query (MATCH expression). If the extension function is invoked |
181 | ** more than once for a single FTS query, then all invocations share a |
182 | ** single auxiliary data context. |
183 | ** |
184 | ** If there is already an auxiliary data pointer when this function is |
185 | ** invoked, then it is replaced by the new pointer. If an xDelete callback |
186 | ** was specified along with the original pointer, it is invoked at this |
187 | ** point. |
188 | ** |
189 | ** The xDelete callback, if one is specified, is also invoked on the |
190 | ** auxiliary data pointer after the FTS5 query has finished. |
191 | ** |
192 | ** If an error (e.g. an OOM condition) occurs within this function, |
193 | ** the auxiliary data is set to NULL and an error code returned. If the |
194 | ** xDelete parameter was not NULL, it is invoked on the auxiliary data |
195 | ** pointer before returning. |
196 | ** |
197 | ** |
198 | ** xGetAuxdata(pFts5, bClear) |
199 | ** |
200 | ** Returns the current auxiliary data pointer for the fts5 extension |
201 | ** function. See the xSetAuxdata() method for details. |
202 | ** |
203 | ** If the bClear argument is non-zero, then the auxiliary data is cleared |
204 | ** (set to NULL) before this function returns. In this case the xDelete, |
205 | ** if any, is not invoked. |
206 | ** |
207 | ** |
208 | ** xRowCount(pFts5, pnRow) |
209 | ** |
210 | ** This function is used to retrieve the total number of rows in the table. |
211 | ** In other words, the same value that would be returned by: |
212 | ** |
213 | ** SELECT count(*) FROM ftstable; |
214 | ** |
215 | ** xPhraseFirst() |
216 | ** This function is used, along with type Fts5PhraseIter and the xPhraseNext |
217 | ** method, to iterate through all instances of a single query phrase within |
218 | ** the current row. This is the same information as is accessible via the |
219 | ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient |
220 | ** to use, this API may be faster under some circumstances. To iterate |
221 | ** through instances of phrase iPhrase, use the following code: |
222 | ** |
223 | ** Fts5PhraseIter iter; |
224 | ** int iCol, iOff; |
225 | ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); |
226 | ** iCol>=0; |
227 | ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) |
228 | ** ){ |
229 | ** // An instance of phrase iPhrase at offset iOff of column iCol |
230 | ** } |
231 | ** |
232 | ** The Fts5PhraseIter structure is defined above. Applications should not |
233 | ** modify this structure directly - it should only be used as shown above |
234 | ** with the xPhraseFirst() and xPhraseNext() API methods (and by |
235 | ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). |
236 | ** |
237 | ** This API can be quite slow if used with an FTS5 table created with the |
238 | ** "detail=none" or "detail=column" option. If the FTS5 table is created |
239 | ** with either "detail=none" or "detail=column" and "content=" option |
240 | ** (i.e. if it is a contentless table), then this API always iterates |
241 | ** through an empty set (all calls to xPhraseFirst() set iCol to -1). |
242 | ** |
243 | ** xPhraseNext() |
244 | ** See xPhraseFirst above. |
245 | ** |
246 | ** xPhraseFirstColumn() |
247 | ** This function and xPhraseNextColumn() are similar to the xPhraseFirst() |
248 | ** and xPhraseNext() APIs described above. The difference is that instead |
249 | ** of iterating through all instances of a phrase in the current row, these |
250 | ** APIs are used to iterate through the set of columns in the current row |
251 | ** that contain one or more instances of a specified phrase. For example: |
252 | ** |
253 | ** Fts5PhraseIter iter; |
254 | ** int iCol; |
255 | ** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol); |
256 | ** iCol>=0; |
257 | ** pApi->xPhraseNextColumn(pFts, &iter, &iCol) |
258 | ** ){ |
259 | ** // Column iCol contains at least one instance of phrase iPhrase |
260 | ** } |
261 | ** |
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" option. If the FTS5 table is created with both the
**   "detail=none" and "content=" options (i.e. if it is a contentless
**   table), then this API always iterates through an empty set (all calls
**   to xPhraseFirstColumn() set iCol to -1).
267 | ** |
**   The information accessed using this API and its companion
**   xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
**   (or xInst/xInstCount). The chief advantage of this API is that it is
**   significantly more efficient than those alternatives when used with
**   "detail=column" tables.
273 | ** |
274 | ** xPhraseNextColumn() |
275 | ** See xPhraseFirstColumn above. |
276 | */ |
277 | struct Fts5ExtensionApi { |
278 | int iVersion; /* Currently always set to 3 */ |
279 | |
280 | void *(*xUserData)(Fts5Context*); |
281 | |
282 | int (*xColumnCount)(Fts5Context*); |
283 | int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); |
284 | int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); |
285 | |
286 | int (*xTokenize)(Fts5Context*, |
287 | const char *pText, int nText, /* Text to tokenize */ |
288 | void *pCtx, /* Context passed to xToken() */ |
289 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ |
290 | ); |
291 | |
292 | int (*xPhraseCount)(Fts5Context*); |
293 | int (*xPhraseSize)(Fts5Context*, int iPhrase); |
294 | |
295 | int (*xInstCount)(Fts5Context*, int *pnInst); |
296 | int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); |
297 | |
298 | sqlite3_int64 (*xRowid)(Fts5Context*); |
299 | int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); |
300 | int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); |
301 | |
302 | int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, |
303 | int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) |
304 | ); |
305 | int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); |
306 | void *(*xGetAuxdata)(Fts5Context*, int bClear); |
307 | |
308 | int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); |
309 | void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); |
310 | |
311 | int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); |
312 | void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); |
313 | }; |
314 | |
/*
** END OF CUSTOM AUXILIARY FUNCTIONS
*************************************************************************/
318 | |
319 | /************************************************************************* |
320 | ** CUSTOM TOKENIZERS |
321 | ** |
322 | ** Applications may also register custom tokenizer types. A tokenizer |
323 | ** is registered by providing fts5 with a populated instance of the |
324 | ** following structure. All structure methods must be defined, setting |
325 | ** any member of the fts5_tokenizer struct to NULL leads to undefined |
326 | ** behaviour. The structure methods are expected to function as follows: |
327 | ** |
328 | ** xCreate: |
329 | ** This function is used to allocate and initialize a tokenizer instance. |
330 | ** A tokenizer instance is required to actually tokenize text. |
331 | ** |
332 | ** The first argument passed to this function is a copy of the (void*) |
333 | ** pointer provided by the application when the fts5_tokenizer object |
334 | ** was registered with FTS5 (the third argument to xCreateTokenizer()). |
335 | ** The second and third arguments are an array of nul-terminated strings |
336 | ** containing the tokenizer arguments, if any, specified following the |
337 | ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used |
338 | ** to create the FTS5 table. |
339 | ** |
340 | ** The final argument is an output variable. If successful, (*ppOut) |
341 | ** should be set to point to the new tokenizer handle and SQLITE_OK |
342 | ** returned. If an error occurs, some value other than SQLITE_OK should |
343 | ** be returned. In this case, fts5 assumes that the final value of *ppOut |
344 | ** is undefined. |
345 | ** |
346 | ** xDelete: |
347 | ** This function is invoked to delete a tokenizer handle previously |
348 | ** allocated using xCreate(). Fts5 guarantees that this function will |
349 | ** be invoked exactly once for each successful call to xCreate(). |
350 | ** |
351 | ** xTokenize: |
352 | ** This function is expected to tokenize the nText byte string indicated |
353 | ** by argument pText. pText may or may not be nul-terminated. The first |
354 | ** argument passed to this function is a pointer to an Fts5Tokenizer object |
355 | ** returned by an earlier call to xCreate(). |
356 | ** |
357 | ** The second argument indicates the reason that FTS5 is requesting |
358 | ** tokenization of the supplied text. This is always one of the following |
359 | ** four values: |
360 | ** |
361 | ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into |
362 | ** or removed from the FTS table. The tokenizer is being invoked to |
363 | ** determine the set of tokens to add to (or delete from) the |
364 | ** FTS index. |
365 | ** |
366 | ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed |
367 | ** against the FTS index. The tokenizer is being called to tokenize |
368 | ** a bareword or quoted string specified as part of the query. |
369 | ** |
370 | ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as |
371 | ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is |
372 | ** followed by a "*" character, indicating that the last token |
373 | ** returned by the tokenizer will be treated as a token prefix. |
374 | ** |
**   <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
**      satisfy an Fts5ExtensionApi.xTokenize() request made by an
**      auxiliary function. Or an Fts5ExtensionApi.xColumnSize() request
**      made by the same on a columnsize=0 database.
379 | ** </ul> |
380 | ** |
**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The third and fourth
**   arguments are a pointer to a buffer containing the token text, and the
**   size of the token in bytes. The fifth and sixth arguments are the byte
**   offsets of the first byte of and first byte immediately following the
**   text from which the token is derived within the input.
388 | ** |
389 | ** The second argument passed to the xToken() callback ("tflags") should |
390 | ** normally be set to 0. The exception is if the tokenizer supports |
391 | ** synonyms. In this case see the discussion below for details. |
392 | ** |
393 | ** FTS5 assumes the xToken() callback is invoked for each token in the |
394 | ** order that they occur within the input text. |
395 | ** |
396 | ** If an xToken() callback returns any value other than SQLITE_OK, then |
397 | ** the tokenization should be abandoned and the xTokenize() method should |
398 | ** immediately return a copy of the xToken() return value. Or, if the |
399 | ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, |
400 | ** if an error occurs with the xTokenize() implementation itself, it |
401 | ** may abandon the tokenization and return any error code other than |
402 | ** SQLITE_OK or SQLITE_DONE. |
403 | ** |
404 | ** SYNONYM SUPPORT |
405 | ** |
406 | ** Custom tokenizers may also support synonyms. Consider a case in which a |
407 | ** user wishes to query for a phrase such as "first place". Using the |
408 | ** built-in tokenizers, the FTS5 query 'first + place' will match instances |
409 | ** of "first place" within the document set, but not alternative forms |
410 | ** such as "1st place". In some applications, it would be better to match |
411 | ** all instances of "first place" or "1st place" regardless of which form |
412 | ** the user specified in the MATCH query text. |
413 | ** |
414 | ** There are several ways to approach this in FTS5: |
415 | ** |
416 | ** <ol><li> By mapping all synonyms to a single token. In this case, using |
417 | ** the above example, this means that the tokenizer returns the |
418 | ** same token for inputs "first" and "1st". Say that token is in |
419 | ** fact "first", so that when the user inserts the document "I won |
420 | ** 1st place" entries are added to the index for tokens "i", "won", |
421 | ** "first" and "place". If the user then queries for '1st + place', |
422 | ** the tokenizer substitutes "first" for "1st" and the query works |
423 | ** as expected. |
424 | ** |
425 | ** <li> By querying the index for all synonyms of each query term |
426 | ** separately. In this case, when tokenizing query text, the |
427 | ** tokenizer may provide multiple synonyms for a single term |
428 | ** within the document. FTS5 then queries the index for each |
429 | ** synonym individually. For example, faced with the query: |
430 | ** |
431 | ** <codeblock> |
432 | ** ... MATCH 'first place'</codeblock> |
433 | ** |
434 | ** the tokenizer offers both "1st" and "first" as synonyms for the |
435 | ** first token in the MATCH query and FTS5 effectively runs a query |
436 | ** similar to: |
437 | ** |
438 | ** <codeblock> |
439 | ** ... MATCH '(first OR 1st) place'</codeblock> |
440 | ** |
441 | ** except that, for the purposes of auxiliary functions, the query |
442 | ** still appears to contain just two phrases - "(first OR 1st)" |
443 | ** being treated as a single phrase. |
444 | ** |
445 | ** <li> By adding multiple synonyms for a single term to the FTS index. |
446 | ** Using this method, when tokenizing document text, the tokenizer |
447 | ** provides multiple synonyms for each token. So that when a |
448 | ** document such as "I won first place" is tokenized, entries are |
449 | ** added to the FTS index for "i", "won", "first", "1st" and |
450 | ** "place". |
451 | ** |
452 | ** This way, even if the tokenizer does not provide synonyms |
453 | ** when tokenizing query text (it should not - to do so would be |
454 | ** inefficient), it doesn't matter if the user queries for |
455 | ** 'first + place' or '1st + place', as there are entries in the |
456 | ** FTS index corresponding to both forms of the first token. |
457 | ** </ol> |
458 | ** |
459 | ** Whether it is parsing document or query text, any call to xToken that |
460 | ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit |
461 | ** is considered to supply a synonym for the previous token. For example, |
462 | ** when parsing the document "I won first place", a tokenizer that supports |
463 | ** synonyms would call xToken() 5 times, as follows: |
464 | ** |
465 | ** <codeblock> |
466 | ** xToken(pCtx, 0, "i", 1, 0, 1); |
467 | ** xToken(pCtx, 0, "won", 3, 2, 5); |
468 | ** xToken(pCtx, 0, "first", 5, 6, 11); |
469 | ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); |
470 | ** xToken(pCtx, 0, "place", 5, 12, 17); |
471 | **</codeblock> |
472 | ** |
473 | ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time |
474 | ** xToken() is called. Multiple synonyms may be specified for a single token |
475 | ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. |
476 | ** There is no limit to the number of synonyms that may be provided for a |
477 | ** single token. |
478 | ** |
479 | ** In many cases, method (1) above is the best approach. It does not add |
480 | ** extra data to the FTS index or require FTS5 to query for multiple terms, |
481 | ** so it is efficient in terms of disk space and query speed. However, it |
482 | ** does not support prefix queries very well. If, as suggested above, the |
483 | ** token "first" is substituted for "1st" by the tokenizer, then the query: |
484 | ** |
485 | ** <codeblock> |
486 | ** ... MATCH '1s*'</codeblock> |
487 | ** |
488 | ** will not match documents that contain the token "1st" (as the tokenizer |
489 | ** will probably not map "1s" to any prefix of "first"). |
490 | ** |
491 | ** For full prefix support, method (3) may be preferred. In this case, |
492 | ** because the index contains entries for both "first" and "1st", prefix |
493 | ** queries such as 'fi*' or '1s*' will match correctly. However, because |
494 | ** extra entries are added to the FTS index, this method uses more space |
495 | ** within the database. |
496 | ** |
497 | ** Method (2) offers a midpoint between (1) and (3). Using this method, |
498 | ** a query such as '1s*' will match documents that contain the literal |
499 | ** token "1st", but not "first" (assuming the tokenizer is not able to |
500 | ** provide synonyms for prefixes). However, a non-prefix query like '1st' |
501 | ** will match against "1st" and "first". This method does not require |
502 | ** extra disk space, as no extra entries are added to the FTS index. |
503 | ** On the other hand, it may require more CPU cycles to run MATCH queries, |
504 | ** as separate queries of the FTS index are required for each synonym. |
505 | ** |
**   When using methods (2) or (3), it is important that the tokenizer only
**   provide synonyms when tokenizing document text (method (3)) or query
**   text (method (2)), not both. Doing so will not cause any errors, but is
**   inefficient.
510 | */ |
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;

/*
** Methods of a custom tokenizer. A populated instance of this structure
** is supplied to FTS5 when a tokenizer is registered. All three members
** must be non-NULL.
*/
struct fts5_tokenizer {
  /* Allocate and initialize a tokenizer instance, returned via *ppOut.
  ** The first argument is the context pointer supplied when the
  ** tokenizer was registered; azArg/nArg are the tokenizer arguments. */
  int (*xCreate)(
    void*,
    const char **azArg, int nArg,
    Fts5Tokenizer **ppOut
  );

  /* Delete a tokenizer handle previously allocated by xCreate() */
  void (*xDelete)(Fts5Tokenizer*);

  /* Tokenize the nText byte string pText, invoking xToken() per token */
  int (*xTokenize)(
    Fts5Tokenizer*,
    void *pCtx,
    int flags,            /* Mask of FTS5_TOKENIZE_* flags */
    const char *pText, int nText,
    int (*xToken)(
      void *pCtx,         /* Copy of 2nd argument to xTokenize() */
      int tflags,         /* Mask of FTS5_TOKEN_* flags */
      const char *pToken, /* Pointer to buffer containing token */
      int nToken,         /* Size of token in bytes */
      int iStart,         /* Byte offset of token within input text */
      int iEnd            /* Byte offset of end of token within input text */
    )
  );
};
530 | |
/*
** Flags that may be passed as the third argument ("flags") to
** fts5_tokenizer.xTokenize(). They describe why FTS5 is requesting
** tokenization; FTS5_TOKENIZE_PREFIX is combined with
** FTS5_TOKENIZE_QUERY.
*/
#define FTS5_TOKENIZE_QUERY     0x0001
#define FTS5_TOKENIZE_PREFIX    0x0002
#define FTS5_TOKENIZE_DOCUMENT  0x0004
#define FTS5_TOKENIZE_AUX       0x0008

/*
** Flags that may be passed by the tokenizer implementation back to FTS5
** as the second argument ("tflags") to the supplied xToken callback.
*/
#define FTS5_TOKEN_COLOCATED    0x0001    /* Same position as prev. token */
540 | |
541 | /* |
542 | ** END OF CUSTOM TOKENIZERS |
543 | *************************************************************************/ |
544 | |
545 | /************************************************************************* |
546 | ** FTS5 EXTENSION REGISTRATION API |
547 | */ |
548 | typedef struct fts5_api fts5_api; |
549 | struct fts5_api { |
550 | int iVersion; /* Currently always set to 2 */ |
551 | |
552 | /* Create a new tokenizer */ |
553 | int (*xCreateTokenizer)( |
554 | fts5_api *pApi, |
555 | const char *zName, |
556 | void *pContext, |
557 | fts5_tokenizer *pTokenizer, |
558 | void (*xDestroy)(void*) |
559 | ); |
560 | |
561 | /* Find an existing tokenizer */ |
562 | int (*xFindTokenizer)( |
563 | fts5_api *pApi, |
564 | const char *zName, |
565 | void **ppContext, |
566 | fts5_tokenizer *pTokenizer |
567 | ); |
568 | |
569 | /* Create a new auxiliary function */ |
570 | int (*xCreateFunction)( |
571 | fts5_api *pApi, |
572 | const char *zName, |
573 | void *pContext, |
574 | fts5_extension_function xFunction, |
575 | void (*xDestroy)(void*) |
576 | ); |
577 | }; |
578 | |
579 | /* |
580 | ** END OF REGISTRATION API |
581 | *************************************************************************/ |
582 | |
583 | #ifdef __cplusplus |
584 | } /* end of the 'extern "C"' block */ |
585 | #endif |
586 | |
587 | #endif /* _FTS5_H */ |
588 | |
589 | #line 1 "fts5Int.h" |
590 | /* |
591 | ** 2014 May 31 |
592 | ** |
593 | ** The author disclaims copyright to this source code. In place of |
594 | ** a legal notice, here is a blessing: |
595 | ** |
596 | ** May you do good and not evil. |
597 | ** May you find forgiveness for yourself and forgive others. |
598 | ** May you share freely, never taking more than you give. |
599 | ** |
600 | ****************************************************************************** |
601 | ** |
602 | */ |
603 | #ifndef _FTS5INT_H |
604 | #define _FTS5INT_H |
605 | |
606 | /* #include "fts5.h" */ |
607 | #include "sqlite3ext.h" |
608 | SQLITE_EXTENSION_INIT1 |
609 | |
610 | #include <string.h> |
611 | #include <assert.h> |
612 | |
613 | #ifndef SQLITE_AMALGAMATION |
614 | |
615 | typedef unsigned char u8; |
616 | typedef unsigned int u32; |
617 | typedef unsigned short u16; |
618 | typedef short i16; |
619 | typedef sqlite3_int64 i64; |
620 | typedef sqlite3_uint64 u64; |
621 | |
/*
** ArraySize(x) is the number of elements in array x, as an int. The
** argument must be an actual array, not a pointer.
*/
#if !defined(ArraySize)
# define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0])))
#endif

/* testcase() expands to nothing here; its argument is discarded. */
#define testcase(x)
627 | |
/*
** ALWAYS(X) and NEVER(X) wrap conditions that are expected to be always
** true and always false respectively.
**
**   * With SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS (forced on for coverage and
**     mutation testing) they are the constants 1 and 0, and X is not
**     evaluated.
**   * Otherwise, if NDEBUG is defined, they simply evaluate to X.
**   * Otherwise they evaluate to 1 or 0 according to X, and reach
**     assert(0) if the expectation is violated.
*/
#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST)
# define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1
#endif

#ifdef SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS
# define ALWAYS(X)      (1)
# define NEVER(X)       (0)
#elif defined(NDEBUG)
# define ALWAYS(X)      (X)
# define NEVER(X)       (X)
#else
# define ALWAYS(X)      ((X)?1:(assert(0),0))
# define NEVER(X)       ((X)?(assert(0),1):0)
#endif
641 | |
/*
** Two-argument minimum and maximum. Whichever argument is selected is
** evaluated twice, so arguments should be free of side effects.
*/
#define MIN(x,y) ((x) < (y) ? (x) : (y))
#define MAX(x,y) ((x) > (y) ? (x) : (y))

/*
** Constants for the largest and smallest possible 64-bit signed integers.
*/
#define LARGEST_INT64  ((((i64)0x7fffffff)<<32)|0xffffffff)
#define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
650 | |
651 | #endif |
652 | |
/*
** Very long tokens are truncated to FTS5_MAX_TOKEN_SIZE bytes. The hard
** limit is (65536-1-1-4-9)==65521 bytes; the limiting factor is the 16-bit
** offset field that occurs at the start of each leaf page (see
** fts5_index.c).
*/
#define FTS5_MAX_TOKEN_SIZE 32768

/*
** Maximum number of prefix indexes on a single FTS5 table. This must be
** less than 32. If it is set to anything larger than that, an #error
** directive in fts5_index.c will cause the build to fail.
*/
#define FTS5_MAX_PREFIX_INDEXES 31

/*
** Maximum number of segments permitted in a single index.
*/
#define FTS5_MAX_SEGMENT 2000

/* Defaults for the NEAR distance and for the rank function */
#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK     "bm25"

/* Names of the rank and rowid columns */
#define FTS5_RANK_NAME  "rank"
#define FTS5_ROWID_NAME "rowid"
676 | |
/*
** FTS5_CORRUPT expands to SQLITE_CORRUPT_VTAB in ordinary builds. In
** SQLITE_DEBUG builds it expands to a call to sqlite3Fts5Corrupt()
** instead.
** NOTE(review): presumably the function exists so a breakpoint can be
** set on it - confirm against its definition.
*/
#ifndef SQLITE_DEBUG
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#else
static int sqlite3Fts5Corrupt(void);
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
#endif
683 | |
/*
** assert_nc() ("assert, no corruption") is like assert(), but is meant
** for conditions that are true only if it can be guaranteed that the
** database is not corrupt. In SQLITE_DEBUG builds the check is waived
** while sqlite3_fts5_may_be_corrupt is non-zero; in other builds it is
** exactly assert().
*/
#ifndef SQLITE_DEBUG
# define assert_nc(x) assert(x)
#else
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#endif
695 | |
/*
** Wrapper around memcmp() that never calls it when the byte count is zero
** or negative; the result is 0 in that case. This avoids asan errors when
** one of the pointers is NULL and there are no bytes to compare.
*/
#define fts5Memcmp(s1, s2, n) ((n)>0 ? memcmp((s1), (s2), (n)) : 0)
701 | |
/* Mark a function parameter as unused, to suppress nuisance compiler
** warnings.
**
** Each expansion is wrapped in parentheses so that it is a single
** expression. Without them, UNUSED_PARAM2() is a bare comma expression and
** only its first half binds to a surrounding operator such as ?:. */
#ifndef UNUSED_PARAM
# define UNUSED_PARAM(X) ((void)(X))
#endif

#ifndef UNUSED_PARAM2
# define UNUSED_PARAM2(X, Y) ((void)(X), (void)(Y))
#endif
711 | |
/* Forward declarations. Only Fts5Colset is defined in this section;
** Fts5Global is used here as an opaque pointer type. */
typedef struct Fts5Global Fts5Global;
typedef struct Fts5Colset Fts5Colset;

/* If a NEAR() clump or phrase may only match a specific set of columns,
** then an object of the following type is used to record the set of columns.
** Each entry in the aiCol[] array is a column that may be matched.
**
** This object is used by fts5_expr.c and fts5_index.c.
**
** NOTE(review): aiCol[] is declared with a single element; the object is
** presumably over-allocated so that it can hold nCol entries. Do not change
** the declared size without auditing every sizeof(Fts5Colset) user.
*/
struct Fts5Colset {
  int nCol;                       /* Entries in aiCol[] (presumably; confirm) */
  int aiCol[1];                   /* Columns that may be matched */
};
725 | |
726 | |
727 | |
/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
*/
732 | |
typedef struct Fts5Config Fts5Config;

/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
**   The minimum number of segments that an auto-merge operation should
**   attempt to merge together. A value of 1 sets the object to use the
**   compile time default. Zero disables auto-merge altogether.
**
** zContent:
**   The content table (see the field comment below).
**   NOTE(review): presumably the table name, and presumably interpreted
**   according to eContent - confirm against fts5_config.c.
**
** zContentRowid:
**   The value of the content_rowid= option, if one was specified. Or
**   the string "rowid" otherwise. This text is not quoted - if it is
**   used as part of an SQL statement it needs to be quoted appropriately.
**
** zContentExprlist:
**   NOTE(review): not documented in this header. Judging by the name only,
**   presumably an SQL expression list used when reading the content
**   table - confirm against fts5_config.c.
**
** pzErrmsg:
**   This exists in order to allow the fts5_index.c module to return a
**   decent error message if it encounters a file-format version it does
**   not understand.
**
** bColumnsize:
**   True if the %_docsize table is created.
**
** bPrefixIndex:
**   This is only used for debugging. If set to false, any prefix indexes
**   are ignored. This value is configured using:
**
**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
**   The field only exists in SQLITE_DEBUG builds.
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
  u8 *abUnindexed;                /* True for unindexed columns */
  int nPrefix;                    /* Number of prefix indexes */
  int *aPrefix;                   /* Sizes in bytes of nPrefix prefix indexes */
  int eContent;                   /* An FTS5_CONTENT_XXX value */
  char *zContent;                 /* content table */
  char *zContentRowid;            /* "content_rowid=" option value */
  int bColumnsize;                /* "columnsize=" option value (dflt==1) */
  int eDetail;                    /* FTS5_DETAIL_XXX value */
  char *zContentExprlist;         /* See header comment (undocumented) */
  Fts5Tokenizer *pTok;            /* Tokenizer instance (presumably; confirm) */
  fts5_tokenizer *pTokApi;        /* Tokenizer methods (presumably; confirm) */
  int bLock;                      /* True when table is preparing statement */
  int ePattern;                   /* FTS5_PATTERN_XXX constant */

  /* Values loaded from the %_config table */
  int iCookie;                    /* Incremented when %_config is modified */
  int pgsz;                       /* Approximate page size used in %_data */
  int nAutomerge;                 /* 'automerge' setting */
  int nCrisisMerge;               /* Maximum allowed segments per level */
  int nUsermerge;                 /* 'usermerge' setting */
  int nHashSize;                  /* Bytes of memory for in-memory hash */
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;

#ifdef SQLITE_DEBUG
  int bPrefixIndex;               /* True to use prefix-indexes */
#endif
};
807 | |
/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 4

/* Values for Fts5Config.eContent */
#define FTS5_CONTENT_NORMAL 0
#define FTS5_CONTENT_NONE 1
#define FTS5_CONTENT_EXTERNAL 2

/* Values for Fts5Config.eDetail */
#define FTS5_DETAIL_FULL 0
#define FTS5_DETAIL_NONE 1
#define FTS5_DETAIL_COLUMNS 2

/* Values for Fts5Config.ePattern. The two non-zero values are chosen to be
** numerically equal to the corresponding SQLITE_INDEX_CONSTRAINT_XXX codes
** from sqlite3.h, so they must not be renumbered independently. */
#define FTS5_PATTERN_NONE 0
#define FTS5_PATTERN_LIKE 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */
#define FTS5_PATTERN_GLOB 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */
822 | |
/* Build an Fts5Config object from the arguments of a CREATE VIRTUAL TABLE
** statement.
** NOTE(review): the (int, const char**) pair is presumably the argument
** count and vector, Fts5Config** the new object and char** an error
** message - confirm against fts5_config.c. */
static int sqlite3Fts5ConfigParse(
    Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
);
/* Release an Fts5Config object. */
static void sqlite3Fts5ConfigFree(Fts5Config*);

/* NOTE(review): presumably declares the virtual table schema for pConfig
** to SQLite - confirm. */
static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

/* Tokenize the nText bytes of text at pText, invoking the xToken() callback
** with pCtx as its first argument. */
static int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
);

/* NOTE(review): presumably removes SQL-style quoting from string z in
** place (z is not const) - confirm. */
static void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
static int sqlite3Fts5ConfigLoad(Fts5Config*, int);

/* Set the value of a single config attribute */
static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);

/* NOTE(review): presumably splits a rank specification into a function
** name and its arguments (cf. Fts5Config.zRank and zRankArgs) - confirm. */
static int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
847 | |
848 | /* |
849 | ** End of interface to code in fts5_config.c. |
850 | **************************************************************************/ |
851 | |
852 | /************************************************************************** |
853 | ** Interface to code in fts5_buffer.c. |
854 | */ |
855 | |
/*
** Buffer object for the incremental building of string data.
**
** The fts5BufferGrow() macro compares (n + bytes-requested) against nSpace,
** so n counts the bytes in use and nSpace the bytes available at p.
*/
typedef struct Fts5Buffer Fts5Buffer;
struct Fts5Buffer {
  u8 *p;                          /* Buffer contents */
  int n;                          /* Bytes of p[] currently in use */
  int nSpace;                     /* Bytes of space available at p[] */
};
865 | |
/*
** Fts5Buffer primitives.
**
** NOTE(review): the leading int* argument of most of these functions is
** presumably an in/out SQLite error code (it is named pRc in
** sqlite3Fts5Mprintf() below) - confirm against fts5_buffer.c.
*/
static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
static void sqlite3Fts5BufferFree(Fts5Buffer*);
static void sqlite3Fts5BufferZero(Fts5Buffer*);
static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);

/* printf-style formatting into a returned string.
** NOTE(review): presumably the result is heap-allocated and owned by the
** caller - confirm. */
static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
876 | |
/*
** Short aliases for the sqlite3Fts5BufferXxx() functions.
**
** fts5BufferAppendVarint() converts its value argument to i64. The
** argument is parenthesized before the cast so that the whole expression
** is converted, not just its first operand.
*/
#define fts5BufferZero(x)             sqlite3Fts5BufferZero(x)
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,(i64)(c))
#define fts5BufferFree(a)             sqlite3Fts5BufferFree(a)
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
#define fts5BufferSet(a,b,c,d)        sqlite3Fts5BufferSet(a,b,c,d)

/*
** Ensure that buffer pBuf has room for nn more bytes. Evaluates to 0
** without calling anything if (n + nn) already fits within nSpace;
** otherwise evaluates to the result of sqlite3Fts5BufferSize(), which is
** asked for a total of (nn + n) bytes. pBuf and nn are evaluated more than
** once, so they must not have side effects.
*/
#define fts5BufferGrow(pRc,pBuf,nn) ( \
  (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
    sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
)
887 | |
/* Write and decode big-endian 32-bit integer values. Put32() stores the
** int argument at the u8* buffer; Get32() returns the value read from its
** buffer argument. */
static void sqlite3Fts5Put32(u8*, int);
static int sqlite3Fts5Get32(const u8*);
891 | |
/*
** Decode a 64-bit position value of the form (iCol<<32) + iOffset.
** FTS5_POS2COLUMN() yields the column number (the bits above bit 31) and
** FTS5_POS2OFFSET() the token offset (the low 31 bits).
**
** The argument is parenthesized so that expression arguments such as
** (a | b) or (c ? x : y) are decoded as a whole, not operand by operand.
*/
#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0x7FFFFFFF))
894 | |
/*
** Iterator over a position list stored in a byte buffer.
**
** The object is set up by sqlite3Fts5PoslistReaderInit() and advanced by
** sqlite3Fts5PoslistReaderNext(). After each step bEof reports whether the
** end of the list was reached and iPos holds the current position, with
** the column number in the upper 32 bits (decode it with FTS5_POS2COLUMN()
** and FTS5_POS2OFFSET()).
*/
typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  u8 bFlag;                       /* For client use (any custom purpose) */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};
/* NOTE(review): the int results presumably mirror pIter->bEof - confirm
** against fts5_buffer.c. */
static int sqlite3Fts5PoslistReaderInit(
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);
static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
913 | |
/*
** State for appending positions to a position list held in an Fts5Buffer.
** NOTE(review): iPrev is presumably the most recently appended position,
** kept so that the next one can be encoded relative to it - confirm.
*/
typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
  i64 iPrev;
};
static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
/* NOTE(review): "Safe" presumably means the caller has already ensured the
** buffer is large enough (the function cannot report failure) - confirm. */
static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);

/* Step through the poslist in a[0..n-1]. *pi and *piOff are updated on each
** call.
** NOTE(review): the meaning of the int return value is not visible here -
** confirm against fts5_buffer.c. */
static int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
);
926 | |
/* Malloc utility.
** NOTE(review): by their names, MallocZero() presumably returns nByte bytes
** of zeroed memory and Strndup() a copy of the first nIn bytes of pIn, with
** failures reported through *pRc - confirm against fts5_buffer.c. */
static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte);
static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);

/* Character set tests (like isspace(), isalpha() etc.) */
static int sqlite3Fts5IsBareword(char t);


/* Bucket of terms object used by the integrity-check in offsets=0 mode.
** The type is opaque to users of this header. TermsetAdd() reports through
** *pbPresent.
** NOTE(review): presumably whether the term was already in the set, and
** the int after the Fts5Termset* is presumably an index number - confirm. */
typedef struct Fts5Termset Fts5Termset;
static int sqlite3Fts5TermsetNew(Fts5Termset**);
static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
static void sqlite3Fts5TermsetFree(Fts5Termset*);
940 | |
941 | /* |
942 | ** End of interface to code in fts5_buffer.c. |
943 | **************************************************************************/ |
944 | |
/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
*/
949 | |
/* Fts5Index is opaque outside fts5_index.c. Fts5IndexIter is the part of an
** index iterator that is visible to other modules. */
typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;

struct Fts5IndexIter {
  i64 iRowid;                     /* Current rowid (presumably; confirm) */
  const u8 *pData;                /* Data for current entry (presumably) */
  int nData;                      /* Size of pData[] in bytes (presumably) */
  u8 bEof;                        /* Read by sqlite3Fts5IterEof() */
};

/* True once iterator x has reached the end of its results (reads x->bEof
** directly, no function call). */
#define sqlite3Fts5IterEof(x) ((x)->bEof)
961 | |
/*
** Values used as part of the flags argument passed to IndexQuery(). Each
** value is a distinct bit, so they may be combined into a mask.
*/
#define FTS5INDEX_QUERY_PREFIX     0x0001   /* Prefix query */
#define FTS5INDEX_QUERY_DESC       0x0002   /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004   /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN       0x0008   /* Scan query (fts5vocab) */

/* The following are used internally by the fts5_index.c module. They are
** defined here only to make it easier to avoid clashes with the flags
** above. */
#define FTS5INDEX_QUERY_SKIPEMPTY  0x0010
#define FTS5INDEX_QUERY_NOOUTPUT   0x0020
975 | |
/*
** Create/destroy an Fts5Index object.
**
** NOTE(review): for sqlite3Fts5IndexOpen() the Fts5Index** argument is
** presumably where the new object is returned and the char** argument an
** error message; bCreate presumably requests creation of the backing
** tables - confirm against fts5_index.c.
*/
static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
static int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** Return a simple checksum value based on the arguments.
*/
static u64 sqlite3Fts5IndexEntryCksum(
  i64 iRowid,
  int iCol,
  int iPos,
  int iIdx,
  const char *pTerm,
  int nTerm
);

/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
);

/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
static int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
**
** NOTE(review): IterNextFrom() presumably advances to the first entry at
** or beyond rowid iMatch in the iteration order - confirm.
*/
static int sqlite3Fts5IterNext(Fts5IndexIter*);
static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
static void sqlite3Fts5IterClose(Fts5IndexIter*);

/*
** Close the reader blob handle, if it is open.
*/
static void sqlite3Fts5IndexCloseReader(Fts5Index*);

/*
** This interface is used by the fts5vocab module.
**
** NOTE(review): IterTerm() presumably returns the current term and writes
** its size to the int* argument. StructureRef() returns an opaque handle
** that is later passed to StructureTest() and StructureRelease(); what
** StructureTest() reports is not visible here - confirm.
*/
static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
static void *sqlite3Fts5StructureRef(Fts5Index*);
static void sqlite3Fts5StructureRelease(void*);
static int sqlite3Fts5StructureTest(Fts5Index*, void*);
1042 | |
1043 | |
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
*/
static int sqlite3Fts5IndexWrite(
  Fts5Index *p,                   /* Index to write to */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
static int sqlite3Fts5IndexBeginWrite(
  Fts5Index *p,                   /* Index to write to */
  int bDelete,                    /* True if current operation is a delete */
  i64 iDocid                      /* Docid to add or remove data from */
);

/*
** Flush any data stored in the in-memory hash tables to the database.
** Also close any open blob handles.
*/
static int sqlite3Fts5IndexSync(Fts5Index *p);

/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
static int sqlite3Fts5IndexRollback(Fts5Index *p);

/*
** Get or set the "averages" values.
**
** NOTE(review): GetAverages() presumably writes the row count to *pnRow
** and per-column totals to anSize[]; SetAverages() takes the serialized
** record as a (buffer, size) pair - confirm against fts5_index.c.
*/
static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);

/*
** Functions called by the storage module as part of integrity-check.
*/
static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum);

/*
** Called during virtual module initialization to register UDF
** fts5_decode() with SQLite
*/
static int sqlite3Fts5IndexInit(sqlite3*);

/* NOTE(review): presumably stores the configuration cookie (cf.
** Fts5Config.iCookie) in the index - confirm. */
static int sqlite3Fts5IndexSetCookie(Fts5Index*, int);

/*
** Return the total number of entries read from the %_data table by
** this connection since it was created.
*/
static int sqlite3Fts5IndexReads(Fts5Index *p);

/* Index maintenance entry points.
** NOTE(review): these have no documentation in this header; by name they
** presumably re-initialise, optimize, incrementally merge (nMerge) and
** reset the index - confirm against fts5_index.c. */
static int sqlite3Fts5IndexReinit(Fts5Index *p);
static int sqlite3Fts5IndexOptimize(Fts5Index *p);
static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
static int sqlite3Fts5IndexReset(Fts5Index *p);

/* NOTE(review): presumably (re)loads the %_config settings for the index's
** Fts5Config - confirm. */
static int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
1116 | |
1117 | /* |
1118 | ** End of interface to code in fts5_index.c. |
1119 | **************************************************************************/ |
1120 | |
1121 | /************************************************************************** |
1122 | ** Interface to code in fts5_varint.c. |
1123 | */ |
/* Varint encode/decode primitives.
**
** sqlite3Fts5GetVarint32() decodes the value at p into *v. Its return value
** is added to a buffer offset by fts5FastGetVarint32(), i.e. it is the
** number of bytes consumed.
** NOTE(review): the return values of the other functions are presumably
** byte counts as well (bytes needed, read and written respectively) -
** confirm against fts5_varint.c. */
static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
static int sqlite3Fts5GetVarintLen(u32 iVal);
static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
static int sqlite3Fts5PutVarint(unsigned char *p, u64 v);
1128 | |
/* Short names for the varint readers. fts5GetVarint32() takes the output
** variable itself (an lvalue), not a pointer to it. */
#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint        sqlite3Fts5GetVarint

/* Read the varint at a[iOff] into nVal and advance iOff past it.
**
** A first byte without the 0x80 bit is a complete single-byte value and is
** handled inline; anything else goes through fts5GetVarint32(). iOff and
** nVal must be plain lvalues because they are evaluated several times. */
#define fts5FastGetVarint32(a, iOff, nVal) {                 \
  nVal = (a)[iOff];                                          \
  if( nVal & 0x80 ){                                         \
    iOff += fts5GetVarint32(&(a)[iOff], nVal);               \
  }else{                                                     \
    iOff++;                                                  \
  }                                                          \
}
1139 | |
1140 | |
1141 | /* |
1142 | ** End of interface to code in fts5_varint.c. |
1143 | **************************************************************************/ |
1144 | |
1145 | |
1146 | /************************************************************************** |
1147 | ** Interface to code in fts5_main.c. |
1148 | */ |
1149 | |
/*
** Virtual-table object.
**
** NOTE(review): base is the first member, presumably so that a pointer to
** this object can be used where SQLite expects an sqlite3_vtab* - confirm
** before reordering fields.
*/
typedef struct Fts5Table Fts5Table;
struct Fts5Table {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  Fts5Config *pConfig;            /* Virtual table configuration */
  Fts5Index *pIndex;              /* Full-text index */
};

/* NOTE(review): presumably locates the tokenizer named by azArg[0],
** instantiates it with the remaining arguments and stores it in the
** Fts5Config (pTok/pTokApi), reporting problems via *pzErr - confirm. */
static int sqlite3Fts5GetTokenizer(
  Fts5Global*,
  const char **azArg,
  int nArg,
  Fts5Config*,
  char **pzErr
);

/* NOTE(review): presumably maps a cursor id to the table that owns the
** cursor - confirm. */
static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);

/* NOTE(review): presumably writes pending in-memory index data for the
** table to the database - confirm. */
static int sqlite3Fts5FlushToDisk(Fts5Table*);
1171 | |
/*
** End of interface to code in fts5_main.c.
**************************************************************************/
1175 | |
1176 | /************************************************************************** |
1177 | ** Interface to code in fts5_hash.c. |
1178 | */ |
/* In-memory hash table of pending index data. The type is opaque outside
** fts5_hash.c. */
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
**
** NOTE(review): pnSize presumably points to a counter of the memory used
** by the table that the hash code keeps up to date - confirm.
*/
static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
static void sqlite3Fts5HashFree(Fts5Hash*);

/* Add an entry for one token to the hash table. The iCol, iPos and token
** arguments carry the same meaning as for sqlite3Fts5IndexWrite().
** NOTE(review): bByte is presumably a one-byte prefix that distinguishes
** the main index from prefix indexes - confirm. */
static int sqlite3Fts5HashWrite(
  Fts5Hash*,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
static void sqlite3Fts5HashClear(Fts5Hash*);

/* Look up the doclist for a single term.
** NOTE(review): nPre is not documented here; presumably a number of bytes
** the caller wants reserved ahead of the returned doclist - confirm. */
static int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  int nPre,
  const char *pTerm, int nTerm,   /* Query term */
  void **ppObj,                   /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

/* Scan interface: ScanInit() starts a scan of the entries matching a term
** prefix, ScanNext() advances it, ScanEof() tests for the end and
** ScanEntry() reads the current entry. */
static int sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
static void sqlite3Fts5HashScanNext(Fts5Hash*);
static int sqlite3Fts5HashScanEof(Fts5Hash*);
static void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
);
1220 | |
1221 | |
1222 | /* |
1223 | ** End of interface to code in fts5_hash.c. |
1224 | **************************************************************************/ |
1225 | |
/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
*/
1230 | |
/* Statement identifiers passed as the eStmt argument of
** sqlite3Fts5StorageStmt() and sqlite3Fts5StorageStmtRelease(). */
#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1     /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP    2     /* SELECT rowid, * FROM ... WHERE rowid=? */

/* Storage object; opaque outside fts5_storage.c. */
typedef struct Fts5Storage Fts5Storage;

/* Open, close and rename a storage object.
** NOTE(review): for StorageOpen() the int is presumably a "create" flag,
** Fts5Storage** the new object and char** an error message - confirm. */
static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
static int sqlite3Fts5StorageClose(Fts5Storage *p);
static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);

/* NOTE(review): presumably drop all, or create one, of the shadow tables
** that back an FTS5 table - confirm against fts5_storage.c. */
static int sqlite3Fts5DropAll(Fts5Config*);
static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);

/* Row-level operations.
** NOTE(review): the i64 arguments are presumably rowids (an OUT parameter
** for ContentInsert()) and the sqlite3_value** arrays the column values -
** confirm. */
static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);

static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg);

/* Obtain and release one of the FTS5_STMT_XXX statements. */
static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);

/* Size and row-count statistics.
** NOTE(review): presumably per-row column sizes (aCol[]), a per-column
** figure (*pnAvg) and the total row count (*pnRow) - confirm. */
static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);

static int sqlite3Fts5StorageSync(Fts5Storage *p);
static int sqlite3Fts5StorageRollback(Fts5Storage *p);

/* NOTE(review): presumably writes one name/value pair to the %_config
** table - confirm. */
static int sqlite3Fts5StorageConfigValue(
    Fts5Storage *p, const char*, sqlite3_value*, int
);

/* Whole-table maintenance operations. */
static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
static int sqlite3Fts5StorageRebuild(Fts5Storage *p);
static int sqlite3Fts5StorageOptimize(Fts5Storage *p);
static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
static int sqlite3Fts5StorageReset(Fts5Storage *p);
1269 | |
1270 | /* |
1271 | ** End of interface to code in fts5_storage.c. |
1272 | **************************************************************************/ |
1273 | |
1274 | |
1275 | /************************************************************************** |
1276 | ** Interface to code in fts5_expr.c. |
1277 | */ |
/* Expression types. All except Fts5Token are opaque outside fts5_expr.c. */
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;

/* A token: a reference to n bytes of text at p. The text is not owned by
** this object and is not nul-terminated. */
struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};

/* Parse a MATCH expression. */
static int sqlite3Fts5ExprNew(
  Fts5Config *pConfig,
  int bPhraseToAnd,
  int iCol,                       /* Column on LHS of MATCH operator */
  const char *zExpr,
  Fts5Expr **ppNew,
  char **pzErr
);
/* NOTE(review): presumably builds an expression for a LIKE (bGlob==0) or
** GLOB (bGlob!=0) pattern zText against column iCol - confirm. */
static int sqlite3Fts5ExprPattern(
  Fts5Config *pConfig,
  int bGlob,
  int iCol,
  const char *zText,
  Fts5Expr **pp
);

/*
** Typical iteration over the documents that match an expression:
**
**   for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
**       rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
**       rc = sqlite3Fts5ExprNext(pExpr, iMax)
**   ){
**     // The document with rowid iRowid matches the expression!
**     i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
**   }
**
** NOTE(review): iMin and iMax are presumably bounds on the rowids visited -
** confirm against fts5_expr.c.
*/
static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
static int sqlite3Fts5ExprEof(Fts5Expr*);
static i64 sqlite3Fts5ExprRowid(Fts5Expr*);

static void sqlite3Fts5ExprFree(Fts5Expr*);
/* NOTE(review): presumably replaces *pp1 with (*pp1 AND p2) - confirm. */
static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2);

/* Called during startup to register a UDF with SQLite */
static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

/* Phrase accessors.
** NOTE(review): presumably the number of phrases, the number of terms in
** phrase iPhrase, and the position list of a phrase for the current row -
** confirm. */
static int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

/* Support for clearing and re-populating position lists. The populator
** returned by ClearPoslists() is passed back to PopulatePoslists(). */
typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
static int sqlite3Fts5ExprPopulatePoslists(
    Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
);
static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);

/* NOTE(review): presumably creates a new expression holding a copy of one
** phrase (selected by the int) of the original - confirm. */
static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);

static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
1341 | |
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
** the parser code in fts5parse.y. */

/* Record a printf-style error message against the parse context. */
static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);

/* Node constructors. eType selects the kind of node; pLeft/pRight are the
** child nodes and pNear a nearset.
** NOTE(review): which arguments may be NULL for each eType, and whether
** ownership of the arguments passes to the new node, is not visible here -
** confirm against fts5_expr.c. */
static Fts5ExprNode *sqlite3Fts5ParseNode(
  Fts5Parse *pParse,
  int eType,
  Fts5ExprNode *pLeft,
  Fts5ExprNode *pRight,
  Fts5ExprNearset *pNear
);

static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
  Fts5Parse *pParse,
  Fts5ExprNode *pLeft,
  Fts5ExprNode *pRight
);

/* NOTE(review): presumably appends the term in pToken to pPhrase (creating
** the phrase when pPhrase is NULL); bPrefix marks a prefix term -
** confirm. */
static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse,
  Fts5ExprPhrase *pPhrase,
  Fts5Token *pToken,
  int bPrefix
);

static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*);

/* NOTE(review): presumably adds a phrase to a nearset, and a column named
** by a token to a colset, respectively - confirm. */
static Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse*,
  Fts5ExprNearset*,
  Fts5ExprPhrase*
);

static Fts5Colset *sqlite3Fts5ParseColset(
  Fts5Parse*,
  Fts5Colset*,
  Fts5Token *
);

/* Destructors for partially built parse objects. */
static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);

static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*);
static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*);
static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
1393 | |
1394 | /* |
1395 | ** End of interface to code in fts5_expr.c. |
1396 | **************************************************************************/ |
1397 | |
1398 | |
1399 | |
1400 | /************************************************************************** |
1401 | ** Interface to code in fts5_aux.c. |
1402 | */ |
1403 | |
/* NOTE(review): presumably registers the built-in auxiliary functions with
** the supplied fts5_api object - confirm against fts5_aux.c. */
static int sqlite3Fts5AuxInit(fts5_api*);
1405 | /* |
1406 | ** End of interface to code in fts5_aux.c. |
1407 | **************************************************************************/ |
1408 | |
1409 | /************************************************************************** |
1410 | ** Interface to code in fts5_tokenizer.c. |
1411 | */ |
1412 | |
/* NOTE(review): presumably registers the built-in tokenizers with the
** supplied fts5_api object - confirm against fts5_tokenizer.c. */
static int sqlite3Fts5TokenizerInit(fts5_api*);
/* NOTE(review): presumably reports which FTS5_PATTERN_XXX value the
** tokenizer instance pTok, created by xCreate, can support - confirm. */
static int sqlite3Fts5TokenizerPattern(
    int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
    Fts5Tokenizer *pTok
);
1418 | /* |
1419 | ** End of interface to code in fts5_tokenizer.c. |
1420 | **************************************************************************/ |
1421 | |
1422 | /************************************************************************** |
1423 | ** Interface to code in fts5_vocab.c. |
1424 | */ |
1425 | |
/* NOTE(review): presumably registers the fts5vocab virtual table module
** with the database handle - confirm against fts5_vocab.c. */
static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
1427 | |
1428 | /* |
1429 | ** End of interface to code in fts5_vocab.c. |
1430 | **************************************************************************/ |
1431 | |
1432 | |
1433 | /************************************************************************** |
1434 | ** Interface to automatically generated code in fts5_unicode2.c. |
1435 | */ |
/* Unicode helpers; c and iCode are code point values.
** NOTE(review): by their names these presumably test for a diacritic, fold
** a code point (optionally removing diacritics), parse a category
** specification into a u8 array, return the category of a code point, and
** fill in category data for the ASCII range - confirm against
** fts5_unicode2.c. */
static int sqlite3Fts5UnicodeIsdiacritic(int c);
static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

static int sqlite3Fts5UnicodeCatParse(const char*, u8*);
static int sqlite3Fts5UnicodeCategory(u32 iCode);
static void sqlite3Fts5UnicodeAscii(u8*, u8*);
1442 | /* |
1443 | ** End of interface to code in fts5_unicode2.c. |
1444 | **************************************************************************/ |
1445 | |
1446 | #endif |
1447 | |
1448 | #line 1 "fts5parse.h" |
#define FTS5_OR                               1  /* Token codes for the */
#define FTS5_AND                              2  /* FTS5 query parser.  */
#define FTS5_NOT                              3  /* The same values are */
#define FTS5_TERM                             4  /* repeated, guarded   */
#define FTS5_COLON                            5  /* by #ifndef FTS5_OR, */
#define FTS5_MINUS                            6  /* in fts5parse.c.     */
#define FTS5_LCP                              7
#define FTS5_RCP                              8
#define FTS5_STRING                           9
#define FTS5_LP                              10
#define FTS5_RP                              11
#define FTS5_CARET                           12
#define FTS5_COMMA                           13
#define FTS5_PLUS                            14
#define FTS5_STAR                            15
1464 | |
1465 | #line 1 "fts5parse.c" |
1466 | /* This file is automatically generated by Lemon from input grammar |
1467 | ** source file "fts5parse.y". */ |
1468 | /* |
1469 | ** 2000-05-29 |
1470 | ** |
1471 | ** The author disclaims copyright to this source code. In place of |
1472 | ** a legal notice, here is a blessing: |
1473 | ** |
1474 | ** May you do good and not evil. |
1475 | ** May you find forgiveness for yourself and forgive others. |
1476 | ** May you share freely, never taking more than you give. |
1477 | ** |
1478 | ************************************************************************* |
1479 | ** Driver template for the LEMON parser generator. |
1480 | ** |
** The "lemon" program processes an LALR(1) input grammar file, then uses
** this template to construct a parser. The "lemon" program inserts text
** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
** interstitial "-" characters) contained in this template is changed into
** the value of the %name directive from the grammar. Otherwise, the content
** of this template is copied straight through into the generated parser
** source file.
1488 | ** |
1489 | ** The following is the concatenation of all %include directives from the |
1490 | ** input grammar file: |
1491 | */ |
1492 | /************ Begin %include sections from the grammar ************************/ |
1493 | #line 47 "fts5parse.y" |
1494 | |
1495 | /* #include "fts5Int.h" */ |
1496 | /* #include "fts5parse.h" */ |
1497 | |
/*
** Switch off all error-recovery processing in the parser's push-down
** automaton.
*/
#define fts5YYNOERRORRECOVERY   1

/*
** Route fts5yytestcase() to the testcase() macro.
*/
#define fts5yytestcase(X)       testcase(X)

/*
** Declare that the parser-free routine is never handed a null pointer.
**
** NOTE(review): the generated sqlite3Fts5ParserFree() tests a macro with a
** different spelling (fts5YYPARSEFREENEVERNULL).  Unless that name is
** defined somewhere else, this definition does not remove the "p==0"
** check in that routine.
*/
#define fts5YYPARSEFREENOTNULL  1

/*
** Argument type of the malloc()-style routine handed to the parser
** allocator.  The template default is size_t.
*/
#define fts5YYMALLOCARGTYPE     u64
1520 | |
1521 | #line 57 "fts5parse.c" |
1522 | /**************** End of %include directives **********************************/ |
1523 | /* These constants specify the various numeric values for terminal symbols. |
1524 | ***************** Begin token definitions *************************************/ |
/* Token codes, emitted only when FTS5_OR has not been defined already. */
#if !defined(FTS5_OR)
# define FTS5_OR       1
# define FTS5_AND      2
# define FTS5_NOT      3
# define FTS5_TERM     4
# define FTS5_COLON    5
# define FTS5_MINUS    6
# define FTS5_LCP      7
# define FTS5_RCP      8
# define FTS5_STRING   9
# define FTS5_LP      10
# define FTS5_RP      11
# define FTS5_CARET   12
# define FTS5_COMMA   13
# define FTS5_PLUS    14
# define FTS5_STAR    15
#endif
1542 | /**************** End token definitions ***************************************/ |
1543 | |
1544 | /* The next section is a series of control #defines that determine |
1545 | ** various aspects of the generated parser. |
1546 | ** fts5YYCODETYPE is the data type used to store the integer codes |
1547 | ** that represent terminal and non-terminal symbols. |
1548 | ** "unsigned char" is used if there are fewer than |
1549 | ** 256 symbols. Larger types otherwise. |
1550 | ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for |
1551 | ** any terminal or nonterminal symbol. |
1552 | ** fts5YYFALLBACK If defined, this indicates that one or more tokens |
1553 | ** (also known as: "terminal symbols") have fall-back |
1554 | ** values which should be used if the original symbol |
1555 | ** would not parse. This permits keywords to sometimes |
1556 | ** be used as identifiers, for example. |
1557 | ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers |
1558 | ** that indicate what to do in response to the next |
1559 | ** token. |
1560 | ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal |
1561 | ** symbols. Background: A "minor type" is a semantic |
1562 | ** value associated with a terminal or non-terminal |
1563 | ** symbols. For example, for an "ID" terminal symbol, |
1564 | ** the minor type might be the name of the identifier. |
1565 | ** Each non-terminal can have a different minor type. |
1566 | ** Terminal symbols all have the same minor type, though. |
1567 | ** This macro defines the minor type for terminal |
1568 | ** symbols. |
1569 | ** fts5YYMINORTYPE is the data type used for all minor types. |
1570 | ** This is typically a union of many types, one of |
1571 | ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union |
1572 | ** for terminal symbols is called "fts5yy0". |
1573 | ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If |
1574 | ** zero the stack is dynamically sized using realloc() |
1575 | ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument |
1576 | ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument |
1577 | ** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter |
1578 | ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser |
1579 | ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser |
1580 | ** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context |
1581 | ** fts5YYERRORSYMBOL is the code number of the error symbol. If not |
1582 | ** defined, then do no error processing. |
1583 | ** fts5YYNSTATE the combined number of states. |
1584 | ** fts5YYNRULE the number of rules in the grammar |
1585 | ** fts5YYNFTS5TOKEN Number of terminal symbols |
1586 | ** fts5YY_MAX_SHIFT Maximum value for shift actions |
1587 | ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions |
1588 | ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions |
1589 | ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error |
1590 | ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept |
1591 | ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op |
1592 | ** fts5YY_MIN_REDUCE Minimum value for reduce actions |
1593 | ** fts5YY_MAX_REDUCE Maximum value for reduce actions |
1594 | */ |
/* Give INTERFACE the value 1 unless it has been defined already. */
#if !defined(INTERFACE)
# define INTERFACE 1
#endif
1598 | /************* Begin control #defines *****************************************/ |
1599 | #define fts5YYCODETYPE unsigned char |
1600 | #define fts5YYNOCODE 27 |
1601 | #define fts5YYACTIONTYPE unsigned char |
1602 | #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token |
1603 | typedef union { |
1604 | int fts5yyinit; |
1605 | sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; |
1606 | int fts5yy4; |
1607 | Fts5Colset* fts5yy11; |
1608 | Fts5ExprNode* fts5yy24; |
1609 | Fts5ExprNearset* fts5yy46; |
1610 | Fts5ExprPhrase* fts5yy53; |
1611 | } fts5YYMINORTYPE; |
/* Depth of the parser stack; a value <= 0 selects a heap-grown stack. */
#if !defined(fts5YYSTACKDEPTH)
# define fts5YYSTACKDEPTH 100
#endif

/* Plumbing for the %extra_argument, a "Fts5Parse *pParse": struct member
** declaration, parameter declaration, call-site argument, and the
** statements that load it from / store it into fts5yypParser. */
#define sqlite3Fts5ParserARG_SDECL  Fts5Parse *pParse;
#define sqlite3Fts5ParserARG_PDECL  ,Fts5Parse *pParse
#define sqlite3Fts5ParserARG_PARAM  ,pParse
#define sqlite3Fts5ParserARG_FETCH  Fts5Parse *pParse=fts5yypParser->pParse;
#define sqlite3Fts5ParserARG_STORE  fts5yypParser->pParse=pParse;

/* The %extra_context counterparts all expand to nothing. */
#define sqlite3Fts5ParserCTX_SDECL
#define sqlite3Fts5ParserCTX_PDECL
#define sqlite3Fts5ParserCTX_PARAM
#define sqlite3Fts5ParserCTX_FETCH
#define sqlite3Fts5ParserCTX_STORE

/* Grammar dimensions and the numeric ranges of the action codes. */
#define fts5YYNSTATE             35
#define fts5YYNRULE              28
#define fts5YYNRULE_WITH_ACTION  28
#define fts5YYNFTS5TOKEN         16
#define fts5YY_MAX_SHIFT         34
#define fts5YY_MIN_SHIFTREDUCE   52
#define fts5YY_MAX_SHIFTREDUCE   79
#define fts5YY_ERROR_ACTION      80
#define fts5YY_ACCEPT_ACTION     81
#define fts5YY_NO_ACTION         82
#define fts5YY_MIN_REDUCE        83
#define fts5YY_MAX_REDUCE        110
/************* End control #defines *******************************************/
/* Number of entries in the fts5yy_lookahead[] table. */
#define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])))
1639 | |
/* Make fts5yytestcase() a no-op unless it has been defined already.
**
** A grammar may define fts5yytestcase() in its %include section as a
** macro that helps verify code coverage.  That is useful for testing;
** production code should leave the macro turned off.
*/
#if !defined(fts5yytestcase)
# define fts5yytestcase(X)
#endif
1651 | |
1652 | |
1653 | /* Next are the tables used to determine what action to take based on the |
1654 | ** current state and lookahead token. These tables are used to implement |
1655 | ** functions that take a state number and lookahead value and return an |
1656 | ** action integer. |
1657 | ** |
1658 | ** Suppose the action integer is N. Then the action is determined as |
1659 | ** follows |
1660 | ** |
1661 | ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead |
1662 | ** token onto the stack and goto state N. |
1663 | ** |
1664 | ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then |
1665 | ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE. |
1666 | ** |
1667 | ** N == fts5YY_ERROR_ACTION A syntax error has occurred. |
1668 | ** |
1669 | ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. |
1670 | ** |
1671 | ** N == fts5YY_NO_ACTION No such action. Denotes unused |
1672 | ** slots in the fts5yy_action[] table. |
1673 | ** |
1674 | ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE |
1675 | ** and fts5YY_MAX_REDUCE |
1676 | ** |
1677 | ** The action table is constructed as a single large table named fts5yy_action[]. |
1678 | ** Given state S and lookahead X, the action is computed as either: |
1679 | ** |
1680 | ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] |
1681 | ** (B) N = fts5yy_default[S] |
1682 | ** |
1683 | ** The (A) formula is preferred. The B formula is used instead if |
1684 | ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X. |
1685 | ** |
1686 | ** The formulas above are for computing the action when the lookahead is |
1687 | ** a terminal symbol. If the lookahead is a non-terminal (as occurs after |
1688 | ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of |
1689 | ** the fts5yy_shift_ofst[] array. |
1690 | ** |
1691 | ** The following are the tables generated in this section: |
1692 | ** |
1693 | ** fts5yy_action[] A single table containing all actions. |
1694 | ** fts5yy_lookahead[] A table containing the lookahead for each entry in |
1695 | ** fts5yy_action. Used to detect hash collisions. |
1696 | ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for |
1697 | ** shifting terminals. |
1698 | ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for |
1699 | ** shifting non-terminals after a reduce. |
1700 | ** fts5yy_default[] Default action for each state. |
1701 | ** |
1702 | *********** Begin parsing tables **********************************************/ |
1703 | #define fts5YY_ACTTAB_COUNT (105) |
1704 | static const fts5YYACTIONTYPE fts5yy_action[] = { |
1705 | /* 0 */ 81, 20, 96, 6, 28, 99, 98, 26, 26, 18, |
1706 | /* 10 */ 96, 6, 28, 17, 98, 56, 26, 19, 96, 6, |
1707 | /* 20 */ 28, 14, 98, 14, 26, 31, 92, 96, 6, 28, |
1708 | /* 30 */ 108, 98, 25, 26, 21, 96, 6, 28, 78, 98, |
1709 | /* 40 */ 58, 26, 29, 96, 6, 28, 107, 98, 22, 26, |
1710 | /* 50 */ 24, 16, 12, 11, 1, 13, 13, 24, 16, 23, |
1711 | /* 60 */ 11, 33, 34, 13, 97, 8, 27, 32, 98, 7, |
1712 | /* 70 */ 26, 3, 4, 5, 3, 4, 5, 3, 83, 4, |
1713 | /* 80 */ 5, 3, 63, 5, 3, 62, 12, 2, 86, 13, |
1714 | /* 90 */ 9, 30, 10, 10, 54, 57, 75, 78, 78, 53, |
1715 | /* 100 */ 57, 15, 82, 82, 71, |
1716 | }; |
1717 | static const fts5YYCODETYPE fts5yy_lookahead[] = { |
1718 | /* 0 */ 16, 17, 18, 19, 20, 22, 22, 24, 24, 17, |
1719 | /* 10 */ 18, 19, 20, 7, 22, 9, 24, 17, 18, 19, |
1720 | /* 20 */ 20, 9, 22, 9, 24, 13, 17, 18, 19, 20, |
1721 | /* 30 */ 26, 22, 24, 24, 17, 18, 19, 20, 15, 22, |
1722 | /* 40 */ 9, 24, 17, 18, 19, 20, 26, 22, 21, 24, |
1723 | /* 50 */ 6, 7, 9, 9, 10, 12, 12, 6, 7, 21, |
1724 | /* 60 */ 9, 24, 25, 12, 18, 5, 20, 14, 22, 5, |
1725 | /* 70 */ 24, 3, 1, 2, 3, 1, 2, 3, 0, 1, |
1726 | /* 80 */ 2, 3, 11, 2, 3, 11, 9, 10, 5, 12, |
1727 | /* 90 */ 23, 24, 10, 10, 8, 9, 9, 15, 15, 8, |
1728 | /* 100 */ 9, 9, 27, 27, 11, 27, 27, 27, 27, 27, |
1729 | /* 110 */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, |
1730 | /* 120 */ 27, |
1731 | }; |
#define fts5YY_SHIFT_COUNT    (34)
#define fts5YY_SHIFT_MIN      (0)
#define fts5YY_SHIFT_MAX      (93)
/* For each state, the offset into fts5yy_action[] used when the lookahead
** is a terminal symbol. */
static const unsigned char fts5yy_shift_ofst[] = {
 /*     0 */    44,   44,   44,   44,   44,   44,   51,   77,   43,   12,
 /*    10 */    14,   83,   82,   14,   23,   23,   31,   31,   71,   74,
 /*    20 */    78,   81,   86,   91,    6,   53,   53,   60,   64,   68,
 /*    30 */    53,   87,   92,   53,   93,
};
#define fts5YY_REDUCE_COUNT (17)
#define fts5YY_REDUCE_MIN   (-17)
#define fts5YY_REDUCE_MAX   (67)
/* For each state, the offset into fts5yy_action[] used when the lookahead
** is a non-terminal, as happens after a reduce. */
static const signed char fts5yy_reduce_ofst[] = {
 /*     0 */   -16,   -8,    0,    9,   17,   25,   46,  -17,  -17,   37,
 /*    10 */    67,    4,    4,    8,    4,   20,   27,   38,
};
1748 | static const fts5YYACTIONTYPE fts5yy_default[] = { |
1749 | /* 0 */ 80, 80, 80, 80, 80, 80, 95, 80, 80, 105, |
1750 | /* 10 */ 80, 110, 110, 80, 110, 110, 80, 80, 80, 80, |
1751 | /* 20 */ 80, 91, 80, 80, 80, 101, 100, 80, 80, 90, |
1752 | /* 30 */ 103, 80, 80, 104, 80, |
1753 | }; |
1754 | /********** End of lemon-generated parsing tables *****************************/ |
1755 | |
/* Table mapping each token (terminal symbol) to its fallback token.
** A grammar directive such as
**
**      %fallback ID X Y Z.
**
** makes ID the fallback for X, Y and Z: when one of those tokens is
** given to the parser and does not parse, its type is changed to ID and
** the parse is retried before a syntax error is raised.
**
** This can be used, for example, to let some keywords of a language
** revert to identifiers when the keyword does not apply in the context
** where it appears.
**
** The table is compiled only when fts5YYFALLBACK is defined, and it
** holds no entries here.
*/
#if defined(fts5YYFALLBACK)
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
1774 | |
1775 | /* The following structure represents a single element of the |
1776 | ** parser's stack. Information stored includes: |
1777 | ** |
1778 | ** + The state number for the parser at this level of the stack. |
1779 | ** |
1780 | ** + The value of the token stored at this level of the stack. |
1781 | ** (In other words, the "major" token.) |
1782 | ** |
1783 | ** + The semantic value stored at this level of the stack. This is |
1784 | ** the information used by the action routines in the grammar. |
1785 | ** It is sometimes called the "minor" token. |
1786 | ** |
1787 | ** After the "shift" half of a SHIFTREDUCE action, the stateno field |
1788 | ** actually contains the reduce action for the second half of the |
1789 | ** SHIFTREDUCE. |
1790 | */ |
1791 | struct fts5yyStackEntry { |
1792 | fts5YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ |
1793 | fts5YYCODETYPE major; /* The major token value. This is the code |
1794 | ** number for the token at this stack level */ |
1795 | fts5YYMINORTYPE minor; /* The user-supplied minor token value. This |
1796 | ** is the value of the token */ |
1797 | }; |
1798 | typedef struct fts5yyStackEntry fts5yyStackEntry; |
1799 | |
1800 | /* The state of the parser is completely contained in an instance of |
1801 | ** the following structure */ |
1802 | struct fts5yyParser { |
1803 | fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack */ |
1804 | #ifdef fts5YYTRACKMAXSTACKDEPTH |
1805 | int fts5yyhwm; /* High-water mark of the stack */ |
1806 | #endif |
1807 | #ifndef fts5YYNOERRORRECOVERY |
1808 | int fts5yyerrcnt; /* Shifts left before out of the error */ |
1809 | #endif |
1810 | sqlite3Fts5ParserARG_SDECL /* A place to hold %extra_argument */ |
1811 | sqlite3Fts5ParserCTX_SDECL /* A place to hold %extra_context */ |
1812 | #if fts5YYSTACKDEPTH<=0 |
1813 | int fts5yystksz; /* Current side of the stack */ |
1814 | fts5yyStackEntry *fts5yystack; /* The parser's stack */ |
1815 | fts5yyStackEntry fts5yystk0; /* First stack entry */ |
1816 | #else |
1817 | fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH]; /* The parser's stack */ |
1818 | fts5yyStackEntry *fts5yystackEnd; /* Last entry in the stack */ |
1819 | #endif |
1820 | }; |
1821 | typedef struct fts5yyParser fts5yyParser; |
1822 | |
1823 | #include <assert.h> |
#ifndef NDEBUG
#include <stdio.h>
/* Destination stream and line prefix for parser trace output.  Tracing is
** active only while both are non-NULL. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;

/*
** Enable or disable parser tracing.
**
** Inputs:
** <ul>
** <li> TraceFILE: the FILE* that trace output is written to.
** <li> zTracePrompt: the prefix written at the start of every line of
**      trace output.
** </ul>
** Passing NULL for either argument turns tracing off, in which case both
** stored settings are cleared.
**
** Outputs:
** None.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    /* Half a configuration is no configuration: drop both settings. */
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
  }else{
    fts5yyTraceFILE = TraceFILE;
    fts5yyTracePrompt = zTracePrompt;
  }
}
#endif /* NDEBUG */
1855 | |
#if defined(fts5YYCOVERAGE) || !defined(NDEBUG)
/* Printable names of every terminal and non-terminal, indexed by symbol
** code.  Used by the trace output and by the coverage report. */
static const char *const fts5yyTokenName[] = {
  /*    0 */ "$",
  /*    1 */ "OR",
  /*    2 */ "AND",
  /*    3 */ "NOT",
  /*    4 */ "TERM",
  /*    5 */ "COLON",
  /*    6 */ "MINUS",
  /*    7 */ "LCP",
  /*    8 */ "RCP",
  /*    9 */ "STRING",
  /*   10 */ "LP",
  /*   11 */ "RP",
  /*   12 */ "CARET",
  /*   13 */ "COMMA",
  /*   14 */ "PLUS",
  /*   15 */ "STAR",
  /*   16 */ "input",
  /*   17 */ "expr",
  /*   18 */ "cnearset",
  /*   19 */ "exprlist",
  /*   20 */ "colset",
  /*   21 */ "colsetlist",
  /*   22 */ "nearset",
  /*   23 */ "nearphrases",
  /*   24 */ "phrase",
  /*   25 */ "neardist_opt",
  /*   26 */ "star_opt",
};
#endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */
1889 | |
#ifndef NDEBUG
/* Text of every grammar rule, indexed by rule number.  Needed only for
** tracing reduce actions. */
static const char *const fts5yyRuleName[] = {
  /*   0 */ "input ::= expr",
  /*   1 */ "colset ::= MINUS LCP colsetlist RCP",
  /*   2 */ "colset ::= LCP colsetlist RCP",
  /*   3 */ "colset ::= STRING",
  /*   4 */ "colset ::= MINUS STRING",
  /*   5 */ "colsetlist ::= colsetlist STRING",
  /*   6 */ "colsetlist ::= STRING",
  /*   7 */ "expr ::= expr AND expr",
  /*   8 */ "expr ::= expr OR expr",
  /*   9 */ "expr ::= expr NOT expr",
  /*  10 */ "expr ::= colset COLON LP expr RP",
  /*  11 */ "expr ::= LP expr RP",
  /*  12 */ "expr ::= exprlist",
  /*  13 */ "exprlist ::= cnearset",
  /*  14 */ "exprlist ::= exprlist cnearset",
  /*  15 */ "cnearset ::= nearset",
  /*  16 */ "cnearset ::= colset COLON nearset",
  /*  17 */ "nearset ::= phrase",
  /*  18 */ "nearset ::= CARET phrase",
  /*  19 */ "nearset ::= STRING LP nearphrases neardist_opt RP",
  /*  20 */ "nearphrases ::= phrase",
  /*  21 */ "nearphrases ::= nearphrases phrase",
  /*  22 */ "neardist_opt ::=",
  /*  23 */ "neardist_opt ::= COMMA STRING",
  /*  24 */ "phrase ::= phrase PLUS STRING star_opt",
  /*  25 */ "phrase ::= STRING star_opt",
  /*  26 */ "star_opt ::= STAR",
  /*  27 */ "star_opt ::=",
};
#endif /* NDEBUG */
1924 | |
1925 | |
1926 | #if fts5YYSTACKDEPTH<=0 |
1927 | /* |
1928 | ** Try to increase the size of the parser stack. Return the number |
1929 | ** of errors. Return 0 on success. |
1930 | */ |
1931 | static int fts5yyGrowStack(fts5yyParser *p){ |
1932 | int newSize; |
1933 | int idx; |
1934 | fts5yyStackEntry *pNew; |
1935 | |
1936 | newSize = p->fts5yystksz*2 + 100; |
1937 | idx = p->fts5yytos ? (int)(p->fts5yytos - p->fts5yystack) : 0; |
1938 | if( p->fts5yystack==&p->fts5yystk0 ){ |
1939 | pNew = malloc(newSize*sizeof(pNew[0])); |
1940 | if( pNew ) pNew[0] = p->fts5yystk0; |
1941 | }else{ |
1942 | pNew = realloc(p->fts5yystack, newSize*sizeof(pNew[0])); |
1943 | } |
1944 | if( pNew ){ |
1945 | p->fts5yystack = pNew; |
1946 | p->fts5yytos = &p->fts5yystack[idx]; |
1947 | #ifndef NDEBUG |
1948 | if( fts5yyTraceFILE ){ |
1949 | fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n" , |
1950 | fts5yyTracePrompt, p->fts5yystksz, newSize); |
1951 | } |
1952 | #endif |
1953 | p->fts5yystksz = newSize; |
1954 | } |
1955 | return pNew==0; |
1956 | } |
1957 | #endif |
1958 | |
/* Argument type of the memory allocator that is passed to
** sqlite3Fts5ParserAlloc() below.  A grammar can override it by placing
** its own #define in the %include section; size_t is the fallback.
*/
#if !defined(fts5YYMALLOCARGTYPE)
# define fts5YYMALLOCARGTYPE size_t
#endif
1967 | |
1968 | /* Initialize a new parser that has already been allocated. |
1969 | */ |
1970 | static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){ |
1971 | fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser; |
1972 | sqlite3Fts5ParserCTX_STORE |
1973 | #ifdef fts5YYTRACKMAXSTACKDEPTH |
1974 | fts5yypParser->fts5yyhwm = 0; |
1975 | #endif |
1976 | #if fts5YYSTACKDEPTH<=0 |
1977 | fts5yypParser->fts5yytos = NULL; |
1978 | fts5yypParser->fts5yystack = NULL; |
1979 | fts5yypParser->fts5yystksz = 0; |
1980 | if( fts5yyGrowStack(fts5yypParser) ){ |
1981 | fts5yypParser->fts5yystack = &fts5yypParser->fts5yystk0; |
1982 | fts5yypParser->fts5yystksz = 1; |
1983 | } |
1984 | #endif |
1985 | #ifndef fts5YYNOERRORRECOVERY |
1986 | fts5yypParser->fts5yyerrcnt = -1; |
1987 | #endif |
1988 | fts5yypParser->fts5yytos = fts5yypParser->fts5yystack; |
1989 | fts5yypParser->fts5yystack[0].stateno = 0; |
1990 | fts5yypParser->fts5yystack[0].major = 0; |
1991 | #if fts5YYSTACKDEPTH>0 |
1992 | fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1]; |
1993 | #endif |
1994 | } |
1995 | |
1996 | #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK |
1997 | /* |
1998 | ** This function allocates a new parser. |
1999 | ** The only argument is a pointer to a function which works like |
2000 | ** malloc. |
2001 | ** |
2002 | ** Inputs: |
2003 | ** A pointer to the function used to allocate memory. |
2004 | ** |
2005 | ** Outputs: |
2006 | ** A pointer to a parser. This pointer is used in subsequent calls |
2007 | ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree. |
2008 | */ |
2009 | static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE) sqlite3Fts5ParserCTX_PDECL){ |
2010 | fts5yyParser *fts5yypParser; |
2011 | fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyParser) ); |
2012 | if( fts5yypParser ){ |
2013 | sqlite3Fts5ParserCTX_STORE |
2014 | sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM); |
2015 | } |
2016 | return (void*)fts5yypParser; |
2017 | } |
2018 | #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ |
2019 | |
2020 | |
2021 | /* The following function deletes the "minor type" or semantic value |
2022 | ** associated with a symbol. The symbol can be either a terminal |
2023 | ** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is |
2024 | ** a pointer to the value to be deleted. The code used to do the |
2025 | ** deletions is derived from the %destructor and/or %token_destructor |
2026 | ** directives of the input grammar. |
2027 | */ |
2028 | static void fts5yy_destructor( |
2029 | fts5yyParser *fts5yypParser, /* The parser */ |
2030 | fts5YYCODETYPE fts5yymajor, /* Type code for object to destroy */ |
2031 | fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */ |
2032 | ){ |
2033 | sqlite3Fts5ParserARG_FETCH |
2034 | sqlite3Fts5ParserCTX_FETCH |
2035 | switch( fts5yymajor ){ |
2036 | /* Here is inserted the actions which take place when a |
2037 | ** terminal or non-terminal is destroyed. This can happen |
2038 | ** when the symbol is popped from the stack during a |
2039 | ** reduce or during error processing or when a parser is |
2040 | ** being destroyed before it is finished parsing. |
2041 | ** |
2042 | ** Note: during a reduce, the only symbols destroyed are those |
2043 | ** which appear on the RHS of the rule, but which are *not* used |
2044 | ** inside the C code. |
2045 | */ |
2046 | /********* Begin destructor definitions ***************************************/ |
2047 | case 16: /* input */ |
2048 | { |
2049 | #line 83 "fts5parse.y" |
2050 | (void)pParse; |
2051 | #line 586 "fts5parse.c" |
2052 | } |
2053 | break; |
2054 | case 17: /* expr */ |
2055 | case 18: /* cnearset */ |
2056 | case 19: /* exprlist */ |
2057 | { |
2058 | #line 89 "fts5parse.y" |
2059 | sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); |
2060 | #line 595 "fts5parse.c" |
2061 | } |
2062 | break; |
2063 | case 20: /* colset */ |
2064 | case 21: /* colsetlist */ |
2065 | { |
2066 | #line 93 "fts5parse.y" |
2067 | sqlite3_free((fts5yypminor->fts5yy11)); |
2068 | #line 603 "fts5parse.c" |
2069 | } |
2070 | break; |
2071 | case 22: /* nearset */ |
2072 | case 23: /* nearphrases */ |
2073 | { |
2074 | #line 148 "fts5parse.y" |
2075 | sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); |
2076 | #line 611 "fts5parse.c" |
2077 | } |
2078 | break; |
2079 | case 24: /* phrase */ |
2080 | { |
2081 | #line 183 "fts5parse.y" |
2082 | sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); |
2083 | #line 618 "fts5parse.c" |
2084 | } |
2085 | break; |
2086 | /********* End destructor definitions *****************************************/ |
2087 | default: break; /* If no destructor action specified: do nothing */ |
2088 | } |
2089 | } |
2090 | |
2091 | /* |
2092 | ** Pop the parser's stack once. |
2093 | ** |
2094 | ** If there is a destructor routine associated with the token which |
2095 | ** is popped from the stack, then call it. |
2096 | */ |
2097 | static void fts5yy_pop_parser_stack(fts5yyParser *pParser){ |
2098 | fts5yyStackEntry *fts5yytos; |
2099 | assert( pParser->fts5yytos!=0 ); |
2100 | assert( pParser->fts5yytos > pParser->fts5yystack ); |
2101 | fts5yytos = pParser->fts5yytos--; |
2102 | #ifndef NDEBUG |
2103 | if( fts5yyTraceFILE ){ |
2104 | fprintf(fts5yyTraceFILE,"%sPopping %s\n" , |
2105 | fts5yyTracePrompt, |
2106 | fts5yyTokenName[fts5yytos->major]); |
2107 | } |
2108 | #endif |
2109 | fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); |
2110 | } |
2111 | |
2112 | /* |
2113 | ** Clear all secondary memory allocations from the parser |
2114 | */ |
2115 | static void sqlite3Fts5ParserFinalize(void *p){ |
2116 | fts5yyParser *pParser = (fts5yyParser*)p; |
2117 | while( pParser->fts5yytos>pParser->fts5yystack ) fts5yy_pop_parser_stack(pParser); |
2118 | #if fts5YYSTACKDEPTH<=0 |
2119 | if( pParser->fts5yystack!=&pParser->fts5yystk0 ) free(pParser->fts5yystack); |
2120 | #endif |
2121 | } |
2122 | |
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser and hand its memory back.  Destructors run for every
** stack element (via sqlite3Fts5ParserFinalize) before freeProc is called
** on the parser object.
**
** If the fts5YYPARSEFREENEVERNULL macro is defined (for example in a
** %include section of the grammar), p is assumed never to be NULL and the
** null check below is compiled out.  Otherwise a NULL p is a no-op.
**
** NOTE(review): the %include section of this grammar defines
** fts5YYPARSEFREENOTNULL, which is a different name, so unless
** fts5YYPARSEFREENEVERNULL is defined elsewhere the null check stays in.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* The parser to be deleted */
  void (*freeProc)(void*)     /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( !p ) return;
#endif
  sqlite3Fts5ParserFinalize(p);
  freeProc(p);
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2143 | |
/*
** Return the peak stack depth recorded for parser p (its fts5yyhwm
** field).  Available only when fts5YYTRACKMAXSTACKDEPTH is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
2153 | |
#if defined(fts5YYCOVERAGE)
/* Parser coverage map.  Element fts5yycoverage[X][Y] is set when the
** parser has been in state X with lookahead token Y.  In a well-tested
** system every element of this matrix should end up set.
*/
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];

/*
** Report on every state/lookahead combination that has an explicit entry
** in the action table, which excludes the combinations that are syntax
** errors.
**
** If out is not NULL, one line is written to it for each such
** combination, marked "ok" when the parser has used it and "missed"
** when it has not.
**
** Returns the number of missed combinations.
*/
static int sqlite3Fts5ParserCoverage(FILE *out){
  int nMissed = 0;
  int iState;
  for(iState=0; iState<fts5YYNSTATE; iState++){
    const int iBase = fts5yy_shift_ofst[iState];
    int iTok;
    for(iTok=0; iTok<fts5YYNFTS5TOKEN; iTok++){
      int bSeen;
      /* Skip slots that belong to some other state/token pair. */
      if( fts5yy_lookahead[iBase+iTok]!=iTok ) continue;
      bSeen = fts5yycoverage[iState][iTok]!=0;
      if( !bSeen ) nMissed++;
      if( out ){
        fprintf(out,"State %d lookahead %s %s\n", iState,
                fts5yyTokenName[iTok],
                bSeen ? "ok" : "missed");
      }
    }
  }
  return nMissed;
}
#endif
2190 | |
2191 | /* |
2192 | ** Find the appropriate action for a parser given the terminal |
2193 | ** look-ahead token iLookAhead. |
2194 | */ |
2195 | static fts5YYACTIONTYPE fts5yy_find_shift_action( |
2196 | fts5YYCODETYPE iLookAhead, /* The look-ahead token */ |
2197 | fts5YYACTIONTYPE stateno /* Current state number */ |
2198 | ){ |
2199 | int i; |
2200 | |
2201 | if( stateno>fts5YY_MAX_SHIFT ) return stateno; |
2202 | assert( stateno <= fts5YY_SHIFT_COUNT ); |
2203 | #if defined(fts5YYCOVERAGE) |
2204 | fts5yycoverage[stateno][iLookAhead] = 1; |
2205 | #endif |
2206 | do{ |
2207 | i = fts5yy_shift_ofst[stateno]; |
2208 | assert( i>=0 ); |
2209 | assert( i<=fts5YY_ACTTAB_COUNT ); |
2210 | assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD ); |
2211 | assert( iLookAhead!=fts5YYNOCODE ); |
2212 | assert( iLookAhead < fts5YYNFTS5TOKEN ); |
2213 | i += iLookAhead; |
2214 | assert( i<(int)fts5YY_NLOOKAHEAD ); |
2215 | if( fts5yy_lookahead[i]!=iLookAhead ){ |
2216 | #ifdef fts5YYFALLBACK |
2217 | fts5YYCODETYPE iFallback; /* Fallback token */ |
2218 | assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) ); |
2219 | iFallback = fts5yyFallback[iLookAhead]; |
2220 | if( iFallback!=0 ){ |
2221 | #ifndef NDEBUG |
2222 | if( fts5yyTraceFILE ){ |
2223 | fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n" , |
2224 | fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]); |
2225 | } |
2226 | #endif |
2227 | assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ |
2228 | iLookAhead = iFallback; |
2229 | continue; |
2230 | } |
2231 | #endif |
2232 | #ifdef fts5YYWILDCARD |
2233 | { |
2234 | int j = i - iLookAhead + fts5YYWILDCARD; |
2235 | assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) ); |
2236 | if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){ |
2237 | #ifndef NDEBUG |
2238 | if( fts5yyTraceFILE ){ |
2239 | fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n" , |
2240 | fts5yyTracePrompt, fts5yyTokenName[iLookAhead], |
2241 | fts5yyTokenName[fts5YYWILDCARD]); |
2242 | } |
2243 | #endif /* NDEBUG */ |
2244 | return fts5yy_action[j]; |
2245 | } |
2246 | } |
2247 | #endif /* fts5YYWILDCARD */ |
2248 | return fts5yy_default[stateno]; |
2249 | }else{ |
2250 | assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) ); |
2251 | return fts5yy_action[i]; |
2252 | } |
2253 | }while(1); |
2254 | } |
2255 | |
2256 | /* |
2257 | ** Find the appropriate action for a parser given the non-terminal |
2258 | ** look-ahead token iLookAhead. |
2259 | */ |
2260 | static fts5YYACTIONTYPE fts5yy_find_reduce_action( |
2261 | fts5YYACTIONTYPE stateno, /* Current state number */ |
2262 | fts5YYCODETYPE iLookAhead /* The look-ahead token */ |
2263 | ){ |
2264 | int i; |
2265 | #ifdef fts5YYERRORSYMBOL |
2266 | if( stateno>fts5YY_REDUCE_COUNT ){ |
2267 | return fts5yy_default[stateno]; |
2268 | } |
2269 | #else |
2270 | assert( stateno<=fts5YY_REDUCE_COUNT ); |
2271 | #endif |
2272 | i = fts5yy_reduce_ofst[stateno]; |
2273 | assert( iLookAhead!=fts5YYNOCODE ); |
2274 | i += iLookAhead; |
2275 | #ifdef fts5YYERRORSYMBOL |
2276 | if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){ |
2277 | return fts5yy_default[stateno]; |
2278 | } |
2279 | #else |
2280 | assert( i>=0 && i<fts5YY_ACTTAB_COUNT ); |
2281 | assert( fts5yy_lookahead[i]==iLookAhead ); |
2282 | #endif |
2283 | return fts5yy_action[i]; |
2284 | } |
2285 | |
/*
** The following routine is called if the stack overflows.
**
** It pops every entry above the base of the parser stack with
** fts5yy_pop_parser_stack() and then runs the grammar's %stack_overflow
** code, which reports "fts5: parser stack overflow" through
** sqlite3Fts5ParseError().
**
** NOTE(review): pParse is not declared in this function; presumably the
** sqlite3Fts5ParserARG_FETCH macro (defined outside this section) brings it
** into scope - confirm against the macro definition.
*/
static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sStack Overflow!\n" ,fts5yyTracePrompt);
  }
#endif
  /* Unwind until only the base entry of the stack remains. */
  while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
  /* Here code is inserted which will execute if the parser
  ** stack ever overflows */
/******** Begin %stack_overflow code ******************************************/
#line 36 "fts5parse.y"

  sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow" );
#line 839 "fts5parse.c"
/******** End %stack_overflow code ********************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument var */
  sqlite3Fts5ParserCTX_STORE
}
2309 | |
2310 | /* |
2311 | ** Print tracing information for a SHIFT action |
2312 | */ |
2313 | #ifndef NDEBUG |
2314 | static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){ |
2315 | if( fts5yyTraceFILE ){ |
2316 | if( fts5yyNewState<fts5YYNSTATE ){ |
2317 | fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n" , |
2318 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], |
2319 | fts5yyNewState); |
2320 | }else{ |
2321 | fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n" , |
2322 | fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], |
2323 | fts5yyNewState - fts5YY_MIN_REDUCE); |
2324 | } |
2325 | } |
2326 | } |
2327 | #else |
2328 | # define fts5yyTraceShift(X,Y,Z) |
2329 | #endif |
2330 | |
2331 | /* |
2332 | ** Perform a shift action. |
2333 | */ |
2334 | static void fts5yy_shift( |
2335 | fts5yyParser *fts5yypParser, /* The parser to be shifted */ |
2336 | fts5YYACTIONTYPE fts5yyNewState, /* The new state to shift in */ |
2337 | fts5YYCODETYPE fts5yyMajor, /* The major token to shift in */ |
2338 | sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor /* The minor token to shift in */ |
2339 | ){ |
2340 | fts5yyStackEntry *fts5yytos; |
2341 | fts5yypParser->fts5yytos++; |
2342 | #ifdef fts5YYTRACKMAXSTACKDEPTH |
2343 | if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ |
2344 | fts5yypParser->fts5yyhwm++; |
2345 | assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) ); |
2346 | } |
2347 | #endif |
2348 | #if fts5YYSTACKDEPTH>0 |
2349 | if( fts5yypParser->fts5yytos>fts5yypParser->fts5yystackEnd ){ |
2350 | fts5yypParser->fts5yytos--; |
2351 | fts5yyStackOverflow(fts5yypParser); |
2352 | return; |
2353 | } |
2354 | #else |
2355 | if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz] ){ |
2356 | if( fts5yyGrowStack(fts5yypParser) ){ |
2357 | fts5yypParser->fts5yytos--; |
2358 | fts5yyStackOverflow(fts5yypParser); |
2359 | return; |
2360 | } |
2361 | } |
2362 | #endif |
2363 | if( fts5yyNewState > fts5YY_MAX_SHIFT ){ |
2364 | fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE; |
2365 | } |
2366 | fts5yytos = fts5yypParser->fts5yytos; |
2367 | fts5yytos->stateno = fts5yyNewState; |
2368 | fts5yytos->major = fts5yyMajor; |
2369 | fts5yytos->minor.fts5yy0 = fts5yyMinor; |
2370 | fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift" ); |
2371 | } |
2372 | |
/* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side
** of that rule.
**
** fts5yy_reduce() reads this table to obtain the symbol it passes to
** fts5yy_find_reduce_action() and stores as the "major" of the stack entry
** that replaces the rule's right-hand side.  The trailing comment on each
** row gives the rule number and the rule text. */
static const fts5YYCODETYPE fts5yyRuleInfoLhs[] = {
  16, /* (0) input ::= expr */
  20, /* (1) colset ::= MINUS LCP colsetlist RCP */
  20, /* (2) colset ::= LCP colsetlist RCP */
  20, /* (3) colset ::= STRING */
  20, /* (4) colset ::= MINUS STRING */
  21, /* (5) colsetlist ::= colsetlist STRING */
  21, /* (6) colsetlist ::= STRING */
  17, /* (7) expr ::= expr AND expr */
  17, /* (8) expr ::= expr OR expr */
  17, /* (9) expr ::= expr NOT expr */
  17, /* (10) expr ::= colset COLON LP expr RP */
  17, /* (11) expr ::= LP expr RP */
  17, /* (12) expr ::= exprlist */
  19, /* (13) exprlist ::= cnearset */
  19, /* (14) exprlist ::= exprlist cnearset */
  18, /* (15) cnearset ::= nearset */
  18, /* (16) cnearset ::= colset COLON nearset */
  22, /* (17) nearset ::= phrase */
  22, /* (18) nearset ::= CARET phrase */
  22, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
  23, /* (20) nearphrases ::= phrase */
  23, /* (21) nearphrases ::= nearphrases phrase */
  25, /* (22) neardist_opt ::= */
  25, /* (23) neardist_opt ::= COMMA STRING */
  24, /* (24) phrase ::= phrase PLUS STRING star_opt */
  24, /* (25) phrase ::= STRING star_opt */
  26, /* (26) star_opt ::= STAR */
  26, /* (27) star_opt ::= */
};
2405 | |
/* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number
** of symbols on the right-hand side of that rule.
**
** Because the count is stored negated, fts5yy_reduce() can add it directly
** to its top-of-stack pointer to reach the entry lying beneath the rule's
** right-hand side.  A value of 0 marks a rule with an empty right-hand
** side; sqlite3Fts5Parser() tests for that case to make sure the stack has
** room for one more entry before reducing.  Rows are in the same order as
** fts5yyRuleInfoLhs[]. */
static const signed char fts5yyRuleInfoNRhs[] = {
  -1, /* (0) input ::= expr */
  -4, /* (1) colset ::= MINUS LCP colsetlist RCP */
  -3, /* (2) colset ::= LCP colsetlist RCP */
  -1, /* (3) colset ::= STRING */
  -2, /* (4) colset ::= MINUS STRING */
  -2, /* (5) colsetlist ::= colsetlist STRING */
  -1, /* (6) colsetlist ::= STRING */
  -3, /* (7) expr ::= expr AND expr */
  -3, /* (8) expr ::= expr OR expr */
  -3, /* (9) expr ::= expr NOT expr */
  -5, /* (10) expr ::= colset COLON LP expr RP */
  -3, /* (11) expr ::= LP expr RP */
  -1, /* (12) expr ::= exprlist */
  -1, /* (13) exprlist ::= cnearset */
  -2, /* (14) exprlist ::= exprlist cnearset */
  -1, /* (15) cnearset ::= nearset */
  -3, /* (16) cnearset ::= colset COLON nearset */
  -1, /* (17) nearset ::= phrase */
  -2, /* (18) nearset ::= CARET phrase */
  -5, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
  -1, /* (20) nearphrases ::= phrase */
  -2, /* (21) nearphrases ::= nearphrases phrase */
  0, /* (22) neardist_opt ::= */
  -2, /* (23) neardist_opt ::= COMMA STRING */
  -4, /* (24) phrase ::= phrase PLUS STRING star_opt */
  -2, /* (25) phrase ::= STRING star_opt */
  -1, /* (26) star_opt ::= STAR */
  0, /* (27) star_opt ::= */
};
2438 | |
static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */

/*
** Perform a reduce action and the shift that must immediately
** follow the reduce.
**
** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions
** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE
** if the lookahead token has already been consumed. As this procedure is
** only called from one place, optimizing compilers will in-line it, which
** means that the extra parameters have no performance impact.
**
** Steps performed, in order:
**
**   1. Run the action code for rule fts5yyruleno (the switch below).
**      Actions read the right-hand-side values from stack entries at
**      fts5yymsp[0], fts5yymsp[-1], ... and write the left-hand-side value
**      into the lowest entry the rule occupies.
**   2. Look up the rule's left-hand-side symbol in fts5yyRuleInfoLhs[] and
**      its negated right-hand-side length in fts5yyRuleInfoNRhs[].
**   3. Call fts5yy_find_reduce_action() with the state stored in the entry
**      beneath the right-hand side to obtain the next action.
**   4. Make the lowest right-hand-side slot the new top of stack and store
**      the action and the left-hand-side symbol in it.
**
** Returns the action stored in the new top-of-stack entry.
**
** NOTE(review): the action code uses pParse, which is not declared in this
** function; presumably sqlite3Fts5ParserARG_FETCH brings it into scope -
** confirm against the macro definition.
*/
static fts5YYACTIONTYPE fts5yy_reduce(
  fts5yyParser *fts5yypParser, /* The parser */
  unsigned int fts5yyruleno, /* Number of the rule by which to reduce */
  int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyLookaheadToken /* Value of the lookahead token */
  sqlite3Fts5ParserCTX_PDECL /* %extra_context */
){
  int fts5yygoto; /* The next state */
  fts5YYACTIONTYPE fts5yyact; /* The next action */
  fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */
  int fts5yysize; /* Amount to pop the stack */
  sqlite3Fts5ParserARG_FETCH
  /* No action below refers to the lookahead; the casts silence
  ** unused-parameter warnings. */
  (void)fts5yyLookahead;
  (void)fts5yyLookaheadToken;
  fts5yymsp = fts5yypParser->fts5yytos;

  switch( fts5yyruleno ){
  /* Beginning here are the reduction cases. A typical example
  ** follows:
  ** case 0:
  ** #line <lineno> <grammarfile>
  ** { ... } // User supplied code
  ** #line <lineno> <thisfile>
  ** break;
  */
/********** Begin reduce actions **********************************************/
        /* Scratch value: several actions build the left-hand-side value
        ** here first and copy it into the stack slot afterwards, because
        ** that slot still holds a right-hand-side value the action reads. */
        fts5YYMINORTYPE fts5yylhsminor;
      case 0: /* input ::= expr */
#line 82 "fts5parse.y"
{ sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); }
#line 1016 "fts5parse.c"
        break;
      case 1: /* colset ::= MINUS LCP colsetlist RCP */
#line 97 "fts5parse.y"
{
  fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
}
#line 1023 "fts5parse.c"
        break;
      case 2: /* colset ::= LCP colsetlist RCP */
#line 100 "fts5parse.y"
{ fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; }
#line 1028 "fts5parse.c"
        break;
      case 3: /* colset ::= STRING */
#line 101 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
}
#line 1035 "fts5parse.c"
  fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 4: /* colset ::= MINUS STRING */
#line 104 "fts5parse.y"
{
  fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
  fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
}
#line 1044 "fts5parse.c"
        break;
      case 5: /* colsetlist ::= colsetlist STRING */
#line 109 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); }
#line 1050 "fts5parse.c"
  fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 6: /* colsetlist ::= STRING */
#line 111 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
}
#line 1058 "fts5parse.c"
  fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 7: /* expr ::= expr AND expr */
#line 115 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1066 "fts5parse.c"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 8: /* expr ::= expr OR expr */
#line 118 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1074 "fts5parse.c"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 9: /* expr ::= expr NOT expr */
#line 121 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1082 "fts5parse.c"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 10: /* expr ::= colset COLON LP expr RP */
#line 125 "fts5parse.y"
{
  sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11);
  fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;
}
#line 1091 "fts5parse.c"
  fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 11: /* expr ::= LP expr RP */
#line 129 "fts5parse.y"
{fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;}
#line 1097 "fts5parse.c"
        break;
      /* Rules 12 and 13 share one action: the single right-hand-side
      ** value is passed through as the left-hand-side value. */
      case 12: /* expr ::= exprlist */
      case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13);
#line 130 "fts5parse.y"
{fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;}
#line 1103 "fts5parse.c"
  fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 14: /* exprlist ::= exprlist cnearset */
#line 133 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24);
}
#line 1111 "fts5parse.c"
  fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 15: /* cnearset ::= nearset */
#line 137 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
}
#line 1119 "fts5parse.c"
  fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 16: /* cnearset ::= colset COLON nearset */
#line 140 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
  sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11);
}
#line 1128 "fts5parse.c"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 17: /* nearset ::= phrase */
#line 151 "fts5parse.y"
{ fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); }
#line 1134 "fts5parse.c"
  fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 18: /* nearset ::= CARET phrase */
#line 152 "fts5parse.y"
{
  sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53);
  fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
}
#line 1143 "fts5parse.c"
        break;
      case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */
#line 156 "fts5parse.y"
{
  sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
  sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0);
  fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46;
}
#line 1152 "fts5parse.c"
  fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 20: /* nearphrases ::= phrase */
#line 162 "fts5parse.y"
{
  fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
}
#line 1160 "fts5parse.c"
  fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 21: /* nearphrases ::= nearphrases phrase */
#line 165 "fts5parse.y"
{
  fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53);
}
#line 1168 "fts5parse.c"
  fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      /* Empty right-hand side: the value is written to fts5yymsp[1], the
      ** slot just above the current top, which becomes the new top below. */
      case 22: /* neardist_opt ::= */
#line 172 "fts5parse.y"
{ fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
#line 1174 "fts5parse.c"
        break;
      case 23: /* neardist_opt ::= COMMA STRING */
#line 173 "fts5parse.y"
{ fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
#line 1179 "fts5parse.c"
        break;
      case 24: /* phrase ::= phrase PLUS STRING star_opt */
#line 185 "fts5parse.y"
{
  fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
}
#line 1186 "fts5parse.c"
  fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
        break;
      case 25: /* phrase ::= STRING star_opt */
#line 188 "fts5parse.y"
{
  fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
}
#line 1194 "fts5parse.c"
  fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
        break;
      case 26: /* star_opt ::= STAR */
#line 196 "fts5parse.y"
{ fts5yymsp[0].minor.fts5yy4 = 1; }
#line 1200 "fts5parse.c"
        break;
      /* Empty right-hand side: see the note on rule 22. */
      case 27: /* star_opt ::= */
#line 197 "fts5parse.y"
{ fts5yymsp[1].minor.fts5yy4 = 0; }
#line 1205 "fts5parse.c"
        break;
      default:
        break;
/********** End reduce actions ************************************************/
  };
  assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) );
  fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno];
  fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
  /* fts5yysize is zero or negative, so fts5yymsp[fts5yysize] is the entry
  ** beneath the rule's right-hand side. */
  fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPE)fts5yygoto);

  /* There are no SHIFTREDUCE actions on nonterminals because the table
  ** generator has simplified them to pure REDUCE actions. */
  assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) );

  /* It is not possible for a REDUCE to be followed by an error */
  assert( fts5yyact!=fts5YY_ERROR_ACTION );

  /* Drop the right-hand-side entries and claim one slot for the left-hand
  ** side.  Only stateno and major are set here; the minor value was already
  ** written into this slot by the action code above. */
  fts5yymsp += fts5yysize+1;
  fts5yypParser->fts5yytos = fts5yymsp;
  fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact;
  fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto;
  fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift" );
  return fts5yyact;
}
2695 | |
/*
** The following code executes when the parse fails
**
** It pops every entry above the base of the parser stack with
** fts5yy_pop_parser_stack().  The %parse_failure section below is empty in
** this parser, so no further action is taken.  The routine is compiled out
** when fts5YYNOERRORRECOVERY is defined.
*/
#ifndef fts5YYNOERRORRECOVERY
static void fts5yy_parse_failed(
  fts5yyParser *fts5yypParser /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sFail!\n" ,fts5yyTracePrompt);
  }
#endif
  /* Unwind until only the base entry of the stack remains. */
  while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
  /* Here code is inserted which will be executed whenever the
  ** parser fails */
/************ Begin %parse_failure code ***************************************/
/************ End %parse_failure code *****************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
#endif /* fts5YYNOERRORRECOVERY */
2719 | |
/*
** The following code executes when a syntax error first occurs.
**
** The %syntax_error code below reports
** "fts5: syntax error near \"<token text>\"" through sqlite3Fts5ParseError(),
** where the token text is the first FTS5TOKEN.n bytes at FTS5TOKEN.p of the
** offending token.  fts5yymajor is not used.
**
** NOTE(review): pParse is not declared in this function; presumably the
** sqlite3Fts5ParserARG_FETCH macro brings it into scope - confirm against
** the macro definition.
*/
static void fts5yy_syntax_error(
  fts5yyParser *fts5yypParser, /* The parser */
  int fts5yymajor, /* The major type of the error token */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The minor type of the error token */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
/* Lets the %syntax_error code refer to the offending token as FTS5TOKEN. */
#define FTS5TOKEN fts5yyminor
/************ Begin %syntax_error code ****************************************/
#line 30 "fts5parse.y"

  UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
  sqlite3Fts5ParseError(
    pParse, "fts5: syntax error near \"%.*s\"" ,FTS5TOKEN.n,FTS5TOKEN.p
  );
#line 1273 "fts5parse.c"
/************ End %syntax_error code ******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
2743 | |
/*
** The following is executed when the parser accepts
**
** Unless fts5YYNOERRORRECOVERY is defined, the error counter is reset to
** -1.  The parser stack is expected to be back at its base entry by the
** time this is called (checked by assert()).  The %parse_accept section
** below is empty in this parser.
*/
static void fts5yy_accept(
  fts5yyParser *fts5yypParser /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sAccept!\n" ,fts5yyTracePrompt);
  }
#endif
#ifndef fts5YYNOERRORRECOVERY
  fts5yypParser->fts5yyerrcnt = -1;
#endif
  assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack );
  /* Here code is inserted which will be executed whenever the
  ** parser accepts */
/*********** Begin %parse_accept code *****************************************/
/*********** End %parse_accept code *******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
2768 | |
/* The main parser program.
** The first argument is a pointer to a structure obtained from
** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
** The second argument is the major token number. The third is
** the minor token. The fourth optional argument is whatever the
** user wants (and specified in the grammar) and is available for
** use by the action routines.
**
** Inputs:
** <ul>
** <li> A pointer to the parser (an opaque structure.)
** <li> The major token number.
** <li> The minor token number.
** <li> An optional argument of a grammar-specified type.
** </ul>
**
** Outputs:
** None.
**
** Each call feeds one token to the parser.  The loop below repeatedly looks
** up the action for (current state, token): a REDUCE is applied and the
** lookup is repeated; a SHIFT or SHIFTREDUCE pushes the token and ends the
** call; ACCEPT pops the top entry, invokes fts5yy_accept() and returns;
** anything else is a syntax error, handled by one of three mutually
** exclusive strategies chosen at compile time (fts5YYERRORSYMBOL,
** fts5YYNOERRORRECOVERY, or neither).
*/
static void sqlite3Fts5Parser(
  void *fts5yyp, /* The parser */
  int fts5yymajor, /* The major token code number */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor /* The value for the token */
  sqlite3Fts5ParserARG_PDECL /* Optional %extra_argument parameter */
){
  fts5YYMINORTYPE fts5yyminorunion;
  fts5YYACTIONTYPE fts5yyact; /* The parser action. */
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  int fts5yyendofinput; /* True if we are at the end of input */
#endif
#ifdef fts5YYERRORSYMBOL
  int fts5yyerrorhit = 0; /* True if fts5yymajor has invoked an error */
#endif
  fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp; /* The parser */
  sqlite3Fts5ParserCTX_FETCH
  sqlite3Fts5ParserARG_STORE

  assert( fts5yypParser->fts5yytos!=0 );
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  /* A major token code of 0 is treated as the end of input. */
  fts5yyendofinput = (fts5yymajor==0);
#endif

  /* Start from the state recorded in the top-of-stack entry. */
  fts5yyact = fts5yypParser->fts5yytos->stateno;
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    if( fts5yyact < fts5YY_MIN_REDUCE ){
      fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n" ,
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact);
    }else{
      fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n" ,
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE);
    }
  }
#endif

  while(1){ /* Exit by "break" */
    assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack );
    assert( fts5yyact==fts5yypParser->fts5yytos->stateno );
    fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPE)fts5yymajor,fts5yyact);
    if( fts5yyact >= fts5YY_MIN_REDUCE ){
      /* REDUCE: apply the rule, then loop to look up the action for the
      ** same token in the state that fts5yy_reduce() returns. */
      unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE; /* Reduce by this rule */
#ifndef NDEBUG
      assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) );
      if( fts5yyTraceFILE ){
        int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
        if( fts5yysize ){
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n" ,
            fts5yyTracePrompt,
            fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action" ,
            fts5yypParser->fts5yytos[fts5yysize].stateno);
        }else{
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n" ,
            fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action" );
        }
      }
#endif /* NDEBUG */

      /* Check that the stack is large enough to grow by a single entry
      ** if the RHS of the rule is empty. This ensures that there is room
      ** enough on the stack to push the LHS value */
      if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){
#ifdef fts5YYTRACKMAXSTACKDEPTH
        if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
          fts5yypParser->fts5yyhwm++;
          assert( fts5yypParser->fts5yyhwm ==
                  (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack));
        }
#endif
#if fts5YYSTACKDEPTH>0
        if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){
          fts5yyStackOverflow(fts5yypParser);
          break;
        }
#else
        if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz-1] ){
          if( fts5yyGrowStack(fts5yypParser) ){
            fts5yyStackOverflow(fts5yypParser);
            break;
          }
        }
#endif
      }
      fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM);
    }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
      /* SHIFT or SHIFTREDUCE: push the token; this call is finished. */
      fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPE)fts5yymajor,fts5yyminor);
#ifndef fts5YYNOERRORRECOVERY
      fts5yypParser->fts5yyerrcnt--;
#endif
      break;
    }else if( fts5yyact==fts5YY_ACCEPT_ACTION ){
      /* ACCEPT: drop the top entry so that fts5yy_accept() sees the stack
      ** at its base, then return without the trace output further down. */
      fts5yypParser->fts5yytos--;
      fts5yy_accept(fts5yypParser);
      return;
    }else{
      assert( fts5yyact == fts5YY_ERROR_ACTION );
      /* Wrap the token value in the minor-type union so that it can be
      ** handed to fts5yy_destructor(). */
      fts5yyminorunion.fts5yy0 = fts5yyminor;
#ifdef fts5YYERRORSYMBOL
      int fts5yymx;
#endif
#ifndef NDEBUG
      if( fts5yyTraceFILE ){
        fprintf(fts5yyTraceFILE,"%sSyntax Error!\n" ,fts5yyTracePrompt);
      }
#endif
#ifdef fts5YYERRORSYMBOL
      /* A syntax error has occurred.
      ** The response to an error depends upon whether or not the
      ** grammar defines an error token "ERROR".
      **
      ** This is what we do if the grammar does define ERROR:
      **
      ** * Call the %syntax_error function.
      **
      ** * Begin popping the stack until we enter a state where
      ** it is legal to shift the error symbol, then shift
      ** the error symbol.
      **
      ** * Set the error count to three.
      **
      ** * Begin accepting and shifting new tokens. No new error
      ** processing will occur until three tokens have been
      ** shifted successfully.
      **
      */
      if( fts5yypParser->fts5yyerrcnt<0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
      }
      fts5yymx = fts5yypParser->fts5yytos->major;
      if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n" ,
                  fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
        }
#endif
        fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yyminorunion);
        fts5yymajor = fts5YYNOCODE;
      }else{
        /* Pop entries until fts5yy_find_reduce_action() returns a shift or
        ** shift-reduce action for the error symbol, or only the base entry
        ** of the stack remains. */
        while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){
          fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno,
                                                fts5YYERRORSYMBOL);
          if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE ) break;
          fts5yy_pop_parser_stack(fts5yypParser);
        }
        if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){
          fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
          fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
          fts5yypParser->fts5yyerrcnt = -1;
#endif
          fts5yymajor = fts5YYNOCODE;
        }else if( fts5yymx!=fts5YYERRORSYMBOL ){
          fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
        }
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yyerrorhit = 1;
      /* The token was discarded or the parse failed: nothing left to do. */
      if( fts5yymajor==fts5YYNOCODE ) break;
      fts5yyact = fts5yypParser->fts5yytos->stateno;
#elif defined(fts5YYNOERRORRECOVERY)
      /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
      ** do any kind of error recovery. Instead, simply invoke the syntax
      ** error routine and continue going as if nothing had happened.
      **
      ** Applications can set this macro (for example inside %include) if
      ** they intend to abandon the parse upon the first syntax error seen.
      */
      fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      break;
#else /* fts5YYERRORSYMBOL is not defined */
      /* This is what we do if the grammar does not define ERROR:
      **
      ** * Report an error message, and throw away the input token.
      **
      ** * If the input token is $, then fail the parse.
      **
      ** As before, subsequent error messages are suppressed until
      ** three input tokens have been successfully shifted.
      */
      if( fts5yypParser->fts5yyerrcnt<=0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      if( fts5yyendofinput ){
        fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
        fts5yypParser->fts5yyerrcnt = -1;
#endif
      }
      break;
#endif
    }
  }
#ifndef NDEBUG
  /* Trace the symbols left on the stack, skipping the base entry. */
  if( fts5yyTraceFILE ){
    fts5yyStackEntry *i;
    char cDiv = '[';
    fprintf(fts5yyTraceFILE,"%sReturn. Stack=" ,fts5yyTracePrompt);
    for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
      fprintf(fts5yyTraceFILE,"%c%s" , cDiv, fts5yyTokenName[i->major]);
      cDiv = ' ';
    }
    fprintf(fts5yyTraceFILE,"]\n" );
  }
#endif
  return;
}
3000 | |
/*
** Map canonical token code iToken to its fallback token code. The result
** is 0 when iToken has no fallback. In builds where fts5YYFALLBACK is not
** defined the argument is ignored and 0 is always returned.
*/
static int sqlite3Fts5ParserFallback(int iToken){
  int iFallback = 0;
#ifdef fts5YYFALLBACK
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  iFallback = fts5yyFallback[iToken];
#else
  (void)iToken;
#endif
  return iFallback;
}
3014 | |
3015 | #line 1 "fts5_aux.c" |
3016 | /* |
3017 | ** 2014 May 31 |
3018 | ** |
3019 | ** The author disclaims copyright to this source code. In place of |
3020 | ** a legal notice, here is a blessing: |
3021 | ** |
3022 | ** May you do good and not evil. |
3023 | ** May you find forgiveness for yourself and forgive others. |
3024 | ** May you share freely, never taking more than you give. |
3025 | ** |
3026 | ****************************************************************************** |
3027 | */ |
3028 | |
3029 | |
3030 | /* #include "fts5Int.h" */ |
3031 | #include <math.h> /* amalgamator: keep */ |
3032 | |
3033 | /* |
3034 | ** Object used to iterate through all "coalesced phrase instances" in |
3035 | ** a single column of the current row. If the phrase instances in the |
3036 | ** column being considered do not overlap, this object simply iterates |
3037 | ** through them. Or, if they do overlap (share one or more tokens in |
3038 | ** common), each set of overlapping instances is treated as a single |
3039 | ** match. See documentation for the highlight() auxiliary function for |
3040 | ** details. |
3041 | ** |
3042 | ** Usage is: |
3043 | ** |
3044 | ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); |
3045 | ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); |
3046 | ** rc = fts5CInstIterNext(&iter) |
3047 | ** ){ |
3048 | ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); |
3049 | ** } |
3050 | ** |
3051 | */ |
3052 | typedef struct CInstIter CInstIter; |
3053 | struct CInstIter { |
3054 | const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ |
3055 | Fts5Context *pFts; /* First arg to pass to pApi functions */ |
3056 | int iCol; /* Column to search */ |
3057 | int iInst; /* Next phrase instance index */ |
3058 | int nInst; /* Total number of phrase instances */ |
3059 | |
3060 | /* Output variables */ |
3061 | int iStart; /* First token in coalesced phrase instance */ |
3062 | int iEnd; /* Last token in coalesced phrase instance */ |
3063 | }; |
3064 | |
3065 | /* |
3066 | ** Advance the iterator to the next coalesced phrase instance. Return |
3067 | ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. |
3068 | */ |
3069 | static int fts5CInstIterNext(CInstIter *pIter){ |
3070 | int rc = SQLITE_OK; |
3071 | pIter->iStart = -1; |
3072 | pIter->iEnd = -1; |
3073 | |
3074 | while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ |
3075 | int ip; int ic; int io; |
3076 | rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); |
3077 | if( rc==SQLITE_OK ){ |
3078 | if( ic==pIter->iCol ){ |
3079 | int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); |
3080 | if( pIter->iStart<0 ){ |
3081 | pIter->iStart = io; |
3082 | pIter->iEnd = iEnd; |
3083 | }else if( io<=pIter->iEnd ){ |
3084 | if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; |
3085 | }else{ |
3086 | break; |
3087 | } |
3088 | } |
3089 | pIter->iInst++; |
3090 | } |
3091 | } |
3092 | |
3093 | return rc; |
3094 | } |
3095 | |
3096 | /* |
3097 | ** Initialize the iterator object indicated by the final parameter to |
3098 | ** iterate through coalesced phrase instances in column iCol. |
3099 | */ |
3100 | static int fts5CInstIterInit( |
3101 | const Fts5ExtensionApi *pApi, |
3102 | Fts5Context *pFts, |
3103 | int iCol, |
3104 | CInstIter *pIter |
3105 | ){ |
3106 | int rc; |
3107 | |
3108 | memset(pIter, 0, sizeof(CInstIter)); |
3109 | pIter->pApi = pApi; |
3110 | pIter->pFts = pFts; |
3111 | pIter->iCol = iCol; |
3112 | rc = pApi->xInstCount(pFts, &pIter->nInst); |
3113 | |
3114 | if( rc==SQLITE_OK ){ |
3115 | rc = fts5CInstIterNext(pIter); |
3116 | } |
3117 | |
3118 | return rc; |
3119 | } |
3120 | |
3121 | |
3122 | |
3123 | /************************************************************************* |
3124 | ** Start of highlight() implementation. |
3125 | */ |
3126 | typedef struct HighlightContext HighlightContext; |
3127 | struct HighlightContext { |
3128 | CInstIter iter; /* Coalesced Instance Iterator */ |
3129 | int iPos; /* Current token offset in zIn[] */ |
3130 | int iRangeStart; /* First token to include */ |
3131 | int iRangeEnd; /* If non-zero, last token to include */ |
3132 | const char *zOpen; /* Opening highlight */ |
3133 | const char *zClose; /* Closing highlight */ |
3134 | const char *zIn; /* Input text */ |
3135 | int nIn; /* Size of input text in bytes */ |
3136 | int iOff; /* Current offset within zIn[] */ |
3137 | char *zOut; /* Output value */ |
3138 | }; |
3139 | |
3140 | /* |
3141 | ** Append text to the HighlightContext output string - p->zOut. Argument |
3142 | ** z points to a buffer containing n bytes of text to append. If n is |
3143 | ** negative, everything up until the first '\0' is appended to the output. |
3144 | ** |
3145 | ** If *pRc is set to any value other than SQLITE_OK when this function is |
3146 | ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, |
3147 | ** *pRc is set to an error code before returning. |
3148 | */ |
3149 | static void fts5HighlightAppend( |
3150 | int *pRc, |
3151 | HighlightContext *p, |
3152 | const char *z, int n |
3153 | ){ |
3154 | if( *pRc==SQLITE_OK && z ){ |
3155 | if( n<0 ) n = (int)strlen(z); |
3156 | p->zOut = sqlite3_mprintf("%z%.*s" , p->zOut, n, z); |
3157 | if( p->zOut==0 ) *pRc = SQLITE_NOMEM; |
3158 | } |
3159 | } |
3160 | |
3161 | /* |
3162 | ** Tokenizer callback used by implementation of highlight() function. |
3163 | */ |
3164 | static int fts5HighlightCb( |
3165 | void *pContext, /* Pointer to HighlightContext object */ |
3166 | int tflags, /* Mask of FTS5_TOKEN_* flags */ |
3167 | const char *pToken, /* Buffer containing token */ |
3168 | int nToken, /* Size of token in bytes */ |
3169 | int iStartOff, /* Start offset of token */ |
3170 | int iEndOff /* End offset of token */ |
3171 | ){ |
3172 | HighlightContext *p = (HighlightContext*)pContext; |
3173 | int rc = SQLITE_OK; |
3174 | int iPos; |
3175 | |
3176 | UNUSED_PARAM2(pToken, nToken); |
3177 | |
3178 | if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; |
3179 | iPos = p->iPos++; |
3180 | |
3181 | if( p->iRangeEnd>0 ){ |
3182 | if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; |
3183 | if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; |
3184 | } |
3185 | |
3186 | if( iPos==p->iter.iStart ){ |
3187 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); |
3188 | fts5HighlightAppend(&rc, p, p->zOpen, -1); |
3189 | p->iOff = iStartOff; |
3190 | } |
3191 | |
3192 | if( iPos==p->iter.iEnd ){ |
3193 | if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){ |
3194 | fts5HighlightAppend(&rc, p, p->zOpen, -1); |
3195 | } |
3196 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
3197 | fts5HighlightAppend(&rc, p, p->zClose, -1); |
3198 | p->iOff = iEndOff; |
3199 | if( rc==SQLITE_OK ){ |
3200 | rc = fts5CInstIterNext(&p->iter); |
3201 | } |
3202 | } |
3203 | |
3204 | if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ |
3205 | fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); |
3206 | p->iOff = iEndOff; |
3207 | if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){ |
3208 | fts5HighlightAppend(&rc, p, p->zClose, -1); |
3209 | } |
3210 | } |
3211 | |
3212 | return rc; |
3213 | } |
3214 | |
3215 | /* |
3216 | ** Implementation of highlight() function. |
3217 | */ |
3218 | static void fts5HighlightFunction( |
3219 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
3220 | Fts5Context *pFts, /* First arg to pass to pApi functions */ |
3221 | sqlite3_context *pCtx, /* Context for returning result/error */ |
3222 | int nVal, /* Number of values in apVal[] array */ |
3223 | sqlite3_value **apVal /* Array of trailing arguments */ |
3224 | ){ |
3225 | HighlightContext ctx; |
3226 | int rc; |
3227 | int iCol; |
3228 | |
3229 | if( nVal!=3 ){ |
3230 | const char *zErr = "wrong number of arguments to function highlight()" ; |
3231 | sqlite3_result_error(pCtx, zErr, -1); |
3232 | return; |
3233 | } |
3234 | |
3235 | iCol = sqlite3_value_int(apVal[0]); |
3236 | memset(&ctx, 0, sizeof(HighlightContext)); |
3237 | ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); |
3238 | ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); |
3239 | rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); |
3240 | |
3241 | if( ctx.zIn ){ |
3242 | if( rc==SQLITE_OK ){ |
3243 | rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); |
3244 | } |
3245 | |
3246 | if( rc==SQLITE_OK ){ |
3247 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
3248 | } |
3249 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); |
3250 | |
3251 | if( rc==SQLITE_OK ){ |
3252 | sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
3253 | } |
3254 | sqlite3_free(ctx.zOut); |
3255 | } |
3256 | if( rc!=SQLITE_OK ){ |
3257 | sqlite3_result_error_code(pCtx, rc); |
3258 | } |
3259 | } |
3260 | /* |
3261 | ** End of highlight() implementation. |
3262 | **************************************************************************/ |
3263 | |
/*
** Context object handed to the fts5SentenceFinderCb() tokenizer callback.
** It collects, in aFirst[], the token position of the first token of each
** sentence found in the document zDoc.
*/
typedef struct Fts5SFinder {
  int iPos;                       /* Current token position */
  int nFirstAlloc;                /* Number of slots allocated in aFirst[] */
  int nFirst;                     /* Number of slots in use in aFirst[] */
  int *aFirst;                    /* First token of each sentence */
  const char *zDoc;               /* Document being tokenized */
} Fts5SFinder;
3275 | |
3276 | /* |
3277 | ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if |
3278 | ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an |
3279 | ** error occurs. |
3280 | */ |
3281 | static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ |
3282 | if( p->nFirstAlloc==p->nFirst ){ |
3283 | int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; |
3284 | int *aNew; |
3285 | |
3286 | aNew = (int*)sqlite3_realloc64(p->aFirst, nNew*sizeof(int)); |
3287 | if( aNew==0 ) return SQLITE_NOMEM; |
3288 | p->aFirst = aNew; |
3289 | p->nFirstAlloc = nNew; |
3290 | } |
3291 | p->aFirst[p->nFirst++] = iAdd; |
3292 | return SQLITE_OK; |
3293 | } |
3294 | |
3295 | /* |
3296 | ** This function is an xTokenize() callback used by the auxiliary snippet() |
3297 | ** function. Its job is to identify tokens that are the first in a sentence. |
3298 | ** For each such token, an entry is added to the SFinder.aFirst[] array. |
3299 | */ |
3300 | static int fts5SentenceFinderCb( |
3301 | void *pContext, /* Pointer to HighlightContext object */ |
3302 | int tflags, /* Mask of FTS5_TOKEN_* flags */ |
3303 | const char *pToken, /* Buffer containing token */ |
3304 | int nToken, /* Size of token in bytes */ |
3305 | int iStartOff, /* Start offset of token */ |
3306 | int iEndOff /* End offset of token */ |
3307 | ){ |
3308 | int rc = SQLITE_OK; |
3309 | |
3310 | UNUSED_PARAM2(pToken, nToken); |
3311 | UNUSED_PARAM(iEndOff); |
3312 | |
3313 | if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ |
3314 | Fts5SFinder *p = (Fts5SFinder*)pContext; |
3315 | if( p->iPos>0 ){ |
3316 | int i; |
3317 | char c = 0; |
3318 | for(i=iStartOff-1; i>=0; i--){ |
3319 | c = p->zDoc[i]; |
3320 | if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; |
3321 | } |
3322 | if( i!=iStartOff-1 && (c=='.' || c==':') ){ |
3323 | rc = fts5SentenceFinderAdd(p, p->iPos); |
3324 | } |
3325 | }else{ |
3326 | rc = fts5SentenceFinderAdd(p, 0); |
3327 | } |
3328 | p->iPos++; |
3329 | } |
3330 | return rc; |
3331 | } |
3332 | |
3333 | static int fts5SnippetScore( |
3334 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
3335 | Fts5Context *pFts, /* First arg to pass to pApi functions */ |
3336 | int nDocsize, /* Size of column in tokens */ |
3337 | unsigned char *aSeen, /* Array with one element per query phrase */ |
3338 | int iCol, /* Column to score */ |
3339 | int iPos, /* Starting offset to score */ |
3340 | int nToken, /* Max tokens per snippet */ |
3341 | int *pnScore, /* OUT: Score */ |
3342 | int *piPos /* OUT: Adjusted offset */ |
3343 | ){ |
3344 | int rc; |
3345 | int i; |
3346 | int ip = 0; |
3347 | int ic = 0; |
3348 | int iOff = 0; |
3349 | int iFirst = -1; |
3350 | int nInst; |
3351 | int nScore = 0; |
3352 | int iLast = 0; |
3353 | sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken; |
3354 | |
3355 | rc = pApi->xInstCount(pFts, &nInst); |
3356 | for(i=0; i<nInst && rc==SQLITE_OK; i++){ |
3357 | rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); |
3358 | if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<iEnd ){ |
3359 | nScore += (aSeen[ip] ? 1 : 1000); |
3360 | aSeen[ip] = 1; |
3361 | if( iFirst<0 ) iFirst = iOff; |
3362 | iLast = iOff + pApi->xPhraseSize(pFts, ip); |
3363 | } |
3364 | } |
3365 | |
3366 | *pnScore = nScore; |
3367 | if( piPos ){ |
3368 | sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; |
3369 | if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; |
3370 | if( iAdj<0 ) iAdj = 0; |
3371 | *piPos = (int)iAdj; |
3372 | } |
3373 | |
3374 | return rc; |
3375 | } |
3376 | |
3377 | /* |
3378 | ** Return the value in pVal interpreted as utf-8 text. Except, if pVal |
3379 | ** contains a NULL value, return a pointer to a static string zero |
3380 | ** bytes in length instead of a NULL pointer. |
3381 | */ |
3382 | static const char *fts5ValueToText(sqlite3_value *pVal){ |
3383 | const char *zRet = (const char*)sqlite3_value_text(pVal); |
3384 | return zRet ? zRet : "" ; |
3385 | } |
3386 | |
3387 | /* |
3388 | ** Implementation of snippet() function. |
3389 | */ |
3390 | static void fts5SnippetFunction( |
3391 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
3392 | Fts5Context *pFts, /* First arg to pass to pApi functions */ |
3393 | sqlite3_context *pCtx, /* Context for returning result/error */ |
3394 | int nVal, /* Number of values in apVal[] array */ |
3395 | sqlite3_value **apVal /* Array of trailing arguments */ |
3396 | ){ |
3397 | HighlightContext ctx; |
3398 | int rc = SQLITE_OK; /* Return code */ |
3399 | int iCol; /* 1st argument to snippet() */ |
3400 | const char *zEllips; /* 4th argument to snippet() */ |
3401 | int nToken; /* 5th argument to snippet() */ |
3402 | int nInst = 0; /* Number of instance matches this row */ |
3403 | int i; /* Used to iterate through instances */ |
3404 | int nPhrase; /* Number of phrases in query */ |
3405 | unsigned char *aSeen; /* Array of "seen instance" flags */ |
3406 | int iBestCol; /* Column containing best snippet */ |
3407 | int iBestStart = 0; /* First token of best snippet */ |
3408 | int nBestScore = 0; /* Score of best snippet */ |
3409 | int nColSize = 0; /* Total size of iBestCol in tokens */ |
3410 | Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ |
3411 | int nCol; |
3412 | |
3413 | if( nVal!=5 ){ |
3414 | const char *zErr = "wrong number of arguments to function snippet()" ; |
3415 | sqlite3_result_error(pCtx, zErr, -1); |
3416 | return; |
3417 | } |
3418 | |
3419 | nCol = pApi->xColumnCount(pFts); |
3420 | memset(&ctx, 0, sizeof(HighlightContext)); |
3421 | iCol = sqlite3_value_int(apVal[0]); |
3422 | ctx.zOpen = fts5ValueToText(apVal[1]); |
3423 | ctx.zClose = fts5ValueToText(apVal[2]); |
3424 | zEllips = fts5ValueToText(apVal[3]); |
3425 | nToken = sqlite3_value_int(apVal[4]); |
3426 | |
3427 | iBestCol = (iCol>=0 ? iCol : 0); |
3428 | nPhrase = pApi->xPhraseCount(pFts); |
3429 | aSeen = sqlite3_malloc(nPhrase); |
3430 | if( aSeen==0 ){ |
3431 | rc = SQLITE_NOMEM; |
3432 | } |
3433 | if( rc==SQLITE_OK ){ |
3434 | rc = pApi->xInstCount(pFts, &nInst); |
3435 | } |
3436 | |
3437 | memset(&sFinder, 0, sizeof(Fts5SFinder)); |
3438 | for(i=0; i<nCol; i++){ |
3439 | if( iCol<0 || iCol==i ){ |
3440 | int nDoc; |
3441 | int nDocsize; |
3442 | int ii; |
3443 | sFinder.iPos = 0; |
3444 | sFinder.nFirst = 0; |
3445 | rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); |
3446 | if( rc!=SQLITE_OK ) break; |
3447 | rc = pApi->xTokenize(pFts, |
3448 | sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb |
3449 | ); |
3450 | if( rc!=SQLITE_OK ) break; |
3451 | rc = pApi->xColumnSize(pFts, i, &nDocsize); |
3452 | if( rc!=SQLITE_OK ) break; |
3453 | |
3454 | for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){ |
3455 | int ip, ic, io; |
3456 | int iAdj; |
3457 | int nScore; |
3458 | int jj; |
3459 | |
3460 | rc = pApi->xInst(pFts, ii, &ip, &ic, &io); |
3461 | if( ic!=i ) continue; |
3462 | if( io>nDocsize ) rc = FTS5_CORRUPT; |
3463 | if( rc!=SQLITE_OK ) continue; |
3464 | memset(aSeen, 0, nPhrase); |
3465 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, |
3466 | io, nToken, &nScore, &iAdj |
3467 | ); |
3468 | if( rc==SQLITE_OK && nScore>nBestScore ){ |
3469 | nBestScore = nScore; |
3470 | iBestCol = i; |
3471 | iBestStart = iAdj; |
3472 | nColSize = nDocsize; |
3473 | } |
3474 | |
3475 | if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){ |
3476 | for(jj=0; jj<(sFinder.nFirst-1); jj++){ |
3477 | if( sFinder.aFirst[jj+1]>io ) break; |
3478 | } |
3479 | |
3480 | if( sFinder.aFirst[jj]<io ){ |
3481 | memset(aSeen, 0, nPhrase); |
3482 | rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, |
3483 | sFinder.aFirst[jj], nToken, &nScore, 0 |
3484 | ); |
3485 | |
3486 | nScore += (sFinder.aFirst[jj]==0 ? 120 : 100); |
3487 | if( rc==SQLITE_OK && nScore>nBestScore ){ |
3488 | nBestScore = nScore; |
3489 | iBestCol = i; |
3490 | iBestStart = sFinder.aFirst[jj]; |
3491 | nColSize = nDocsize; |
3492 | } |
3493 | } |
3494 | } |
3495 | } |
3496 | } |
3497 | } |
3498 | |
3499 | if( rc==SQLITE_OK ){ |
3500 | rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); |
3501 | } |
3502 | if( rc==SQLITE_OK && nColSize==0 ){ |
3503 | rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); |
3504 | } |
3505 | if( ctx.zIn ){ |
3506 | if( rc==SQLITE_OK ){ |
3507 | rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); |
3508 | } |
3509 | |
3510 | ctx.iRangeStart = iBestStart; |
3511 | ctx.iRangeEnd = iBestStart + nToken - 1; |
3512 | |
3513 | if( iBestStart>0 ){ |
3514 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
3515 | } |
3516 | |
3517 | /* Advance iterator ctx.iter so that it points to the first coalesced |
3518 | ** phrase instance at or following position iBestStart. */ |
3519 | while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){ |
3520 | rc = fts5CInstIterNext(&ctx.iter); |
3521 | } |
3522 | |
3523 | if( rc==SQLITE_OK ){ |
3524 | rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); |
3525 | } |
3526 | if( ctx.iRangeEnd>=(nColSize-1) ){ |
3527 | fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); |
3528 | }else{ |
3529 | fts5HighlightAppend(&rc, &ctx, zEllips, -1); |
3530 | } |
3531 | } |
3532 | if( rc==SQLITE_OK ){ |
3533 | sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); |
3534 | }else{ |
3535 | sqlite3_result_error_code(pCtx, rc); |
3536 | } |
3537 | sqlite3_free(ctx.zOut); |
3538 | sqlite3_free(aSeen); |
3539 | sqlite3_free(sFinder.aFirst); |
3540 | } |
3541 | |
3542 | /************************************************************************/ |
3543 | |
/*
** Per-query data used by the bm25() function. An instance of this
** structure is allocated and populated the first time bm25() is called
** for a query.
*/
typedef struct Fts5Bm25Data {
  int nPhrase;                    /* Number of phrases in query */
  double avgdl;                   /* Average number of tokens in each row */
  double *aIDF;                   /* IDF for each phrase */
  double *aFreq;                  /* Scratch array for phrase frequencies */
} Fts5Bm25Data;
3555 | |
3556 | /* |
3557 | ** Callback used by fts5Bm25GetData() to count the number of rows in the |
3558 | ** table matched by each individual phrase within the query. |
3559 | */ |
3560 | static int fts5CountCb( |
3561 | const Fts5ExtensionApi *pApi, |
3562 | Fts5Context *pFts, |
3563 | void *pUserData /* Pointer to sqlite3_int64 variable */ |
3564 | ){ |
3565 | sqlite3_int64 *pn = (sqlite3_int64*)pUserData; |
3566 | UNUSED_PARAM2(pApi, pFts); |
3567 | (*pn)++; |
3568 | return SQLITE_OK; |
3569 | } |
3570 | |
3571 | /* |
3572 | ** Set *ppData to point to the Fts5Bm25Data object for the current query. |
3573 | ** If the object has not already been allocated, allocate and populate it |
3574 | ** now. |
3575 | */ |
3576 | static int fts5Bm25GetData( |
3577 | const Fts5ExtensionApi *pApi, |
3578 | Fts5Context *pFts, |
3579 | Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ |
3580 | ){ |
3581 | int rc = SQLITE_OK; /* Return code */ |
3582 | Fts5Bm25Data *p; /* Object to return */ |
3583 | |
3584 | p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0); |
3585 | if( p==0 ){ |
3586 | int nPhrase; /* Number of phrases in query */ |
3587 | sqlite3_int64 nRow = 0; /* Number of rows in table */ |
3588 | sqlite3_int64 nToken = 0; /* Number of tokens in table */ |
3589 | sqlite3_int64 nByte; /* Bytes of space to allocate */ |
3590 | int i; |
3591 | |
3592 | /* Allocate the Fts5Bm25Data object */ |
3593 | nPhrase = pApi->xPhraseCount(pFts); |
3594 | nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); |
3595 | p = (Fts5Bm25Data*)sqlite3_malloc64(nByte); |
3596 | if( p==0 ){ |
3597 | rc = SQLITE_NOMEM; |
3598 | }else{ |
3599 | memset(p, 0, (size_t)nByte); |
3600 | p->nPhrase = nPhrase; |
3601 | p->aIDF = (double*)&p[1]; |
3602 | p->aFreq = &p->aIDF[nPhrase]; |
3603 | } |
3604 | |
3605 | /* Calculate the average document length for this FTS5 table */ |
3606 | if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); |
3607 | assert( rc!=SQLITE_OK || nRow>0 ); |
3608 | if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); |
3609 | if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow; |
3610 | |
3611 | /* Calculate an IDF for each phrase in the query */ |
3612 | for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ |
3613 | sqlite3_int64 nHit = 0; |
3614 | rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); |
3615 | if( rc==SQLITE_OK ){ |
3616 | /* Calculate the IDF (Inverse Document Frequency) for phrase i. |
3617 | ** This is done using the standard BM25 formula as found on wikipedia: |
3618 | ** |
3619 | ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) |
3620 | ** |
3621 | ** where "N" is the total number of documents in the set and nHit |
3622 | ** is the number that contain at least one instance of the phrase |
3623 | ** under consideration. |
3624 | ** |
3625 | ** The problem with this is that if (N < 2*nHit), the IDF is |
3626 | ** negative. Which is undesirable. So the mimimum allowable IDF is |
3627 | ** (1e-6) - roughly the same as a term that appears in just over |
3628 | ** half of set of 5,000,000 documents. */ |
3629 | double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); |
3630 | if( idf<=0.0 ) idf = 1e-6; |
3631 | p->aIDF[i] = idf; |
3632 | } |
3633 | } |
3634 | |
3635 | if( rc!=SQLITE_OK ){ |
3636 | sqlite3_free(p); |
3637 | }else{ |
3638 | rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); |
3639 | } |
3640 | if( rc!=SQLITE_OK ) p = 0; |
3641 | } |
3642 | *ppData = p; |
3643 | return rc; |
3644 | } |
3645 | |
3646 | /* |
3647 | ** Implementation of bm25() function. |
3648 | */ |
3649 | static void fts5Bm25Function( |
3650 | const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ |
3651 | Fts5Context *pFts, /* First arg to pass to pApi functions */ |
3652 | sqlite3_context *pCtx, /* Context for returning result/error */ |
3653 | int nVal, /* Number of values in apVal[] array */ |
3654 | sqlite3_value **apVal /* Array of trailing arguments */ |
3655 | ){ |
3656 | const double k1 = 1.2; /* Constant "k1" from BM25 formula */ |
3657 | const double b = 0.75; /* Constant "b" from BM25 formula */ |
3658 | int rc; /* Error code */ |
3659 | double score = 0.0; /* SQL function return value */ |
3660 | Fts5Bm25Data *pData; /* Values allocated/calculated once only */ |
3661 | int i; /* Iterator variable */ |
3662 | int nInst = 0; /* Value returned by xInstCount() */ |
3663 | double D = 0.0; /* Total number of tokens in row */ |
3664 | double *aFreq = 0; /* Array of phrase freq. for current row */ |
3665 | |
3666 | /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) |
3667 | ** for each phrase in the query for the current row. */ |
3668 | rc = fts5Bm25GetData(pApi, pFts, &pData); |
3669 | if( rc==SQLITE_OK ){ |
3670 | aFreq = pData->aFreq; |
3671 | memset(aFreq, 0, sizeof(double) * pData->nPhrase); |
3672 | rc = pApi->xInstCount(pFts, &nInst); |
3673 | } |
3674 | for(i=0; rc==SQLITE_OK && i<nInst; i++){ |
3675 | int ip; int ic; int io; |
3676 | rc = pApi->xInst(pFts, i, &ip, &ic, &io); |
3677 | if( rc==SQLITE_OK ){ |
3678 | double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; |
3679 | aFreq[ip] += w; |
3680 | } |
3681 | } |
3682 | |
3683 | /* Figure out the total size of the current row in tokens. */ |
3684 | if( rc==SQLITE_OK ){ |
3685 | int nTok; |
3686 | rc = pApi->xColumnSize(pFts, -1, &nTok); |
3687 | D = (double)nTok; |
3688 | } |
3689 | |
3690 | /* Determine and return the BM25 score for the current row. Or, if an |
3691 | ** error has occurred, throw an exception. */ |
3692 | if( rc==SQLITE_OK ){ |
3693 | for(i=0; i<pData->nPhrase; i++){ |
3694 | score += pData->aIDF[i] * ( |
3695 | ( aFreq[i] * (k1 + 1.0) ) / |
3696 | ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) |
3697 | ); |
3698 | } |
3699 | sqlite3_result_double(pCtx, -1.0 * score); |
3700 | }else{ |
3701 | sqlite3_result_error_code(pCtx, rc); |
3702 | } |
3703 | } |
3704 | |
3705 | static int sqlite3Fts5AuxInit(fts5_api *pApi){ |
3706 | struct Builtin { |
3707 | const char *zFunc; /* Function name (nul-terminated) */ |
3708 | void *pUserData; /* User-data pointer */ |
3709 | fts5_extension_function xFunc;/* Callback function */ |
3710 | void (*xDestroy)(void*); /* Destructor function */ |
3711 | } aBuiltin [] = { |
3712 | { "snippet" , 0, fts5SnippetFunction, 0 }, |
3713 | { "highlight" , 0, fts5HighlightFunction, 0 }, |
3714 | { "bm25" , 0, fts5Bm25Function, 0 }, |
3715 | }; |
3716 | int rc = SQLITE_OK; /* Return code */ |
3717 | int i; /* To iterate through builtin functions */ |
3718 | |
3719 | for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ |
3720 | rc = pApi->xCreateFunction(pApi, |
3721 | aBuiltin[i].zFunc, |
3722 | aBuiltin[i].pUserData, |
3723 | aBuiltin[i].xFunc, |
3724 | aBuiltin[i].xDestroy |
3725 | ); |
3726 | } |
3727 | |
3728 | return rc; |
3729 | } |
3730 | |
3731 | #line 1 "fts5_buffer.c" |
3732 | /* |
3733 | ** 2014 May 31 |
3734 | ** |
3735 | ** The author disclaims copyright to this source code. In place of |
3736 | ** a legal notice, here is a blessing: |
3737 | ** |
3738 | ** May you do good and not evil. |
3739 | ** May you find forgiveness for yourself and forgive others. |
3740 | ** May you share freely, never taking more than you give. |
3741 | ** |
3742 | ****************************************************************************** |
3743 | */ |
3744 | |
3745 | |
3746 | |
3747 | /* #include "fts5Int.h" */ |
3748 | |
3749 | static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){ |
3750 | if( (u32)pBuf->nSpace<nByte ){ |
3751 | u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64; |
3752 | u8 *pNew; |
3753 | while( nNew<nByte ){ |
3754 | nNew = nNew * 2; |
3755 | } |
3756 | pNew = sqlite3_realloc64(pBuf->p, nNew); |
3757 | if( pNew==0 ){ |
3758 | *pRc = SQLITE_NOMEM; |
3759 | return 1; |
3760 | }else{ |
3761 | pBuf->nSpace = (int)nNew; |
3762 | pBuf->p = pNew; |
3763 | } |
3764 | } |
3765 | return 0; |
3766 | } |
3767 | |
3768 | |
3769 | /* |
3770 | ** Encode value iVal as an SQLite varint and append it to the buffer object |
3771 | ** pBuf. If an OOM error occurs, set the error code in p. |
3772 | */ |
3773 | static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ |
3774 | if( fts5BufferGrow(pRc, pBuf, 9) ) return; |
3775 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); |
3776 | } |
3777 | |
3778 | static void sqlite3Fts5Put32(u8 *aBuf, int iVal){ |
3779 | aBuf[0] = (iVal>>24) & 0x00FF; |
3780 | aBuf[1] = (iVal>>16) & 0x00FF; |
3781 | aBuf[2] = (iVal>> 8) & 0x00FF; |
3782 | aBuf[3] = (iVal>> 0) & 0x00FF; |
3783 | } |
3784 | |
3785 | static int sqlite3Fts5Get32(const u8 *aBuf){ |
3786 | return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]); |
3787 | } |
3788 | |
3789 | /* |
3790 | ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set |
3791 | ** the error code in p. If an error has already occurred when this function |
3792 | ** is called, it is a no-op. |
3793 | */ |
3794 | static void sqlite3Fts5BufferAppendBlob( |
3795 | int *pRc, |
3796 | Fts5Buffer *pBuf, |
3797 | u32 nData, |
3798 | const u8 *pData |
3799 | ){ |
3800 | if( nData ){ |
3801 | if( fts5BufferGrow(pRc, pBuf, nData) ) return; |
3802 | memcpy(&pBuf->p[pBuf->n], pData, nData); |
3803 | pBuf->n += nData; |
3804 | } |
3805 | } |
3806 | |
3807 | /* |
3808 | ** Append the nul-terminated string zStr to the buffer pBuf. This function |
3809 | ** ensures that the byte following the buffer data is set to 0x00, even |
3810 | ** though this byte is not included in the pBuf->n count. |
3811 | */ |
3812 | static void sqlite3Fts5BufferAppendString( |
3813 | int *pRc, |
3814 | Fts5Buffer *pBuf, |
3815 | const char *zStr |
3816 | ){ |
3817 | int nStr = (int)strlen(zStr); |
3818 | sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); |
3819 | pBuf->n--; |
3820 | } |
3821 | |
3822 | /* |
3823 | ** Argument zFmt is a printf() style format string. This function performs |
3824 | ** the printf() style processing, then appends the results to buffer pBuf. |
3825 | ** |
3826 | ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte |
3827 | ** following the buffer data is set to 0x00, even though this byte is not |
3828 | ** included in the pBuf->n count. |
3829 | */ |
3830 | static void sqlite3Fts5BufferAppendPrintf( |
3831 | int *pRc, |
3832 | Fts5Buffer *pBuf, |
3833 | char *zFmt, ... |
3834 | ){ |
3835 | if( *pRc==SQLITE_OK ){ |
3836 | char *zTmp; |
3837 | va_list ap; |
3838 | va_start(ap, zFmt); |
3839 | zTmp = sqlite3_vmprintf(zFmt, ap); |
3840 | va_end(ap); |
3841 | |
3842 | if( zTmp==0 ){ |
3843 | *pRc = SQLITE_NOMEM; |
3844 | }else{ |
3845 | sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); |
3846 | sqlite3_free(zTmp); |
3847 | } |
3848 | } |
3849 | } |
3850 | |
3851 | static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ |
3852 | char *zRet = 0; |
3853 | if( *pRc==SQLITE_OK ){ |
3854 | va_list ap; |
3855 | va_start(ap, zFmt); |
3856 | zRet = sqlite3_vmprintf(zFmt, ap); |
3857 | va_end(ap); |
3858 | if( zRet==0 ){ |
3859 | *pRc = SQLITE_NOMEM; |
3860 | } |
3861 | } |
3862 | return zRet; |
3863 | } |
3864 | |
3865 | |
3866 | /* |
3867 | ** Free any buffer allocated by pBuf. Zero the structure before returning. |
3868 | */ |
3869 | static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ |
3870 | sqlite3_free(pBuf->p); |
3871 | memset(pBuf, 0, sizeof(Fts5Buffer)); |
3872 | } |
3873 | |
/*
** Mark the buffer as empty by resetting its size (pBuf->n) to zero. The
** associated memory allocation is neither freed nor overwritten, so it
** remains available for reuse by subsequent appends.
*/
static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
  pBuf->n = 0;
}
3881 | |
/*
** Set the buffer to contain nData/pData. The buffer size is first reset
** to zero, then the new data is appended using
** sqlite3Fts5BufferAppendBlob(). Error handling is delegated to that
** call, which is passed pRc (for example to leave an OOM error code
** in *pRc).
**
** Note that pBuf->n is reset to zero unconditionally, even if *pRc
** already holds an error code when this function is called.
*/
static void sqlite3Fts5BufferSet(
  int *pRc,
  Fts5Buffer *pBuf,
  int nData,
  const u8 *pData
){
  pBuf->n = 0;
  sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
}
3896 | |
/*
** Read the next entry from the position list stored in the n byte buffer
** a[], starting at byte offset *pi.
**
** If *pi is already at or past the end of the buffer, or if a corrupt
** record is detected, *piOff is set to -1 and 1 is returned. Otherwise,
** 0 is returned and *pi is advanced past the varints that were consumed.
**
** Entries are decoded as the inverse of what
** sqlite3Fts5PoslistSafeAppend() writes:
**
**   * A varint of 2 or greater is a delta, stored with 2 added to it. The
**     delta is added to *piOff and the sum masked to its lower 31 bits;
**     bits 32 and up of the previous *piOff are retained.
**
**   * A varint of 1 introduces a new upper half. The varint that follows
**     it becomes bits 32 and up of *piOff, and the varint after that is
**     a delta (again stored with 2 added) relative to offset 0.
**
**   * A varint of 0 is consumed and 0 is returned with *piOff unmodified.
**
** fts5FastGetVarint32() is a macro defined elsewhere. As used here it
** reads a varint from a[] at offset i into iVal and advances i.
*/
static int sqlite3Fts5PoslistNext64(
  const u8 *a, int n, /* Buffer containing poslist */
  int *pi, /* IN/OUT: Offset within a[] */
  i64 *piOff /* IN/OUT: Current offset */
){
  int i = *pi;
  if( i>=n ){
    /* EOF */
    *piOff = -1;
    return 1;
  }else{
    i64 iOff = *piOff;
    u32 iVal;
    fts5FastGetVarint32(a, i, iVal);
    if( iVal<=1 ){
      if( iVal==0 ){
        /* Consume the 0x00 varint but leave *piOff as it is. */
        *pi = i;
        return 0;
      }
      /* iVal==1: the next varint supplies bits 32 and up of the offset */
      fts5FastGetVarint32(a, i, iVal);
      iOff = ((i64)iVal) << 32;
      assert( iOff>=0 );
      /* ...and the one after that is the first delta within it. Deltas
      ** are stored +2, so anything less than 2 cannot be valid here. */
      fts5FastGetVarint32(a, i, iVal);
      if( iVal<2 ){
        /* This is a corrupt record. So stop parsing it here. */
        *piOff = -1;
        return 1;
      }
      *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
    }else{
      /* Ordinary delta. "<<" binds tighter than "&", so the first term
      ** keeps the upper bits of iOff; the second is the new lower part. */
      *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
    }
    *pi = i;
    assert_nc( *piOff>=iOff );
    return 0;
  }
}
3934 | |
3935 | |
3936 | /* |
3937 | ** Advance the iterator object passed as the only argument. Return true |
3938 | ** if the iterator reaches EOF, or false otherwise. |
3939 | */ |
3940 | static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ |
3941 | if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){ |
3942 | pIter->bEof = 1; |
3943 | } |
3944 | return pIter->bEof; |
3945 | } |
3946 | |
3947 | static int sqlite3Fts5PoslistReaderInit( |
3948 | const u8 *a, int n, /* Poslist buffer to iterate through */ |
3949 | Fts5PoslistReader *pIter /* Iterator object to initialize */ |
3950 | ){ |
3951 | memset(pIter, 0, sizeof(*pIter)); |
3952 | pIter->a = a; |
3953 | pIter->n = n; |
3954 | sqlite3Fts5PoslistReaderNext(pIter); |
3955 | return pIter->bEof; |
3956 | } |
3957 | |
3958 | /* |
3959 | ** Append position iPos to the position list being accumulated in buffer |
3960 | ** pBuf, which must be already be large enough to hold the new data. |
3961 | ** The previous position written to this list is *piPrev. *piPrev is set |
3962 | ** to iPos before returning. |
3963 | */ |
3964 | static void sqlite3Fts5PoslistSafeAppend( |
3965 | Fts5Buffer *pBuf, |
3966 | i64 *piPrev, |
3967 | i64 iPos |
3968 | ){ |
3969 | if( iPos>=*piPrev ){ |
3970 | static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; |
3971 | if( (iPos & colmask) != (*piPrev & colmask) ){ |
3972 | pBuf->p[pBuf->n++] = 1; |
3973 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32)); |
3974 | *piPrev = (iPos & colmask); |
3975 | } |
3976 | pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2); |
3977 | *piPrev = iPos; |
3978 | } |
3979 | } |
3980 | |
3981 | static int sqlite3Fts5PoslistWriterAppend( |
3982 | Fts5Buffer *pBuf, |
3983 | Fts5PoslistWriter *pWriter, |
3984 | i64 iPos |
3985 | ){ |
3986 | int rc = 0; /* Initialized only to suppress erroneous warning from Clang */ |
3987 | if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc; |
3988 | sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos); |
3989 | return SQLITE_OK; |
3990 | } |
3991 | |
3992 | static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){ |
3993 | void *pRet = 0; |
3994 | if( *pRc==SQLITE_OK ){ |
3995 | pRet = sqlite3_malloc64(nByte); |
3996 | if( pRet==0 ){ |
3997 | if( nByte>0 ) *pRc = SQLITE_NOMEM; |
3998 | }else{ |
3999 | memset(pRet, 0, (size_t)nByte); |
4000 | } |
4001 | } |
4002 | return pRet; |
4003 | } |
4004 | |
4005 | /* |
4006 | ** Return a nul-terminated copy of the string indicated by pIn. If nIn |
4007 | ** is non-negative, then it is the length of the string in bytes. Otherwise, |
4008 | ** the length of the string is determined using strlen(). |
4009 | ** |
4010 | ** It is the responsibility of the caller to eventually free the returned |
4011 | ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. |
4012 | */ |
4013 | static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ |
4014 | char *zRet = 0; |
4015 | if( *pRc==SQLITE_OK ){ |
4016 | if( nIn<0 ){ |
4017 | nIn = (int)strlen(pIn); |
4018 | } |
4019 | zRet = (char*)sqlite3_malloc(nIn+1); |
4020 | if( zRet ){ |
4021 | memcpy(zRet, pIn, nIn); |
4022 | zRet[nIn] = '\0'; |
4023 | }else{ |
4024 | *pRc = SQLITE_NOMEM; |
4025 | } |
4026 | } |
4027 | return zRet; |
4028 | } |
4029 | |
4030 | |
/*
** Return true (1) if character 't' may be part of an FTS5 bareword, or
** false (0) otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters (any byte with the 0x80 bit set),
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**   * The unicode "substitute" character (0x1A).
*/
static int sqlite3Fts5IsBareword(char t){
  /* One flag per ASCII code point. The table is static and const so that
  ** it is built once, rather than being re-initialized on the stack each
  ** time this function is called. */
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };

  /* Work with the unsigned byte value so that the table index can never
  ** be negative, whether or not plain "char" is signed. */
  const unsigned char c = (unsigned char)t;
  return c>=0x80 || aBareword[c];
}
4055 | |
4056 | |
4057 | /************************************************************************* |
4058 | */ |
/*
** A single entry in an Fts5Termset hash table, identifying one
** (iIdx, term) pair. sqlite3Fts5TermsetAdd() creates each entry as a
** single allocation, with the term bytes stored immediately after the
** structure itself.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm; /* Term bytes (nTerm bytes, no nul-terminator added) */
  int nTerm; /* Size of pTerm in bytes */
  int iIdx; /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext; /* Next entry in the same hash bucket */
};
4066 | |
/*
** A set of (iIdx, term) pairs, implemented as a fixed-size hash table.
** Each slot of apHash[] is the head of a linked list of entries (linked
** via Fts5TermsetEntry.pNext) whose hash value maps to that slot.
*/
struct Fts5Termset {
  Fts5TermsetEntry *apHash[512]; /* Hash buckets */
};
4070 | |
4071 | static int sqlite3Fts5TermsetNew(Fts5Termset **pp){ |
4072 | int rc = SQLITE_OK; |
4073 | *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); |
4074 | return rc; |
4075 | } |
4076 | |
4077 | static int sqlite3Fts5TermsetAdd( |
4078 | Fts5Termset *p, |
4079 | int iIdx, |
4080 | const char *pTerm, int nTerm, |
4081 | int *pbPresent |
4082 | ){ |
4083 | int rc = SQLITE_OK; |
4084 | *pbPresent = 0; |
4085 | if( p ){ |
4086 | int i; |
4087 | u32 hash = 13; |
4088 | Fts5TermsetEntry *pEntry; |
4089 | |
4090 | /* Calculate a hash value for this term. This is the same hash checksum |
4091 | ** used by the fts5_hash.c module. This is not important for correct |
4092 | ** operation of the module, but is necessary to ensure that some tests |
4093 | ** designed to produce hash table collisions really do work. */ |
4094 | for(i=nTerm-1; i>=0; i--){ |
4095 | hash = (hash << 3) ^ hash ^ pTerm[i]; |
4096 | } |
4097 | hash = (hash << 3) ^ hash ^ iIdx; |
4098 | hash = hash % ArraySize(p->apHash); |
4099 | |
4100 | for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ |
4101 | if( pEntry->iIdx==iIdx |
4102 | && pEntry->nTerm==nTerm |
4103 | && memcmp(pEntry->pTerm, pTerm, nTerm)==0 |
4104 | ){ |
4105 | *pbPresent = 1; |
4106 | break; |
4107 | } |
4108 | } |
4109 | |
4110 | if( pEntry==0 ){ |
4111 | pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); |
4112 | if( pEntry ){ |
4113 | pEntry->pTerm = (char*)&pEntry[1]; |
4114 | pEntry->nTerm = nTerm; |
4115 | pEntry->iIdx = iIdx; |
4116 | memcpy(pEntry->pTerm, pTerm, nTerm); |
4117 | pEntry->pNext = p->apHash[hash]; |
4118 | p->apHash[hash] = pEntry; |
4119 | } |
4120 | } |
4121 | } |
4122 | |
4123 | return rc; |
4124 | } |
4125 | |
4126 | static void sqlite3Fts5TermsetFree(Fts5Termset *p){ |
4127 | if( p ){ |
4128 | u32 i; |
4129 | for(i=0; i<ArraySize(p->apHash); i++){ |
4130 | Fts5TermsetEntry *pEntry = p->apHash[i]; |
4131 | while( pEntry ){ |
4132 | Fts5TermsetEntry *pDel = pEntry; |
4133 | pEntry = pEntry->pNext; |
4134 | sqlite3_free(pDel); |
4135 | } |
4136 | } |
4137 | sqlite3_free(p); |
4138 | } |
4139 | } |
4140 | |
4141 | #line 1 "fts5_config.c" |
4142 | /* |
4143 | ** 2014 Jun 09 |
4144 | ** |
4145 | ** The author disclaims copyright to this source code. In place of |
4146 | ** a legal notice, here is a blessing: |
4147 | ** |
4148 | ** May you do good and not evil. |
4149 | ** May you find forgiveness for yourself and forgive others. |
4150 | ** May you share freely, never taking more than you give. |
4151 | ** |
4152 | ****************************************************************************** |
4153 | ** |
4154 | ** This is an SQLite module implementing full-text search. |
4155 | */ |
4156 | |
4157 | |
4158 | /* #include "fts5Int.h" */ |
4159 | |
/* Default values for FTS5 configuration settings. The code that applies
** them lies outside this part of the file; going by the names they are
** presumably the defaults for the page size, automerge, usermerge,
** crisismerge and hash-size settings respectively - TODO confirm against
** the code that uses them. */
#define FTS5_DEFAULT_PAGE_SIZE 4050
#define FTS5_DEFAULT_AUTOMERGE 4
#define FTS5_DEFAULT_USERMERGE 4
#define FTS5_DEFAULT_CRISISMERGE 16
#define FTS5_DEFAULT_HASHSIZE (1024*1024)

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (64*1024)
4168 | |
/*
** Return 1 if x is a white-space character, or 0 otherwise. For the
** purposes of this module only the ASCII space (0x20) counts as
** white-space; tabs and newlines do not.
*/
static int fts5_iswhitespace(char x){
  return x==' ' ? 1 : 0;
}
4172 | |
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - double-quote, single-quote, '[' or back-tick - or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
4176 | |
/*
** Argument pIn is either NULL or points to a character that is part of a
** nul-terminated string. Return a pointer to the first character at or
** following pIn that is not a white-space character (as defined by
** fts5_iswhitespace()). If pIn is NULL, NULL is returned.
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){
    /* no-op */
  }
  return z;
}
4189 | |
/*
** Argument pIn points to a character that is part of a nul-terminated
** string. Return a pointer to the first character at or following pIn
** that is not a "bareword" character (see sqlite3Fts5IsBareword()).
**
** If the character at pIn is itself not a bareword character - that is,
** if there is no bareword at pIn at all - NULL is returned instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *zEnd;
  for(zEnd=pIn; sqlite3Fts5IsBareword(*zEnd); zEnd++){
    /* no-op */
  }
  return (zEnd==pIn) ? 0 : zEnd;
}
4201 | |
/*
** Return 1 if character a is an ASCII decimal digit ('0' to '9'
** inclusive), or 0 otherwise.
*/
static int fts5_isdigit(char a){
  return !(a<'0' || a>'9');
}
4205 | |
4206 | |
4207 | |
/*
** Argument pIn points to the first character of what is expected to be an
** SQL literal within a nul-terminated string. Recognized forms are:
**
**   * NULL (in any case),
**   * a blob literal: x'...' or X'...' with an even number of hex digits,
**   * a single-quoted string, in which '' is an escaped quote, and
**   * a number: optional sign, digits and an optional fractional part.
**
** Return a pointer to the character immediately following the literal,
** or NULL if there is no well-formed literal at pIn.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* pIn points at the 'x' and p at what should be the closing quote.
        ** The distance between them is even only if the number of hex
        ** digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      while( 1 ){
        if( *p=='\0' ){
          /* The input ended before the closing quote was found. Testing
          ** for this before each advance guarantees that p is never moved
          ** past the nul-terminator, even when the input consists of
          ** nothing but a single quote character. */
          p = 0;
          break;
        }
        if( *p=='\'' ){
          p++;
          /* A quote not followed by a second quote closes the string.
          ** Two quotes in a row are an escaped quote character. */
          if( *p!='\'' ) break;
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it continues with
      ** a decimal point followed by at least one digit. Exponents ('E')
      ** are not handled by this function. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
4270 | |
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()).
**
** This function dequotes the string in place: the characters between the
** open-quote and the corresponding close-quote are moved to the start of
** the buffer, each doubled close-quote character is replaced by a single
** one, and a new nul-terminator byte is added.
**
** The value returned is the byte offset, within the original string, of
** the character immediately following the close-quote. If the end of the
** string is reached before a close-quote is found, everything after the
** open-quote is treated as the quoted text and the value returned is the
** offset of the original nul-terminator.
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Next byte of z[] to read */
  int iWrite = 0;                 /* Next byte of z[] to write */

  /* The close-quote is the same as the open-quote, except that '[' is
  ** closed by ']'. */
  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  for(;;){
    char c = z[iRead];
    if( c=='\0' ) break;

    if( c!=cClose ){
      /* An ordinary character. Copy it to the output. */
      z[iWrite++] = c;
      iRead++;
      continue;
    }

    if( z[iRead+1]!=cClose ){
      /* Character iRead was the close quote. */
      iRead++;
      break;
    }

    /* Characters iRead and iRead+1 form an escaped quote character. Copy
    ** a single quote character to the output and skip past both. */
    z[iWrite++] = cClose;
    iRead += 2;
  }

  z[iWrite] = '\0';
  return iRead;
}
4315 | |
/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters. The conversion is done in-place by fts5Dequote().
** If the input does not begin with one of the four quote characters,
** then this routine is a no-op. The first character of the input must
** not be white-space.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not a quoted string. Nothing to do. */
      break;
  }
}
4338 | |
4339 | |
/*
** One entry of a table that maps textual option values to integer codes,
** as searched by fts5ConfigSetEnum(). Such a table is an array of these
** structures terminated by an entry with zName==0.
*/
struct Fts5Enum {
  const char *zName; /* Name of the value (0 marks the end of the table) */
  int eVal; /* Integer code corresponding to zName */
};
typedef struct Fts5Enum Fts5Enum;
4345 | |
4346 | static int fts5ConfigSetEnum( |
4347 | const Fts5Enum *aEnum, |
4348 | const char *zEnum, |
4349 | int *peVal |
4350 | ){ |
4351 | int nEnum = (int)strlen(zEnum); |
4352 | int i; |
4353 | int iVal = -1; |
4354 | |
4355 | for(i=0; aEnum[i].zName; i++){ |
4356 | if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ |
4357 | if( iVal>=0 ) return SQLITE_ERROR; |
4358 | iVal = aEnum[i].eVal; |
4359 | } |
4360 | } |
4361 | |
4362 | *peVal = iVal; |
4363 | return iVal<0 ? SQLITE_ERROR : SQLITE_OK; |
4364 | } |
4365 | |
/*
** Parse a "special" CREATE VIRTUAL TABLE directive and update
** configuration object pConfig as appropriate.
**
** zCmd is the name of the directive and zArg its argument. zCmd is
** compared, ignoring case and over its own length only, against each of
** the following names in this order, so an abbreviation selects the first
** name that it is a prefix of:
**
**     prefix, tokenize, content, content_rowid, columnsize, detail
**
** If successful, object pConfig is updated and SQLITE_OK returned. If
** an error occurs, an SQLite error code is returned and an error message
** may be left in *pzErr. It is the responsibility of the caller to
** eventually free any such error message using sqlite3_free().
*/
static int fts5ConfigParseSpecial(
  Fts5Global *pGlobal,
  Fts5Config *pConfig, /* Configuration object to update */
  const char *zCmd, /* Special command to parse */
  const char *zArg, /* Argument to parse */
  char **pzErr /* OUT: Error message */
){
  int rc = SQLITE_OK;
  int nCmd = (int)strlen(zCmd);

  /* prefix=N[,N...]: a list of prefix lengths, each between 1 and 999,
  ** separated by spaces and/or commas. Each is appended to
  ** pConfig->aPrefix[], which is allocated on first use with room for
  ** FTS5_MAX_PREFIX_INDEXES entries. */
  if( sqlite3_strnicmp("prefix" , zCmd, nCmd)==0 ){
    const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
    const char *p;
    int bFirst = 1;
    if( pConfig->aPrefix==0 ){
      pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
      if( rc ) return rc;
    }

    p = zArg;
    while( 1 ){
      int nPre = 0;

      /* Skip the separator. A comma is only accepted after the first
      ** value has been read. End of input terminates the loop. */
      while( p[0]==' ' ) p++;
      if( bFirst==0 && p[0]==',' ){
        p++;
        while( p[0]==' ' ) p++;
      }else if( p[0]=='\0' ){
        break;
      }
      if( p[0]<'0' || p[0]>'9' ){
        *pzErr = sqlite3_mprintf("malformed prefix=... directive" );
        rc = SQLITE_ERROR;
        break;
      }

      if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
        *pzErr = sqlite3_mprintf(
          "too many prefix indexes (max %d)" , FTS5_MAX_PREFIX_INDEXES
        );
        rc = SQLITE_ERROR;
        break;
      }

      /* Accumulate the decimal value. The loop stops as soon as the value
      ** reaches 1000 so that it cannot overflow; such values are rejected
      ** by the range check that follows. */
      while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
        nPre = nPre*10 + (p[0] - '0');
        p++;
      }

      if( nPre<=0 || nPre>=1000 ){
        *pzErr = sqlite3_mprintf("prefix length out of range (max 999)" );
        rc = SQLITE_ERROR;
        break;
      }

      pConfig->aPrefix[pConfig->nPrefix] = nPre;
      pConfig->nPrefix++;
      bFirst = 0;
    }
    assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
    return rc;
  }

  /* tokenize=...: zArg is split into a list of words, each either a
  ** bareword or a single-quoted string literal, which are then passed to
  ** sqlite3Fts5GetTokenizer(). Only one tokenize= directive is allowed. */
  if( sqlite3_strnicmp("tokenize" , zCmd, nCmd)==0 ){
    const char *p = (const char*)zArg;
    /* There cannot be more words than there are bytes in zArg, and the
    ** dequoted words plus one nul-terminator each cannot require more
    ** than twice the size of zArg. Both arrays are sized accordingly. */
    sqlite3_int64 nArg = strlen(zArg) + 1;
    char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
    char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
    char *pSpace = pDel;

    if( azArg && pSpace ){
      if( pConfig->pTok ){
        *pzErr = sqlite3_mprintf("multiple tokenize=... directives" );
        rc = SQLITE_ERROR;
      }else{
        /* From here on nArg counts the words extracted so far. p is set
        ** to NULL if a word cannot be parsed, which ends the loop. */
        for(nArg=0; p && *p; nArg++){
          const char *p2 = fts5ConfigSkipWhitespace(p);
          if( *p2=='\'' ){
            p = fts5ConfigSkipLiteral(p2);
          }else{
            p = fts5ConfigSkipBareword(p2);
          }
          if( p ){
            /* Copy the word into the zeroed pSpace buffer (which supplies
            ** the nul-terminator) and dequote the copy in place. */
            memcpy(pSpace, p2, p-p2);
            azArg[nArg] = pSpace;
            sqlite3Fts5Dequote(pSpace);
            pSpace += (p - p2) + 1;
            p = fts5ConfigSkipWhitespace(p);
          }
        }
        if( p==0 ){
          *pzErr = sqlite3_mprintf("parse error in tokenize directive" );
          rc = SQLITE_ERROR;
        }else{
          rc = sqlite3Fts5GetTokenizer(pGlobal,
              (const char**)azArg, (int)nArg, pConfig,
              pzErr
          );
        }
      }
    }

    /* The words in azArg[] point into pDel, so free both together. */
    sqlite3_free(azArg);
    sqlite3_free(pDel);
    return rc;
  }

  /* content=...: a non-empty argument selects an external content table,
  ** stored in zContent as a quoted "db"."table" style name. An empty
  ** argument selects a contentless table. */
  if( sqlite3_strnicmp("content" , zCmd, nCmd)==0 ){
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
      *pzErr = sqlite3_mprintf("multiple content=... directives" );
      rc = SQLITE_ERROR;
    }else{
      if( zArg[0] ){
        pConfig->eContent = FTS5_CONTENT_EXTERNAL;
        pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q" , pConfig->zDb,zArg);
      }else{
        pConfig->eContent = FTS5_CONTENT_NONE;
      }
    }
    return rc;
  }

  /* content_rowid=...: store a copy of the argument in zContentRowid. */
  if( sqlite3_strnicmp("content_rowid" , zCmd, nCmd)==0 ){
    if( pConfig->zContentRowid ){
      *pzErr = sqlite3_mprintf("multiple content_rowid=... directives" );
      rc = SQLITE_ERROR;
    }else{
      pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
    }
    return rc;
  }

  /* columnsize=0 or columnsize=1: the argument must be exactly "0" or
  ** "1". */
  if( sqlite3_strnicmp("columnsize" , zCmd, nCmd)==0 ){
    if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
      *pzErr = sqlite3_mprintf("malformed columnsize=... directive" );
      rc = SQLITE_ERROR;
    }else{
      pConfig->bColumnsize = (zArg[0]=='1');
    }
    return rc;
  }

  /* detail=none|full|columns: the argument is looked up using
  ** fts5ConfigSetEnum(), so an unambiguous abbreviation is accepted. */
  if( sqlite3_strnicmp("detail" , zCmd, nCmd)==0 ){
    const Fts5Enum aDetail[] = {
      { "none" , FTS5_DETAIL_NONE },
      { "full" , FTS5_DETAIL_FULL },
      { "columns" , FTS5_DETAIL_COLUMNS },
      { 0, 0 }
    };

    if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
      *pzErr = sqlite3_mprintf("malformed detail=... directive" );
    }
    return rc;
  }

  /* zCmd did not match any of the known directive names. */
  *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"" , nCmd, zCmd);
  return SQLITE_ERROR;
}
4533 | |
4534 | /* |
4535 | ** Allocate an instance of the default tokenizer ("simple") at |
4536 | ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error |
4537 | ** code if an error occurs. |
4538 | */ |
4539 | static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ |
4540 | assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); |
4541 | return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0); |
4542 | } |
4543 | |
4544 | /* |
4545 | ** Gobble up the first bareword or quoted word from the input buffer zIn. |
4546 | ** Return a pointer to the character immediately following the last in |
4547 | ** the gobbled word if successful, or a NULL pointer otherwise (failed |
4548 | ** to find close-quote character). |
4549 | ** |
4550 | ** Before returning, set pzOut to point to a new buffer containing a |
4551 | ** nul-terminated, dequoted copy of the gobbled word. If the word was |
4552 | ** quoted, *pbQuoted is also set to 1 before returning. |
4553 | ** |
4554 | ** If *pRc is other than SQLITE_OK when this function is called, it is |
4555 | ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this |
4556 | ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* |
4557 | ** set if a parse error (failed to find close quote) occurs. |
4558 | */ |
4559 | static const char *fts5ConfigGobbleWord( |
4560 | int *pRc, /* IN/OUT: Error code */ |
4561 | const char *zIn, /* Buffer to gobble string/bareword from */ |
4562 | char **pzOut, /* OUT: malloc'd buffer containing str/bw */ |
4563 | int *pbQuoted /* OUT: Set to true if dequoting required */ |
4564 | ){ |
4565 | const char *zRet = 0; |
4566 | |
4567 | sqlite3_int64 nIn = strlen(zIn); |
4568 | char *zOut = sqlite3_malloc64(nIn+1); |
4569 | |
4570 | assert( *pRc==SQLITE_OK ); |
4571 | *pbQuoted = 0; |
4572 | *pzOut = 0; |
4573 | |
4574 | if( zOut==0 ){ |
4575 | *pRc = SQLITE_NOMEM; |
4576 | }else{ |
4577 | memcpy(zOut, zIn, (size_t)(nIn+1)); |
4578 | if( fts5_isopenquote(zOut[0]) ){ |
4579 | int ii = fts5Dequote(zOut); |
4580 | zRet = &zIn[ii]; |
4581 | *pbQuoted = 1; |
4582 | }else{ |
4583 | zRet = fts5ConfigSkipBareword(zIn); |
4584 | if( zRet ){ |
4585 | zOut[zRet-zIn] = '\0'; |
4586 | } |
4587 | } |
4588 | } |
4589 | |
4590 | if( zRet==0 ){ |
4591 | sqlite3_free(zOut); |
4592 | }else{ |
4593 | *pzOut = zOut; |
4594 | } |
4595 | |
4596 | return zRet; |
4597 | } |
4598 | |
4599 | static int fts5ConfigParseColumn( |
4600 | Fts5Config *p, |
4601 | char *zCol, |
4602 | char *zArg, |
4603 | char **pzErr |
4604 | ){ |
4605 | int rc = SQLITE_OK; |
4606 | if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) |
4607 | || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) |
4608 | ){ |
4609 | *pzErr = sqlite3_mprintf("reserved fts5 column name: %s" , zCol); |
4610 | rc = SQLITE_ERROR; |
4611 | }else if( zArg ){ |
4612 | if( 0==sqlite3_stricmp(zArg, "unindexed" ) ){ |
4613 | p->abUnindexed[p->nCol] = 1; |
4614 | }else{ |
4615 | *pzErr = sqlite3_mprintf("unrecognized column option: %s" , zArg); |
4616 | rc = SQLITE_ERROR; |
4617 | } |
4618 | } |
4619 | |
4620 | p->azCol[p->nCol++] = zCol; |
4621 | return rc; |
4622 | } |
4623 | |
4624 | /* |
4625 | ** Populate the Fts5Config.zContentExprlist string. |
4626 | */ |
4627 | static int fts5ConfigMakeExprlist(Fts5Config *p){ |
4628 | int i; |
4629 | int rc = SQLITE_OK; |
4630 | Fts5Buffer buf = {0, 0, 0}; |
4631 | |
4632 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q" , p->zContentRowid); |
4633 | if( p->eContent!=FTS5_CONTENT_NONE ){ |
4634 | for(i=0; i<p->nCol; i++){ |
4635 | if( p->eContent==FTS5_CONTENT_EXTERNAL ){ |
4636 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q" , p->azCol[i]); |
4637 | }else{ |
4638 | sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d" , i); |
4639 | } |
4640 | } |
4641 | } |
4642 | |
4643 | assert( p->zContentExprlist==0 ); |
4644 | p->zContentExprlist = (char*)buf.p; |
4645 | return rc; |
4646 | } |
4647 | |
/*
** Arguments nArg/azArg contain the string arguments passed to the xCreate
** or xConnect method of the virtual table. This function attempts to
** allocate an instance of Fts5Config containing the results of parsing
** those arguments.
**
** azArg[1] is used as the database name and azArg[2] as the table name.
** Each argument from azArg[3] onwards is either a column definition
** ("name" or "name option") or a directive of the form "option = value",
** handled by fts5ConfigParseColumn() and fts5ConfigParseSpecial()
** respectively.
**
** If successful, SQLITE_OK is returned and *ppOut is set to point to the
** new Fts5Config object. If an error occurs, an SQLite error code is
** returned, *ppOut is set to NULL and an error message may be left in
** *pzErr. It is the responsibility of the caller to eventually free any
** such error message using sqlite3_free().
*/
static int sqlite3Fts5ConfigParse(
  Fts5Global *pGlobal,
  sqlite3 *db,
  int nArg, /* Number of arguments */
  const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
  Fts5Config **ppOut, /* OUT: Results of parse */
  char **pzErr /* OUT: Error message */
){
  int rc = SQLITE_OK; /* Return code */
  Fts5Config *pRet; /* New object to return */
  int i;
  sqlite3_int64 nByte;

  *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
  if( pRet==0 ) return SQLITE_NOMEM;
  memset(pRet, 0, sizeof(Fts5Config));
  pRet->db = db;
  pRet->iCookie = -1;

  /* azCol[] and abUnindexed[] share a single allocation, each with room
  ** for nArg entries. abUnindexed[] begins immediately after the nArg
  ** pointers of azCol[]. */
  nByte = nArg * (sizeof(char*) + sizeof(u8));
  pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
  pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
  pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
  pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
  pRet->bColumnsize = 1;
  pRet->eDetail = FTS5_DETAIL_FULL;
#ifdef SQLITE_DEBUG
  pRet->bPrefixIndex = 1;
#endif
  if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
    *pzErr = sqlite3_mprintf("reserved fts5 table name: %s" , pRet->zName);
    rc = SQLITE_ERROR;
  }

  for(i=3; rc==SQLITE_OK && i<nArg; i++){
    const char *zOrig = azArg[i];
    const char *z;
    char *zOne = 0;
    char *zTwo = 0;
    int bOption = 0;
    int bMustBeCol = 0;

    /* Read the first word. bMustBeCol is set if it was quoted, in which
    ** case it may only be a column name - following it with "=" is a
    ** parse error. */
    z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
    z = fts5ConfigSkipWhitespace(z);
    if( z && *z=='=' ){
      bOption = 1;
      assert( zOne!=0 );
      z++;
      if( bMustBeCol ) z = 0;
    }
    /* Read the optional second word. Nothing may follow it. */
    z = fts5ConfigSkipWhitespace(z);
    if( z && z[0] ){
      int bDummy;
      z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
      if( z && z[0] ) z = 0;
    }

    if( rc==SQLITE_OK ){
      if( z==0 ){
        *pzErr = sqlite3_mprintf("parse error in \"%s\"" , zOrig);
        rc = SQLITE_ERROR;
      }else{
        if( bOption ){
          rc = fts5ConfigParseSpecial(pGlobal, pRet,
              ALWAYS(zOne)?zOne:"" ,
              zTwo?zTwo:"" ,
              pzErr
          );
        }else{
          /* fts5ConfigParseColumn() stores zOne in pRet->azCol[], even if
          ** it returns an error. Clear the local pointer so that the
          ** string is not freed below. */
          rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
          zOne = 0;
        }
      }
    }

    sqlite3_free(zOne);
    sqlite3_free(zTwo);
  }

  /* If a tokenize= option was successfully parsed, the tokenizer has
  ** already been allocated. Otherwise, allocate an instance of the default
  ** tokenizer now. */
  if( rc==SQLITE_OK && pRet->pTok==0 ){
    rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
  }

  /* If no zContent option was specified, fill in the default values. */
  if( rc==SQLITE_OK && pRet->zContent==0 ){
    const char *zTail = 0;
    assert( pRet->eContent==FTS5_CONTENT_NORMAL
         || pRet->eContent==FTS5_CONTENT_NONE
    );
    /* An ordinary table reads content from "<name>_content". A contentless
    ** table with columnsize enabled uses "<name>_docsize". A contentless
    ** table without columnsize has no zContent at all. */
    if( pRet->eContent==FTS5_CONTENT_NORMAL ){
      zTail = "content" ;
    }else if( pRet->bColumnsize ){
      zTail = "docsize" ;
    }

    if( zTail ){
      pRet->zContent = sqlite3Fts5Mprintf(
          &rc, "%Q.'%q_%s'" , pRet->zDb, pRet->zName, zTail
      );
    }
  }

  if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
    pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid" , -1);
  }

  /* Formulate the zContentExprlist text */
  if( rc==SQLITE_OK ){
    rc = fts5ConfigMakeExprlist(pRet);
  }

  /* On error, discard the partially populated object. */
  if( rc!=SQLITE_OK ){
    sqlite3Fts5ConfigFree(pRet);
    *ppOut = 0;
  }
  return rc;
}
4780 | |
4781 | /* |
4782 | ** Free the configuration object passed as the only argument. |
4783 | */ |
4784 | static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ |
4785 | if( pConfig ){ |
4786 | int i; |
4787 | if( pConfig->pTok ){ |
4788 | pConfig->pTokApi->xDelete(pConfig->pTok); |
4789 | } |
4790 | sqlite3_free(pConfig->zDb); |
4791 | sqlite3_free(pConfig->zName); |
4792 | for(i=0; i<pConfig->nCol; i++){ |
4793 | sqlite3_free(pConfig->azCol[i]); |
4794 | } |
4795 | sqlite3_free(pConfig->azCol); |
4796 | sqlite3_free(pConfig->aPrefix); |
4797 | sqlite3_free(pConfig->zRank); |
4798 | sqlite3_free(pConfig->zRankArgs); |
4799 | sqlite3_free(pConfig->zContent); |
4800 | sqlite3_free(pConfig->zContentRowid); |
4801 | sqlite3_free(pConfig->zContentExprlist); |
4802 | sqlite3_free(pConfig); |
4803 | } |
4804 | } |
4805 | |
4806 | /* |
4807 | ** Call sqlite3_declare_vtab() based on the contents of the configuration |
4808 | ** object passed as the only argument. Return SQLITE_OK if successful, or |
4809 | ** an SQLite error code if an error occurs. |
4810 | */ |
4811 | static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ |
4812 | int i; |
4813 | int rc = SQLITE_OK; |
4814 | char *zSql; |
4815 | |
4816 | zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(" ); |
4817 | for(i=0; zSql && i<pConfig->nCol; i++){ |
4818 | const char *zSep = (i==0?"" :", " ); |
4819 | zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q" , zSql, zSep, pConfig->azCol[i]); |
4820 | } |
4821 | zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)" , |
4822 | zSql, pConfig->zName, FTS5_RANK_NAME |
4823 | ); |
4824 | |
4825 | assert( zSql || rc==SQLITE_NOMEM ); |
4826 | if( zSql ){ |
4827 | rc = sqlite3_declare_vtab(pConfig->db, zSql); |
4828 | sqlite3_free(zSql); |
4829 | } |
4830 | |
4831 | return rc; |
4832 | } |
4833 | |
4834 | /* |
4835 | ** Tokenize the text passed via the second and third arguments. |
4836 | ** |
4837 | ** The callback is invoked once for each token in the input text. The |
4838 | ** arguments passed to it are, in order: |
4839 | ** |
4840 | ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() |
4841 | ** const char *pToken // Pointer to buffer containing token |
4842 | ** int nToken // Size of token in bytes |
4843 | ** int iStart // Byte offset of start of token within input text |
4844 | ** int iEnd // Byte offset of end of token within input text |
4845 | ** int iPos // Position of token in input (first token is 0) |
4846 | ** |
4847 | ** If the callback returns a non-zero value the tokenization is abandoned |
4848 | ** and no further callbacks are issued. |
4849 | ** |
4850 | ** This function returns SQLITE_OK if successful or an SQLite error code |
4851 | ** if an error occurs. If the tokenization was abandoned early because |
4852 | ** the callback returned SQLITE_DONE, this is not an error and this function |
4853 | ** still returns SQLITE_OK. Or, if the tokenization was abandoned early |
4854 | ** because the callback returned another non-zero value, it is assumed |
4855 | ** to be an SQLite error code and returned to the caller. |
4856 | */ |
4857 | static int sqlite3Fts5Tokenize( |
4858 | Fts5Config *pConfig, /* FTS5 Configuration object */ |
4859 | int flags, /* FTS5_TOKENIZE_* flags */ |
4860 | const char *pText, int nText, /* Text to tokenize */ |
4861 | void *pCtx, /* Context passed to xToken() */ |
4862 | int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ |
4863 | ){ |
4864 | if( pText==0 ) return SQLITE_OK; |
4865 | return pConfig->pTokApi->xTokenize( |
4866 | pConfig->pTok, pCtx, flags, pText, nText, xToken |
4867 | ); |
4868 | } |
4869 | |
/*
** pIn points at the start of what should be a comma-separated list of SQL
** literals terminated by a ')' character. Whitespace is permitted around
** each literal.
**
** Return a pointer to the terminating ')' if the input has that form, or
** NULL if it does not (a literal could not be skipped, or a literal is
** followed by something other than ',' or ')').
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *zCsr = pIn;

  for(;;){
    /* Skip "<ws> literal <ws>" */
    zCsr = fts5ConfigSkipWhitespace(zCsr);
    zCsr = fts5ConfigSkipLiteral(zCsr);
    zCsr = fts5ConfigSkipWhitespace(zCsr);

    if( zCsr==0 ) return 0;           /* Not a literal: parse error */
    if( *zCsr==')' ) return zCsr;     /* End of list */
    if( *zCsr!=',' ) return 0;        /* Unexpected separator */
    zCsr++;                           /* Step over the ',' */
  }
}
4893 | |
4894 | /* |
4895 | ** Parameter zIn contains a rank() function specification. The format of |
4896 | ** this is: |
4897 | ** |
4898 | ** + Bareword (function name) |
4899 | ** + Open parenthesis - "(" |
4900 | ** + Zero or more SQL literals in a comma separated list |
4901 | ** + Close parenthesis - ")" |
4902 | */ |
4903 | static int sqlite3Fts5ConfigParseRank( |
4904 | const char *zIn, /* Input string */ |
4905 | char **pzRank, /* OUT: Rank function name */ |
4906 | char **pzRankArgs /* OUT: Rank function arguments */ |
4907 | ){ |
4908 | const char *p = zIn; |
4909 | const char *pRank; |
4910 | char *zRank = 0; |
4911 | char *zRankArgs = 0; |
4912 | int rc = SQLITE_OK; |
4913 | |
4914 | *pzRank = 0; |
4915 | *pzRankArgs = 0; |
4916 | |
4917 | if( p==0 ){ |
4918 | rc = SQLITE_ERROR; |
4919 | }else{ |
4920 | p = fts5ConfigSkipWhitespace(p); |
4921 | pRank = p; |
4922 | p = fts5ConfigSkipBareword(p); |
4923 | |
4924 | if( p ){ |
4925 | zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); |
4926 | if( zRank ) memcpy(zRank, pRank, p-pRank); |
4927 | }else{ |
4928 | rc = SQLITE_ERROR; |
4929 | } |
4930 | |
4931 | if( rc==SQLITE_OK ){ |
4932 | p = fts5ConfigSkipWhitespace(p); |
4933 | if( *p!='(' ) rc = SQLITE_ERROR; |
4934 | p++; |
4935 | } |
4936 | if( rc==SQLITE_OK ){ |
4937 | const char *pArgs; |
4938 | p = fts5ConfigSkipWhitespace(p); |
4939 | pArgs = p; |
4940 | if( *p!=')' ){ |
4941 | p = fts5ConfigSkipArgs(p); |
4942 | if( p==0 ){ |
4943 | rc = SQLITE_ERROR; |
4944 | }else{ |
4945 | zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); |
4946 | if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); |
4947 | } |
4948 | } |
4949 | } |
4950 | } |
4951 | |
4952 | if( rc!=SQLITE_OK ){ |
4953 | sqlite3_free(zRank); |
4954 | assert( zRankArgs==0 ); |
4955 | }else{ |
4956 | *pzRank = zRank; |
4957 | *pzRankArgs = zRankArgs; |
4958 | } |
4959 | return rc; |
4960 | } |
4961 | |
4962 | static int sqlite3Fts5ConfigSetValue( |
4963 | Fts5Config *pConfig, |
4964 | const char *zKey, |
4965 | sqlite3_value *pVal, |
4966 | int *pbBadkey |
4967 | ){ |
4968 | int rc = SQLITE_OK; |
4969 | |
4970 | if( 0==sqlite3_stricmp(zKey, "pgsz" ) ){ |
4971 | int pgsz = 0; |
4972 | if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
4973 | pgsz = sqlite3_value_int(pVal); |
4974 | } |
4975 | if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE ){ |
4976 | *pbBadkey = 1; |
4977 | }else{ |
4978 | pConfig->pgsz = pgsz; |
4979 | } |
4980 | } |
4981 | |
4982 | else if( 0==sqlite3_stricmp(zKey, "hashsize" ) ){ |
4983 | int nHashSize = -1; |
4984 | if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
4985 | nHashSize = sqlite3_value_int(pVal); |
4986 | } |
4987 | if( nHashSize<=0 ){ |
4988 | *pbBadkey = 1; |
4989 | }else{ |
4990 | pConfig->nHashSize = nHashSize; |
4991 | } |
4992 | } |
4993 | |
4994 | else if( 0==sqlite3_stricmp(zKey, "automerge" ) ){ |
4995 | int nAutomerge = -1; |
4996 | if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
4997 | nAutomerge = sqlite3_value_int(pVal); |
4998 | } |
4999 | if( nAutomerge<0 || nAutomerge>64 ){ |
5000 | *pbBadkey = 1; |
5001 | }else{ |
5002 | if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; |
5003 | pConfig->nAutomerge = nAutomerge; |
5004 | } |
5005 | } |
5006 | |
5007 | else if( 0==sqlite3_stricmp(zKey, "usermerge" ) ){ |
5008 | int nUsermerge = -1; |
5009 | if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
5010 | nUsermerge = sqlite3_value_int(pVal); |
5011 | } |
5012 | if( nUsermerge<2 || nUsermerge>16 ){ |
5013 | *pbBadkey = 1; |
5014 | }else{ |
5015 | pConfig->nUsermerge = nUsermerge; |
5016 | } |
5017 | } |
5018 | |
5019 | else if( 0==sqlite3_stricmp(zKey, "crisismerge" ) ){ |
5020 | int nCrisisMerge = -1; |
5021 | if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ |
5022 | nCrisisMerge = sqlite3_value_int(pVal); |
5023 | } |
5024 | if( nCrisisMerge<0 ){ |
5025 | *pbBadkey = 1; |
5026 | }else{ |
5027 | if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; |
5028 | if( nCrisisMerge>=FTS5_MAX_SEGMENT ) nCrisisMerge = FTS5_MAX_SEGMENT-1; |
5029 | pConfig->nCrisisMerge = nCrisisMerge; |
5030 | } |
5031 | } |
5032 | |
5033 | else if( 0==sqlite3_stricmp(zKey, "rank" ) ){ |
5034 | const char *zIn = (const char*)sqlite3_value_text(pVal); |
5035 | char *zRank; |
5036 | char *zRankArgs; |
5037 | rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); |
5038 | if( rc==SQLITE_OK ){ |
5039 | sqlite3_free(pConfig->zRank); |
5040 | sqlite3_free(pConfig->zRankArgs); |
5041 | pConfig->zRank = zRank; |
5042 | pConfig->zRankArgs = zRankArgs; |
5043 | }else if( rc==SQLITE_ERROR ){ |
5044 | rc = SQLITE_OK; |
5045 | *pbBadkey = 1; |
5046 | } |
5047 | }else{ |
5048 | *pbBadkey = 1; |
5049 | } |
5050 | return rc; |
5051 | } |
5052 | |
5053 | /* |
5054 | ** Load the contents of the %_config table into memory. |
5055 | */ |
5056 | static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ |
5057 | const char *zSelect = "SELECT k, v FROM %Q.'%q_config'" ; |
5058 | char *zSql; |
5059 | sqlite3_stmt *p = 0; |
5060 | int rc = SQLITE_OK; |
5061 | int iVersion = 0; |
5062 | |
5063 | /* Set default values */ |
5064 | pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; |
5065 | pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; |
5066 | pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE; |
5067 | pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; |
5068 | pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE; |
5069 | |
5070 | zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); |
5071 | if( zSql ){ |
5072 | rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); |
5073 | sqlite3_free(zSql); |
5074 | } |
5075 | |
5076 | assert( rc==SQLITE_OK || p==0 ); |
5077 | if( rc==SQLITE_OK ){ |
5078 | while( SQLITE_ROW==sqlite3_step(p) ){ |
5079 | const char *zK = (const char*)sqlite3_column_text(p, 0); |
5080 | sqlite3_value *pVal = sqlite3_column_value(p, 1); |
5081 | if( 0==sqlite3_stricmp(zK, "version" ) ){ |
5082 | iVersion = sqlite3_value_int(pVal); |
5083 | }else{ |
5084 | int bDummy = 0; |
5085 | sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); |
5086 | } |
5087 | } |
5088 | rc = sqlite3_finalize(p); |
5089 | } |
5090 | |
5091 | if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ |
5092 | rc = SQLITE_ERROR; |
5093 | if( pConfig->pzErrmsg ){ |
5094 | assert( 0==*pConfig->pzErrmsg ); |
5095 | *pConfig->pzErrmsg = sqlite3_mprintf( |
5096 | "invalid fts5 file format (found %d, expected %d) - run 'rebuild'" , |
5097 | iVersion, FTS5_CURRENT_VERSION |
5098 | ); |
5099 | } |
5100 | } |
5101 | |
5102 | if( rc==SQLITE_OK ){ |
5103 | pConfig->iCookie = iCookie; |
5104 | } |
5105 | return rc; |
5106 | } |
5107 | |
5108 | #line 1 "fts5_expr.c" |
5109 | /* |
5110 | ** 2014 May 31 |
5111 | ** |
5112 | ** The author disclaims copyright to this source code. In place of |
5113 | ** a legal notice, here is a blessing: |
5114 | ** |
5115 | ** May you do good and not evil. |
5116 | ** May you find forgiveness for yourself and forgive others. |
5117 | ** May you share freely, never taking more than you give. |
5118 | ** |
5119 | ****************************************************************************** |
5120 | ** |
5121 | */ |
5122 | |
5123 | |
5124 | |
5125 | /* #include "fts5Int.h" */ |
5126 | /* #include "fts5parse.h" */ |
5127 | |
/*
** All token types in the generated fts5parse.h file are greater than 0.
** This leaves 0 free to be used as the end-of-input token type.
*/
#define FTS5_EOF 0

/* The value 0x7fffffffffffffff, assembled from two 32-bit halves. */
#define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))

typedef struct Fts5ExprTerm Fts5ExprTerm;

/*
** Functions generated by lemon from fts5parse.y. The tracing function
** (and the <stdio.h> include it needs for FILE) is only declared when
** NDEBUG is not defined.
*/
static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
#ifndef NDEBUG
#include <stdio.h>
static void sqlite3Fts5ParserTrace(FILE*, char*);
#endif
static int sqlite3Fts5ParserFallback(int);
5149 | |
/*
** A compiled FTS5 query expression, as created by sqlite3Fts5ExprNew()
** and released by sqlite3Fts5ExprFree().
*/
struct Fts5Expr {
  Fts5Index *pIndex;              /* Set to 0 by sqlite3Fts5ExprNew() */
  Fts5Config *pConfig;            /* Configuration the expression belongs to */
  Fts5ExprNode *pRoot;            /* Root node of the expression tree */
  int bDesc;                      /* Iterate in descending rowid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};
5158 | |
/*
** A single node in an expression tree.
**
** eType:
**   Expression node type. Always one of:
**
**       FTS5_AND                 (nChild, apChild valid)
**       FTS5_OR                  (nChild, apChild valid)
**       FTS5_NOT                 (nChild, apChild valid)
**       FTS5_STRING              (pNear valid)
**       FTS5_TERM                (pNear valid)
**
** apChild[] is declared with a single entry; nodes with more than one
** child presumably over-allocate the structure so that apChild[] holds
** nChild entries (the allocation code is not in this part of the file).
*/
struct Fts5ExprNode {
  int eType;                      /* Node type */
  int bEof;                       /* True at EOF */
  int bNomatch;                   /* True if entry is not a match */

  /* Next method for this node. */
  int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);

  i64 iRowid;                     /* Current rowid */
  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */

  /* Child nodes. For a NOT node, this array always contains 2 entries. For 
  ** AND or OR nodes, it contains 2 or more entries.  */
  int nChild;                     /* Number of child nodes */
  Fts5ExprNode *apChild[1];       /* Array of child nodes */
};
5185 | |
/*
** True if node p is of type FTS5_TERM or FTS5_STRING. Argument p may be
** evaluated twice.
*/
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)
5187 | |
/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
** Argument b (the node) is evaluated twice, so it must not have side
** effects.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
5193 | |
/*
** An instance of the following structure represents a single search term
** or term prefix. Synonyms of a term are chained together through the
** pSynonym pointer; the chain is walked starting from the term itself
** (see fts5ExprSynonymRowid() and fts5ExprSynonymList()).
*/
struct Fts5ExprTerm {
  u8 bPrefix;                     /* True for a prefix term */
  u8 bFirst;                      /* True if token must be first in column */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
};
5205 | |
/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
**
** aTerm[] is declared with a single entry; phrases with more terms
** presumably over-allocate the structure so that aTerm[] holds nTerm
** entries (the allocation code is not in this part of the file).
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
};
5216 | |
/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
**
** apPhrase[] is declared with a single entry; the structure is presumably
** over-allocated so that apPhrase[] holds nPhrase entries (the allocation
** code is not in this part of the file).
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5Colset *pColset;            /* Columns to search (NULL -> all columns) */
  int nPhrase;                    /* Number of entries in aPhrase[] array */
  Fts5ExprPhrase *apPhrase[1];    /* Array of phrase pointers */
};
5227 | |
5228 | |
/*
** Parse context. One of these is zeroed and then passed to the
** lemon-generated parser by sqlite3Fts5ExprNew(). Errors are recorded in
** it by sqlite3Fts5ParseError().
*/
struct Fts5Parse {
  Fts5Config *pConfig;            /* Configuration of the table being queried */
  char *zErr;                     /* Error message from sqlite3_vmprintf() */
  int rc;                         /* SQLITE_OK, or error code if parse failed */
  int nPhrase;                    /* Size of apPhrase array */
  Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
  Fts5ExprNode *pExpr;            /* Result of a successful parse */
  int bPhraseToAnd;               /* Convert "a+b" to "a AND b" */
};
5241 | |
5242 | static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ |
5243 | va_list ap; |
5244 | va_start(ap, zFmt); |
5245 | if( pParse->rc==SQLITE_OK ){ |
5246 | assert( pParse->zErr==0 ); |
5247 | pParse->zErr = sqlite3_vmprintf(zFmt, ap); |
5248 | pParse->rc = SQLITE_ERROR; |
5249 | } |
5250 | va_end(ap); |
5251 | } |
5252 | |
/*
** Return 1 if character t is a space, tab, newline or carriage return,
** or 0 otherwise.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
5256 | |
5257 | /* |
5258 | ** Read the first token from the nul-terminated string at *pz. |
5259 | */ |
5260 | static int fts5ExprGetToken( |
5261 | Fts5Parse *pParse, |
5262 | const char **pz, /* IN/OUT: Pointer into buffer */ |
5263 | Fts5Token *pToken |
5264 | ){ |
5265 | const char *z = *pz; |
5266 | int tok; |
5267 | |
5268 | /* Skip past any whitespace */ |
5269 | while( fts5ExprIsspace(*z) ) z++; |
5270 | |
5271 | pToken->p = z; |
5272 | pToken->n = 1; |
5273 | switch( *z ){ |
5274 | case '(': tok = FTS5_LP; break; |
5275 | case ')': tok = FTS5_RP; break; |
5276 | case '{': tok = FTS5_LCP; break; |
5277 | case '}': tok = FTS5_RCP; break; |
5278 | case ':': tok = FTS5_COLON; break; |
5279 | case ',': tok = FTS5_COMMA; break; |
5280 | case '+': tok = FTS5_PLUS; break; |
5281 | case '*': tok = FTS5_STAR; break; |
5282 | case '-': tok = FTS5_MINUS; break; |
5283 | case '^': tok = FTS5_CARET; break; |
5284 | case '\0': tok = FTS5_EOF; break; |
5285 | |
5286 | case '"': { |
5287 | const char *z2; |
5288 | tok = FTS5_STRING; |
5289 | |
5290 | for(z2=&z[1]; 1; z2++){ |
5291 | if( z2[0]=='"' ){ |
5292 | z2++; |
5293 | if( z2[0]!='"' ) break; |
5294 | } |
5295 | if( z2[0]=='\0' ){ |
5296 | sqlite3Fts5ParseError(pParse, "unterminated string" ); |
5297 | return FTS5_EOF; |
5298 | } |
5299 | } |
5300 | pToken->n = (z2 - z); |
5301 | break; |
5302 | } |
5303 | |
5304 | default: { |
5305 | const char *z2; |
5306 | if( sqlite3Fts5IsBareword(z[0])==0 ){ |
5307 | sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"" , z); |
5308 | return FTS5_EOF; |
5309 | } |
5310 | tok = FTS5_STRING; |
5311 | for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); |
5312 | pToken->n = (z2 - z); |
5313 | if( pToken->n==2 && memcmp(pToken->p, "OR" , 2)==0 ) tok = FTS5_OR; |
5314 | if( pToken->n==3 && memcmp(pToken->p, "NOT" , 3)==0 ) tok = FTS5_NOT; |
5315 | if( pToken->n==3 && memcmp(pToken->p, "AND" , 3)==0 ) tok = FTS5_AND; |
5316 | break; |
5317 | } |
5318 | } |
5319 | |
5320 | *pz = &pToken->p[pToken->n]; |
5321 | return tok; |
5322 | } |
5323 | |
5324 | static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64((sqlite3_int64)t);} |
/*
** Deallocator handed to sqlite3Fts5ParserFree(). Release p using
** sqlite3_free().
*/
static void fts5ParseFree(void *p){
  sqlite3_free(p);
}
5326 | |
5327 | static int sqlite3Fts5ExprNew( |
5328 | Fts5Config *pConfig, /* FTS5 Configuration */ |
5329 | int bPhraseToAnd, |
5330 | int iCol, |
5331 | const char *zExpr, /* Expression text */ |
5332 | Fts5Expr **ppNew, |
5333 | char **pzErr |
5334 | ){ |
5335 | Fts5Parse sParse; |
5336 | Fts5Token token; |
5337 | const char *z = zExpr; |
5338 | int t; /* Next token type */ |
5339 | void *pEngine; |
5340 | Fts5Expr *pNew; |
5341 | |
5342 | *ppNew = 0; |
5343 | *pzErr = 0; |
5344 | memset(&sParse, 0, sizeof(sParse)); |
5345 | sParse.bPhraseToAnd = bPhraseToAnd; |
5346 | pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); |
5347 | if( pEngine==0 ){ return SQLITE_NOMEM; } |
5348 | sParse.pConfig = pConfig; |
5349 | |
5350 | do { |
5351 | t = fts5ExprGetToken(&sParse, &z, &token); |
5352 | sqlite3Fts5Parser(pEngine, t, token, &sParse); |
5353 | }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); |
5354 | sqlite3Fts5ParserFree(pEngine, fts5ParseFree); |
5355 | |
5356 | /* If the LHS of the MATCH expression was a user column, apply the |
5357 | ** implicit column-filter. */ |
5358 | if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){ |
5359 | int n = sizeof(Fts5Colset); |
5360 | Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); |
5361 | if( pColset ){ |
5362 | pColset->nCol = 1; |
5363 | pColset->aiCol[0] = iCol; |
5364 | sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset); |
5365 | } |
5366 | } |
5367 | |
5368 | assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); |
5369 | if( sParse.rc==SQLITE_OK ){ |
5370 | *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); |
5371 | if( pNew==0 ){ |
5372 | sParse.rc = SQLITE_NOMEM; |
5373 | sqlite3Fts5ParseNodeFree(sParse.pExpr); |
5374 | }else{ |
5375 | if( !sParse.pExpr ){ |
5376 | const int nByte = sizeof(Fts5ExprNode); |
5377 | pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte); |
5378 | if( pNew->pRoot ){ |
5379 | pNew->pRoot->bEof = 1; |
5380 | } |
5381 | }else{ |
5382 | pNew->pRoot = sParse.pExpr; |
5383 | } |
5384 | pNew->pIndex = 0; |
5385 | pNew->pConfig = pConfig; |
5386 | pNew->apExprPhrase = sParse.apPhrase; |
5387 | pNew->nPhrase = sParse.nPhrase; |
5388 | pNew->bDesc = 0; |
5389 | sParse.apPhrase = 0; |
5390 | } |
5391 | }else{ |
5392 | sqlite3Fts5ParseNodeFree(sParse.pExpr); |
5393 | } |
5394 | |
5395 | sqlite3_free(sParse.apPhrase); |
5396 | *pzErr = sParse.zErr; |
5397 | return sParse.rc; |
5398 | } |
5399 | |
5400 | /* |
5401 | ** This function is only called when using the special 'trigram' tokenizer. |
5402 | ** Argument zText contains the text of a LIKE or GLOB pattern matched |
5403 | ** against column iCol. This function creates and compiles an FTS5 MATCH |
5404 | ** expression that will match a superset of the rows matched by the LIKE or |
5405 | ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error |
5406 | ** code. |
5407 | */ |
5408 | static int sqlite3Fts5ExprPattern( |
5409 | Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp |
5410 | ){ |
5411 | i64 nText = strlen(zText); |
5412 | char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1); |
5413 | int rc = SQLITE_OK; |
5414 | |
5415 | if( zExpr==0 ){ |
5416 | rc = SQLITE_NOMEM; |
5417 | }else{ |
5418 | char aSpec[3]; |
5419 | int iOut = 0; |
5420 | int i = 0; |
5421 | int iFirst = 0; |
5422 | |
5423 | if( bGlob==0 ){ |
5424 | aSpec[0] = '_'; |
5425 | aSpec[1] = '%'; |
5426 | aSpec[2] = 0; |
5427 | }else{ |
5428 | aSpec[0] = '*'; |
5429 | aSpec[1] = '?'; |
5430 | aSpec[2] = '['; |
5431 | } |
5432 | |
5433 | while( i<=nText ){ |
5434 | if( i==nText |
5435 | || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] |
5436 | ){ |
5437 | if( i-iFirst>=3 ){ |
5438 | int jj; |
5439 | zExpr[iOut++] = '"'; |
5440 | for(jj=iFirst; jj<i; jj++){ |
5441 | zExpr[iOut++] = zText[jj]; |
5442 | if( zText[jj]=='"' ) zExpr[iOut++] = '"'; |
5443 | } |
5444 | zExpr[iOut++] = '"'; |
5445 | zExpr[iOut++] = ' '; |
5446 | } |
5447 | if( zText[i]==aSpec[2] ){ |
5448 | i += 2; |
5449 | if( zText[i-1]=='^' ) i++; |
5450 | while( i<nText && zText[i]!=']' ) i++; |
5451 | } |
5452 | iFirst = i+1; |
5453 | } |
5454 | i++; |
5455 | } |
5456 | if( iOut>0 ){ |
5457 | int bAnd = 0; |
5458 | if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ |
5459 | bAnd = 1; |
5460 | if( pConfig->eDetail==FTS5_DETAIL_NONE ){ |
5461 | iCol = pConfig->nCol; |
5462 | } |
5463 | } |
5464 | zExpr[iOut] = '\0'; |
5465 | rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg); |
5466 | }else{ |
5467 | *pp = 0; |
5468 | } |
5469 | sqlite3_free(zExpr); |
5470 | } |
5471 | |
5472 | return rc; |
5473 | } |
5474 | |
5475 | /* |
5476 | ** Free the expression node object passed as the only argument. |
5477 | */ |
5478 | static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ |
5479 | if( p ){ |
5480 | int i; |
5481 | for(i=0; i<p->nChild; i++){ |
5482 | sqlite3Fts5ParseNodeFree(p->apChild[i]); |
5483 | } |
5484 | sqlite3Fts5ParseNearsetFree(p->pNear); |
5485 | sqlite3_free(p); |
5486 | } |
5487 | } |
5488 | |
5489 | /* |
5490 | ** Free the expression object passed as the only argument. |
5491 | */ |
5492 | static void sqlite3Fts5ExprFree(Fts5Expr *p){ |
5493 | if( p ){ |
5494 | sqlite3Fts5ParseNodeFree(p->pRoot); |
5495 | sqlite3_free(p->apExprPhrase); |
5496 | sqlite3_free(p); |
5497 | } |
5498 | } |
5499 | |
5500 | static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){ |
5501 | Fts5Parse sParse; |
5502 | memset(&sParse, 0, sizeof(sParse)); |
5503 | |
5504 | if( *pp1 ){ |
5505 | Fts5Expr *p1 = *pp1; |
5506 | int nPhrase = p1->nPhrase + p2->nPhrase; |
5507 | |
5508 | p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND, p1->pRoot, p2->pRoot,0); |
5509 | p2->pRoot = 0; |
5510 | |
5511 | if( sParse.rc==SQLITE_OK ){ |
5512 | Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_realloc( |
5513 | p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*) |
5514 | ); |
5515 | if( ap==0 ){ |
5516 | sParse.rc = SQLITE_NOMEM; |
5517 | }else{ |
5518 | int i; |
5519 | memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*)); |
5520 | for(i=0; i<p2->nPhrase; i++){ |
5521 | ap[i] = p2->apExprPhrase[i]; |
5522 | } |
5523 | p1->nPhrase = nPhrase; |
5524 | p1->apExprPhrase = ap; |
5525 | } |
5526 | } |
5527 | sqlite3_free(p2->apExprPhrase); |
5528 | sqlite3_free(p2); |
5529 | }else{ |
5530 | *pp1 = p2; |
5531 | } |
5532 | |
5533 | return sParse.rc; |
5534 | } |
5535 | |
5536 | /* |
5537 | ** Argument pTerm must be a synonym iterator. Return the current rowid |
5538 | ** that it points to. |
5539 | */ |
5540 | static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ |
5541 | i64 iRet = 0; |
5542 | int bRetValid = 0; |
5543 | Fts5ExprTerm *p; |
5544 | |
5545 | assert( pTerm ); |
5546 | assert( pTerm->pSynonym ); |
5547 | assert( bDesc==0 || bDesc==1 ); |
5548 | for(p=pTerm; p; p=p->pSynonym){ |
5549 | if( 0==sqlite3Fts5IterEof(p->pIter) ){ |
5550 | i64 iRowid = p->pIter->iRowid; |
5551 | if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ |
5552 | iRet = iRowid; |
5553 | bRetValid = 1; |
5554 | } |
5555 | } |
5556 | } |
5557 | |
5558 | if( pbEof && bRetValid==0 ) *pbEof = 1; |
5559 | return iRet; |
5560 | } |
5561 | |
5562 | /* |
5563 | ** Argument pTerm must be a synonym iterator. |
5564 | */ |
5565 | static int fts5ExprSynonymList( |
5566 | Fts5ExprTerm *pTerm, |
5567 | i64 iRowid, |
5568 | Fts5Buffer *pBuf, /* Use this buffer for space if required */ |
5569 | u8 **pa, int *pn |
5570 | ){ |
5571 | Fts5PoslistReader aStatic[4]; |
5572 | Fts5PoslistReader *aIter = aStatic; |
5573 | int nIter = 0; |
5574 | int nAlloc = 4; |
5575 | int rc = SQLITE_OK; |
5576 | Fts5ExprTerm *p; |
5577 | |
5578 | assert( pTerm->pSynonym ); |
5579 | for(p=pTerm; p; p=p->pSynonym){ |
5580 | Fts5IndexIter *pIter = p->pIter; |
5581 | if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){ |
5582 | if( pIter->nData==0 ) continue; |
5583 | if( nIter==nAlloc ){ |
5584 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; |
5585 | Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64(nByte); |
5586 | if( aNew==0 ){ |
5587 | rc = SQLITE_NOMEM; |
5588 | goto synonym_poslist_out; |
5589 | } |
5590 | memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); |
5591 | nAlloc = nAlloc*2; |
5592 | if( aIter!=aStatic ) sqlite3_free(aIter); |
5593 | aIter = aNew; |
5594 | } |
5595 | sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); |
5596 | assert( aIter[nIter].bEof==0 ); |
5597 | nIter++; |
5598 | } |
5599 | } |
5600 | |
5601 | if( nIter==1 ){ |
5602 | *pa = (u8*)aIter[0].a; |
5603 | *pn = aIter[0].n; |
5604 | }else{ |
5605 | Fts5PoslistWriter writer = {0}; |
5606 | i64 iPrev = -1; |
5607 | fts5BufferZero(pBuf); |
5608 | while( 1 ){ |
5609 | int i; |
5610 | i64 iMin = FTS5_LARGEST_INT64; |
5611 | for(i=0; i<nIter; i++){ |
5612 | if( aIter[i].bEof==0 ){ |
5613 | if( aIter[i].iPos==iPrev ){ |
5614 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; |
5615 | } |
5616 | if( aIter[i].iPos<iMin ){ |
5617 | iMin = aIter[i].iPos; |
5618 | } |
5619 | } |
5620 | } |
5621 | if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break; |
5622 | rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); |
5623 | iPrev = iMin; |
5624 | } |
5625 | if( rc==SQLITE_OK ){ |
5626 | *pa = pBuf->p; |
5627 | *pn = pBuf->n; |
5628 | } |
5629 | } |
5630 | |
5631 | synonym_poslist_out: |
5632 | if( aIter!=aStatic ) sqlite3_free(aIter); |
5633 | return rc; |
5634 | } |
5635 | |
5636 | |
5637 | /* |
5638 | ** All individual term iterators in pPhrase are guaranteed to be valid and |
5639 | ** pointing to the same rowid when this function is called. This function |
5640 | ** checks if the current rowid really is a match, and if so populates |
5641 | ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch |
5642 | ** is set to true if this is really a match, or false otherwise. |
5643 | ** |
5644 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code |
5645 | ** otherwise. It is not considered an error code if the current rowid is |
5646 | ** not a match. |
5647 | */ |
5648 | static int fts5ExprPhraseIsMatch( |
5649 | Fts5ExprNode *pNode, /* Node pPhrase belongs to */ |
5650 | Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ |
5651 | int *pbMatch /* OUT: Set to true if really a match */ |
5652 | ){ |
5653 | Fts5PoslistWriter writer = {0}; |
5654 | Fts5PoslistReader aStatic[4]; |
5655 | Fts5PoslistReader *aIter = aStatic; |
5656 | int i; |
5657 | int rc = SQLITE_OK; |
5658 | int bFirst = pPhrase->aTerm[0].bFirst; |
5659 | |
5660 | fts5BufferZero(&pPhrase->poslist); |
5661 | |
5662 | /* If the aStatic[] array is not large enough, allocate a large array |
5663 | ** using sqlite3_malloc(). This approach could be improved upon. */ |
5664 | if( pPhrase->nTerm>ArraySize(aStatic) ){ |
5665 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; |
5666 | aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte); |
5667 | if( !aIter ) return SQLITE_NOMEM; |
5668 | } |
5669 | memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); |
5670 | |
5671 | /* Initialize a term iterator for each term in the phrase */ |
5672 | for(i=0; i<pPhrase->nTerm; i++){ |
5673 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; |
5674 | int n = 0; |
5675 | int bFlag = 0; |
5676 | u8 *a = 0; |
5677 | if( pTerm->pSynonym ){ |
5678 | Fts5Buffer buf = {0, 0, 0}; |
5679 | rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); |
5680 | if( rc ){ |
5681 | sqlite3_free(a); |
5682 | goto ismatch_out; |
5683 | } |
5684 | if( a==buf.p ) bFlag = 1; |
5685 | }else{ |
5686 | a = (u8*)pTerm->pIter->pData; |
5687 | n = pTerm->pIter->nData; |
5688 | } |
5689 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); |
5690 | aIter[i].bFlag = (u8)bFlag; |
5691 | if( aIter[i].bEof ) goto ismatch_out; |
5692 | } |
5693 | |
5694 | while( 1 ){ |
5695 | int bMatch; |
5696 | i64 iPos = aIter[0].iPos; |
5697 | do { |
5698 | bMatch = 1; |
5699 | for(i=0; i<pPhrase->nTerm; i++){ |
5700 | Fts5PoslistReader *pPos = &aIter[i]; |
5701 | i64 iAdj = iPos + i; |
5702 | if( pPos->iPos!=iAdj ){ |
5703 | bMatch = 0; |
5704 | while( pPos->iPos<iAdj ){ |
5705 | if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; |
5706 | } |
5707 | if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; |
5708 | } |
5709 | } |
5710 | }while( bMatch==0 ); |
5711 | |
5712 | /* Append position iPos to the output */ |
5713 | if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){ |
5714 | rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); |
5715 | if( rc!=SQLITE_OK ) goto ismatch_out; |
5716 | } |
5717 | |
5718 | for(i=0; i<pPhrase->nTerm; i++){ |
5719 | if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; |
5720 | } |
5721 | } |
5722 | |
5723 | ismatch_out: |
5724 | *pbMatch = (pPhrase->poslist.n>0); |
5725 | for(i=0; i<pPhrase->nTerm; i++){ |
5726 | if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a); |
5727 | } |
5728 | if( aIter!=aStatic ) sqlite3_free(aIter); |
5729 | return rc; |
5730 | } |
5731 | |
/*
** A position-list reader that keeps track of both the current position
** (iPos) and the one that follows it (iLookahead). Either field is set to
** FTS5_LOOKAHEAD_EOF once the end of the list has been passed.
*/
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};

/* Sentinel position value used to indicate end-of-list */
#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
5742 | |
5743 | static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ |
5744 | p->iPos = p->iLookahead; |
5745 | if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ |
5746 | p->iLookahead = FTS5_LOOKAHEAD_EOF; |
5747 | } |
5748 | return (p->iPos==FTS5_LOOKAHEAD_EOF); |
5749 | } |
5750 | |
5751 | static int fts5LookaheadReaderInit( |
5752 | const u8 *a, int n, /* Buffer to read position list from */ |
5753 | Fts5LookaheadReader *p /* Iterator object to initialize */ |
5754 | ){ |
5755 | memset(p, 0, sizeof(Fts5LookaheadReader)); |
5756 | p->a = a; |
5757 | p->n = n; |
5758 | fts5LookaheadReaderNext(p); |
5759 | return fts5LookaheadReaderNext(p); |
5760 | } |
5761 | |
/*
** Per-phrase state used by fts5ExprNearIsMatch(): a reader over the
** phrase's position list and a writer for the buffer that receives the
** trimmed position list.
*/
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
  Fts5LookaheadReader reader;     /* Input iterator */
  Fts5PoslistWriter writer;       /* Writer context */
  Fts5Buffer *pOut;               /* Output poslist */
};
5768 | |
5769 | /* |
5770 | ** The near-set object passed as the first argument contains more than |
5771 | ** one phrase. All phrases currently point to the same row. The |
5772 | ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function |
5773 | ** tests if the current row contains instances of each phrase sufficiently |
5774 | ** close together to meet the NEAR constraint. Non-zero is returned if it |
5775 | ** does, or zero otherwise. |
5776 | ** |
5777 | ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this |
5778 | ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) |
5779 | ** occurs within this function (*pRc) is set accordingly before returning. |
5780 | ** The return value is undefined in both these cases. |
5781 | ** |
5782 | ** If no error occurs and non-zero (a match) is returned, the position-list |
5783 | ** of each phrase object is edited to contain only those entries that |
5784 | ** meet the constraint before returning. |
5785 | */ |
5786 | static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ |
5787 | Fts5NearTrimmer aStatic[4]; |
5788 | Fts5NearTrimmer *a = aStatic; |
5789 | Fts5ExprPhrase **apPhrase = pNear->apPhrase; |
5790 | |
5791 | int i; |
5792 | int rc = *pRc; |
5793 | int bMatch; |
5794 | |
5795 | assert( pNear->nPhrase>1 ); |
5796 | |
5797 | /* If the aStatic[] array is not large enough, allocate a large array |
5798 | ** using sqlite3_malloc(). This approach could be improved upon. */ |
5799 | if( pNear->nPhrase>ArraySize(aStatic) ){ |
5800 | sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; |
5801 | a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); |
5802 | }else{ |
5803 | memset(aStatic, 0, sizeof(aStatic)); |
5804 | } |
5805 | if( rc!=SQLITE_OK ){ |
5806 | *pRc = rc; |
5807 | return 0; |
5808 | } |
5809 | |
5810 | /* Initialize a lookahead iterator for each phrase. After passing the |
5811 | ** buffer and buffer size to the lookaside-reader init function, zero |
5812 | ** the phrase poslist buffer. The new poslist for the phrase (containing |
5813 | ** the same entries as the original with some entries removed on account |
5814 | ** of the NEAR constraint) is written over the original even as it is |
5815 | ** being read. This is safe as the entries for the new poslist are a |
5816 | ** subset of the old, so it is not possible for data yet to be read to |
5817 | ** be overwritten. */ |
5818 | for(i=0; i<pNear->nPhrase; i++){ |
5819 | Fts5Buffer *pPoslist = &apPhrase[i]->poslist; |
5820 | fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); |
5821 | pPoslist->n = 0; |
5822 | a[i].pOut = pPoslist; |
5823 | } |
5824 | |
5825 | while( 1 ){ |
5826 | int iAdv; |
5827 | i64 iMin; |
5828 | i64 iMax; |
5829 | |
5830 | /* This block advances the phrase iterators until they point to a set of |
5831 | ** entries that together comprise a match. */ |
5832 | iMax = a[0].reader.iPos; |
5833 | do { |
5834 | bMatch = 1; |
5835 | for(i=0; i<pNear->nPhrase; i++){ |
5836 | Fts5LookaheadReader *pPos = &a[i].reader; |
5837 | iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; |
5838 | if( pPos->iPos<iMin || pPos->iPos>iMax ){ |
5839 | bMatch = 0; |
5840 | while( pPos->iPos<iMin ){ |
5841 | if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; |
5842 | } |
5843 | if( pPos->iPos>iMax ) iMax = pPos->iPos; |
5844 | } |
5845 | } |
5846 | }while( bMatch==0 ); |
5847 | |
5848 | /* Add an entry to each output position list */ |
5849 | for(i=0; i<pNear->nPhrase; i++){ |
5850 | i64 iPos = a[i].reader.iPos; |
5851 | Fts5PoslistWriter *pWriter = &a[i].writer; |
5852 | if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ |
5853 | sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); |
5854 | } |
5855 | } |
5856 | |
5857 | iAdv = 0; |
5858 | iMin = a[0].reader.iLookahead; |
5859 | for(i=0; i<pNear->nPhrase; i++){ |
5860 | if( a[i].reader.iLookahead < iMin ){ |
5861 | iMin = a[i].reader.iLookahead; |
5862 | iAdv = i; |
5863 | } |
5864 | } |
5865 | if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; |
5866 | } |
5867 | |
5868 | ismatch_out: { |
5869 | int bRet = a[0].pOut->n>0; |
5870 | *pRc = rc; |
5871 | if( a!=aStatic ) sqlite3_free(a); |
5872 | return bRet; |
5873 | } |
5874 | } |
5875 | |
5876 | /* |
5877 | ** Advance iterator pIter until it points to a value equal to or laster |
5878 | ** than the initial value of *piLast. If this means the iterator points |
5879 | ** to a value laster than *piLast, update *piLast to the new lastest value. |
5880 | ** |
5881 | ** If the iterator reaches EOF, set *pbEof to true before returning. If |
5882 | ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc |
5883 | ** are set, return a non-zero value. Otherwise, return zero. |
5884 | */ |
5885 | static int fts5ExprAdvanceto( |
5886 | Fts5IndexIter *pIter, /* Iterator to advance */ |
5887 | int bDesc, /* True if iterator is "rowid DESC" */ |
5888 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ |
5889 | int *pRc, /* OUT: Error code */ |
5890 | int *pbEof /* OUT: Set to true if EOF */ |
5891 | ){ |
5892 | i64 iLast = *piLast; |
5893 | i64 iRowid; |
5894 | |
5895 | iRowid = pIter->iRowid; |
5896 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ |
5897 | int rc = sqlite3Fts5IterNextFrom(pIter, iLast); |
5898 | if( rc || sqlite3Fts5IterEof(pIter) ){ |
5899 | *pRc = rc; |
5900 | *pbEof = 1; |
5901 | return 1; |
5902 | } |
5903 | iRowid = pIter->iRowid; |
5904 | assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); |
5905 | } |
5906 | *piLast = iRowid; |
5907 | |
5908 | return 0; |
5909 | } |
5910 | |
5911 | static int fts5ExprSynonymAdvanceto( |
5912 | Fts5ExprTerm *pTerm, /* Term iterator to advance */ |
5913 | int bDesc, /* True if iterator is "rowid DESC" */ |
5914 | i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ |
5915 | int *pRc /* OUT: Error code */ |
5916 | ){ |
5917 | int rc = SQLITE_OK; |
5918 | i64 iLast = *piLast; |
5919 | Fts5ExprTerm *p; |
5920 | int bEof = 0; |
5921 | |
5922 | for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ |
5923 | if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
5924 | i64 iRowid = p->pIter->iRowid; |
5925 | if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ |
5926 | rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); |
5927 | } |
5928 | } |
5929 | } |
5930 | |
5931 | if( rc!=SQLITE_OK ){ |
5932 | *pRc = rc; |
5933 | bEof = 1; |
5934 | }else{ |
5935 | *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); |
5936 | } |
5937 | return bEof; |
5938 | } |
5939 | |
5940 | |
5941 | static int fts5ExprNearTest( |
5942 | int *pRc, |
5943 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ |
5944 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ |
5945 | ){ |
5946 | Fts5ExprNearset *pNear = pNode->pNear; |
5947 | int rc = *pRc; |
5948 | |
5949 | if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ |
5950 | Fts5ExprTerm *pTerm; |
5951 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; |
5952 | pPhrase->poslist.n = 0; |
5953 | for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ |
5954 | Fts5IndexIter *pIter = pTerm->pIter; |
5955 | if( sqlite3Fts5IterEof(pIter)==0 ){ |
5956 | if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ |
5957 | pPhrase->poslist.n = 1; |
5958 | } |
5959 | } |
5960 | } |
5961 | return pPhrase->poslist.n; |
5962 | }else{ |
5963 | int i; |
5964 | |
5965 | /* Check that each phrase in the nearset matches the current row. |
5966 | ** Populate the pPhrase->poslist buffers at the same time. If any |
5967 | ** phrase is not a match, break out of the loop early. */ |
5968 | for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ |
5969 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
5970 | if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym |
5971 | || pNear->pColset || pPhrase->aTerm[0].bFirst |
5972 | ){ |
5973 | int bMatch = 0; |
5974 | rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); |
5975 | if( bMatch==0 ) break; |
5976 | }else{ |
5977 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; |
5978 | fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); |
5979 | } |
5980 | } |
5981 | |
5982 | *pRc = rc; |
5983 | if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ |
5984 | return 1; |
5985 | } |
5986 | return 0; |
5987 | } |
5988 | } |
5989 | |
5990 | |
5991 | /* |
5992 | ** Initialize all term iterators in the pNear object. If any term is found |
5993 | ** to match no documents at all, return immediately without initializing any |
5994 | ** further iterators. |
5995 | ** |
5996 | ** If an error occurs, return an SQLite error code. Otherwise, return |
5997 | ** SQLITE_OK. It is not considered an error if some term matches zero |
5998 | ** documents. |
5999 | */ |
6000 | static int fts5ExprNearInitAll( |
6001 | Fts5Expr *pExpr, |
6002 | Fts5ExprNode *pNode |
6003 | ){ |
6004 | Fts5ExprNearset *pNear = pNode->pNear; |
6005 | int i; |
6006 | |
6007 | assert( pNode->bNomatch==0 ); |
6008 | for(i=0; i<pNear->nPhrase; i++){ |
6009 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
6010 | if( pPhrase->nTerm==0 ){ |
6011 | pNode->bEof = 1; |
6012 | return SQLITE_OK; |
6013 | }else{ |
6014 | int j; |
6015 | for(j=0; j<pPhrase->nTerm; j++){ |
6016 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; |
6017 | Fts5ExprTerm *p; |
6018 | int bHit = 0; |
6019 | |
6020 | for(p=pTerm; p; p=p->pSynonym){ |
6021 | int rc; |
6022 | if( p->pIter ){ |
6023 | sqlite3Fts5IterClose(p->pIter); |
6024 | p->pIter = 0; |
6025 | } |
6026 | rc = sqlite3Fts5IndexQuery( |
6027 | pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), |
6028 | (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | |
6029 | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), |
6030 | pNear->pColset, |
6031 | &p->pIter |
6032 | ); |
6033 | assert( (rc==SQLITE_OK)==(p->pIter!=0) ); |
6034 | if( rc!=SQLITE_OK ) return rc; |
6035 | if( 0==sqlite3Fts5IterEof(p->pIter) ){ |
6036 | bHit = 1; |
6037 | } |
6038 | } |
6039 | |
6040 | if( bHit==0 ){ |
6041 | pNode->bEof = 1; |
6042 | return SQLITE_OK; |
6043 | } |
6044 | } |
6045 | } |
6046 | } |
6047 | |
6048 | pNode->bEof = 0; |
6049 | return SQLITE_OK; |
6050 | } |
6051 | |
6052 | /* |
6053 | ** If pExpr is an ASC iterator, this function returns a value with the |
6054 | ** same sign as: |
6055 | ** |
6056 | ** (iLhs - iRhs) |
6057 | ** |
6058 | ** Otherwise, if this is a DESC iterator, the opposite is returned: |
6059 | ** |
6060 | ** (iRhs - iLhs) |
6061 | */ |
6062 | static int fts5RowidCmp( |
6063 | Fts5Expr *pExpr, |
6064 | i64 iLhs, |
6065 | i64 iRhs |
6066 | ){ |
6067 | assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); |
6068 | if( pExpr->bDesc==0 ){ |
6069 | if( iLhs<iRhs ) return -1; |
6070 | return (iLhs > iRhs); |
6071 | }else{ |
6072 | if( iLhs>iRhs ) return -1; |
6073 | return (iLhs < iRhs); |
6074 | } |
6075 | } |
6076 | |
6077 | static void fts5ExprSetEof(Fts5ExprNode *pNode){ |
6078 | int i; |
6079 | pNode->bEof = 1; |
6080 | pNode->bNomatch = 0; |
6081 | for(i=0; i<pNode->nChild; i++){ |
6082 | fts5ExprSetEof(pNode->apChild[i]); |
6083 | } |
6084 | } |
6085 | |
6086 | static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ |
6087 | if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ |
6088 | Fts5ExprNearset *pNear = pNode->pNear; |
6089 | int i; |
6090 | for(i=0; i<pNear->nPhrase; i++){ |
6091 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
6092 | pPhrase->poslist.n = 0; |
6093 | } |
6094 | }else{ |
6095 | int i; |
6096 | for(i=0; i<pNode->nChild; i++){ |
6097 | fts5ExprNodeZeroPoslist(pNode->apChild[i]); |
6098 | } |
6099 | } |
6100 | } |
6101 | |
6102 | |
6103 | |
6104 | /* |
6105 | ** Compare the values currently indicated by the two nodes as follows: |
6106 | ** |
6107 | ** res = (*p1) - (*p2) |
6108 | ** |
6109 | ** Nodes that point to values that come later in the iteration order are |
6110 | ** considered to be larger. Nodes at EOF are the largest of all. |
6111 | ** |
6112 | ** This means that if the iteration order is ASC, then numerically larger |
6113 | ** rowids are considered larger. Or if it is the default DESC, numerically |
6114 | ** smaller rowids are larger. |
6115 | */ |
6116 | static int fts5NodeCompare( |
6117 | Fts5Expr *pExpr, |
6118 | Fts5ExprNode *p1, |
6119 | Fts5ExprNode *p2 |
6120 | ){ |
6121 | if( p2->bEof ) return -1; |
6122 | if( p1->bEof ) return +1; |
6123 | return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); |
6124 | } |
6125 | |
6126 | /* |
6127 | ** All individual term iterators in pNear are guaranteed to be valid when |
6128 | ** this function is called. This function checks if all term iterators |
6129 | ** point to the same rowid, and if not, advances them until they do. |
6130 | ** If an EOF is reached before this happens, *pbEof is set to true before |
6131 | ** returning. |
6132 | ** |
6133 | ** SQLITE_OK is returned if an error occurs, or an SQLite error code |
6134 | ** otherwise. It is not considered an error code if an iterator reaches |
6135 | ** EOF. |
6136 | */ |
6137 | static int fts5ExprNodeTest_STRING( |
6138 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
6139 | Fts5ExprNode *pNode |
6140 | ){ |
6141 | Fts5ExprNearset *pNear = pNode->pNear; |
6142 | Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; |
6143 | int rc = SQLITE_OK; |
6144 | i64 iLast; /* Lastest rowid any iterator points to */ |
6145 | int i, j; /* Phrase and token index, respectively */ |
6146 | int bMatch; /* True if all terms are at the same rowid */ |
6147 | const int bDesc = pExpr->bDesc; |
6148 | |
6149 | /* Check that this node should not be FTS5_TERM */ |
6150 | assert( pNear->nPhrase>1 |
6151 | || pNear->apPhrase[0]->nTerm>1 |
6152 | || pNear->apPhrase[0]->aTerm[0].pSynonym |
6153 | || pNear->apPhrase[0]->aTerm[0].bFirst |
6154 | ); |
6155 | |
6156 | /* Initialize iLast, the "lastest" rowid any iterator points to. If the |
6157 | ** iterator skips through rowids in the default ascending order, this means |
6158 | ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it |
6159 | ** means the minimum rowid. */ |
6160 | if( pLeft->aTerm[0].pSynonym ){ |
6161 | iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); |
6162 | }else{ |
6163 | iLast = pLeft->aTerm[0].pIter->iRowid; |
6164 | } |
6165 | |
6166 | do { |
6167 | bMatch = 1; |
6168 | for(i=0; i<pNear->nPhrase; i++){ |
6169 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
6170 | for(j=0; j<pPhrase->nTerm; j++){ |
6171 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; |
6172 | if( pTerm->pSynonym ){ |
6173 | i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); |
6174 | if( iRowid==iLast ) continue; |
6175 | bMatch = 0; |
6176 | if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ |
6177 | pNode->bNomatch = 0; |
6178 | pNode->bEof = 1; |
6179 | return rc; |
6180 | } |
6181 | }else{ |
6182 | Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; |
6183 | if( pIter->iRowid==iLast || pIter->bEof ) continue; |
6184 | bMatch = 0; |
6185 | if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ |
6186 | return rc; |
6187 | } |
6188 | } |
6189 | } |
6190 | } |
6191 | }while( bMatch==0 ); |
6192 | |
6193 | pNode->iRowid = iLast; |
6194 | pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK); |
6195 | assert( pNode->bEof==0 || pNode->bNomatch==0 ); |
6196 | |
6197 | return rc; |
6198 | } |
6199 | |
6200 | /* |
6201 | ** Advance the first term iterator in the first phrase of pNear. Set output |
6202 | ** variable *pbEof to true if it reaches EOF or if an error occurs. |
6203 | ** |
6204 | ** Return SQLITE_OK if successful, or an SQLite error code if an error |
6205 | ** occurs. |
6206 | */ |
6207 | static int fts5ExprNodeNext_STRING( |
6208 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
6209 | Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ |
6210 | int bFromValid, |
6211 | i64 iFrom |
6212 | ){ |
6213 | Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; |
6214 | int rc = SQLITE_OK; |
6215 | |
6216 | pNode->bNomatch = 0; |
6217 | if( pTerm->pSynonym ){ |
6218 | int bEof = 1; |
6219 | Fts5ExprTerm *p; |
6220 | |
6221 | /* Find the firstest rowid any synonym points to. */ |
6222 | i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); |
6223 | |
6224 | /* Advance each iterator that currently points to iRowid. Or, if iFrom |
6225 | ** is valid - each iterator that points to a rowid before iFrom. */ |
6226 | for(p=pTerm; p; p=p->pSynonym){ |
6227 | if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
6228 | i64 ii = p->pIter->iRowid; |
6229 | if( ii==iRowid |
6230 | || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) |
6231 | ){ |
6232 | if( bFromValid ){ |
6233 | rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); |
6234 | }else{ |
6235 | rc = sqlite3Fts5IterNext(p->pIter); |
6236 | } |
6237 | if( rc!=SQLITE_OK ) break; |
6238 | if( sqlite3Fts5IterEof(p->pIter)==0 ){ |
6239 | bEof = 0; |
6240 | } |
6241 | }else{ |
6242 | bEof = 0; |
6243 | } |
6244 | } |
6245 | } |
6246 | |
6247 | /* Set the EOF flag if either all synonym iterators are at EOF or an |
6248 | ** error has occurred. */ |
6249 | pNode->bEof = (rc || bEof); |
6250 | }else{ |
6251 | Fts5IndexIter *pIter = pTerm->pIter; |
6252 | |
6253 | assert( Fts5NodeIsString(pNode) ); |
6254 | if( bFromValid ){ |
6255 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); |
6256 | }else{ |
6257 | rc = sqlite3Fts5IterNext(pIter); |
6258 | } |
6259 | |
6260 | pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); |
6261 | } |
6262 | |
6263 | if( pNode->bEof==0 ){ |
6264 | assert( rc==SQLITE_OK ); |
6265 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); |
6266 | } |
6267 | |
6268 | return rc; |
6269 | } |
6270 | |
6271 | |
6272 | static int fts5ExprNodeTest_TERM( |
6273 | Fts5Expr *pExpr, /* Expression that pNear is a part of */ |
6274 | Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ |
6275 | ){ |
6276 | /* As this "NEAR" object is actually a single phrase that consists |
6277 | ** of a single term only, grab pointers into the poslist managed by the |
6278 | ** fts5_index.c iterator object. This is much faster than synthesizing |
6279 | ** a new poslist the way we have to for more complicated phrase or NEAR |
6280 | ** expressions. */ |
6281 | Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; |
6282 | Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; |
6283 | |
6284 | assert( pNode->eType==FTS5_TERM ); |
6285 | assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); |
6286 | assert( pPhrase->aTerm[0].pSynonym==0 ); |
6287 | |
6288 | pPhrase->poslist.n = pIter->nData; |
6289 | if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ |
6290 | pPhrase->poslist.p = (u8*)pIter->pData; |
6291 | } |
6292 | pNode->iRowid = pIter->iRowid; |
6293 | pNode->bNomatch = (pPhrase->poslist.n==0); |
6294 | return SQLITE_OK; |
6295 | } |
6296 | |
6297 | /* |
6298 | ** xNext() method for a node of type FTS5_TERM. |
6299 | */ |
6300 | static int fts5ExprNodeNext_TERM( |
6301 | Fts5Expr *pExpr, |
6302 | Fts5ExprNode *pNode, |
6303 | int bFromValid, |
6304 | i64 iFrom |
6305 | ){ |
6306 | int rc; |
6307 | Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; |
6308 | |
6309 | assert( pNode->bEof==0 ); |
6310 | if( bFromValid ){ |
6311 | rc = sqlite3Fts5IterNextFrom(pIter, iFrom); |
6312 | }else{ |
6313 | rc = sqlite3Fts5IterNext(pIter); |
6314 | } |
6315 | if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ |
6316 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); |
6317 | }else{ |
6318 | pNode->bEof = 1; |
6319 | pNode->bNomatch = 0; |
6320 | } |
6321 | return rc; |
6322 | } |
6323 | |
6324 | static void fts5ExprNodeTest_OR( |
6325 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ |
6326 | Fts5ExprNode *pNode /* Expression node to test */ |
6327 | ){ |
6328 | Fts5ExprNode *pNext = pNode->apChild[0]; |
6329 | int i; |
6330 | |
6331 | for(i=1; i<pNode->nChild; i++){ |
6332 | Fts5ExprNode *pChild = pNode->apChild[i]; |
6333 | int cmp = fts5NodeCompare(pExpr, pNext, pChild); |
6334 | if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ |
6335 | pNext = pChild; |
6336 | } |
6337 | } |
6338 | pNode->iRowid = pNext->iRowid; |
6339 | pNode->bEof = pNext->bEof; |
6340 | pNode->bNomatch = pNext->bNomatch; |
6341 | } |
6342 | |
6343 | static int fts5ExprNodeNext_OR( |
6344 | Fts5Expr *pExpr, |
6345 | Fts5ExprNode *pNode, |
6346 | int bFromValid, |
6347 | i64 iFrom |
6348 | ){ |
6349 | int i; |
6350 | i64 iLast = pNode->iRowid; |
6351 | |
6352 | for(i=0; i<pNode->nChild; i++){ |
6353 | Fts5ExprNode *p1 = pNode->apChild[i]; |
6354 | assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); |
6355 | if( p1->bEof==0 ){ |
6356 | if( (p1->iRowid==iLast) |
6357 | || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) |
6358 | ){ |
6359 | int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); |
6360 | if( rc!=SQLITE_OK ){ |
6361 | pNode->bNomatch = 0; |
6362 | return rc; |
6363 | } |
6364 | } |
6365 | } |
6366 | } |
6367 | |
6368 | fts5ExprNodeTest_OR(pExpr, pNode); |
6369 | return SQLITE_OK; |
6370 | } |
6371 | |
6372 | /* |
6373 | ** Argument pNode is an FTS5_AND node. |
6374 | */ |
6375 | static int fts5ExprNodeTest_AND( |
6376 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
6377 | Fts5ExprNode *pAnd /* FTS5_AND node to advance */ |
6378 | ){ |
6379 | int iChild; |
6380 | i64 iLast = pAnd->iRowid; |
6381 | int rc = SQLITE_OK; |
6382 | int bMatch; |
6383 | |
6384 | assert( pAnd->bEof==0 ); |
6385 | do { |
6386 | pAnd->bNomatch = 0; |
6387 | bMatch = 1; |
6388 | for(iChild=0; iChild<pAnd->nChild; iChild++){ |
6389 | Fts5ExprNode *pChild = pAnd->apChild[iChild]; |
6390 | int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); |
6391 | if( cmp>0 ){ |
6392 | /* Advance pChild until it points to iLast or laster */ |
6393 | rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); |
6394 | if( rc!=SQLITE_OK ){ |
6395 | pAnd->bNomatch = 0; |
6396 | return rc; |
6397 | } |
6398 | } |
6399 | |
6400 | /* If the child node is now at EOF, so is the parent AND node. Otherwise, |
6401 | ** the child node is guaranteed to have advanced at least as far as |
6402 | ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the |
6403 | ** new lastest rowid seen so far. */ |
6404 | assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); |
6405 | if( pChild->bEof ){ |
6406 | fts5ExprSetEof(pAnd); |
6407 | bMatch = 1; |
6408 | break; |
6409 | }else if( iLast!=pChild->iRowid ){ |
6410 | bMatch = 0; |
6411 | iLast = pChild->iRowid; |
6412 | } |
6413 | |
6414 | if( pChild->bNomatch ){ |
6415 | pAnd->bNomatch = 1; |
6416 | } |
6417 | } |
6418 | }while( bMatch==0 ); |
6419 | |
6420 | if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ |
6421 | fts5ExprNodeZeroPoslist(pAnd); |
6422 | } |
6423 | pAnd->iRowid = iLast; |
6424 | return SQLITE_OK; |
6425 | } |
6426 | |
6427 | static int fts5ExprNodeNext_AND( |
6428 | Fts5Expr *pExpr, |
6429 | Fts5ExprNode *pNode, |
6430 | int bFromValid, |
6431 | i64 iFrom |
6432 | ){ |
6433 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); |
6434 | if( rc==SQLITE_OK ){ |
6435 | rc = fts5ExprNodeTest_AND(pExpr, pNode); |
6436 | }else{ |
6437 | pNode->bNomatch = 0; |
6438 | } |
6439 | return rc; |
6440 | } |
6441 | |
6442 | static int fts5ExprNodeTest_NOT( |
6443 | Fts5Expr *pExpr, /* Expression pPhrase belongs to */ |
6444 | Fts5ExprNode *pNode /* FTS5_NOT node to advance */ |
6445 | ){ |
6446 | int rc = SQLITE_OK; |
6447 | Fts5ExprNode *p1 = pNode->apChild[0]; |
6448 | Fts5ExprNode *p2 = pNode->apChild[1]; |
6449 | assert( pNode->nChild==2 ); |
6450 | |
6451 | while( rc==SQLITE_OK && p1->bEof==0 ){ |
6452 | int cmp = fts5NodeCompare(pExpr, p1, p2); |
6453 | if( cmp>0 ){ |
6454 | rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); |
6455 | cmp = fts5NodeCompare(pExpr, p1, p2); |
6456 | } |
6457 | assert( rc!=SQLITE_OK || cmp<=0 ); |
6458 | if( cmp || p2->bNomatch ) break; |
6459 | rc = fts5ExprNodeNext(pExpr, p1, 0, 0); |
6460 | } |
6461 | pNode->bEof = p1->bEof; |
6462 | pNode->bNomatch = p1->bNomatch; |
6463 | pNode->iRowid = p1->iRowid; |
6464 | if( p1->bEof ){ |
6465 | fts5ExprNodeZeroPoslist(p2); |
6466 | } |
6467 | return rc; |
6468 | } |
6469 | |
6470 | static int fts5ExprNodeNext_NOT( |
6471 | Fts5Expr *pExpr, |
6472 | Fts5ExprNode *pNode, |
6473 | int bFromValid, |
6474 | i64 iFrom |
6475 | ){ |
6476 | int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); |
6477 | if( rc==SQLITE_OK ){ |
6478 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); |
6479 | } |
6480 | if( rc!=SQLITE_OK ){ |
6481 | pNode->bNomatch = 0; |
6482 | } |
6483 | return rc; |
6484 | } |
6485 | |
6486 | /* |
6487 | ** If pNode currently points to a match, this function returns SQLITE_OK |
6488 | ** without modifying it. Otherwise, pNode is advanced until it does point |
6489 | ** to a match or EOF is reached. |
6490 | */ |
6491 | static int fts5ExprNodeTest( |
6492 | Fts5Expr *pExpr, /* Expression of which pNode is a part */ |
6493 | Fts5ExprNode *pNode /* Expression node to test */ |
6494 | ){ |
6495 | int rc = SQLITE_OK; |
6496 | if( pNode->bEof==0 ){ |
6497 | switch( pNode->eType ){ |
6498 | |
6499 | case FTS5_STRING: { |
6500 | rc = fts5ExprNodeTest_STRING(pExpr, pNode); |
6501 | break; |
6502 | } |
6503 | |
6504 | case FTS5_TERM: { |
6505 | rc = fts5ExprNodeTest_TERM(pExpr, pNode); |
6506 | break; |
6507 | } |
6508 | |
6509 | case FTS5_AND: { |
6510 | rc = fts5ExprNodeTest_AND(pExpr, pNode); |
6511 | break; |
6512 | } |
6513 | |
6514 | case FTS5_OR: { |
6515 | fts5ExprNodeTest_OR(pExpr, pNode); |
6516 | break; |
6517 | } |
6518 | |
6519 | default: assert( pNode->eType==FTS5_NOT ); { |
6520 | rc = fts5ExprNodeTest_NOT(pExpr, pNode); |
6521 | break; |
6522 | } |
6523 | } |
6524 | } |
6525 | return rc; |
6526 | } |
6527 | |
6528 | |
6529 | /* |
6530 | ** Set node pNode, which is part of expression pExpr, to point to the first |
6531 | ** match. If there are no matches, set the Node.bEof flag to indicate EOF. |
6532 | ** |
6533 | ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. |
6534 | ** It is not an error if there are no matches. |
6535 | */ |
6536 | static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ |
6537 | int rc = SQLITE_OK; |
6538 | pNode->bEof = 0; |
6539 | pNode->bNomatch = 0; |
6540 | |
6541 | if( Fts5NodeIsString(pNode) ){ |
6542 | /* Initialize all term iterators in the NEAR object. */ |
6543 | rc = fts5ExprNearInitAll(pExpr, pNode); |
6544 | }else if( pNode->xNext==0 ){ |
6545 | pNode->bEof = 1; |
6546 | }else{ |
6547 | int i; |
6548 | int nEof = 0; |
6549 | for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){ |
6550 | Fts5ExprNode *pChild = pNode->apChild[i]; |
6551 | rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); |
6552 | assert( pChild->bEof==0 || pChild->bEof==1 ); |
6553 | nEof += pChild->bEof; |
6554 | } |
6555 | pNode->iRowid = pNode->apChild[0]->iRowid; |
6556 | |
6557 | switch( pNode->eType ){ |
6558 | case FTS5_AND: |
6559 | if( nEof>0 ) fts5ExprSetEof(pNode); |
6560 | break; |
6561 | |
6562 | case FTS5_OR: |
6563 | if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); |
6564 | break; |
6565 | |
6566 | default: |
6567 | assert( pNode->eType==FTS5_NOT ); |
6568 | pNode->bEof = pNode->apChild[0]->bEof; |
6569 | break; |
6570 | } |
6571 | } |
6572 | |
6573 | if( rc==SQLITE_OK ){ |
6574 | rc = fts5ExprNodeTest(pExpr, pNode); |
6575 | } |
6576 | return rc; |
6577 | } |
6578 | |
6579 | |
6580 | /* |
6581 | ** Begin iterating through the set of documents in index pIdx matched by |
6582 | ** the MATCH expression passed as the first argument. If the "bDesc" |
6583 | ** parameter is passed a non-zero value, iteration is in descending rowid |
6584 | ** order. Or, if it is zero, in ascending order. |
6585 | ** |
6586 | ** If iterating in ascending rowid order (bDesc==0), the first document |
6587 | ** visited is that with the smallest rowid that is larger than or equal |
6588 | ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1), |
6589 | ** then the first document visited must have a rowid smaller than or |
6590 | ** equal to iFirst. |
6591 | ** |
6592 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It |
6593 | ** is not considered an error if the query does not match any documents. |
6594 | */ |
6595 | static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ |
6596 | Fts5ExprNode *pRoot = p->pRoot; |
6597 | int rc; /* Return code */ |
6598 | |
6599 | p->pIndex = pIdx; |
6600 | p->bDesc = bDesc; |
6601 | rc = fts5ExprNodeFirst(p, pRoot); |
6602 | |
6603 | /* If not at EOF but the current rowid occurs earlier than iFirst in |
6604 | ** the iteration order, move to document iFirst or later. */ |
6605 | if( rc==SQLITE_OK |
6606 | && 0==pRoot->bEof |
6607 | && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 |
6608 | ){ |
6609 | rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); |
6610 | } |
6611 | |
6612 | /* If the iterator is not at a real match, skip forward until it is. */ |
6613 | while( pRoot->bNomatch && rc==SQLITE_OK ){ |
6614 | assert( pRoot->bEof==0 ); |
6615 | rc = fts5ExprNodeNext(p, pRoot, 0, 0); |
6616 | } |
6617 | return rc; |
6618 | } |
6619 | |
6620 | /* |
6621 | ** Move to the next document |
6622 | ** |
6623 | ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It |
6624 | ** is not considered an error if the query does not match any documents. |
6625 | */ |
6626 | static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ |
6627 | int rc; |
6628 | Fts5ExprNode *pRoot = p->pRoot; |
6629 | assert( pRoot->bEof==0 && pRoot->bNomatch==0 ); |
6630 | do { |
6631 | rc = fts5ExprNodeNext(p, pRoot, 0, 0); |
6632 | assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) ); |
6633 | }while( pRoot->bNomatch ); |
6634 | if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ |
6635 | pRoot->bEof = 1; |
6636 | } |
6637 | return rc; |
6638 | } |
6639 | |
6640 | static int sqlite3Fts5ExprEof(Fts5Expr *p){ |
6641 | return p->pRoot->bEof; |
6642 | } |
6643 | |
6644 | static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ |
6645 | return p->pRoot->iRowid; |
6646 | } |
6647 | |
6648 | static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ |
6649 | int rc = SQLITE_OK; |
6650 | *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); |
6651 | return rc; |
6652 | } |
6653 | |
6654 | /* |
6655 | ** Free the phrase object passed as the only argument. |
6656 | */ |
6657 | static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ |
6658 | if( pPhrase ){ |
6659 | int i; |
6660 | for(i=0; i<pPhrase->nTerm; i++){ |
6661 | Fts5ExprTerm *pSyn; |
6662 | Fts5ExprTerm *pNext; |
6663 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; |
6664 | sqlite3_free(pTerm->zTerm); |
6665 | sqlite3Fts5IterClose(pTerm->pIter); |
6666 | for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ |
6667 | pNext = pSyn->pSynonym; |
6668 | sqlite3Fts5IterClose(pSyn->pIter); |
6669 | fts5BufferFree((Fts5Buffer*)&pSyn[1]); |
6670 | sqlite3_free(pSyn); |
6671 | } |
6672 | } |
6673 | if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); |
6674 | sqlite3_free(pPhrase); |
6675 | } |
6676 | } |
6677 | |
6678 | /* |
6679 | ** Set the "bFirst" flag on the first token of the phrase passed as the |
6680 | ** only argument. |
6681 | */ |
6682 | static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){ |
6683 | if( pPhrase && pPhrase->nTerm ){ |
6684 | pPhrase->aTerm[0].bFirst = 1; |
6685 | } |
6686 | } |
6687 | |
/*
** Append phrase pPhrase to nearset pNear and return the resulting nearset.
** If pNear is NULL, a new Fts5ExprNearset object is allocated and populated
** with pPhrase. The returned pointer may differ from pNear, as the object
** is reallocated whenever its apPhrase[] array must grow.
**
** If pParse->rc is SQLITE_OK and pPhrase is NULL, pNear is returned
** unchanged.
**
** If pParse->rc is already set on entry, or if an OOM error occurs, both
** the pNear and pPhrase objects are freed and NULL returned.
*/
static Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNearset *pNear,         /* Existing nearset, or NULL */
  Fts5ExprPhrase *pPhrase         /* Recently parsed phrase */
){
  const int SZALLOC = 8;          /* apPhrase[] grows this many slots at a time */
  Fts5ExprNearset *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    if( pPhrase==0 ){
      return pNear;
    }
    if( pNear==0 ){
      /* First phrase: allocate a zeroed nearset with room for SZALLOC
      ** phrase pointers. */
      sqlite3_int64 nByte;
      nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
      pRet = sqlite3_malloc64(nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }else{
        memset(pRet, 0, (size_t)nByte);
      }
    }else if( (pNear->nPhrase % SZALLOC)==0 ){
      /* The array is full (nPhrase is a multiple of SZALLOC). Grow it by
      ** another SZALLOC slots. On failure pNear is still valid and is
      ** freed below. */
      int nNew = pNear->nPhrase + SZALLOC;
      sqlite3_int64 nByte;

      nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
      pRet = (Fts5ExprNearset*)sqlite3_realloc64(pNear, nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }
    }else{
      pRet = pNear;
    }
  }

  if( pRet==0 ){
    /* Error path: release both arguments. */
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNearsetFree(pNear);
    sqlite3Fts5ParsePhraseFree(pPhrase);
  }else{
    if( pRet->nPhrase>0 ){
      /* The nearset already holds at least one phrase. The asserts check
      ** that its last phrase is the second-to-last entry of
      ** pParse->apPhrase[] (the last entry being pPhrase's slot). */
      Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
      assert( pParse!=0 );
      assert( pParse->apPhrase!=0 );
      assert( pParse->nPhrase>=2 );
      assert( pLast==pParse->apPhrase[pParse->nPhrase-2] );
      if( pPhrase->nTerm==0 ){
        /* The new phrase has no terms: discard it and re-store pLast in
        ** the slot it already occupied. */
        fts5ExprPhraseFree(pPhrase);
        pRet->nPhrase--;
        pParse->nPhrase--;
        pPhrase = pLast;
      }else if( pLast->nTerm==0 ){
        /* The previous phrase has no terms: discard it and let pPhrase
        ** take over its slot in both arrays. */
        fts5ExprPhraseFree(pLast);
        pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
        pParse->nPhrase--;
        pRet->nPhrase--;
      }
    }
    pRet->apPhrase[pRet->nPhrase++] = pPhrase;
  }
  return pRet;
}
6758 | |
6759 | typedef struct TokenCtx TokenCtx; |
6760 | struct TokenCtx { |
6761 | Fts5ExprPhrase *pPhrase; |
6762 | int rc; |
6763 | }; |
6764 | |
6765 | /* |
6766 | ** Callback for tokenizing terms used by ParseTerm(). |
6767 | */ |
6768 | static int fts5ParseTokenize( |
6769 | void *pContext, /* Pointer to Fts5InsertCtx object */ |
6770 | int tflags, /* Mask of FTS5_TOKEN_* flags */ |
6771 | const char *pToken, /* Buffer containing token */ |
6772 | int nToken, /* Size of token in bytes */ |
6773 | int iUnused1, /* Start offset of token */ |
6774 | int iUnused2 /* End offset of token */ |
6775 | ){ |
6776 | int rc = SQLITE_OK; |
6777 | const int SZALLOC = 8; |
6778 | TokenCtx *pCtx = (TokenCtx*)pContext; |
6779 | Fts5ExprPhrase *pPhrase = pCtx->pPhrase; |
6780 | |
6781 | UNUSED_PARAM2(iUnused1, iUnused2); |
6782 | |
6783 | /* If an error has already occurred, this is a no-op */ |
6784 | if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; |
6785 | if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
6786 | |
6787 | if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ |
6788 | Fts5ExprTerm *pSyn; |
6789 | sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; |
6790 | pSyn = (Fts5ExprTerm*)sqlite3_malloc64(nByte); |
6791 | if( pSyn==0 ){ |
6792 | rc = SQLITE_NOMEM; |
6793 | }else{ |
6794 | memset(pSyn, 0, (size_t)nByte); |
6795 | pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); |
6796 | memcpy(pSyn->zTerm, pToken, nToken); |
6797 | pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; |
6798 | pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; |
6799 | } |
6800 | }else{ |
6801 | Fts5ExprTerm *pTerm; |
6802 | if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ |
6803 | Fts5ExprPhrase *pNew; |
6804 | int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0); |
6805 | |
6806 | pNew = (Fts5ExprPhrase*)sqlite3_realloc64(pPhrase, |
6807 | sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew |
6808 | ); |
6809 | if( pNew==0 ){ |
6810 | rc = SQLITE_NOMEM; |
6811 | }else{ |
6812 | if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); |
6813 | pCtx->pPhrase = pPhrase = pNew; |
6814 | pNew->nTerm = nNew - SZALLOC; |
6815 | } |
6816 | } |
6817 | |
6818 | if( rc==SQLITE_OK ){ |
6819 | pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; |
6820 | memset(pTerm, 0, sizeof(Fts5ExprTerm)); |
6821 | pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); |
6822 | } |
6823 | } |
6824 | |
6825 | pCtx->rc = rc; |
6826 | return rc; |
6827 | } |
6828 | |
6829 | |
/*
** Free the phrase object passed as the only argument. This is a thin
** wrapper that forwards pPhrase to fts5ExprPhraseFree().
*/
static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
  fts5ExprPhraseFree(pPhrase);
}
6836 | |
6837 | /* |
6838 | ** Free the phrase object passed as the second argument. |
6839 | */ |
6840 | static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ |
6841 | if( pNear ){ |
6842 | int i; |
6843 | for(i=0; i<pNear->nPhrase; i++){ |
6844 | fts5ExprPhraseFree(pNear->apPhrase[i]); |
6845 | } |
6846 | sqlite3_free(pNear->pColset); |
6847 | sqlite3_free(pNear); |
6848 | } |
6849 | } |
6850 | |
/*
** Store expression node p in pParse->pExpr. The assert() requires that
** pParse->pExpr has not already been set.
*/
static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
  assert( pParse->pExpr==0 );
  pParse->pExpr = p;
}
6855 | |
6856 | static int parseGrowPhraseArray(Fts5Parse *pParse){ |
6857 | if( (pParse->nPhrase % 8)==0 ){ |
6858 | sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); |
6859 | Fts5ExprPhrase **apNew; |
6860 | apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte); |
6861 | if( apNew==0 ){ |
6862 | pParse->rc = SQLITE_NOMEM; |
6863 | return SQLITE_NOMEM; |
6864 | } |
6865 | pParse->apPhrase = apNew; |
6866 | } |
6867 | return SQLITE_OK; |
6868 | } |
6869 | |
/*
** This function is called by the parser to process a string token. The
** string may or may not be quoted. In any case it is tokenized and a
** phrase object consisting of all tokens returned.
**
** If pAppend is not NULL, the new tokens are appended to that phrase
** (which may be reallocated). If pAppend is NULL, a new phrase is created
** and a new slot for it is claimed in pParse->apPhrase[]. Either way the
** last slot of pParse->apPhrase[] is set to the returned phrase.
**
** If an error occurs, the error code is stored in pParse->rc, the phrase
** is freed and NULL is returned.
*/
static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprPhrase *pAppend,        /* Phrase to append to */
  Fts5Token *pToken,              /* String to tokenize */
  int bPrefix                     /* True if there is a trailing "*" */
){
  Fts5Config *pConfig = pParse->pConfig;
  TokenCtx sCtx;                  /* Context object passed to callback */
  int rc;                         /* Tokenize return code */
  char *z = 0;

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  /* Copy the token text into z, remove any quoting, then tokenize it. The
  ** fts5ParseTokenize() callback adds each token to sCtx.pPhrase. */
  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){
    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
    int n;
    sqlite3Fts5Dequote(z);
    n = (int)strlen(z);
    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  if( rc || (rc = sCtx.rc) ){
    /* Either the tokenizer or the callback failed. */
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else{

    if( pAppend==0 ){
      /* A brand new phrase: claim the next slot in pParse->apPhrase[]. */
      if( parseGrowPhraseArray(pParse) ){
        fts5ExprPhraseFree(sCtx.pPhrase);
        return 0;
      }
      pParse->nPhrase++;
    }

    if( sCtx.pPhrase==0 ){
      /* This happens when parsing a token or quoted phrase that contains
      ** no token characters at all. (e.g ... MATCH '""'). */
      sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
    }else if( sCtx.pPhrase->nTerm ){
      /* The prefix flag applies to the final term of the phrase only. */
      sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
    }
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
  }

  return sCtx.pPhrase;
}
6924 | |
6925 | /* |
6926 | ** Create a new FTS5 expression by cloning phrase iPhrase of the |
6927 | ** expression passed as the second argument. |
6928 | */ |
6929 | static int sqlite3Fts5ExprClonePhrase( |
6930 | Fts5Expr *pExpr, |
6931 | int iPhrase, |
6932 | Fts5Expr **ppNew |
6933 | ){ |
6934 | int rc = SQLITE_OK; /* Return code */ |
6935 | Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ |
6936 | Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ |
6937 | TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ |
6938 | |
6939 | pOrig = pExpr->apExprPhrase[iPhrase]; |
6940 | pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); |
6941 | if( rc==SQLITE_OK ){ |
6942 | pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, |
6943 | sizeof(Fts5ExprPhrase*)); |
6944 | } |
6945 | if( rc==SQLITE_OK ){ |
6946 | pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, |
6947 | sizeof(Fts5ExprNode)); |
6948 | } |
6949 | if( rc==SQLITE_OK ){ |
6950 | pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, |
6951 | sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); |
6952 | } |
6953 | if( rc==SQLITE_OK ){ |
6954 | Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; |
6955 | if( pColsetOrig ){ |
6956 | sqlite3_int64 nByte; |
6957 | Fts5Colset *pColset; |
6958 | nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int); |
6959 | pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); |
6960 | if( pColset ){ |
6961 | memcpy(pColset, pColsetOrig, (size_t)nByte); |
6962 | } |
6963 | pNew->pRoot->pNear->pColset = pColset; |
6964 | } |
6965 | } |
6966 | |
6967 | if( pOrig->nTerm ){ |
6968 | int i; /* Used to iterate through phrase terms */ |
6969 | for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ |
6970 | int tflags = 0; |
6971 | Fts5ExprTerm *p; |
6972 | for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ |
6973 | const char *zTerm = p->zTerm; |
6974 | rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), |
6975 | 0, 0); |
6976 | tflags = FTS5_TOKEN_COLOCATED; |
6977 | } |
6978 | if( rc==SQLITE_OK ){ |
6979 | sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; |
6980 | sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst; |
6981 | } |
6982 | } |
6983 | }else{ |
6984 | /* This happens when parsing a token or quoted phrase that contains |
6985 | ** no token characters at all. (e.g ... MATCH '""'). */ |
6986 | sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase)); |
6987 | } |
6988 | |
6989 | if( rc==SQLITE_OK && ALWAYS(sCtx.pPhrase) ){ |
6990 | /* All the allocations succeeded. Put the expression object together. */ |
6991 | pNew->pIndex = pExpr->pIndex; |
6992 | pNew->pConfig = pExpr->pConfig; |
6993 | pNew->nPhrase = 1; |
6994 | pNew->apExprPhrase[0] = sCtx.pPhrase; |
6995 | pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; |
6996 | pNew->pRoot->pNear->nPhrase = 1; |
6997 | sCtx.pPhrase->pNode = pNew->pRoot; |
6998 | |
6999 | if( pOrig->nTerm==1 |
7000 | && pOrig->aTerm[0].pSynonym==0 |
7001 | && pOrig->aTerm[0].bFirst==0 |
7002 | ){ |
7003 | pNew->pRoot->eType = FTS5_TERM; |
7004 | pNew->pRoot->xNext = fts5ExprNodeNext_TERM; |
7005 | }else{ |
7006 | pNew->pRoot->eType = FTS5_STRING; |
7007 | pNew->pRoot->xNext = fts5ExprNodeNext_STRING; |
7008 | } |
7009 | }else{ |
7010 | sqlite3Fts5ExprFree(pNew); |
7011 | fts5ExprPhraseFree(sCtx.pPhrase); |
7012 | pNew = 0; |
7013 | } |
7014 | |
7015 | *ppNew = pNew; |
7016 | return rc; |
7017 | } |
7018 | |
7019 | |
7020 | /* |
7021 | ** Token pTok has appeared in a MATCH expression where the NEAR operator |
7022 | ** is expected. If token pTok does not contain "NEAR", store an error |
7023 | ** in the pParse object. |
7024 | */ |
7025 | static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ |
7026 | if( pTok->n!=4 || memcmp("NEAR" , pTok->p, 4) ){ |
7027 | sqlite3Fts5ParseError( |
7028 | pParse, "fts5: syntax error near \"%.*s\"" , pTok->n, pTok->p |
7029 | ); |
7030 | } |
7031 | } |
7032 | |
7033 | static void sqlite3Fts5ParseSetDistance( |
7034 | Fts5Parse *pParse, |
7035 | Fts5ExprNearset *pNear, |
7036 | Fts5Token *p |
7037 | ){ |
7038 | if( pNear ){ |
7039 | int nNear = 0; |
7040 | int i; |
7041 | if( p->n ){ |
7042 | for(i=0; i<p->n; i++){ |
7043 | char c = (char)p->p[i]; |
7044 | if( c<'0' || c>'9' ){ |
7045 | sqlite3Fts5ParseError( |
7046 | pParse, "expected integer, got \"%.*s\"" , p->n, p->p |
7047 | ); |
7048 | return; |
7049 | } |
7050 | nNear = nNear * 10 + (p->p[i] - '0'); |
7051 | } |
7052 | }else{ |
7053 | nNear = FTS5_DEFAULT_NEARDIST; |
7054 | } |
7055 | pNear->nNear = nNear; |
7056 | } |
7057 | } |
7058 | |
7059 | /* |
7060 | ** The second argument passed to this function may be NULL, or it may be |
7061 | ** an existing Fts5Colset object. This function returns a pointer to |
7062 | ** a new colset object containing the contents of (p) with new value column |
7063 | ** number iCol appended. |
7064 | ** |
7065 | ** If an OOM error occurs, store an error code in pParse and return NULL. |
7066 | ** The old colset object (if any) is not freed in this case. |
7067 | */ |
7068 | static Fts5Colset *fts5ParseColset( |
7069 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ |
7070 | Fts5Colset *p, /* Existing colset object */ |
7071 | int iCol /* New column to add to colset object */ |
7072 | ){ |
7073 | int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */ |
7074 | Fts5Colset *pNew; /* New colset object to return */ |
7075 | |
7076 | assert( pParse->rc==SQLITE_OK ); |
7077 | assert( iCol>=0 && iCol<pParse->pConfig->nCol ); |
7078 | |
7079 | pNew = sqlite3_realloc64(p, sizeof(Fts5Colset) + sizeof(int)*nCol); |
7080 | if( pNew==0 ){ |
7081 | pParse->rc = SQLITE_NOMEM; |
7082 | }else{ |
7083 | int *aiCol = pNew->aiCol; |
7084 | int i, j; |
7085 | for(i=0; i<nCol; i++){ |
7086 | if( aiCol[i]==iCol ) return pNew; |
7087 | if( aiCol[i]>iCol ) break; |
7088 | } |
7089 | for(j=nCol; j>i; j--){ |
7090 | aiCol[j] = aiCol[j-1]; |
7091 | } |
7092 | aiCol[i] = iCol; |
7093 | pNew->nCol = nCol+1; |
7094 | |
7095 | #ifndef NDEBUG |
7096 | /* Check that the array is in order and contains no duplicate entries. */ |
7097 | for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); |
7098 | #endif |
7099 | } |
7100 | |
7101 | return pNew; |
7102 | } |
7103 | |
7104 | /* |
7105 | ** Allocate and return an Fts5Colset object specifying the inverse of |
7106 | ** the colset passed as the second argument. Free the colset passed |
7107 | ** as the second argument before returning. |
7108 | */ |
7109 | static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){ |
7110 | Fts5Colset *pRet; |
7111 | int nCol = pParse->pConfig->nCol; |
7112 | |
7113 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, |
7114 | sizeof(Fts5Colset) + sizeof(int)*nCol |
7115 | ); |
7116 | if( pRet ){ |
7117 | int i; |
7118 | int iOld = 0; |
7119 | for(i=0; i<nCol; i++){ |
7120 | if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ |
7121 | pRet->aiCol[pRet->nCol++] = i; |
7122 | }else{ |
7123 | iOld++; |
7124 | } |
7125 | } |
7126 | } |
7127 | |
7128 | sqlite3_free(p); |
7129 | return pRet; |
7130 | } |
7131 | |
7132 | static Fts5Colset *sqlite3Fts5ParseColset( |
7133 | Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ |
7134 | Fts5Colset *pColset, /* Existing colset object */ |
7135 | Fts5Token *p |
7136 | ){ |
7137 | Fts5Colset *pRet = 0; |
7138 | int iCol; |
7139 | char *z; /* Dequoted copy of token p */ |
7140 | |
7141 | z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); |
7142 | if( pParse->rc==SQLITE_OK ){ |
7143 | Fts5Config *pConfig = pParse->pConfig; |
7144 | sqlite3Fts5Dequote(z); |
7145 | for(iCol=0; iCol<pConfig->nCol; iCol++){ |
7146 | if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break; |
7147 | } |
7148 | if( iCol==pConfig->nCol ){ |
7149 | sqlite3Fts5ParseError(pParse, "no such column: %s" , z); |
7150 | }else{ |
7151 | pRet = fts5ParseColset(pParse, pColset, iCol); |
7152 | } |
7153 | sqlite3_free(z); |
7154 | } |
7155 | |
7156 | if( pRet==0 ){ |
7157 | assert( pParse->rc!=SQLITE_OK ); |
7158 | sqlite3_free(pColset); |
7159 | } |
7160 | |
7161 | return pRet; |
7162 | } |
7163 | |
7164 | /* |
7165 | ** If argument pOrig is NULL, or if (*pRc) is set to anything other than |
7166 | ** SQLITE_OK when this function is called, NULL is returned. |
7167 | ** |
7168 | ** Otherwise, a copy of (*pOrig) is made into memory obtained from |
7169 | ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation |
7170 | ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned. |
7171 | */ |
7172 | static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){ |
7173 | Fts5Colset *pRet; |
7174 | if( pOrig ){ |
7175 | sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int); |
7176 | pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte); |
7177 | if( pRet ){ |
7178 | memcpy(pRet, pOrig, (size_t)nByte); |
7179 | } |
7180 | }else{ |
7181 | pRet = 0; |
7182 | } |
7183 | return pRet; |
7184 | } |
7185 | |
7186 | /* |
7187 | ** Remove from colset pColset any columns that are not also in colset pMerge. |
7188 | */ |
7189 | static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){ |
7190 | int iIn = 0; /* Next input in pColset */ |
7191 | int iMerge = 0; /* Next input in pMerge */ |
7192 | int iOut = 0; /* Next output slot in pColset */ |
7193 | |
7194 | while( iIn<pColset->nCol && iMerge<pMerge->nCol ){ |
7195 | int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge]; |
7196 | if( iDiff==0 ){ |
7197 | pColset->aiCol[iOut++] = pMerge->aiCol[iMerge]; |
7198 | iMerge++; |
7199 | iIn++; |
7200 | }else if( iDiff>0 ){ |
7201 | iMerge++; |
7202 | }else{ |
7203 | iIn++; |
7204 | } |
7205 | } |
7206 | pColset->nCol = iOut; |
7207 | } |
7208 | |
7209 | /* |
7210 | ** Recursively apply colset pColset to expression node pNode and all of |
7211 | ** its decendents. If (*ppFree) is not NULL, it contains a spare copy |
7212 | ** of pColset. This function may use the spare copy and set (*ppFree) to |
7213 | ** zero, or it may create copies of pColset using fts5CloneColset(). |
7214 | */ |
7215 | static void fts5ParseSetColset( |
7216 | Fts5Parse *pParse, |
7217 | Fts5ExprNode *pNode, |
7218 | Fts5Colset *pColset, |
7219 | Fts5Colset **ppFree |
7220 | ){ |
7221 | if( pParse->rc==SQLITE_OK ){ |
7222 | assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING |
7223 | || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR |
7224 | || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF |
7225 | ); |
7226 | if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ |
7227 | Fts5ExprNearset *pNear = pNode->pNear; |
7228 | if( pNear->pColset ){ |
7229 | fts5MergeColset(pNear->pColset, pColset); |
7230 | if( pNear->pColset->nCol==0 ){ |
7231 | pNode->eType = FTS5_EOF; |
7232 | pNode->xNext = 0; |
7233 | } |
7234 | }else if( *ppFree ){ |
7235 | pNear->pColset = pColset; |
7236 | *ppFree = 0; |
7237 | }else{ |
7238 | pNear->pColset = fts5CloneColset(&pParse->rc, pColset); |
7239 | } |
7240 | }else{ |
7241 | int i; |
7242 | assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 ); |
7243 | for(i=0; i<pNode->nChild; i++){ |
7244 | fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree); |
7245 | } |
7246 | } |
7247 | } |
7248 | } |
7249 | |
7250 | /* |
7251 | ** Apply colset pColset to expression node pExpr and all of its descendents. |
7252 | */ |
7253 | static void sqlite3Fts5ParseSetColset( |
7254 | Fts5Parse *pParse, |
7255 | Fts5ExprNode *pExpr, |
7256 | Fts5Colset *pColset |
7257 | ){ |
7258 | Fts5Colset *pFree = pColset; |
7259 | if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
7260 | sqlite3Fts5ParseError(pParse, |
7261 | "fts5: column queries are not supported (detail=none)" |
7262 | ); |
7263 | }else{ |
7264 | fts5ParseSetColset(pParse, pExpr, pColset, &pFree); |
7265 | } |
7266 | sqlite3_free(pFree); |
7267 | } |
7268 | |
7269 | static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ |
7270 | switch( pNode->eType ){ |
7271 | case FTS5_STRING: { |
7272 | Fts5ExprNearset *pNear = pNode->pNear; |
7273 | if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 |
7274 | && pNear->apPhrase[0]->aTerm[0].pSynonym==0 |
7275 | && pNear->apPhrase[0]->aTerm[0].bFirst==0 |
7276 | ){ |
7277 | pNode->eType = FTS5_TERM; |
7278 | pNode->xNext = fts5ExprNodeNext_TERM; |
7279 | }else{ |
7280 | pNode->xNext = fts5ExprNodeNext_STRING; |
7281 | } |
7282 | break; |
7283 | }; |
7284 | |
7285 | case FTS5_OR: { |
7286 | pNode->xNext = fts5ExprNodeNext_OR; |
7287 | break; |
7288 | }; |
7289 | |
7290 | case FTS5_AND: { |
7291 | pNode->xNext = fts5ExprNodeNext_AND; |
7292 | break; |
7293 | }; |
7294 | |
7295 | default: assert( pNode->eType==FTS5_NOT ); { |
7296 | pNode->xNext = fts5ExprNodeNext_NOT; |
7297 | break; |
7298 | }; |
7299 | } |
7300 | } |
7301 | |
7302 | static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ |
7303 | if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ |
7304 | int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; |
7305 | memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); |
7306 | p->nChild += pSub->nChild; |
7307 | sqlite3_free(pSub); |
7308 | }else{ |
7309 | p->apChild[p->nChild++] = pSub; |
7310 | } |
7311 | } |
7312 | |
/*
** This function is used when parsing LIKE or GLOB patterns against
** trigram indexes that specify either detail=column or detail=none.
** It converts a phrase:
**
**     abc + def + ghi
**
** into an AND tree:
**
**     abc AND def AND ghi
**
** Argument pNear must contain exactly one phrase. Each term of that
** phrase is copied into a new single-term phrase, wrapped in its own
** nearset and FTS5_STRING node, and made a child of the returned
** FTS5_AND node.
**
** If successful, pNear is freed and the new node returned. If an error
** occurs, an error code is left in pParse->rc and NULL is returned. In
** that case pNear is not freed by this function.
*/
static Fts5ExprNode *fts5ParsePhraseToAnd(
  Fts5Parse *pParse,
  Fts5ExprNearset *pNear
){
  int nTerm = pNear->apPhrase[0]->nTerm;
  int ii;
  int nByte;
  Fts5ExprNode *pRet;

  assert( pNear->nPhrase==1 );
  assert( pParse->bPhraseToAnd );

  nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*);
  pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
  if( pRet ){
    pRet->eType = FTS5_AND;
    pRet->nChild = nTerm;
    fts5ExprAssignXNext(pRet);
    /* Give up the pParse->apPhrase[] slot of the original phrase. The loop
    ** below appends one entry for each new single-term phrase. */
    pParse->nPhrase--;
    for(ii=0; ii<nTerm; ii++){
      Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
          &pParse->rc, sizeof(Fts5ExprPhrase)
      );
      if( pPhrase ){
        if( parseGrowPhraseArray(pParse) ){
          fts5ExprPhraseFree(pPhrase);
        }else{
          pParse->apPhrase[pParse->nPhrase++] = pPhrase;
          pPhrase->nTerm = 1;
          /* NOTE(review): the -1 length presumably tells
          ** sqlite3Fts5Strndup() to treat the input as nul-terminated -
          ** confirm against its definition. */
          pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
              &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
          );
          pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
              0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
          );
        }
      }
    }

    if( pParse->rc ){
      /* Something failed part way through. Discard the partial tree. */
      sqlite3Fts5ParseNodeFree(pRet);
      pRet = 0;
    }else{
      /* The original nearset has been fully replaced. */
      sqlite3Fts5ParseNearsetFree(pNear);
    }
  }

  return pRet;
}
7373 | |
/*
** Allocate and return a new expression object. If anything goes wrong (i.e.
** OOM error), leave an error code in pParse and return NULL.
**
** For eType==FTS5_STRING, pNear supplies the nearset and pLeft/pRight must
** be NULL. For AND, OR and NOT, pNear must be NULL and pLeft/pRight are the
** child expressions. In that case, if either child is NULL the other child
** is returned as is.
**
** Whenever NULL is returned because of an error, or because pParse->rc was
** already set on entry, pLeft, pRight and pNear are all freed.
*/
static Fts5ExprNode *sqlite3Fts5ParseNode(
  Fts5Parse *pParse,              /* Parse context */
  int eType,                      /* FTS5_STRING, AND, OR or NOT */
  Fts5ExprNode *pLeft,            /* Left hand child expression */
  Fts5ExprNode *pRight,           /* Right hand child expression */
  Fts5ExprNearset *pNear          /* For STRING expressions, the near cluster */
){
  Fts5ExprNode *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    int nChild = 0;               /* Number of children of returned node */
    sqlite3_int64 nByte;          /* Bytes of space to allocate for this node */

    assert( (eType!=FTS5_STRING && !pNear)
         || (eType==FTS5_STRING && !pLeft && !pRight)
    );
    /* Degenerate cases that require no new node. */
    if( eType==FTS5_STRING && pNear==0 ) return 0;
    if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
    if( eType!=FTS5_STRING && pRight==0 ) return pLeft;

    if( eType==FTS5_STRING
     && pParse->bPhraseToAnd
     && pNear->apPhrase[0]->nTerm>1
    ){
      /* Convert a multi-term phrase into an AND of single-term nodes. */
      pRet = fts5ParsePhraseToAnd(pParse, pNear);
    }else{
      if( eType==FTS5_NOT ){
        nChild = 2;
      }else if( eType==FTS5_AND || eType==FTS5_OR ){
        /* A child of the same type is flattened into the new node by
        ** fts5ExprAddChildren(), so it contributes its own children
        ** rather than a single slot. */
        nChild = 2;
        if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
        if( pRight->eType==eType ) nChild += pRight->nChild-1;
      }

      nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1);
      pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);

      if( pRet ){
        pRet->eType = eType;
        pRet->pNear = pNear;
        fts5ExprAssignXNext(pRet);
        if( eType==FTS5_STRING ){
          int iPhrase;
          /* Link each phrase back to its node. If any phrase has no terms
          ** the node is marked FTS5_EOF. */
          for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
            pNear->apPhrase[iPhrase]->pNode = pRet;
            if( pNear->apPhrase[iPhrase]->nTerm==0 ){
              pRet->xNext = 0;
              pRet->eType = FTS5_EOF;
            }
          }

          if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){
            /* Unless detail=full, NEAR queries, multi-term phrases and
            ** terms with the bFirst flag set are rejected. */
            Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
            if( pNear->nPhrase!=1
             || pPhrase->nTerm>1
             || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst)
            ){
              sqlite3Fts5ParseError(pParse,
                  "fts5: %s queries are not supported (detail!=full)",
                  pNear->nPhrase==1 ? "phrase" : "NEAR"
              );
              /* Free the node only. pNear is released by the cleanup code
              ** at the end of this function. */
              sqlite3_free(pRet);
              pRet = 0;
            }
          }
        }else{
          fts5ExprAddChildren(pRet, pLeft);
          fts5ExprAddChildren(pRet, pRight);
        }
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}
7459 | |
/*
** Combine expressions pLeft and pRight with an implicit AND operator and
** return the result.
**
** FTS5_EOF operands are dropped instead of being added to the AND:
**
**   * If pRight is an EOF node, it is freed and pLeft returned.
**
**   * Otherwise, if the last operand on the left (pLeft itself, or its
**     final child if pLeft is an AND node) is an EOF node, that node is
**     freed and pRight takes its place.
**
** In both cases the pParse->apPhrase[] array is adjusted to remove the
** entry belonging to the discarded node.
**
** If pParse->rc is already set, both arguments are freed and NULL returned.
*/
static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNode *pLeft,            /* Left hand child expression */
  Fts5ExprNode *pRight            /* Right hand child expression */
){
  Fts5ExprNode *pRet = 0;
  Fts5ExprNode *pPrev;            /* Right-most operand within pLeft */

  if( pParse->rc ){
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
  }else{

    assert( pLeft->eType==FTS5_STRING
        || pLeft->eType==FTS5_TERM
        || pLeft->eType==FTS5_EOF
        || pLeft->eType==FTS5_AND
    );
    assert( pRight->eType==FTS5_STRING
        || pRight->eType==FTS5_TERM
        || pRight->eType==FTS5_EOF
    );

    if( pLeft->eType==FTS5_AND ){
      pPrev = pLeft->apChild[pLeft->nChild-1];
    }else{
      pPrev = pLeft;
    }
    assert( pPrev->eType==FTS5_STRING
        || pPrev->eType==FTS5_TERM
        || pPrev->eType==FTS5_EOF
    );

    if( pRight->eType==FTS5_EOF ){
      /* Discard pRight. Its phrase is the final entry in apPhrase[], so
      ** decrementing nPhrase removes it from the array. */
      assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
      sqlite3Fts5ParseNodeFree(pRight);
      pRet = pLeft;
      pParse->nPhrase--;
    }
    else if( pPrev->eType==FTS5_EOF ){
      Fts5ExprPhrase **ap;

      /* Substitute pRight for pPrev. */
      if( pPrev==pLeft ){
        pRet = pRight;
      }else{
        pLeft->apChild[pLeft->nChild-1] = pRight;
        pRet = pLeft;
      }

      /* The phrase belonging to pPrev sits immediately before the phrases
      ** of pRight in apPhrase[]. Shift the pRight phrases down one slot to
      ** overwrite it. */
      ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
      assert( ap[0]==pPrev->pNear->apPhrase[0] );
      memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
      pParse->nPhrase--;

      sqlite3Fts5ParseNodeFree(pPrev);
    }
    else{
      pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
    }
  }

  return pRet;
}
7523 | |
7524 | #ifdef SQLITE_TEST |
7525 | static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ |
7526 | sqlite3_int64 nByte = 0; |
7527 | Fts5ExprTerm *p; |
7528 | char *zQuoted; |
7529 | |
7530 | /* Determine the maximum amount of space required. */ |
7531 | for(p=pTerm; p; p=p->pSynonym){ |
7532 | nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; |
7533 | } |
7534 | zQuoted = sqlite3_malloc64(nByte); |
7535 | |
7536 | if( zQuoted ){ |
7537 | int i = 0; |
7538 | for(p=pTerm; p; p=p->pSynonym){ |
7539 | char *zIn = p->zTerm; |
7540 | zQuoted[i++] = '"'; |
7541 | while( *zIn ){ |
7542 | if( *zIn=='"' ) zQuoted[i++] = '"'; |
7543 | zQuoted[i++] = *zIn++; |
7544 | } |
7545 | zQuoted[i++] = '"'; |
7546 | if( p->pSynonym ) zQuoted[i++] = '|'; |
7547 | } |
7548 | if( pTerm->bPrefix ){ |
7549 | zQuoted[i++] = ' '; |
7550 | zQuoted[i++] = '*'; |
7551 | } |
7552 | zQuoted[i++] = '\0'; |
7553 | } |
7554 | return zQuoted; |
7555 | } |
7556 | |
/*
** Format the printf-style arguments (zFmt, ...) using sqlite3_vmprintf()
** and append the result to string zApp, returning a newly allocated
** string. If zApp is NULL, the formatted text alone is returned.
**
** zApp is always freed by this function. NULL is returned if either
** allocation fails. The caller is responsible for freeing the returned
** buffer using sqlite3_free().
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zFormatted;               /* Result of formatting zFmt */
  char *zResult;                  /* Value to return */
  va_list ap;

  va_start(ap, zFmt);
  zFormatted = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zResult = zFormatted;
  if( zApp!=0 && zFormatted!=0 ){
    zResult = sqlite3_mprintf("%s%s", zApp, zFormatted);
    sqlite3_free(zFormatted);
  }
  sqlite3_free(zApp);
  return zResult;
}
7571 | |
7572 | /* |
7573 | ** Compose a tcl-readable representation of expression pExpr. Return a |
7574 | ** pointer to a buffer containing that representation. It is the |
7575 | ** responsibility of the caller to at some point free the buffer using |
7576 | ** sqlite3_free(). |
7577 | */ |
7578 | static char *fts5ExprPrintTcl( |
7579 | Fts5Config *pConfig, |
7580 | const char *zNearsetCmd, |
7581 | Fts5ExprNode *pExpr |
7582 | ){ |
7583 | char *zRet = 0; |
7584 | if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ |
7585 | Fts5ExprNearset *pNear = pExpr->pNear; |
7586 | int i; |
7587 | int iTerm; |
7588 | |
7589 | zRet = fts5PrintfAppend(zRet, "%s " , zNearsetCmd); |
7590 | if( zRet==0 ) return 0; |
7591 | if( pNear->pColset ){ |
7592 | int *aiCol = pNear->pColset->aiCol; |
7593 | int nCol = pNear->pColset->nCol; |
7594 | if( nCol==1 ){ |
7595 | zRet = fts5PrintfAppend(zRet, "-col %d " , aiCol[0]); |
7596 | }else{ |
7597 | zRet = fts5PrintfAppend(zRet, "-col {%d" , aiCol[0]); |
7598 | for(i=1; i<pNear->pColset->nCol; i++){ |
7599 | zRet = fts5PrintfAppend(zRet, " %d" , aiCol[i]); |
7600 | } |
7601 | zRet = fts5PrintfAppend(zRet, "} " ); |
7602 | } |
7603 | if( zRet==0 ) return 0; |
7604 | } |
7605 | |
7606 | if( pNear->nPhrase>1 ){ |
7607 | zRet = fts5PrintfAppend(zRet, "-near %d " , pNear->nNear); |
7608 | if( zRet==0 ) return 0; |
7609 | } |
7610 | |
7611 | zRet = fts5PrintfAppend(zRet, "--" ); |
7612 | if( zRet==0 ) return 0; |
7613 | |
7614 | for(i=0; i<pNear->nPhrase; i++){ |
7615 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
7616 | |
7617 | zRet = fts5PrintfAppend(zRet, " {" ); |
7618 | for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ |
7619 | char *zTerm = pPhrase->aTerm[iTerm].zTerm; |
7620 | zRet = fts5PrintfAppend(zRet, "%s%s" , iTerm==0?"" :" " , zTerm); |
7621 | if( pPhrase->aTerm[iTerm].bPrefix ){ |
7622 | zRet = fts5PrintfAppend(zRet, "*" ); |
7623 | } |
7624 | } |
7625 | |
7626 | if( zRet ) zRet = fts5PrintfAppend(zRet, "}" ); |
7627 | if( zRet==0 ) return 0; |
7628 | } |
7629 | |
7630 | }else{ |
7631 | char const *zOp = 0; |
7632 | int i; |
7633 | switch( pExpr->eType ){ |
7634 | case FTS5_AND: zOp = "AND" ; break; |
7635 | case FTS5_NOT: zOp = "NOT" ; break; |
7636 | default: |
7637 | assert( pExpr->eType==FTS5_OR ); |
7638 | zOp = "OR" ; |
7639 | break; |
7640 | } |
7641 | |
7642 | zRet = sqlite3_mprintf("%s" , zOp); |
7643 | for(i=0; zRet && i<pExpr->nChild; i++){ |
7644 | char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); |
7645 | if( !z ){ |
7646 | sqlite3_free(zRet); |
7647 | zRet = 0; |
7648 | }else{ |
7649 | zRet = fts5PrintfAppend(zRet, " [%z]" , z); |
7650 | } |
7651 | } |
7652 | } |
7653 | |
7654 | return zRet; |
7655 | } |
7656 | |
7657 | static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ |
7658 | char *zRet = 0; |
7659 | if( pExpr->eType==0 ){ |
7660 | return sqlite3_mprintf("\"\"" ); |
7661 | }else |
7662 | if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ |
7663 | Fts5ExprNearset *pNear = pExpr->pNear; |
7664 | int i; |
7665 | int iTerm; |
7666 | |
7667 | if( pNear->pColset ){ |
7668 | int ii; |
7669 | Fts5Colset *pColset = pNear->pColset; |
7670 | if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{" ); |
7671 | for(ii=0; ii<pColset->nCol; ii++){ |
7672 | zRet = fts5PrintfAppend(zRet, "%s%s" , |
7673 | pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " " |
7674 | ); |
7675 | } |
7676 | if( zRet ){ |
7677 | zRet = fts5PrintfAppend(zRet, "%s : " , pColset->nCol>1 ? "}" : "" ); |
7678 | } |
7679 | if( zRet==0 ) return 0; |
7680 | } |
7681 | |
7682 | if( pNear->nPhrase>1 ){ |
7683 | zRet = fts5PrintfAppend(zRet, "NEAR(" ); |
7684 | if( zRet==0 ) return 0; |
7685 | } |
7686 | |
7687 | for(i=0; i<pNear->nPhrase; i++){ |
7688 | Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; |
7689 | if( i!=0 ){ |
7690 | zRet = fts5PrintfAppend(zRet, " " ); |
7691 | if( zRet==0 ) return 0; |
7692 | } |
7693 | for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ |
7694 | char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); |
7695 | if( zTerm ){ |
7696 | zRet = fts5PrintfAppend(zRet, "%s%s" , iTerm==0?"" :" + " , zTerm); |
7697 | sqlite3_free(zTerm); |
7698 | } |
7699 | if( zTerm==0 || zRet==0 ){ |
7700 | sqlite3_free(zRet); |
7701 | return 0; |
7702 | } |
7703 | } |
7704 | } |
7705 | |
7706 | if( pNear->nPhrase>1 ){ |
7707 | zRet = fts5PrintfAppend(zRet, ", %d)" , pNear->nNear); |
7708 | if( zRet==0 ) return 0; |
7709 | } |
7710 | |
7711 | }else{ |
7712 | char const *zOp = 0; |
7713 | int i; |
7714 | |
7715 | switch( pExpr->eType ){ |
7716 | case FTS5_AND: zOp = " AND " ; break; |
7717 | case FTS5_NOT: zOp = " NOT " ; break; |
7718 | default: |
7719 | assert( pExpr->eType==FTS5_OR ); |
7720 | zOp = " OR " ; |
7721 | break; |
7722 | } |
7723 | |
7724 | for(i=0; i<pExpr->nChild; i++){ |
7725 | char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]); |
7726 | if( z==0 ){ |
7727 | sqlite3_free(zRet); |
7728 | zRet = 0; |
7729 | }else{ |
7730 | int e = pExpr->apChild[i]->eType; |
7731 | int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF); |
7732 | zRet = fts5PrintfAppend(zRet, "%s%s%z%s" , |
7733 | (i==0 ? "" : zOp), |
7734 | (b?"(" :"" ), z, (b?")" :"" ) |
7735 | ); |
7736 | } |
7737 | if( zRet==0 ) break; |
7738 | } |
7739 | } |
7740 | |
7741 | return zRet; |
7742 | } |
7743 | |
7744 | /* |
7745 | ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) |
7746 | ** and fts5_expr_tcl() (bTcl!=0). |
7747 | */ |
7748 | static void fts5ExprFunction( |
7749 | sqlite3_context *pCtx, /* Function call context */ |
7750 | int nArg, /* Number of args */ |
7751 | sqlite3_value **apVal, /* Function arguments */ |
7752 | int bTcl |
7753 | ){ |
7754 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); |
7755 | sqlite3 *db = sqlite3_context_db_handle(pCtx); |
7756 | const char *zExpr = 0; |
7757 | char *zErr = 0; |
7758 | Fts5Expr *pExpr = 0; |
7759 | int rc; |
7760 | int i; |
7761 | |
7762 | const char **azConfig; /* Array of arguments for Fts5Config */ |
7763 | const char *zNearsetCmd = "nearset" ; |
7764 | int nConfig; /* Size of azConfig[] */ |
7765 | Fts5Config *pConfig = 0; |
7766 | int iArg = 1; |
7767 | |
7768 | if( nArg<1 ){ |
7769 | zErr = sqlite3_mprintf("wrong number of arguments to function %s" , |
7770 | bTcl ? "fts5_expr_tcl" : "fts5_expr" |
7771 | ); |
7772 | sqlite3_result_error(pCtx, zErr, -1); |
7773 | sqlite3_free(zErr); |
7774 | return; |
7775 | } |
7776 | |
7777 | if( bTcl && nArg>1 ){ |
7778 | zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); |
7779 | iArg = 2; |
7780 | } |
7781 | |
7782 | nConfig = 3 + (nArg-iArg); |
7783 | azConfig = (const char**)sqlite3_malloc64(sizeof(char*) * nConfig); |
7784 | if( azConfig==0 ){ |
7785 | sqlite3_result_error_nomem(pCtx); |
7786 | return; |
7787 | } |
7788 | azConfig[0] = 0; |
7789 | azConfig[1] = "main" ; |
7790 | azConfig[2] = "tbl" ; |
7791 | for(i=3; iArg<nArg; iArg++){ |
7792 | const char *z = (const char*)sqlite3_value_text(apVal[iArg]); |
7793 | azConfig[i++] = (z ? z : "" ); |
7794 | } |
7795 | |
7796 | zExpr = (const char*)sqlite3_value_text(apVal[0]); |
7797 | if( zExpr==0 ) zExpr = "" ; |
7798 | |
7799 | rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); |
7800 | if( rc==SQLITE_OK ){ |
7801 | rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr); |
7802 | } |
7803 | if( rc==SQLITE_OK ){ |
7804 | char *zText; |
7805 | if( pExpr->pRoot->xNext==0 ){ |
7806 | zText = sqlite3_mprintf("" ); |
7807 | }else if( bTcl ){ |
7808 | zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); |
7809 | }else{ |
7810 | zText = fts5ExprPrint(pConfig, pExpr->pRoot); |
7811 | } |
7812 | if( zText==0 ){ |
7813 | rc = SQLITE_NOMEM; |
7814 | }else{ |
7815 | sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); |
7816 | sqlite3_free(zText); |
7817 | } |
7818 | } |
7819 | |
7820 | if( rc!=SQLITE_OK ){ |
7821 | if( zErr ){ |
7822 | sqlite3_result_error(pCtx, zErr, -1); |
7823 | sqlite3_free(zErr); |
7824 | }else{ |
7825 | sqlite3_result_error_code(pCtx, rc); |
7826 | } |
7827 | } |
7828 | sqlite3_free((void *)azConfig); |
7829 | sqlite3Fts5ConfigFree(pConfig); |
7830 | sqlite3Fts5ExprFree(pExpr); |
7831 | } |
7832 | |
7833 | static void fts5ExprFunctionHr( |
7834 | sqlite3_context *pCtx, /* Function call context */ |
7835 | int nArg, /* Number of args */ |
7836 | sqlite3_value **apVal /* Function arguments */ |
7837 | ){ |
7838 | fts5ExprFunction(pCtx, nArg, apVal, 0); |
7839 | } |
7840 | static void fts5ExprFunctionTcl( |
7841 | sqlite3_context *pCtx, /* Function call context */ |
7842 | int nArg, /* Number of args */ |
7843 | sqlite3_value **apVal /* Function arguments */ |
7844 | ){ |
7845 | fts5ExprFunction(pCtx, nArg, apVal, 1); |
7846 | } |
7847 | |
7848 | /* |
7849 | ** The implementation of an SQLite user-defined-function that accepts a |
7850 | ** single integer as an argument. If the integer is an alpha-numeric |
7851 | ** unicode code point, 1 is returned. Otherwise 0. |
7852 | */ |
7853 | static void fts5ExprIsAlnum( |
7854 | sqlite3_context *pCtx, /* Function call context */ |
7855 | int nArg, /* Number of args */ |
7856 | sqlite3_value **apVal /* Function arguments */ |
7857 | ){ |
7858 | int iCode; |
7859 | u8 aArr[32]; |
7860 | if( nArg!=1 ){ |
7861 | sqlite3_result_error(pCtx, |
7862 | "wrong number of arguments to function fts5_isalnum" , -1 |
7863 | ); |
7864 | return; |
7865 | } |
7866 | memset(aArr, 0, sizeof(aArr)); |
7867 | sqlite3Fts5UnicodeCatParse("L*" , aArr); |
7868 | sqlite3Fts5UnicodeCatParse("N*" , aArr); |
7869 | sqlite3Fts5UnicodeCatParse("Co" , aArr); |
7870 | iCode = sqlite3_value_int(apVal[0]); |
7871 | sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]); |
7872 | } |
7873 | |
7874 | static void fts5ExprFold( |
7875 | sqlite3_context *pCtx, /* Function call context */ |
7876 | int nArg, /* Number of args */ |
7877 | sqlite3_value **apVal /* Function arguments */ |
7878 | ){ |
7879 | if( nArg!=1 && nArg!=2 ){ |
7880 | sqlite3_result_error(pCtx, |
7881 | "wrong number of arguments to function fts5_fold" , -1 |
7882 | ); |
7883 | }else{ |
7884 | int iCode; |
7885 | int bRemoveDiacritics = 0; |
7886 | iCode = sqlite3_value_int(apVal[0]); |
7887 | if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]); |
7888 | sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); |
7889 | } |
7890 | } |
7891 | #endif /* ifdef SQLITE_TEST */ |
7892 | |
7893 | /* |
7894 | ** This is called during initialization to register the fts5_expr() scalar |
7895 | ** UDF with the SQLite handle passed as the only argument. |
7896 | */ |
7897 | static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ |
7898 | #ifdef SQLITE_TEST |
7899 | struct Fts5ExprFunc { |
7900 | const char *z; |
7901 | void (*x)(sqlite3_context*,int,sqlite3_value**); |
7902 | } aFunc[] = { |
7903 | { "fts5_expr" , fts5ExprFunctionHr }, |
7904 | { "fts5_expr_tcl" , fts5ExprFunctionTcl }, |
7905 | { "fts5_isalnum" , fts5ExprIsAlnum }, |
7906 | { "fts5_fold" , fts5ExprFold }, |
7907 | }; |
7908 | int i; |
7909 | int rc = SQLITE_OK; |
7910 | void *pCtx = (void*)pGlobal; |
7911 | |
7912 | for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){ |
7913 | struct Fts5ExprFunc *p = &aFunc[i]; |
7914 | rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); |
7915 | } |
7916 | #else |
7917 | int rc = SQLITE_OK; |
7918 | UNUSED_PARAM2(pGlobal,db); |
7919 | #endif |
7920 | |
7921 | /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and |
7922 | ** sqlite3Fts5ParserFallback() are unused */ |
7923 | #ifndef NDEBUG |
7924 | (void)sqlite3Fts5ParserTrace; |
7925 | #endif |
7926 | (void)sqlite3Fts5ParserFallback; |
7927 | |
7928 | return rc; |
7929 | } |
7930 | |
7931 | /* |
7932 | ** Return the number of phrases in expression pExpr. |
7933 | */ |
7934 | static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ |
7935 | return (pExpr ? pExpr->nPhrase : 0); |
7936 | } |
7937 | |
7938 | /* |
7939 | ** Return the number of terms in the iPhrase'th phrase in pExpr. |
7940 | */ |
7941 | static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ |
7942 | if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; |
7943 | return pExpr->apExprPhrase[iPhrase]->nTerm; |
7944 | } |
7945 | |
7946 | /* |
7947 | ** This function is used to access the current position list for phrase |
7948 | ** iPhrase. |
7949 | */ |
7950 | static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ |
7951 | int nRet; |
7952 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; |
7953 | Fts5ExprNode *pNode = pPhrase->pNode; |
7954 | if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ |
7955 | *pa = pPhrase->poslist.p; |
7956 | nRet = pPhrase->poslist.n; |
7957 | }else{ |
7958 | *pa = 0; |
7959 | nRet = 0; |
7960 | } |
7961 | return nRet; |
7962 | } |
7963 | |
/*
** Per-phrase state used while position lists are rebuilt by tokenizing
** document text. sqlite3Fts5ExprClearPoslists() allocates one of these for
** each phrase in an expression.
*/
struct Fts5PoslistPopulator {
  Fts5PoslistWriter writer;       /* Writer used to append positions */
  int bOk;                        /* True if ok to populate */
  int bMiss;                      /* Set by sqlite3Fts5ExprClearPoslists() when
                                  ** the phrase is not a match for the current
                                  ** row; such phrases are never populated */
};
7969 | |
7970 | /* |
7971 | ** Clear the position lists associated with all phrases in the expression |
7972 | ** passed as the first argument. Argument bLive is true if the expression |
7973 | ** might be pointing to a real entry, otherwise it has just been reset. |
7974 | ** |
7975 | ** At present this function is only used for detail=col and detail=none |
7976 | ** fts5 tables. This implies that all phrases must be at most 1 token |
7977 | ** in size, as phrase matches are not supported without detail=full. |
7978 | */ |
7979 | static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ |
7980 | Fts5PoslistPopulator *pRet; |
7981 | pRet = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); |
7982 | if( pRet ){ |
7983 | int i; |
7984 | memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); |
7985 | for(i=0; i<pExpr->nPhrase; i++){ |
7986 | Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; |
7987 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; |
7988 | assert( pExpr->apExprPhrase[i]->nTerm<=1 ); |
7989 | if( bLive && |
7990 | (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) |
7991 | ){ |
7992 | pRet[i].bMiss = 1; |
7993 | }else{ |
7994 | pBuf->n = 0; |
7995 | } |
7996 | } |
7997 | } |
7998 | return pRet; |
7999 | } |
8000 | |
/*
** Context object handed to the tokenizer callback
** fts5ExprPopulatePoslistsCb() by sqlite3Fts5ExprPopulatePoslists().
*/
struct Fts5ExprCtx {
  Fts5Expr *pExpr;                    /* Expression being populated */
  Fts5PoslistPopulator *aPopulator;   /* One entry per phrase in pExpr */
  i64 iOff;                           /* Offset of the current token; starts at
                                      ** (iCol<<32)-1 and is incremented for
                                      ** each non-colocated token */
};
typedef struct Fts5ExprCtx Fts5ExprCtx;
8007 | |
8008 | /* |
8009 | ** TODO: Make this more efficient! |
8010 | */ |
8011 | static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ |
8012 | int i; |
8013 | for(i=0; i<pColset->nCol; i++){ |
8014 | if( pColset->aiCol[i]==iCol ) return 1; |
8015 | } |
8016 | return 0; |
8017 | } |
8018 | |
8019 | static int fts5ExprPopulatePoslistsCb( |
8020 | void *pCtx, /* Copy of 2nd argument to xTokenize() */ |
8021 | int tflags, /* Mask of FTS5_TOKEN_* flags */ |
8022 | const char *pToken, /* Pointer to buffer containing token */ |
8023 | int nToken, /* Size of token in bytes */ |
8024 | int iUnused1, /* Byte offset of token within input text */ |
8025 | int iUnused2 /* Byte offset of end of token within input text */ |
8026 | ){ |
8027 | Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; |
8028 | Fts5Expr *pExpr = p->pExpr; |
8029 | int i; |
8030 | |
8031 | UNUSED_PARAM2(iUnused1, iUnused2); |
8032 | |
8033 | if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
8034 | if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; |
8035 | for(i=0; i<pExpr->nPhrase; i++){ |
8036 | Fts5ExprTerm *pTerm; |
8037 | if( p->aPopulator[i].bOk==0 ) continue; |
8038 | for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ |
8039 | int nTerm = (int)strlen(pTerm->zTerm); |
8040 | if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) |
8041 | && memcmp(pTerm->zTerm, pToken, nTerm)==0 |
8042 | ){ |
8043 | int rc = sqlite3Fts5PoslistWriterAppend( |
8044 | &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff |
8045 | ); |
8046 | if( rc ) return rc; |
8047 | break; |
8048 | } |
8049 | } |
8050 | } |
8051 | return SQLITE_OK; |
8052 | } |
8053 | |
8054 | static int sqlite3Fts5ExprPopulatePoslists( |
8055 | Fts5Config *pConfig, |
8056 | Fts5Expr *pExpr, |
8057 | Fts5PoslistPopulator *aPopulator, |
8058 | int iCol, |
8059 | const char *z, int n |
8060 | ){ |
8061 | int i; |
8062 | Fts5ExprCtx sCtx; |
8063 | sCtx.pExpr = pExpr; |
8064 | sCtx.aPopulator = aPopulator; |
8065 | sCtx.iOff = (((i64)iCol) << 32) - 1; |
8066 | |
8067 | for(i=0; i<pExpr->nPhrase; i++){ |
8068 | Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; |
8069 | Fts5Colset *pColset = pNode->pNear->pColset; |
8070 | if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) |
8071 | || aPopulator[i].bMiss |
8072 | ){ |
8073 | aPopulator[i].bOk = 0; |
8074 | }else{ |
8075 | aPopulator[i].bOk = 1; |
8076 | } |
8077 | } |
8078 | |
8079 | return sqlite3Fts5Tokenize(pConfig, |
8080 | FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb |
8081 | ); |
8082 | } |
8083 | |
8084 | static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ |
8085 | if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){ |
8086 | pNode->pNear->apPhrase[0]->poslist.n = 0; |
8087 | }else{ |
8088 | int i; |
8089 | for(i=0; i<pNode->nChild; i++){ |
8090 | fts5ExprClearPoslists(pNode->apChild[i]); |
8091 | } |
8092 | } |
8093 | } |
8094 | |
8095 | static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ |
8096 | pNode->iRowid = iRowid; |
8097 | pNode->bEof = 0; |
8098 | switch( pNode->eType ){ |
8099 | case FTS5_TERM: |
8100 | case FTS5_STRING: |
8101 | return (pNode->pNear->apPhrase[0]->poslist.n>0); |
8102 | |
8103 | case FTS5_AND: { |
8104 | int i; |
8105 | for(i=0; i<pNode->nChild; i++){ |
8106 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ |
8107 | fts5ExprClearPoslists(pNode); |
8108 | return 0; |
8109 | } |
8110 | } |
8111 | break; |
8112 | } |
8113 | |
8114 | case FTS5_OR: { |
8115 | int i; |
8116 | int bRet = 0; |
8117 | for(i=0; i<pNode->nChild; i++){ |
8118 | if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ |
8119 | bRet = 1; |
8120 | } |
8121 | } |
8122 | return bRet; |
8123 | } |
8124 | |
8125 | default: { |
8126 | assert( pNode->eType==FTS5_NOT ); |
8127 | if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) |
8128 | || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) |
8129 | ){ |
8130 | fts5ExprClearPoslists(pNode); |
8131 | return 0; |
8132 | } |
8133 | break; |
8134 | } |
8135 | } |
8136 | return 1; |
8137 | } |
8138 | |
8139 | static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ |
8140 | fts5ExprCheckPoslists(pExpr->pRoot, iRowid); |
8141 | } |
8142 | |
8143 | /* |
8144 | ** This function is only called for detail=columns tables. |
8145 | */ |
8146 | static int sqlite3Fts5ExprPhraseCollist( |
8147 | Fts5Expr *pExpr, |
8148 | int iPhrase, |
8149 | const u8 **ppCollist, |
8150 | int *pnCollist |
8151 | ){ |
8152 | Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; |
8153 | Fts5ExprNode *pNode = pPhrase->pNode; |
8154 | int rc = SQLITE_OK; |
8155 | |
8156 | assert( iPhrase>=0 && iPhrase<pExpr->nPhrase ); |
8157 | assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
8158 | |
8159 | if( pNode->bEof==0 |
8160 | && pNode->iRowid==pExpr->pRoot->iRowid |
8161 | && pPhrase->poslist.n>0 |
8162 | ){ |
8163 | Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; |
8164 | if( pTerm->pSynonym ){ |
8165 | Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; |
8166 | rc = fts5ExprSynonymList( |
8167 | pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist |
8168 | ); |
8169 | }else{ |
8170 | *ppCollist = pPhrase->aTerm[0].pIter->pData; |
8171 | *pnCollist = pPhrase->aTerm[0].pIter->nData; |
8172 | } |
8173 | }else{ |
8174 | *ppCollist = 0; |
8175 | *pnCollist = 0; |
8176 | } |
8177 | |
8178 | return rc; |
8179 | } |
8180 | |
8181 | #line 1 "fts5_hash.c" |
8182 | /* |
8183 | ** 2014 August 11 |
8184 | ** |
8185 | ** The author disclaims copyright to this source code. In place of |
8186 | ** a legal notice, here is a blessing: |
8187 | ** |
8188 | ** May you do good and not evil. |
8189 | ** May you find forgiveness for yourself and forgive others. |
8190 | ** May you share freely, never taking more than you give. |
8191 | ** |
8192 | ****************************************************************************** |
8193 | ** |
8194 | */ |
8195 | |
8196 | |
8197 | |
8198 | /* #include "fts5Int.h" */ |
8199 | |
/* Forward declaration: one entry (one term) in the in-memory hash table. */
typedef struct Fts5HashEntry Fts5HashEntry;
8201 | |
/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/
8207 | |
8208 | |
/*
** The in-memory hash table object. Entries are chained from aSlot[] via
** Fts5HashEntry.pHashNext.
*/
struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Pointer to bytes counter; incremented by
                                  ** sqlite3Fts5HashWrite() as data is added */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
8217 | |
/*
** Each entry in the hash table is represented by an object of the
** following type. Each object, its key (a nul-terminated string) and
** its current data are stored in a single memory allocation. The
** key immediately follows the object in memory. The position list
** data immediately follows the key data in memory.
**
** The data that follows the key is in a similar, but not identical format
** to the doclist data stored in the database. It is:
**
**   * Rowid, as a varint
**   * Position list, without 0x00 terminator.
**   * Size of previous position list and rowid, as a 4 byte
**     big-endian integer.
**
** NOTE(review): the visible writer code (sqlite3Fts5HashWrite() and
** fts5HashAddPoslistSize()) reserves a single byte for the position-list
** size ahead of the position list and later rewrites it as a varint. The
** "4 byte big-endian integer" item above may be out of date - confirm.
**
** iSzPoslist:
**   Offset of the space reserved for the size of the current position
**   list. Relative to first byte of structure. Reset to zero once
**   fts5HashAddPoslistSize() has written the size.
**
** nData:
**   Total number of bytes in use, counted from the first byte of the
**   structure. This includes the structure itself and the key.
*/
struct Fts5HashEntry {
  Fts5HashEntry *pHashNext;       /* Next hash entry with same hash-key */
  Fts5HashEntry *pScanNext;       /* Next entry in sorted order */

  int nAlloc;                     /* Total size of allocation */
  int iSzPoslist;                 /* Offset of space for 4-byte poslist size */
  int nData;                      /* Total bytes of data (incl. structure) */
  int nKey;                       /* Length of key in bytes (not counting the
                                  ** leading prefix byte or nul-terminator) */
  u8 bDel;                        /* Set delete-flag @ iSzPoslist */
  u8 bContent;                    /* Set content-flag (detail=none mode) */
  i16 iCol;                       /* Column of last value written */
  int iPos;                       /* Position of last value written */
  i64 iRowid;                     /* Rowid of last value written */
};
8254 | |
/*
** Equivalent to:
**
**   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
**
** where zKey is the key stored immediately after the Fts5HashEntry
** structure in the same allocation.
*/
#define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
8261 | |
8262 | |
8263 | /* |
8264 | ** Allocate a new hash table. |
8265 | */ |
8266 | static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ |
8267 | int rc = SQLITE_OK; |
8268 | Fts5Hash *pNew; |
8269 | |
8270 | *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); |
8271 | if( pNew==0 ){ |
8272 | rc = SQLITE_NOMEM; |
8273 | }else{ |
8274 | sqlite3_int64 nByte; |
8275 | memset(pNew, 0, sizeof(Fts5Hash)); |
8276 | pNew->pnByte = pnByte; |
8277 | pNew->eDetail = pConfig->eDetail; |
8278 | |
8279 | pNew->nSlot = 1024; |
8280 | nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; |
8281 | pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64(nByte); |
8282 | if( pNew->aSlot==0 ){ |
8283 | sqlite3_free(pNew); |
8284 | *ppNew = 0; |
8285 | rc = SQLITE_NOMEM; |
8286 | }else{ |
8287 | memset(pNew->aSlot, 0, (size_t)nByte); |
8288 | } |
8289 | } |
8290 | return rc; |
8291 | } |
8292 | |
8293 | /* |
8294 | ** Free a hash table object. |
8295 | */ |
8296 | static void sqlite3Fts5HashFree(Fts5Hash *pHash){ |
8297 | if( pHash ){ |
8298 | sqlite3Fts5HashClear(pHash); |
8299 | sqlite3_free(pHash->aSlot); |
8300 | sqlite3_free(pHash); |
8301 | } |
8302 | } |
8303 | |
8304 | /* |
8305 | ** Empty (but do not delete) a hash table. |
8306 | */ |
8307 | static void sqlite3Fts5HashClear(Fts5Hash *pHash){ |
8308 | int i; |
8309 | for(i=0; i<pHash->nSlot; i++){ |
8310 | Fts5HashEntry *pNext; |
8311 | Fts5HashEntry *pSlot; |
8312 | for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ |
8313 | pNext = pSlot->pHashNext; |
8314 | sqlite3_free(pSlot); |
8315 | } |
8316 | } |
8317 | memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); |
8318 | pHash->nEntry = 0; |
8319 | } |
8320 | |
8321 | static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ |
8322 | int i; |
8323 | unsigned int h = 13; |
8324 | for(i=n-1; i>=0; i--){ |
8325 | h = (h << 3) ^ h ^ p[i]; |
8326 | } |
8327 | return (h % nSlot); |
8328 | } |
8329 | |
8330 | static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ |
8331 | int i; |
8332 | unsigned int h = 13; |
8333 | for(i=n-1; i>=0; i--){ |
8334 | h = (h << 3) ^ h ^ p[i]; |
8335 | } |
8336 | h = (h << 3) ^ h ^ b; |
8337 | return (h % nSlot); |
8338 | } |
8339 | |
8340 | /* |
8341 | ** Resize the hash table by doubling the number of slots. |
8342 | */ |
8343 | static int fts5HashResize(Fts5Hash *pHash){ |
8344 | int nNew = pHash->nSlot*2; |
8345 | int i; |
8346 | Fts5HashEntry **apNew; |
8347 | Fts5HashEntry **apOld = pHash->aSlot; |
8348 | |
8349 | apNew = (Fts5HashEntry**)sqlite3_malloc64(nNew*sizeof(Fts5HashEntry*)); |
8350 | if( !apNew ) return SQLITE_NOMEM; |
8351 | memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); |
8352 | |
8353 | for(i=0; i<pHash->nSlot; i++){ |
8354 | while( apOld[i] ){ |
8355 | unsigned int iHash; |
8356 | Fts5HashEntry *p = apOld[i]; |
8357 | apOld[i] = p->pHashNext; |
8358 | iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), |
8359 | (int)strlen(fts5EntryKey(p))); |
8360 | p->pHashNext = apNew[iHash]; |
8361 | apNew[iHash] = p; |
8362 | } |
8363 | } |
8364 | |
8365 | sqlite3_free(apOld); |
8366 | pHash->nSlot = nNew; |
8367 | pHash->aSlot = apNew; |
8368 | return SQLITE_OK; |
8369 | } |
8370 | |
8371 | static int fts5HashAddPoslistSize( |
8372 | Fts5Hash *pHash, |
8373 | Fts5HashEntry *p, |
8374 | Fts5HashEntry *p2 |
8375 | ){ |
8376 | int nRet = 0; |
8377 | if( p->iSzPoslist ){ |
8378 | u8 *pPtr = p2 ? (u8*)p2 : (u8*)p; |
8379 | int nData = p->nData; |
8380 | if( pHash->eDetail==FTS5_DETAIL_NONE ){ |
8381 | assert( nData==p->iSzPoslist ); |
8382 | if( p->bDel ){ |
8383 | pPtr[nData++] = 0x00; |
8384 | if( p->bContent ){ |
8385 | pPtr[nData++] = 0x00; |
8386 | } |
8387 | } |
8388 | }else{ |
8389 | int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */ |
8390 | int nPos = nSz*2 + p->bDel; /* Value of nPos field */ |
8391 | |
8392 | assert( p->bDel==0 || p->bDel==1 ); |
8393 | if( nPos<=127 ){ |
8394 | pPtr[p->iSzPoslist] = (u8)nPos; |
8395 | }else{ |
8396 | int nByte = sqlite3Fts5GetVarintLen((u32)nPos); |
8397 | memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); |
8398 | sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); |
8399 | nData += (nByte-1); |
8400 | } |
8401 | } |
8402 | |
8403 | nRet = nData - p->nData; |
8404 | if( p2==0 ){ |
8405 | p->iSzPoslist = 0; |
8406 | p->bDel = 0; |
8407 | p->bContent = 0; |
8408 | p->nData = nData; |
8409 | } |
8410 | } |
8411 | return nRet; |
8412 | } |
8413 | |
/*
** Add an entry to the in-memory hash table. The key is the concatenation
** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
**
**     (bByte || pToken) -> (iRowid,iCol,iPos)
**
** Or, if iCol is negative, then the value is a delete marker.
**
** If no entry exists for the key, one is allocated (growing the slot array
** first if the table is at least half full). Otherwise the existing entry
** is enlarged if it may not have room for the largest possible addition.
**
** Returns SQLITE_OK on success, or SQLITE_NOMEM (or the error returned by
** fts5HashResize()) if memory could not be allocated. On success the
** counter at pHash->pnByte is increased by the number of bytes of data
** added to the entry.
*/
static int sqlite3Fts5HashWrite(
  Fts5Hash *pHash,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,                     /* First byte of token */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  unsigned int iHash;
  Fts5HashEntry *p;
  u8 *pPtr;
  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */
  int bNew;                       /* If non-delete entry should be written */

  /* In detail=full mode every position is written. In the other modes a
  ** value is only written when the rowid or column changes (see below). */
  bNew = (pHash->eDetail==FTS5_DETAIL_FULL);

  /* Attempt to locate an existing hash entry */
  iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
  for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
    char *zKey = fts5EntryKey(p);
    if( zKey[0]==bByte
     && p->nKey==nToken
     && memcmp(&zKey[1], pToken, nToken)==0
    ){
      break;
    }
  }

  /* If an existing hash entry cannot be found, create a new one. */
  if( p==0 ){
    /* Figure out how much space to allocate */
    char *zKey;
    sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
    if( nByte<128 ) nByte = 128;

    /* Grow the Fts5Hash.aSlot[] array if necessary. */
    if( (pHash->nEntry*2)>=pHash->nSlot ){
      int rc = fts5HashResize(pHash);
      if( rc!=SQLITE_OK ) return rc;
      /* The slot count changed, so the slot index must be recomputed */
      iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
    }

    /* Allocate new Fts5HashEntry and add it to the hash table. */
    p = (Fts5HashEntry*)sqlite3_malloc64(nByte);
    if( !p ) return SQLITE_NOMEM;
    memset(p, 0, sizeof(Fts5HashEntry));
    p->nAlloc = (int)nByte;
    zKey = fts5EntryKey(p);
    zKey[0] = bByte;
    memcpy(&zKey[1], pToken, nToken);
    assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
    p->nKey = nToken;
    zKey[nToken+1] = '\0';
    /* Data begins after the structure, the prefix byte, the token and the
    ** nul-terminator */
    p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
    p->pHashNext = pHash->aSlot[iHash];
    pHash->aSlot[iHash] = p;
    pHash->nEntry++;

    /* Add the first rowid field to the hash-entry */
    p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
    p->iRowid = iRowid;

    /* Reserve one byte for the poslist size (not used by detail=none) */
    p->iSzPoslist = p->nData;
    if( pHash->eDetail!=FTS5_DETAIL_NONE ){
      p->nData += 1;
      p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
    }

  }else{

    /* Appending to an existing hash-entry. Check that there is enough 
    ** space to append the largest possible new entry. Worst case scenario 
    ** is:
    **
    **     + 9 bytes for a new rowid,
    **     + 4 byte reserved for the "poslist size" varint.
    **     + 1 byte for a "new column" byte,
    **     + 3 bytes for a new column number (16-bit max) as a varint,
    **     + 5 bytes for the new position offset (32-bit max).
    */
    if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
      sqlite3_int64 nNew = p->nAlloc * 2;
      Fts5HashEntry *pNew;
      Fts5HashEntry **pp;
      pNew = (Fts5HashEntry*)sqlite3_realloc64(p, nNew);
      if( pNew==0 ) return SQLITE_NOMEM;
      pNew->nAlloc = (int)nNew;
      /* The entry may have moved: repoint whichever link referred to it */
      for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
      *pp = pNew;
      p = pNew;
    }
    /* Combined with "nIncr += p->nData" below, this leaves nIncr equal to
    ** the number of bytes added to the existing entry by this call */
    nIncr -= p->nData;
  }
  assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );

  pPtr = (u8*)p;

  /* If this is a new rowid, finish the size field of the previous
  ** position list, then append the new rowid (as a delta from the
  ** previous rowid) and reserve a size byte for the new position list. */
  if( iRowid!=p->iRowid ){
    u64 iDiff = (u64)iRowid - (u64)p->iRowid;
    fts5HashAddPoslistSize(pHash, p, 0);
    p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff);
    p->iRowid = iRowid;
    bNew = 1;
    p->iSzPoslist = p->nData;
    if( pHash->eDetail!=FTS5_DETAIL_NONE ){
      p->nData += 1;
      p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
      p->iPos = 0;
    }
  }

  if( iCol>=0 ){
    if( pHash->eDetail==FTS5_DETAIL_NONE ){
      p->bContent = 1;
    }else{
      /* Append a new column value, if necessary */
      assert_nc( iCol>=p->iCol );
      if( iCol!=p->iCol ){
        if( pHash->eDetail==FTS5_DETAIL_FULL ){
          pPtr[p->nData++] = 0x01;
          p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
          p->iCol = (i16)iCol;
          p->iPos = 0;
        }else{
          /* Not detail=full: the column number itself is stored in place
          ** of a position, once per column */
          bNew = 1;
          p->iCol = (i16)(iPos = iCol);
        }
      }

      /* Append the new position offset, if necessary */
      if( bNew ){
        p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
        p->iPos = iPos;
      }
    }
  }else{
    /* This is a delete. Set the delete flag. */
    p->bDel = 1;
  }

  nIncr += p->nData;
  *pHash->pnByte += nIncr;
  return SQLITE_OK;
}
8568 | |
8569 | |
8570 | /* |
8571 | ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, |
8572 | ** each sorted in key order. This function merges the two lists into a |
8573 | ** single list and returns a pointer to its first element. |
8574 | */ |
8575 | static Fts5HashEntry *fts5HashEntryMerge( |
8576 | Fts5HashEntry *pLeft, |
8577 | Fts5HashEntry *pRight |
8578 | ){ |
8579 | Fts5HashEntry *p1 = pLeft; |
8580 | Fts5HashEntry *p2 = pRight; |
8581 | Fts5HashEntry *pRet = 0; |
8582 | Fts5HashEntry **ppOut = &pRet; |
8583 | |
8584 | while( p1 || p2 ){ |
8585 | if( p1==0 ){ |
8586 | *ppOut = p2; |
8587 | p2 = 0; |
8588 | }else if( p2==0 ){ |
8589 | *ppOut = p1; |
8590 | p1 = 0; |
8591 | }else{ |
8592 | int i = 0; |
8593 | char *zKey1 = fts5EntryKey(p1); |
8594 | char *zKey2 = fts5EntryKey(p2); |
8595 | while( zKey1[i]==zKey2[i] ) i++; |
8596 | |
8597 | if( ((u8)zKey1[i])>((u8)zKey2[i]) ){ |
8598 | /* p2 is smaller */ |
8599 | *ppOut = p2; |
8600 | ppOut = &p2->pScanNext; |
8601 | p2 = p2->pScanNext; |
8602 | }else{ |
8603 | /* p1 is smaller */ |
8604 | *ppOut = p1; |
8605 | ppOut = &p1->pScanNext; |
8606 | p1 = p1->pScanNext; |
8607 | } |
8608 | *ppOut = 0; |
8609 | } |
8610 | } |
8611 | |
8612 | return pRet; |
8613 | } |
8614 | |
8615 | /* |
8616 | ** Extract all tokens from hash table iHash and link them into a list |
8617 | ** in sorted order. The hash table is cleared before returning. It is |
8618 | ** the responsibility of the caller to free the elements of the returned |
8619 | ** list. |
8620 | */ |
8621 | static int fts5HashEntrySort( |
8622 | Fts5Hash *pHash, |
8623 | const char *pTerm, int nTerm, /* Query prefix, if any */ |
8624 | Fts5HashEntry **ppSorted |
8625 | ){ |
8626 | const int nMergeSlot = 32; |
8627 | Fts5HashEntry **ap; |
8628 | Fts5HashEntry *pList; |
8629 | int iSlot; |
8630 | int i; |
8631 | |
8632 | *ppSorted = 0; |
8633 | ap = sqlite3_malloc64(sizeof(Fts5HashEntry*) * nMergeSlot); |
8634 | if( !ap ) return SQLITE_NOMEM; |
8635 | memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); |
8636 | |
8637 | for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ |
8638 | Fts5HashEntry *pIter; |
8639 | for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ |
8640 | if( pTerm==0 |
8641 | || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) |
8642 | ){ |
8643 | Fts5HashEntry *pEntry = pIter; |
8644 | pEntry->pScanNext = 0; |
8645 | for(i=0; ap[i]; i++){ |
8646 | pEntry = fts5HashEntryMerge(pEntry, ap[i]); |
8647 | ap[i] = 0; |
8648 | } |
8649 | ap[i] = pEntry; |
8650 | } |
8651 | } |
8652 | } |
8653 | |
8654 | pList = 0; |
8655 | for(i=0; i<nMergeSlot; i++){ |
8656 | pList = fts5HashEntryMerge(pList, ap[i]); |
8657 | } |
8658 | |
8659 | pHash->nEntry = 0; |
8660 | sqlite3_free(ap); |
8661 | *ppSorted = pList; |
8662 | return SQLITE_OK; |
8663 | } |
8664 | |
8665 | /* |
8666 | ** Query the hash table for a doclist associated with term pTerm/nTerm. |
8667 | */ |
8668 | static int sqlite3Fts5HashQuery( |
8669 | Fts5Hash *pHash, /* Hash table to query */ |
8670 | int nPre, |
8671 | const char *pTerm, int nTerm, /* Query term */ |
8672 | void **ppOut, /* OUT: Pointer to new object */ |
8673 | int *pnDoclist /* OUT: Size of doclist in bytes */ |
8674 | ){ |
8675 | unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); |
8676 | char *zKey = 0; |
8677 | Fts5HashEntry *p; |
8678 | |
8679 | for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ |
8680 | zKey = fts5EntryKey(p); |
8681 | assert( p->nKey+1==(int)strlen(zKey) ); |
8682 | if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break; |
8683 | } |
8684 | |
8685 | if( p ){ |
8686 | int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1; |
8687 | int nList = p->nData - nHashPre; |
8688 | u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10)); |
8689 | if( pRet ){ |
8690 | Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre]; |
8691 | memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList); |
8692 | nList += fts5HashAddPoslistSize(pHash, p, pFaux); |
8693 | *pnDoclist = nList; |
8694 | }else{ |
8695 | *pnDoclist = 0; |
8696 | return SQLITE_NOMEM; |
8697 | } |
8698 | }else{ |
8699 | *ppOut = 0; |
8700 | *pnDoclist = 0; |
8701 | } |
8702 | |
8703 | return SQLITE_OK; |
8704 | } |
8705 | |
8706 | static int sqlite3Fts5HashScanInit( |
8707 | Fts5Hash *p, /* Hash table to query */ |
8708 | const char *pTerm, int nTerm /* Query prefix */ |
8709 | ){ |
8710 | return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); |
8711 | } |
8712 | |
8713 | static void sqlite3Fts5HashScanNext(Fts5Hash *p){ |
8714 | assert( !sqlite3Fts5HashScanEof(p) ); |
8715 | p->pScan = p->pScan->pScanNext; |
8716 | } |
8717 | |
8718 | static int sqlite3Fts5HashScanEof(Fts5Hash *p){ |
8719 | return (p->pScan==0); |
8720 | } |
8721 | |
8722 | static void sqlite3Fts5HashScanEntry( |
8723 | Fts5Hash *pHash, |
8724 | const char **pzTerm, /* OUT: term (nul-terminated) */ |
8725 | const u8 **ppDoclist, /* OUT: pointer to doclist */ |
8726 | int *pnDoclist /* OUT: size of doclist in bytes */ |
8727 | ){ |
8728 | Fts5HashEntry *p; |
8729 | if( (p = pHash->pScan) ){ |
8730 | char *zKey = fts5EntryKey(p); |
8731 | int nTerm = (int)strlen(zKey); |
8732 | fts5HashAddPoslistSize(pHash, p, 0); |
8733 | *pzTerm = zKey; |
8734 | *ppDoclist = (const u8*)&zKey[nTerm+1]; |
8735 | *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1); |
8736 | }else{ |
8737 | *pzTerm = 0; |
8738 | *ppDoclist = 0; |
8739 | *pnDoclist = 0; |
8740 | } |
8741 | } |
8742 | |
8743 | #line 1 "fts5_index.c" |
8744 | /* |
8745 | ** 2014 May 31 |
8746 | ** |
8747 | ** The author disclaims copyright to this source code. In place of |
8748 | ** a legal notice, here is a blessing: |
8749 | ** |
8750 | ** May you do good and not evil. |
8751 | ** May you find forgiveness for yourself and forgive others. |
8752 | ** May you share freely, never taking more than you give. |
8753 | ** |
8754 | ****************************************************************************** |
8755 | ** |
8756 | ** Low level access to the FTS index stored in the database file. The |
** routines in this file implement all read and write access to the
8758 | ** %_data table. Other parts of the system access this functionality via |
8759 | ** the interface defined in fts5Int.h. |
8760 | */ |
8761 | |
8762 | |
8763 | /* #include "fts5Int.h" */ |
8764 | |
8765 | /* |
8766 | ** Overview: |
8767 | ** |
8768 | ** The %_data table contains all the FTS indexes for an FTS5 virtual table. |
8769 | ** As well as the main term index, there may be up to 31 prefix indexes. |
8770 | ** The format is similar to FTS3/4, except that: |
8771 | ** |
8772 | ** * all segment b-tree leaf data is stored in fixed size page records |
8773 | ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is |
8774 | ** taken to ensure it is possible to iterate in either direction through |
8775 | ** the entries in a doclist, or to seek to a specific entry within a |
8776 | ** doclist, without loading it into memory. |
8777 | ** |
8778 | ** * large doclists that span many pages have associated "doclist index" |
8779 | ** records that contain a copy of the first rowid on each page spanned by |
8780 | ** the doclist. This is used to speed up seek operations, and merges of |
8781 | ** large doclists with very small doclists. |
8782 | ** |
8783 | ** * extra fields in the "structure record" record the state of ongoing |
8784 | ** incremental merge operations. |
8785 | ** |
8786 | */ |
8787 | |
8788 | |
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the leading byte that identifies keys of the
** main (non-prefix) term index - confirm against the code that uses it. */
#define FTS5_MAIN_PREFIX '0'

/* The overview comment in this file allows for at most 31 prefix indexes.
** Refuse to compile if the configured limit is larger than that. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
8799 | |
8800 | /* |
8801 | ** Details: |
8802 | ** |
8803 | ** The %_data table managed by this module, |
8804 | ** |
8805 | ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); |
8806 | ** |
** , contains the following 4 types of records. See the comments surrounding
** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
8810 | ** |
8811 | ** 1. Structure Records: |
8812 | ** |
8813 | ** The set of segments that make up an index - the index structure - are |
8814 | ** recorded in a single record within the %_data table. The record consists |
8815 | ** of a single 32-bit configuration cookie value followed by a list of |
8816 | ** SQLite varints. If the FTS table features more than one index (because |
8817 | ** there are one or more prefix indexes), it is guaranteed that all share |
8818 | ** the same cookie value. |
8819 | ** |
8820 | ** Immediately following the configuration cookie, the record begins with |
8821 | ** three varints: |
8822 | ** |
8823 | ** + number of levels, |
8824 | ** + total number of segments on all levels, |
8825 | ** + value of write counter. |
8826 | ** |
8827 | ** Then, for each level from 0 to nMax: |
8828 | ** |
8829 | ** + number of input segments in ongoing merge. |
8830 | ** + total number of segments in level. |
8831 | ** + for each segment from oldest to newest: |
8832 | ** + segment id (always > 0) |
8833 | ** + first leaf page number (often 1, always greater than 0) |
8834 | ** + final leaf page number |
8835 | ** |
8836 | ** 2. The Averages Record: |
8837 | ** |
8838 | ** A single record within the %_data table. The data is a list of varints. |
8839 | ** The first value is the number of rows in the index. Then, for each column |
8840 | ** from left to right, the total number of tokens in the column for all |
8841 | ** rows of the table. |
8842 | ** |
8843 | ** 3. Segment leaves: |
8844 | ** |
8845 | ** TERM/DOCLIST FORMAT: |
8846 | ** |
8847 | ** Most of each segment leaf is taken up by term/doclist data. The |
8848 | ** general format of term/doclist, starting with the first term |
8849 | ** on the leaf page, is: |
8850 | ** |
8851 | ** varint : size of first term |
8852 | ** blob: first term data |
8853 | ** doclist: first doclist |
8854 | ** zero-or-more { |
8855 | ** varint: number of bytes in common with previous term |
8856 | ** varint: number of bytes of new term data (nNew) |
8857 | ** blob: nNew bytes of new term data |
8858 | ** doclist: next doclist |
8859 | ** } |
8860 | ** |
8861 | ** doclist format: |
8862 | ** |
8863 | ** varint: first rowid |
8864 | ** poslist: first poslist |
8865 | ** zero-or-more { |
8866 | ** varint: rowid delta (always > 0) |
8867 | ** poslist: next poslist |
8868 | ** } |
8869 | ** |
8870 | ** poslist format: |
8871 | ** |
8872 | ** varint: size of poslist in bytes multiplied by 2, not including |
8873 | ** this field. Plus 1 if this entry carries the "delete" flag. |
8874 | ** collist: collist for column 0 |
8875 | ** zero-or-more { |
8876 | ** 0x01 byte |
8877 | ** varint: column number (I) |
8878 | ** collist: collist for column I |
8879 | ** } |
8880 | ** |
8881 | ** collist format: |
8882 | ** |
8883 | ** varint: first offset + 2 |
8884 | ** zero-or-more { |
8885 | ** varint: offset delta + 2 |
8886 | ** } |
8887 | ** |
8888 | ** PAGE FORMAT |
8889 | ** |
8890 | ** Each leaf page begins with a 4-byte header containing 2 16-bit |
8891 | ** unsigned integer fields in big-endian format. They are: |
8892 | ** |
8893 | ** * The byte offset of the first rowid on the page, if it exists |
8894 | ** and occurs before the first term (otherwise 0). |
8895 | ** |
8896 | ** * The byte offset of the start of the page footer. If the page |
8897 | ** footer is 0 bytes in size, then this field is the same as the |
8898 | ** size of the leaf page in bytes. |
8899 | ** |
8900 | ** The page footer consists of a single varint for each term located |
8901 | ** on the page. Each varint is the byte offset of the current term |
8902 | ** within the page, delta-compressed against the previous value. In |
8903 | ** other words, the first varint in the footer is the byte offset of |
8904 | ** the first term, the second is the byte offset of the second less that |
8905 | ** of the first, and so on. |
8906 | ** |
8907 | ** The term/doclist format described above is accurate if the entire |
8908 | ** term/doclist data fits on a single leaf page. If this is not the case, |
8909 | ** the format is changed in two ways: |
8910 | ** |
8911 | ** + if the first rowid on a page occurs before the first term, it |
8912 | ** is stored as a literal value: |
8913 | ** |
8914 | ** varint: first rowid |
8915 | ** |
8916 | ** + the first term on each page is stored in the same way as the |
8917 | ** very first term of the segment: |
8918 | ** |
8919 | ** varint : size of first term |
8920 | ** blob: first term data |
8921 | ** |
** 4. Segment doclist indexes:
8923 | ** |
8924 | ** Doclist indexes are themselves b-trees, however they usually consist of |
8925 | ** a single leaf record only. The format of each doclist index leaf page |
8926 | ** is: |
8927 | ** |
8928 | ** * Flags byte. Bits are: |
8929 | ** 0x01: Clear if leaf is also the root page, otherwise set. |
8930 | ** |
8931 | ** * Page number of fts index leaf page. As a varint. |
8932 | ** |
8933 | ** * First rowid on page indicated by previous field. As a varint. |
8934 | ** |
8935 | ** * A list of varints, one for each subsequent termless page. A |
8936 | ** positive delta if the termless page contains at least one rowid, |
8937 | ** or an 0x00 byte otherwise. |
8938 | ** |
8939 | ** Internal doclist index nodes are: |
8940 | ** |
8941 | ** * Flags byte. Bits are: |
8942 | ** 0x01: Clear for root page, otherwise set. |
8943 | ** |
8944 | ** * Page number of first child page. As a varint. |
8945 | ** |
8946 | ** * Copy of first rowid on page indicated by previous field. As a varint. |
8947 | ** |
8948 | ** * A list of delta-encoded varints - the first rowid on each subsequent |
8949 | ** child page. |
8950 | ** |
8951 | */ |
8952 | |
/*
** Rowids for the averages and structure records in the %_data table.
*/
#define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* The structure record */

/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** The FTS5_DATA_XXX_B values are field widths in bits. fts5_dri() packs
** its arguments into a 64-bit integer, from least to most significant:
**
**     bits  0..30   pgno    (FTS5_DATA_PAGE_B bits)
**     bits 31..35   height  (FTS5_DATA_HEIGHT_B bits)
**     bit  36       dlidx   (FTS5_DATA_DLI_B bits)
**     bits 37..52   segid   (FTS5_DATA_ID_B bits)
**
** The fields are combined by addition and are not masked, so each argument
** must already fit within its field.
*/
#define FTS5_DATA_ID_B     16     /* 16 bits: max seg id number 65535 */
#define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B  5     /* 5 bits: dlidx tree heights 0..31 */
#define FTS5_DATA_PAGE_B   31     /* 31 bits: max page number 2147483647 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height zero). */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
/* Rowid of a doclist-index page (dlidx flag set) at the given height. */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
8984 | |
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function is only compiled in
** SQLITE_DEBUG builds. It takes no arguments; the parameter list is
** written as (void) so that the definition is a proper prototype.
*/
static int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
8988 | |
8989 | |
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
**
** FTS5_DATA_PADDING is the number of extra bytes that fts5DataRead()
** allocates following the record data. NOTE(review): FTS5_DATA_ZERO_PADDING
** is not referenced in the nearby code - confirm where it is used.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
8997 | |
/* Forward declarations of the structure types defined in this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
9010 | |
/*
** A record read from the %_data table by fts5DataRead(). The structure and
** the record buffer are a single allocation: p points to the byte
** immediately following the structure. fts5DataRead() sets szLeaf to the
** 16-bit big-endian value stored at byte offset 2 of the record, which
** for leaf pages is the offset of the page footer.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
};
9016 | |
/*
** One object per %_data table.
**
** The rc field holds the first error encountered. The fts5DataRead(),
** fts5DataWrite() and fts5DataDelete() functions do no database work if
** rc is already set to something other than SQLITE_OK when they are
** called.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */

  /* Error state. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;       /* presumably a SELECT on %_idx - confirm */
  int nRead;                      /* Total number of blocks read */

  sqlite3_stmt *pDataVersion;     /* presumably reads data_version - confirm */
  i64 iStructVersion;             /* data_version when pStruct read */
  Fts5Structure *pStruct;         /* Current db structure (or NULL) */
};
9050 | |
/*
** Iterator over the entries of a doclist held in memory. The output
** variables describe the current entry; aPoslist is NULL once the
** iterator has reached EOF.
*/
struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
  int nSize;                      /* NOTE(review): meaning not shown here */
};
9060 | |
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
**
** The fields mirror the on-disk structure record described earlier in
** this file: per-segment id and first/last leaf page numbers, per-level
** merge and segment counts, and the overall level count, segment count
** and write counter.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  /* Declared with one element; presumably over-allocated to hold nLevel
  ** entries - confirm against the code that allocates this object. */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};
9083 | |
/*
** An object of type Fts5SegWriter is used to write to segments.
**
** Fts5PageWriter holds the buffers for the leaf page currently being
** assembled. Fts5DlidxWriter holds the state for one page of a
** doclist index. Fts5SegWriter embeds one Fts5PageWriter and owns an
** array of Fts5DlidxWriter objects.
*/
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};
9118 | |
/*
** Result of one comparison between segment iterators. Instances make up
** the Fts5Iter.aFirst[] array: iFirst is the Fts5Iter.aSeg[] index of the
** iterator that currently points to the smaller term/rowid combination.
*/
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};
9124 | |
/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**   NOTE(review): the rest of this paragraph appears to be left over from
**   an earlier definition of this field; it is retained pending
**   confirmation:
**   rowid field of the current entry. Usually this is the size field of the
**   position list data. The exception is if the rowid for the current entry
**   is the last thing on the leaf page.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
**     it is set, iterate through rowid in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**     These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**     For each rowid on the page corresponding to the current term, the
**     corresponding aRowidOffset[] entry is set to the byte offset of the
**     start of the "position-list-size" field within the page.
**
** iTermIdx:
**     Index of current term on iTermLeafPgno.
**     NOTE(review): the structure below has no iTermIdx member.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  i64 iLeafOffset;                /* Byte offset within current leaf */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;              /* presumably offset of the end of the
                                  ** current doclist on the leaf - confirm */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};
9203 | |
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page. Assert that its szLeaf field is either equal to the record
** size (nn) or to the 16-bit value stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Values for the Fts5SegIter.flags bitmask. */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
**
** A leaf is termless when its footer is empty, i.e. when szLeaf is not
** less than the size of the record.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* Read the 16-bit big-endian value at index i of the array that starts at
** byte offset szLeaf of leaf x. NOTE(review): the page format comment in
** this file describes the footer as a list of varints, not 16-bit values;
** confirm whether this macro is still used. */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* The first header field of leaf x: the byte offset of the first rowid on
** the page, or 0 if there is none before the first term. */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
9225 | |
/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/rowid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The result of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
*/
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  /* Declared with one element; per the comment above the array holds nSeg
  ** entries, so the object is presumably over-allocated - confirm. */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};
9267 | |
9268 | |
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
**
** Fts5DlidxLvl holds the iteration state for one level of a doclist-index
** b-tree. Fts5DlidxIter holds nLvl such levels for the doclist index of
** segment iSegid.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
struct Fts5DlidxIter {
  int nLvl;                     /* Number of entries in aLvl[] */
  int iSegid;                   /* Segment id */
  /* Declared with one element; presumably over-allocated to hold nLvl
  ** entries - confirm against the code that allocates this object. */
  Fts5DlidxLvl aLvl[1];
};
9297 | |
9298 | static void fts5PutU16(u8 *aOut, u16 iVal){ |
9299 | aOut[0] = (iVal>>8); |
9300 | aOut[1] = (iVal&0xFF); |
9301 | } |
9302 | |
9303 | static u16 fts5GetU16(const u8 *aIn){ |
9304 | return ((u16)aIn[0] << 8) + aIn[1]; |
9305 | } |
9306 | |
9307 | /* |
9308 | ** Allocate and return a buffer at least nByte bytes in size. |
9309 | ** |
9310 | ** If an OOM error is encountered, return NULL and set the error code in |
9311 | ** the Fts5Index handle passed as the first argument. |
9312 | */ |
9313 | static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){ |
9314 | return sqlite3Fts5MallocZero(&p->rc, nByte); |
9315 | } |
9316 | |
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
** If one is a prefix of the other, the shorter is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** This function is only compiled in SQLITE_DEBUG builds.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  /* Use fts5Memcmp(), as fts5BufferCompare() does, so that memcmp() is
  ** never handed a NULL pointer when one side is an empty buffer. */
  int res = fts5Memcmp(pLeft->p, pRight, nCmp);
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
9335 | |
9336 | /* |
9337 | ** Compare the contents of the two buffers using memcmp(). If one buffer |
9338 | ** is a prefix of the other, it is considered the lesser. |
9339 | ** |
9340 | ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or |
9341 | ** +ve if pRight is smaller than pLeft. In other words: |
9342 | ** |
9343 | ** res = *pLeft - *pRight |
9344 | */ |
9345 | static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ |
9346 | int nCmp, res; |
9347 | nCmp = MIN(pLeft->n, pRight->n); |
9348 | assert( nCmp<=0 || pLeft->p!=0 ); |
9349 | assert( nCmp<=0 || pRight->p!=0 ); |
9350 | res = fts5Memcmp(pLeft->p, pRight->p, nCmp); |
9351 | return (res==0 ? (pLeft->n - pRight->n) : res); |
9352 | } |
9353 | |
9354 | static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ |
9355 | int ret; |
9356 | fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); |
9357 | return ret; |
9358 | } |
9359 | |
9360 | /* |
9361 | ** Close the read-only blob handle, if it is open. |
9362 | */ |
9363 | static void sqlite3Fts5IndexCloseReader(Fts5Index *p){ |
9364 | if( p->pReader ){ |
9365 | sqlite3_blob *pReader = p->pReader; |
9366 | p->pReader = 0; |
9367 | sqlite3_blob_close(pReader); |
9368 | } |
9369 | } |
9370 | |
9371 | /* |
9372 | ** Retrieve a record from the %_data table. |
9373 | ** |
9374 | ** If an error occurs, NULL is returned and an error left in the |
9375 | ** Fts5Index object. |
9376 | */ |
9377 | static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ |
9378 | Fts5Data *pRet = 0; |
9379 | if( p->rc==SQLITE_OK ){ |
9380 | int rc = SQLITE_OK; |
9381 | |
9382 | if( p->pReader ){ |
9383 | /* This call may return SQLITE_ABORT if there has been a savepoint |
9384 | ** rollback since it was last used. In this case a new blob handle |
9385 | ** is required. */ |
9386 | sqlite3_blob *pBlob = p->pReader; |
9387 | p->pReader = 0; |
9388 | rc = sqlite3_blob_reopen(pBlob, iRowid); |
9389 | assert( p->pReader==0 ); |
9390 | p->pReader = pBlob; |
9391 | if( rc!=SQLITE_OK ){ |
9392 | sqlite3Fts5IndexCloseReader(p); |
9393 | } |
9394 | if( rc==SQLITE_ABORT ) rc = SQLITE_OK; |
9395 | } |
9396 | |
9397 | /* If the blob handle is not open at this point, open it and seek |
9398 | ** to the requested entry. */ |
9399 | if( p->pReader==0 && rc==SQLITE_OK ){ |
9400 | Fts5Config *pConfig = p->pConfig; |
9401 | rc = sqlite3_blob_open(pConfig->db, |
9402 | pConfig->zDb, p->zDataTbl, "block" , iRowid, 0, &p->pReader |
9403 | ); |
9404 | } |
9405 | |
9406 | /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls |
9407 | ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. |
9408 | ** All the reasons those functions might return SQLITE_ERROR - missing |
9409 | ** table, missing row, non-blob/text in block column - indicate |
9410 | ** backing store corruption. */ |
9411 | if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; |
9412 | |
9413 | if( rc==SQLITE_OK ){ |
9414 | u8 *aOut = 0; /* Read blob data into this buffer */ |
9415 | int nByte = sqlite3_blob_bytes(p->pReader); |
9416 | sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; |
9417 | pRet = (Fts5Data*)sqlite3_malloc64(nAlloc); |
9418 | if( pRet ){ |
9419 | pRet->nn = nByte; |
9420 | aOut = pRet->p = (u8*)&pRet[1]; |
9421 | }else{ |
9422 | rc = SQLITE_NOMEM; |
9423 | } |
9424 | |
9425 | if( rc==SQLITE_OK ){ |
9426 | rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); |
9427 | } |
9428 | if( rc!=SQLITE_OK ){ |
9429 | sqlite3_free(pRet); |
9430 | pRet = 0; |
9431 | }else{ |
9432 | /* TODO1: Fix this */ |
9433 | pRet->p[nByte] = 0x00; |
9434 | pRet->p[nByte+1] = 0x00; |
9435 | pRet->szLeaf = fts5GetU16(&pRet->p[2]); |
9436 | } |
9437 | } |
9438 | p->rc = rc; |
9439 | p->nRead++; |
9440 | } |
9441 | |
9442 | assert( (pRet==0)==(p->rc!=SQLITE_OK) ); |
9443 | return pRet; |
9444 | } |
9445 | |
9446 | |
9447 | /* |
9448 | ** Release a reference to data record returned by an earlier call to |
9449 | ** fts5DataRead(). |
9450 | */ |
9451 | static void fts5DataRelease(Fts5Data *pData){ |
9452 | sqlite3_free(pData); |
9453 | } |
9454 | |
9455 | static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ |
9456 | Fts5Data *pRet = fts5DataRead(p, iRowid); |
9457 | if( pRet ){ |
9458 | if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){ |
9459 | p->rc = FTS5_CORRUPT; |
9460 | fts5DataRelease(pRet); |
9461 | pRet = 0; |
9462 | } |
9463 | } |
9464 | return pRet; |
9465 | } |
9466 | |
9467 | static int fts5IndexPrepareStmt( |
9468 | Fts5Index *p, |
9469 | sqlite3_stmt **ppStmt, |
9470 | char *zSql |
9471 | ){ |
9472 | if( p->rc==SQLITE_OK ){ |
9473 | if( zSql ){ |
9474 | p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1, |
9475 | SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB, |
9476 | ppStmt, 0); |
9477 | }else{ |
9478 | p->rc = SQLITE_NOMEM; |
9479 | } |
9480 | } |
9481 | sqlite3_free(zSql); |
9482 | return p->rc; |
9483 | } |
9484 | |
9485 | |
9486 | /* |
9487 | ** INSERT OR REPLACE a record into the %_data table. |
9488 | */ |
9489 | static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ |
9490 | if( p->rc!=SQLITE_OK ) return; |
9491 | |
9492 | if( p->pWriter==0 ){ |
9493 | Fts5Config *pConfig = p->pConfig; |
9494 | fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( |
9495 | "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)" , |
9496 | pConfig->zDb, pConfig->zName |
9497 | )); |
9498 | if( p->rc ) return; |
9499 | } |
9500 | |
9501 | sqlite3_bind_int64(p->pWriter, 1, iRowid); |
9502 | sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); |
9503 | sqlite3_step(p->pWriter); |
9504 | p->rc = sqlite3_reset(p->pWriter); |
9505 | sqlite3_bind_null(p->pWriter, 2); |
9506 | } |
9507 | |
9508 | /* |
9509 | ** Execute the following SQL: |
9510 | ** |
9511 | ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast |
9512 | */ |
9513 | static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ |
9514 | if( p->rc!=SQLITE_OK ) return; |
9515 | |
9516 | if( p->pDeleter==0 ){ |
9517 | Fts5Config *pConfig = p->pConfig; |
9518 | char *zSql = sqlite3_mprintf( |
9519 | "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?" , |
9520 | pConfig->zDb, pConfig->zName |
9521 | ); |
9522 | if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return; |
9523 | } |
9524 | |
9525 | sqlite3_bind_int64(p->pDeleter, 1, iFirst); |
9526 | sqlite3_bind_int64(p->pDeleter, 2, iLast); |
9527 | sqlite3_step(p->pDeleter); |
9528 | p->rc = sqlite3_reset(p->pDeleter); |
9529 | } |
9530 | |
9531 | /* |
9532 | ** Remove all records associated with segment iSegid. |
9533 | */ |
9534 | static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ |
9535 | i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); |
9536 | i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; |
9537 | fts5DataDelete(p, iFirst, iLast); |
9538 | if( p->pIdxDeleter==0 ){ |
9539 | Fts5Config *pConfig = p->pConfig; |
9540 | fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( |
9541 | "DELETE FROM '%q'.'%q_idx' WHERE segid=?" , |
9542 | pConfig->zDb, pConfig->zName |
9543 | )); |
9544 | } |
9545 | if( p->rc==SQLITE_OK ){ |
9546 | sqlite3_bind_int(p->pIdxDeleter, 1, iSegid); |
9547 | sqlite3_step(p->pIdxDeleter); |
9548 | p->rc = sqlite3_reset(p->pIdxDeleter); |
9549 | } |
9550 | } |
9551 | |
9552 | /* |
9553 | ** Release a reference to an Fts5Structure object returned by an earlier |
9554 | ** call to fts5StructureRead() or fts5StructureDecode(). |
9555 | */ |
9556 | static void fts5StructureRelease(Fts5Structure *pStruct){ |
9557 | if( pStruct && 0>=(--pStruct->nRef) ){ |
9558 | int i; |
9559 | assert( pStruct->nRef==0 ); |
9560 | for(i=0; i<pStruct->nLevel; i++){ |
9561 | sqlite3_free(pStruct->aLevel[i].aSeg); |
9562 | } |
9563 | sqlite3_free(pStruct); |
9564 | } |
9565 | } |
9566 | |
/*
** Take an additional reference to structure object pStruct. Each reference
** must eventually be dropped using fts5StructureRelease(). pStruct must
** not be NULL.
*/
static void fts5StructureRef(Fts5Structure *pStruct){
  pStruct->nRef++;
}
9570 | |
9571 | static void *sqlite3Fts5StructureRef(Fts5Index *p){ |
9572 | fts5StructureRef(p->pStruct); |
9573 | return (void*)p->pStruct; |
9574 | } |
9575 | static void sqlite3Fts5StructureRelease(void *p){ |
9576 | if( p ){ |
9577 | fts5StructureRelease((Fts5Structure*)p); |
9578 | } |
9579 | } |
9580 | static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){ |
9581 | if( p->pStruct!=(Fts5Structure*)pStruct ){ |
9582 | return SQLITE_ABORT; |
9583 | } |
9584 | return SQLITE_OK; |
9585 | } |
9586 | |
9587 | /* |
9588 | ** Ensure that structure object (*pp) is writable. |
9589 | ** |
9590 | ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If |
9591 | ** an error occurs, (*pRc) is set to an SQLite error code before returning. |
9592 | */ |
9593 | static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){ |
9594 | Fts5Structure *p = *pp; |
9595 | if( *pRc==SQLITE_OK && p->nRef>1 ){ |
9596 | i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel); |
9597 | Fts5Structure *pNew; |
9598 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte); |
9599 | if( pNew ){ |
9600 | int i; |
9601 | memcpy(pNew, p, nByte); |
9602 | for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0; |
9603 | for(i=0; i<p->nLevel; i++){ |
9604 | Fts5StructureLevel *pLvl = &pNew->aLevel[i]; |
9605 | nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg; |
9606 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte); |
9607 | if( pLvl->aSeg==0 ){ |
9608 | for(i=0; i<p->nLevel; i++){ |
9609 | sqlite3_free(pNew->aLevel[i].aSeg); |
9610 | } |
9611 | sqlite3_free(pNew); |
9612 | return; |
9613 | } |
9614 | memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte); |
9615 | } |
9616 | p->nRef--; |
9617 | pNew->nRef = 1; |
9618 | } |
9619 | *pp = pNew; |
9620 | } |
9621 | } |
9622 | |
/*
** Deserialize and return the structure record currently stored in serialized
** form within buffer pData/nData.
**
** As parsed below, the serialized record consists of:
**
**   * a 4-byte configuration cookie (read using sqlite3Fts5Get32()),
**   * varint: number of levels (nLevel),
**   * varint: total number of segments (nSegment),
**   * varint: write counter (nWriteCounter),
**   * then, for each level:
**       - varint: nMerge,
**       - varint: number of segments on the level (nTotal),
**       - for each segment: varints iSegid, pgnoFirst and pgnoLast.
**
** NOTE(review): an earlier version of this comment stated that aLevel[]
** and each aSeg[] array are over-allocated by one slot. The allocations
** below request (nLevel-1) levels in addition to sizeof(Fts5Structure)
** and exactly nTotal segments per level, so that remark looks stale -
** confirm against the Fts5Structure declaration.
**
** If piCookie is not NULL, (*piCookie) is set to the cookie value.
**
** If an error occurs, an SQLite error code is returned. Otherwise, *ppOut
** is set to point to the new object, which has a reference count of 1, and
** SQLITE_OK is returned. *ppOut is set to NULL on every error path except
** the early FTS5_CORRUPT return taken when nLevel or nSegment is out of
** range; on that path *ppOut is not written at all.
*/
static int fts5StructureDecode(
  const u8 *pData,                /* Buffer containing serialized structure */
  int nData,                      /* Size of buffer pData in bytes */
  int *piCookie,                  /* Configuration cookie value */
  Fts5Structure **ppOut           /* OUT: Deserialized object */
){
  int rc = SQLITE_OK;
  int i = 0;
  int iLvl;
  int nLevel = 0;
  int nSegment = 0;
  sqlite3_int64 nByte;            /* Bytes of space to allocate at pRet */
  Fts5Structure *pRet = 0;        /* Structure object to return */

  /* Grab the cookie value */
  if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
  i = 4;

  /* Read the total number of levels and segments from the start of the
  ** structure record. These header reads are not checked against nData.
  ** NOTE(review): presumably safe because callers zero-pad the buffer (as
  ** fts5StructureReadUncached() does) - confirm for any other caller. */
  i += fts5GetVarint32(&pData[i], nLevel);
  i += fts5GetVarint32(&pData[i], nSegment);
  if( nLevel>FTS5_MAX_SEGMENT   || nLevel<0
   || nSegment>FTS5_MAX_SEGMENT || nSegment<0
  ){
    return FTS5_CORRUPT;
  }
  nByte = (
      sizeof(Fts5Structure) +                    /* Main structure */
      sizeof(Fts5StructureLevel) * (nLevel-1)    /* aLevel[] array */
  );
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);

  if( pRet ){
    pRet->nRef = 1;
    pRet->nLevel = nLevel;
    pRet->nSegment = nSegment;
    i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);

    for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
      Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
      int nTotal = 0;
      int iSeg;

      if( i>=nData ){
        rc = FTS5_CORRUPT;
      }else{
        i += fts5GetVarint32(&pData[i], pLvl->nMerge);
        i += fts5GetVarint32(&pData[i], nTotal);
        /* A level cannot be merging more segments than it contains. The
        ** allocation call that follows is passed &rc, so the error state
        ** set here is visible to it. */
        if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
        pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
            nTotal * sizeof(Fts5StructureSegment)
        );
        /* Count down so that the per-level totals can be checked against
        ** the header value once all levels have been read. */
        nSegment -= nTotal;
      }

      /* Success is judged by rc, not by pLvl->aSeg, which may be NULL for
      ** a level with nTotal==0. */
      if( rc==SQLITE_OK ){
        pLvl->nSeg = nTotal;
        for(iSeg=0; iSeg<nTotal; iSeg++){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          if( i>=nData ){
            rc = FTS5_CORRUPT;
            break;
          }
          i += fts5GetVarint32(&pData[i], pSeg->iSegid);
          i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
          i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
          if( pSeg->pgnoLast<pSeg->pgnoFirst ){
            rc = FTS5_CORRUPT;
            break;
          }
        }
        /* If the previous level has a merge in progress, this level may
        ** not be empty. And the final level may never be merging. */
        if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
        if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
      }
    }
    /* The per-level segment counts must sum to the header value. */
    if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;

    if( rc!=SQLITE_OK ){
      fts5StructureRelease(pRet);
      pRet = 0;
    }
  }

  *ppOut = pRet;
  return rc;
}
9722 | |
9723 | /* |
9724 | ** Add a level to the Fts5Structure.aLevel[] array of structure object |
9725 | ** (*ppStruct). |
9726 | */ |
9727 | static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ |
9728 | fts5StructureMakeWritable(pRc, ppStruct); |
9729 | if( *pRc==SQLITE_OK ){ |
9730 | Fts5Structure *pStruct = *ppStruct; |
9731 | int nLevel = pStruct->nLevel; |
9732 | sqlite3_int64 nByte = ( |
9733 | sizeof(Fts5Structure) + /* Main structure */ |
9734 | sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ |
9735 | ); |
9736 | |
9737 | pStruct = sqlite3_realloc64(pStruct, nByte); |
9738 | if( pStruct ){ |
9739 | memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); |
9740 | pStruct->nLevel++; |
9741 | *ppStruct = pStruct; |
9742 | }else{ |
9743 | *pRc = SQLITE_NOMEM; |
9744 | } |
9745 | } |
9746 | } |
9747 | |
9748 | /* |
9749 | ** Extend level iLvl so that there is room for at least nExtra more |
9750 | ** segments. |
9751 | */ |
9752 | static void fts5StructureExtendLevel( |
9753 | int *pRc, |
9754 | Fts5Structure *pStruct, |
9755 | int iLvl, |
9756 | int nExtra, |
9757 | int bInsert |
9758 | ){ |
9759 | if( *pRc==SQLITE_OK ){ |
9760 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
9761 | Fts5StructureSegment *aNew; |
9762 | sqlite3_int64 nByte; |
9763 | |
9764 | nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); |
9765 | aNew = sqlite3_realloc64(pLvl->aSeg, nByte); |
9766 | if( aNew ){ |
9767 | if( bInsert==0 ){ |
9768 | memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); |
9769 | }else{ |
9770 | int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); |
9771 | memmove(&aNew[nExtra], aNew, nMove); |
9772 | memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); |
9773 | } |
9774 | pLvl->aSeg = aNew; |
9775 | }else{ |
9776 | *pRc = SQLITE_NOMEM; |
9777 | } |
9778 | } |
9779 | } |
9780 | |
9781 | static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ |
9782 | Fts5Structure *pRet = 0; |
9783 | Fts5Config *pConfig = p->pConfig; |
9784 | int iCookie; /* Configuration cookie */ |
9785 | Fts5Data *pData; |
9786 | |
9787 | pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); |
9788 | if( p->rc==SQLITE_OK ){ |
9789 | /* TODO: Do we need this if the leaf-index is appended? Probably... */ |
9790 | memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); |
9791 | p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); |
9792 | if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){ |
9793 | p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); |
9794 | } |
9795 | fts5DataRelease(pData); |
9796 | if( p->rc!=SQLITE_OK ){ |
9797 | fts5StructureRelease(pRet); |
9798 | pRet = 0; |
9799 | } |
9800 | } |
9801 | |
9802 | return pRet; |
9803 | } |
9804 | |
9805 | static i64 fts5IndexDataVersion(Fts5Index *p){ |
9806 | i64 iVersion = 0; |
9807 | |
9808 | if( p->rc==SQLITE_OK ){ |
9809 | if( p->pDataVersion==0 ){ |
9810 | p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, |
9811 | sqlite3_mprintf("PRAGMA %Q.data_version" , p->pConfig->zDb) |
9812 | ); |
9813 | if( p->rc ) return 0; |
9814 | } |
9815 | |
9816 | if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){ |
9817 | iVersion = sqlite3_column_int64(p->pDataVersion, 0); |
9818 | } |
9819 | p->rc = sqlite3_reset(p->pDataVersion); |
9820 | } |
9821 | |
9822 | return iVersion; |
9823 | } |
9824 | |
/*
** Read, deserialize and return the structure record.
**
** The decoded structure is cached in p->pStruct. If a cached object is
** already present it is reused; otherwise the current data-version is
** stored in p->iStructVersion and the record is loaded using
** fts5StructureReadUncached(). In either case the caller is returned a
** new reference, which it should drop using fts5StructureRelease().
**
** If an error occurs, NULL is returned and an error code left in the
** Fts5Index handle. If an error has already occurred when this function
** is called, NULL is returned as well.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){

  if( p->pStruct==0 ){
    p->iStructVersion = fts5IndexDataVersion(p);
    if( p->rc==SQLITE_OK ){
      p->pStruct = fts5StructureReadUncached(p);
    }
  }

  /* Disabled debugging code. When enabled, this forms the "else" branch of
  ** the if( ) statement above: the structure is re-read from the database
  ** and compared, field by field, against the cached copy. */
#if 0
  else{
    Fts5Structure *pTest = fts5StructureReadUncached(p);
    if( pTest ){
      int i, j;
      assert_nc( p->pStruct->nSegment==pTest->nSegment );
      assert_nc( p->pStruct->nLevel==pTest->nLevel );
      for(i=0; i<pTest->nLevel; i++){
        assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
        assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
        for(j=0; j<pTest->aLevel[i].nSeg; j++){
          Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
          Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
          assert_nc( p1->iSegid==p2->iSegid );
          assert_nc( p1->pgnoFirst==p2->pgnoFirst );
          assert_nc( p1->pgnoLast==p2->pgnoLast );
        }
      }
      fts5StructureRelease(pTest);
    }
  }
#endif

  if( p->rc!=SQLITE_OK ) return 0;
  assert( p->iStructVersion!=0 );
  assert( p->pStruct!=0 );
  /* The cache keeps its own reference; this one belongs to the caller. */
  fts5StructureRef(p->pStruct);
  return p->pStruct;
}
9874 | |
9875 | static void fts5StructureInvalidate(Fts5Index *p){ |
9876 | if( p->pStruct ){ |
9877 | fts5StructureRelease(p->pStruct); |
9878 | p->pStruct = 0; |
9879 | } |
9880 | } |
9881 | |
/*
** Return the total number of segments in index structure pStruct, found
** by summing Fts5StructureLevel.nSeg over all levels. Zero is returned if
** pStruct is NULL. This function is only ever used as part of assert()
** conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Total number of segments */
  int ii;                         /* Used to iterate through levels */

  if( pStruct==0 ) return 0;
  for(ii=0; ii<pStruct->nLevel; ii++){
    nTotal += pStruct->aLevel[ii].nSeg;
  }
  return nTotal;
}
#endif
9899 | |
/*
** Append the nBlob byte blob pBlob to buffer pBuf without attempting to
** grow the buffer. The caller is responsible for ensuring that sufficient
** space is available - this is checked only by an assert().
**
** All macro parameters are parenthesized where used, so that an expression
** such as (1<<n) passed as nBlob is treated as a single value. pBuf and
** nBlob are each evaluated more than once, so neither may have side
** effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {    \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );          \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));        \
  (pBuf)->n += (nBlob);                                   \
}

/*
** Append iVal, encoded as a varint by sqlite3Fts5PutVarint(), to buffer
** pBuf without attempting to grow the buffer. The assert() runs after the
** write has been made, so it cannot prevent an overrun - the caller must
** ensure that sufficient space is available beforehand.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
9910 | |
9911 | |
9912 | /* |
9913 | ** Serialize and store the "structure" record. |
9914 | ** |
9915 | ** If an error occurs, leave an error code in the Fts5Index object. If an |
9916 | ** error has already occurred, this function is a no-op. |
9917 | */ |
9918 | static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ |
9919 | if( p->rc==SQLITE_OK ){ |
9920 | Fts5Buffer buf; /* Buffer to serialize record into */ |
9921 | int iLvl; /* Used to iterate through levels */ |
9922 | int iCookie; /* Cookie value to store */ |
9923 | |
9924 | assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); |
9925 | memset(&buf, 0, sizeof(Fts5Buffer)); |
9926 | |
9927 | /* Append the current configuration cookie */ |
9928 | iCookie = p->pConfig->iCookie; |
9929 | if( iCookie<0 ) iCookie = 0; |
9930 | |
9931 | if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){ |
9932 | sqlite3Fts5Put32(buf.p, iCookie); |
9933 | buf.n = 4; |
9934 | fts5BufferSafeAppendVarint(&buf, pStruct->nLevel); |
9935 | fts5BufferSafeAppendVarint(&buf, pStruct->nSegment); |
9936 | fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter); |
9937 | } |
9938 | |
9939 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
9940 | int iSeg; /* Used to iterate through segments */ |
9941 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
9942 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); |
9943 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); |
9944 | assert( pLvl->nMerge<=pLvl->nSeg ); |
9945 | |
9946 | for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ |
9947 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); |
9948 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); |
9949 | fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); |
9950 | } |
9951 | } |
9952 | |
9953 | fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); |
9954 | fts5BufferFree(&buf); |
9955 | } |
9956 | } |
9957 | |
/*
** Debugging aid. If the "#if 0" below is changed to "#if 1",
** fts5PrintStructure() renders structure pStruct as text using
** fts5DebugStructure() and prints it to stdout, prefixed by zCaption.
** As compiled, fts5PrintStructure() is a macro that expands to nothing.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
9972 | |
/*
** Return the size of segment pSeg in pages - the number of page numbers
** in the inclusive range pgnoFirst..pgnoLast.
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
}
9976 | |
9977 | /* |
9978 | ** Return a copy of index structure pStruct. Except, promote as many |
9979 | ** segments as possible to level iPromote. If an OOM occurs, NULL is |
9980 | ** returned. |
9981 | */ |
9982 | static void fts5StructurePromoteTo( |
9983 | Fts5Index *p, |
9984 | int iPromote, |
9985 | int szPromote, |
9986 | Fts5Structure *pStruct |
9987 | ){ |
9988 | int il, is; |
9989 | Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; |
9990 | |
9991 | if( pOut->nMerge==0 ){ |
9992 | for(il=iPromote+1; il<pStruct->nLevel; il++){ |
9993 | Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; |
9994 | if( pLvl->nMerge ) return; |
9995 | for(is=pLvl->nSeg-1; is>=0; is--){ |
9996 | int sz = fts5SegmentSize(&pLvl->aSeg[is]); |
9997 | if( sz>szPromote ) return; |
9998 | fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); |
9999 | if( p->rc ) return; |
10000 | memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); |
10001 | pOut->nSeg++; |
10002 | pLvl->nSeg--; |
10003 | } |
10004 | } |
10005 | } |
10006 | } |
10007 | |
10008 | /* |
10009 | ** A new segment has just been written to level iLvl of index structure |
10010 | ** pStruct. This function determines if any segments should be promoted |
10011 | ** as a result. Segments are promoted in two scenarios: |
10012 | ** |
10013 | ** a) If the segment just written is smaller than one or more segments |
10014 | ** within the previous populated level, it is promoted to the previous |
10015 | ** populated level. |
10016 | ** |
10017 | ** b) If the segment just written is larger than the newest segment on |
10018 | ** the next populated level, then that segment, and any other adjacent |
10019 | ** segments that are also smaller than the one just written, are |
10020 | ** promoted. |
10021 | ** |
10022 | ** If one or more segments are promoted, the structure object is updated |
10023 | ** to reflect this. |
10024 | */ |
10025 | static void fts5StructurePromote( |
10026 | Fts5Index *p, /* FTS5 backend object */ |
10027 | int iLvl, /* Index level just updated */ |
10028 | Fts5Structure *pStruct /* Index structure */ |
10029 | ){ |
10030 | if( p->rc==SQLITE_OK ){ |
10031 | int iTst; |
10032 | int iPromote = -1; |
10033 | int szPromote = 0; /* Promote anything this size or smaller */ |
10034 | Fts5StructureSegment *pSeg; /* Segment just written */ |
10035 | int szSeg; /* Size of segment just written */ |
10036 | int nSeg = pStruct->aLevel[iLvl].nSeg; |
10037 | |
10038 | if( nSeg==0 ) return; |
10039 | pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; |
10040 | szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); |
10041 | |
10042 | /* Check for condition (a) */ |
10043 | for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); |
10044 | if( iTst>=0 ){ |
10045 | int i; |
10046 | int szMax = 0; |
10047 | Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; |
10048 | assert( pTst->nMerge==0 ); |
10049 | for(i=0; i<pTst->nSeg; i++){ |
10050 | int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; |
10051 | if( sz>szMax ) szMax = sz; |
10052 | } |
10053 | if( szMax>=szSeg ){ |
10054 | /* Condition (a) is true. Promote the newest segment on level |
10055 | ** iLvl to level iTst. */ |
10056 | iPromote = iTst; |
10057 | szPromote = szMax; |
10058 | } |
10059 | } |
10060 | |
10061 | /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() |
10062 | ** is a no-op if it is not. */ |
10063 | if( iPromote<0 ){ |
10064 | iPromote = iLvl; |
10065 | szPromote = szSeg; |
10066 | } |
10067 | fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); |
10068 | } |
10069 | } |
10070 | |
10071 | |
10072 | /* |
10073 | ** Advance the iterator passed as the only argument. If the end of the |
10074 | ** doclist-index page is reached, return non-zero. |
10075 | */ |
10076 | static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ |
10077 | Fts5Data *pData = pLvl->pData; |
10078 | |
10079 | if( pLvl->iOff==0 ){ |
10080 | assert( pLvl->bEof==0 ); |
10081 | pLvl->iOff = 1; |
10082 | pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); |
10083 | pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); |
10084 | pLvl->iFirstOff = pLvl->iOff; |
10085 | }else{ |
10086 | int iOff; |
10087 | for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ |
10088 | if( pData->p[iOff] ) break; |
10089 | } |
10090 | |
10091 | if( iOff<pData->nn ){ |
10092 | i64 iVal; |
10093 | pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; |
10094 | iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); |
10095 | pLvl->iRowid += iVal; |
10096 | pLvl->iOff = iOff; |
10097 | }else{ |
10098 | pLvl->bEof = 1; |
10099 | } |
10100 | } |
10101 | |
10102 | return pLvl->bEof; |
10103 | } |
10104 | |
10105 | /* |
10106 | ** Advance the iterator passed as the only argument. |
10107 | */ |
10108 | static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ |
10109 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; |
10110 | |
10111 | assert( iLvl<pIter->nLvl ); |
10112 | if( fts5DlidxLvlNext(pLvl) ){ |
10113 | if( (iLvl+1) < pIter->nLvl ){ |
10114 | fts5DlidxIterNextR(p, pIter, iLvl+1); |
10115 | if( pLvl[1].bEof==0 ){ |
10116 | fts5DataRelease(pLvl->pData); |
10117 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
10118 | pLvl->pData = fts5DataRead(p, |
10119 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) |
10120 | ); |
10121 | if( pLvl->pData ) fts5DlidxLvlNext(pLvl); |
10122 | } |
10123 | } |
10124 | } |
10125 | |
10126 | return pIter->aLvl[0].bEof; |
10127 | } |
/*
** Advance doclist-index iterator pIter to its next entry, starting at
** level 0. Return the value returned by fts5DlidxIterNextR() - the bEof
** flag of level 0.
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterNextR(p, pIter, 0);
}
10131 | |
10132 | /* |
10133 | ** The iterator passed as the first argument has the following fields set |
10134 | ** as follows. This function sets up the rest of the iterator so that it |
10135 | ** points to the first rowid in the doclist-index. |
10136 | ** |
10137 | ** pData: |
10138 | ** pointer to doclist-index record, |
10139 | ** |
10140 | ** When this function is called pIter->iLeafPgno is the page number the |
10141 | ** doclist is associated with (the one featuring the term). |
10142 | */ |
10143 | static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ |
10144 | int i; |
10145 | for(i=0; i<pIter->nLvl; i++){ |
10146 | fts5DlidxLvlNext(&pIter->aLvl[i]); |
10147 | } |
10148 | return pIter->aLvl[0].bEof; |
10149 | } |
10150 | |
10151 | |
10152 | static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ |
10153 | return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; |
10154 | } |
10155 | |
10156 | static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ |
10157 | int i; |
10158 | |
10159 | /* Advance each level to the last entry on the last page */ |
10160 | for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ |
10161 | Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; |
10162 | while( fts5DlidxLvlNext(pLvl)==0 ); |
10163 | pLvl->bEof = 0; |
10164 | |
10165 | if( i>0 ){ |
10166 | Fts5DlidxLvl *pChild = &pLvl[-1]; |
10167 | fts5DataRelease(pChild->pData); |
10168 | memset(pChild, 0, sizeof(Fts5DlidxLvl)); |
10169 | pChild->pData = fts5DataRead(p, |
10170 | FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) |
10171 | ); |
10172 | } |
10173 | } |
10174 | } |
10175 | |
/*
** Move the iterator passed as the only argument to the previous entry.
**
** If the iterator already points to the first entry on the page (its
** offset is at or before iFirstOff), the bEof flag is set instead. The
** (possibly updated) value of the bEof flag is returned.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block 
    ** decrements iOff until it points to the first byte of the previous 
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the effect of the entry that is being stepped back over: the
    ** varint at iOff is the amount that was added to iRowid to reach it. */
    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }
    /* Each 0x00 varint skipped accounts for one leaf page number. */
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}
10227 | |
10228 | static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ |
10229 | Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; |
10230 | |
10231 | assert( iLvl<pIter->nLvl ); |
10232 | if( fts5DlidxLvlPrev(pLvl) ){ |
10233 | if( (iLvl+1) < pIter->nLvl ){ |
10234 | fts5DlidxIterPrevR(p, pIter, iLvl+1); |
10235 | if( pLvl[1].bEof==0 ){ |
10236 | fts5DataRelease(pLvl->pData); |
10237 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
10238 | pLvl->pData = fts5DataRead(p, |
10239 | FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) |
10240 | ); |
10241 | if( pLvl->pData ){ |
10242 | while( fts5DlidxLvlNext(pLvl)==0 ); |
10243 | pLvl->bEof = 0; |
10244 | } |
10245 | } |
10246 | } |
10247 | } |
10248 | |
10249 | return pIter->aLvl[0].bEof; |
10250 | } |
/*
** Move doclist-index iterator pIter to its previous entry, starting at
** level 0. Return the value returned by fts5DlidxIterPrevR() - the bEof
** flag of level 0.
*/
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  return fts5DlidxIterPrevR(p, pIter, 0);
}
10254 | |
10255 | /* |
10256 | ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). |
10257 | */ |
10258 | static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ |
10259 | if( pIter ){ |
10260 | int i; |
10261 | for(i=0; i<pIter->nLvl; i++){ |
10262 | fts5DataRelease(pIter->aLvl[i].pData); |
10263 | } |
10264 | sqlite3_free(pIter); |
10265 | } |
10266 | } |
10267 | |
10268 | static Fts5DlidxIter *fts5DlidxIterInit( |
10269 | Fts5Index *p, /* Fts5 Backend to iterate within */ |
10270 | int bRev, /* True for ORDER BY ASC */ |
10271 | int iSegid, /* Segment id */ |
10272 | int iLeafPg /* Leaf page number to load dlidx for */ |
10273 | ){ |
10274 | Fts5DlidxIter *pIter = 0; |
10275 | int i; |
10276 | int bDone = 0; |
10277 | |
10278 | for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ |
10279 | sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); |
10280 | Fts5DlidxIter *pNew; |
10281 | |
10282 | pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte); |
10283 | if( pNew==0 ){ |
10284 | p->rc = SQLITE_NOMEM; |
10285 | }else{ |
10286 | i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); |
10287 | Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; |
10288 | pIter = pNew; |
10289 | memset(pLvl, 0, sizeof(Fts5DlidxLvl)); |
10290 | pLvl->pData = fts5DataRead(p, iRowid); |
10291 | if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ |
10292 | bDone = 1; |
10293 | } |
10294 | pIter->nLvl = i+1; |
10295 | } |
10296 | } |
10297 | |
10298 | if( p->rc==SQLITE_OK ){ |
10299 | pIter->iSegid = iSegid; |
10300 | if( bRev==0 ){ |
10301 | fts5DlidxIterFirst(pIter); |
10302 | }else{ |
10303 | fts5DlidxIterLast(p, pIter); |
10304 | } |
10305 | } |
10306 | |
10307 | if( p->rc!=SQLITE_OK ){ |
10308 | fts5DlidxIterFree(pIter); |
10309 | pIter = 0; |
10310 | } |
10311 | |
10312 | return pIter; |
10313 | } |
10314 | |
/*
** Return the rowid that level 0 of doclist-index iterator pIter currently
** points to.
*/
static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
  return pIter->aLvl[0].iRowid;
}
/*
** Return the leaf page number that level 0 of doclist-index iterator pIter
** currently points to.
*/
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  return pIter->aLvl[0].iLeafPgno;
}
10321 | |
10322 | /* |
10323 | ** Load the next leaf page into the segment iterator. |
10324 | */ |
10325 | static void fts5SegIterNextPage( |
10326 | Fts5Index *p, /* FTS5 backend object */ |
10327 | Fts5SegIter *pIter /* Iterator to advance to next page */ |
10328 | ){ |
10329 | Fts5Data *pLeaf; |
10330 | Fts5StructureSegment *pSeg = pIter->pSeg; |
10331 | fts5DataRelease(pIter->pLeaf); |
10332 | pIter->iLeafPgno++; |
10333 | if( pIter->pNextLeaf ){ |
10334 | pIter->pLeaf = pIter->pNextLeaf; |
10335 | pIter->pNextLeaf = 0; |
10336 | }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ |
10337 | pIter->pLeaf = fts5LeafRead(p, |
10338 | FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) |
10339 | ); |
10340 | }else{ |
10341 | pIter->pLeaf = 0; |
10342 | } |
10343 | pLeaf = pIter->pLeaf; |
10344 | |
10345 | if( pLeaf ){ |
10346 | pIter->iPgidxOff = pLeaf->szLeaf; |
10347 | if( fts5LeafIsTermless(pLeaf) ){ |
10348 | pIter->iEndofDoclist = pLeaf->nn+1; |
10349 | }else{ |
10350 | pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], |
10351 | pIter->iEndofDoclist |
10352 | ); |
10353 | } |
10354 | } |
10355 | } |
10356 | |
10357 | /* |
10358 | ** Argument p points to a buffer containing a varint to be interpreted as a |
10359 | ** position list size field. Read the varint and return the number of bytes |
10360 | ** read. Before returning, set *pnSz to the number of bytes in the position |
10361 | ** list, and *pbDel to true if the delete flag is set, or false otherwise. |
10362 | */ |
10363 | static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ |
10364 | int nSz; |
10365 | int n = 0; |
10366 | fts5FastGetVarint32(p, n, nSz); |
10367 | assert_nc( nSz>=0 ); |
10368 | *pnSz = nSz/2; |
10369 | *pbDel = nSz & 0x0001; |
10370 | return n; |
10371 | } |
10372 | |
10373 | /* |
10374 | ** Fts5SegIter.iLeafOffset currently points to the first byte of a |
10375 | ** position-list size field. Read the value of the field and store it |
10376 | ** in the following variables: |
10377 | ** |
10378 | ** Fts5SegIter.nPos |
10379 | ** Fts5SegIter.bDel |
10380 | ** |
10381 | ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the |
10382 | ** position list content (if any). |
10383 | */ |
10384 | static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ |
10385 | if( p->rc==SQLITE_OK ){ |
10386 | int iOff = pIter->iLeafOffset; /* Offset to read at */ |
10387 | ASSERT_SZLEAF_OK(pIter->pLeaf); |
10388 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
10389 | int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf); |
10390 | pIter->bDel = 0; |
10391 | pIter->nPos = 1; |
10392 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ |
10393 | pIter->bDel = 1; |
10394 | iOff++; |
10395 | if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ |
10396 | pIter->nPos = 1; |
10397 | iOff++; |
10398 | }else{ |
10399 | pIter->nPos = 0; |
10400 | } |
10401 | } |
10402 | }else{ |
10403 | int nSz; |
10404 | fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); |
10405 | pIter->bDel = (nSz & 0x0001); |
10406 | pIter->nPos = nSz>>1; |
10407 | assert_nc( pIter->nPos>=0 ); |
10408 | } |
10409 | pIter->iLeafOffset = iOff; |
10410 | } |
10411 | } |
10412 | |
10413 | static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ |
10414 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ |
10415 | i64 iOff = pIter->iLeafOffset; |
10416 | |
10417 | ASSERT_SZLEAF_OK(pIter->pLeaf); |
10418 | if( iOff>=pIter->pLeaf->szLeaf ){ |
10419 | fts5SegIterNextPage(p, pIter); |
10420 | if( pIter->pLeaf==0 ){ |
10421 | if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; |
10422 | return; |
10423 | } |
10424 | iOff = 4; |
10425 | a = pIter->pLeaf->p; |
10426 | } |
10427 | iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); |
10428 | pIter->iLeafOffset = iOff; |
10429 | } |
10430 | |
/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
** This function populates:
**
**   Fts5SegIter.term
**   Fts5SegIter.iRowid
**
** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
** the first position list - that is, the position list belonging to
** document (Fts5SegIter.iRowid).
*/
10446 | static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ |
10447 | u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ |
10448 | i64 iOff = pIter->iLeafOffset; /* Offset to read at */ |
10449 | int nNew; /* Bytes of new data */ |
10450 | |
10451 | iOff += fts5GetVarint32(&a[iOff], nNew); |
10452 | if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){ |
10453 | p->rc = FTS5_CORRUPT; |
10454 | return; |
10455 | } |
10456 | pIter->term.n = nKeep; |
10457 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); |
10458 | assert( pIter->term.n<=pIter->term.nSpace ); |
10459 | iOff += nNew; |
10460 | pIter->iTermLeafOffset = iOff; |
10461 | pIter->iTermLeafPgno = pIter->iLeafPgno; |
10462 | pIter->iLeafOffset = iOff; |
10463 | |
10464 | if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ |
10465 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
10466 | }else{ |
10467 | int nExtra; |
10468 | pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); |
10469 | pIter->iEndofDoclist += nExtra; |
10470 | } |
10471 | |
10472 | fts5SegIterLoadRowid(p, pIter); |
10473 | } |
10474 | |
10475 | static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); |
10476 | static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); |
10477 | static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); |
10478 | |
10479 | static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ |
10480 | if( pIter->flags & FTS5_SEGITER_REVERSE ){ |
10481 | pIter->xNext = fts5SegIterNext_Reverse; |
10482 | }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
10483 | pIter->xNext = fts5SegIterNext_None; |
10484 | }else{ |
10485 | pIter->xNext = fts5SegIterNext; |
10486 | } |
10487 | } |
10488 | |
10489 | /* |
10490 | ** Initialize the iterator object pIter to iterate through the entries in |
10491 | ** segment pSeg. The iterator is left pointing to the first entry when |
10492 | ** this function returns. |
10493 | ** |
10494 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
10495 | ** an error has already occurred when this function is called, it is a no-op. |
10496 | */ |
10497 | static void fts5SegIterInit( |
10498 | Fts5Index *p, /* FTS index object */ |
10499 | Fts5StructureSegment *pSeg, /* Description of segment */ |
10500 | Fts5SegIter *pIter /* Object to populate */ |
10501 | ){ |
10502 | if( pSeg->pgnoFirst==0 ){ |
10503 | /* This happens if the segment is being used as an input to an incremental |
10504 | ** merge and all data has already been "trimmed". See function |
10505 | ** fts5TrimSegments() for details. In this case leave the iterator empty. |
10506 | ** The caller will see the (pIter->pLeaf==0) and assume the iterator is |
10507 | ** at EOF already. */ |
10508 | assert( pIter->pLeaf==0 ); |
10509 | return; |
10510 | } |
10511 | |
10512 | if( p->rc==SQLITE_OK ){ |
10513 | memset(pIter, 0, sizeof(*pIter)); |
10514 | fts5SegIterSetNext(p, pIter); |
10515 | pIter->pSeg = pSeg; |
10516 | pIter->iLeafPgno = pSeg->pgnoFirst-1; |
10517 | fts5SegIterNextPage(p, pIter); |
10518 | } |
10519 | |
10520 | if( p->rc==SQLITE_OK ){ |
10521 | pIter->iLeafOffset = 4; |
10522 | assert( pIter->pLeaf!=0 ); |
10523 | assert_nc( pIter->pLeaf->nn>4 ); |
10524 | assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); |
10525 | pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; |
10526 | fts5SegIterLoadTerm(p, pIter, 0); |
10527 | fts5SegIterLoadNPos(p, pIter); |
10528 | } |
10529 | } |
10530 | |
10531 | /* |
10532 | ** This function is only ever called on iterators created by calls to |
10533 | ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. |
10534 | ** |
10535 | ** The iterator is in an unusual state when this function is called: the |
10536 | ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of |
10537 | ** the position-list size field for the first relevant rowid on the page. |
10538 | ** Fts5SegIter.rowid is set, but nPos and bDel are not. |
10539 | ** |
10540 | ** This function advances the iterator so that it points to the last |
10541 | ** relevant rowid on the page and, if necessary, initializes the |
10542 | ** aRowidOffset[] and iRowidOffset variables. At this point the iterator |
10543 | ** is in its regular state - Fts5SegIter.iLeafOffset points to the first |
10544 | ** byte of the position list content associated with said rowid. |
10545 | */ |
10546 | static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ |
10547 | int eDetail = p->pConfig->eDetail; |
10548 | int n = pIter->pLeaf->szLeaf; |
10549 | int i = pIter->iLeafOffset; |
10550 | u8 *a = pIter->pLeaf->p; |
10551 | int iRowidOffset = 0; |
10552 | |
10553 | if( n>pIter->iEndofDoclist ){ |
10554 | n = pIter->iEndofDoclist; |
10555 | } |
10556 | |
10557 | ASSERT_SZLEAF_OK(pIter->pLeaf); |
10558 | while( 1 ){ |
10559 | u64 iDelta = 0; |
10560 | |
10561 | if( eDetail==FTS5_DETAIL_NONE ){ |
10562 | /* todo */ |
10563 | if( i<n && a[i]==0 ){ |
10564 | i++; |
10565 | if( i<n && a[i]==0 ) i++; |
10566 | } |
10567 | }else{ |
10568 | int nPos; |
10569 | int bDummy; |
10570 | i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); |
10571 | i += nPos; |
10572 | } |
10573 | if( i>=n ) break; |
10574 | i += fts5GetVarint(&a[i], &iDelta); |
10575 | pIter->iRowid += iDelta; |
10576 | |
10577 | /* If necessary, grow the pIter->aRowidOffset[] array. */ |
10578 | if( iRowidOffset>=pIter->nRowidOffset ){ |
10579 | int nNew = pIter->nRowidOffset + 8; |
10580 | int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int)); |
10581 | if( aNew==0 ){ |
10582 | p->rc = SQLITE_NOMEM; |
10583 | break; |
10584 | } |
10585 | pIter->aRowidOffset = aNew; |
10586 | pIter->nRowidOffset = nNew; |
10587 | } |
10588 | |
10589 | pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; |
10590 | pIter->iLeafOffset = i; |
10591 | } |
10592 | pIter->iRowidOffset = iRowidOffset; |
10593 | fts5SegIterLoadNPos(p, pIter); |
10594 | } |
10595 | |
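/*
** Move reverse iterator pIter to the previous leaf page that holds entries
** for its term. Pages are visited in descending page-number order, going
** no further back than the page on which the term itself is stored
** (Fts5SegIter.iTermLeafPgno). If no such page is found, the iterator is
** left at EOF (Fts5SegIter.pLeaf==0).
*/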
10599 | static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ |
10600 | assert( pIter->flags & FTS5_SEGITER_REVERSE ); |
10601 | assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
10602 | |
10603 | fts5DataRelease(pIter->pLeaf); |
10604 | pIter->pLeaf = 0; |
10605 | while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ |
10606 | Fts5Data *pNew; |
10607 | pIter->iLeafPgno--; |
10608 | pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( |
10609 | pIter->pSeg->iSegid, pIter->iLeafPgno |
10610 | )); |
10611 | if( pNew ){ |
10612 | /* iTermLeafOffset may be equal to szLeaf if the term is the last |
10613 | ** thing on the page - i.e. the first rowid is on the following page. |
10614 | ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ |
10615 | if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ |
10616 | assert( pIter->pLeaf==0 ); |
10617 | if( pIter->iTermLeafOffset<pNew->szLeaf ){ |
10618 | pIter->pLeaf = pNew; |
10619 | pIter->iLeafOffset = pIter->iTermLeafOffset; |
10620 | } |
10621 | }else{ |
10622 | int iRowidOff; |
10623 | iRowidOff = fts5LeafFirstRowidOff(pNew); |
10624 | if( iRowidOff ){ |
10625 | if( iRowidOff>=pNew->szLeaf ){ |
10626 | p->rc = FTS5_CORRUPT; |
10627 | }else{ |
10628 | pIter->pLeaf = pNew; |
10629 | pIter->iLeafOffset = iRowidOff; |
10630 | } |
10631 | } |
10632 | } |
10633 | |
10634 | if( pIter->pLeaf ){ |
10635 | u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; |
10636 | pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); |
10637 | break; |
10638 | }else{ |
10639 | fts5DataRelease(pNew); |
10640 | } |
10641 | } |
10642 | } |
10643 | |
10644 | if( pIter->pLeaf ){ |
10645 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
10646 | fts5SegIterReverseInitPage(p, pIter); |
10647 | } |
10648 | } |
10649 | |
10650 | /* |
10651 | ** Return true if the iterator passed as the second argument currently |
10652 | ** points to a delete marker. A delete marker is an entry with a 0 byte |
10653 | ** position-list. |
10654 | */ |
10655 | static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ |
10656 | Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; |
10657 | return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); |
10658 | } |
10659 | |
10660 | /* |
10661 | ** Advance iterator pIter to the next entry. |
10662 | ** |
10663 | ** This version of fts5SegIterNext() is only used by reverse iterators. |
10664 | */ |
10665 | static void fts5SegIterNext_Reverse( |
10666 | Fts5Index *p, /* FTS5 backend object */ |
10667 | Fts5SegIter *pIter, /* Iterator to advance */ |
10668 | int *pbUnused /* Unused */ |
10669 | ){ |
10670 | assert( pIter->flags & FTS5_SEGITER_REVERSE ); |
10671 | assert( pIter->pNextLeaf==0 ); |
10672 | UNUSED_PARAM(pbUnused); |
10673 | |
10674 | if( pIter->iRowidOffset>0 ){ |
10675 | u8 *a = pIter->pLeaf->p; |
10676 | int iOff; |
10677 | u64 iDelta; |
10678 | |
10679 | pIter->iRowidOffset--; |
10680 | pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; |
10681 | fts5SegIterLoadNPos(p, pIter); |
10682 | iOff = pIter->iLeafOffset; |
10683 | if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ |
10684 | iOff += pIter->nPos; |
10685 | } |
10686 | fts5GetVarint(&a[iOff], &iDelta); |
10687 | pIter->iRowid -= iDelta; |
10688 | }else{ |
10689 | fts5SegIterReverseNewPage(p, pIter); |
10690 | } |
10691 | } |
10692 | |
10693 | /* |
10694 | ** Advance iterator pIter to the next entry. |
10695 | ** |
10696 | ** This version of fts5SegIterNext() is only used if detail=none and the |
10697 | ** iterator is not a reverse direction iterator. |
10698 | */ |
10699 | static void fts5SegIterNext_None( |
10700 | Fts5Index *p, /* FTS5 backend object */ |
10701 | Fts5SegIter *pIter, /* Iterator to advance */ |
10702 | int *pbNewTerm /* OUT: Set for new term */ |
10703 | ){ |
10704 | int iOff; |
10705 | |
10706 | assert( p->rc==SQLITE_OK ); |
10707 | assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); |
10708 | assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); |
10709 | |
10710 | ASSERT_SZLEAF_OK(pIter->pLeaf); |
10711 | iOff = pIter->iLeafOffset; |
10712 | |
10713 | /* Next entry is on the next page */ |
10714 | if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ |
10715 | fts5SegIterNextPage(p, pIter); |
10716 | if( p->rc || pIter->pLeaf==0 ) return; |
10717 | pIter->iRowid = 0; |
10718 | iOff = 4; |
10719 | } |
10720 | |
10721 | if( iOff<pIter->iEndofDoclist ){ |
10722 | /* Next entry is on the current page */ |
10723 | i64 iDelta; |
10724 | iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); |
10725 | pIter->iLeafOffset = iOff; |
10726 | pIter->iRowid += iDelta; |
10727 | }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ |
10728 | if( pIter->pSeg ){ |
10729 | int nKeep = 0; |
10730 | if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ |
10731 | iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); |
10732 | } |
10733 | pIter->iLeafOffset = iOff; |
10734 | fts5SegIterLoadTerm(p, pIter, nKeep); |
10735 | }else{ |
10736 | const u8 *pList = 0; |
10737 | const char *zTerm = 0; |
10738 | int nList; |
10739 | sqlite3Fts5HashScanNext(p->pHash); |
10740 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); |
10741 | if( pList==0 ) goto next_none_eof; |
10742 | pIter->pLeaf->p = (u8*)pList; |
10743 | pIter->pLeaf->nn = nList; |
10744 | pIter->pLeaf->szLeaf = nList; |
10745 | pIter->iEndofDoclist = nList; |
10746 | sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); |
10747 | pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); |
10748 | } |
10749 | |
10750 | if( pbNewTerm ) *pbNewTerm = 1; |
10751 | }else{ |
10752 | goto next_none_eof; |
10753 | } |
10754 | |
10755 | fts5SegIterLoadNPos(p, pIter); |
10756 | |
10757 | return; |
10758 | next_none_eof: |
10759 | fts5DataRelease(pIter->pLeaf); |
10760 | pIter->pLeaf = 0; |
10761 | } |
10762 | |
10763 | |
10764 | /* |
10765 | ** Advance iterator pIter to the next entry. |
10766 | ** |
10767 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It |
10768 | ** is not considered an error if the iterator reaches EOF. If an error has |
10769 | ** already occurred when this function is called, it is a no-op. |
10770 | */ |
10771 | static void fts5SegIterNext( |
10772 | Fts5Index *p, /* FTS5 backend object */ |
10773 | Fts5SegIter *pIter, /* Iterator to advance */ |
10774 | int *pbNewTerm /* OUT: Set for new term */ |
10775 | ){ |
10776 | Fts5Data *pLeaf = pIter->pLeaf; |
10777 | int iOff; |
10778 | int bNewTerm = 0; |
10779 | int nKeep = 0; |
10780 | u8 *a; |
10781 | int n; |
10782 | |
10783 | assert( pbNewTerm==0 || *pbNewTerm==0 ); |
10784 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
10785 | |
10786 | /* Search for the end of the position list within the current page. */ |
10787 | a = pLeaf->p; |
10788 | n = pLeaf->szLeaf; |
10789 | |
10790 | ASSERT_SZLEAF_OK(pLeaf); |
10791 | iOff = pIter->iLeafOffset + pIter->nPos; |
10792 | |
10793 | if( iOff<n ){ |
10794 | /* The next entry is on the current page. */ |
10795 | assert_nc( iOff<=pIter->iEndofDoclist ); |
10796 | if( iOff>=pIter->iEndofDoclist ){ |
10797 | bNewTerm = 1; |
10798 | if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ |
10799 | iOff += fts5GetVarint32(&a[iOff], nKeep); |
10800 | } |
10801 | }else{ |
10802 | u64 iDelta; |
10803 | iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); |
10804 | pIter->iRowid += iDelta; |
10805 | assert_nc( iDelta>0 ); |
10806 | } |
10807 | pIter->iLeafOffset = iOff; |
10808 | |
10809 | }else if( pIter->pSeg==0 ){ |
10810 | const u8 *pList = 0; |
10811 | const char *zTerm = 0; |
10812 | int nList = 0; |
10813 | assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); |
10814 | if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ |
10815 | sqlite3Fts5HashScanNext(p->pHash); |
10816 | sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); |
10817 | } |
10818 | if( pList==0 ){ |
10819 | fts5DataRelease(pIter->pLeaf); |
10820 | pIter->pLeaf = 0; |
10821 | }else{ |
10822 | pIter->pLeaf->p = (u8*)pList; |
10823 | pIter->pLeaf->nn = nList; |
10824 | pIter->pLeaf->szLeaf = nList; |
10825 | pIter->iEndofDoclist = nList+1; |
10826 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), |
10827 | (u8*)zTerm); |
10828 | pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); |
10829 | *pbNewTerm = 1; |
10830 | } |
10831 | }else{ |
10832 | iOff = 0; |
10833 | /* Next entry is not on the current page */ |
10834 | while( iOff==0 ){ |
10835 | fts5SegIterNextPage(p, pIter); |
10836 | pLeaf = pIter->pLeaf; |
10837 | if( pLeaf==0 ) break; |
10838 | ASSERT_SZLEAF_OK(pLeaf); |
10839 | if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ |
10840 | iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); |
10841 | pIter->iLeafOffset = iOff; |
10842 | |
10843 | if( pLeaf->nn>pLeaf->szLeaf ){ |
10844 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( |
10845 | &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist |
10846 | ); |
10847 | } |
10848 | } |
10849 | else if( pLeaf->nn>pLeaf->szLeaf ){ |
10850 | pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( |
10851 | &pLeaf->p[pLeaf->szLeaf], iOff |
10852 | ); |
10853 | pIter->iLeafOffset = iOff; |
10854 | pIter->iEndofDoclist = iOff; |
10855 | bNewTerm = 1; |
10856 | } |
10857 | assert_nc( iOff<pLeaf->szLeaf ); |
10858 | if( iOff>pLeaf->szLeaf ){ |
10859 | p->rc = FTS5_CORRUPT; |
10860 | return; |
10861 | } |
10862 | } |
10863 | } |
10864 | |
10865 | /* Check if the iterator is now at EOF. If so, return early. */ |
10866 | if( pIter->pLeaf ){ |
10867 | if( bNewTerm ){ |
10868 | if( pIter->flags & FTS5_SEGITER_ONETERM ){ |
10869 | fts5DataRelease(pIter->pLeaf); |
10870 | pIter->pLeaf = 0; |
10871 | }else{ |
10872 | fts5SegIterLoadTerm(p, pIter, nKeep); |
10873 | fts5SegIterLoadNPos(p, pIter); |
10874 | if( pbNewTerm ) *pbNewTerm = 1; |
10875 | } |
10876 | }else{ |
10877 | /* The following could be done by calling fts5SegIterLoadNPos(). But |
10878 | ** this block is particularly performance critical, so equivalent |
10879 | ** code is inlined. */ |
10880 | int nSz; |
10881 | assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn ); |
10882 | fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); |
10883 | pIter->bDel = (nSz & 0x0001); |
10884 | pIter->nPos = nSz>>1; |
10885 | assert_nc( pIter->nPos>=0 ); |
10886 | } |
10887 | } |
10888 | } |
10889 | |
/*
** Exchange the values of a and b, both of which must be modifiable values
** of type T. The expansion declares a local variable named "tmp".
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Advance integer variable iOff past a varint stored in buffer a[]. Bytes
** are consumed up to and including the first one that does not have the
** 0x80 bit set, or until 9 bytes have been consumed, whichever comes first.
** The expansion declares a local variable named "iEnd".
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( (a[iOff++] & 0x80) && iOff<iEnd );       \
}
10896 | |
/*
** Iterator pIter currently points to the first rowid in a doclist. This
** function sets the iterator up so that it iterates in reverse order
** through the doclist.
*/
10902 | static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ |
10903 | Fts5DlidxIter *pDlidx = pIter->pDlidx; |
10904 | Fts5Data *pLast = 0; |
10905 | int pgnoLast = 0; |
10906 | |
10907 | if( pDlidx ){ |
10908 | int iSegid = pIter->pSeg->iSegid; |
10909 | pgnoLast = fts5DlidxIterPgno(pDlidx); |
10910 | pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); |
10911 | }else{ |
10912 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ |
10913 | |
10914 | /* Currently, Fts5SegIter.iLeafOffset points to the first byte of |
10915 | ** position-list content for the current rowid. Back it up so that it |
10916 | ** points to the start of the position-list size field. */ |
10917 | int iPoslist; |
10918 | if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ |
10919 | iPoslist = pIter->iTermLeafOffset; |
10920 | }else{ |
10921 | iPoslist = 4; |
10922 | } |
10923 | fts5IndexSkipVarint(pLeaf->p, iPoslist); |
10924 | pIter->iLeafOffset = iPoslist; |
10925 | |
10926 | /* If this condition is true then the largest rowid for the current |
10927 | ** term may not be stored on the current page. So search forward to |
10928 | ** see where said rowid really is. */ |
10929 | if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ |
10930 | int pgno; |
10931 | Fts5StructureSegment *pSeg = pIter->pSeg; |
10932 | |
10933 | /* The last rowid in the doclist may not be on the current page. Search |
10934 | ** forward to find the page containing the last rowid. */ |
10935 | for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ |
10936 | i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); |
10937 | Fts5Data *pNew = fts5LeafRead(p, iAbs); |
10938 | if( pNew ){ |
10939 | int iRowid, bTermless; |
10940 | iRowid = fts5LeafFirstRowidOff(pNew); |
10941 | bTermless = fts5LeafIsTermless(pNew); |
10942 | if( iRowid ){ |
10943 | SWAPVAL(Fts5Data*, pNew, pLast); |
10944 | pgnoLast = pgno; |
10945 | } |
10946 | fts5DataRelease(pNew); |
10947 | if( bTermless==0 ) break; |
10948 | } |
10949 | } |
10950 | } |
10951 | } |
10952 | |
10953 | /* If pLast is NULL at this point, then the last rowid for this doclist |
10954 | ** lies on the page currently indicated by the iterator. In this case |
10955 | ** pIter->iLeafOffset is already set to point to the position-list size |
10956 | ** field associated with the first relevant rowid on the page. |
10957 | ** |
10958 | ** Or, if pLast is non-NULL, then it is the page that contains the last |
10959 | ** rowid. In this case configure the iterator so that it points to the |
10960 | ** first rowid on this page. |
10961 | */ |
10962 | if( pLast ){ |
10963 | int iOff; |
10964 | fts5DataRelease(pIter->pLeaf); |
10965 | pIter->pLeaf = pLast; |
10966 | pIter->iLeafPgno = pgnoLast; |
10967 | iOff = fts5LeafFirstRowidOff(pLast); |
10968 | if( iOff>pLast->szLeaf ){ |
10969 | p->rc = FTS5_CORRUPT; |
10970 | return; |
10971 | } |
10972 | iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); |
10973 | pIter->iLeafOffset = iOff; |
10974 | |
10975 | if( fts5LeafIsTermless(pLast) ){ |
10976 | pIter->iEndofDoclist = pLast->nn+1; |
10977 | }else{ |
10978 | pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); |
10979 | } |
10980 | } |
10981 | |
10982 | fts5SegIterReverseInitPage(p, pIter); |
10983 | } |
10984 | |
10985 | /* |
10986 | ** Iterator pIter currently points to the first rowid of a doclist. |
10987 | ** There is a doclist-index associated with the final term on the current |
10988 | ** page. If the current term is the last term on the page, load the |
10989 | ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). |
10990 | */ |
10991 | static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ |
10992 | int iSeg = pIter->pSeg->iSegid; |
10993 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); |
10994 | Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ |
10995 | |
10996 | assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
10997 | assert( pIter->pDlidx==0 ); |
10998 | |
10999 | /* Check if the current doclist ends on this page. If it does, return |
11000 | ** early without loading the doclist-index (as it belongs to a different |
11001 | ** term. */ |
11002 | if( pIter->iTermLeafPgno==pIter->iLeafPgno |
11003 | && pIter->iEndofDoclist<pLeaf->szLeaf |
11004 | ){ |
11005 | return; |
11006 | } |
11007 | |
11008 | pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); |
11009 | } |
11010 | |
11011 | /* |
11012 | ** The iterator object passed as the second argument currently contains |
11013 | ** no valid values except for the Fts5SegIter.pLeaf member variable. This |
11014 | ** function searches the leaf page for a term matching (pTerm/nTerm). |
11015 | ** |
11016 | ** If the specified term is found on the page, then the iterator is left |
11017 | ** pointing to it. If argument bGe is zero and the term is not found, |
11018 | ** the iterator is left pointing at EOF. |
11019 | ** |
11020 | ** If bGe is non-zero and the specified term is not found, then the |
11021 | ** iterator is left pointing to the smallest term in the segment that |
11022 | ** is larger than the specified term, even if this term is not on the |
11023 | ** current page. |
11024 | */ |
11025 | static void fts5LeafSeek( |
11026 | Fts5Index *p, /* Leave any error code here */ |
11027 | int bGe, /* True for a >= search */ |
11028 | Fts5SegIter *pIter, /* Iterator to seek */ |
11029 | const u8 *pTerm, int nTerm /* Term to search for */ |
11030 | ){ |
11031 | u32 iOff; |
11032 | const u8 *a = pIter->pLeaf->p; |
11033 | u32 n = (u32)pIter->pLeaf->nn; |
11034 | |
11035 | u32 nMatch = 0; |
11036 | u32 nKeep = 0; |
11037 | u32 nNew = 0; |
11038 | u32 iTermOff; |
11039 | u32 iPgidx; /* Current offset in pgidx */ |
11040 | int bEndOfPage = 0; |
11041 | |
11042 | assert( p->rc==SQLITE_OK ); |
11043 | |
11044 | iPgidx = (u32)pIter->pLeaf->szLeaf; |
11045 | iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); |
11046 | iOff = iTermOff; |
11047 | if( iOff>n ){ |
11048 | p->rc = FTS5_CORRUPT; |
11049 | return; |
11050 | } |
11051 | |
11052 | while( 1 ){ |
11053 | |
11054 | /* Figure out how many new bytes are in this term */ |
11055 | fts5FastGetVarint32(a, iOff, nNew); |
11056 | if( nKeep<nMatch ){ |
11057 | goto search_failed; |
11058 | } |
11059 | |
11060 | assert( nKeep>=nMatch ); |
11061 | if( nKeep==nMatch ){ |
11062 | u32 nCmp; |
11063 | u32 i; |
11064 | nCmp = (u32)MIN(nNew, nTerm-nMatch); |
11065 | for(i=0; i<nCmp; i++){ |
11066 | if( a[iOff+i]!=pTerm[nMatch+i] ) break; |
11067 | } |
11068 | nMatch += i; |
11069 | |
11070 | if( (u32)nTerm==nMatch ){ |
11071 | if( i==nNew ){ |
11072 | goto search_success; |
11073 | }else{ |
11074 | goto search_failed; |
11075 | } |
11076 | }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ |
11077 | goto search_failed; |
11078 | } |
11079 | } |
11080 | |
11081 | if( iPgidx>=n ){ |
11082 | bEndOfPage = 1; |
11083 | break; |
11084 | } |
11085 | |
11086 | iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); |
11087 | iTermOff += nKeep; |
11088 | iOff = iTermOff; |
11089 | |
11090 | if( iOff>=n ){ |
11091 | p->rc = FTS5_CORRUPT; |
11092 | return; |
11093 | } |
11094 | |
11095 | /* Read the nKeep field of the next term. */ |
11096 | fts5FastGetVarint32(a, iOff, nKeep); |
11097 | } |
11098 | |
11099 | search_failed: |
11100 | if( bGe==0 ){ |
11101 | fts5DataRelease(pIter->pLeaf); |
11102 | pIter->pLeaf = 0; |
11103 | return; |
11104 | }else if( bEndOfPage ){ |
11105 | do { |
11106 | fts5SegIterNextPage(p, pIter); |
11107 | if( pIter->pLeaf==0 ) return; |
11108 | a = pIter->pLeaf->p; |
11109 | if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ |
11110 | iPgidx = (u32)pIter->pLeaf->szLeaf; |
11111 | iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff); |
11112 | if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){ |
11113 | p->rc = FTS5_CORRUPT; |
11114 | return; |
11115 | }else{ |
11116 | nKeep = 0; |
11117 | iTermOff = iOff; |
11118 | n = (u32)pIter->pLeaf->nn; |
11119 | iOff += fts5GetVarint32(&a[iOff], nNew); |
11120 | break; |
11121 | } |
11122 | } |
11123 | }while( 1 ); |
11124 | } |
11125 | |
11126 | search_success: |
11127 | if( (i64)iOff+nNew>n || nNew<1 ){ |
11128 | p->rc = FTS5_CORRUPT; |
11129 | return; |
11130 | } |
11131 | pIter->iLeafOffset = iOff + nNew; |
11132 | pIter->iTermLeafOffset = pIter->iLeafOffset; |
11133 | pIter->iTermLeafPgno = pIter->iLeafPgno; |
11134 | |
11135 | fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); |
11136 | fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); |
11137 | |
11138 | if( iPgidx>=n ){ |
11139 | pIter->iEndofDoclist = pIter->pLeaf->nn+1; |
11140 | }else{ |
11141 | int nExtra; |
11142 | iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); |
11143 | pIter->iEndofDoclist = iTermOff + nExtra; |
11144 | } |
11145 | pIter->iPgidxOff = iPgidx; |
11146 | |
11147 | fts5SegIterLoadRowid(p, pIter); |
11148 | fts5SegIterLoadNPos(p, pIter); |
11149 | } |
11150 | |
11151 | static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ |
11152 | if( p->pIdxSelect==0 ){ |
11153 | Fts5Config *pConfig = p->pConfig; |
11154 | fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( |
11155 | "SELECT pgno FROM '%q'.'%q_idx' WHERE " |
11156 | "segid=? AND term<=? ORDER BY term DESC LIMIT 1" , |
11157 | pConfig->zDb, pConfig->zName |
11158 | )); |
11159 | } |
11160 | return p->pIdxSelect; |
11161 | } |
11162 | |
11163 | /* |
11164 | ** Initialize the object pIter to point to term pTerm/nTerm within segment |
11165 | ** pSeg. If there is no such term in the index, the iterator is set to EOF. |
11166 | ** |
11167 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
11168 | ** an error has already occurred when this function is called, it is a no-op. |
11169 | */ |
11170 | static void fts5SegIterSeekInit( |
11171 | Fts5Index *p, /* FTS5 backend */ |
11172 | const u8 *pTerm, int nTerm, /* Term to seek to */ |
11173 | int flags, /* Mask of FTS5INDEX_XXX flags */ |
11174 | Fts5StructureSegment *pSeg, /* Description of segment */ |
11175 | Fts5SegIter *pIter /* Object to populate */ |
11176 | ){ |
11177 | int iPg = 1; |
11178 | int bGe = (flags & FTS5INDEX_QUERY_SCAN); |
11179 | int bDlidx = 0; /* True if there is a doclist-index */ |
11180 | sqlite3_stmt *pIdxSelect = 0; |
11181 | |
11182 | assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); |
11183 | assert( pTerm && nTerm ); |
11184 | memset(pIter, 0, sizeof(*pIter)); |
11185 | pIter->pSeg = pSeg; |
11186 | |
11187 | /* This block sets stack variable iPg to the leaf page number that may |
11188 | ** contain term (pTerm/nTerm), if it is present in the segment. */ |
11189 | pIdxSelect = fts5IdxSelectStmt(p); |
11190 | if( p->rc ) return; |
11191 | sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); |
11192 | sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); |
11193 | if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ |
11194 | i64 val = sqlite3_column_int(pIdxSelect, 0); |
11195 | iPg = (int)(val>>1); |
11196 | bDlidx = (val & 0x0001); |
11197 | } |
11198 | p->rc = sqlite3_reset(pIdxSelect); |
11199 | sqlite3_bind_null(pIdxSelect, 2); |
11200 | |
11201 | if( iPg<pSeg->pgnoFirst ){ |
11202 | iPg = pSeg->pgnoFirst; |
11203 | bDlidx = 0; |
11204 | } |
11205 | |
11206 | pIter->iLeafPgno = iPg - 1; |
11207 | fts5SegIterNextPage(p, pIter); |
11208 | |
11209 | if( pIter->pLeaf ){ |
11210 | fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); |
11211 | } |
11212 | |
11213 | if( p->rc==SQLITE_OK && bGe==0 ){ |
11214 | pIter->flags |= FTS5_SEGITER_ONETERM; |
11215 | if( pIter->pLeaf ){ |
11216 | if( flags & FTS5INDEX_QUERY_DESC ){ |
11217 | pIter->flags |= FTS5_SEGITER_REVERSE; |
11218 | } |
11219 | if( bDlidx ){ |
11220 | fts5SegIterLoadDlidx(p, pIter); |
11221 | } |
11222 | if( flags & FTS5INDEX_QUERY_DESC ){ |
11223 | fts5SegIterReverse(p, pIter); |
11224 | } |
11225 | } |
11226 | } |
11227 | |
11228 | fts5SegIterSetNext(p, pIter); |
11229 | |
11230 | /* Either: |
11231 | ** |
11232 | ** 1) an error has occurred, or |
11233 | ** 2) the iterator points to EOF, or |
11234 | ** 3) the iterator points to an entry with term (pTerm/nTerm), or |
11235 | ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points |
11236 | ** to an entry with a term greater than or equal to (pTerm/nTerm). |
11237 | */ |
11238 | assert_nc( p->rc!=SQLITE_OK /* 1 */ |
11239 | || pIter->pLeaf==0 /* 2 */ |
11240 | || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */ |
11241 | || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */ |
11242 | ); |
11243 | } |
11244 | |
11245 | /* |
11246 | ** Initialize the object pIter to point to term pTerm/nTerm within the |
11247 | ** in-memory hash table. If there is no such term in the hash-table, the |
11248 | ** iterator is set to EOF. |
11249 | ** |
11250 | ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If |
11251 | ** an error has already occurred when this function is called, it is a no-op. |
11252 | */ |
11253 | static void fts5SegIterHashInit( |
11254 | Fts5Index *p, /* FTS5 backend */ |
11255 | const u8 *pTerm, int nTerm, /* Term to seek to */ |
11256 | int flags, /* Mask of FTS5INDEX_XXX flags */ |
11257 | Fts5SegIter *pIter /* Object to populate */ |
11258 | ){ |
11259 | int nList = 0; |
11260 | const u8 *z = 0; |
11261 | int n = 0; |
11262 | Fts5Data *pLeaf = 0; |
11263 | |
11264 | assert( p->pHash ); |
11265 | assert( p->rc==SQLITE_OK ); |
11266 | |
11267 | if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ |
11268 | const u8 *pList = 0; |
11269 | |
11270 | p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); |
11271 | sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); |
11272 | n = (z ? (int)strlen((const char*)z) : 0); |
11273 | if( pList ){ |
11274 | pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); |
11275 | if( pLeaf ){ |
11276 | pLeaf->p = (u8*)pList; |
11277 | } |
11278 | } |
11279 | }else{ |
11280 | p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data), |
11281 | (const char*)pTerm, nTerm, (void**)&pLeaf, &nList |
11282 | ); |
11283 | if( pLeaf ){ |
11284 | pLeaf->p = (u8*)&pLeaf[1]; |
11285 | } |
11286 | z = pTerm; |
11287 | n = nTerm; |
11288 | pIter->flags |= FTS5_SEGITER_ONETERM; |
11289 | } |
11290 | |
11291 | if( pLeaf ){ |
11292 | sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); |
11293 | pLeaf->nn = pLeaf->szLeaf = nList; |
11294 | pIter->pLeaf = pLeaf; |
11295 | pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); |
11296 | pIter->iEndofDoclist = pLeaf->nn; |
11297 | |
11298 | if( flags & FTS5INDEX_QUERY_DESC ){ |
11299 | pIter->flags |= FTS5_SEGITER_REVERSE; |
11300 | fts5SegIterReverseInitPage(p, pIter); |
11301 | }else{ |
11302 | fts5SegIterLoadNPos(p, pIter); |
11303 | } |
11304 | } |
11305 | |
11306 | fts5SegIterSetNext(p, pIter); |
11307 | } |
11308 | |
11309 | /* |
11310 | ** Zero the iterator passed as the only argument. |
11311 | */ |
11312 | static void fts5SegIterClear(Fts5SegIter *pIter){ |
11313 | fts5BufferFree(&pIter->term); |
11314 | fts5DataRelease(pIter->pLeaf); |
11315 | fts5DataRelease(pIter->pNextLeaf); |
11316 | fts5DlidxIterFree(pIter->pDlidx); |
11317 | sqlite3_free(pIter->aRowidOffset); |
11318 | memset(pIter, 0, sizeof(Fts5SegIter)); |
11319 | } |
11320 | |
11321 | #ifdef SQLITE_DEBUG |
11322 | |
11323 | /* |
11324 | ** This function is used as part of the big assert() procedure implemented by |
11325 | ** fts5AssertMultiIterSetup(). It ensures that the result currently stored |
11326 | ** in *pRes is the correct result of comparing the current positions of the |
11327 | ** two iterators. |
11328 | */ |
11329 | static void fts5AssertComparisonResult( |
11330 | Fts5Iter *pIter, |
11331 | Fts5SegIter *p1, |
11332 | Fts5SegIter *p2, |
11333 | Fts5CResult *pRes |
11334 | ){ |
11335 | int i1 = p1 - pIter->aSeg; |
11336 | int i2 = p2 - pIter->aSeg; |
11337 | |
11338 | if( p1->pLeaf || p2->pLeaf ){ |
11339 | if( p1->pLeaf==0 ){ |
11340 | assert( pRes->iFirst==i2 ); |
11341 | }else if( p2->pLeaf==0 ){ |
11342 | assert( pRes->iFirst==i1 ); |
11343 | }else{ |
11344 | int nMin = MIN(p1->term.n, p2->term.n); |
11345 | int res = fts5Memcmp(p1->term.p, p2->term.p, nMin); |
11346 | if( res==0 ) res = p1->term.n - p2->term.n; |
11347 | |
11348 | if( res==0 ){ |
11349 | assert( pRes->bTermEq==1 ); |
11350 | assert( p1->iRowid!=p2->iRowid ); |
11351 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; |
11352 | }else{ |
11353 | assert( pRes->bTermEq==0 ); |
11354 | } |
11355 | |
11356 | if( res<0 ){ |
11357 | assert( pRes->iFirst==i1 ); |
11358 | }else{ |
11359 | assert( pRes->iFirst==i2 ); |
11360 | } |
11361 | } |
11362 | } |
11363 | } |
11364 | |
11365 | /* |
11366 | ** This function is a no-op unless SQLITE_DEBUG is defined when this module |
11367 | ** is compiled. In that case, this function is essentially an assert() |
11368 | ** statement used to verify that the contents of the pIter->aFirst[] array |
11369 | ** are correct. |
11370 | */ |
11371 | static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ |
11372 | if( p->rc==SQLITE_OK ){ |
11373 | Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
11374 | int i; |
11375 | |
11376 | assert( (pFirst->pLeaf==0)==pIter->base.bEof ); |
11377 | |
11378 | /* Check that pIter->iSwitchRowid is set correctly. */ |
11379 | for(i=0; i<pIter->nSeg; i++){ |
11380 | Fts5SegIter *p1 = &pIter->aSeg[i]; |
11381 | assert( p1==pFirst |
11382 | || p1->pLeaf==0 |
11383 | || fts5BufferCompare(&pFirst->term, &p1->term) |
11384 | || p1->iRowid==pIter->iSwitchRowid |
11385 | || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev |
11386 | ); |
11387 | } |
11388 | |
11389 | for(i=0; i<pIter->nSeg; i+=2){ |
11390 | Fts5SegIter *p1 = &pIter->aSeg[i]; |
11391 | Fts5SegIter *p2 = &pIter->aSeg[i+1]; |
11392 | Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; |
11393 | fts5AssertComparisonResult(pIter, p1, p2, pRes); |
11394 | } |
11395 | |
11396 | for(i=1; i<(pIter->nSeg / 2); i+=2){ |
11397 | Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; |
11398 | Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; |
11399 | Fts5CResult *pRes = &pIter->aFirst[i]; |
11400 | fts5AssertComparisonResult(pIter, p1, p2, pRes); |
11401 | } |
11402 | } |
11403 | } |
11404 | #else |
11405 | # define fts5AssertMultiIterSetup(x,y) |
11406 | #endif |
11407 | |
11408 | /* |
11409 | ** Do the comparison necessary to populate pIter->aFirst[iOut]. |
11410 | ** |
11411 | ** If the returned value is non-zero, then it is the index of an entry |
11412 | ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing |
11413 | ** to a key that is a duplicate of another, higher priority, |
11414 | ** segment-iterator in the pSeg->aSeg[] array. |
11415 | */ |
11416 | static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ |
11417 | int i1; /* Index of left-hand Fts5SegIter */ |
11418 | int i2; /* Index of right-hand Fts5SegIter */ |
11419 | int iRes; |
11420 | Fts5SegIter *p1; /* Left-hand Fts5SegIter */ |
11421 | Fts5SegIter *p2; /* Right-hand Fts5SegIter */ |
11422 | Fts5CResult *pRes = &pIter->aFirst[iOut]; |
11423 | |
11424 | assert( iOut<pIter->nSeg && iOut>0 ); |
11425 | assert( pIter->bRev==0 || pIter->bRev==1 ); |
11426 | |
11427 | if( iOut>=(pIter->nSeg/2) ){ |
11428 | i1 = (iOut - pIter->nSeg/2) * 2; |
11429 | i2 = i1 + 1; |
11430 | }else{ |
11431 | i1 = pIter->aFirst[iOut*2].iFirst; |
11432 | i2 = pIter->aFirst[iOut*2+1].iFirst; |
11433 | } |
11434 | p1 = &pIter->aSeg[i1]; |
11435 | p2 = &pIter->aSeg[i2]; |
11436 | |
11437 | pRes->bTermEq = 0; |
11438 | if( p1->pLeaf==0 ){ /* If p1 is at EOF */ |
11439 | iRes = i2; |
11440 | }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ |
11441 | iRes = i1; |
11442 | }else{ |
11443 | int res = fts5BufferCompare(&p1->term, &p2->term); |
11444 | if( res==0 ){ |
11445 | assert_nc( i2>i1 ); |
11446 | assert_nc( i2!=0 ); |
11447 | pRes->bTermEq = 1; |
11448 | if( p1->iRowid==p2->iRowid ){ |
11449 | p1->bDel = p2->bDel; |
11450 | return i2; |
11451 | } |
11452 | res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; |
11453 | } |
11454 | assert( res!=0 ); |
11455 | if( res<0 ){ |
11456 | iRes = i1; |
11457 | }else{ |
11458 | iRes = i2; |
11459 | } |
11460 | } |
11461 | |
11462 | pRes->iFirst = (u16)iRes; |
11463 | return 0; |
11464 | } |
11465 | |
11466 | /* |
11467 | ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. |
11468 | ** It is an error if leaf iLeafPgno does not exist or contains no rowids. |
11469 | */ |
11470 | static void fts5SegIterGotoPage( |
11471 | Fts5Index *p, /* FTS5 backend object */ |
11472 | Fts5SegIter *pIter, /* Iterator to advance */ |
11473 | int iLeafPgno |
11474 | ){ |
11475 | assert( iLeafPgno>pIter->iLeafPgno ); |
11476 | |
11477 | if( iLeafPgno>pIter->pSeg->pgnoLast ){ |
11478 | p->rc = FTS5_CORRUPT; |
11479 | }else{ |
11480 | fts5DataRelease(pIter->pNextLeaf); |
11481 | pIter->pNextLeaf = 0; |
11482 | pIter->iLeafPgno = iLeafPgno-1; |
11483 | fts5SegIterNextPage(p, pIter); |
11484 | assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); |
11485 | |
11486 | if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){ |
11487 | int iOff; |
11488 | u8 *a = pIter->pLeaf->p; |
11489 | int n = pIter->pLeaf->szLeaf; |
11490 | |
11491 | iOff = fts5LeafFirstRowidOff(pIter->pLeaf); |
11492 | if( iOff<4 || iOff>=n ){ |
11493 | p->rc = FTS5_CORRUPT; |
11494 | }else{ |
11495 | iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); |
11496 | pIter->iLeafOffset = iOff; |
11497 | fts5SegIterLoadNPos(p, pIter); |
11498 | } |
11499 | } |
11500 | } |
11501 | } |
11502 | |
11503 | /* |
11504 | ** Advance the iterator passed as the second argument until it is at or |
11505 | ** past rowid iFrom. Regardless of the value of iFrom, the iterator is |
11506 | ** always advanced at least once. |
11507 | */ |
11508 | static void fts5SegIterNextFrom( |
11509 | Fts5Index *p, /* FTS5 backend object */ |
11510 | Fts5SegIter *pIter, /* Iterator to advance */ |
11511 | i64 iMatch /* Advance iterator at least this far */ |
11512 | ){ |
11513 | int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); |
11514 | Fts5DlidxIter *pDlidx = pIter->pDlidx; |
11515 | int iLeafPgno = pIter->iLeafPgno; |
11516 | int bMove = 1; |
11517 | |
11518 | assert( pIter->flags & FTS5_SEGITER_ONETERM ); |
11519 | assert( pIter->pDlidx ); |
11520 | assert( pIter->pLeaf ); |
11521 | |
11522 | if( bRev==0 ){ |
11523 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ |
11524 | iLeafPgno = fts5DlidxIterPgno(pDlidx); |
11525 | fts5DlidxIterNext(p, pDlidx); |
11526 | } |
11527 | assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); |
11528 | if( iLeafPgno>pIter->iLeafPgno ){ |
11529 | fts5SegIterGotoPage(p, pIter, iLeafPgno); |
11530 | bMove = 0; |
11531 | } |
11532 | }else{ |
11533 | assert( pIter->pNextLeaf==0 ); |
11534 | assert( iMatch<pIter->iRowid ); |
11535 | while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ |
11536 | fts5DlidxIterPrev(p, pDlidx); |
11537 | } |
11538 | iLeafPgno = fts5DlidxIterPgno(pDlidx); |
11539 | |
11540 | assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); |
11541 | |
11542 | if( iLeafPgno<pIter->iLeafPgno ){ |
11543 | pIter->iLeafPgno = iLeafPgno+1; |
11544 | fts5SegIterReverseNewPage(p, pIter); |
11545 | bMove = 0; |
11546 | } |
11547 | } |
11548 | |
11549 | do{ |
11550 | if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); |
11551 | if( pIter->pLeaf==0 ) break; |
11552 | if( bRev==0 && pIter->iRowid>=iMatch ) break; |
11553 | if( bRev!=0 && pIter->iRowid<=iMatch ) break; |
11554 | bMove = 1; |
11555 | }while( p->rc==SQLITE_OK ); |
11556 | } |
11557 | |
11558 | |
11559 | /* |
11560 | ** Free the iterator object passed as the second argument. |
11561 | */ |
11562 | static void fts5MultiIterFree(Fts5Iter *pIter){ |
11563 | if( pIter ){ |
11564 | int i; |
11565 | for(i=0; i<pIter->nSeg; i++){ |
11566 | fts5SegIterClear(&pIter->aSeg[i]); |
11567 | } |
11568 | fts5BufferFree(&pIter->poslist); |
11569 | sqlite3_free(pIter); |
11570 | } |
11571 | } |
11572 | |
11573 | static void fts5MultiIterAdvanced( |
11574 | Fts5Index *p, /* FTS5 backend to iterate within */ |
11575 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ |
11576 | int iChanged, /* Index of sub-iterator just advanced */ |
11577 | int iMinset /* Minimum entry in aFirst[] to set */ |
11578 | ){ |
11579 | int i; |
11580 | for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ |
11581 | int iEq; |
11582 | if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ |
11583 | Fts5SegIter *pSeg = &pIter->aSeg[iEq]; |
11584 | assert( p->rc==SQLITE_OK ); |
11585 | pSeg->xNext(p, pSeg, 0); |
11586 | i = pIter->nSeg + iEq; |
11587 | } |
11588 | } |
11589 | } |
11590 | |
11591 | /* |
11592 | ** Sub-iterator iChanged of iterator pIter has just been advanced. It still |
11593 | ** points to the same term though - just a different rowid. This function |
11594 | ** attempts to update the contents of the pIter->aFirst[] accordingly. |
11595 | ** If it does so successfully, 0 is returned. Otherwise 1. |
11596 | ** |
11597 | ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() |
11598 | ** on the iterator instead. That function does the same as this one, except |
11599 | ** that it deals with more complicated cases as well. |
11600 | */ |
11601 | static int fts5MultiIterAdvanceRowid( |
11602 | Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ |
11603 | int iChanged, /* Index of sub-iterator just advanced */ |
11604 | Fts5SegIter **ppFirst |
11605 | ){ |
11606 | Fts5SegIter *pNew = &pIter->aSeg[iChanged]; |
11607 | |
11608 | if( pNew->iRowid==pIter->iSwitchRowid |
11609 | || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev |
11610 | ){ |
11611 | int i; |
11612 | Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; |
11613 | pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64; |
11614 | for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ |
11615 | Fts5CResult *pRes = &pIter->aFirst[i]; |
11616 | |
11617 | assert( pNew->pLeaf ); |
11618 | assert( pRes->bTermEq==0 || pOther->pLeaf ); |
11619 | |
11620 | if( pRes->bTermEq ){ |
11621 | if( pNew->iRowid==pOther->iRowid ){ |
11622 | return 1; |
11623 | }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ |
11624 | pIter->iSwitchRowid = pOther->iRowid; |
11625 | pNew = pOther; |
11626 | }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ |
11627 | pIter->iSwitchRowid = pOther->iRowid; |
11628 | } |
11629 | } |
11630 | pRes->iFirst = (u16)(pNew - pIter->aSeg); |
11631 | if( i==1 ) break; |
11632 | |
11633 | pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; |
11634 | } |
11635 | } |
11636 | |
11637 | *ppFirst = pNew; |
11638 | return 0; |
11639 | } |
11640 | |
11641 | /* |
11642 | ** Set the pIter->bEof variable based on the state of the sub-iterators. |
11643 | */ |
11644 | static void fts5MultiIterSetEof(Fts5Iter *pIter){ |
11645 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
11646 | pIter->base.bEof = pSeg->pLeaf==0; |
11647 | pIter->iSwitchRowid = pSeg->iRowid; |
11648 | } |
11649 | |
11650 | /* |
11651 | ** Move the iterator to the next entry. |
11652 | ** |
11653 | ** If an error occurs, an error code is left in Fts5Index.rc. It is not |
11654 | ** considered an error if the iterator reaches EOF, or if it is already at |
11655 | ** EOF when this function is called. |
11656 | */ |
11657 | static void fts5MultiIterNext( |
11658 | Fts5Index *p, |
11659 | Fts5Iter *pIter, |
11660 | int bFrom, /* True if argument iFrom is valid */ |
11661 | i64 iFrom /* Advance at least as far as this */ |
11662 | ){ |
11663 | int bUseFrom = bFrom; |
11664 | assert( pIter->base.bEof==0 ); |
11665 | while( p->rc==SQLITE_OK ){ |
11666 | int iFirst = pIter->aFirst[1].iFirst; |
11667 | int bNewTerm = 0; |
11668 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; |
11669 | assert( p->rc==SQLITE_OK ); |
11670 | if( bUseFrom && pSeg->pDlidx ){ |
11671 | fts5SegIterNextFrom(p, pSeg, iFrom); |
11672 | }else{ |
11673 | pSeg->xNext(p, pSeg, &bNewTerm); |
11674 | } |
11675 | |
11676 | if( pSeg->pLeaf==0 || bNewTerm |
11677 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) |
11678 | ){ |
11679 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); |
11680 | fts5MultiIterSetEof(pIter); |
11681 | pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; |
11682 | if( pSeg->pLeaf==0 ) return; |
11683 | } |
11684 | |
11685 | fts5AssertMultiIterSetup(p, pIter); |
11686 | assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); |
11687 | if( pIter->bSkipEmpty==0 || pSeg->nPos ){ |
11688 | pIter->xSetOutputs(pIter, pSeg); |
11689 | return; |
11690 | } |
11691 | bUseFrom = 0; |
11692 | } |
11693 | } |
11694 | |
11695 | static void fts5MultiIterNext2( |
11696 | Fts5Index *p, |
11697 | Fts5Iter *pIter, |
11698 | int *pbNewTerm /* OUT: True if *might* be new term */ |
11699 | ){ |
11700 | assert( pIter->bSkipEmpty ); |
11701 | if( p->rc==SQLITE_OK ){ |
11702 | *pbNewTerm = 0; |
11703 | do{ |
11704 | int iFirst = pIter->aFirst[1].iFirst; |
11705 | Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; |
11706 | int bNewTerm = 0; |
11707 | |
11708 | assert( p->rc==SQLITE_OK ); |
11709 | pSeg->xNext(p, pSeg, &bNewTerm); |
11710 | if( pSeg->pLeaf==0 || bNewTerm |
11711 | || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) |
11712 | ){ |
11713 | fts5MultiIterAdvanced(p, pIter, iFirst, 1); |
11714 | fts5MultiIterSetEof(pIter); |
11715 | *pbNewTerm = 1; |
11716 | } |
11717 | fts5AssertMultiIterSetup(p, pIter); |
11718 | |
11719 | }while( fts5MultiIterIsEmpty(p, pIter) ); |
11720 | } |
11721 | } |
11722 | |
11723 | static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ |
11724 | UNUSED_PARAM2(pUnused1, pUnused2); |
11725 | } |
11726 | |
11727 | static Fts5Iter *fts5MultiIterAlloc( |
11728 | Fts5Index *p, /* FTS5 backend to iterate within */ |
11729 | int nSeg |
11730 | ){ |
11731 | Fts5Iter *pNew; |
11732 | int nSlot; /* Power of two >= nSeg */ |
11733 | |
11734 | for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); |
11735 | pNew = fts5IdxMalloc(p, |
11736 | sizeof(Fts5Iter) + /* pNew */ |
11737 | sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ |
11738 | sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ |
11739 | ); |
11740 | if( pNew ){ |
11741 | pNew->nSeg = nSlot; |
11742 | pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; |
11743 | pNew->pIndex = p; |
11744 | pNew->xSetOutputs = fts5IterSetOutputs_Noop; |
11745 | } |
11746 | return pNew; |
11747 | } |
11748 | |
11749 | static void fts5PoslistCallback( |
11750 | Fts5Index *pUnused, |
11751 | void *pContext, |
11752 | const u8 *pChunk, int nChunk |
11753 | ){ |
11754 | UNUSED_PARAM(pUnused); |
11755 | assert_nc( nChunk>=0 ); |
11756 | if( nChunk>0 ){ |
11757 | fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); |
11758 | } |
11759 | } |
11760 | |
11761 | typedef struct PoslistCallbackCtx PoslistCallbackCtx; |
11762 | struct PoslistCallbackCtx { |
11763 | Fts5Buffer *pBuf; /* Append to this buffer */ |
11764 | Fts5Colset *pColset; /* Restrict matches to this column */ |
11765 | int eState; /* See above */ |
11766 | }; |
11767 | |
11768 | typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; |
11769 | struct PoslistOffsetsCtx { |
11770 | Fts5Buffer *pBuf; /* Append to this buffer */ |
11771 | Fts5Colset *pColset; /* Restrict matches to this column */ |
11772 | int iRead; |
11773 | int iWrite; |
11774 | }; |
11775 | |
11776 | /* |
11777 | ** TODO: Make this more efficient! |
11778 | */ |
11779 | static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ |
11780 | int i; |
11781 | for(i=0; i<pColset->nCol; i++){ |
11782 | if( pColset->aiCol[i]==iCol ) return 1; |
11783 | } |
11784 | return 0; |
11785 | } |
11786 | |
11787 | static void fts5PoslistOffsetsCallback( |
11788 | Fts5Index *pUnused, |
11789 | void *pContext, |
11790 | const u8 *pChunk, int nChunk |
11791 | ){ |
11792 | PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; |
11793 | UNUSED_PARAM(pUnused); |
11794 | assert_nc( nChunk>=0 ); |
11795 | if( nChunk>0 ){ |
11796 | int i = 0; |
11797 | while( i<nChunk ){ |
11798 | int iVal; |
11799 | i += fts5GetVarint32(&pChunk[i], iVal); |
11800 | iVal += pCtx->iRead - 2; |
11801 | pCtx->iRead = iVal; |
11802 | if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ |
11803 | fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); |
11804 | pCtx->iWrite = iVal; |
11805 | } |
11806 | } |
11807 | } |
11808 | } |
11809 | |
11810 | static void fts5PoslistFilterCallback( |
11811 | Fts5Index *pUnused, |
11812 | void *pContext, |
11813 | const u8 *pChunk, int nChunk |
11814 | ){ |
11815 | PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; |
11816 | UNUSED_PARAM(pUnused); |
11817 | assert_nc( nChunk>=0 ); |
11818 | if( nChunk>0 ){ |
11819 | /* Search through to find the first varint with value 1. This is the |
11820 | ** start of the next columns hits. */ |
11821 | int i = 0; |
11822 | int iStart = 0; |
11823 | |
11824 | if( pCtx->eState==2 ){ |
11825 | int iCol; |
11826 | fts5FastGetVarint32(pChunk, i, iCol); |
11827 | if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ |
11828 | pCtx->eState = 1; |
11829 | fts5BufferSafeAppendVarint(pCtx->pBuf, 1); |
11830 | }else{ |
11831 | pCtx->eState = 0; |
11832 | } |
11833 | } |
11834 | |
11835 | do { |
11836 | while( i<nChunk && pChunk[i]!=0x01 ){ |
11837 | while( pChunk[i] & 0x80 ) i++; |
11838 | i++; |
11839 | } |
11840 | if( pCtx->eState ){ |
11841 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); |
11842 | } |
11843 | if( i<nChunk ){ |
11844 | int iCol; |
11845 | iStart = i; |
11846 | i++; |
11847 | if( i>=nChunk ){ |
11848 | pCtx->eState = 2; |
11849 | }else{ |
11850 | fts5FastGetVarint32(pChunk, i, iCol); |
11851 | pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); |
11852 | if( pCtx->eState ){ |
11853 | fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); |
11854 | iStart = i; |
11855 | } |
11856 | } |
11857 | } |
11858 | }while( i<nChunk ); |
11859 | } |
11860 | } |
11861 | |
11862 | static void fts5ChunkIterate( |
11863 | Fts5Index *p, /* Index object */ |
11864 | Fts5SegIter *pSeg, /* Poslist of this iterator */ |
11865 | void *pCtx, /* Context pointer for xChunk callback */ |
11866 | void (*xChunk)(Fts5Index*, void*, const u8*, int) |
11867 | ){ |
11868 | int nRem = pSeg->nPos; /* Number of bytes still to come */ |
11869 | Fts5Data *pData = 0; |
11870 | u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
11871 | int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); |
11872 | int pgno = pSeg->iLeafPgno; |
11873 | int pgnoSave = 0; |
11874 | |
11875 | /* This function does not work with detail=none databases. */ |
11876 | assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
11877 | |
11878 | if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ |
11879 | pgnoSave = pgno+1; |
11880 | } |
11881 | |
11882 | while( 1 ){ |
11883 | xChunk(p, pCtx, pChunk, nChunk); |
11884 | nRem -= nChunk; |
11885 | fts5DataRelease(pData); |
11886 | if( nRem<=0 ){ |
11887 | break; |
11888 | }else if( pSeg->pSeg==0 ){ |
11889 | p->rc = FTS5_CORRUPT; |
11890 | return; |
11891 | }else{ |
11892 | pgno++; |
11893 | pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); |
11894 | if( pData==0 ) break; |
11895 | pChunk = &pData->p[4]; |
11896 | nChunk = MIN(nRem, pData->szLeaf - 4); |
11897 | if( pgno==pgnoSave ){ |
11898 | assert( pSeg->pNextLeaf==0 ); |
11899 | pSeg->pNextLeaf = pData; |
11900 | pData = 0; |
11901 | } |
11902 | } |
11903 | } |
11904 | } |
11905 | |
11906 | /* |
11907 | ** Iterator pIter currently points to a valid entry (not EOF). This |
11908 | ** function appends the position list data for the current entry to |
11909 | ** buffer pBuf. It does not make a copy of the position-list size |
11910 | ** field. |
11911 | */ |
11912 | static void fts5SegiterPoslist( |
11913 | Fts5Index *p, |
11914 | Fts5SegIter *pSeg, |
11915 | Fts5Colset *pColset, |
11916 | Fts5Buffer *pBuf |
11917 | ){ |
11918 | assert( pBuf!=0 ); |
11919 | assert( pSeg!=0 ); |
11920 | if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){ |
11921 | assert( pBuf->p!=0 ); |
11922 | assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING ); |
11923 | memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING); |
11924 | if( pColset==0 ){ |
11925 | fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); |
11926 | }else{ |
11927 | if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ |
11928 | PoslistCallbackCtx sCtx; |
11929 | sCtx.pBuf = pBuf; |
11930 | sCtx.pColset = pColset; |
11931 | sCtx.eState = fts5IndexColsetTest(pColset, 0); |
11932 | assert( sCtx.eState==0 || sCtx.eState==1 ); |
11933 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); |
11934 | }else{ |
11935 | PoslistOffsetsCtx sCtx; |
11936 | memset(&sCtx, 0, sizeof(sCtx)); |
11937 | sCtx.pBuf = pBuf; |
11938 | sCtx.pColset = pColset; |
11939 | fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); |
11940 | } |
11941 | } |
11942 | } |
11943 | } |
11944 | |
11945 | /* |
11946 | ** Parameter pPos points to a buffer containing a position list, size nPos. |
11947 | ** This function filters it according to pColset (which must be non-NULL) |
11948 | ** and sets pIter->base.pData/nData to point to the new position list. |
11949 | ** If memory is required for the new position list, use buffer pIter->poslist. |
11950 | ** Or, if the new position list is a contiguous subset of the input, set |
11951 | ** pIter->base.pData/nData to point directly to it. |
11952 | ** |
11953 | ** This function is a no-op if *pRc is other than SQLITE_OK when it is |
11954 | ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM |
11955 | ** before returning. |
11956 | */ |
11957 | static void fts5IndexExtractColset( |
11958 | int *pRc, |
11959 | Fts5Colset *pColset, /* Colset to filter on */ |
11960 | const u8 *pPos, int nPos, /* Position list */ |
11961 | Fts5Iter *pIter |
11962 | ){ |
11963 | if( *pRc==SQLITE_OK ){ |
11964 | const u8 *p = pPos; |
11965 | const u8 *aCopy = p; |
11966 | const u8 *pEnd = &p[nPos]; /* One byte past end of position list */ |
11967 | int i = 0; |
11968 | int iCurrent = 0; |
11969 | |
11970 | if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){ |
11971 | return; |
11972 | } |
11973 | |
11974 | while( 1 ){ |
11975 | while( pColset->aiCol[i]<iCurrent ){ |
11976 | i++; |
11977 | if( i==pColset->nCol ){ |
11978 | pIter->base.pData = pIter->poslist.p; |
11979 | pIter->base.nData = pIter->poslist.n; |
11980 | return; |
11981 | } |
11982 | } |
11983 | |
11984 | /* Advance pointer p until it points to pEnd or an 0x01 byte that is |
11985 | ** not part of a varint */ |
11986 | while( p<pEnd && *p!=0x01 ){ |
11987 | while( *p++ & 0x80 ); |
11988 | } |
11989 | |
11990 | if( pColset->aiCol[i]==iCurrent ){ |
11991 | if( pColset->nCol==1 ){ |
11992 | pIter->base.pData = aCopy; |
11993 | pIter->base.nData = p-aCopy; |
11994 | return; |
11995 | } |
11996 | fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy); |
11997 | } |
11998 | if( p>=pEnd ){ |
11999 | pIter->base.pData = pIter->poslist.p; |
12000 | pIter->base.nData = pIter->poslist.n; |
12001 | return; |
12002 | } |
12003 | aCopy = p++; |
12004 | iCurrent = *p++; |
12005 | if( iCurrent & 0x80 ){ |
12006 | p--; |
12007 | p += fts5GetVarint32(p, iCurrent); |
12008 | } |
12009 | } |
12010 | } |
12011 | |
12012 | } |
12013 | |
12014 | /* |
12015 | ** xSetOutputs callback used by detail=none tables. |
12016 | */ |
12017 | static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
12018 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); |
12019 | pIter->base.iRowid = pSeg->iRowid; |
12020 | pIter->base.nData = pSeg->nPos; |
12021 | } |
12022 | |
12023 | /* |
12024 | ** xSetOutputs callback used by detail=full and detail=col tables when no |
12025 | ** column filters are specified. |
12026 | */ |
12027 | static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
12028 | pIter->base.iRowid = pSeg->iRowid; |
12029 | pIter->base.nData = pSeg->nPos; |
12030 | |
12031 | assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); |
12032 | assert( pIter->pColset==0 ); |
12033 | |
12034 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ |
12035 | /* All data is stored on the current page. Populate the output |
12036 | ** variables to point into the body of the page object. */ |
12037 | pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
12038 | }else{ |
12039 | /* The data is distributed over two or more pages. Copy it into the |
12040 | ** Fts5Iter.poslist buffer and then set the output pointer to point |
12041 | ** to this buffer. */ |
12042 | fts5BufferZero(&pIter->poslist); |
12043 | fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); |
12044 | pIter->base.pData = pIter->poslist.p; |
12045 | } |
12046 | } |
12047 | |
12048 | /* |
12049 | ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match |
12050 | ** against no columns at all). |
12051 | */ |
12052 | static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
12053 | UNUSED_PARAM(pSeg); |
12054 | pIter->base.nData = 0; |
12055 | } |
12056 | |
12057 | /* |
12058 | ** xSetOutputs callback used by detail=col when there is a column filter |
12059 | ** and there are 100 or more columns. Also called as a fallback from |
12060 | ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. |
12061 | */ |
12062 | static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
12063 | fts5BufferZero(&pIter->poslist); |
12064 | fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); |
12065 | pIter->base.iRowid = pSeg->iRowid; |
12066 | pIter->base.pData = pIter->poslist.p; |
12067 | pIter->base.nData = pIter->poslist.n; |
12068 | } |
12069 | |
12070 | /* |
12071 | ** xSetOutputs callback used when: |
12072 | ** |
12073 | ** * detail=col, |
12074 | ** * there is a column filter, and |
12075 | ** * the table contains 100 or fewer columns. |
12076 | ** |
12077 | ** The last point is to ensure all column numbers are stored as |
12078 | ** single-byte varints. |
12079 | */ |
12080 | static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
12081 | |
12082 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
12083 | assert( pIter->pColset ); |
12084 | |
12085 | if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ |
12086 | fts5IterSetOutputs_Col(pIter, pSeg); |
12087 | }else{ |
12088 | u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; |
12089 | u8 *pEnd = (u8*)&a[pSeg->nPos]; |
12090 | int iPrev = 0; |
12091 | int *aiCol = pIter->pColset->aiCol; |
12092 | int *aiColEnd = &aiCol[pIter->pColset->nCol]; |
12093 | |
12094 | u8 *aOut = pIter->poslist.p; |
12095 | int iPrevOut = 0; |
12096 | |
12097 | pIter->base.iRowid = pSeg->iRowid; |
12098 | |
12099 | while( a<pEnd ){ |
12100 | iPrev += (int)a++[0] - 2; |
12101 | while( *aiCol<iPrev ){ |
12102 | aiCol++; |
12103 | if( aiCol==aiColEnd ) goto setoutputs_col_out; |
12104 | } |
12105 | if( *aiCol==iPrev ){ |
12106 | *aOut++ = (u8)((iPrev - iPrevOut) + 2); |
12107 | iPrevOut = iPrev; |
12108 | } |
12109 | } |
12110 | |
12111 | setoutputs_col_out: |
12112 | pIter->base.pData = pIter->poslist.p; |
12113 | pIter->base.nData = aOut - pIter->poslist.p; |
12114 | } |
12115 | } |
12116 | |
12117 | /* |
12118 | ** xSetOutputs callback used by detail=full when there is a column filter. |
12119 | */ |
12120 | static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ |
12121 | Fts5Colset *pColset = pIter->pColset; |
12122 | pIter->base.iRowid = pSeg->iRowid; |
12123 | |
12124 | assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); |
12125 | assert( pColset ); |
12126 | |
12127 | if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ |
12128 | /* All data is stored on the current page. Populate the output |
12129 | ** variables to point into the body of the page object. */ |
12130 | const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; |
12131 | int *pRc = &pIter->pIndex->rc; |
12132 | fts5BufferZero(&pIter->poslist); |
12133 | fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter); |
12134 | }else{ |
12135 | /* The data is distributed over two or more pages. Copy it into the |
12136 | ** Fts5Iter.poslist buffer and then set the output pointer to point |
12137 | ** to this buffer. */ |
12138 | fts5BufferZero(&pIter->poslist); |
12139 | fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); |
12140 | pIter->base.pData = pIter->poslist.p; |
12141 | pIter->base.nData = pIter->poslist.n; |
12142 | } |
12143 | } |
12144 | |
12145 | static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ |
12146 | assert( pIter!=0 || (*pRc)!=SQLITE_OK ); |
12147 | if( *pRc==SQLITE_OK ){ |
12148 | Fts5Config *pConfig = pIter->pIndex->pConfig; |
12149 | if( pConfig->eDetail==FTS5_DETAIL_NONE ){ |
12150 | pIter->xSetOutputs = fts5IterSetOutputs_None; |
12151 | } |
12152 | |
12153 | else if( pIter->pColset==0 ){ |
12154 | pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; |
12155 | } |
12156 | |
12157 | else if( pIter->pColset->nCol==0 ){ |
12158 | pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; |
12159 | } |
12160 | |
12161 | else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ |
12162 | pIter->xSetOutputs = fts5IterSetOutputs_Full; |
12163 | } |
12164 | |
12165 | else{ |
12166 | assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); |
12167 | if( pConfig->nCol<=100 ){ |
12168 | pIter->xSetOutputs = fts5IterSetOutputs_Col100; |
12169 | sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); |
12170 | }else{ |
12171 | pIter->xSetOutputs = fts5IterSetOutputs_Col; |
12172 | } |
12173 | } |
12174 | } |
12175 | } |
12176 | |
12177 | |
/*
** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the
** iterated data.
**
** When iLevel is negative and p->pHash is set, one additional segment
** iterator is created for the current contents of the hash table. A term
** to seek to (pTerm/nTerm) may only be supplied when iLevel is negative
** (this is asserted).
**
** The new iterator is returned via *ppOut. *ppOut is only ever left set to
** NULL when p->rc holds an error code: either because the allocation
** returned NULL, or because an error was pending once the component
** segment iterators had been set up, in which case the iterator is freed
** here.
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* Next free slot in pNew->aSeg[]; later
                                  ** reused as a loop counter */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5Iter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. If an error is already
  ** pending, nSeg is left at 0 for the allocation call below. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->pHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ){
    assert( p->rc!=SQLITE_OK );
    goto fts5MultiIterNew_post_check;
  }
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  pNew->pColset = pColset;
  /* The xSetOutputs callback is only selected here when the caller has not
  ** passed FTS5INDEX_QUERY_NOOUTPUT. */
  if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
      if( p->pHash ){
        /* Add a segment iterator for the current contents of the hash table. */
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
      }
      /* Visit the levels in ascending order and, within each level, the
      ** segments from the last array entry down to the first. */
      for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pIter = &pNew->aSeg[iIter++];
          if( pTerm==0 ){
            fts5SegIterInit(p, pSeg, pIter);
          }else{
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
          }
        }
      }
    }else{
      /* Single level: only the first nSeg segments take part, again visited
      ** from the highest array index down to index 0. */
      pLvl = &pStruct->aLevel[iLevel];
      for(iSeg=nSeg-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
      }
    }
    assert( iIter==nSeg );
  }

  /* If the above was successful, each component iterators now points
  ** to the first entry in its segment. In this case initialize the
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL. */
  if( p->rc==SQLITE_OK ){
    for(iIter=pNew->nSeg-1; iIter>0; iIter--){
      int iEq;
      /* A non-zero return from fts5MultiIterDoCompare() is used as the
      ** index of a segment iterator that must be stepped forward before
      ** the comparison is redone via fts5MultiIterAdvanced(). */
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        Fts5SegIter *pSeg = &pNew->aSeg[iEq];
        if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5MultiIterSetEof(pNew);
    fts5AssertMultiIterSetup(p, pNew);

    /* Either skip forward past an empty first entry (bSkipEmpty), or, if
    ** the iterator is not at EOF, populate the outputs for the segment
    ** iterator identified by aFirst[1].iFirst. */
    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }else if( pNew->base.bEof==0 ){
      Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
      pNew->xSetOutputs(pNew, pSeg);
    }

  }else{
    fts5MultiIterFree(pNew);
    *ppOut = 0;
  }

fts5MultiIterNew_post_check:
  /* A NULL output is only permitted when an error code has been set. */
  assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
  return;
}
12290 | |
12291 | /* |
12292 | ** Create an Fts5Iter that iterates through the doclist provided |
12293 | ** as the second argument. |
12294 | */ |
12295 | static void fts5MultiIterNew2( |
12296 | Fts5Index *p, /* FTS5 backend to iterate within */ |
12297 | Fts5Data *pData, /* Doclist to iterate through */ |
12298 | int bDesc, /* True for descending rowid order */ |
12299 | Fts5Iter **ppOut /* New object */ |
12300 | ){ |
12301 | Fts5Iter *pNew; |
12302 | pNew = fts5MultiIterAlloc(p, 2); |
12303 | if( pNew ){ |
12304 | Fts5SegIter *pIter = &pNew->aSeg[1]; |
12305 | |
12306 | pIter->flags = FTS5_SEGITER_ONETERM; |
12307 | if( pData->szLeaf>0 ){ |
12308 | pIter->pLeaf = pData; |
12309 | pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); |
12310 | pIter->iEndofDoclist = pData->nn; |
12311 | pNew->aFirst[1].iFirst = 1; |
12312 | if( bDesc ){ |
12313 | pNew->bRev = 1; |
12314 | pIter->flags |= FTS5_SEGITER_REVERSE; |
12315 | fts5SegIterReverseInitPage(p, pIter); |
12316 | }else{ |
12317 | fts5SegIterLoadNPos(p, pIter); |
12318 | } |
12319 | pData = 0; |
12320 | }else{ |
12321 | pNew->base.bEof = 1; |
12322 | } |
12323 | fts5SegIterSetNext(p, pIter); |
12324 | |
12325 | *ppOut = pNew; |
12326 | } |
12327 | |
12328 | fts5DataRelease(pData); |
12329 | } |
12330 | |
12331 | /* |
12332 | ** Return true if the iterator is at EOF or if an error has occurred. |
12333 | ** False otherwise. |
12334 | */ |
12335 | static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ |
12336 | assert( pIter!=0 || p->rc!=SQLITE_OK ); |
12337 | assert( p->rc!=SQLITE_OK |
12338 | || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof |
12339 | ); |
12340 | return (p->rc || pIter->base.bEof); |
12341 | } |
12342 | |
12343 | /* |
12344 | ** Return the rowid of the entry that the iterator currently points |
12345 | ** to. If the iterator points to EOF when this function is called the |
12346 | ** results are undefined. |
12347 | */ |
12348 | static i64 fts5MultiIterRowid(Fts5Iter *pIter){ |
12349 | assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); |
12350 | return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; |
12351 | } |
12352 | |
12353 | /* |
12354 | ** Move the iterator to the next entry at or following iMatch. |
12355 | */ |
12356 | static void fts5MultiIterNextFrom( |
12357 | Fts5Index *p, |
12358 | Fts5Iter *pIter, |
12359 | i64 iMatch |
12360 | ){ |
12361 | while( 1 ){ |
12362 | i64 iRowid; |
12363 | fts5MultiIterNext(p, pIter, 1, iMatch); |
12364 | if( fts5MultiIterEof(p, pIter) ) break; |
12365 | iRowid = fts5MultiIterRowid(pIter); |
12366 | if( pIter->bRev==0 && iRowid>=iMatch ) break; |
12367 | if( pIter->bRev!=0 && iRowid<=iMatch ) break; |
12368 | } |
12369 | } |
12370 | |
12371 | /* |
12372 | ** Return a pointer to a buffer containing the term associated with the |
12373 | ** entry that the iterator currently points to. |
12374 | */ |
12375 | static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ |
12376 | Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
12377 | *pn = p->term.n; |
12378 | return p->term.p; |
12379 | } |
12380 | |
12381 | /* |
12382 | ** Allocate a new segment-id for the structure pStruct. The new segment |
12383 | ** id must be between 1 and 65335 inclusive, and must not be used by |
12384 | ** any currently existing segment. If a free segment id cannot be found, |
12385 | ** SQLITE_FULL is returned. |
12386 | ** |
12387 | ** If an error has already occurred, this function is a no-op. 0 is |
12388 | ** returned in this case. |
12389 | */ |
12390 | static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ |
12391 | int iSegid = 0; |
12392 | |
12393 | if( p->rc==SQLITE_OK ){ |
12394 | if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ |
12395 | p->rc = SQLITE_FULL; |
12396 | }else{ |
12397 | /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following |
12398 | ** array is 63 elements, or 252 bytes, in size. */ |
12399 | u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32]; |
12400 | int iLvl, iSeg; |
12401 | int i; |
12402 | u32 mask; |
12403 | memset(aUsed, 0, sizeof(aUsed)); |
12404 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
12405 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
12406 | int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; |
12407 | if( iId<=FTS5_MAX_SEGMENT && iId>0 ){ |
12408 | aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32); |
12409 | } |
12410 | } |
12411 | } |
12412 | |
12413 | for(i=0; aUsed[i]==0xFFFFFFFF; i++); |
12414 | mask = aUsed[i]; |
12415 | for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++); |
12416 | iSegid += 1 + i*32; |
12417 | |
12418 | #ifdef SQLITE_DEBUG |
12419 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
12420 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
12421 | assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); |
12422 | } |
12423 | } |
12424 | assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); |
12425 | |
12426 | { |
12427 | sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); |
12428 | if( p->rc==SQLITE_OK ){ |
12429 | u8 aBlob[2] = {0xff, 0xff}; |
12430 | sqlite3_bind_int(pIdxSelect, 1, iSegid); |
12431 | sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); |
12432 | assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); |
12433 | p->rc = sqlite3_reset(pIdxSelect); |
12434 | sqlite3_bind_null(pIdxSelect, 2); |
12435 | } |
12436 | } |
12437 | #endif |
12438 | } |
12439 | } |
12440 | |
12441 | return iSegid; |
12442 | } |
12443 | |
12444 | /* |
12445 | ** Discard all data currently cached in the hash-tables. |
12446 | */ |
12447 | static void fts5IndexDiscardData(Fts5Index *p){ |
12448 | assert( p->pHash || p->nPendingData==0 ); |
12449 | if( p->pHash ){ |
12450 | sqlite3Fts5HashClear(p->pHash); |
12451 | p->nPendingData = 0; |
12452 | } |
12453 | } |
12454 | |
12455 | /* |
12456 | ** Return the size of the prefix, in bytes, that buffer |
12457 | ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). |
12458 | ** |
12459 | ** Buffer (pNew/<length-unknown>) is guaranteed to be greater |
12460 | ** than buffer (pOld/nOld). |
12461 | */ |
12462 | static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ |
12463 | int i; |
12464 | for(i=0; i<nOld; i++){ |
12465 | if( pOld[i]!=pNew[i] ) break; |
12466 | } |
12467 | return i; |
12468 | } |
12469 | |
12470 | static void fts5WriteDlidxClear( |
12471 | Fts5Index *p, |
12472 | Fts5SegWriter *pWriter, |
12473 | int bFlush /* If true, write dlidx to disk */ |
12474 | ){ |
12475 | int i; |
12476 | assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); |
12477 | for(i=0; i<pWriter->nDlidx; i++){ |
12478 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; |
12479 | if( pDlidx->buf.n==0 ) break; |
12480 | if( bFlush ){ |
12481 | assert( pDlidx->pgno!=0 ); |
12482 | fts5DataWrite(p, |
12483 | FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), |
12484 | pDlidx->buf.p, pDlidx->buf.n |
12485 | ); |
12486 | } |
12487 | sqlite3Fts5BufferZero(&pDlidx->buf); |
12488 | pDlidx->bPrevValid = 0; |
12489 | } |
12490 | } |
12491 | |
12492 | /* |
12493 | ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. |
12494 | ** Any new array elements are zeroed before returning. |
12495 | */ |
12496 | static int fts5WriteDlidxGrow( |
12497 | Fts5Index *p, |
12498 | Fts5SegWriter *pWriter, |
12499 | int nLvl |
12500 | ){ |
12501 | if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ |
12502 | Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64( |
12503 | pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl |
12504 | ); |
12505 | if( aDlidx==0 ){ |
12506 | p->rc = SQLITE_NOMEM; |
12507 | }else{ |
12508 | size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); |
12509 | memset(&aDlidx[pWriter->nDlidx], 0, nByte); |
12510 | pWriter->aDlidx = aDlidx; |
12511 | pWriter->nDlidx = nLvl; |
12512 | } |
12513 | } |
12514 | return p->rc; |
12515 | } |
12516 | |
12517 | /* |
12518 | ** If the current doclist-index accumulating in pWriter->aDlidx[] is large |
12519 | ** enough, flush it to disk and return 1. Otherwise discard it and return |
12520 | ** zero. |
12521 | */ |
12522 | static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ |
12523 | int bFlag = 0; |
12524 | |
12525 | /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written |
12526 | ** to the database, also write the doclist-index to disk. */ |
12527 | if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ |
12528 | bFlag = 1; |
12529 | } |
12530 | fts5WriteDlidxClear(p, pWriter, bFlag); |
12531 | pWriter->nEmpty = 0; |
12532 | return bFlag; |
12533 | } |
12534 | |
12535 | /* |
12536 | ** This function is called whenever processing of the doclist for the |
12537 | ** last term on leaf page (pWriter->iBtPage) is completed. |
12538 | ** |
12539 | ** The doclist-index for that term is currently stored in-memory within the |
12540 | ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function |
12541 | ** writes it out to disk. Or, if it is too small to bother with, discards |
12542 | ** it. |
12543 | ** |
12544 | ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. |
12545 | */ |
12546 | static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ |
12547 | int bFlag; |
12548 | |
12549 | assert( pWriter->iBtPage || pWriter->nEmpty==0 ); |
12550 | if( pWriter->iBtPage==0 ) return; |
12551 | bFlag = fts5WriteFlushDlidx(p, pWriter); |
12552 | |
12553 | if( p->rc==SQLITE_OK ){ |
12554 | const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"" ); |
12555 | /* The following was already done in fts5WriteInit(): */ |
12556 | /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ |
12557 | sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC); |
12558 | sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); |
12559 | sqlite3_step(p->pIdxWriter); |
12560 | p->rc = sqlite3_reset(p->pIdxWriter); |
12561 | sqlite3_bind_null(p->pIdxWriter, 2); |
12562 | } |
12563 | pWriter->iBtPage = 0; |
12564 | } |
12565 | |
12566 | /* |
12567 | ** This is called once for each leaf page except the first that contains |
12568 | ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that |
12569 | ** is larger than all terms written to earlier leaves, and equal to or |
12570 | ** smaller than the first term on the new leaf. |
12571 | ** |
12572 | ** If an error occurs, an error code is left in Fts5Index.rc. If an error |
12573 | ** has already occurred when this function is called, it is a no-op. |
12574 | */ |
12575 | static void fts5WriteBtreeTerm( |
12576 | Fts5Index *p, /* FTS5 backend object */ |
12577 | Fts5SegWriter *pWriter, /* Writer object */ |
12578 | int nTerm, const u8 *pTerm /* First term on new page */ |
12579 | ){ |
12580 | fts5WriteFlushBtree(p, pWriter); |
12581 | if( p->rc==SQLITE_OK ){ |
12582 | fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm); |
12583 | pWriter->iBtPage = pWriter->writer.pgno; |
12584 | } |
12585 | } |
12586 | |
12587 | /* |
12588 | ** This function is called when flushing a leaf page that contains no |
12589 | ** terms at all to disk. |
12590 | */ |
12591 | static void fts5WriteBtreeNoTerm( |
12592 | Fts5Index *p, /* FTS5 backend object */ |
12593 | Fts5SegWriter *pWriter /* Writer object */ |
12594 | ){ |
12595 | /* If there were no rowids on the leaf page either and the doclist-index |
12596 | ** has already been started, append an 0x00 byte to it. */ |
12597 | if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ |
12598 | Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; |
12599 | assert( pDlidx->bPrevValid ); |
12600 | sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); |
12601 | } |
12602 | |
12603 | /* Increment the "number of sequential leaves without a term" counter. */ |
12604 | pWriter->nEmpty++; |
12605 | } |
12606 | |
12607 | static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ |
12608 | i64 iRowid; |
12609 | int iOff; |
12610 | |
12611 | iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); |
12612 | fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); |
12613 | return iRowid; |
12614 | } |
12615 | |
/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
**
** The doclist-index is accumulated as one buffer per level in
** pWriter->aDlidx[], starting with level 0. The loop below begins at
** level 0 and only moves on to the next level up when the buffer at the
** current level had reached the configured page size and was written out.
** The loop also ends as soon as an error code is stored in p->rc.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  int i;                          /* Doclist-index level being updated */
  int bDone = 0;                  /* True once a level had room for the entry */

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;                     /* Value to append to the buffer at level i */
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      /* Ensure that aDlidx[i+1] exists. The array may have been moved by
      ** the reallocation, so the pointer into it is refreshed. */
      fts5WriteDlidxGrow(p, pWriter, i+2);
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root.
        ** The new root buffer starts with a 0 flag, followed by the page
        ** number of the node just flushed and then its first rowid. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Start a fresh, empty page at this level. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      /* There is room at this level, so this is the last loop iteration. */
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Not the first entry in the buffer: store the difference from the
      ** previous rowid appended at this level. */
      iVal = iRowid - pDlidx->iPrev;
    }else{
      /* First entry in an empty buffer. Write the page header: a flag that
      ** is 1 if this level was just flushed (so a higher level exists) and
      ** 0 otherwise, followed by the page number this entry refers to: the
      ** current leaf page for level 0, or the page number of the level
      ** below for higher levels. The rowid is then stored in full. */
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}
12680 | |
12681 | static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ |
12682 | static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; |
12683 | Fts5PageWriter *pPage = &pWriter->writer; |
12684 | i64 iRowid; |
12685 | |
12686 | assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); |
12687 | |
12688 | /* Set the szLeaf header field. */ |
12689 | assert( 0==fts5GetU16(&pPage->buf.p[2]) ); |
12690 | fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); |
12691 | |
12692 | if( pWriter->bFirstTermInPage ){ |
12693 | /* No term was written to this page. */ |
12694 | assert( pPage->pgidx.n==0 ); |
12695 | fts5WriteBtreeNoTerm(p, pWriter); |
12696 | }else{ |
12697 | /* Append the pgidx to the page buffer. Set the szLeaf header field. */ |
12698 | fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); |
12699 | } |
12700 | |
12701 | /* Write the page out to disk */ |
12702 | iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); |
12703 | fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); |
12704 | |
12705 | /* Initialize the next page. */ |
12706 | fts5BufferZero(&pPage->buf); |
12707 | fts5BufferZero(&pPage->pgidx); |
12708 | fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); |
12709 | pPage->iPrevPgidx = 0; |
12710 | pPage->pgno++; |
12711 | |
12712 | /* Increase the leaves written counter */ |
12713 | pWriter->nLeafWritten++; |
12714 | |
12715 | /* The new leaf holds no terms or rowids */ |
12716 | pWriter->bFirstTermInPage = 1; |
12717 | pWriter->bFirstRowidInPage = 1; |
12718 | } |
12719 | |
/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. This function must
** only be called while Fts5Index.rc is SQLITE_OK (this is asserted).
**
** On return the writer is positioned to receive the first rowid of the
** new term's doclist: bFirstRowidInDoclist is set, bFirstTermInPage and
** bFirstRowidInPage are cleared.
*/
static void fts5WriteAppendTerm(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
  /* Upper bound on the number of bytes compared by fts5PrefixCompress() */
  int nMin = MIN(pPage->term.n, nTerm);

  assert( p->rc==SQLITE_OK );
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. A page that holds
  ** nothing but its 4-byte header is not flushed. In that case the buffer
  ** is just grown so that the term fits. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
      if( p->rc!=SQLITE_OK ) return;
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }

  /* TODO1: Updating pgidx here. */
  /* Append to the page-index a varint holding the offset of this term
  ** within the page, stored as the difference from the previous page-index
  ** entry (iPrevPgidx). */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
  /* Disabled alternative that stored the offset as a 16-bit value: */
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    /* The first term on a page is stored in full, with no prefix varint */
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term.
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct. */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      if( p->rc!=SQLITE_OK ) return;
      pPage = &pWriter->writer;
    }
  }else{
    /* Not the first term on the page: write the number of bytes shared
    ** with the previous term ahead of the suffix. */
    nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* A new doclist starts here, so the level-0 doclist-index buffer must be
  ** empty. Record the page on which this doclist begins. */
  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;
}
12804 | |
12805 | /* |
12806 | ** Append a rowid and position-list size field to the writers output. |
12807 | */ |
12808 | static void fts5WriteAppendRowid( |
12809 | Fts5Index *p, |
12810 | Fts5SegWriter *pWriter, |
12811 | i64 iRowid |
12812 | ){ |
12813 | if( p->rc==SQLITE_OK ){ |
12814 | Fts5PageWriter *pPage = &pWriter->writer; |
12815 | |
12816 | if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ |
12817 | fts5WriteFlushLeaf(p, pWriter); |
12818 | } |
12819 | |
12820 | /* If this is to be the first rowid written to the page, set the |
12821 | ** rowid-pointer in the page-header. Also append a value to the dlidx |
12822 | ** buffer, in case a doclist-index is required. */ |
12823 | if( pWriter->bFirstRowidInPage ){ |
12824 | fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); |
12825 | fts5WriteDlidxAppend(p, pWriter, iRowid); |
12826 | } |
12827 | |
12828 | /* Write the rowid. */ |
12829 | if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ |
12830 | fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); |
12831 | }else{ |
12832 | assert_nc( p->rc || iRowid>pWriter->iPrevRowid ); |
12833 | fts5BufferAppendVarint(&p->rc, &pPage->buf, |
12834 | (u64)iRowid - (u64)pWriter->iPrevRowid |
12835 | ); |
12836 | } |
12837 | pWriter->iPrevRowid = iRowid; |
12838 | pWriter->bFirstRowidInDoclist = 0; |
12839 | pWriter->bFirstRowidInPage = 0; |
12840 | } |
12841 | } |
12842 | |
12843 | static void fts5WriteAppendPoslistData( |
12844 | Fts5Index *p, |
12845 | Fts5SegWriter *pWriter, |
12846 | const u8 *aData, |
12847 | int nData |
12848 | ){ |
12849 | Fts5PageWriter *pPage = &pWriter->writer; |
12850 | const u8 *a = aData; |
12851 | int n = nData; |
12852 | |
12853 | assert( p->pConfig->pgsz>0 ); |
12854 | while( p->rc==SQLITE_OK |
12855 | && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz |
12856 | ){ |
12857 | int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; |
12858 | int nCopy = 0; |
12859 | while( nCopy<nReq ){ |
12860 | i64 dummy; |
12861 | nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy); |
12862 | } |
12863 | fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); |
12864 | a += nCopy; |
12865 | n -= nCopy; |
12866 | fts5WriteFlushLeaf(p, pWriter); |
12867 | } |
12868 | if( n>0 ){ |
12869 | fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); |
12870 | } |
12871 | } |
12872 | |
12873 | /* |
12874 | ** Flush any data cached by the writer object to the database. Free any |
12875 | ** allocations associated with the writer. |
12876 | */ |
12877 | static void fts5WriteFinish( |
12878 | Fts5Index *p, |
12879 | Fts5SegWriter *pWriter, /* Writer object */ |
12880 | int *pnLeaf /* OUT: Number of leaf pages in b-tree */ |
12881 | ){ |
12882 | int i; |
12883 | Fts5PageWriter *pLeaf = &pWriter->writer; |
12884 | if( p->rc==SQLITE_OK ){ |
12885 | assert( pLeaf->pgno>=1 ); |
12886 | if( pLeaf->buf.n>4 ){ |
12887 | fts5WriteFlushLeaf(p, pWriter); |
12888 | } |
12889 | *pnLeaf = pLeaf->pgno-1; |
12890 | if( pLeaf->pgno>1 ){ |
12891 | fts5WriteFlushBtree(p, pWriter); |
12892 | } |
12893 | } |
12894 | fts5BufferFree(&pLeaf->term); |
12895 | fts5BufferFree(&pLeaf->buf); |
12896 | fts5BufferFree(&pLeaf->pgidx); |
12897 | fts5BufferFree(&pWriter->btterm); |
12898 | |
12899 | for(i=0; i<pWriter->nDlidx; i++){ |
12900 | sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); |
12901 | } |
12902 | sqlite3_free(pWriter->aDlidx); |
12903 | } |
12904 | |
12905 | static void fts5WriteInit( |
12906 | Fts5Index *p, |
12907 | Fts5SegWriter *pWriter, |
12908 | int iSegid |
12909 | ){ |
12910 | const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; |
12911 | |
12912 | memset(pWriter, 0, sizeof(Fts5SegWriter)); |
12913 | pWriter->iSegid = iSegid; |
12914 | |
12915 | fts5WriteDlidxGrow(p, pWriter, 1); |
12916 | pWriter->writer.pgno = 1; |
12917 | pWriter->bFirstTermInPage = 1; |
12918 | pWriter->iBtPage = 1; |
12919 | |
12920 | assert( pWriter->writer.buf.n==0 ); |
12921 | assert( pWriter->writer.pgidx.n==0 ); |
12922 | |
12923 | /* Grow the two buffers to pgsz + padding bytes in size. */ |
12924 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); |
12925 | sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); |
12926 | |
12927 | if( p->pIdxWriter==0 ){ |
12928 | Fts5Config *pConfig = p->pConfig; |
12929 | fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( |
12930 | "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)" , |
12931 | pConfig->zDb, pConfig->zName |
12932 | )); |
12933 | } |
12934 | |
12935 | if( p->rc==SQLITE_OK ){ |
12936 | /* Initialize the 4-byte leaf-page header to 0x00. */ |
12937 | memset(pWriter->writer.buf.p, 0, 4); |
12938 | pWriter->writer.buf.n = 4; |
12939 | |
12940 | /* Bind the current output segment id to the index-writer. This is an |
12941 | ** optimization over binding the same value over and over as rows are |
12942 | ** inserted into %_idx by the current writer. */ |
12943 | sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); |
12944 | } |
12945 | } |
12946 | |
/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
**
** Each segment iterator in pIter is visited in turn, stopping early if an
** error code is stored in p->rc:
**
**   * Iterators with no associated segment (pSeg->pSeg==0) are skipped.
**
**   * If the iterator is at EOF, the segment is marked as empty.
**
**   * Otherwise, the leaf page containing the current term is rewritten so
**     that it begins with that term, earlier leaves of the segment are
**     deleted and the segment's first page number is updated.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;                 /* Used to assemble each rewritten leaf */
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transferred to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};

      /* Load the leaf page on which the current term starts. */
      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
      pData = fts5LeafRead(p, iLeafRowid);
      if( pData ){
        if( iOff>pData->szLeaf ){
          /* This can occur if the pages that the segments occupy overlap - if
          ** a single page has been assigned to more than one segment. In
          ** this case a prior iteration of this loop may have corrupted the
          ** segment currently being trimmed. */
          p->rc = FTS5_CORRUPT;
        }else{
          /* Build the new page body: a zeroed 4-byte header, the size of
          ** the current term, the term itself, and then everything on the
          ** old page from offset iOff up to the end of the page body. */
          fts5BufferZero(&buf);
          fts5BufferGrow(&p->rc, &buf, pData->nn);
          fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
          fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
          fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]);
          if( p->rc==SQLITE_OK ){
            /* Set the szLeaf field */
            fts5PutU16(&buf.p[2], (u16)buf.n);
          }

          /* Set up the new page-index array. Its first entry is 4: the
          ** offset of the term just written after the page header. */
          fts5BufferAppendVarint(&p->rc, &buf, 4);
          if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
           && pSeg->iEndofDoclist<pData->szLeaf
           && pSeg->iPgidxOff<=pData->nn
          ){
            /* The iterator is still on the page where the term starts and
            ** its doclist ends before the end of the page body. Add a
            ** page-index entry computed from the distance between the end
            ** of the doclist and the end of the old page body, then copy
            ** the rest of the old page-index from offset iPgidxOff. */
            int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
            fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
            fts5BufferAppendBlob(&p->rc, &buf,
                pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
            );
          }

          /* The rewritten page becomes the first page of the segment.
          ** Delete the records for pages 1 up to this one, then write the
          ** new version of this page. */
          pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
          fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
          fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
        }
        fts5DataRelease(pData);
      }
    }
  }
  fts5BufferFree(&buf);
}
13017 | |
13018 | static void fts5MergeChunkCallback( |
13019 | Fts5Index *p, |
13020 | void *pCtx, |
13021 | const u8 *pChunk, int nChunk |
13022 | ){ |
13023 | Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; |
13024 | fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); |
13025 | } |
13026 | |
/*
** Merge input segments from level iLvl of the index into a single output
** segment on level iLvl+1.
**
** If level iLvl already has a merge in progress (pLvl->nMerge is non-zero),
** that merge is resumed: the first nMerge segments of the level are the
** input and output is appended to the last segment on level iLvl+1,
** starting at the page following its current last page. Otherwise, a new
** segment-id is allocated, a new output segment is appended to level
** iLvl+1 (a level is added first if iLvl is currently the last level) and
** all segments on level iLvl are used as input.
**
** If pnRem is not NULL, the merge stops at the first term boundary reached
** after more than (*pnRem) leaf pages have been written, and (*pnRem) is
** decremented by the number of leaves written before returning. If pnRem
** is NULL the input is always merged to completion.
**
** If all input is consumed, the input segments are removed from the %_data
** table and from the structure. Otherwise the partially consumed inputs
** are trimmed by fts5TrimSegments() and pLvl->nMerge is set to the number
** of inputs so that a later call resumes the same merge.
**
** The structure may need to be extended, in which case (*ppStruct) is
** re-read after the call that adds a level. If extending the structure
** fails the function returns immediately. Errors are reported via
** Fts5Index.rc.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;            /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  int bTermWritten = 0;           /* True if current term already output */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    /* Resume an existing merge. The output segment is the last one on the
    ** next level; continue writing directly after its current last page. */
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    /* Re-fetch the level pointers from the (possibly updated) pStruct. */
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  /* The output is the oldest segment in the index if it is the only segment
  ** on the output level and the output level is the last level. */
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
      /* The iterator has moved on to a new term. This is the only point at
      ** which the merge may stop early because the page budget is spent. */
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bTermWritten =0;
    }

    /* Check for key annihilation. An entry with an empty position list is
    ** skipped if the output is the oldest segment, or if the entry does not
    ** have its delete flag set. */
    if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;

    if( p->rc==SQLITE_OK && bTermWritten==0 ){
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      bTermWritten = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      /* In detail=none mode only entries with the delete flag set write
      ** anything after the rowid: one 0x00 byte, or two if the entry also
      ** has a non-empty position list. */
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output. The size field is
      ** twice the position-list size, with the delete flag in the low bit. */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time. */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  assert( pIter!=0 || p->rc!=SQLITE_OK );
  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      /* No pages were written, so the output segment is empty. Remove it
      ** from the structure as well. */
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    /* Input remains. Trim the consumed part of each input segment and
    ** record that a merge of nInput segments is in progress. */
    assert( pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}
13174 | |
13175 | /* |
13176 | ** Do up to nPg pages of automerge work on the index. |
13177 | ** |
13178 | ** Return true if any changes were actually made, or false otherwise. |
13179 | */ |
13180 | static int fts5IndexMerge( |
13181 | Fts5Index *p, /* FTS5 backend object */ |
13182 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ |
13183 | int nPg, /* Pages of work to do */ |
13184 | int nMin /* Minimum number of segments to merge */ |
13185 | ){ |
13186 | int nRem = nPg; |
13187 | int bRet = 0; |
13188 | Fts5Structure *pStruct = *ppStruct; |
13189 | while( nRem>0 && p->rc==SQLITE_OK ){ |
13190 | int iLvl; /* To iterate through levels */ |
13191 | int iBestLvl = 0; /* Level offering the most input segments */ |
13192 | int nBest = 0; /* Number of input segments on best level */ |
13193 | |
13194 | /* Set iBestLvl to the level to read input segments from. */ |
13195 | assert( pStruct->nLevel>0 ); |
13196 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
13197 | Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; |
13198 | if( pLvl->nMerge ){ |
13199 | if( pLvl->nMerge>nBest ){ |
13200 | iBestLvl = iLvl; |
13201 | nBest = pLvl->nMerge; |
13202 | } |
13203 | break; |
13204 | } |
13205 | if( pLvl->nSeg>nBest ){ |
13206 | nBest = pLvl->nSeg; |
13207 | iBestLvl = iLvl; |
13208 | } |
13209 | } |
13210 | |
13211 | /* If nBest is still 0, then the index must be empty. */ |
13212 | #ifdef SQLITE_DEBUG |
13213 | for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ |
13214 | assert( pStruct->aLevel[iLvl].nSeg==0 ); |
13215 | } |
13216 | #endif |
13217 | |
13218 | if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){ |
13219 | break; |
13220 | } |
13221 | bRet = 1; |
13222 | fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); |
13223 | if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ |
13224 | fts5StructurePromote(p, iBestLvl+1, pStruct); |
13225 | } |
13226 | } |
13227 | *ppStruct = pStruct; |
13228 | return bRet; |
13229 | } |
13230 | |
13231 | /* |
13232 | ** A total of nLeaf leaf pages of data has just been flushed to a level-0 |
13233 | ** segment. This function updates the write-counter accordingly and, if |
13234 | ** necessary, performs incremental merge work. |
13235 | ** |
13236 | ** If an error occurs, set the Fts5Index.rc error code. If an error has |
13237 | ** already occurred, this function is a no-op. |
13238 | */ |
13239 | static void fts5IndexAutomerge( |
13240 | Fts5Index *p, /* FTS5 backend object */ |
13241 | Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ |
13242 | int nLeaf /* Number of output leaves just written */ |
13243 | ){ |
13244 | if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){ |
13245 | Fts5Structure *pStruct = *ppStruct; |
13246 | u64 nWrite; /* Initial value of write-counter */ |
13247 | int nWork; /* Number of work-quanta to perform */ |
13248 | int nRem; /* Number of leaf pages left to write */ |
13249 | |
13250 | /* Update the write-counter. While doing so, set nWork. */ |
13251 | nWrite = pStruct->nWriteCounter; |
13252 | nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); |
13253 | pStruct->nWriteCounter += nLeaf; |
13254 | nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); |
13255 | |
13256 | fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); |
13257 | } |
13258 | } |
13259 | |
13260 | static void fts5IndexCrisismerge( |
13261 | Fts5Index *p, /* FTS5 backend object */ |
13262 | Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ |
13263 | ){ |
13264 | const int nCrisis = p->pConfig->nCrisisMerge; |
13265 | Fts5Structure *pStruct = *ppStruct; |
13266 | int iLvl = 0; |
13267 | |
13268 | assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); |
13269 | while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ |
13270 | fts5IndexMergeLevel(p, &pStruct, iLvl, 0); |
13271 | assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); |
13272 | fts5StructurePromote(p, iLvl+1, pStruct); |
13273 | iLvl++; |
13274 | } |
13275 | *ppStruct = pStruct; |
13276 | } |
13277 | |
13278 | static int fts5IndexReturn(Fts5Index *p){ |
13279 | int rc = p->rc; |
13280 | p->rc = SQLITE_OK; |
13281 | return rc; |
13282 | } |
13283 | |
/*
** Pairs an index handle with a segment writer.
**
** NOTE(review): nothing in the surrounding code references this type.
** Presumably it is (or was) the context object for a flush callback -
** confirm against the rest of the file before relying on it.
*/
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;                /* Index handle */
  Fts5SegWriter writer;           /* Segment writer */
};
13289 | |
13290 | /* |
13291 | ** Buffer aBuf[] contains a list of varints, all small enough to fit |
13292 | ** in a 32-bit integer. Return the size of the largest prefix of this |
13293 | ** list nMax bytes or less in size. |
13294 | */ |
13295 | static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ |
13296 | int ret; |
13297 | u32 dummy; |
13298 | ret = fts5GetVarint32(aBuf, dummy); |
13299 | if( ret<nMax ){ |
13300 | while( 1 ){ |
13301 | int i = fts5GetVarint32(&aBuf[ret], dummy); |
13302 | if( (ret + i) > nMax ) break; |
13303 | ret += i; |
13304 | } |
13305 | } |
13306 | return ret; |
13307 | } |
13308 | |
/*
** Flush the contents of the in-memory hash table (Fts5Index.pHash) to a new
** level-0 segment on disk. Also update the corresponding structure record.
**
** Outline:
**
**   1. Read the structure record and allocate a segment-id for the new
**      segment. If no segment-id is obtained (iSegid==0), steps 2 and 3
**      are skipped.
**   2. Scan the hash table. For each entry, append the term and then its
**      doclist to the segment writer. A doclist that does not fit on the
**      current leaf is copied one rowid/poslist at a time, and a poslist
**      that does not fit is split on varint boundaries.
**   3. Clear the hash table, finish the writer and add the new segment to
**      level 0 of the structure.
**   4. Run automerge and crisis-merge work, then write the structure back
**      and release it.
**
** If an error occurs, set the Fts5Index.rc error code. Most steps are
** skipped once an error code has been set.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment. */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);
  fts5StructureInvalidate(p);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;     /* New segment within pStruct */
    Fts5Buffer *pBuf;               /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;             /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;            /* Buffer containing term */
      const u8 *pDoclist;           /* Pointer to doclist for this term */
      int nDoclist;                 /* Size of doclist in bytes */

      /* Write the term for this entry to disk. The term length is taken
      ** with strlen(), so zTerm is treated as a nul-terminated string. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
      if( p->rc!=SQLITE_OK ) break;

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;             /* Rowid of the current doclist entry */
        u64 iDelta = 0;             /* Delta most recently read from doclist */
        int iOff = 0;               /* Current offset within pDoclist[] */

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist. */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
          iRowid += iDelta;

          if( writer.bFirstRowidInPage ){
            /* First rowid on a new leaf. Record its offset in the first two
            ** bytes of the page and store the rowid itself, not a delta. */
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
            if( p->rc!=SQLITE_OK ) break;
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            /* In detail=none mode a rowid may be followed by one or two
            ** 0x00 bytes. Copy them and flush the leaf once it is full. */
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            /* nCopy is the size of the poslist-size field plus the size of
            ** the poslist itself. */
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously. */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;         /* Bytes of pPoslist[] copied so far */
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  /* The rest of the poslist fits on the current leaf. */
                  n = nCopy - iPos;
                }else{
                  /* Copy as many whole varints as fit in nSpace bytes. */
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below. */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* Do any merge work made necessary by the new segment, then write the
  ** structure record back and drop the reference taken above. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
13466 | |
13467 | /* |
13468 | ** Flush any data stored in the in-memory hash tables to the database. |
13469 | */ |
13470 | static void fts5IndexFlush(Fts5Index *p){ |
13471 | /* Unless it is empty, flush the hash table to disk */ |
13472 | if( p->nPendingData ){ |
13473 | assert( p->pHash ); |
13474 | p->nPendingData = 0; |
13475 | fts5FlushOneHash(p); |
13476 | } |
13477 | } |
13478 | |
13479 | static Fts5Structure *fts5IndexOptimizeStruct( |
13480 | Fts5Index *p, |
13481 | Fts5Structure *pStruct |
13482 | ){ |
13483 | Fts5Structure *pNew = 0; |
13484 | sqlite3_int64 nByte = sizeof(Fts5Structure); |
13485 | int nSeg = pStruct->nSegment; |
13486 | int i; |
13487 | |
13488 | /* Figure out if this structure requires optimization. A structure does |
13489 | ** not require optimization if either: |
13490 | ** |
13491 | ** + it consists of fewer than two segments, or |
13492 | ** + all segments are on the same level, or |
13493 | ** + all segments except one are currently inputs to a merge operation. |
13494 | ** |
13495 | ** In the first case, return NULL. In the second, increment the ref-count |
13496 | ** on *pStruct and return a copy of the pointer to it. |
13497 | */ |
13498 | if( nSeg<2 ) return 0; |
13499 | for(i=0; i<pStruct->nLevel; i++){ |
13500 | int nThis = pStruct->aLevel[i].nSeg; |
13501 | if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ |
13502 | fts5StructureRef(pStruct); |
13503 | return pStruct; |
13504 | } |
13505 | assert( pStruct->aLevel[i].nMerge<=nThis ); |
13506 | } |
13507 | |
13508 | nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); |
13509 | pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); |
13510 | |
13511 | if( pNew ){ |
13512 | Fts5StructureLevel *pLvl; |
13513 | nByte = nSeg * sizeof(Fts5StructureSegment); |
13514 | pNew->nLevel = pStruct->nLevel+1; |
13515 | pNew->nRef = 1; |
13516 | pNew->nWriteCounter = pStruct->nWriteCounter; |
13517 | pLvl = &pNew->aLevel[pStruct->nLevel]; |
13518 | pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); |
13519 | if( pLvl->aSeg ){ |
13520 | int iLvl, iSeg; |
13521 | int iSegOut = 0; |
13522 | /* Iterate through all segments, from oldest to newest. Add them to |
13523 | ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest |
13524 | ** segment in the data structure. */ |
13525 | for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ |
13526 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
13527 | pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; |
13528 | iSegOut++; |
13529 | } |
13530 | } |
13531 | pNew->nSegment = pLvl->nSeg = nSeg; |
13532 | }else{ |
13533 | sqlite3_free(pNew); |
13534 | pNew = 0; |
13535 | } |
13536 | } |
13537 | |
13538 | return pNew; |
13539 | } |
13540 | |
13541 | static int sqlite3Fts5IndexOptimize(Fts5Index *p){ |
13542 | Fts5Structure *pStruct; |
13543 | Fts5Structure *pNew = 0; |
13544 | |
13545 | assert( p->rc==SQLITE_OK ); |
13546 | fts5IndexFlush(p); |
13547 | pStruct = fts5StructureRead(p); |
13548 | fts5StructureInvalidate(p); |
13549 | |
13550 | if( pStruct ){ |
13551 | pNew = fts5IndexOptimizeStruct(p, pStruct); |
13552 | } |
13553 | fts5StructureRelease(pStruct); |
13554 | |
13555 | assert( pNew==0 || pNew->nSegment>0 ); |
13556 | if( pNew ){ |
13557 | int iLvl; |
13558 | for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} |
13559 | while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ |
13560 | int nRem = FTS5_OPT_WORK_UNIT; |
13561 | fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); |
13562 | } |
13563 | |
13564 | fts5StructureWrite(p, pNew); |
13565 | fts5StructureRelease(pNew); |
13566 | } |
13567 | |
13568 | return fts5IndexReturn(p); |
13569 | } |
13570 | |
13571 | /* |
13572 | ** This is called to implement the special "VALUES('merge', $nMerge)" |
13573 | ** INSERT command. |
13574 | */ |
13575 | static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ |
13576 | Fts5Structure *pStruct = fts5StructureRead(p); |
13577 | if( pStruct ){ |
13578 | int nMin = p->pConfig->nUsermerge; |
13579 | fts5StructureInvalidate(p); |
13580 | if( nMerge<0 ){ |
13581 | Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); |
13582 | fts5StructureRelease(pStruct); |
13583 | pStruct = pNew; |
13584 | nMin = 2; |
13585 | nMerge = nMerge*-1; |
13586 | } |
13587 | if( pStruct && pStruct->nLevel ){ |
13588 | if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ |
13589 | fts5StructureWrite(p, pStruct); |
13590 | } |
13591 | } |
13592 | fts5StructureRelease(pStruct); |
13593 | } |
13594 | return fts5IndexReturn(p); |
13595 | } |
13596 | |
13597 | static void fts5AppendRowid( |
13598 | Fts5Index *p, |
13599 | u64 iDelta, |
13600 | Fts5Iter *pUnused, |
13601 | Fts5Buffer *pBuf |
13602 | ){ |
13603 | UNUSED_PARAM(pUnused); |
13604 | fts5BufferAppendVarint(&p->rc, pBuf, iDelta); |
13605 | } |
13606 | |
13607 | static void fts5AppendPoslist( |
13608 | Fts5Index *p, |
13609 | u64 iDelta, |
13610 | Fts5Iter *pMulti, |
13611 | Fts5Buffer *pBuf |
13612 | ){ |
13613 | int nData = pMulti->base.nData; |
13614 | int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING; |
13615 | assert( nData>0 ); |
13616 | if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){ |
13617 | fts5BufferSafeAppendVarint(pBuf, iDelta); |
13618 | fts5BufferSafeAppendVarint(pBuf, nData*2); |
13619 | fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); |
13620 | memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING); |
13621 | } |
13622 | } |
13623 | |
13624 | |
13625 | static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ |
13626 | u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; |
13627 | |
13628 | assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) ); |
13629 | if( p>=pIter->aEof ){ |
13630 | pIter->aPoslist = 0; |
13631 | }else{ |
13632 | i64 iDelta; |
13633 | |
13634 | p += fts5GetVarint(p, (u64*)&iDelta); |
13635 | pIter->iRowid += iDelta; |
13636 | |
13637 | /* Read position list size */ |
13638 | if( p[0] & 0x80 ){ |
13639 | int nPos; |
13640 | pIter->nSize = fts5GetVarint32(p, nPos); |
13641 | pIter->nPoslist = (nPos>>1); |
13642 | }else{ |
13643 | pIter->nPoslist = ((int)(p[0])) >> 1; |
13644 | pIter->nSize = 1; |
13645 | } |
13646 | |
13647 | pIter->aPoslist = p; |
13648 | if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){ |
13649 | pIter->aPoslist = 0; |
13650 | } |
13651 | } |
13652 | } |
13653 | |
13654 | static void fts5DoclistIterInit( |
13655 | Fts5Buffer *pBuf, |
13656 | Fts5DoclistIter *pIter |
13657 | ){ |
13658 | memset(pIter, 0, sizeof(*pIter)); |
13659 | if( pBuf->n>0 ){ |
13660 | pIter->aPoslist = pBuf->p; |
13661 | pIter->aEof = &pBuf->p[pBuf->n]; |
13662 | fts5DoclistIterNext(pIter); |
13663 | } |
13664 | } |
13665 | |
#if 0
/*
** Append a docid (rowid) to the doclist in buffer pBuf, encoded as a
** varint delta against the previous rowid written.
**
** This function assumes that space within the buffer has already been
** allocated.
**
** This function is compiled out. The macro of the same name that follows
** the #endif is the live implementation.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Append rowid iRowid to the doclist in buffer pBuf as a varint delta
** against iLastRowid, then set iLastRowid to iRowid. Argument iLastRowid
** must be an lvalue, as it is assigned to. iRowid is evaluated twice.
**
** The delta is computed using unsigned arithmetic, so the subtraction
** cannot overflow a signed integer. Space in the buffer must have been
** allocated already.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {                 \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                             \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid);                                               \
}
13689 | |
13690 | /* |
13691 | ** Swap the contents of buffer *p1 with that of *p2. |
13692 | */ |
13693 | static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ |
13694 | Fts5Buffer tmp = *p1; |
13695 | *p1 = *p2; |
13696 | *p2 = tmp; |
13697 | } |
13698 | |
13699 | static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ |
13700 | int i = *piOff; |
13701 | if( i>=pBuf->n ){ |
13702 | *piOff = -1; |
13703 | }else{ |
13704 | u64 iVal; |
13705 | *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); |
13706 | *piRowid += iVal; |
13707 | } |
13708 | } |
13709 | |
13710 | /* |
13711 | ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. |
13712 | ** In this case the buffers consist of a delta-encoded list of rowids only. |
13713 | */ |
13714 | static void fts5MergeRowidLists( |
13715 | Fts5Index *p, /* FTS5 backend object */ |
13716 | Fts5Buffer *p1, /* First list to merge */ |
13717 | int nBuf, /* Number of entries in apBuf[] */ |
13718 | Fts5Buffer *aBuf /* Array of other lists to merge into p1 */ |
13719 | ){ |
13720 | int i1 = 0; |
13721 | int i2 = 0; |
13722 | i64 iRowid1 = 0; |
13723 | i64 iRowid2 = 0; |
13724 | i64 iOut = 0; |
13725 | Fts5Buffer *p2 = &aBuf[0]; |
13726 | Fts5Buffer out; |
13727 | |
13728 | (void)nBuf; |
13729 | memset(&out, 0, sizeof(out)); |
13730 | assert( nBuf==1 ); |
13731 | sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); |
13732 | if( p->rc ) return; |
13733 | |
13734 | fts5NextRowid(p1, &i1, &iRowid1); |
13735 | fts5NextRowid(p2, &i2, &iRowid2); |
13736 | while( i1>=0 || i2>=0 ){ |
13737 | if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ |
13738 | assert( iOut==0 || iRowid1>iOut ); |
13739 | fts5BufferSafeAppendVarint(&out, iRowid1 - iOut); |
13740 | iOut = iRowid1; |
13741 | fts5NextRowid(p1, &i1, &iRowid1); |
13742 | }else{ |
13743 | assert( iOut==0 || iRowid2>iOut ); |
13744 | fts5BufferSafeAppendVarint(&out, iRowid2 - iOut); |
13745 | iOut = iRowid2; |
13746 | if( i1>=0 && iRowid1==iRowid2 ){ |
13747 | fts5NextRowid(p1, &i1, &iRowid1); |
13748 | } |
13749 | fts5NextRowid(p2, &i2, &iRowid2); |
13750 | } |
13751 | } |
13752 | |
13753 | fts5BufferSwap(&out, p1); |
13754 | fts5BufferFree(&out); |
13755 | } |
13756 | |
/*
** One input to fts5MergePrefixLists(). Each object wraps a doclist
** iterator and can be linked into a list sorted either by the rowid of
** the current doclist entry or by the current position within its
** position list.
*/
typedef struct PrefixMerger PrefixMerger;
struct PrefixMerger {
  Fts5DoclistIter iter;           /* Doclist iterator */
  i64 iPos;                       /* For iterating through a position list */
  int iOff;                       /* Current offset within aPos[] */
  u8 *aPos;                       /* Position list data of current entry */
  PrefixMerger *pNext;            /* Next in docid/poslist order */
};
13765 | |
13766 | static void fts5PrefixMergerInsertByRowid( |
13767 | PrefixMerger **ppHead, |
13768 | PrefixMerger *p |
13769 | ){ |
13770 | if( p->iter.aPoslist ){ |
13771 | PrefixMerger **pp = ppHead; |
13772 | while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){ |
13773 | pp = &(*pp)->pNext; |
13774 | } |
13775 | p->pNext = *pp; |
13776 | *pp = p; |
13777 | } |
13778 | } |
13779 | |
13780 | static void fts5PrefixMergerInsertByPosition( |
13781 | PrefixMerger **ppHead, |
13782 | PrefixMerger *p |
13783 | ){ |
13784 | if( p->iPos>=0 ){ |
13785 | PrefixMerger **pp = ppHead; |
13786 | while( *pp && p->iPos>(*pp)->iPos ){ |
13787 | pp = &(*pp)->pNext; |
13788 | } |
13789 | p->pNext = *pp; |
13790 | *pp = p; |
13791 | } |
13792 | } |
13793 | |
13794 | |
13795 | /* |
13796 | ** Array aBuf[] contains nBuf doclists. These are all merged in with the |
13797 | ** doclist in buffer p1. |
13798 | */ |
13799 | static void fts5MergePrefixLists( |
13800 | Fts5Index *p, /* FTS5 backend object */ |
13801 | Fts5Buffer *p1, /* First list to merge */ |
13802 | int nBuf, /* Number of buffers in array aBuf[] */ |
13803 | Fts5Buffer *aBuf /* Other lists to merge in */ |
13804 | ){ |
13805 | #define fts5PrefixMergerNextPosition(p) \ |
13806 | sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos) |
13807 | #define FTS5_MERGE_NLIST 16 |
13808 | PrefixMerger aMerger[FTS5_MERGE_NLIST]; |
13809 | PrefixMerger *pHead = 0; |
13810 | int i; |
13811 | int nOut = 0; |
13812 | Fts5Buffer out = {0, 0, 0}; |
13813 | Fts5Buffer tmp = {0, 0, 0}; |
13814 | i64 iLastRowid = 0; |
13815 | |
13816 | /* Initialize a doclist-iterator for each input buffer. Arrange them in |
13817 | ** a linked-list starting at pHead in ascending order of rowid. Avoid |
13818 | ** linking any iterators already at EOF into the linked list at all. */ |
13819 | assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) ); |
13820 | memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1)); |
13821 | pHead = &aMerger[nBuf]; |
13822 | fts5DoclistIterInit(p1, &pHead->iter); |
13823 | for(i=0; i<nBuf; i++){ |
13824 | fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter); |
13825 | fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]); |
13826 | nOut += aBuf[i].n; |
13827 | } |
13828 | if( nOut==0 ) return; |
13829 | nOut += p1->n + 9 + 10*nBuf; |
13830 | |
13831 | /* The maximum size of the output is equal to the sum of the |
13832 | ** input sizes + 1 varint (9 bytes). The extra varint is because if the |
13833 | ** first rowid in one input is a large negative number, and the first in |
13834 | ** the other a non-negative number, the delta for the non-negative |
13835 | ** number will be larger on disk than the literal integer value |
13836 | ** was. |
13837 | ** |
13838 | ** Or, if the input position-lists are corrupt, then the output might |
13839 | ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1 |
13840 | ** (the value PoslistNext64() uses for EOF) as a position and appending |
13841 | ** it to the output. This can happen at most once for each input |
13842 | ** position-list, hence (nBuf+1) 10 byte paddings. */ |
13843 | if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return; |
13844 | |
13845 | while( pHead ){ |
13846 | fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid); |
13847 | |
13848 | if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){ |
13849 | /* Merge data from two or more poslists */ |
13850 | i64 iPrev = 0; |
13851 | int nTmp = FTS5_DATA_ZERO_PADDING; |
13852 | int nMerge = 0; |
13853 | PrefixMerger *pSave = pHead; |
13854 | PrefixMerger *pThis = 0; |
13855 | int nTail = 0; |
13856 | |
13857 | pHead = 0; |
13858 | while( pSave && pSave->iter.iRowid==iLastRowid ){ |
13859 | PrefixMerger *pNext = pSave->pNext; |
13860 | pSave->iOff = 0; |
13861 | pSave->iPos = 0; |
13862 | pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize]; |
13863 | fts5PrefixMergerNextPosition(pSave); |
13864 | nTmp += pSave->iter.nPoslist + 10; |
13865 | nMerge++; |
13866 | fts5PrefixMergerInsertByPosition(&pHead, pSave); |
13867 | pSave = pNext; |
13868 | } |
13869 | |
13870 | if( pHead==0 || pHead->pNext==0 ){ |
13871 | p->rc = FTS5_CORRUPT; |
13872 | break; |
13873 | } |
13874 | |
13875 | /* See the earlier comment in this function for an explanation of why |
13876 | ** corrupt input position lists might cause the output to consume |
13877 | ** at most nMerge*10 bytes of unexpected space. */ |
13878 | if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){ |
13879 | break; |
13880 | } |
13881 | fts5BufferZero(&tmp); |
13882 | |
13883 | pThis = pHead; |
13884 | pHead = pThis->pNext; |
13885 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); |
13886 | fts5PrefixMergerNextPosition(pThis); |
13887 | fts5PrefixMergerInsertByPosition(&pHead, pThis); |
13888 | |
13889 | while( pHead->pNext ){ |
13890 | pThis = pHead; |
13891 | if( pThis->iPos!=iPrev ){ |
13892 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos); |
13893 | } |
13894 | fts5PrefixMergerNextPosition(pThis); |
13895 | pHead = pThis->pNext; |
13896 | fts5PrefixMergerInsertByPosition(&pHead, pThis); |
13897 | } |
13898 | |
13899 | if( pHead->iPos!=iPrev ){ |
13900 | sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos); |
13901 | } |
13902 | nTail = pHead->iter.nPoslist - pHead->iOff; |
13903 | |
13904 | /* WRITEPOSLISTSIZE */ |
13905 | assert_nc( tmp.n+nTail<=nTmp ); |
13906 | assert( tmp.n+nTail<=nTmp+nMerge*10 ); |
13907 | if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){ |
13908 | if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; |
13909 | break; |
13910 | } |
13911 | fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2); |
13912 | fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n); |
13913 | if( nTail>0 ){ |
13914 | fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail); |
13915 | } |
13916 | |
13917 | pHead = pSave; |
13918 | for(i=0; i<nBuf+1; i++){ |
13919 | PrefixMerger *pX = &aMerger[i]; |
13920 | if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){ |
13921 | fts5DoclistIterNext(&pX->iter); |
13922 | fts5PrefixMergerInsertByRowid(&pHead, pX); |
13923 | } |
13924 | } |
13925 | |
13926 | }else{ |
13927 | /* Copy poslist from pHead to output */ |
13928 | PrefixMerger *pThis = pHead; |
13929 | Fts5DoclistIter *pI = &pThis->iter; |
13930 | fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize); |
13931 | fts5DoclistIterNext(pI); |
13932 | pHead = pThis->pNext; |
13933 | fts5PrefixMergerInsertByRowid(&pHead, pThis); |
13934 | } |
13935 | } |
13936 | |
13937 | fts5BufferFree(p1); |
13938 | fts5BufferFree(&tmp); |
13939 | memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING); |
13940 | *p1 = out; |
13941 | } |
13942 | |
13943 | static void fts5SetupPrefixIter( |
13944 | Fts5Index *p, /* Index to read from */ |
13945 | int bDesc, /* True for "ORDER BY rowid DESC" */ |
13946 | int iIdx, /* Index to scan for data */ |
13947 | u8 *pToken, /* Buffer containing prefix to match */ |
13948 | int nToken, /* Size of buffer pToken in bytes */ |
13949 | Fts5Colset *pColset, /* Restrict matches to these columns */ |
13950 | Fts5Iter **ppIter /* OUT: New iterator */ |
13951 | ){ |
13952 | Fts5Structure *pStruct; |
13953 | Fts5Buffer *aBuf; |
13954 | int nBuf = 32; |
13955 | int nMerge = 1; |
13956 | |
13957 | void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); |
13958 | void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); |
13959 | if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ |
13960 | xMerge = fts5MergeRowidLists; |
13961 | xAppend = fts5AppendRowid; |
13962 | }else{ |
13963 | nMerge = FTS5_MERGE_NLIST-1; |
13964 | nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ |
13965 | xMerge = fts5MergePrefixLists; |
13966 | xAppend = fts5AppendPoslist; |
13967 | } |
13968 | |
13969 | aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); |
13970 | pStruct = fts5StructureRead(p); |
13971 | |
13972 | if( aBuf && pStruct ){ |
13973 | const int flags = FTS5INDEX_QUERY_SCAN |
13974 | | FTS5INDEX_QUERY_SKIPEMPTY |
13975 | | FTS5INDEX_QUERY_NOOUTPUT; |
13976 | int i; |
13977 | i64 iLastRowid = 0; |
13978 | Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ |
13979 | Fts5Data *pData; |
13980 | Fts5Buffer doclist; |
13981 | int bNewTerm = 1; |
13982 | |
13983 | memset(&doclist, 0, sizeof(doclist)); |
13984 | if( iIdx!=0 ){ |
13985 | int dummy = 0; |
13986 | const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; |
13987 | pToken[0] = FTS5_MAIN_PREFIX; |
13988 | fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1); |
13989 | fts5IterSetOutputCb(&p->rc, p1); |
13990 | for(; |
13991 | fts5MultiIterEof(p, p1)==0; |
13992 | fts5MultiIterNext2(p, p1, &dummy) |
13993 | ){ |
13994 | Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; |
13995 | p1->xSetOutputs(p1, pSeg); |
13996 | if( p1->base.nData ){ |
13997 | xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); |
13998 | iLastRowid = p1->base.iRowid; |
13999 | } |
14000 | } |
14001 | fts5MultiIterFree(p1); |
14002 | } |
14003 | |
14004 | pToken[0] = FTS5_MAIN_PREFIX + iIdx; |
14005 | fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); |
14006 | fts5IterSetOutputCb(&p->rc, p1); |
14007 | for( /* no-op */ ; |
14008 | fts5MultiIterEof(p, p1)==0; |
14009 | fts5MultiIterNext2(p, p1, &bNewTerm) |
14010 | ){ |
14011 | Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; |
14012 | int nTerm = pSeg->term.n; |
14013 | const u8 *pTerm = pSeg->term.p; |
14014 | p1->xSetOutputs(p1, pSeg); |
14015 | |
14016 | assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); |
14017 | if( bNewTerm ){ |
14018 | if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; |
14019 | } |
14020 | |
14021 | if( p1->base.nData==0 ) continue; |
14022 | |
14023 | if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ |
14024 | for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ |
14025 | int i1 = i*nMerge; |
14026 | int iStore; |
14027 | assert( i1+nMerge<=nBuf ); |
14028 | for(iStore=i1; iStore<i1+nMerge; iStore++){ |
14029 | if( aBuf[iStore].n==0 ){ |
14030 | fts5BufferSwap(&doclist, &aBuf[iStore]); |
14031 | fts5BufferZero(&doclist); |
14032 | break; |
14033 | } |
14034 | } |
14035 | if( iStore==i1+nMerge ){ |
14036 | xMerge(p, &doclist, nMerge, &aBuf[i1]); |
14037 | for(iStore=i1; iStore<i1+nMerge; iStore++){ |
14038 | fts5BufferZero(&aBuf[iStore]); |
14039 | } |
14040 | } |
14041 | } |
14042 | iLastRowid = 0; |
14043 | } |
14044 | |
14045 | xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); |
14046 | iLastRowid = p1->base.iRowid; |
14047 | } |
14048 | |
14049 | assert( (nBuf%nMerge)==0 ); |
14050 | for(i=0; i<nBuf; i+=nMerge){ |
14051 | int iFree; |
14052 | if( p->rc==SQLITE_OK ){ |
14053 | xMerge(p, &doclist, nMerge, &aBuf[i]); |
14054 | } |
14055 | for(iFree=i; iFree<i+nMerge; iFree++){ |
14056 | fts5BufferFree(&aBuf[iFree]); |
14057 | } |
14058 | } |
14059 | fts5MultiIterFree(p1); |
14060 | |
14061 | pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING); |
14062 | if( pData ){ |
14063 | pData->p = (u8*)&pData[1]; |
14064 | pData->nn = pData->szLeaf = doclist.n; |
14065 | if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); |
14066 | fts5MultiIterNew2(p, pData, bDesc, ppIter); |
14067 | } |
14068 | fts5BufferFree(&doclist); |
14069 | } |
14070 | |
14071 | fts5StructureRelease(pStruct); |
14072 | sqlite3_free(aBuf); |
14073 | } |
14074 | |
14075 | |
14076 | /* |
14077 | ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain |
14078 | ** to the document with rowid iRowid. |
14079 | */ |
14080 | static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ |
14081 | assert( p->rc==SQLITE_OK ); |
14082 | |
14083 | /* Allocate the hash table if it has not already been allocated */ |
14084 | if( p->pHash==0 ){ |
14085 | p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); |
14086 | } |
14087 | |
14088 | /* Flush the hash table to disk if required */ |
14089 | if( iRowid<p->iWriteRowid |
14090 | || (iRowid==p->iWriteRowid && p->bDelete==0) |
14091 | || (p->nPendingData > p->pConfig->nHashSize) |
14092 | ){ |
14093 | fts5IndexFlush(p); |
14094 | } |
14095 | |
14096 | p->iWriteRowid = iRowid; |
14097 | p->bDelete = bDelete; |
14098 | return fts5IndexReturn(p); |
14099 | } |
14100 | |
14101 | /* |
14102 | ** Commit data to disk. |
14103 | */ |
14104 | static int sqlite3Fts5IndexSync(Fts5Index *p){ |
14105 | assert( p->rc==SQLITE_OK ); |
14106 | fts5IndexFlush(p); |
14107 | sqlite3Fts5IndexCloseReader(p); |
14108 | return fts5IndexReturn(p); |
14109 | } |
14110 | |
14111 | /* |
14112 | ** Discard any data stored in the in-memory hash tables. Do not write it |
14113 | ** to the database. Additionally, assume that the contents of the %_data |
14114 | ** table may have changed on disk. So any in-memory caches of %_data |
14115 | ** records must be invalidated. |
14116 | */ |
14117 | static int sqlite3Fts5IndexRollback(Fts5Index *p){ |
14118 | sqlite3Fts5IndexCloseReader(p); |
14119 | fts5IndexDiscardData(p); |
14120 | fts5StructureInvalidate(p); |
14121 | /* assert( p->rc==SQLITE_OK ); */ |
14122 | return SQLITE_OK; |
14123 | } |
14124 | |
14125 | /* |
14126 | ** The %_data table is completely empty when this function is called. This |
14127 | ** function populates it with the initial structure objects for each index, |
14128 | ** and the initial version of the "averages" record (a zero-byte blob). |
14129 | */ |
14130 | static int sqlite3Fts5IndexReinit(Fts5Index *p){ |
14131 | Fts5Structure s; |
14132 | fts5StructureInvalidate(p); |
14133 | fts5IndexDiscardData(p); |
14134 | memset(&s, 0, sizeof(Fts5Structure)); |
14135 | fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"" , 0); |
14136 | fts5StructureWrite(p, &s); |
14137 | return fts5IndexReturn(p); |
14138 | } |
14139 | |
14140 | /* |
14141 | ** Open a new Fts5Index handle. If the bCreate argument is true, create |
14142 | ** and initialize the underlying %_data table. |
14143 | ** |
14144 | ** If successful, set *pp to point to the new object and return SQLITE_OK. |
14145 | ** Otherwise, set *pp to NULL and return an SQLite error code. |
14146 | */ |
14147 | static int sqlite3Fts5IndexOpen( |
14148 | Fts5Config *pConfig, |
14149 | int bCreate, |
14150 | Fts5Index **pp, |
14151 | char **pzErr |
14152 | ){ |
14153 | int rc = SQLITE_OK; |
14154 | Fts5Index *p; /* New object */ |
14155 | |
14156 | *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); |
14157 | if( rc==SQLITE_OK ){ |
14158 | p->pConfig = pConfig; |
14159 | p->nWorkUnit = FTS5_WORK_UNIT; |
14160 | p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data" , pConfig->zName); |
14161 | if( p->zDataTbl && bCreate ){ |
14162 | rc = sqlite3Fts5CreateTable( |
14163 | pConfig, "data" , "id INTEGER PRIMARY KEY, block BLOB" , 0, pzErr |
14164 | ); |
14165 | if( rc==SQLITE_OK ){ |
14166 | rc = sqlite3Fts5CreateTable(pConfig, "idx" , |
14167 | "segid, term, pgno, PRIMARY KEY(segid, term)" , |
14168 | 1, pzErr |
14169 | ); |
14170 | } |
14171 | if( rc==SQLITE_OK ){ |
14172 | rc = sqlite3Fts5IndexReinit(p); |
14173 | } |
14174 | } |
14175 | } |
14176 | |
14177 | assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); |
14178 | if( rc ){ |
14179 | sqlite3Fts5IndexClose(p); |
14180 | *pp = 0; |
14181 | } |
14182 | return rc; |
14183 | } |
14184 | |
14185 | /* |
14186 | ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). |
14187 | */ |
14188 | static int sqlite3Fts5IndexClose(Fts5Index *p){ |
14189 | int rc = SQLITE_OK; |
14190 | if( p ){ |
14191 | assert( p->pReader==0 ); |
14192 | fts5StructureInvalidate(p); |
14193 | sqlite3_finalize(p->pWriter); |
14194 | sqlite3_finalize(p->pDeleter); |
14195 | sqlite3_finalize(p->pIdxWriter); |
14196 | sqlite3_finalize(p->pIdxDeleter); |
14197 | sqlite3_finalize(p->pIdxSelect); |
14198 | sqlite3_finalize(p->pDataVersion); |
14199 | sqlite3Fts5HashFree(p->pHash); |
14200 | sqlite3_free(p->zDataTbl); |
14201 | sqlite3_free(p); |
14202 | } |
14203 | return rc; |
14204 | } |
14205 | |
/*
** Buffer p holds nByte bytes of utf-8 text. Return the size in bytes of
** the prefix of the buffer that is made up of its first nChar characters,
** or 0 if the buffer holds fewer than nChar characters.
**
** A byte of 0xC0 or greater starts a multi-byte character, which extends
** over all immediately following continuation bytes (10xxxxxx). A lead
** byte that is the very last byte of the buffer does not count as a
** character, so 0 is returned in that case.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int iOff = 0;                   /* Offset of the next unread byte */
  int nSeen = 0;                  /* Number of characters consumed */

  while( nSeen<nChar ){
    unsigned char c;
    if( iOff>=nByte ) return 0;   /* Input contains fewer than nChar chars */
    c = (unsigned char)p[iOff];
    iOff++;
    if( c>=0xc0 ){
      if( iOff>=nByte ) return 0;
      while( (p[iOff] & 0xc0)==0x80 ){
        iOff++;
        if( iOff>=nByte ){
          /* Ran off the end of the buffer. That is only acceptable if
          ** this was the last character the caller asked for. */
          if( nSeen+1==nChar ) break;
          return 0;
        }
      }
    }
    nSeen++;
  }
  return iOff;
}
14233 | |
/*
** Return the number of characters in the nIn byte utf-8 string pIn.
**
** Each byte of 0xC0 or greater, together with the continuation bytes
** (10xxxxxx) that directly follow it, counts as one character. Every
** other byte counts as a character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nCount;                     /* Characters counted so far */
  int iByte = 0;                  /* Offset of the next unread byte */

  for(nCount=0; iByte<nIn; nCount++){
    unsigned char cLead = (unsigned char)pIn[iByte];
    iByte++;
    if( cLead<0xc0 ) continue;
    while( iByte<nIn && (pIn[iByte] & 0xc0)==0x80 ){
      iByte++;
    }
  }
  return nCount;
}
14249 | |
14250 | /* |
14251 | ** Insert or remove data to or from the index. Each time a document is |
14252 | ** added to or removed from the index, this function is called one or more |
14253 | ** times. |
14254 | ** |
14255 | ** For an insert, it must be called once for each token in the new document. |
14256 | ** If the operation is a delete, it must be called (at least) once for each |
14257 | ** unique token in the document with an iCol value less than zero. The iPos |
14258 | ** argument is ignored for a delete. |
14259 | */ |
14260 | static int sqlite3Fts5IndexWrite( |
14261 | Fts5Index *p, /* Index to write to */ |
14262 | int iCol, /* Column token appears in (-ve -> delete) */ |
14263 | int iPos, /* Position of token within column */ |
14264 | const char *pToken, int nToken /* Token to add or remove to or from index */ |
14265 | ){ |
14266 | int i; /* Used to iterate through indexes */ |
14267 | int rc = SQLITE_OK; /* Return code */ |
14268 | Fts5Config *pConfig = p->pConfig; |
14269 | |
14270 | assert( p->rc==SQLITE_OK ); |
14271 | assert( (iCol<0)==p->bDelete ); |
14272 | |
14273 | /* Add the entry to the main terms index. */ |
14274 | rc = sqlite3Fts5HashWrite( |
14275 | p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken |
14276 | ); |
14277 | |
14278 | for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ |
14279 | const int nChar = pConfig->aPrefix[i]; |
14280 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); |
14281 | if( nByte ){ |
14282 | rc = sqlite3Fts5HashWrite(p->pHash, |
14283 | p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, |
14284 | nByte |
14285 | ); |
14286 | } |
14287 | } |
14288 | |
14289 | return rc; |
14290 | } |
14291 | |
14292 | /* |
14293 | ** Open a new iterator to iterate though all rowid that match the |
14294 | ** specified token or token prefix. |
14295 | */ |
14296 | static int sqlite3Fts5IndexQuery( |
14297 | Fts5Index *p, /* FTS index to query */ |
14298 | const char *pToken, int nToken, /* Token (or prefix) to query for */ |
14299 | int flags, /* Mask of FTS5INDEX_QUERY_X flags */ |
14300 | Fts5Colset *pColset, /* Match these columns only */ |
14301 | Fts5IndexIter **ppIter /* OUT: New iterator object */ |
14302 | ){ |
14303 | Fts5Config *pConfig = p->pConfig; |
14304 | Fts5Iter *pRet = 0; |
14305 | Fts5Buffer buf = {0, 0, 0}; |
14306 | |
14307 | /* If the QUERY_SCAN flag is set, all other flags must be clear. */ |
14308 | assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); |
14309 | |
14310 | if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ |
14311 | int iIdx = 0; /* Index to search */ |
14312 | int iPrefixIdx = 0; /* +1 prefix index */ |
14313 | if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); |
14314 | |
14315 | /* Figure out which index to search and set iIdx accordingly. If this |
14316 | ** is a prefix query for which there is no prefix index, set iIdx to |
14317 | ** greater than pConfig->nPrefix to indicate that the query will be |
14318 | ** satisfied by scanning multiple terms in the main index. |
14319 | ** |
14320 | ** If the QUERY_TEST_NOIDX flag was specified, then this must be a |
14321 | ** prefix-query. Instead of using a prefix-index (if one exists), |
14322 | ** evaluate the prefix query using the main FTS index. This is used |
14323 | ** for internal sanity checking by the integrity-check in debug |
14324 | ** mode only. */ |
14325 | #ifdef SQLITE_DEBUG |
14326 | if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ |
14327 | assert( flags & FTS5INDEX_QUERY_PREFIX ); |
14328 | iIdx = 1+pConfig->nPrefix; |
14329 | }else |
14330 | #endif |
14331 | if( flags & FTS5INDEX_QUERY_PREFIX ){ |
14332 | int nChar = fts5IndexCharlen(pToken, nToken); |
14333 | for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ |
14334 | int nIdxChar = pConfig->aPrefix[iIdx-1]; |
14335 | if( nIdxChar==nChar ) break; |
14336 | if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx; |
14337 | } |
14338 | } |
14339 | |
14340 | if( iIdx<=pConfig->nPrefix ){ |
14341 | /* Straight index lookup */ |
14342 | Fts5Structure *pStruct = fts5StructureRead(p); |
14343 | buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); |
14344 | if( pStruct ){ |
14345 | fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY, |
14346 | pColset, buf.p, nToken+1, -1, 0, &pRet |
14347 | ); |
14348 | fts5StructureRelease(pStruct); |
14349 | } |
14350 | }else{ |
14351 | /* Scan multiple terms in the main index */ |
14352 | int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; |
14353 | fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); |
14354 | if( pRet==0 ){ |
14355 | assert( p->rc!=SQLITE_OK ); |
14356 | }else{ |
14357 | assert( pRet->pColset==0 ); |
14358 | fts5IterSetOutputCb(&p->rc, pRet); |
14359 | if( p->rc==SQLITE_OK ){ |
14360 | Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; |
14361 | if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); |
14362 | } |
14363 | } |
14364 | } |
14365 | |
14366 | if( p->rc ){ |
14367 | sqlite3Fts5IterClose((Fts5IndexIter*)pRet); |
14368 | pRet = 0; |
14369 | sqlite3Fts5IndexCloseReader(p); |
14370 | } |
14371 | |
14372 | *ppIter = (Fts5IndexIter*)pRet; |
14373 | sqlite3Fts5BufferFree(&buf); |
14374 | } |
14375 | return fts5IndexReturn(p); |
14376 | } |
14377 | |
14378 | /* |
14379 | ** Return true if the iterator passed as the only argument is at EOF. |
14380 | */ |
14381 | /* |
14382 | ** Move to the next matching rowid. |
14383 | */ |
14384 | static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ |
14385 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
14386 | assert( pIter->pIndex->rc==SQLITE_OK ); |
14387 | fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); |
14388 | return fts5IndexReturn(pIter->pIndex); |
14389 | } |
14390 | |
14391 | /* |
14392 | ** Move to the next matching term/rowid. Used by the fts5vocab module. |
14393 | */ |
14394 | static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ |
14395 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
14396 | Fts5Index *p = pIter->pIndex; |
14397 | |
14398 | assert( pIter->pIndex->rc==SQLITE_OK ); |
14399 | |
14400 | fts5MultiIterNext(p, pIter, 0, 0); |
14401 | if( p->rc==SQLITE_OK ){ |
14402 | Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; |
14403 | if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ |
14404 | fts5DataRelease(pSeg->pLeaf); |
14405 | pSeg->pLeaf = 0; |
14406 | pIter->base.bEof = 1; |
14407 | } |
14408 | } |
14409 | |
14410 | return fts5IndexReturn(pIter->pIndex); |
14411 | } |
14412 | |
14413 | /* |
14414 | ** Move to the next matching rowid that occurs at or after iMatch. The |
14415 | ** definition of "at or after" depends on whether this iterator iterates |
14416 | ** in ascending or descending rowid order. |
14417 | */ |
14418 | static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ |
14419 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
14420 | fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); |
14421 | return fts5IndexReturn(pIter->pIndex); |
14422 | } |
14423 | |
14424 | /* |
14425 | ** Return the current term. |
14426 | */ |
14427 | static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ |
14428 | int n; |
14429 | const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); |
14430 | assert_nc( z || n<=1 ); |
14431 | *pn = n-1; |
14432 | return (z ? &z[1] : 0); |
14433 | } |
14434 | |
14435 | /* |
14436 | ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). |
14437 | */ |
14438 | static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ |
14439 | if( pIndexIter ){ |
14440 | Fts5Iter *pIter = (Fts5Iter*)pIndexIter; |
14441 | Fts5Index *pIndex = pIter->pIndex; |
14442 | fts5MultiIterFree(pIter); |
14443 | sqlite3Fts5IndexCloseReader(pIndex); |
14444 | } |
14445 | } |
14446 | |
14447 | /* |
14448 | ** Read and decode the "averages" record from the database. |
14449 | ** |
14450 | ** Parameter anSize must point to an array of size nCol, where nCol is |
14451 | ** the number of user defined columns in the FTS table. |
14452 | */ |
14453 | static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ |
14454 | int nCol = p->pConfig->nCol; |
14455 | Fts5Data *pData; |
14456 | |
14457 | *pnRow = 0; |
14458 | memset(anSize, 0, sizeof(i64) * nCol); |
14459 | pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); |
14460 | if( p->rc==SQLITE_OK && pData->nn ){ |
14461 | int i = 0; |
14462 | int iCol; |
14463 | i += fts5GetVarint(&pData->p[i], (u64*)pnRow); |
14464 | for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ |
14465 | i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); |
14466 | } |
14467 | } |
14468 | |
14469 | fts5DataRelease(pData); |
14470 | return fts5IndexReturn(p); |
14471 | } |
14472 | |
14473 | /* |
14474 | ** Replace the current "averages" record with the contents of the buffer |
14475 | ** supplied as the second argument. |
14476 | */ |
14477 | static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ |
14478 | assert( p->rc==SQLITE_OK ); |
14479 | fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); |
14480 | return fts5IndexReturn(p); |
14481 | } |
14482 | |
14483 | /* |
14484 | ** Return the total number of blocks this module has read from the %_data |
14485 | ** table since it was created. |
14486 | */ |
14487 | static int sqlite3Fts5IndexReads(Fts5Index *p){ |
14488 | return p->nRead; |
14489 | } |
14490 | |
14491 | /* |
14492 | ** Set the 32-bit cookie value stored at the start of all structure |
14493 | ** records to the value passed as the second argument. |
14494 | ** |
14495 | ** Return SQLITE_OK if successful, or an SQLite error code if an error |
14496 | ** occurs. |
14497 | */ |
14498 | static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ |
14499 | int rc; /* Return code */ |
14500 | Fts5Config *pConfig = p->pConfig; /* Configuration object */ |
14501 | u8 aCookie[4]; /* Binary representation of iNew */ |
14502 | sqlite3_blob *pBlob = 0; |
14503 | |
14504 | assert( p->rc==SQLITE_OK ); |
14505 | sqlite3Fts5Put32(aCookie, iNew); |
14506 | |
14507 | rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, |
14508 | "block" , FTS5_STRUCTURE_ROWID, 1, &pBlob |
14509 | ); |
14510 | if( rc==SQLITE_OK ){ |
14511 | sqlite3_blob_write(pBlob, aCookie, 4, 0); |
14512 | rc = sqlite3_blob_close(pBlob); |
14513 | } |
14514 | |
14515 | return rc; |
14516 | } |
14517 | |
14518 | static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ |
14519 | Fts5Structure *pStruct; |
14520 | pStruct = fts5StructureRead(p); |
14521 | fts5StructureRelease(pStruct); |
14522 | return fts5IndexReturn(p); |
14523 | } |
14524 | |
14525 | |
14526 | /************************************************************************* |
14527 | ************************************************************************** |
14528 | ** Below this point is the implementation of the integrity-check |
14529 | ** functionality. |
14530 | */ |
14531 | |
14532 | /* |
14533 | ** Return a simple checksum value based on the arguments. |
14534 | */ |
14535 | static u64 sqlite3Fts5IndexEntryCksum( |
14536 | i64 iRowid, |
14537 | int iCol, |
14538 | int iPos, |
14539 | int iIdx, |
14540 | const char *pTerm, |
14541 | int nTerm |
14542 | ){ |
14543 | int i; |
14544 | u64 ret = iRowid; |
14545 | ret += (ret<<3) + iCol; |
14546 | ret += (ret<<3) + iPos; |
14547 | if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); |
14548 | for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; |
14549 | return ret; |
14550 | } |
14551 | |
14552 | #ifdef SQLITE_DEBUG |
14553 | /* |
14554 | ** This function is purely an internal test. It does not contribute to |
14555 | ** FTS functionality, or even the integrity-check, in any way. |
14556 | ** |
14557 | ** Instead, it tests that the same set of pgno/rowid combinations are |
14558 | ** visited regardless of whether the doclist-index identified by parameters |
14559 | ** iSegid/iLeaf is iterated in forwards or reverse order. |
14560 | */ |
14561 | static void fts5TestDlidxReverse( |
14562 | Fts5Index *p, |
14563 | int iSegid, /* Segment id to load from */ |
14564 | int iLeaf /* Load doclist-index for this leaf */ |
14565 | ){ |
14566 | Fts5DlidxIter *pDlidx = 0; |
14567 | u64 cksum1 = 13; |
14568 | u64 cksum2 = 13; |
14569 | |
14570 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); |
14571 | fts5DlidxIterEof(p, pDlidx)==0; |
14572 | fts5DlidxIterNext(p, pDlidx) |
14573 | ){ |
14574 | i64 iRowid = fts5DlidxIterRowid(pDlidx); |
14575 | int pgno = fts5DlidxIterPgno(pDlidx); |
14576 | assert( pgno>iLeaf ); |
14577 | cksum1 += iRowid + ((i64)pgno<<32); |
14578 | } |
14579 | fts5DlidxIterFree(pDlidx); |
14580 | pDlidx = 0; |
14581 | |
14582 | for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); |
14583 | fts5DlidxIterEof(p, pDlidx)==0; |
14584 | fts5DlidxIterPrev(p, pDlidx) |
14585 | ){ |
14586 | i64 iRowid = fts5DlidxIterRowid(pDlidx); |
14587 | int pgno = fts5DlidxIterPgno(pDlidx); |
14588 | assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); |
14589 | cksum2 += iRowid + ((i64)pgno<<32); |
14590 | } |
14591 | fts5DlidxIterFree(pDlidx); |
14592 | pDlidx = 0; |
14593 | |
14594 | if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; |
14595 | } |
14596 | |
14597 | static int fts5QueryCksum( |
14598 | Fts5Index *p, /* Fts5 index object */ |
14599 | int iIdx, |
14600 | const char *z, /* Index key to query for */ |
14601 | int n, /* Size of index key in bytes */ |
14602 | int flags, /* Flags for Fts5IndexQuery */ |
14603 | u64 *pCksum /* IN/OUT: Checksum value */ |
14604 | ){ |
14605 | int eDetail = p->pConfig->eDetail; |
14606 | u64 cksum = *pCksum; |
14607 | Fts5IndexIter *pIter = 0; |
14608 | int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); |
14609 | |
14610 | while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){ |
14611 | i64 rowid = pIter->iRowid; |
14612 | |
14613 | if( eDetail==FTS5_DETAIL_NONE ){ |
14614 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); |
14615 | }else{ |
14616 | Fts5PoslistReader sReader; |
14617 | for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); |
14618 | sReader.bEof==0; |
14619 | sqlite3Fts5PoslistReaderNext(&sReader) |
14620 | ){ |
14621 | int iCol = FTS5_POS2COLUMN(sReader.iPos); |
14622 | int iOff = FTS5_POS2OFFSET(sReader.iPos); |
14623 | cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); |
14624 | } |
14625 | } |
14626 | if( rc==SQLITE_OK ){ |
14627 | rc = sqlite3Fts5IterNext(pIter); |
14628 | } |
14629 | } |
14630 | sqlite3Fts5IterClose(pIter); |
14631 | |
14632 | *pCksum = cksum; |
14633 | return rc; |
14634 | } |
14635 | |
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of the encoding is verified: each lead byte must be
** of the form 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx and must be
** followed, within the buffer, by 0, 1, 2 or 3 continuation bytes
** (10xxxxxx) respectively. Overlong encodings and out-of-range
** codepoints are not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    int nCont;                    /* Continuation bytes required */
    int j;

    if( (z[i] & 0x80)==0x00 ){
      nCont = 0;
    }else if( (z[i] & 0xE0)==0xC0 ){
      nCont = 1;
    }else if( (z[i] & 0xF0)==0xE0 ){
      nCont = 2;
    }else if( (z[i] & 0xF8)==0xF0 ){
      nCont = 3;
    }else{
      return 1;                   /* Not a valid lead byte */
    }

    /* All continuation bytes must lie inside the buffer... */
    if( i+nCont>=n ) return 1;

    /* ...and each one must be of the form 10xxxxxx. This includes the
    ** third continuation byte of a 4-byte sequence. */
    for(j=1; j<=nCont; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }

    /* Step over the lead byte and all of its continuation bytes */
    i += nCont+1;
  }

  return 0;
}
14667 | |
14668 | /* |
14669 | ** This function is also purely an internal test. It does not contribute to |
14670 | ** FTS functionality, or even the integrity-check, in any way. |
14671 | */ |
14672 | static void fts5TestTerm( |
14673 | Fts5Index *p, |
14674 | Fts5Buffer *pPrev, /* Previous term */ |
14675 | const char *z, int n, /* Possibly new term to test */ |
14676 | u64 expected, |
14677 | u64 *pCksum |
14678 | ){ |
14679 | int rc = p->rc; |
14680 | if( pPrev->n==0 ){ |
14681 | fts5BufferSet(&rc, pPrev, n, (const u8*)z); |
14682 | }else |
14683 | if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ |
14684 | u64 cksum3 = *pCksum; |
14685 | const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ |
14686 | int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ |
14687 | int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); |
14688 | int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); |
14689 | u64 ck1 = 0; |
14690 | u64 ck2 = 0; |
14691 | |
14692 | /* Check that the results returned for ASC and DESC queries are |
14693 | ** the same. If not, call this corruption. */ |
14694 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); |
14695 | if( rc==SQLITE_OK ){ |
14696 | int f = flags|FTS5INDEX_QUERY_DESC; |
14697 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
14698 | } |
14699 | if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
14700 | |
14701 | /* If this is a prefix query, check that the results returned if the |
14702 | ** the index is disabled are the same. In both ASC and DESC order. |
14703 | ** |
14704 | ** This check may only be performed if the hash table is empty. This |
14705 | ** is because the hash table only supports a single scan query at |
14706 | ** a time, and the multi-iter loop from which this function is called |
14707 | ** is already performing such a scan. |
14708 | ** |
14709 | ** Also only do this if buffer zTerm contains nTerm bytes of valid |
14710 | ** utf-8. Otherwise, the last part of the buffer contents might contain |
14711 | ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8 |
14712 | ** character stored in the main fts index, which will cause the |
14713 | ** test to fail. */ |
14714 | if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){ |
14715 | if( iIdx>0 && rc==SQLITE_OK ){ |
14716 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; |
14717 | ck2 = 0; |
14718 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
14719 | if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
14720 | } |
14721 | if( iIdx>0 && rc==SQLITE_OK ){ |
14722 | int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; |
14723 | ck2 = 0; |
14724 | rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); |
14725 | if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; |
14726 | } |
14727 | } |
14728 | |
14729 | cksum3 ^= ck1; |
14730 | fts5BufferSet(&rc, pPrev, n, (const u8*)z); |
14731 | |
14732 | if( rc==SQLITE_OK && cksum3!=expected ){ |
14733 | rc = FTS5_CORRUPT; |
14734 | } |
14735 | *pCksum = cksum3; |
14736 | } |
14737 | p->rc = rc; |
14738 | } |
14739 | |
14740 | #else |
14741 | # define fts5TestDlidxReverse(x,y,z) |
14742 | # define fts5TestTerm(u,v,w,x,y,z) |
14743 | #endif |
14744 | |
14745 | /* |
14746 | ** Check that: |
14747 | ** |
14748 | ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and |
14749 | ** contain zero terms. |
14750 | ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and |
14751 | ** contain zero rowids. |
14752 | */ |
14753 | static void fts5IndexIntegrityCheckEmpty( |
14754 | Fts5Index *p, |
14755 | Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ |
14756 | int iFirst, |
14757 | int iNoRowid, |
14758 | int iLast |
14759 | ){ |
14760 | int i; |
14761 | |
14762 | /* Now check that the iter.nEmpty leaves following the current leaf |
14763 | ** (a) exist and (b) contain no terms. */ |
14764 | for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ |
14765 | Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); |
14766 | if( pLeaf ){ |
14767 | if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; |
14768 | if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; |
14769 | } |
14770 | fts5DataRelease(pLeaf); |
14771 | } |
14772 | } |
14773 | |
14774 | static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ |
14775 | int iTermOff = 0; |
14776 | int ii; |
14777 | |
14778 | Fts5Buffer buf1 = {0,0,0}; |
14779 | Fts5Buffer buf2 = {0,0,0}; |
14780 | |
14781 | ii = pLeaf->szLeaf; |
14782 | while( ii<pLeaf->nn && p->rc==SQLITE_OK ){ |
14783 | int res; |
14784 | int iOff; |
14785 | int nIncr; |
14786 | |
14787 | ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); |
14788 | iTermOff += nIncr; |
14789 | iOff = iTermOff; |
14790 | |
14791 | if( iOff>=pLeaf->szLeaf ){ |
14792 | p->rc = FTS5_CORRUPT; |
14793 | }else if( iTermOff==nIncr ){ |
14794 | int nByte; |
14795 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); |
14796 | if( (iOff+nByte)>pLeaf->szLeaf ){ |
14797 | p->rc = FTS5_CORRUPT; |
14798 | }else{ |
14799 | fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); |
14800 | } |
14801 | }else{ |
14802 | int nKeep, nByte; |
14803 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); |
14804 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); |
14805 | if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ |
14806 | p->rc = FTS5_CORRUPT; |
14807 | }else{ |
14808 | buf1.n = nKeep; |
14809 | fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); |
14810 | } |
14811 | |
14812 | if( p->rc==SQLITE_OK ){ |
14813 | res = fts5BufferCompare(&buf1, &buf2); |
14814 | if( res<=0 ) p->rc = FTS5_CORRUPT; |
14815 | } |
14816 | } |
14817 | fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); |
14818 | } |
14819 | |
14820 | fts5BufferFree(&buf1); |
14821 | fts5BufferFree(&buf2); |
14822 | } |
14823 | |
14824 | static void fts5IndexIntegrityCheckSegment( |
14825 | Fts5Index *p, /* FTS5 backend object */ |
14826 | Fts5StructureSegment *pSeg /* Segment to check internal consistency */ |
14827 | ){ |
14828 | Fts5Config *pConfig = p->pConfig; |
14829 | sqlite3_stmt *pStmt = 0; |
14830 | int rc2; |
14831 | int iIdxPrevLeaf = pSeg->pgnoFirst-1; |
14832 | int iDlidxPrevLeaf = pSeg->pgnoLast; |
14833 | |
14834 | if( pSeg->pgnoFirst==0 ) return; |
14835 | |
14836 | fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf( |
14837 | "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d " |
14838 | "ORDER BY 1, 2" , |
14839 | pConfig->zDb, pConfig->zName, pSeg->iSegid |
14840 | )); |
14841 | |
14842 | /* Iterate through the b-tree hierarchy. */ |
14843 | while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ |
14844 | i64 iRow; /* Rowid for this leaf */ |
14845 | Fts5Data *pLeaf; /* Data for this leaf */ |
14846 | |
14847 | const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1); |
14848 | int nIdxTerm = sqlite3_column_bytes(pStmt, 1); |
14849 | int iIdxLeaf = sqlite3_column_int(pStmt, 2); |
14850 | int bIdxDlidx = sqlite3_column_int(pStmt, 3); |
14851 | |
14852 | /* If the leaf in question has already been trimmed from the segment, |
14853 | ** ignore this b-tree entry. Otherwise, load it into memory. */ |
14854 | if( iIdxLeaf<pSeg->pgnoFirst ) continue; |
14855 | iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); |
14856 | pLeaf = fts5LeafRead(p, iRow); |
14857 | if( pLeaf==0 ) break; |
14858 | |
14859 | /* Check that the leaf contains at least one term, and that it is equal |
14860 | ** to or larger than the split-key in zIdxTerm. Also check that if there |
14861 | ** is also a rowid pointer within the leaf page header, it points to a |
14862 | ** location before the term. */ |
14863 | if( pLeaf->nn<=pLeaf->szLeaf ){ |
14864 | p->rc = FTS5_CORRUPT; |
14865 | }else{ |
14866 | int iOff; /* Offset of first term on leaf */ |
14867 | int iRowidOff; /* Offset of first rowid on leaf */ |
14868 | int nTerm; /* Size of term on leaf in bytes */ |
14869 | int res; /* Comparison of term and split-key */ |
14870 | |
14871 | iOff = fts5LeafFirstTermOff(pLeaf); |
14872 | iRowidOff = fts5LeafFirstRowidOff(pLeaf); |
14873 | if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){ |
14874 | p->rc = FTS5_CORRUPT; |
14875 | }else{ |
14876 | iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); |
14877 | res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); |
14878 | if( res==0 ) res = nTerm - nIdxTerm; |
14879 | if( res<0 ) p->rc = FTS5_CORRUPT; |
14880 | } |
14881 | |
14882 | fts5IntegrityCheckPgidx(p, pLeaf); |
14883 | } |
14884 | fts5DataRelease(pLeaf); |
14885 | if( p->rc ) break; |
14886 | |
14887 | /* Now check that the iter.nEmpty leaves following the current leaf |
14888 | ** (a) exist and (b) contain no terms. */ |
14889 | fts5IndexIntegrityCheckEmpty( |
14890 | p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 |
14891 | ); |
14892 | if( p->rc ) break; |
14893 | |
14894 | /* If there is a doclist-index, check that it looks right. */ |
14895 | if( bIdxDlidx ){ |
14896 | Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ |
14897 | int iPrevLeaf = iIdxLeaf; |
14898 | int iSegid = pSeg->iSegid; |
14899 | int iPg = 0; |
14900 | i64 iKey; |
14901 | |
14902 | for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); |
14903 | fts5DlidxIterEof(p, pDlidx)==0; |
14904 | fts5DlidxIterNext(p, pDlidx) |
14905 | ){ |
14906 | |
14907 | /* Check any rowid-less pages that occur before the current leaf. */ |
14908 | for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ |
14909 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPg); |
14910 | pLeaf = fts5DataRead(p, iKey); |
14911 | if( pLeaf ){ |
14912 | if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; |
14913 | fts5DataRelease(pLeaf); |
14914 | } |
14915 | } |
14916 | iPrevLeaf = fts5DlidxIterPgno(pDlidx); |
14917 | |
14918 | /* Check that the leaf page indicated by the iterator really does |
14919 | ** contain the rowid suggested by the same. */ |
14920 | iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); |
14921 | pLeaf = fts5DataRead(p, iKey); |
14922 | if( pLeaf ){ |
14923 | i64 iRowid; |
14924 | int iRowidOff = fts5LeafFirstRowidOff(pLeaf); |
14925 | ASSERT_SZLEAF_OK(pLeaf); |
14926 | if( iRowidOff>=pLeaf->szLeaf ){ |
14927 | p->rc = FTS5_CORRUPT; |
14928 | }else{ |
14929 | fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); |
14930 | if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; |
14931 | } |
14932 | fts5DataRelease(pLeaf); |
14933 | } |
14934 | } |
14935 | |
14936 | iDlidxPrevLeaf = iPg; |
14937 | fts5DlidxIterFree(pDlidx); |
14938 | fts5TestDlidxReverse(p, iSegid, iIdxLeaf); |
14939 | }else{ |
14940 | iDlidxPrevLeaf = pSeg->pgnoLast; |
14941 | /* TODO: Check there is no doclist index */ |
14942 | } |
14943 | |
14944 | iIdxPrevLeaf = iIdxLeaf; |
14945 | } |
14946 | |
14947 | rc2 = sqlite3_finalize(pStmt); |
14948 | if( p->rc==SQLITE_OK ) p->rc = rc2; |
14949 | |
14950 | /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ |
14951 | #if 0 |
14952 | if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ |
14953 | p->rc = FTS5_CORRUPT; |
14954 | } |
14955 | #endif |
14956 | } |
14957 | |
14958 | |
14959 | /* |
14960 | ** Run internal checks to ensure that the FTS index (a) is internally |
14961 | ** consistent and (b) contains entries for which the XOR of the checksums |
14962 | ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. |
14963 | ** |
14964 | ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the |
14965 | ** checksum does not match. Return SQLITE_OK if all checks pass without |
14966 | ** error, or some other SQLite error code if another error (e.g. OOM) |
14967 | ** occurs. |
14968 | */ |
14969 | static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){ |
14970 | int eDetail = p->pConfig->eDetail; |
14971 | u64 cksum2 = 0; /* Checksum based on contents of indexes */ |
14972 | Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ |
14973 | Fts5Iter *pIter; /* Used to iterate through entire index */ |
14974 | Fts5Structure *pStruct; /* Index structure */ |
14975 | int iLvl, iSeg; |
14976 | |
14977 | #ifdef SQLITE_DEBUG |
14978 | /* Used by extra internal tests only run if NDEBUG is not defined */ |
14979 | u64 cksum3 = 0; /* Checksum based on contents of indexes */ |
14980 | Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ |
14981 | #endif |
14982 | const int flags = FTS5INDEX_QUERY_NOOUTPUT; |
14983 | |
14984 | /* Load the FTS index structure */ |
14985 | pStruct = fts5StructureRead(p); |
14986 | if( pStruct==0 ){ |
14987 | assert( p->rc!=SQLITE_OK ); |
14988 | return fts5IndexReturn(p); |
14989 | } |
14990 | |
14991 | /* Check that the internal nodes of each segment match the leaves */ |
14992 | for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ |
14993 | for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ |
14994 | Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; |
14995 | fts5IndexIntegrityCheckSegment(p, pSeg); |
14996 | } |
14997 | } |
14998 | |
14999 | /* The cksum argument passed to this function is a checksum calculated |
15000 | ** based on all expected entries in the FTS index (including prefix index |
15001 | ** entries). This block checks that a checksum calculated based on the |
15002 | ** actual contents of FTS index is identical. |
15003 | ** |
15004 | ** Two versions of the same checksum are calculated. The first (stack |
15005 | ** variable cksum2) based on entries extracted from the full-text index |
15006 | ** while doing a linear scan of each individual index in turn. |
15007 | ** |
15008 | ** As each term visited by the linear scans, a separate query for the |
15009 | ** same term is performed. cksum3 is calculated based on the entries |
15010 | ** extracted by these queries. |
15011 | */ |
15012 | for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); |
15013 | fts5MultiIterEof(p, pIter)==0; |
15014 | fts5MultiIterNext(p, pIter, 0, 0) |
15015 | ){ |
15016 | int n; /* Size of term in bytes */ |
15017 | i64 iPos = 0; /* Position read from poslist */ |
15018 | int iOff = 0; /* Offset within poslist */ |
15019 | i64 iRowid = fts5MultiIterRowid(pIter); |
15020 | char *z = (char*)fts5MultiIterTerm(pIter, &n); |
15021 | |
15022 | /* If this is a new term, query for it. Update cksum3 with the results. */ |
15023 | fts5TestTerm(p, &term, z, n, cksum2, &cksum3); |
15024 | if( p->rc ) break; |
15025 | |
15026 | if( eDetail==FTS5_DETAIL_NONE ){ |
15027 | if( 0==fts5MultiIterIsEmpty(p, pIter) ){ |
15028 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); |
15029 | } |
15030 | }else{ |
15031 | poslist.n = 0; |
15032 | fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); |
15033 | fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0" ); |
15034 | while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ |
15035 | int iCol = FTS5_POS2COLUMN(iPos); |
15036 | int iTokOff = FTS5_POS2OFFSET(iPos); |
15037 | cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); |
15038 | } |
15039 | } |
15040 | } |
15041 | fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); |
15042 | |
15043 | fts5MultiIterFree(pIter); |
15044 | if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; |
15045 | |
15046 | fts5StructureRelease(pStruct); |
15047 | #ifdef SQLITE_DEBUG |
15048 | fts5BufferFree(&term); |
15049 | #endif |
15050 | fts5BufferFree(&poslist); |
15051 | return fts5IndexReturn(p); |
15052 | } |
15053 | |
/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() and
** fts5_rowid() debugging scalar functions (compiled only when SQLITE_TEST
** is defined), followed by sqlite3Fts5IndexInit(), which registers them,
** and sqlite3Fts5IndexReset().
*/
15059 | |
#ifdef SQLITE_TEST
/*
** Split a segment-data rowid from the %_data table into its component
** fields. This is the inverse of macro FTS5_SEGMENT_ROWID().
**
** Reading from the least significant bits upwards, the fields are: the
** page number (FTS5_DATA_PAGE_B bits), the height (FTS5_DATA_HEIGHT_B
** bits), the doclist-index flag (lowest bit of a FTS5_DATA_DLI_B bit
** field) and the segment id (FTS5_DATA_ID_B bits).
*/
static void fts5DecodeRowid(
  i64 iRowid,                     /* Rowid from %_data table */
  int *piSegid,                   /* OUT: Segment id */
  int *pbDlidx,                   /* OUT: Dlidx flag */
  int *piHeight,                  /* OUT: Height */
  int *piPgno                     /* OUT: Page number */
){
  const i64 mPgno = ((i64)1 << FTS5_DATA_PAGE_B) - 1;
  const i64 mHeight = ((i64)1 << FTS5_DATA_HEIGHT_B) - 1;
  const i64 mSegid = ((i64)1 << FTS5_DATA_ID_B) - 1;
  i64 iRest = iRowid;             /* Bits not yet consumed */

  *piPgno = (int)(iRest & mPgno);
  iRest >>= FTS5_DATA_PAGE_B;

  *piHeight = (int)(iRest & mHeight);
  iRest >>= FTS5_DATA_HEIGHT_B;

  *pbDlidx = (int)(iRest & 0x0001);
  iRest >>= FTS5_DATA_DLI_B;

  *piSegid = (int)(iRest & mSegid);
}
#endif /* SQLITE_TEST */
15084 | |
#ifdef SQLITE_TEST
/*
** Append a human-readable description of %_data rowid iKey to buffer pBuf.
**
** If the segment id decoded from iKey is zero, the text is "{averages} "
** when iKey is FTS5_AVERAGES_ROWID and "{structure}" otherwise. For any
** other segment id the text lists the segment id, height and page number,
** prefixed by "dlidx " if the doclist-index flag is set.
**
** Errors are reported through *pRc by sqlite3Fts5BufferAppendPrintf().
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int segid, height, pgno, isDlidx;    /* Rowid components */

  fts5DecodeRowid(iKey, &segid, &isDlidx, &height, &pgno);

  if( segid!=0 ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
        (isDlidx ? "dlidx " : ""), segid, height, pgno
    );
  }else if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
#endif /* SQLITE_TEST */
15104 | |
#ifdef SQLITE_TEST
/*
** Append a text representation of structure object p to buffer pBuf.
**
** One " {lvl=... nMerge=... nSeg=...}" group is written per level. Inside
** each group there is one " {id=... leaves=first..last}" entry for each
** segment on that level.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl = 0;                   /* Index of the level being written */

  while( iLvl<p->nLevel ){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    Fts5StructureSegment *pSeg = pLvl->aSeg;
    int nLeft;                    /* Segments still to write on this level */

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for(nLeft=pLvl->nSeg; nLeft>0; nLeft--, pSeg++){
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
    iLvl++;
  }
}
#endif /* SQLITE_TEST */
15128 | |
#ifdef SQLITE_TEST
/*
** Part of the fts5_decode() debugging aid.
**
** Blob (pBlob/nBlob) holds a serialized Fts5Structure object. It is decoded
** with fts5StructureDecode() and a human-readable rendering of the result
** is appended to pBuf. If decoding fails, the error code is stored in *pRc
** and nothing is appended.
*/
static void fts5DecodeStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  Fts5Structure *pDecoded = 0;    /* Decoded structure object */
  int rcDecode;                   /* Result of fts5StructureDecode() */

  rcDecode = fts5StructureDecode(pBlob, nBlob, 0, &pDecoded);
  if( rcDecode==SQLITE_OK ){
    fts5DebugStructure(pRc, pBuf, pDecoded);
    fts5StructureRelease(pDecoded);
  }else{
    *pRc = rcDecode;
  }
}
#endif /* SQLITE_TEST */
15155 | |
#ifdef SQLITE_TEST
/*
** Part of the fts5_decode() debugging aid.
**
** Blob (pBlob/nBlob) holds an "averages" record, which is read here as a
** sequence of varints. Each value is cast to int and appended to pBuf in
** decimal, with a single space between consecutive values.
*/
static void fts5DecodeAverages(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  const u8 *pBlob, int nBlob
){
  const char *zSep = "";          /* Separator written before each value */
  int iOff;                       /* Current offset within pBlob[] */

  for(iOff=0; iOff<nBlob; zSep=" "){
    u64 v;
    iOff += sqlite3Fts5GetVarint(&pBlob[iOff], &v);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSep, (int)v);
  }
}
#endif /* SQLITE_TEST */
15180 | |
#ifdef SQLITE_TEST
/*
** Buffer (a/n) is read as a list of serialized 32-bit varints. Each value
** is appended to buffer pBuf in decimal, preceded by a space. Reading
** continues until the offset reaches or passes n. A value of zero does
** not end the list.
**
** The return value is the number of bytes read from the input buffer. It
** may exceed n if the final varint extends past the end of the buffer.
*/
static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  int nUsed;                      /* Bytes of a[] consumed so far */

  for(nUsed=0; nUsed<n; ){
    int iValue;
    nUsed += fts5GetVarint32(&a[nUsed], iValue);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iValue);
  }
  return nUsed;
}
#endif /* SQLITE_TEST */
15199 | |
#ifdef SQLITE_TEST
/*
** The start of buffer (a/n) contains the start of a doclist. The doclist
** may or may not finish within the buffer. This function appends a text
** representation of the part of the doclist that is present to buffer
** pBuf.
**
** The first varint is the initial docid. Each later docid is stored as a
** delta that is added to the previous one. The running docid is kept as an
** unsigned 64-bit value and only converted to a signed value for printing.
** The buffer may hold arbitrary (corrupt) bytes, and adding the deltas as
** signed values could overflow, which is undefined behaviour in C.
**
** The return value is the number of bytes read from the input buffer.
*/
static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
  u64 iDocid = 0;                 /* Running docid, in unsigned arithmetic */
  int iOff = 0;                   /* Current offset within a[] */

  if( n>0 ){
    iOff = sqlite3Fts5GetVarint(a, &iDocid);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", (i64)iDocid);
  }
  while( iOff<n ){
    int nPos;                     /* Size of the position list in bytes */
    int bDel;                     /* Flag returned by fts5GetPoslistSize() */
    iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
    iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
    if( iOff<n ){
      u64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
      iDocid += iDelta;
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", (i64)iDocid);
    }
  }

  return iOff;
}
#endif /* SQLITE_TEST */
15234 | |
#ifdef SQLITE_TEST
/*
** This function is part of the fts5_decode() debugging function. It is
** only ever used with detail=none tables.
**
** Buffer (pData/nData) contains a doclist in the format used by detail=none
** tables: a series of varint rowid deltas, each optionally followed by one
** or two 0x00 bytes. Each rowid is appended to pBuf in decimal, preceded by
** a space. It is followed by "*" if exactly one 0x00 byte came after it,
** by "+" if two did, and by nothing otherwise.
**
** Output is produced by sqlite3Fts5BufferAppendPrintf(), which reports
** errors through *pRc. If an error occurs the final state of buffer pBuf
** is undefined.
*/
static void fts5DecodeRowidList(
  int *pRc,                       /* IN/OUT: Error code */
  Fts5Buffer *pBuf,               /* Buffer to append text to */
  const u8 *pData, int nData      /* Data to decode list-of-rowids from */
){
  static const char * const azSuffix[] = { "", "*", "+" };
  i64 iRowid = 0;                 /* Running rowid value */
  int iOff = 0;                   /* Current offset within pData[] */

  while( iOff<nData ){
    u64 iDelta;
    int nZero = 0;                /* Number of 0x00 bytes consumed (0..2) */

    iOff += sqlite3Fts5GetVarint(&pData[iOff], &iDelta);
    iRowid += iDelta;

    while( nZero<2 && iOff<nData && pData[iOff]==0x00 ){
      iOff++;
      nZero++;
    }

    sqlite3Fts5BufferAppendPrintf(
        pRc, pBuf, " %lld%s", iRowid, azSuffix[nZero]
    );
  }
}
#endif /* SQLITE_TEST */
15277 | |
#ifdef SQLITE_TEST
/*
** The implementation of user-defined scalar function fts5_decode().
**
** The first argument is a rowid from the %_data table and the second is
** the blob stored against it. The result is a text description of the
** record. The same function is registered twice. When its user-data
** pointer is non-NULL, leaf pages are decoded using the detail=none format.
**
** The rowid selects the decoder:
**
**   * doclist-index flag set:  decoded as a doclist-index page,
**   * segment id zero:         decoded as the averages or structure record,
**   * anything else:           decoded as a leaf page.
**
** The blob is arbitrary user-supplied data. Offsets and sizes read from it
** are therefore checked against the size of the record before they are
** used to index the copy. The checks are written as "x > limit - y" rather
** than "x + y > limit" so that they cannot themselves overflow. If a record
** fails a check the function returns error FTS5_CORRUPT. As a special case,
** a leaf page of less than 4 bytes decoded in the regular (not detail=none)
** format yields the text "corrupt" instead.
*/
static void fts5DecodeFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args (always 2) */
  sqlite3_value **apVal           /* Function arguments */
){
  i64 iRowid;                     /* Rowid for record being decoded */
  int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
  const u8 *aBlob; int n;         /* Record to decode */
  u8 *a = 0;
  Fts5Buffer s;                   /* Build up text to return here */
  int rc = SQLITE_OK;             /* Return code */
  sqlite3_int64 nSpace = 0;
  int eDetailNone = (sqlite3_user_data(pCtx)!=0);

  assert( nArg==2 );
  UNUSED_PARAM(nArg);
  memset(&s, 0, sizeof(Fts5Buffer));
  iRowid = sqlite3_value_int64(apVal[0]);

  /* Make a copy of the second argument (a blob) in a[]. The a[] copy is
  ** followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents buffer
  ** overreads when a varint is read at or just before the end of the
  ** record. The size is computed in 64 bits so that it cannot overflow. */
  n = sqlite3_value_bytes(apVal[1]);
  aBlob = sqlite3_value_blob(apVal[1]);
  nSpace = ((sqlite3_int64)n) + FTS5_DATA_ZERO_PADDING;
  a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
  if( a==0 ) goto decode_out;
  if( n>0 ) memcpy(a, aBlob, n);

  fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);

  fts5DebugRowid(&rc, &s, iRowid);
  if( bDlidx ){
    Fts5Data dlidx;
    Fts5DlidxLvl lvl;

    dlidx.p = a;
    dlidx.nn = n;

    memset(&lvl, 0, sizeof(Fts5DlidxLvl));
    lvl.pData = &dlidx;
    lvl.iLeafPgno = iPgno;

    for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
      sqlite3Fts5BufferAppendPrintf(&rc, &s,
          " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
      );
    }
  }else if( iSegid==0 ){
    if( iRowid==FTS5_AVERAGES_ROWID ){
      fts5DecodeAverages(&rc, &s, a, n);
    }else{
      fts5DecodeStructure(&rc, &s, a, n);
    }
  }else if( eDetailNone ){
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;
    int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
    int iTermOff = 0;
    int nKeep = 0;
    int iOff;

    memset(&term, 0, sizeof(Fts5Buffer));

    /* Decode any entries that occur before the first term. The page-index
    ** begins at offset szLeaf, which may not lie beyond the end of the
    ** record. The first term may not begin beyond szLeaf. */
    if( szLeaf>n ){
      rc = FTS5_CORRUPT;
    }else if( szLeaf<n ){
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
      if( iTermOff>szLeaf ) rc = FTS5_CORRUPT;
    }else{
      iTermOff = szLeaf;
    }
    if( rc==SQLITE_OK ){
      fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
    }

    iOff = iTermOff;
    while( rc==SQLITE_OK && iOff<szLeaf ){
      int nAppend;

      /* Read the term data for the next term. The retained prefix may not
      ** be longer than the previous term, and the new bytes must lie
      ** within the first szLeaf bytes of the page. */
      iOff += fts5GetVarint32(&a[iOff], nAppend);
      if( nKeep>term.n || nAppend>szLeaf-iOff ){
        rc = FTS5_CORRUPT;
        break;
      }
      term.n = nKeep;
      fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += nAppend;

      /* Figure out where the doclist for this term ends */
      if( iPgidxOff<n ){
        int nIncr;
        iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
        if( nIncr>szLeaf-iTermOff ){
          rc = FTS5_CORRUPT;
          break;
        }
        iTermOff += nIncr;
      }else{
        iTermOff = szLeaf;
      }

      fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
      iOff = iTermOff;
      if( iOff<szLeaf ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }

    fts5BufferFree(&term);
  }else{
    Fts5Buffer term;              /* Current term read from page */
    int szLeaf;                   /* Offset of pgidx in a[] */
    int iPgidxOff;
    int iPgidxPrev = 0;           /* Previous value read from pgidx */
    int iTermOff = 0;
    int iRowidOff = 0;
    int iOff;
    int nDoclist;

    memset(&term, 0, sizeof(Fts5Buffer));

    if( n<4 ){
      sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
      goto decode_out;
    }else{
      iRowidOff = fts5GetU16(&a[0]);
      iPgidxOff = szLeaf = fts5GetU16(&a[2]);
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], iTermOff);
      }else if( iPgidxOff>n ){
        rc = FTS5_CORRUPT;
        goto decode_out;
      }
    }

    /* Decode the position list tail at the start of the page */
    if( iRowidOff!=0 ){
      iOff = iRowidOff;
    }else if( iTermOff!=0 ){
      iOff = iTermOff;
    }else{
      iOff = szLeaf;
    }
    if( iOff>n ){
      rc = FTS5_CORRUPT;
      goto decode_out;
    }
    fts5DecodePoslist(&rc, &s, &a[4], iOff-4);

    /* Decode any more doclist data that appears on the page before the
    ** first term. */
    nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
    if( nDoclist+iOff>n ){
      rc = FTS5_CORRUPT;
      goto decode_out;
    }
    fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);

    while( iPgidxOff<n && rc==SQLITE_OK ){
      int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
      int nByte;                            /* Bytes of data */
      int iEnd;

      /* iPgidxPrev becomes the offset of this term. It must not lie
      ** beyond the term data, which ends at szLeaf. */
      iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
      iPgidxPrev += nByte;
      iOff = iPgidxPrev;
      if( iOff>szLeaf ){
        rc = FTS5_CORRUPT;
        break;
      }

      /* The doclist of this term ends where the next term begins, or at
      ** szLeaf if this is the last term on the page. */
      if( iPgidxOff<n ){
        fts5GetVarint32(&a[iPgidxOff], nByte);
        if( nByte>szLeaf-iPgidxPrev ){
          rc = FTS5_CORRUPT;
          break;
        }
        iEnd = iPgidxPrev + nByte;
      }else{
        iEnd = szLeaf;
      }

      if( bFirst==0 ){
        iOff += fts5GetVarint32(&a[iOff], nByte);
        if( nByte>term.n ){
          rc = FTS5_CORRUPT;
          break;
        }
        term.n = nByte;
      }
      iOff += fts5GetVarint32(&a[iOff], nByte);
      if( nByte>n-iOff ){
        rc = FTS5_CORRUPT;
        break;
      }
      fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
      iOff += nByte;

      sqlite3Fts5BufferAppendPrintf(
          &rc, &s, " term=%.*s", term.n, (const char*)term.p
      );
      iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
    }

    fts5BufferFree(&term);
  }

 decode_out:
  sqlite3_free(a);
  if( rc==SQLITE_OK ){
    sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
  }else{
    sqlite3_result_error_code(pCtx, rc);
  }
  fts5BufferFree(&s);
}
#endif /* SQLITE_TEST */
15488 | |
#ifdef SQLITE_TEST
/*
** The implementation of user-defined scalar function fts5_rowid().
**
** The only supported form is fts5_rowid('segment', segid, pgno), which
** returns the value of FTS5_SEGMENT_ROWID(segid, pgno). The first argument
** is compared without regard to case. An error message is returned if the
** function is called with no arguments, with a first argument other than
** 'segment', or with 'segment' and any number of arguments other than 3.
*/
static void fts5RowidFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  const char *zSubject;           /* Text of the first argument */
  i64 iRowid;                     /* Value to return */
  int segid, pgno;

  if( nArg==0 ){
    sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
    return;
  }

  zSubject = (const char*)sqlite3_value_text(apVal[0]);
  if( sqlite3_stricmp(zSubject, "segment")!=0 ){
    sqlite3_result_error(pCtx,
        "first arg to fts5_rowid() must be 'segment'", -1
    );
    return;
  }

  if( nArg!=3 ){
    sqlite3_result_error(pCtx,
        "should be: fts5_rowid('segment', segid, pgno))", -1
    );
    return;
  }

  segid = sqlite3_value_int(apVal[1]);
  pgno = sqlite3_value_int(apVal[2]);
  iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
  sqlite3_result_int64(pCtx, iRowid);
}
#endif /* SQLITE_TEST */
15524 | |
15525 | /* |
15526 | ** This is called as part of registering the FTS5 module with database |
15527 | ** connection db. It registers several user-defined scalar functions useful |
15528 | ** with FTS5. |
15529 | ** |
15530 | ** If successful, SQLITE_OK is returned. If an error occurs, some other |
15531 | ** SQLite error code is returned instead. |
15532 | */ |
15533 | static int sqlite3Fts5IndexInit(sqlite3 *db){ |
15534 | #ifdef SQLITE_TEST |
15535 | int rc = sqlite3_create_function( |
15536 | db, "fts5_decode" , 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 |
15537 | ); |
15538 | |
15539 | if( rc==SQLITE_OK ){ |
15540 | rc = sqlite3_create_function( |
15541 | db, "fts5_decode_none" , 2, |
15542 | SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0 |
15543 | ); |
15544 | } |
15545 | |
15546 | if( rc==SQLITE_OK ){ |
15547 | rc = sqlite3_create_function( |
15548 | db, "fts5_rowid" , -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 |
15549 | ); |
15550 | } |
15551 | return rc; |
15552 | #else |
15553 | return SQLITE_OK; |
15554 | UNUSED_PARAM(db); |
15555 | #endif |
15556 | } |
15557 | |
15558 | |
15559 | static int sqlite3Fts5IndexReset(Fts5Index *p){ |
15560 | assert( p->pStruct==0 || p->iStructVersion!=0 ); |
15561 | if( fts5IndexDataVersion(p)!=p->iStructVersion ){ |
15562 | fts5StructureInvalidate(p); |
15563 | } |
15564 | return fts5IndexReturn(p); |
15565 | } |
15566 | |
15567 | #line 1 "fts5_main.c" |
15568 | /* |
15569 | ** 2014 Jun 09 |
15570 | ** |
15571 | ** The author disclaims copyright to this source code. In place of |
15572 | ** a legal notice, here is a blessing: |
15573 | ** |
15574 | ** May you do good and not evil. |
15575 | ** May you find forgiveness for yourself and forgive others. |
15576 | ** May you share freely, never taking more than you give. |
15577 | ** |
15578 | ****************************************************************************** |
15579 | ** |
15580 | ** This is an SQLite module implementing full-text search. |
15581 | */ |
15582 | |
15583 | |
15584 | /* #include "fts5Int.h" */ |
15585 | |
/*
** This variable is set to false when running tests for which the on disk
** structures should not be corrupt. Otherwise, true. If it is false, extra
** assert() conditions in the fts5 code are activated - conditions that are
** only true if it is guaranteed that the fts5 database is not corrupt.
**
** The variable exists only when SQLITE_DEBUG is defined. It is initialized
** to 1 (true), so the extra assert() conditions are inactive unless it is
** explicitly cleared.
*/
#ifdef SQLITE_DEBUG
int sqlite3_fts5_may_be_corrupt = 1;
#endif
15595 | |
15596 | |
/* Forward declarations of the object types defined in this module. */
typedef struct Fts5FullTable Fts5FullTable;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5Sorter Fts5Sorter;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Auxdata Fts5Auxdata;
typedef struct Fts5TokenizerModule Fts5TokenizerModule;
15603 | |
15604 | /* |
15605 | ** NOTES ON TRANSACTIONS: |
15606 | ** |
15607 | ** SQLite invokes the following virtual table methods as transactions are |
15608 | ** opened and closed by the user: |
15609 | ** |
15610 | ** xBegin(): Start of a new transaction. |
15611 | ** xSync(): Initial part of two-phase commit. |
15612 | ** xCommit(): Final part of two-phase commit. |
15613 | ** xRollback(): Rollback the transaction. |
15614 | ** |
15615 | ** Anything that is required as part of a commit that may fail is performed |
15616 | ** in the xSync() callback. Current versions of SQLite ignore any errors |
15617 | ** returned by xCommit(). |
15618 | ** |
15619 | ** And as sub-transactions are opened/closed: |
15620 | ** |
15621 | ** xSavepoint(int S): Open savepoint S. |
15622 | ** xRelease(int S): Commit and close savepoint S. |
15623 | ** xRollbackTo(int S): Rollback to start of savepoint S. |
15624 | ** |
15625 | ** During a write-transaction the fts5_index.c module may cache some data |
15626 | ** in-memory. It is flushed to disk whenever xSync(), xRelease() or |
15627 | ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() |
15628 | ** is called. |
15629 | ** |
15630 | ** Additionally, if SQLITE_DEBUG is defined, an instance of the following |
15631 | ** structure is used to record the current transaction state. This information |
15632 | ** is not required, but it is used in the assert() statements executed by |
15633 | ** function fts5CheckTransactionState() (see below). |
15634 | */ |
struct Fts5TransactionState {
  /* Transaction state: 0==closed, 1==open, 2==synced */
  int eState;

  /* Savepoint value last recorded by fts5CheckTransactionState(). That
  ** function stores -1 when a transaction begins, the savepoint number
  ** on savepoint/rollback-to and (number-1) on release. */
  int iSavepoint;
};
15639 | |
15640 | /* |
15641 | ** A single object of this type is allocated when the FTS5 module is |
15642 | ** registered with a database handle. It is used to store pointers to |
15643 | ** all registered FTS5 extensions - tokenizers and auxiliary functions. |
15644 | */ |
15645 | struct Fts5Global { |
15646 | fts5_api api; /* User visible part of object (see fts5.h) */ |
15647 | sqlite3 *db; /* Associated database connection */ |
15648 | i64 iNextId; /* Used to allocate unique cursor ids */ |
15649 | Fts5Auxiliary *pAux; /* First in list of all aux. functions */ |
15650 | Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ |
15651 | Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ |
15652 | Fts5Cursor *pCsr; /* First in list of all open cursors */ |
15653 | }; |
15654 | |
15655 | /* |
15656 | ** Each auxiliary function registered with the FTS5 module is represented |
15657 | ** by an object of the following type. All such objects are stored as part |
15658 | ** of the Fts5Global.pAux list. |
15659 | */ |
15660 | struct Fts5Auxiliary { |
15661 | Fts5Global *pGlobal; /* Global context for this function */ |
15662 | char *zFunc; /* Function name (nul-terminated) */ |
15663 | void *pUserData; /* User-data pointer */ |
15664 | fts5_extension_function xFunc; /* Callback function */ |
15665 | void (*xDestroy)(void*); /* Destructor function */ |
15666 | Fts5Auxiliary *pNext; /* Next registered auxiliary function */ |
15667 | }; |
15668 | |
15669 | /* |
15670 | ** Each tokenizer module registered with the FTS5 module is represented |
15671 | ** by an object of the following type. All such objects are stored as part |
15672 | ** of the Fts5Global.pTok list. |
15673 | */ |
15674 | struct Fts5TokenizerModule { |
15675 | char *zName; /* Name of tokenizer */ |
15676 | void *pUserData; /* User pointer passed to xCreate() */ |
15677 | fts5_tokenizer x; /* Tokenizer functions */ |
15678 | void (*xDestroy)(void*); /* Destructor function */ |
15679 | Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ |
15680 | }; |
15681 | |
15682 | struct Fts5FullTable { |
15683 | Fts5Table p; /* Public class members from fts5Int.h */ |
15684 | Fts5Storage *pStorage; /* Document store */ |
15685 | Fts5Global *pGlobal; /* Global (connection wide) data */ |
15686 | Fts5Cursor *pSortCsr; /* Sort data from this cursor */ |
15687 | #ifdef SQLITE_DEBUG |
15688 | struct Fts5TransactionState ts; |
15689 | #endif |
15690 | }; |
15691 | |
15692 | struct Fts5MatchPhrase { |
15693 | Fts5Buffer *pPoslist; /* Pointer to current poslist */ |
15694 | int nTerm; /* Size of phrase in terms */ |
15695 | }; |
15696 | |
15697 | /* |
15698 | ** pStmt: |
15699 | ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank; |
15700 | ** |
15701 | ** aIdx[]: |
15702 | ** There is one entry in the aIdx[] array for each phrase in the query, |
15703 | ** the value of which is the offset within aPoslist[] following the last |
15704 | ** byte of the position list for the corresponding phrase. |
15705 | */ |
15706 | struct Fts5Sorter { |
15707 | sqlite3_stmt *pStmt; |
15708 | i64 iRowid; /* Current rowid */ |
15709 | const u8 *aPoslist; /* Position lists for current row */ |
15710 | int nIdx; /* Number of entries in aIdx[] */ |
15711 | int aIdx[1]; /* Offsets into aPoslist for current row */ |
15712 | }; |
15713 | |
15714 | |
15715 | /* |
15716 | ** Virtual-table cursor object. |
15717 | ** |
15718 | ** iSpecial: |
15719 | ** If this is a 'special' query (refer to function fts5SpecialMatch()), |
15720 | ** then this variable contains the result of the query. |
15721 | ** |
15722 | ** iFirstRowid, iLastRowid: |
15723 | ** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the |
15724 | ** cursor iterates in ascending order of rowids, iFirstRowid is the lower |
15725 | ** limit of rowids to return, and iLastRowid the upper. In other words, the |
15726 | ** WHERE clause in the user's query might have been: |
15727 | ** |
15728 | ** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid |
15729 | ** |
15730 | ** If the cursor iterates in descending order of rowid, iFirstRowid |
15731 | ** is the upper limit (i.e. the "first" rowid visited) and iLastRowid |
15732 | ** the lower. |
15733 | */ |
15734 | struct Fts5Cursor { |
15735 | sqlite3_vtab_cursor base; /* Base class used by SQLite core */ |
15736 | Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ |
15737 | int *aColumnSize; /* Values for xColumnSize() */ |
15738 | i64 iCsrId; /* Cursor id */ |
15739 | |
15740 | /* Zero from this point onwards on cursor reset */ |
15741 | int ePlan; /* FTS5_PLAN_XXX value */ |
15742 | int bDesc; /* True for "ORDER BY rowid DESC" queries */ |
15743 | i64 iFirstRowid; /* Return no rowids earlier than this */ |
15744 | i64 iLastRowid; /* Return no rowids later than this */ |
15745 | sqlite3_stmt *pStmt; /* Statement used to read %_content */ |
15746 | Fts5Expr *pExpr; /* Expression for MATCH queries */ |
15747 | Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ |
15748 | int csrflags; /* Mask of cursor flags (see below) */ |
15749 | i64 iSpecial; /* Result of special query */ |
15750 | |
15751 | /* "rank" function. Populated on demand from vtab.xColumn(). */ |
15752 | char *zRank; /* Custom rank function */ |
15753 | char *zRankArgs; /* Custom rank function args */ |
15754 | Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ |
15755 | int nRankArg; /* Number of trailing arguments for rank() */ |
15756 | sqlite3_value **apRankArg; /* Array of trailing arguments */ |
15757 | sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */ |
15758 | |
15759 | /* Auxiliary data storage */ |
15760 | Fts5Auxiliary *pAux; /* Currently executing extension function */ |
15761 | Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ |
15762 | |
15763 | /* Cache used by auxiliary functions xInst() and xInstCount() */ |
15764 | Fts5PoslistReader *aInstIter; /* One for each phrase */ |
15765 | int nInstAlloc; /* Size of aInst[] array (entries / 3) */ |
15766 | int nInstCount; /* Number of phrase instances */ |
15767 | int *aInst; /* 3 integers per phrase instance */ |
15768 | }; |
15769 | |
/*
** Flag bits for the "idxNum" parameter that xBestIndex() passes indirectly
** to xFilter(). The first group names WHERE clause constraints, the second
** group describes the ORDER BY clause.
*/
#define FTS5_BI_MATCH        0x01     /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x02     /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x04     /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x08     /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x10     /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x20     /* ORDER BY rank */
#define FTS5_BI_ORDER_ROWID  0x40     /* ORDER BY rowid */
#define FTS5_BI_ORDER_DESC   0x80     /* Sort order is descending */
15783 | |
/*
** Bit values stored in Fts5Cursor.csrflags. Use the CsrFlagSet(),
** CsrFlagClear() and CsrFlagTest() macros to manipulate them.
*/
#define FTS5CSR_EOF               0x0001
#define FTS5CSR_REQUIRE_CONTENT   0x0002
#define FTS5CSR_REQUIRE_DOCSIZE   0x0004
#define FTS5CSR_REQUIRE_INST      0x0008
#define FTS5CSR_FREE_ZRANK        0x0010
#define FTS5CSR_REQUIRE_RESEEK    0x0020
#define FTS5CSR_REQUIRE_POSLIST   0x0040
15794 | |
/* True if every bit of mask y is set in x. */
#define BitFlagAllTest(x,y) ((y)==((x) & (y)))
/* True if at least one bit of mask y is set in x. */
#define BitFlagTest(x,y)    (0!=((x) & (y)))
15797 | |
15798 | |
/*
** Helpers operating on Fts5Cursor.csrflags: CsrFlagSet() ORs the mask in,
** CsrFlagClear() removes it and CsrFlagTest() yields the subset of the
** mask that is currently set (zero if none).
*/
#define CsrFlagSet(pCur, mask)    ((pCur)->csrflags |= (mask))
#define CsrFlagClear(pCur, mask)  ((pCur)->csrflags &= ~(mask))
#define CsrFlagTest(pCur, mask)   ((pCur)->csrflags & (mask))
15805 | |
15806 | struct Fts5Auxdata { |
15807 | Fts5Auxiliary *pAux; /* Extension to which this belongs */ |
15808 | void *pPtr; /* Pointer value */ |
15809 | void(*xDelete)(void*); /* Destructor */ |
15810 | Fts5Auxdata *pNext; /* Next object in linked list */ |
15811 | }; |
15812 | |
#ifdef SQLITE_DEBUG
/* Operation codes accepted by fts5CheckTransactionState(). */
#define FTS5_BEGIN       1
#define FTS5_SYNC        2
#define FTS5_COMMIT      3
#define FTS5_ROLLBACK    4
#define FTS5_SAVEPOINT   5
#define FTS5_RELEASE     6
#define FTS5_ROLLBACKTO  7

/*
** Debug-only bookkeeping. Assert that operation op (one of the FTS5_XXX
** codes above) is legal given the transaction state recorded in p->ts,
** then update p->ts to reflect it. iSavepoint is only meaningful for the
** savepoint related operations. Unknown op values are ignored.
**
** In non-debug builds this is a macro that expands to nothing.
*/
static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){
  struct Fts5TransactionState *pTs = &p->ts;

  switch( op ){
    case FTS5_BEGIN:
      /* A transaction may only be opened when none is open. */
      assert( pTs->eState==0 );
      pTs->eState = 1;
      pTs->iSavepoint = -1;
      break;

    case FTS5_SYNC:
      assert( pTs->eState==1 || pTs->eState==2 );
      pTs->eState = 2;
      break;

    case FTS5_COMMIT:
      /* xCommit() must have been preceded by xSync(). */
      assert( pTs->eState==2 );
      pTs->eState = 0;
      break;

    case FTS5_ROLLBACK:
      /* A rollback is permitted in any of the three states. */
      assert( pTs->eState>=0 && pTs->eState<=2 );
      pTs->eState = 0;
      break;

    case FTS5_SAVEPOINT:
      assert( pTs->eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint>=pTs->iSavepoint );
      pTs->iSavepoint = iSavepoint;
      break;

    case FTS5_RELEASE:
      assert( pTs->eState>=1 );
      assert( iSavepoint>=0 );
      assert( iSavepoint<=pTs->iSavepoint );
      pTs->iSavepoint = iSavepoint-1;
      break;

    case FTS5_ROLLBACKTO:
      assert( pTs->eState>=1 );
      assert( iSavepoint>=-1 );
      /* (iSavepoint<=pTs->iSavepoint) is deliberately not asserted here.
      ** It can fail if another vtab strikes an error within an
      ** xSavepoint() call and SQLite then calls xRollbackTo() - without
      ** having called xSavepoint() on this vtab. */
      pTs->iSavepoint = iSavepoint;
      break;

    default:
      break;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
15872 | |
15873 | /* |
15874 | ** Return true if pTab is a contentless table. |
15875 | */ |
15876 | static int fts5IsContentless(Fts5FullTable *pTab){ |
15877 | return pTab->p.pConfig->eContent==FTS5_CONTENT_NONE; |
15878 | } |
15879 | |
15880 | /* |
15881 | ** Delete a virtual table handle allocated by fts5InitVtab(). |
15882 | */ |
15883 | static void fts5FreeVtab(Fts5FullTable *pTab){ |
15884 | if( pTab ){ |
15885 | sqlite3Fts5IndexClose(pTab->p.pIndex); |
15886 | sqlite3Fts5StorageClose(pTab->pStorage); |
15887 | sqlite3Fts5ConfigFree(pTab->p.pConfig); |
15888 | sqlite3_free(pTab); |
15889 | } |
15890 | } |
15891 | |
15892 | /* |
15893 | ** The xDisconnect() virtual table method. |
15894 | */ |
15895 | static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ |
15896 | fts5FreeVtab((Fts5FullTable*)pVtab); |
15897 | return SQLITE_OK; |
15898 | } |
15899 | |
15900 | /* |
15901 | ** The xDestroy() virtual table method. |
15902 | */ |
15903 | static int fts5DestroyMethod(sqlite3_vtab *pVtab){ |
15904 | Fts5Table *pTab = (Fts5Table*)pVtab; |
15905 | int rc = sqlite3Fts5DropAll(pTab->pConfig); |
15906 | if( rc==SQLITE_OK ){ |
15907 | fts5FreeVtab((Fts5FullTable*)pVtab); |
15908 | } |
15909 | return rc; |
15910 | } |
15911 | |
15912 | /* |
15913 | ** This function is the implementation of both the xConnect and xCreate |
15914 | ** methods of the FTS3 virtual table. |
15915 | ** |
15916 | ** The argv[] array contains the following: |
15917 | ** |
15918 | ** argv[0] -> module name ("fts5") |
15919 | ** argv[1] -> database name |
15920 | ** argv[2] -> table name |
15921 | ** argv[...] -> "column name" and other module argument fields. |
15922 | */ |
15923 | static int fts5InitVtab( |
15924 | int bCreate, /* True for xCreate, false for xConnect */ |
15925 | sqlite3 *db, /* The SQLite database connection */ |
15926 | void *pAux, /* Hash table containing tokenizers */ |
15927 | int argc, /* Number of elements in argv array */ |
15928 | const char * const *argv, /* xCreate/xConnect argument array */ |
15929 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ |
15930 | char **pzErr /* Write any error message here */ |
15931 | ){ |
15932 | Fts5Global *pGlobal = (Fts5Global*)pAux; |
15933 | const char **azConfig = (const char**)argv; |
15934 | int rc = SQLITE_OK; /* Return code */ |
15935 | Fts5Config *pConfig = 0; /* Results of parsing argc/argv */ |
15936 | Fts5FullTable *pTab = 0; /* New virtual table object */ |
15937 | |
15938 | /* Allocate the new vtab object and parse the configuration */ |
15939 | pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable)); |
15940 | if( rc==SQLITE_OK ){ |
15941 | rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); |
15942 | assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); |
15943 | } |
15944 | if( rc==SQLITE_OK ){ |
15945 | pTab->p.pConfig = pConfig; |
15946 | pTab->pGlobal = pGlobal; |
15947 | } |
15948 | |
15949 | /* Open the index sub-system */ |
15950 | if( rc==SQLITE_OK ){ |
15951 | rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr); |
15952 | } |
15953 | |
15954 | /* Open the storage sub-system */ |
15955 | if( rc==SQLITE_OK ){ |
15956 | rc = sqlite3Fts5StorageOpen( |
15957 | pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr |
15958 | ); |
15959 | } |
15960 | |
15961 | /* Call sqlite3_declare_vtab() */ |
15962 | if( rc==SQLITE_OK ){ |
15963 | rc = sqlite3Fts5ConfigDeclareVtab(pConfig); |
15964 | } |
15965 | |
15966 | /* Load the initial configuration */ |
15967 | if( rc==SQLITE_OK ){ |
15968 | assert( pConfig->pzErrmsg==0 ); |
15969 | pConfig->pzErrmsg = pzErr; |
15970 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); |
15971 | sqlite3Fts5IndexRollback(pTab->p.pIndex); |
15972 | pConfig->pzErrmsg = 0; |
15973 | } |
15974 | |
15975 | if( rc!=SQLITE_OK ){ |
15976 | fts5FreeVtab(pTab); |
15977 | pTab = 0; |
15978 | }else if( bCreate ){ |
15979 | fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); |
15980 | } |
15981 | *ppVTab = (sqlite3_vtab*)pTab; |
15982 | return rc; |
15983 | } |
15984 | |
15985 | /* |
15986 | ** The xConnect() and xCreate() methods for the virtual table. All the |
15987 | ** work is done in function fts5InitVtab(). |
15988 | */ |
15989 | static int fts5ConnectMethod( |
15990 | sqlite3 *db, /* Database connection */ |
15991 | void *pAux, /* Pointer to tokenizer hash table */ |
15992 | int argc, /* Number of elements in argv array */ |
15993 | const char * const *argv, /* xCreate/xConnect argument array */ |
15994 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
15995 | char **pzErr /* OUT: sqlite3_malloc'd error message */ |
15996 | ){ |
15997 | return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); |
15998 | } |
15999 | static int fts5CreateMethod( |
16000 | sqlite3 *db, /* Database connection */ |
16001 | void *pAux, /* Pointer to tokenizer hash table */ |
16002 | int argc, /* Number of elements in argv array */ |
16003 | const char * const *argv, /* xCreate/xConnect argument array */ |
16004 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
16005 | char **pzErr /* OUT: sqlite3_malloc'd error message */ |
16006 | ){ |
16007 | return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); |
16008 | } |
16009 | |
/*
** The different query plans, as stored in Fts5Cursor.ePlan.
*/
#define FTS5_PLAN_MATCH          1    /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2    /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3    /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4    /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5    /* No usable constraint */
#define FTS5_PLAN_ROWID          6    /* (rowid = ?) */
16019 | |
16020 | /* |
16021 | ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this |
16022 | ** extension is currently being used by a version of SQLite too old to |
16023 | ** support index-info flags. In that case this function is a no-op. |
16024 | */ |
16025 | static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){ |
16026 | #if SQLITE_VERSION_NUMBER>=3008012 |
16027 | #ifndef SQLITE_CORE |
16028 | if( sqlite3_libversion_number()>=3008012 ) |
16029 | #endif |
16030 | { |
16031 | pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE; |
16032 | } |
16033 | #endif |
16034 | } |
16035 | |
16036 | static int fts5UsePatternMatch( |
16037 | Fts5Config *pConfig, |
16038 | struct sqlite3_index_constraint *p |
16039 | ){ |
16040 | assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB ); |
16041 | assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE ); |
16042 | if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){ |
16043 | return 1; |
16044 | } |
16045 | if( pConfig->ePattern==FTS5_PATTERN_LIKE |
16046 | && (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB) |
16047 | ){ |
16048 | return 1; |
16049 | } |
16050 | return 0; |
16051 | } |
16052 | |
16053 | /* |
16054 | ** Implementation of the xBestIndex method for FTS5 tables. Within the |
16055 | ** WHERE constraint, it searches for the following: |
16056 | ** |
16057 | ** 1. A MATCH constraint against the table column. |
16058 | ** 2. A MATCH constraint against the "rank" column. |
16059 | ** 3. A MATCH constraint against some other column. |
16060 | ** 4. An == constraint against the rowid column. |
16061 | ** 5. A < or <= constraint against the rowid column. |
16062 | ** 6. A > or >= constraint against the rowid column. |
16063 | ** |
16064 | ** Within the ORDER BY, the following are supported: |
16065 | ** |
16066 | ** 5. ORDER BY rank [ASC|DESC] |
16067 | ** 6. ORDER BY rowid [ASC|DESC] |
16068 | ** |
16069 | ** Information for the xFilter call is passed via both the idxNum and |
16070 | ** idxStr variables. Specifically, idxNum is a bitmask of the following |
16071 | ** flags used to encode the ORDER BY clause: |
16072 | ** |
16073 | ** FTS5_BI_ORDER_RANK |
16074 | ** FTS5_BI_ORDER_ROWID |
16075 | ** FTS5_BI_ORDER_DESC |
16076 | ** |
16077 | ** idxStr is used to encode data from the WHERE clause. For each argument |
16078 | ** passed to the xFilter method, the following is appended to idxStr: |
16079 | ** |
16080 | ** Match against table column: "m" |
16081 | ** Match against rank column: "r" |
16082 | ** Match against other column: "M<column-number>" |
16083 | ** LIKE against other column: "L<column-number>" |
16084 | ** GLOB against other column: "G<column-number>" |
16085 | ** Equality constraint against the rowid: "=" |
16086 | ** A < or <= against the rowid: "<" |
16087 | ** A > or >= against the rowid: ">" |
16088 | ** |
16089 | ** This function ensures that there is at most one "r" or "=". And that if |
16090 | ** there exists an "=" then there is no "<" or ">". |
16091 | ** |
16092 | ** Costs are assigned as follows: |
16093 | ** |
**   a) If an unusable MATCH operator is present in the WHERE clause, the
**      cost is unconditionally set to 1e50 (a really big number).
**
**   b) If a MATCH operator is present, the cost depends on the other
**      constraints also present. As follows:
**
**        * No other constraints:         cost=1000.0
**        * One rowid range constraint:   cost=750.0
**        * Both rowid range constraints: cost=500.0
**        * An == rowid constraint:       cost=100.0
**
**   c) Otherwise, if there is no MATCH:
**
**        * No other constraints:         cost=1000000.0
**        * One rowid range constraint:   cost=750000.0
**        * Both rowid range constraints: cost=250000.0
**        * An == rowid constraint:       cost=10.0
16111 | ** |
16112 | ** Costs are not modified by the ORDER BY clause. |
16113 | */ |
16114 | static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ |
16115 | Fts5Table *pTab = (Fts5Table*)pVTab; |
16116 | Fts5Config *pConfig = pTab->pConfig; |
16117 | const int nCol = pConfig->nCol; |
16118 | int idxFlags = 0; /* Parameter passed through to xFilter() */ |
16119 | int i; |
16120 | |
16121 | char *idxStr; |
16122 | int iIdxStr = 0; |
16123 | int iCons = 0; |
16124 | |
16125 | int bSeenEq = 0; |
16126 | int bSeenGt = 0; |
16127 | int bSeenLt = 0; |
16128 | int bSeenMatch = 0; |
16129 | int bSeenRank = 0; |
16130 | |
16131 | |
16132 | assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH ); |
16133 | assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH ); |
16134 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH ); |
16135 | assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH ); |
16136 | assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH ); |
16137 | |
16138 | if( pConfig->bLock ){ |
16139 | pTab->base.zErrMsg = sqlite3_mprintf( |
16140 | "recursively defined fts5 content table" |
16141 | ); |
16142 | return SQLITE_ERROR; |
16143 | } |
16144 | |
16145 | idxStr = (char*)sqlite3_malloc(pInfo->nConstraint * 8 + 1); |
16146 | if( idxStr==0 ) return SQLITE_NOMEM; |
16147 | pInfo->idxStr = idxStr; |
16148 | pInfo->needToFreeIdxStr = 1; |
16149 | |
16150 | for(i=0; i<pInfo->nConstraint; i++){ |
16151 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; |
16152 | int iCol = p->iColumn; |
16153 | if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH |
16154 | || (p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol>=nCol) |
16155 | ){ |
16156 | /* A MATCH operator or equivalent */ |
16157 | if( p->usable==0 || iCol<0 ){ |
16158 | /* As there exists an unusable MATCH constraint this is an |
16159 | ** unusable plan. Set a prohibitively high cost. */ |
16160 | pInfo->estimatedCost = 1e50; |
16161 | assert( iIdxStr < pInfo->nConstraint*6 + 1 ); |
16162 | idxStr[iIdxStr] = 0; |
16163 | return SQLITE_OK; |
16164 | }else{ |
16165 | if( iCol==nCol+1 ){ |
16166 | if( bSeenRank ) continue; |
16167 | idxStr[iIdxStr++] = 'r'; |
16168 | bSeenRank = 1; |
16169 | }else if( iCol>=0 ){ |
16170 | bSeenMatch = 1; |
16171 | idxStr[iIdxStr++] = 'M'; |
16172 | sqlite3_snprintf(6, &idxStr[iIdxStr], "%d" , iCol); |
16173 | idxStr += strlen(&idxStr[iIdxStr]); |
16174 | assert( idxStr[iIdxStr]=='\0' ); |
16175 | } |
16176 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; |
16177 | pInfo->aConstraintUsage[i].omit = 1; |
16178 | } |
16179 | }else if( p->usable ){ |
16180 | if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){ |
16181 | assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB ); |
16182 | idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE ? 'L' : 'G'; |
16183 | sqlite3_snprintf(6, &idxStr[iIdxStr], "%d" , iCol); |
16184 | idxStr += strlen(&idxStr[iIdxStr]); |
16185 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; |
16186 | assert( idxStr[iIdxStr]=='\0' ); |
16187 | }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){ |
16188 | idxStr[iIdxStr++] = '='; |
16189 | bSeenEq = 1; |
16190 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; |
16191 | } |
16192 | } |
16193 | } |
16194 | |
16195 | if( bSeenEq==0 ){ |
16196 | for(i=0; i<pInfo->nConstraint; i++){ |
16197 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; |
16198 | if( p->iColumn<0 && p->usable ){ |
16199 | int op = p->op; |
16200 | if( op==SQLITE_INDEX_CONSTRAINT_LT || op==SQLITE_INDEX_CONSTRAINT_LE ){ |
16201 | if( bSeenLt ) continue; |
16202 | idxStr[iIdxStr++] = '<'; |
16203 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; |
16204 | bSeenLt = 1; |
16205 | }else |
16206 | if( op==SQLITE_INDEX_CONSTRAINT_GT || op==SQLITE_INDEX_CONSTRAINT_GE ){ |
16207 | if( bSeenGt ) continue; |
16208 | idxStr[iIdxStr++] = '>'; |
16209 | pInfo->aConstraintUsage[i].argvIndex = ++iCons; |
16210 | bSeenGt = 1; |
16211 | } |
16212 | } |
16213 | } |
16214 | } |
16215 | idxStr[iIdxStr] = '\0'; |
16216 | |
16217 | /* Set idxFlags flags for the ORDER BY clause */ |
16218 | if( pInfo->nOrderBy==1 ){ |
16219 | int iSort = pInfo->aOrderBy[0].iColumn; |
16220 | if( iSort==(pConfig->nCol+1) && bSeenMatch ){ |
16221 | idxFlags |= FTS5_BI_ORDER_RANK; |
16222 | }else if( iSort==-1 ){ |
16223 | idxFlags |= FTS5_BI_ORDER_ROWID; |
16224 | } |
16225 | if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){ |
16226 | pInfo->orderByConsumed = 1; |
16227 | if( pInfo->aOrderBy[0].desc ){ |
16228 | idxFlags |= FTS5_BI_ORDER_DESC; |
16229 | } |
16230 | } |
16231 | } |
16232 | |
16233 | /* Calculate the estimated cost based on the flags set in idxFlags. */ |
16234 | if( bSeenEq ){ |
16235 | pInfo->estimatedCost = bSeenMatch ? 100.0 : 10.0; |
16236 | if( bSeenMatch==0 ) fts5SetUniqueFlag(pInfo); |
16237 | }else if( bSeenLt && bSeenGt ){ |
16238 | pInfo->estimatedCost = bSeenMatch ? 500.0 : 250000.0; |
16239 | }else if( bSeenLt || bSeenGt ){ |
16240 | pInfo->estimatedCost = bSeenMatch ? 750.0 : 750000.0; |
16241 | }else{ |
16242 | pInfo->estimatedCost = bSeenMatch ? 1000.0 : 1000000.0; |
16243 | } |
16244 | |
16245 | pInfo->idxNum = idxFlags; |
16246 | return SQLITE_OK; |
16247 | } |
16248 | |
16249 | static int fts5NewTransaction(Fts5FullTable *pTab){ |
16250 | Fts5Cursor *pCsr; |
16251 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ |
16252 | if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK; |
16253 | } |
16254 | return sqlite3Fts5StorageReset(pTab->pStorage); |
16255 | } |
16256 | |
16257 | /* |
16258 | ** Implementation of xOpen method. |
16259 | */ |
16260 | static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ |
16261 | Fts5FullTable *pTab = (Fts5FullTable*)pVTab; |
16262 | Fts5Config *pConfig = pTab->p.pConfig; |
16263 | Fts5Cursor *pCsr = 0; /* New cursor object */ |
16264 | sqlite3_int64 nByte; /* Bytes of space to allocate */ |
16265 | int rc; /* Return code */ |
16266 | |
16267 | rc = fts5NewTransaction(pTab); |
16268 | if( rc==SQLITE_OK ){ |
16269 | nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); |
16270 | pCsr = (Fts5Cursor*)sqlite3_malloc64(nByte); |
16271 | if( pCsr ){ |
16272 | Fts5Global *pGlobal = pTab->pGlobal; |
16273 | memset(pCsr, 0, (size_t)nByte); |
16274 | pCsr->aColumnSize = (int*)&pCsr[1]; |
16275 | pCsr->pNext = pGlobal->pCsr; |
16276 | pGlobal->pCsr = pCsr; |
16277 | pCsr->iCsrId = ++pGlobal->iNextId; |
16278 | }else{ |
16279 | rc = SQLITE_NOMEM; |
16280 | } |
16281 | } |
16282 | *ppCsr = (sqlite3_vtab_cursor*)pCsr; |
16283 | return rc; |
16284 | } |
16285 | |
16286 | static int fts5StmtType(Fts5Cursor *pCsr){ |
16287 | if( pCsr->ePlan==FTS5_PLAN_SCAN ){ |
16288 | return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC; |
16289 | } |
16290 | return FTS5_STMT_LOOKUP; |
16291 | } |
16292 | |
16293 | /* |
16294 | ** This function is called after the cursor passed as the only argument |
16295 | ** is moved to point at a different row. It clears all cached data |
16296 | ** specific to the previous row stored by the cursor object. |
16297 | */ |
16298 | static void fts5CsrNewrow(Fts5Cursor *pCsr){ |
16299 | CsrFlagSet(pCsr, |
16300 | FTS5CSR_REQUIRE_CONTENT |
16301 | | FTS5CSR_REQUIRE_DOCSIZE |
16302 | | FTS5CSR_REQUIRE_INST |
16303 | | FTS5CSR_REQUIRE_POSLIST |
16304 | ); |
16305 | } |
16306 | |
16307 | static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ |
16308 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
16309 | Fts5Auxdata *pData; |
16310 | Fts5Auxdata *pNext; |
16311 | |
16312 | sqlite3_free(pCsr->aInstIter); |
16313 | sqlite3_free(pCsr->aInst); |
16314 | if( pCsr->pStmt ){ |
16315 | int eStmt = fts5StmtType(pCsr); |
16316 | sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); |
16317 | } |
16318 | if( pCsr->pSorter ){ |
16319 | Fts5Sorter *pSorter = pCsr->pSorter; |
16320 | sqlite3_finalize(pSorter->pStmt); |
16321 | sqlite3_free(pSorter); |
16322 | } |
16323 | |
16324 | if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){ |
16325 | sqlite3Fts5ExprFree(pCsr->pExpr); |
16326 | } |
16327 | |
16328 | for(pData=pCsr->pAuxdata; pData; pData=pNext){ |
16329 | pNext = pData->pNext; |
16330 | if( pData->xDelete ) pData->xDelete(pData->pPtr); |
16331 | sqlite3_free(pData); |
16332 | } |
16333 | |
16334 | sqlite3_finalize(pCsr->pRankArgStmt); |
16335 | sqlite3_free(pCsr->apRankArg); |
16336 | |
16337 | if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ |
16338 | sqlite3_free(pCsr->zRank); |
16339 | sqlite3_free(pCsr->zRankArgs); |
16340 | } |
16341 | |
16342 | sqlite3Fts5IndexCloseReader(pTab->p.pIndex); |
16343 | memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr)); |
16344 | } |
16345 | |
16346 | |
16347 | /* |
16348 | ** Close the cursor. For additional information see the documentation |
16349 | ** on the xClose method of the virtual table interface. |
16350 | */ |
16351 | static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ |
16352 | if( pCursor ){ |
16353 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); |
16354 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
16355 | Fts5Cursor **pp; |
16356 | |
16357 | fts5FreeCursorComponents(pCsr); |
16358 | /* Remove the cursor from the Fts5Global.pCsr list */ |
16359 | for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); |
16360 | *pp = pCsr->pNext; |
16361 | |
16362 | sqlite3_free(pCsr); |
16363 | } |
16364 | return SQLITE_OK; |
16365 | } |
16366 | |
16367 | static int fts5SorterNext(Fts5Cursor *pCsr){ |
16368 | Fts5Sorter *pSorter = pCsr->pSorter; |
16369 | int rc; |
16370 | |
16371 | rc = sqlite3_step(pSorter->pStmt); |
16372 | if( rc==SQLITE_DONE ){ |
16373 | rc = SQLITE_OK; |
16374 | CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT); |
16375 | }else if( rc==SQLITE_ROW ){ |
16376 | const u8 *a; |
16377 | const u8 *aBlob; |
16378 | int nBlob; |
16379 | int i; |
16380 | int iOff = 0; |
16381 | rc = SQLITE_OK; |
16382 | |
16383 | pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); |
16384 | nBlob = sqlite3_column_bytes(pSorter->pStmt, 1); |
16385 | aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1); |
16386 | |
16387 | /* nBlob==0 in detail=none mode. */ |
16388 | if( nBlob>0 ){ |
16389 | for(i=0; i<(pSorter->nIdx-1); i++){ |
16390 | int iVal; |
16391 | a += fts5GetVarint32(a, iVal); |
16392 | iOff += iVal; |
16393 | pSorter->aIdx[i] = iOff; |
16394 | } |
16395 | pSorter->aIdx[i] = &aBlob[nBlob] - a; |
16396 | pSorter->aPoslist = a; |
16397 | } |
16398 | |
16399 | fts5CsrNewrow(pCsr); |
16400 | } |
16401 | |
16402 | return rc; |
16403 | } |
16404 | |
16405 | |
16406 | /* |
16407 | ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors |
16408 | ** open on table pTab. |
16409 | */ |
16410 | static void fts5TripCursors(Fts5FullTable *pTab){ |
16411 | Fts5Cursor *pCsr; |
16412 | for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ |
16413 | if( pCsr->ePlan==FTS5_PLAN_MATCH |
16414 | && pCsr->base.pVtab==(sqlite3_vtab*)pTab |
16415 | ){ |
16416 | CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK); |
16417 | } |
16418 | } |
16419 | } |
16420 | |
16421 | /* |
16422 | ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first |
16423 | ** argument, close and reopen all Fts5IndexIter iterators that the cursor |
16424 | ** is using. Then attempt to move the cursor to a rowid equal to or laster |
16425 | ** (in the cursors sort order - ASC or DESC) than the current rowid. |
16426 | ** |
16427 | ** If the new rowid is not equal to the old, set output parameter *pbSkip |
16428 | ** to 1 before returning. Otherwise, leave it unchanged. |
16429 | ** |
16430 | ** Return SQLITE_OK if successful or if no reseek was required, or an |
16431 | ** error code if an error occurred. |
16432 | */ |
16433 | static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ |
16434 | int rc = SQLITE_OK; |
16435 | assert( *pbSkip==0 ); |
16436 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){ |
16437 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
16438 | int bDesc = pCsr->bDesc; |
16439 | i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); |
16440 | |
16441 | rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc); |
16442 | if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ |
16443 | *pbSkip = 1; |
16444 | } |
16445 | |
16446 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK); |
16447 | fts5CsrNewrow(pCsr); |
16448 | if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ |
16449 | CsrFlagSet(pCsr, FTS5CSR_EOF); |
16450 | *pbSkip = 1; |
16451 | } |
16452 | } |
16453 | return rc; |
16454 | } |
16455 | |
16456 | |
16457 | /* |
16458 | ** Advance the cursor to the next row in the table that matches the |
16459 | ** search criteria. |
16460 | ** |
16461 | ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned |
16462 | ** even if we reach end-of-file. The fts5EofMethod() will be called |
16463 | ** subsequently to determine whether or not an EOF was hit. |
16464 | */ |
16465 | static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ |
16466 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
16467 | int rc; |
16468 | |
16469 | assert( (pCsr->ePlan<3)== |
16470 | (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE) |
16471 | ); |
16472 | assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) ); |
16473 | |
16474 | if( pCsr->ePlan<3 ){ |
16475 | int bSkip = 0; |
16476 | if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; |
16477 | rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); |
16478 | CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr)); |
16479 | fts5CsrNewrow(pCsr); |
16480 | }else{ |
16481 | switch( pCsr->ePlan ){ |
16482 | case FTS5_PLAN_SPECIAL: { |
16483 | CsrFlagSet(pCsr, FTS5CSR_EOF); |
16484 | rc = SQLITE_OK; |
16485 | break; |
16486 | } |
16487 | |
16488 | case FTS5_PLAN_SORTED_MATCH: { |
16489 | rc = fts5SorterNext(pCsr); |
16490 | break; |
16491 | } |
16492 | |
16493 | default: { |
16494 | Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig; |
16495 | pConfig->bLock++; |
16496 | rc = sqlite3_step(pCsr->pStmt); |
16497 | pConfig->bLock--; |
16498 | if( rc!=SQLITE_ROW ){ |
16499 | CsrFlagSet(pCsr, FTS5CSR_EOF); |
16500 | rc = sqlite3_reset(pCsr->pStmt); |
16501 | if( rc!=SQLITE_OK ){ |
16502 | pCursor->pVtab->zErrMsg = sqlite3_mprintf( |
16503 | "%s" , sqlite3_errmsg(pConfig->db) |
16504 | ); |
16505 | } |
16506 | }else{ |
16507 | rc = SQLITE_OK; |
16508 | } |
16509 | break; |
16510 | } |
16511 | } |
16512 | } |
16513 | |
16514 | return rc; |
16515 | } |
16516 | |
16517 | |
16518 | static int fts5PrepareStatement( |
16519 | sqlite3_stmt **ppStmt, |
16520 | Fts5Config *pConfig, |
16521 | const char *zFmt, |
16522 | ... |
16523 | ){ |
16524 | sqlite3_stmt *pRet = 0; |
16525 | int rc; |
16526 | char *zSql; |
16527 | va_list ap; |
16528 | |
16529 | va_start(ap, zFmt); |
16530 | zSql = sqlite3_vmprintf(zFmt, ap); |
16531 | if( zSql==0 ){ |
16532 | rc = SQLITE_NOMEM; |
16533 | }else{ |
16534 | rc = sqlite3_prepare_v3(pConfig->db, zSql, -1, |
16535 | SQLITE_PREPARE_PERSISTENT, &pRet, 0); |
16536 | if( rc!=SQLITE_OK ){ |
16537 | *pConfig->pzErrmsg = sqlite3_mprintf("%s" , sqlite3_errmsg(pConfig->db)); |
16538 | } |
16539 | sqlite3_free(zSql); |
16540 | } |
16541 | |
16542 | va_end(ap); |
16543 | *ppStmt = pRet; |
16544 | return rc; |
16545 | } |
16546 | |
16547 | static int fts5CursorFirstSorted( |
16548 | Fts5FullTable *pTab, |
16549 | Fts5Cursor *pCsr, |
16550 | int bDesc |
16551 | ){ |
16552 | Fts5Config *pConfig = pTab->p.pConfig; |
16553 | Fts5Sorter *pSorter; |
16554 | int nPhrase; |
16555 | sqlite3_int64 nByte; |
16556 | int rc; |
16557 | const char *zRank = pCsr->zRank; |
16558 | const char *zRankArgs = pCsr->zRankArgs; |
16559 | |
16560 | nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
16561 | nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1); |
16562 | pSorter = (Fts5Sorter*)sqlite3_malloc64(nByte); |
16563 | if( pSorter==0 ) return SQLITE_NOMEM; |
16564 | memset(pSorter, 0, (size_t)nByte); |
16565 | pSorter->nIdx = nPhrase; |
16566 | |
16567 | /* TODO: It would be better to have some system for reusing statement |
16568 | ** handles here, rather than preparing a new one for each query. But that |
16569 | ** is not possible as SQLite reference counts the virtual table objects. |
16570 | ** And since the statement required here reads from this very virtual |
16571 | ** table, saving it creates a circular reference. |
16572 | ** |
16573 | ** If SQLite a built-in statement cache, this wouldn't be a problem. */ |
16574 | rc = fts5PrepareStatement(&pSorter->pStmt, pConfig, |
16575 | "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s" , |
16576 | pConfig->zDb, pConfig->zName, zRank, pConfig->zName, |
16577 | (zRankArgs ? ", " : "" ), |
16578 | (zRankArgs ? zRankArgs : "" ), |
16579 | bDesc ? "DESC" : "ASC" |
16580 | ); |
16581 | |
16582 | pCsr->pSorter = pSorter; |
16583 | if( rc==SQLITE_OK ){ |
16584 | assert( pTab->pSortCsr==0 ); |
16585 | pTab->pSortCsr = pCsr; |
16586 | rc = fts5SorterNext(pCsr); |
16587 | pTab->pSortCsr = 0; |
16588 | } |
16589 | |
16590 | if( rc!=SQLITE_OK ){ |
16591 | sqlite3_finalize(pSorter->pStmt); |
16592 | sqlite3_free(pSorter); |
16593 | pCsr->pSorter = 0; |
16594 | } |
16595 | |
16596 | return rc; |
16597 | } |
16598 | |
16599 | static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){ |
16600 | int rc; |
16601 | Fts5Expr *pExpr = pCsr->pExpr; |
16602 | rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc); |
16603 | if( sqlite3Fts5ExprEof(pExpr) ){ |
16604 | CsrFlagSet(pCsr, FTS5CSR_EOF); |
16605 | } |
16606 | fts5CsrNewrow(pCsr); |
16607 | return rc; |
16608 | } |
16609 | |
16610 | /* |
16611 | ** Process a "special" query. A special query is identified as one with a |
16612 | ** MATCH expression that begins with a '*' character. The remainder of |
16613 | ** the text passed to the MATCH operator are used as the special query |
16614 | ** parameters. |
16615 | */ |
16616 | static int fts5SpecialMatch( |
16617 | Fts5FullTable *pTab, |
16618 | Fts5Cursor *pCsr, |
16619 | const char *zQuery |
16620 | ){ |
16621 | int rc = SQLITE_OK; /* Return code */ |
16622 | const char *z = zQuery; /* Special query text */ |
16623 | int n; /* Number of bytes in text at z */ |
16624 | |
16625 | while( z[0]==' ' ) z++; |
16626 | for(n=0; z[n] && z[n]!=' '; n++); |
16627 | |
16628 | assert( pTab->p.base.zErrMsg==0 ); |
16629 | pCsr->ePlan = FTS5_PLAN_SPECIAL; |
16630 | |
16631 | if( n==5 && 0==sqlite3_strnicmp("reads" , z, n) ){ |
16632 | pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex); |
16633 | } |
16634 | else if( n==2 && 0==sqlite3_strnicmp("id" , z, n) ){ |
16635 | pCsr->iSpecial = pCsr->iCsrId; |
16636 | } |
16637 | else{ |
16638 | /* An unrecognized directive. Return an error message. */ |
16639 | pTab->p.base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s" , n, z); |
16640 | rc = SQLITE_ERROR; |
16641 | } |
16642 | |
16643 | return rc; |
16644 | } |
16645 | |
16646 | /* |
16647 | ** Search for an auxiliary function named zName that can be used with table |
16648 | ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary |
16649 | ** structure. Otherwise, if no such function exists, return NULL. |
16650 | */ |
16651 | static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){ |
16652 | Fts5Auxiliary *pAux; |
16653 | |
16654 | for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ |
16655 | if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux; |
16656 | } |
16657 | |
16658 | /* No function of the specified name was found. Return 0. */ |
16659 | return 0; |
16660 | } |
16661 | |
16662 | |
16663 | static int fts5FindRankFunction(Fts5Cursor *pCsr){ |
16664 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
16665 | Fts5Config *pConfig = pTab->p.pConfig; |
16666 | int rc = SQLITE_OK; |
16667 | Fts5Auxiliary *pAux = 0; |
16668 | const char *zRank = pCsr->zRank; |
16669 | const char *zRankArgs = pCsr->zRankArgs; |
16670 | |
16671 | if( zRankArgs ){ |
16672 | char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s" , zRankArgs); |
16673 | if( zSql ){ |
16674 | sqlite3_stmt *pStmt = 0; |
16675 | rc = sqlite3_prepare_v3(pConfig->db, zSql, -1, |
16676 | SQLITE_PREPARE_PERSISTENT, &pStmt, 0); |
16677 | sqlite3_free(zSql); |
16678 | assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 ); |
16679 | if( rc==SQLITE_OK ){ |
16680 | if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
16681 | sqlite3_int64 nByte; |
16682 | pCsr->nRankArg = sqlite3_column_count(pStmt); |
16683 | nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; |
16684 | pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); |
16685 | if( rc==SQLITE_OK ){ |
16686 | int i; |
16687 | for(i=0; i<pCsr->nRankArg; i++){ |
16688 | pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i); |
16689 | } |
16690 | } |
16691 | pCsr->pRankArgStmt = pStmt; |
16692 | }else{ |
16693 | rc = sqlite3_finalize(pStmt); |
16694 | assert( rc!=SQLITE_OK ); |
16695 | } |
16696 | } |
16697 | } |
16698 | } |
16699 | |
16700 | if( rc==SQLITE_OK ){ |
16701 | pAux = fts5FindAuxiliary(pTab, zRank); |
16702 | if( pAux==0 ){ |
16703 | assert( pTab->p.base.zErrMsg==0 ); |
16704 | pTab->p.base.zErrMsg = sqlite3_mprintf("no such function: %s" , zRank); |
16705 | rc = SQLITE_ERROR; |
16706 | } |
16707 | } |
16708 | |
16709 | pCsr->pRank = pAux; |
16710 | return rc; |
16711 | } |
16712 | |
16713 | |
16714 | static int fts5CursorParseRank( |
16715 | Fts5Config *pConfig, |
16716 | Fts5Cursor *pCsr, |
16717 | sqlite3_value *pRank |
16718 | ){ |
16719 | int rc = SQLITE_OK; |
16720 | if( pRank ){ |
16721 | const char *z = (const char*)sqlite3_value_text(pRank); |
16722 | char *zRank = 0; |
16723 | char *zRankArgs = 0; |
16724 | |
16725 | if( z==0 ){ |
16726 | if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR; |
16727 | }else{ |
16728 | rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); |
16729 | } |
16730 | if( rc==SQLITE_OK ){ |
16731 | pCsr->zRank = zRank; |
16732 | pCsr->zRankArgs = zRankArgs; |
16733 | CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK); |
16734 | }else if( rc==SQLITE_ERROR ){ |
16735 | pCsr->base.pVtab->zErrMsg = sqlite3_mprintf( |
16736 | "parse error in rank function: %s" , z |
16737 | ); |
16738 | } |
16739 | }else{ |
16740 | if( pConfig->zRank ){ |
16741 | pCsr->zRank = (char*)pConfig->zRank; |
16742 | pCsr->zRankArgs = (char*)pConfig->zRankArgs; |
16743 | }else{ |
16744 | pCsr->zRank = (char*)FTS5_DEFAULT_RANK; |
16745 | pCsr->zRankArgs = 0; |
16746 | } |
16747 | } |
16748 | return rc; |
16749 | } |
16750 | |
16751 | static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ |
16752 | if( pVal ){ |
16753 | int eType = sqlite3_value_numeric_type(pVal); |
16754 | if( eType==SQLITE_INTEGER ){ |
16755 | return sqlite3_value_int64(pVal); |
16756 | } |
16757 | } |
16758 | return iDefault; |
16759 | } |
16760 | |
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
** information.
**
** There are three possible query strategies:
**
**   1. Full-text search using a MATCH operator.
**   2. A by-rowid lookup.
**   3. A full-table scan.
**
** Each element of apVal[] is described by one entry in idxStr, consumed
** in order:
**
**   'r'            value of a "rank MATCH ?" constraint
**   'M' <digits>   MATCH text; <digits> is the decimal column number
**   'L' <digits>   pattern handed to sqlite3Fts5ExprPattern(), bGlob==0
**   'G' <digits>   pattern handed to sqlite3Fts5ExprPattern(), bGlob==1
**   '='            rowid = ?
**   '<'            rowid <= ?
**   '>'            rowid >= ?
**
** The FTS5_BI_ORDER_RANK and FTS5_BI_ORDER_DESC bits of idxNum select
** ORDER BY rank and descending order respectively.
**
** Returns SQLITE_OK on success or an SQLite error code. SQLITE_ERROR is
** returned immediately, with a message in the vtab, if Fts5Config.bLock
** is non-zero when this function is called.
*/
static int fts5FilterMethod(
  sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
  int idxNum,                     /* Strategy index */
  const char *idxStr,             /* Describes the values in apVal[] */
  int nVal,                       /* Number of elements in apVal */
  sqlite3_value **apVal           /* Arguments for the indexing scheme */
){
  Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
  Fts5Config *pConfig = pTab->p.pConfig;
  Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
  int rc = SQLITE_OK;             /* Error code */
  int bDesc;                      /* True if ORDER BY [rank|rowid] DESC */
  int bOrderByRank;               /* True if ORDER BY rank */
  sqlite3_value *pRank = 0;       /* rank MATCH ? expression (or NULL) */
  sqlite3_value *pRowidEq = 0;    /* rowid = ? expression (or NULL) */
  sqlite3_value *pRowidLe = 0;    /* rowid <= ? expression (or NULL) */
  sqlite3_value *pRowidGe = 0;    /* rowid >= ? expression (or NULL) */
  int iCol;                       /* Column on LHS of MATCH operator */
  char **pzErrmsg = pConfig->pzErrmsg;  /* Saved; restored at filter_out */
  int i;
  int iIdxStr = 0;                /* Read offset into idxStr */
  Fts5Expr *pExpr = 0;            /* Expression not yet merged into pCsr */

  /* bLock is non-zero while this table's own statements are being stepped
  ** (see fts5NextMethod() and fts5SeekCursor()). */
  if( pConfig->bLock ){
    pTab->p.base.zErrMsg = sqlite3_mprintf(
        "recursively defined fts5 content table"
    );
    return SQLITE_ERROR;
  }

  /* If the cursor has been used before, release its state and zero
  ** everything from the ePlan field to the end of the structure. */
  if( pCsr->ePlan ){
    fts5FreeCursorComponents(pCsr);
    memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
  }

  assert( pCsr->pStmt==0 );
  assert( pCsr->pExpr==0 );
  assert( pCsr->csrflags==0 );
  assert( pCsr->pRank==0 );
  assert( pCsr->zRank==0 );
  assert( pCsr->zRankArgs==0 );
  assert( pTab->pSortCsr==0 || nVal==0 );

  /* Route error messages produced through pConfig->pzErrmsg to the vtab
  ** for the duration of this call. */
  assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg );
  pConfig->pzErrmsg = &pTab->p.base.zErrMsg;

  /* Decode the arguments passed through to this function. */
  for(i=0; i<nVal; i++){
    switch( idxStr[iIdxStr++] ){
      case 'r':
        pRank = apVal[i];
        break;
      case 'M': {
        const char *zText = (const char*)sqlite3_value_text(apVal[i]);
        if( zText==0 ) zText = "" ;
        /* Parse the decimal column number that follows the 'M'. */
        iCol = 0;
        do{
          iCol = iCol*10 + (idxStr[iIdxStr]-'0');
          iIdxStr++;
        }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );

        if( zText[0]=='*' ){
          /* The user has issued a query of the form "MATCH '*...'". This
          ** indicates that the MATCH expression is not a full text query,
          ** but a request for an internal parameter. */
          rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
          goto filter_out;
        }else{
          char **pzErr = &pTab->p.base.zErrMsg;
          rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
          if( rc==SQLITE_OK ){
            /* Combine the new expression with any already on the cursor.
            ** pExpr is cleared after the call so that the cleanup at
            ** filter_out does not free it. */
            rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
            pExpr = 0;
          }
          if( rc!=SQLITE_OK ) goto filter_out;
        }

        break;
      }
      case 'L':
      case 'G': {
        int bGlob = (idxStr[iIdxStr-1]=='G');
        const char *zText = (const char*)sqlite3_value_text(apVal[i]);
        /* Parse the decimal column number that follows the 'L' or 'G'. */
        iCol = 0;
        do{
          iCol = iCol*10 + (idxStr[iIdxStr]-'0');
          iIdxStr++;
        }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
        /* If zText is NULL no pattern expression is built, and pExpr is
        ** still NULL when it is passed to sqlite3Fts5ExprAnd() below. */
        if( zText ){
          rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr);
        }
        if( rc==SQLITE_OK ){
          rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
          pExpr = 0;
        }
        if( rc!=SQLITE_OK ) goto filter_out;
        break;
      }
      case '=':
        pRowidEq = apVal[i];
        break;
      case '<':
        pRowidLe = apVal[i];
        break;
      default: assert( idxStr[iIdxStr-1]=='>' );
        pRowidGe = apVal[i];
        break;
    }
  }
  bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0);
  pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0);

  /* Set the cursor upper and lower rowid limits. Only some strategies
  ** actually use them. This is ok, as the xBestIndex() method leaves the
  ** sqlite3_index_constraint.omit flag clear for range constraints
  ** on the rowid field. */
  if( pRowidEq ){
    pRowidLe = pRowidGe = pRowidEq;
  }
  /* For a descending scan the "first" rowid is the upper limit and the
  ** "last" rowid is the lower limit; for ascending it is the reverse. */
  if( bDesc ){
    pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
    pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
  }else{
    pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
    pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
  }

  if( pTab->pSortCsr ){
    /* If pSortCsr is non-NULL, then this call is being made as part of
    ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
    ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
    ** return results to the user for this query. The current cursor
    ** (pCursor) is used to execute the query issued by function
    ** fts5CursorFirstSorted() above. */
    assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 );
    assert( nVal==0 && bOrderByRank==0 && bDesc==0 );
    assert( pCsr->iLastRowid==LARGEST_INT64 );
    assert( pCsr->iFirstRowid==SMALLEST_INT64 );
    /* Take the rowid range from the sorting cursor. This cursor has
    ** bDesc==0 (asserted above), so the two limits are swapped if the
    ** sorting cursor is descending. */
    if( pTab->pSortCsr->bDesc ){
      pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid;
      pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid;
    }else{
      pCsr->iLastRowid = pTab->pSortCsr->iLastRowid;
      pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid;
    }
    pCsr->ePlan = FTS5_PLAN_SOURCE;
    /* This cursor uses the same Fts5Expr object as the sorting cursor. */
    pCsr->pExpr = pTab->pSortCsr->pExpr;
    rc = fts5CursorFirst(pTab, pCsr, bDesc);
  }else if( pCsr->pExpr ){
    /* A full-text query: sorted by rank, or in rowid order. */
    rc = fts5CursorParseRank(pConfig, pCsr, pRank);
    if( rc==SQLITE_OK ){
      if( bOrderByRank ){
        pCsr->ePlan = FTS5_PLAN_SORTED_MATCH;
        rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
      }else{
        pCsr->ePlan = FTS5_PLAN_MATCH;
        rc = fts5CursorFirst(pTab, pCsr, bDesc);
      }
    }
  }else if( pConfig->zContent==0 ){
    /* No full-text expression and pConfig->zContent is NULL. */
    *pConfig->pzErrmsg = sqlite3_mprintf(
        "%s: table does not support scanning" , pConfig->zName
    );
    rc = SQLITE_ERROR;
  }else{
    /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
    ** by rowid (ePlan==FTS5_PLAN_ROWID). */
    pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN);
    rc = sqlite3Fts5StorageStmt(
        pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg
    );
    if( rc==SQLITE_OK ){
      if( pRowidEq!=0 ){
        assert( pCsr->ePlan==FTS5_PLAN_ROWID );
        sqlite3_bind_value(pCsr->pStmt, 1, pRowidEq);
      }else{
        sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
        sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
      }
      /* Step the statement to its first row. */
      rc = fts5NextMethod(pCursor);
    }
  }

 filter_out:
  /* pExpr is non-NULL here only if it was created but never passed to
  ** sqlite3Fts5ExprAnd(). Restore the caller's pzErrmsg pointer. */
  sqlite3Fts5ExprFree(pExpr);
  pConfig->pzErrmsg = pzErrmsg;
  return rc;
}
16960 | |
16961 | /* |
16962 | ** This is the xEof method of the virtual table. SQLite calls this |
16963 | ** routine to find out if it has reached the end of a result set. |
16964 | */ |
16965 | static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ |
16966 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
16967 | return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0); |
16968 | } |
16969 | |
16970 | /* |
16971 | ** Return the rowid that the cursor currently points to. |
16972 | */ |
16973 | static i64 fts5CursorRowid(Fts5Cursor *pCsr){ |
16974 | assert( pCsr->ePlan==FTS5_PLAN_MATCH |
16975 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH |
16976 | || pCsr->ePlan==FTS5_PLAN_SOURCE |
16977 | ); |
16978 | if( pCsr->pSorter ){ |
16979 | return pCsr->pSorter->iRowid; |
16980 | }else{ |
16981 | return sqlite3Fts5ExprRowid(pCsr->pExpr); |
16982 | } |
16983 | } |
16984 | |
16985 | /* |
16986 | ** This is the xRowid method. The SQLite core calls this routine to |
16987 | ** retrieve the rowid for the current row of the result set. fts5 |
16988 | ** exposes %_content.rowid as the rowid for the virtual table. The |
16989 | ** rowid should be written to *pRowid. |
16990 | */ |
16991 | static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ |
16992 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
16993 | int ePlan = pCsr->ePlan; |
16994 | |
16995 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); |
16996 | switch( ePlan ){ |
16997 | case FTS5_PLAN_SPECIAL: |
16998 | *pRowid = 0; |
16999 | break; |
17000 | |
17001 | case FTS5_PLAN_SOURCE: |
17002 | case FTS5_PLAN_MATCH: |
17003 | case FTS5_PLAN_SORTED_MATCH: |
17004 | *pRowid = fts5CursorRowid(pCsr); |
17005 | break; |
17006 | |
17007 | default: |
17008 | *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); |
17009 | break; |
17010 | } |
17011 | |
17012 | return SQLITE_OK; |
17013 | } |
17014 | |
17015 | /* |
17016 | ** If the cursor requires seeking (bSeekRequired flag is set), seek it. |
17017 | ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. |
17018 | ** |
17019 | ** If argument bErrormsg is true and an error occurs, an error message may |
17020 | ** be left in sqlite3_vtab.zErrMsg. |
17021 | */ |
17022 | static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ |
17023 | int rc = SQLITE_OK; |
17024 | |
17025 | /* If the cursor does not yet have a statement handle, obtain one now. */ |
17026 | if( pCsr->pStmt==0 ){ |
17027 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
17028 | int eStmt = fts5StmtType(pCsr); |
17029 | rc = sqlite3Fts5StorageStmt( |
17030 | pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0) |
17031 | ); |
17032 | assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 ); |
17033 | assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); |
17034 | } |
17035 | |
17036 | if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ |
17037 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
17038 | assert( pCsr->pExpr ); |
17039 | sqlite3_reset(pCsr->pStmt); |
17040 | sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); |
17041 | pTab->pConfig->bLock++; |
17042 | rc = sqlite3_step(pCsr->pStmt); |
17043 | pTab->pConfig->bLock--; |
17044 | if( rc==SQLITE_ROW ){ |
17045 | rc = SQLITE_OK; |
17046 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT); |
17047 | }else{ |
17048 | rc = sqlite3_reset(pCsr->pStmt); |
17049 | if( rc==SQLITE_OK ){ |
17050 | rc = FTS5_CORRUPT; |
17051 | }else if( pTab->pConfig->pzErrmsg ){ |
17052 | *pTab->pConfig->pzErrmsg = sqlite3_mprintf( |
17053 | "%s" , sqlite3_errmsg(pTab->pConfig->db) |
17054 | ); |
17055 | } |
17056 | } |
17057 | } |
17058 | return rc; |
17059 | } |
17060 | |
17061 | static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ |
17062 | va_list ap; /* ... printf arguments */ |
17063 | va_start(ap, zFormat); |
17064 | assert( p->p.base.zErrMsg==0 ); |
17065 | p->p.base.zErrMsg = sqlite3_vmprintf(zFormat, ap); |
17066 | va_end(ap); |
17067 | } |
17068 | |
17069 | /* |
17070 | ** This function is called to handle an FTS INSERT command. In other words, |
17071 | ** an INSERT statement of the form: |
17072 | ** |
17073 | ** INSERT INTO fts(fts) VALUES($pCmd) |
17074 | ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) |
17075 | ** |
17076 | ** Argument pVal is the value assigned to column "fts" by the INSERT |
17077 | ** statement. This function returns SQLITE_OK if successful, or an SQLite |
17078 | ** error code if an error occurs. |
17079 | ** |
17080 | ** The commands implemented by this function are documented in the "Special |
17081 | ** INSERT Directives" section of the documentation. It should be updated if |
17082 | ** more commands are added to this function. |
17083 | */ |
17084 | static int fts5SpecialInsert( |
17085 | Fts5FullTable *pTab, /* Fts5 table object */ |
17086 | const char *zCmd, /* Text inserted into table-name column */ |
17087 | sqlite3_value *pVal /* Value inserted into rank column */ |
17088 | ){ |
17089 | Fts5Config *pConfig = pTab->p.pConfig; |
17090 | int rc = SQLITE_OK; |
17091 | int bError = 0; |
17092 | |
17093 | if( 0==sqlite3_stricmp("delete-all" , zCmd) ){ |
17094 | if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
17095 | fts5SetVtabError(pTab, |
17096 | "'delete-all' may only be used with a " |
17097 | "contentless or external content fts5 table" |
17098 | ); |
17099 | rc = SQLITE_ERROR; |
17100 | }else{ |
17101 | rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); |
17102 | } |
17103 | }else if( 0==sqlite3_stricmp("rebuild" , zCmd) ){ |
17104 | if( pConfig->eContent==FTS5_CONTENT_NONE ){ |
17105 | fts5SetVtabError(pTab, |
17106 | "'rebuild' may not be used with a contentless fts5 table" |
17107 | ); |
17108 | rc = SQLITE_ERROR; |
17109 | }else{ |
17110 | rc = sqlite3Fts5StorageRebuild(pTab->pStorage); |
17111 | } |
17112 | }else if( 0==sqlite3_stricmp("optimize" , zCmd) ){ |
17113 | rc = sqlite3Fts5StorageOptimize(pTab->pStorage); |
17114 | }else if( 0==sqlite3_stricmp("merge" , zCmd) ){ |
17115 | int nMerge = sqlite3_value_int(pVal); |
17116 | rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); |
17117 | }else if( 0==sqlite3_stricmp("integrity-check" , zCmd) ){ |
17118 | int iArg = sqlite3_value_int(pVal); |
17119 | rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg); |
17120 | #ifdef SQLITE_DEBUG |
17121 | }else if( 0==sqlite3_stricmp("prefix-index" , zCmd) ){ |
17122 | pConfig->bPrefixIndex = sqlite3_value_int(pVal); |
17123 | #endif |
17124 | }else{ |
17125 | rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); |
17126 | if( rc==SQLITE_OK ){ |
17127 | rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError); |
17128 | } |
17129 | if( rc==SQLITE_OK ){ |
17130 | if( bError ){ |
17131 | rc = SQLITE_ERROR; |
17132 | }else{ |
17133 | rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0); |
17134 | } |
17135 | } |
17136 | } |
17137 | return rc; |
17138 | } |
17139 | |
17140 | static int fts5SpecialDelete( |
17141 | Fts5FullTable *pTab, |
17142 | sqlite3_value **apVal |
17143 | ){ |
17144 | int rc = SQLITE_OK; |
17145 | int eType1 = sqlite3_value_type(apVal[1]); |
17146 | if( eType1==SQLITE_INTEGER ){ |
17147 | sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]); |
17148 | rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]); |
17149 | } |
17150 | return rc; |
17151 | } |
17152 | |
17153 | static void fts5StorageInsert( |
17154 | int *pRc, |
17155 | Fts5FullTable *pTab, |
17156 | sqlite3_value **apVal, |
17157 | i64 *piRowid |
17158 | ){ |
17159 | int rc = *pRc; |
17160 | if( rc==SQLITE_OK ){ |
17161 | rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid); |
17162 | } |
17163 | if( rc==SQLITE_OK ){ |
17164 | rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); |
17165 | } |
17166 | *pRc = rc; |
17167 | } |
17168 | |
/*
** This function is the implementation of the xUpdate callback used by
** FTS5 virtual tables. It is invoked by SQLite each time a row is to be
** inserted, updated or deleted.
**
** A delete specifies a single argument - the rowid of the row to remove.
**
** Update and insert operations pass:
**
**   1. The "old" rowid, or NULL.
**   2. The "new" rowid.
**   3. Values for each of the nCol matchable columns.
**   4. Values for the two hidden columns (<tablename> and "rank").
**
** An INSERT with a NULL old rowid and a non-NULL value for the
** <tablename> column is a "special" INSERT and is dispatched to
** fts5SpecialDelete() or fts5SpecialInsert().
**
** While this function runs, Fts5Config.pzErrmsg points at the vtab's
** zErrMsg field; it is reset to NULL before returning. Returns SQLITE_OK
** on success or an SQLite error code.
*/
static int fts5UpdateMethod(
  sqlite3_vtab *pVtab,            /* Virtual table handle */
  int nArg,                       /* Size of argument array */
  sqlite3_value **apVal,          /* Array of arguments */
  sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
){
  Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
  Fts5Config *pConfig = pTab->p.pConfig;
  int eType0;                     /* value_type() of apVal[0] */
  int rc = SQLITE_OK;             /* Return code */

  /* A transaction must be open when this is called. */
  assert( pTab->ts.eState==1 || pTab->ts.eState==2 );

  assert( pVtab->zErrMsg==0 );
  assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
  assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER
       || sqlite3_value_type(apVal[0])==SQLITE_NULL
  );
  assert( pTab->p.pConfig->pzErrmsg==0 );
  pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;

  /* Put any active cursors into REQUIRE_RESEEK state. */
  fts5TripCursors(pTab);

  eType0 = sqlite3_value_type(apVal[0]);
  if( eType0==SQLITE_NULL
   && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL
  ){
    /* A "special" INSERT op. These are handled separately. */
    const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]);
    /* "delete" is only recognised as a special command when the table is
    ** not FTS5_CONTENT_NORMAL. In every other case the text is passed to
    ** fts5SpecialInsert() along with the value of the "rank" column. */
    if( pConfig->eContent!=FTS5_CONTENT_NORMAL
     && 0==sqlite3_stricmp("delete" , z)
    ){
      rc = fts5SpecialDelete(pTab, apVal);
    }else{
      rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
    }
  }else{
    /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
    ** any conflict on the rowid value must be detected before any
    ** modifications are made to the database file. There are 4 cases:
    **
    **   1) DELETE
    **   2) UPDATE (rowid not modified)
    **   3) UPDATE (rowid modified)
    **   4) INSERT
    **
    ** Cases 3 and 4 may violate the rowid constraint.
    */
    /* The conflict mode is only queried for FTS5_CONTENT_NORMAL tables;
    ** all other tables use SQLITE_ABORT. */
    int eConflict = SQLITE_ABORT;
    if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
      eConflict = sqlite3_vtab_on_conflict(pConfig->db);
    }

    assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
    assert( nArg!=1 || eType0==SQLITE_INTEGER );

    /* Filter out attempts to run UPDATE or DELETE on contentless tables.
    ** This is not supported. */
    if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){
      pTab->p.base.zErrMsg = sqlite3_mprintf(
          "cannot %s contentless fts5 table: %s" ,
          (nArg>1 ? "UPDATE" : "DELETE from" ), pConfig->zName
      );
      rc = SQLITE_ERROR;
    }

    /* DELETE */
    else if( nArg==1 ){
      i64 iDel = sqlite3_value_int64(apVal[0]);  /* Rowid to delete */
      rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
    }

    /* INSERT or UPDATE */
    else{
      int eType1 = sqlite3_value_numeric_type(apVal[1]);

      /* The new rowid must be an integer or NULL. */
      if( eType1!=SQLITE_INTEGER && eType1!=SQLITE_NULL ){
        rc = SQLITE_MISMATCH;
      }

      /* INSERT (the old rowid is NULL) */
      else if( eType0!=SQLITE_INTEGER ){
        /* If this is a REPLACE, first remove the current entry (if any) */
        if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){
          i64 iNew = sqlite3_value_int64(apVal[1]);  /* Rowid to delete */
          rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
        }
        fts5StorageInsert(&rc, pTab, apVal, pRowid);
      }

      /* UPDATE */
      else{
        i64 iOld = sqlite3_value_int64(apVal[0]);  /* Old rowid */
        i64 iNew = sqlite3_value_int64(apVal[1]);  /* New rowid */
        if( eType1==SQLITE_INTEGER && iOld!=iNew ){
          /* The rowid is being changed. */
          if( eConflict==SQLITE_REPLACE ){
            /* Remove the entries under both the old and the new rowid,
            ** then insert the new row. */
            rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
            if( rc==SQLITE_OK ){
              rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
            }
            fts5StorageInsert(&rc, pTab, apVal, pRowid);
          }else{
            /* Insert the content row first; the old row is deleted and
            ** the index updated only if that insert succeeds. */
            rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
            if( rc==SQLITE_OK ){
              rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
            }
            if( rc==SQLITE_OK ){
              rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid);
            }
          }
        }else{
          /* The rowid is unchanged: delete the old row, then insert the
          ** new one. */
          rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
          fts5StorageInsert(&rc, pTab, apVal, pRowid);
        }
      }
    }
  }

  pTab->p.pConfig->pzErrmsg = 0;
  return rc;
}
17305 | |
17306 | /* |
17307 | ** Implementation of xSync() method. |
17308 | */ |
17309 | static int fts5SyncMethod(sqlite3_vtab *pVtab){ |
17310 | int rc; |
17311 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; |
17312 | fts5CheckTransactionState(pTab, FTS5_SYNC, 0); |
17313 | pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; |
17314 | fts5TripCursors(pTab); |
17315 | rc = sqlite3Fts5StorageSync(pTab->pStorage); |
17316 | pTab->p.pConfig->pzErrmsg = 0; |
17317 | return rc; |
17318 | } |
17319 | |
17320 | /* |
17321 | ** Implementation of xBegin() method. |
17322 | */ |
17323 | static int fts5BeginMethod(sqlite3_vtab *pVtab){ |
17324 | fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0); |
17325 | fts5NewTransaction((Fts5FullTable*)pVtab); |
17326 | return SQLITE_OK; |
17327 | } |
17328 | |
/*
** Implementation of xCommit() method. This is a no-op. The contents of
** the pending-terms hash-table have already been flushed into the database
** by fts5SyncMethod().
**
** The only statement with any effect is the fts5CheckTransactionState()
** call, and that is itself a no-op in NDEBUG builds. Always returns
** SQLITE_OK.
*/
static int fts5CommitMethod(sqlite3_vtab *pVtab){
  UNUSED_PARAM(pVtab);  /* Call below is a no-op for NDEBUG builds */
  fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0);
  return SQLITE_OK;
}
17339 | |
17340 | /* |
17341 | ** Implementation of xRollback(). Discard the contents of the pending-terms |
17342 | ** hash-table. Any changes made to the database are reverted by SQLite. |
17343 | */ |
17344 | static int fts5RollbackMethod(sqlite3_vtab *pVtab){ |
17345 | int rc; |
17346 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; |
17347 | fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); |
17348 | rc = sqlite3Fts5StorageRollback(pTab->pStorage); |
17349 | return rc; |
17350 | } |
17351 | |
17352 | static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); |
17353 | |
17354 | static void *fts5ApiUserData(Fts5Context *pCtx){ |
17355 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17356 | return pCsr->pAux->pUserData; |
17357 | } |
17358 | |
17359 | static int fts5ApiColumnCount(Fts5Context *pCtx){ |
17360 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17361 | return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; |
17362 | } |
17363 | |
17364 | static int fts5ApiColumnTotalSize( |
17365 | Fts5Context *pCtx, |
17366 | int iCol, |
17367 | sqlite3_int64 *pnToken |
17368 | ){ |
17369 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17370 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
17371 | return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); |
17372 | } |
17373 | |
17374 | static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ |
17375 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17376 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
17377 | return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); |
17378 | } |
17379 | |
17380 | static int fts5ApiTokenize( |
17381 | Fts5Context *pCtx, |
17382 | const char *pText, int nText, |
17383 | void *pUserData, |
17384 | int (*xToken)(void*, int, const char*, int, int, int) |
17385 | ){ |
17386 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17387 | Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); |
17388 | return sqlite3Fts5Tokenize( |
17389 | pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken |
17390 | ); |
17391 | } |
17392 | |
17393 | static int fts5ApiPhraseCount(Fts5Context *pCtx){ |
17394 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17395 | return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
17396 | } |
17397 | |
17398 | static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ |
17399 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17400 | return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); |
17401 | } |
17402 | |
17403 | static int fts5ApiColumnText( |
17404 | Fts5Context *pCtx, |
17405 | int iCol, |
17406 | const char **pz, |
17407 | int *pn |
17408 | ){ |
17409 | int rc = SQLITE_OK; |
17410 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17411 | if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) |
17412 | || pCsr->ePlan==FTS5_PLAN_SPECIAL |
17413 | ){ |
17414 | *pz = 0; |
17415 | *pn = 0; |
17416 | }else{ |
17417 | rc = fts5SeekCursor(pCsr, 0); |
17418 | if( rc==SQLITE_OK ){ |
17419 | *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); |
17420 | *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); |
17421 | } |
17422 | } |
17423 | return rc; |
17424 | } |
17425 | |
17426 | static int fts5CsrPoslist( |
17427 | Fts5Cursor *pCsr, |
17428 | int iPhrase, |
17429 | const u8 **pa, |
17430 | int *pn |
17431 | ){ |
17432 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; |
17433 | int rc = SQLITE_OK; |
17434 | int bLive = (pCsr->pSorter==0); |
17435 | |
17436 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){ |
17437 | |
17438 | if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ |
17439 | Fts5PoslistPopulator *aPopulator; |
17440 | int i; |
17441 | aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); |
17442 | if( aPopulator==0 ) rc = SQLITE_NOMEM; |
17443 | for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){ |
17444 | int n; const char *z; |
17445 | rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n); |
17446 | if( rc==SQLITE_OK ){ |
17447 | rc = sqlite3Fts5ExprPopulatePoslists( |
17448 | pConfig, pCsr->pExpr, aPopulator, i, z, n |
17449 | ); |
17450 | } |
17451 | } |
17452 | sqlite3_free(aPopulator); |
17453 | |
17454 | if( pCsr->pSorter ){ |
17455 | sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); |
17456 | } |
17457 | } |
17458 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST); |
17459 | } |
17460 | |
17461 | if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){ |
17462 | Fts5Sorter *pSorter = pCsr->pSorter; |
17463 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); |
17464 | *pn = pSorter->aIdx[iPhrase] - i1; |
17465 | *pa = &pSorter->aPoslist[i1]; |
17466 | }else{ |
17467 | *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); |
17468 | } |
17469 | |
17470 | return rc; |
17471 | } |
17472 | |
17473 | /* |
17474 | ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated |
17475 | ** correctly for the current view. Return SQLITE_OK if successful, or an |
17476 | ** SQLite error code otherwise. |
17477 | */ |
17478 | static int fts5CacheInstArray(Fts5Cursor *pCsr){ |
17479 | int rc = SQLITE_OK; |
17480 | Fts5PoslistReader *aIter; /* One iterator for each phrase */ |
17481 | int nIter; /* Number of iterators/phrases */ |
17482 | int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol; |
17483 | |
17484 | nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); |
17485 | if( pCsr->aInstIter==0 ){ |
17486 | sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter; |
17487 | pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); |
17488 | } |
17489 | aIter = pCsr->aInstIter; |
17490 | |
17491 | if( aIter ){ |
17492 | int nInst = 0; /* Number instances seen so far */ |
17493 | int i; |
17494 | |
17495 | /* Initialize all iterators */ |
17496 | for(i=0; i<nIter && rc==SQLITE_OK; i++){ |
17497 | const u8 *a; |
17498 | int n; |
17499 | rc = fts5CsrPoslist(pCsr, i, &a, &n); |
17500 | if( rc==SQLITE_OK ){ |
17501 | sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); |
17502 | } |
17503 | } |
17504 | |
17505 | if( rc==SQLITE_OK ){ |
17506 | while( 1 ){ |
17507 | int *aInst; |
17508 | int iBest = -1; |
17509 | for(i=0; i<nIter; i++){ |
17510 | if( (aIter[i].bEof==0) |
17511 | && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) |
17512 | ){ |
17513 | iBest = i; |
17514 | } |
17515 | } |
17516 | if( iBest<0 ) break; |
17517 | |
17518 | nInst++; |
17519 | if( nInst>=pCsr->nInstAlloc ){ |
17520 | int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32; |
17521 | aInst = (int*)sqlite3_realloc64( |
17522 | pCsr->aInst, nNewSize*sizeof(int)*3 |
17523 | ); |
17524 | if( aInst ){ |
17525 | pCsr->aInst = aInst; |
17526 | pCsr->nInstAlloc = nNewSize; |
17527 | }else{ |
17528 | nInst--; |
17529 | rc = SQLITE_NOMEM; |
17530 | break; |
17531 | } |
17532 | } |
17533 | |
17534 | aInst = &pCsr->aInst[3 * (nInst-1)]; |
17535 | aInst[0] = iBest; |
17536 | aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); |
17537 | aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); |
17538 | if( aInst[1]<0 || aInst[1]>=nCol ){ |
17539 | rc = FTS5_CORRUPT; |
17540 | break; |
17541 | } |
17542 | sqlite3Fts5PoslistReaderNext(&aIter[iBest]); |
17543 | } |
17544 | } |
17545 | |
17546 | pCsr->nInstCount = nInst; |
17547 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST); |
17548 | } |
17549 | return rc; |
17550 | } |
17551 | |
17552 | static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ |
17553 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17554 | int rc = SQLITE_OK; |
17555 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 |
17556 | || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ |
17557 | *pnInst = pCsr->nInstCount; |
17558 | } |
17559 | return rc; |
17560 | } |
17561 | |
17562 | static int fts5ApiInst( |
17563 | Fts5Context *pCtx, |
17564 | int iIdx, |
17565 | int *piPhrase, |
17566 | int *piCol, |
17567 | int *piOff |
17568 | ){ |
17569 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17570 | int rc = SQLITE_OK; |
17571 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 |
17572 | || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) |
17573 | ){ |
17574 | if( iIdx<0 || iIdx>=pCsr->nInstCount ){ |
17575 | rc = SQLITE_RANGE; |
17576 | #if 0 |
17577 | }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){ |
17578 | *piPhrase = pCsr->aInst[iIdx*3]; |
17579 | *piCol = pCsr->aInst[iIdx*3 + 2]; |
17580 | *piOff = -1; |
17581 | #endif |
17582 | }else{ |
17583 | *piPhrase = pCsr->aInst[iIdx*3]; |
17584 | *piCol = pCsr->aInst[iIdx*3 + 1]; |
17585 | *piOff = pCsr->aInst[iIdx*3 + 2]; |
17586 | } |
17587 | } |
17588 | return rc; |
17589 | } |
17590 | |
17591 | static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ |
17592 | return fts5CursorRowid((Fts5Cursor*)pCtx); |
17593 | } |
17594 | |
17595 | static int fts5ColumnSizeCb( |
17596 | void *pContext, /* Pointer to int */ |
17597 | int tflags, |
17598 | const char *pUnused, /* Buffer containing token */ |
17599 | int nUnused, /* Size of token in bytes */ |
17600 | int iUnused1, /* Start offset of token */ |
17601 | int iUnused2 /* End offset of token */ |
17602 | ){ |
17603 | int *pCnt = (int*)pContext; |
17604 | UNUSED_PARAM2(pUnused, nUnused); |
17605 | UNUSED_PARAM2(iUnused1, iUnused2); |
17606 | if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ |
17607 | (*pCnt)++; |
17608 | } |
17609 | return SQLITE_OK; |
17610 | } |
17611 | |
17612 | static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ |
17613 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17614 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
17615 | Fts5Config *pConfig = pTab->p.pConfig; |
17616 | int rc = SQLITE_OK; |
17617 | |
17618 | if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ |
17619 | if( pConfig->bColumnsize ){ |
17620 | i64 iRowid = fts5CursorRowid(pCsr); |
17621 | rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); |
17622 | }else if( pConfig->zContent==0 ){ |
17623 | int i; |
17624 | for(i=0; i<pConfig->nCol; i++){ |
17625 | if( pConfig->abUnindexed[i]==0 ){ |
17626 | pCsr->aColumnSize[i] = -1; |
17627 | } |
17628 | } |
17629 | }else{ |
17630 | int i; |
17631 | for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ |
17632 | if( pConfig->abUnindexed[i]==0 ){ |
17633 | const char *z; int n; |
17634 | void *p = (void*)(&pCsr->aColumnSize[i]); |
17635 | pCsr->aColumnSize[i] = 0; |
17636 | rc = fts5ApiColumnText(pCtx, i, &z, &n); |
17637 | if( rc==SQLITE_OK ){ |
17638 | rc = sqlite3Fts5Tokenize( |
17639 | pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb |
17640 | ); |
17641 | } |
17642 | } |
17643 | } |
17644 | } |
17645 | CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); |
17646 | } |
17647 | if( iCol<0 ){ |
17648 | int i; |
17649 | *pnToken = 0; |
17650 | for(i=0; i<pConfig->nCol; i++){ |
17651 | *pnToken += pCsr->aColumnSize[i]; |
17652 | } |
17653 | }else if( iCol<pConfig->nCol ){ |
17654 | *pnToken = pCsr->aColumnSize[iCol]; |
17655 | }else{ |
17656 | *pnToken = 0; |
17657 | rc = SQLITE_RANGE; |
17658 | } |
17659 | return rc; |
17660 | } |
17661 | |
17662 | /* |
17663 | ** Implementation of the xSetAuxdata() method. |
17664 | */ |
17665 | static int fts5ApiSetAuxdata( |
17666 | Fts5Context *pCtx, /* Fts5 context */ |
17667 | void *pPtr, /* Pointer to save as auxdata */ |
17668 | void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ |
17669 | ){ |
17670 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17671 | Fts5Auxdata *pData; |
17672 | |
17673 | /* Search through the cursors list of Fts5Auxdata objects for one that |
17674 | ** corresponds to the currently executing auxiliary function. */ |
17675 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ |
17676 | if( pData->pAux==pCsr->pAux ) break; |
17677 | } |
17678 | |
17679 | if( pData ){ |
17680 | if( pData->xDelete ){ |
17681 | pData->xDelete(pData->pPtr); |
17682 | } |
17683 | }else{ |
17684 | int rc = SQLITE_OK; |
17685 | pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); |
17686 | if( pData==0 ){ |
17687 | if( xDelete ) xDelete(pPtr); |
17688 | return rc; |
17689 | } |
17690 | pData->pAux = pCsr->pAux; |
17691 | pData->pNext = pCsr->pAuxdata; |
17692 | pCsr->pAuxdata = pData; |
17693 | } |
17694 | |
17695 | pData->xDelete = xDelete; |
17696 | pData->pPtr = pPtr; |
17697 | return SQLITE_OK; |
17698 | } |
17699 | |
17700 | static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ |
17701 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17702 | Fts5Auxdata *pData; |
17703 | void *pRet = 0; |
17704 | |
17705 | for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ |
17706 | if( pData->pAux==pCsr->pAux ) break; |
17707 | } |
17708 | |
17709 | if( pData ){ |
17710 | pRet = pData->pPtr; |
17711 | if( bClear ){ |
17712 | pData->pPtr = 0; |
17713 | pData->xDelete = 0; |
17714 | } |
17715 | } |
17716 | |
17717 | return pRet; |
17718 | } |
17719 | |
17720 | static void fts5ApiPhraseNext( |
17721 | Fts5Context *pUnused, |
17722 | Fts5PhraseIter *pIter, |
17723 | int *piCol, int *piOff |
17724 | ){ |
17725 | UNUSED_PARAM(pUnused); |
17726 | if( pIter->a>=pIter->b ){ |
17727 | *piCol = -1; |
17728 | *piOff = -1; |
17729 | }else{ |
17730 | int iVal; |
17731 | pIter->a += fts5GetVarint32(pIter->a, iVal); |
17732 | if( iVal==1 ){ |
17733 | pIter->a += fts5GetVarint32(pIter->a, iVal); |
17734 | *piCol = iVal; |
17735 | *piOff = 0; |
17736 | pIter->a += fts5GetVarint32(pIter->a, iVal); |
17737 | } |
17738 | *piOff += (iVal-2); |
17739 | } |
17740 | } |
17741 | |
17742 | static int fts5ApiPhraseFirst( |
17743 | Fts5Context *pCtx, |
17744 | int iPhrase, |
17745 | Fts5PhraseIter *pIter, |
17746 | int *piCol, int *piOff |
17747 | ){ |
17748 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17749 | int n; |
17750 | int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); |
17751 | if( rc==SQLITE_OK ){ |
17752 | assert( pIter->a || n==0 ); |
17753 | pIter->b = (pIter->a ? &pIter->a[n] : 0); |
17754 | *piCol = 0; |
17755 | *piOff = 0; |
17756 | fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); |
17757 | } |
17758 | return rc; |
17759 | } |
17760 | |
/*
** Extension API: advance pIter to the next column that contains the phrase
** and store that column number in (*piCol). (*piCol) is set to -1 once the
** iterator is exhausted (pIter->a>=pIter->b).
**
** The buffer is interpreted according to Fts5Config.eDetail:
**
**   * FTS5_DETAIL_COLUMNS: each varint is a column-number delta stored
**     with a bias of 2, so (*piCol) must hold the previous column on entry.
**
**   * otherwise: varints are skipped until a 0x01 byte is found. The
**     varint that follows that byte is the new column number.
*/
static void fts5ApiPhraseNextColumn(
  Fts5Context *pCtx,
  Fts5PhraseIter *pIter,
  int *piCol
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;

  if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
    if( pIter->a>=pIter->b ){
      *piCol = -1;
    }else{
      int iIncr;
      pIter->a += fts5GetVarint32(&pIter->a[0], iIncr);
      *piCol += (iIncr-2);
    }
  }else{
    /* Skip the remaining entries of the current column */
    while( 1 ){
      int dummy;
      if( pIter->a>=pIter->b ){
        *piCol = -1;
        return;
      }
      if( pIter->a[0]==0x01 ) break;
      pIter->a += fts5GetVarint32(pIter->a, dummy);
    }
    /* Step over the 0x01 marker byte and the column number after it */
    pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
  }
}
17790 | |
17791 | static int fts5ApiPhraseFirstColumn( |
17792 | Fts5Context *pCtx, |
17793 | int iPhrase, |
17794 | Fts5PhraseIter *pIter, |
17795 | int *piCol |
17796 | ){ |
17797 | int rc = SQLITE_OK; |
17798 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17799 | Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; |
17800 | |
17801 | if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ |
17802 | Fts5Sorter *pSorter = pCsr->pSorter; |
17803 | int n; |
17804 | if( pSorter ){ |
17805 | int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]); |
17806 | n = pSorter->aIdx[iPhrase] - i1; |
17807 | pIter->a = &pSorter->aPoslist[i1]; |
17808 | }else{ |
17809 | rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); |
17810 | } |
17811 | if( rc==SQLITE_OK ){ |
17812 | assert( pIter->a || n==0 ); |
17813 | pIter->b = (pIter->a ? &pIter->a[n] : 0); |
17814 | *piCol = 0; |
17815 | fts5ApiPhraseNextColumn(pCtx, pIter, piCol); |
17816 | } |
17817 | }else{ |
17818 | int n; |
17819 | rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); |
17820 | if( rc==SQLITE_OK ){ |
17821 | assert( pIter->a || n==0 ); |
17822 | pIter->b = (pIter->a ? &pIter->a[n] : 0); |
17823 | if( n<=0 ){ |
17824 | *piCol = -1; |
17825 | }else if( pIter->a[0]==0x01 ){ |
17826 | pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); |
17827 | }else{ |
17828 | *piCol = 0; |
17829 | } |
17830 | } |
17831 | } |
17832 | |
17833 | return rc; |
17834 | } |
17835 | |
17836 | |
/* Forward declaration: fts5ApiQueryPhrase() is referenced by the table
** below but itself needs &sFts5Api, so it is defined after the table. */
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
    int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);

/*
** The extension API method table handed to auxiliary functions (see
** fts5ApiInvoke() and fts5ApiQueryPhrase()). This is a positional
** initializer: the order of the entries must match the order of the
** members of struct Fts5ExtensionApi exactly.
*/
static const Fts5ExtensionApi sFts5Api = {
  2,                            /* iVersion */
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,
  fts5ApiInstCount,
  fts5ApiInst,
  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,
  fts5ApiPhraseFirst,
  fts5ApiPhraseNext,
  fts5ApiPhraseFirstColumn,
  fts5ApiPhraseNextColumn,
};
17863 | |
17864 | /* |
17865 | ** Implementation of API function xQueryPhrase(). |
17866 | */ |
17867 | static int fts5ApiQueryPhrase( |
17868 | Fts5Context *pCtx, |
17869 | int iPhrase, |
17870 | void *pUserData, |
17871 | int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) |
17872 | ){ |
17873 | Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; |
17874 | Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); |
17875 | int rc; |
17876 | Fts5Cursor *pNew = 0; |
17877 | |
17878 | rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); |
17879 | if( rc==SQLITE_OK ){ |
17880 | pNew->ePlan = FTS5_PLAN_MATCH; |
17881 | pNew->iFirstRowid = SMALLEST_INT64; |
17882 | pNew->iLastRowid = LARGEST_INT64; |
17883 | pNew->base.pVtab = (sqlite3_vtab*)pTab; |
17884 | rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr); |
17885 | } |
17886 | |
17887 | if( rc==SQLITE_OK ){ |
17888 | for(rc = fts5CursorFirst(pTab, pNew, 0); |
17889 | rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; |
17890 | rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) |
17891 | ){ |
17892 | rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); |
17893 | if( rc!=SQLITE_OK ){ |
17894 | if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
17895 | break; |
17896 | } |
17897 | } |
17898 | } |
17899 | |
17900 | fts5CloseMethod((sqlite3_vtab_cursor*)pNew); |
17901 | return rc; |
17902 | } |
17903 | |
/*
** Invoke auxiliary function pAux against cursor pCsr, passing it the
** extension API table, the cursor as its Fts5Context, and the argc/argv
** arguments.
**
** Fts5Cursor.pAux is set to pAux for the duration of the call and reset to
** NULL afterwards; API methods such as fts5ApiUserData() and the auxdata
** accessors read it to identify the running function. The assert() shows
** that invocations on a single cursor are expected not to nest.
*/
static void fts5ApiInvoke(
  Fts5Auxiliary *pAux,
  Fts5Cursor *pCsr,
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  assert( pCsr->pAux==0 );
  pCsr->pAux = pAux;
  pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
  pCsr->pAux = 0;
}
17916 | |
17917 | static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ |
17918 | Fts5Cursor *pCsr; |
17919 | for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ |
17920 | if( pCsr->iCsrId==iCsrId ) break; |
17921 | } |
17922 | return pCsr; |
17923 | } |
17924 | |
17925 | static void fts5ApiCallback( |
17926 | sqlite3_context *context, |
17927 | int argc, |
17928 | sqlite3_value **argv |
17929 | ){ |
17930 | |
17931 | Fts5Auxiliary *pAux; |
17932 | Fts5Cursor *pCsr; |
17933 | i64 iCsrId; |
17934 | |
17935 | assert( argc>=1 ); |
17936 | pAux = (Fts5Auxiliary*)sqlite3_user_data(context); |
17937 | iCsrId = sqlite3_value_int64(argv[0]); |
17938 | |
17939 | pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); |
17940 | if( pCsr==0 || pCsr->ePlan==0 ){ |
17941 | char *zErr = sqlite3_mprintf("no such cursor: %lld" , iCsrId); |
17942 | sqlite3_result_error(context, zErr, -1); |
17943 | sqlite3_free(zErr); |
17944 | }else{ |
17945 | fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); |
17946 | } |
17947 | } |
17948 | |
17949 | |
17950 | /* |
17951 | ** Given cursor id iId, return a pointer to the corresponding Fts5Table |
17952 | ** object. Or NULL If the cursor id does not exist. |
17953 | */ |
17954 | static Fts5Table *sqlite3Fts5TableFromCsrid( |
17955 | Fts5Global *pGlobal, /* FTS5 global context for db handle */ |
17956 | i64 iCsrId /* Id of cursor to find */ |
17957 | ){ |
17958 | Fts5Cursor *pCsr; |
17959 | pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); |
17960 | if( pCsr ){ |
17961 | return (Fts5Table*)pCsr->base.pVtab; |
17962 | } |
17963 | return 0; |
17964 | } |
17965 | |
/*
** Return a "position-list blob" corresponding to the current position of
** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
** the current position-list for each phrase in the query associated with
** cursor pCsr.
**
** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
** the number of phrases in the query. Following the varints are the
** concatenated position lists for each phrase, in order.
**
** The first varint (if it exists) contains the size of the position list
** for phrase 0. The second (same disclaimer) contains the size of position
** list 1. And so on. There is no size field for the final position list,
** as it can be derived from the total size of the blob.
**
** For FTS5_DETAIL_COLUMNS tables the per-phrase lists come from
** sqlite3Fts5ExprPhraseCollist() instead of sqlite3Fts5ExprPoslist(). For
** any other detail mode besides FTS5_DETAIL_FULL the blob is empty.
**
** The blob is handed to SQLite even if an error occurred while building
** it. The error code is the return value, so callers must check it to
** know whether the blob is complete.
*/
static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
  int i;
  int rc = SQLITE_OK;
  int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  Fts5Buffer val;

  memset(&val, 0, sizeof(Fts5Buffer));
  switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){
    case FTS5_DETAIL_FULL:

      /* Append the varints */
      for(i=0; i<(nPhrase-1); i++){
        const u8 *dummy;
        int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
        sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
      }

      /* Append the position lists */
      for(i=0; i<nPhrase; i++){
        const u8 *pPoslist;
        int nPoslist;
        nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
        sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
      }
      break;

    case FTS5_DETAIL_COLUMNS:

      /* Append the varints */
      for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){
        const u8 *dummy;
        int nByte;
        rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte);
        sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
      }

      /* Append the position lists */
      for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
        const u8 *pPoslist;
        int nPoslist;
        rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist);
        sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
      }
      break;

    default:
      break;
  }

  /* Ownership of val.p passes to SQLite, which frees it with sqlite3_free */
  sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free);
  return rc;
}
18033 | |
18034 | /* |
18035 | ** This is the xColumn method, called by SQLite to request a value from |
18036 | ** the row that the supplied cursor currently points to. |
18037 | */ |
18038 | static int fts5ColumnMethod( |
18039 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
18040 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
18041 | int iCol /* Index of column to read value from */ |
18042 | ){ |
18043 | Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); |
18044 | Fts5Config *pConfig = pTab->p.pConfig; |
18045 | Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; |
18046 | int rc = SQLITE_OK; |
18047 | |
18048 | assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); |
18049 | |
18050 | if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){ |
18051 | if( iCol==pConfig->nCol ){ |
18052 | sqlite3_result_int64(pCtx, pCsr->iSpecial); |
18053 | } |
18054 | }else |
18055 | |
18056 | if( iCol==pConfig->nCol ){ |
18057 | /* User is requesting the value of the special column with the same name |
18058 | ** as the table. Return the cursor integer id number. This value is only |
18059 | ** useful in that it may be passed as the first argument to an FTS5 |
18060 | ** auxiliary function. */ |
18061 | sqlite3_result_int64(pCtx, pCsr->iCsrId); |
18062 | }else if( iCol==pConfig->nCol+1 ){ |
18063 | |
18064 | /* The value of the "rank" column. */ |
18065 | if( pCsr->ePlan==FTS5_PLAN_SOURCE ){ |
18066 | fts5PoslistBlob(pCtx, pCsr); |
18067 | }else if( |
18068 | pCsr->ePlan==FTS5_PLAN_MATCH |
18069 | || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH |
18070 | ){ |
18071 | if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){ |
18072 | fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); |
18073 | } |
18074 | } |
18075 | }else if( !fts5IsContentless(pTab) ){ |
18076 | pConfig->pzErrmsg = &pTab->p.base.zErrMsg; |
18077 | rc = fts5SeekCursor(pCsr, 1); |
18078 | if( rc==SQLITE_OK ){ |
18079 | sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); |
18080 | } |
18081 | pConfig->pzErrmsg = 0; |
18082 | } |
18083 | return rc; |
18084 | } |
18085 | |
18086 | |
18087 | /* |
18088 | ** This routine implements the xFindFunction method for the FTS3 |
18089 | ** virtual table. |
18090 | */ |
18091 | static int fts5FindFunctionMethod( |
18092 | sqlite3_vtab *pVtab, /* Virtual table handle */ |
18093 | int nUnused, /* Number of SQL function arguments */ |
18094 | const char *zName, /* Name of SQL function */ |
18095 | void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ |
18096 | void **ppArg /* OUT: User data for *pxFunc */ |
18097 | ){ |
18098 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; |
18099 | Fts5Auxiliary *pAux; |
18100 | |
18101 | UNUSED_PARAM(nUnused); |
18102 | pAux = fts5FindAuxiliary(pTab, zName); |
18103 | if( pAux ){ |
18104 | *pxFunc = fts5ApiCallback; |
18105 | *ppArg = (void*)pAux; |
18106 | return 1; |
18107 | } |
18108 | |
18109 | /* No function of the specified name was found. Return 0. */ |
18110 | return 0; |
18111 | } |
18112 | |
18113 | /* |
18114 | ** Implementation of FTS5 xRename method. Rename an fts5 table. |
18115 | */ |
18116 | static int fts5RenameMethod( |
18117 | sqlite3_vtab *pVtab, /* Virtual table handle */ |
18118 | const char *zName /* New name of table */ |
18119 | ){ |
18120 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; |
18121 | return sqlite3Fts5StorageRename(pTab->pStorage, zName); |
18122 | } |
18123 | |
18124 | static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ |
18125 | fts5TripCursors((Fts5FullTable*)pTab); |
18126 | return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage); |
18127 | } |
18128 | |
/*
** The xSavepoint() method.
**
** Flush the contents of the pending-terms table to disk. Returns the
** result of sqlite3Fts5FlushToDisk().
**
** iSavepoint is only used by the fts5CheckTransactionState() debugging
** call, which is a no-op in NDEBUG builds.
*/
static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
  UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
  fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_SAVEPOINT, iSavepoint);
  return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
}
18139 | |
/*
** The xRelease() method.
**
** Like xSavepoint(), this flushes the contents of the pending-terms table
** to disk via sqlite3Fts5FlushToDisk() and returns its result. It is not a
** no-op.
**
** iSavepoint is only used by the fts5CheckTransactionState() debugging
** call, which is a no-op in NDEBUG builds.
*/
static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
  UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
  fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_RELEASE, iSavepoint);
  return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
}
18150 | |
18151 | /* |
18152 | ** The xRollbackTo() method. |
18153 | ** |
18154 | ** Discard the contents of the pending terms table. |
18155 | */ |
18156 | static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ |
18157 | Fts5FullTable *pTab = (Fts5FullTable*)pVtab; |
18158 | UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ |
18159 | fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); |
18160 | fts5TripCursors(pTab); |
18161 | return sqlite3Fts5StorageRollback(pTab->pStorage); |
18162 | } |
18163 | |
18164 | /* |
18165 | ** Register a new auxiliary function with global context pGlobal. |
18166 | */ |
18167 | static int fts5CreateAux( |
18168 | fts5_api *pApi, /* Global context (one per db handle) */ |
18169 | const char *zName, /* Name of new function */ |
18170 | void *pUserData, /* User data for aux. function */ |
18171 | fts5_extension_function xFunc, /* Aux. function implementation */ |
18172 | void(*xDestroy)(void*) /* Destructor for pUserData */ |
18173 | ){ |
18174 | Fts5Global *pGlobal = (Fts5Global*)pApi; |
18175 | int rc = sqlite3_overload_function(pGlobal->db, zName, -1); |
18176 | if( rc==SQLITE_OK ){ |
18177 | Fts5Auxiliary *pAux; |
18178 | sqlite3_int64 nName; /* Size of zName in bytes, including \0 */ |
18179 | sqlite3_int64 nByte; /* Bytes of space to allocate */ |
18180 | |
18181 | nName = strlen(zName) + 1; |
18182 | nByte = sizeof(Fts5Auxiliary) + nName; |
18183 | pAux = (Fts5Auxiliary*)sqlite3_malloc64(nByte); |
18184 | if( pAux ){ |
18185 | memset(pAux, 0, (size_t)nByte); |
18186 | pAux->zFunc = (char*)&pAux[1]; |
18187 | memcpy(pAux->zFunc, zName, nName); |
18188 | pAux->pGlobal = pGlobal; |
18189 | pAux->pUserData = pUserData; |
18190 | pAux->xFunc = xFunc; |
18191 | pAux->xDestroy = xDestroy; |
18192 | pAux->pNext = pGlobal->pAux; |
18193 | pGlobal->pAux = pAux; |
18194 | }else{ |
18195 | rc = SQLITE_NOMEM; |
18196 | } |
18197 | } |
18198 | |
18199 | return rc; |
18200 | } |
18201 | |
18202 | /* |
18203 | ** Register a new tokenizer. This is the implementation of the |
18204 | ** fts5_api.xCreateTokenizer() method. |
18205 | */ |
18206 | static int fts5CreateTokenizer( |
18207 | fts5_api *pApi, /* Global context (one per db handle) */ |
18208 | const char *zName, /* Name of new function */ |
18209 | void *pUserData, /* User data for aux. function */ |
18210 | fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ |
18211 | void(*xDestroy)(void*) /* Destructor for pUserData */ |
18212 | ){ |
18213 | Fts5Global *pGlobal = (Fts5Global*)pApi; |
18214 | Fts5TokenizerModule *pNew; |
18215 | sqlite3_int64 nName; /* Size of zName and its \0 terminator */ |
18216 | sqlite3_int64 nByte; /* Bytes of space to allocate */ |
18217 | int rc = SQLITE_OK; |
18218 | |
18219 | nName = strlen(zName) + 1; |
18220 | nByte = sizeof(Fts5TokenizerModule) + nName; |
18221 | pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte); |
18222 | if( pNew ){ |
18223 | memset(pNew, 0, (size_t)nByte); |
18224 | pNew->zName = (char*)&pNew[1]; |
18225 | memcpy(pNew->zName, zName, nName); |
18226 | pNew->pUserData = pUserData; |
18227 | pNew->x = *pTokenizer; |
18228 | pNew->xDestroy = xDestroy; |
18229 | pNew->pNext = pGlobal->pTok; |
18230 | pGlobal->pTok = pNew; |
18231 | if( pNew->pNext==0 ){ |
18232 | pGlobal->pDfltTok = pNew; |
18233 | } |
18234 | }else{ |
18235 | rc = SQLITE_NOMEM; |
18236 | } |
18237 | |
18238 | return rc; |
18239 | } |
18240 | |
18241 | static Fts5TokenizerModule *fts5LocateTokenizer( |
18242 | Fts5Global *pGlobal, |
18243 | const char *zName |
18244 | ){ |
18245 | Fts5TokenizerModule *pMod = 0; |
18246 | |
18247 | if( zName==0 ){ |
18248 | pMod = pGlobal->pDfltTok; |
18249 | }else{ |
18250 | for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ |
18251 | if( sqlite3_stricmp(zName, pMod->zName)==0 ) break; |
18252 | } |
18253 | } |
18254 | |
18255 | return pMod; |
18256 | } |
18257 | |
18258 | /* |
18259 | ** Find a tokenizer. This is the implementation of the |
18260 | ** fts5_api.xFindTokenizer() method. |
18261 | */ |
18262 | static int fts5FindTokenizer( |
18263 | fts5_api *pApi, /* Global context (one per db handle) */ |
18264 | const char *zName, /* Name of new function */ |
18265 | void **ppUserData, |
18266 | fts5_tokenizer *pTokenizer /* Populate this object */ |
18267 | ){ |
18268 | int rc = SQLITE_OK; |
18269 | Fts5TokenizerModule *pMod; |
18270 | |
18271 | pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); |
18272 | if( pMod ){ |
18273 | *pTokenizer = pMod->x; |
18274 | *ppUserData = pMod->pUserData; |
18275 | }else{ |
18276 | memset(pTokenizer, 0, sizeof(fts5_tokenizer)); |
18277 | rc = SQLITE_ERROR; |
18278 | } |
18279 | |
18280 | return rc; |
18281 | } |
18282 | |
18283 | static int sqlite3Fts5GetTokenizer( |
18284 | Fts5Global *pGlobal, |
18285 | const char **azArg, |
18286 | int nArg, |
18287 | Fts5Config *pConfig, |
18288 | char **pzErr |
18289 | ){ |
18290 | Fts5TokenizerModule *pMod; |
18291 | int rc = SQLITE_OK; |
18292 | |
18293 | pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]); |
18294 | if( pMod==0 ){ |
18295 | assert( nArg>0 ); |
18296 | rc = SQLITE_ERROR; |
18297 | *pzErr = sqlite3_mprintf("no such tokenizer: %s" , azArg[0]); |
18298 | }else{ |
18299 | rc = pMod->x.xCreate( |
18300 | pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok |
18301 | ); |
18302 | pConfig->pTokApi = &pMod->x; |
18303 | if( rc!=SQLITE_OK ){ |
18304 | if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor" ); |
18305 | }else{ |
18306 | pConfig->ePattern = sqlite3Fts5TokenizerPattern( |
18307 | pMod->x.xCreate, pConfig->pTok |
18308 | ); |
18309 | } |
18310 | } |
18311 | |
18312 | if( rc!=SQLITE_OK ){ |
18313 | pConfig->pTokApi = 0; |
18314 | pConfig->pTok = 0; |
18315 | } |
18316 | |
18317 | return rc; |
18318 | } |
18319 | |
18320 | static void fts5ModuleDestroy(void *pCtx){ |
18321 | Fts5TokenizerModule *pTok, *pNextTok; |
18322 | Fts5Auxiliary *pAux, *pNextAux; |
18323 | Fts5Global *pGlobal = (Fts5Global*)pCtx; |
18324 | |
18325 | for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ |
18326 | pNextAux = pAux->pNext; |
18327 | if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); |
18328 | sqlite3_free(pAux); |
18329 | } |
18330 | |
18331 | for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ |
18332 | pNextTok = pTok->pNext; |
18333 | if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); |
18334 | sqlite3_free(pTok); |
18335 | } |
18336 | |
18337 | sqlite3_free(pGlobal); |
18338 | } |
18339 | |
18340 | static void fts5Fts5Func( |
18341 | sqlite3_context *pCtx, /* Function call context */ |
18342 | int nArg, /* Number of args */ |
18343 | sqlite3_value **apArg /* Function arguments */ |
18344 | ){ |
18345 | Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); |
18346 | fts5_api **ppApi; |
18347 | UNUSED_PARAM(nArg); |
18348 | assert( nArg==1 ); |
18349 | ppApi = (fts5_api**)sqlite3_value_pointer(apArg[0], "fts5_api_ptr" ); |
18350 | if( ppApi ) *ppApi = &pGlobal->api; |
18351 | } |
18352 | |
18353 | /* |
18354 | ** Implementation of fts5_source_id() function. |
18355 | */ |
18356 | static void fts5SourceIdFunc( |
18357 | sqlite3_context *pCtx, /* Function call context */ |
18358 | int nArg, /* Number of args */ |
18359 | sqlite3_value **apUnused /* Function arguments */ |
18360 | ){ |
18361 | assert( nArg==0 ); |
18362 | UNUSED_PARAM2(nArg, apUnused); |
18363 | sqlite3_result_text(pCtx, "fts5: 2022-12-28 14:03:47 df5c253c0b3dd24916e4ec7cf77d3db5294cc9fd45ae7b9c5e82ad8197f38a24" , -1, SQLITE_TRANSIENT); |
18364 | } |
18365 | |
18366 | /* |
18367 | ** Return true if zName is the extension on one of the shadow tables used |
18368 | ** by this module. |
18369 | */ |
static int fts5ShadowName(const char *zName){
  /* Suffixes of the shadow tables, compared case-insensitively */
  static const char *azName[] = {
    "config", "content", "data", "docsize", "idx"
  };
  const unsigned int nName = sizeof(azName)/sizeof(azName[0]);
  unsigned int i = 0;

  while( i<nName ){
    if( 0==sqlite3_stricmp(zName, azName[i]) ){
      return 1;
    }
    i++;
  }
  return 0;
}
18380 | |
18381 | static int fts5Init(sqlite3 *db){ |
18382 | static const sqlite3_module fts5Mod = { |
18383 | /* iVersion */ 3, |
18384 | /* xCreate */ fts5CreateMethod, |
18385 | /* xConnect */ fts5ConnectMethod, |
18386 | /* xBestIndex */ fts5BestIndexMethod, |
18387 | /* xDisconnect */ fts5DisconnectMethod, |
18388 | /* xDestroy */ fts5DestroyMethod, |
18389 | /* xOpen */ fts5OpenMethod, |
18390 | /* xClose */ fts5CloseMethod, |
18391 | /* xFilter */ fts5FilterMethod, |
18392 | /* xNext */ fts5NextMethod, |
18393 | /* xEof */ fts5EofMethod, |
18394 | /* xColumn */ fts5ColumnMethod, |
18395 | /* xRowid */ fts5RowidMethod, |
18396 | /* xUpdate */ fts5UpdateMethod, |
18397 | /* xBegin */ fts5BeginMethod, |
18398 | /* xSync */ fts5SyncMethod, |
18399 | /* xCommit */ fts5CommitMethod, |
18400 | /* xRollback */ fts5RollbackMethod, |
18401 | /* xFindFunction */ fts5FindFunctionMethod, |
18402 | /* xRename */ fts5RenameMethod, |
18403 | /* xSavepoint */ fts5SavepointMethod, |
18404 | /* xRelease */ fts5ReleaseMethod, |
18405 | /* xRollbackTo */ fts5RollbackToMethod, |
18406 | /* xShadowName */ fts5ShadowName |
18407 | }; |
18408 | |
18409 | int rc; |
18410 | Fts5Global *pGlobal = 0; |
18411 | |
18412 | pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); |
18413 | if( pGlobal==0 ){ |
18414 | rc = SQLITE_NOMEM; |
18415 | }else{ |
18416 | void *p = (void*)pGlobal; |
18417 | memset(pGlobal, 0, sizeof(Fts5Global)); |
18418 | pGlobal->db = db; |
18419 | pGlobal->api.iVersion = 2; |
18420 | pGlobal->api.xCreateFunction = fts5CreateAux; |
18421 | pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; |
18422 | pGlobal->api.xFindTokenizer = fts5FindTokenizer; |
18423 | rc = sqlite3_create_module_v2(db, "fts5" , &fts5Mod, p, fts5ModuleDestroy); |
18424 | if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); |
18425 | if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); |
18426 | if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); |
18427 | if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); |
18428 | if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db); |
18429 | if( rc==SQLITE_OK ){ |
18430 | rc = sqlite3_create_function( |
18431 | db, "fts5" , 1, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 |
18432 | ); |
18433 | } |
18434 | if( rc==SQLITE_OK ){ |
18435 | rc = sqlite3_create_function( |
18436 | db, "fts5_source_id" , 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0 |
18437 | ); |
18438 | } |
18439 | } |
18440 | |
18441 | /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file |
18442 | ** fts5_test_mi.c is compiled and linked into the executable. And call |
18443 | ** its entry point to enable the matchinfo() demo. */ |
18444 | #ifdef SQLITE_FTS5_ENABLE_TEST_MI |
18445 | if( rc==SQLITE_OK ){ |
18446 | extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*); |
18447 | rc = sqlite3Fts5TestRegisterMatchinfo(db); |
18448 | } |
18449 | #endif |
18450 | |
18451 | return rc; |
18452 | } |
18453 | |
/*
** The following functions are used to register the module with SQLite. If
** this module is being built as part of the SQLite core (SQLITE_CORE is
** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
**
** Or, if this module is being built as a loadable extension,
** sqlite3Fts5Init() is omitted and the two standard entry points
** sqlite3_fts_init() and sqlite3_fts5_init() are defined instead.
*/
18463 | #ifndef SQLITE_CORE |
18464 | #ifdef _WIN32 |
18465 | __declspec(dllexport) |
18466 | #endif |
18467 | int sqlite3_fts_init( |
18468 | sqlite3 *db, |
18469 | char **pzErrMsg, |
18470 | const sqlite3_api_routines *pApi |
18471 | ){ |
18472 | SQLITE_EXTENSION_INIT2(pApi); |
18473 | (void)pzErrMsg; /* Unused parameter */ |
18474 | return fts5Init(db); |
18475 | } |
18476 | |
18477 | #ifdef _WIN32 |
18478 | __declspec(dllexport) |
18479 | #endif |
18480 | int sqlite3_fts5_init( |
18481 | sqlite3 *db, |
18482 | char **pzErrMsg, |
18483 | const sqlite3_api_routines *pApi |
18484 | ){ |
18485 | SQLITE_EXTENSION_INIT2(pApi); |
18486 | (void)pzErrMsg; /* Unused parameter */ |
18487 | return fts5Init(db); |
18488 | } |
18489 | #else |
18490 | int sqlite3Fts5Init(sqlite3 *db){ |
18491 | return fts5Init(db); |
18492 | } |
18493 | #endif |
18494 | |
18495 | #line 1 "fts5_storage.c" |
18496 | /* |
18497 | ** 2014 May 31 |
18498 | ** |
18499 | ** The author disclaims copyright to this source code. In place of |
18500 | ** a legal notice, here is a blessing: |
18501 | ** |
18502 | ** May you do good and not evil. |
18503 | ** May you find forgiveness for yourself and forgive others. |
18504 | ** May you share freely, never taking more than you give. |
18505 | ** |
18506 | ****************************************************************************** |
18507 | ** |
18508 | */ |
18509 | |
18510 | |
18511 | |
18512 | /* #include "fts5Int.h" */ |
18513 | |
/*
** Storage-layer handle. Allocated by sqlite3Fts5StorageOpen() and released
** by sqlite3Fts5StorageClose().
*/
struct Fts5Storage {
  Fts5Config *pConfig;            /* Configuration passed to StorageOpen() */
  Fts5Index *pIndex;              /* Index handle passed to StorageOpen() */
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */
  sqlite3_stmt *aStmt[11];        /* Cached statements, indexed by FTS5_STMT_XXX */
};
18522 | |
18523 | |
/*
** FTS5_STMT_SCAN_ASC, FTS5_STMT_SCAN_DESC and FTS5_STMT_LOOKUP are not
** defined in this part of the file. The checks below fail the build if
** their values are not 0, 1 and 2 respectively.
*/
#if FTS5_STMT_SCAN_ASC!=0
# error "FTS5_STMT_SCAN_ASC mismatch"
#endif
#if FTS5_STMT_SCAN_DESC!=1
# error "FTS5_STMT_SCAN_DESC mismatch"
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch"
#endif

/*
** Remaining statement ids. In fts5StorageGetStmt() each value is used as
** an index into both Fts5Storage.aStmt[] and the azStmt[] array of SQL
** templates.
*/
#define FTS5_STMT_INSERT_CONTENT  3
#define FTS5_STMT_REPLACE_CONTENT 4
#define FTS5_STMT_DELETE_CONTENT  5
#define FTS5_STMT_REPLACE_DOCSIZE  6
#define FTS5_STMT_DELETE_DOCSIZE  7
#define FTS5_STMT_LOOKUP_DOCSIZE  8
#define FTS5_STMT_REPLACE_CONFIG 9
#define FTS5_STMT_SCAN 10
18542 | |
/*
** Prepare the SQL statement identified by eStmt (an FTS5_STMT_XXX
** constant) if it has not already been prepared, caching it in
** Fts5Storage.aStmt[]. The statement is reset and returned via *ppStmt.
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
18549 | static int fts5StorageGetStmt( |
18550 | Fts5Storage *p, /* Storage handle */ |
18551 | int eStmt, /* FTS5_STMT_XXX constant */ |
18552 | sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ |
18553 | char **pzErrMsg /* OUT: Error message (if any) */ |
18554 | ){ |
18555 | int rc = SQLITE_OK; |
18556 | |
18557 | /* If there is no %_docsize table, there should be no requests for |
18558 | ** statements to operate on it. */ |
18559 | assert( p->pConfig->bColumnsize || ( |
18560 | eStmt!=FTS5_STMT_REPLACE_DOCSIZE |
18561 | && eStmt!=FTS5_STMT_DELETE_DOCSIZE |
18562 | && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE |
18563 | )); |
18564 | |
18565 | assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) ); |
18566 | if( p->aStmt[eStmt]==0 ){ |
18567 | const char *azStmt[] = { |
18568 | "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC" , |
18569 | "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC" , |
18570 | "SELECT %s FROM %s T WHERE T.%Q=?" , /* LOOKUP */ |
18571 | |
18572 | "INSERT INTO %Q.'%q_content' VALUES(%s)" , /* INSERT_CONTENT */ |
18573 | "REPLACE INTO %Q.'%q_content' VALUES(%s)" , /* REPLACE_CONTENT */ |
18574 | "DELETE FROM %Q.'%q_content' WHERE id=?" , /* DELETE_CONTENT */ |
18575 | "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)" , /* REPLACE_DOCSIZE */ |
18576 | "DELETE FROM %Q.'%q_docsize' WHERE id=?" , /* DELETE_DOCSIZE */ |
18577 | |
18578 | "SELECT sz FROM %Q.'%q_docsize' WHERE id=?" , /* LOOKUP_DOCSIZE */ |
18579 | |
18580 | "REPLACE INTO %Q.'%q_config' VALUES(?,?)" , /* REPLACE_CONFIG */ |
18581 | "SELECT %s FROM %s AS T" , /* SCAN */ |
18582 | }; |
18583 | Fts5Config *pC = p->pConfig; |
18584 | char *zSql = 0; |
18585 | |
18586 | switch( eStmt ){ |
18587 | case FTS5_STMT_SCAN: |
18588 | zSql = sqlite3_mprintf(azStmt[eStmt], |
18589 | pC->zContentExprlist, pC->zContent |
18590 | ); |
18591 | break; |
18592 | |
18593 | case FTS5_STMT_SCAN_ASC: |
18594 | case FTS5_STMT_SCAN_DESC: |
18595 | zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, |
18596 | pC->zContent, pC->zContentRowid, pC->zContentRowid, |
18597 | pC->zContentRowid |
18598 | ); |
18599 | break; |
18600 | |
18601 | case FTS5_STMT_LOOKUP: |
18602 | zSql = sqlite3_mprintf(azStmt[eStmt], |
18603 | pC->zContentExprlist, pC->zContent, pC->zContentRowid |
18604 | ); |
18605 | break; |
18606 | |
18607 | case FTS5_STMT_INSERT_CONTENT: |
18608 | case FTS5_STMT_REPLACE_CONTENT: { |
18609 | int nCol = pC->nCol + 1; |
18610 | char *zBind; |
18611 | int i; |
18612 | |
18613 | zBind = sqlite3_malloc64(1 + nCol*2); |
18614 | if( zBind ){ |
18615 | for(i=0; i<nCol; i++){ |
18616 | zBind[i*2] = '?'; |
18617 | zBind[i*2 + 1] = ','; |
18618 | } |
18619 | zBind[i*2-1] = '\0'; |
18620 | zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind); |
18621 | sqlite3_free(zBind); |
18622 | } |
18623 | break; |
18624 | } |
18625 | |
18626 | default: |
18627 | zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName); |
18628 | break; |
18629 | } |
18630 | |
18631 | if( zSql==0 ){ |
18632 | rc = SQLITE_NOMEM; |
18633 | }else{ |
18634 | int f = SQLITE_PREPARE_PERSISTENT; |
18635 | if( eStmt>FTS5_STMT_LOOKUP ) f |= SQLITE_PREPARE_NO_VTAB; |
18636 | p->pConfig->bLock++; |
18637 | rc = sqlite3_prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0); |
18638 | p->pConfig->bLock--; |
18639 | sqlite3_free(zSql); |
18640 | if( rc!=SQLITE_OK && pzErrMsg ){ |
18641 | *pzErrMsg = sqlite3_mprintf("%s" , sqlite3_errmsg(pC->db)); |
18642 | } |
18643 | } |
18644 | } |
18645 | |
18646 | *ppStmt = p->aStmt[eStmt]; |
18647 | sqlite3_reset(*ppStmt); |
18648 | return rc; |
18649 | } |
18650 | |
18651 | |
18652 | static int fts5ExecPrintf( |
18653 | sqlite3 *db, |
18654 | char **pzErr, |
18655 | const char *zFormat, |
18656 | ... |
18657 | ){ |
18658 | int rc; |
18659 | va_list ap; /* ... printf arguments */ |
18660 | char *zSql; |
18661 | |
18662 | va_start(ap, zFormat); |
18663 | zSql = sqlite3_vmprintf(zFormat, ap); |
18664 | |
18665 | if( zSql==0 ){ |
18666 | rc = SQLITE_NOMEM; |
18667 | }else{ |
18668 | rc = sqlite3_exec(db, zSql, 0, 0, pzErr); |
18669 | sqlite3_free(zSql); |
18670 | } |
18671 | |
18672 | va_end(ap); |
18673 | return rc; |
18674 | } |
18675 | |
18676 | /* |
18677 | ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error |
18678 | ** code otherwise. |
18679 | */ |
18680 | static int sqlite3Fts5DropAll(Fts5Config *pConfig){ |
18681 | int rc = fts5ExecPrintf(pConfig->db, 0, |
18682 | "DROP TABLE IF EXISTS %Q.'%q_data';" |
18683 | "DROP TABLE IF EXISTS %Q.'%q_idx';" |
18684 | "DROP TABLE IF EXISTS %Q.'%q_config';" , |
18685 | pConfig->zDb, pConfig->zName, |
18686 | pConfig->zDb, pConfig->zName, |
18687 | pConfig->zDb, pConfig->zName |
18688 | ); |
18689 | if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
18690 | rc = fts5ExecPrintf(pConfig->db, 0, |
18691 | "DROP TABLE IF EXISTS %Q.'%q_docsize';" , |
18692 | pConfig->zDb, pConfig->zName |
18693 | ); |
18694 | } |
18695 | if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
18696 | rc = fts5ExecPrintf(pConfig->db, 0, |
18697 | "DROP TABLE IF EXISTS %Q.'%q_content';" , |
18698 | pConfig->zDb, pConfig->zName |
18699 | ); |
18700 | } |
18701 | return rc; |
18702 | } |
18703 | |
18704 | static void fts5StorageRenameOne( |
18705 | Fts5Config *pConfig, /* Current FTS5 configuration */ |
18706 | int *pRc, /* IN/OUT: Error code */ |
18707 | const char *zTail, /* Tail of table name e.g. "data", "config" */ |
18708 | const char *zName /* New name of FTS5 table */ |
18709 | ){ |
18710 | if( *pRc==SQLITE_OK ){ |
18711 | *pRc = fts5ExecPrintf(pConfig->db, 0, |
18712 | "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';" , |
18713 | pConfig->zDb, pConfig->zName, zTail, zName, zTail |
18714 | ); |
18715 | } |
18716 | } |
18717 | |
18718 | static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ |
18719 | Fts5Config *pConfig = pStorage->pConfig; |
18720 | int rc = sqlite3Fts5StorageSync(pStorage); |
18721 | |
18722 | fts5StorageRenameOne(pConfig, &rc, "data" , zName); |
18723 | fts5StorageRenameOne(pConfig, &rc, "idx" , zName); |
18724 | fts5StorageRenameOne(pConfig, &rc, "config" , zName); |
18725 | if( pConfig->bColumnsize ){ |
18726 | fts5StorageRenameOne(pConfig, &rc, "docsize" , zName); |
18727 | } |
18728 | if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
18729 | fts5StorageRenameOne(pConfig, &rc, "content" , zName); |
18730 | } |
18731 | return rc; |
18732 | } |
18733 | |
18734 | /* |
18735 | ** Create the shadow table named zPost, with definition zDefn. Return |
18736 | ** SQLITE_OK if successful, or an SQLite error code otherwise. |
18737 | */ |
18738 | static int sqlite3Fts5CreateTable( |
18739 | Fts5Config *pConfig, /* FTS5 configuration */ |
18740 | const char *zPost, /* Shadow table to create (e.g. "content") */ |
18741 | const char *zDefn, /* Columns etc. for shadow table */ |
18742 | int bWithout, /* True for without rowid */ |
18743 | char **pzErr /* OUT: Error message */ |
18744 | ){ |
18745 | int rc; |
18746 | char *zErr = 0; |
18747 | |
18748 | rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s" , |
18749 | pConfig->zDb, pConfig->zName, zPost, zDefn, |
18750 | #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID |
18751 | bWithout?" WITHOUT ROWID" : |
18752 | #endif |
18753 | "" |
18754 | ); |
18755 | if( zErr ){ |
18756 | *pzErr = sqlite3_mprintf( |
18757 | "fts5: error creating shadow table %q_%s: %s" , |
18758 | pConfig->zName, zPost, zErr |
18759 | ); |
18760 | sqlite3_free(zErr); |
18761 | } |
18762 | |
18763 | return rc; |
18764 | } |
18765 | |
/*
** Open a new Fts5Storage handle. If the bCreate argument is true, create
** and initialize the underlying tables.
**
** If successful, set *pp to point to the new object and return SQLITE_OK.
** Otherwise, set *pp to NULL and return an SQLite error code.
*/
18773 | static int sqlite3Fts5StorageOpen( |
18774 | Fts5Config *pConfig, |
18775 | Fts5Index *pIndex, |
18776 | int bCreate, |
18777 | Fts5Storage **pp, |
18778 | char **pzErr /* OUT: Error message */ |
18779 | ){ |
18780 | int rc = SQLITE_OK; |
18781 | Fts5Storage *p; /* New object */ |
18782 | sqlite3_int64 nByte; /* Bytes of space to allocate */ |
18783 | |
18784 | nByte = sizeof(Fts5Storage) /* Fts5Storage object */ |
18785 | + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ |
18786 | *pp = p = (Fts5Storage*)sqlite3_malloc64(nByte); |
18787 | if( !p ) return SQLITE_NOMEM; |
18788 | |
18789 | memset(p, 0, (size_t)nByte); |
18790 | p->aTotalSize = (i64*)&p[1]; |
18791 | p->pConfig = pConfig; |
18792 | p->pIndex = pIndex; |
18793 | |
18794 | if( bCreate ){ |
18795 | if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
18796 | int nDefn = 32 + pConfig->nCol*10; |
18797 | char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10); |
18798 | if( zDefn==0 ){ |
18799 | rc = SQLITE_NOMEM; |
18800 | }else{ |
18801 | int i; |
18802 | int iOff; |
18803 | sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY" ); |
18804 | iOff = (int)strlen(zDefn); |
18805 | for(i=0; i<pConfig->nCol; i++){ |
18806 | sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d" , i); |
18807 | iOff += (int)strlen(&zDefn[iOff]); |
18808 | } |
18809 | rc = sqlite3Fts5CreateTable(pConfig, "content" , zDefn, 0, pzErr); |
18810 | } |
18811 | sqlite3_free(zDefn); |
18812 | } |
18813 | |
18814 | if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
18815 | rc = sqlite3Fts5CreateTable( |
18816 | pConfig, "docsize" , "id INTEGER PRIMARY KEY, sz BLOB" , 0, pzErr |
18817 | ); |
18818 | } |
18819 | if( rc==SQLITE_OK ){ |
18820 | rc = sqlite3Fts5CreateTable( |
18821 | pConfig, "config" , "k PRIMARY KEY, v" , 1, pzErr |
18822 | ); |
18823 | } |
18824 | if( rc==SQLITE_OK ){ |
18825 | rc = sqlite3Fts5StorageConfigValue(p, "version" , 0, FTS5_CURRENT_VERSION); |
18826 | } |
18827 | } |
18828 | |
18829 | if( rc ){ |
18830 | sqlite3Fts5StorageClose(p); |
18831 | *pp = 0; |
18832 | } |
18833 | return rc; |
18834 | } |
18835 | |
18836 | /* |
18837 | ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). |
18838 | */ |
18839 | static int sqlite3Fts5StorageClose(Fts5Storage *p){ |
18840 | int rc = SQLITE_OK; |
18841 | if( p ){ |
18842 | int i; |
18843 | |
18844 | /* Finalize all SQL statements */ |
18845 | for(i=0; i<ArraySize(p->aStmt); i++){ |
18846 | sqlite3_finalize(p->aStmt[i]); |
18847 | } |
18848 | |
18849 | sqlite3_free(p); |
18850 | } |
18851 | return rc; |
18852 | } |
18853 | |
/*
** Context object passed, as a void pointer, to the tokenization callback
** fts5StorageInsertCallback().
*/
typedef struct Fts5InsertCtx Fts5InsertCtx;
struct Fts5InsertCtx {
  Fts5Storage *pStorage;          /* Storage handle whose index is written */
  int iCol;                       /* Column passed to sqlite3Fts5IndexWrite() */
  int szCol;                      /* Size of column value in tokens */
};
18860 | |
18861 | /* |
18862 | ** Tokenization callback used when inserting tokens into the FTS index. |
18863 | */ |
18864 | static int fts5StorageInsertCallback( |
18865 | void *pContext, /* Pointer to Fts5InsertCtx object */ |
18866 | int tflags, |
18867 | const char *pToken, /* Buffer containing token */ |
18868 | int nToken, /* Size of token in bytes */ |
18869 | int iUnused1, /* Start offset of token */ |
18870 | int iUnused2 /* End offset of token */ |
18871 | ){ |
18872 | Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; |
18873 | Fts5Index *pIdx = pCtx->pStorage->pIndex; |
18874 | UNUSED_PARAM2(iUnused1, iUnused2); |
18875 | if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
18876 | if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ |
18877 | pCtx->szCol++; |
18878 | } |
18879 | return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); |
18880 | } |
18881 | |
18882 | /* |
18883 | ** If a row with rowid iDel is present in the %_content table, add the |
18884 | ** delete-markers to the FTS index necessary to delete it. Do not actually |
18885 | ** remove the %_content row at this time though. |
18886 | */ |
18887 | static int fts5StorageDeleteFromIndex( |
18888 | Fts5Storage *p, |
18889 | i64 iDel, |
18890 | sqlite3_value **apVal |
18891 | ){ |
18892 | Fts5Config *pConfig = p->pConfig; |
18893 | sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ |
18894 | int rc; /* Return code */ |
18895 | int rc2; /* sqlite3_reset() return code */ |
18896 | int iCol; |
18897 | Fts5InsertCtx ctx; |
18898 | |
18899 | if( apVal==0 ){ |
18900 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0); |
18901 | if( rc!=SQLITE_OK ) return rc; |
18902 | sqlite3_bind_int64(pSeek, 1, iDel); |
18903 | if( sqlite3_step(pSeek)!=SQLITE_ROW ){ |
18904 | return sqlite3_reset(pSeek); |
18905 | } |
18906 | } |
18907 | |
18908 | ctx.pStorage = p; |
18909 | ctx.iCol = -1; |
18910 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); |
18911 | for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ |
18912 | if( pConfig->abUnindexed[iCol-1]==0 ){ |
18913 | const char *zText; |
18914 | int nText; |
18915 | assert( pSeek==0 || apVal==0 ); |
18916 | assert( pSeek!=0 || apVal!=0 ); |
18917 | if( pSeek ){ |
18918 | zText = (const char*)sqlite3_column_text(pSeek, iCol); |
18919 | nText = sqlite3_column_bytes(pSeek, iCol); |
18920 | }else if( ALWAYS(apVal) ){ |
18921 | zText = (const char*)sqlite3_value_text(apVal[iCol-1]); |
18922 | nText = sqlite3_value_bytes(apVal[iCol-1]); |
18923 | }else{ |
18924 | continue; |
18925 | } |
18926 | ctx.szCol = 0; |
18927 | rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, |
18928 | zText, nText, (void*)&ctx, fts5StorageInsertCallback |
18929 | ); |
18930 | p->aTotalSize[iCol-1] -= (i64)ctx.szCol; |
18931 | if( p->aTotalSize[iCol-1]<0 ){ |
18932 | rc = FTS5_CORRUPT; |
18933 | } |
18934 | } |
18935 | } |
18936 | if( rc==SQLITE_OK && p->nTotalRow<1 ){ |
18937 | rc = FTS5_CORRUPT; |
18938 | }else{ |
18939 | p->nTotalRow--; |
18940 | } |
18941 | |
18942 | rc2 = sqlite3_reset(pSeek); |
18943 | if( rc==SQLITE_OK ) rc = rc2; |
18944 | return rc; |
18945 | } |
18946 | |
18947 | |
18948 | /* |
18949 | ** Insert a record into the %_docsize table. Specifically, do: |
18950 | ** |
18951 | ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); |
18952 | ** |
18953 | ** If there is no %_docsize table (as happens if the columnsize=0 option |
18954 | ** is specified when the FTS5 table is created), this function is a no-op. |
18955 | */ |
18956 | static int fts5StorageInsertDocsize( |
18957 | Fts5Storage *p, /* Storage module to write to */ |
18958 | i64 iRowid, /* id value */ |
18959 | Fts5Buffer *pBuf /* sz value */ |
18960 | ){ |
18961 | int rc = SQLITE_OK; |
18962 | if( p->pConfig->bColumnsize ){ |
18963 | sqlite3_stmt *pReplace = 0; |
18964 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); |
18965 | if( rc==SQLITE_OK ){ |
18966 | sqlite3_bind_int64(pReplace, 1, iRowid); |
18967 | sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); |
18968 | sqlite3_step(pReplace); |
18969 | rc = sqlite3_reset(pReplace); |
18970 | sqlite3_bind_null(pReplace, 2); |
18971 | } |
18972 | } |
18973 | return rc; |
18974 | } |
18975 | |
18976 | /* |
18977 | ** Load the contents of the "averages" record from disk into the |
18978 | ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if |
18979 | ** argument bCache is true, set the p->bTotalsValid flag to indicate |
18980 | ** that the contents of aTotalSize[] and nTotalRow are valid until |
18981 | ** further notice. |
18982 | ** |
18983 | ** Return SQLITE_OK if successful, or an SQLite error code if an error |
18984 | ** occurs. |
18985 | */ |
18986 | static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ |
18987 | int rc = SQLITE_OK; |
18988 | if( p->bTotalsValid==0 ){ |
18989 | rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); |
18990 | p->bTotalsValid = bCache; |
18991 | } |
18992 | return rc; |
18993 | } |
18994 | |
18995 | /* |
18996 | ** Store the current contents of the p->nTotalRow and p->aTotalSize[] |
18997 | ** variables in the "averages" record on disk. |
18998 | ** |
18999 | ** Return SQLITE_OK if successful, or an SQLite error code if an error |
19000 | ** occurs. |
19001 | */ |
19002 | static int fts5StorageSaveTotals(Fts5Storage *p){ |
19003 | int nCol = p->pConfig->nCol; |
19004 | int i; |
19005 | Fts5Buffer buf; |
19006 | int rc = SQLITE_OK; |
19007 | memset(&buf, 0, sizeof(buf)); |
19008 | |
19009 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); |
19010 | for(i=0; i<nCol; i++){ |
19011 | sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); |
19012 | } |
19013 | if( rc==SQLITE_OK ){ |
19014 | rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); |
19015 | } |
19016 | sqlite3_free(buf.p); |
19017 | |
19018 | return rc; |
19019 | } |
19020 | |
19021 | /* |
19022 | ** Remove a row from the FTS table. |
19023 | */ |
19024 | static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){ |
19025 | Fts5Config *pConfig = p->pConfig; |
19026 | int rc; |
19027 | sqlite3_stmt *pDel = 0; |
19028 | |
19029 | assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 ); |
19030 | rc = fts5StorageLoadTotals(p, 1); |
19031 | |
19032 | /* Delete the index records */ |
19033 | if( rc==SQLITE_OK ){ |
19034 | rc = fts5StorageDeleteFromIndex(p, iDel, apVal); |
19035 | } |
19036 | |
19037 | /* Delete the %_docsize record */ |
19038 | if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
19039 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); |
19040 | if( rc==SQLITE_OK ){ |
19041 | sqlite3_bind_int64(pDel, 1, iDel); |
19042 | sqlite3_step(pDel); |
19043 | rc = sqlite3_reset(pDel); |
19044 | } |
19045 | } |
19046 | |
19047 | /* Delete the %_content record */ |
19048 | if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ |
19049 | if( rc==SQLITE_OK ){ |
19050 | rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); |
19051 | } |
19052 | if( rc==SQLITE_OK ){ |
19053 | sqlite3_bind_int64(pDel, 1, iDel); |
19054 | sqlite3_step(pDel); |
19055 | rc = sqlite3_reset(pDel); |
19056 | } |
19057 | } |
19058 | |
19059 | return rc; |
19060 | } |
19061 | |
19062 | /* |
19063 | ** Delete all entries in the FTS5 index. |
19064 | */ |
19065 | static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ |
19066 | Fts5Config *pConfig = p->pConfig; |
19067 | int rc; |
19068 | |
19069 | p->bTotalsValid = 0; |
19070 | |
19071 | /* Delete the contents of the %_data and %_docsize tables. */ |
19072 | rc = fts5ExecPrintf(pConfig->db, 0, |
19073 | "DELETE FROM %Q.'%q_data';" |
19074 | "DELETE FROM %Q.'%q_idx';" , |
19075 | pConfig->zDb, pConfig->zName, |
19076 | pConfig->zDb, pConfig->zName |
19077 | ); |
19078 | if( rc==SQLITE_OK && pConfig->bColumnsize ){ |
19079 | rc = fts5ExecPrintf(pConfig->db, 0, |
19080 | "DELETE FROM %Q.'%q_docsize';" , |
19081 | pConfig->zDb, pConfig->zName |
19082 | ); |
19083 | } |
19084 | |
19085 | /* Reinitialize the %_data table. This call creates the initial structure |
19086 | ** and averages records. */ |
19087 | if( rc==SQLITE_OK ){ |
19088 | rc = sqlite3Fts5IndexReinit(p->pIndex); |
19089 | } |
19090 | if( rc==SQLITE_OK ){ |
19091 | rc = sqlite3Fts5StorageConfigValue(p, "version" , 0, FTS5_CURRENT_VERSION); |
19092 | } |
19093 | return rc; |
19094 | } |
19095 | |
19096 | static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ |
19097 | Fts5Buffer buf = {0,0,0}; |
19098 | Fts5Config *pConfig = p->pConfig; |
19099 | sqlite3_stmt *pScan = 0; |
19100 | Fts5InsertCtx ctx; |
19101 | int rc, rc2; |
19102 | |
19103 | memset(&ctx, 0, sizeof(Fts5InsertCtx)); |
19104 | ctx.pStorage = p; |
19105 | rc = sqlite3Fts5StorageDeleteAll(p); |
19106 | if( rc==SQLITE_OK ){ |
19107 | rc = fts5StorageLoadTotals(p, 1); |
19108 | } |
19109 | |
19110 | if( rc==SQLITE_OK ){ |
19111 | rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); |
19112 | } |
19113 | |
19114 | while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){ |
19115 | i64 iRowid = sqlite3_column_int64(pScan, 0); |
19116 | |
19117 | sqlite3Fts5BufferZero(&buf); |
19118 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); |
19119 | for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ |
19120 | ctx.szCol = 0; |
19121 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ |
19122 | const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1); |
19123 | int nText = sqlite3_column_bytes(pScan, ctx.iCol+1); |
19124 | rc = sqlite3Fts5Tokenize(pConfig, |
19125 | FTS5_TOKENIZE_DOCUMENT, |
19126 | zText, nText, |
19127 | (void*)&ctx, |
19128 | fts5StorageInsertCallback |
19129 | ); |
19130 | } |
19131 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
19132 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; |
19133 | } |
19134 | p->nTotalRow++; |
19135 | |
19136 | if( rc==SQLITE_OK ){ |
19137 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); |
19138 | } |
19139 | } |
19140 | sqlite3_free(buf.p); |
19141 | rc2 = sqlite3_reset(pScan); |
19142 | if( rc==SQLITE_OK ) rc = rc2; |
19143 | |
19144 | /* Write the averages record */ |
19145 | if( rc==SQLITE_OK ){ |
19146 | rc = fts5StorageSaveTotals(p); |
19147 | } |
19148 | return rc; |
19149 | } |
19150 | |
19151 | static int sqlite3Fts5StorageOptimize(Fts5Storage *p){ |
19152 | return sqlite3Fts5IndexOptimize(p->pIndex); |
19153 | } |
19154 | |
19155 | static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ |
19156 | return sqlite3Fts5IndexMerge(p->pIndex, nMerge); |
19157 | } |
19158 | |
19159 | static int sqlite3Fts5StorageReset(Fts5Storage *p){ |
19160 | return sqlite3Fts5IndexReset(p->pIndex); |
19161 | } |
19162 | |
19163 | /* |
19164 | ** Allocate a new rowid. This is used for "external content" tables when |
19165 | ** a NULL value is inserted into the rowid column. The new rowid is allocated |
19166 | ** by inserting a dummy row into the %_docsize table. The dummy will be |
19167 | ** overwritten later. |
19168 | ** |
19169 | ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In |
19170 | ** this case the user is required to provide a rowid explicitly. |
19171 | */ |
19172 | static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ |
19173 | int rc = SQLITE_MISMATCH; |
19174 | if( p->pConfig->bColumnsize ){ |
19175 | sqlite3_stmt *pReplace = 0; |
19176 | rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); |
19177 | if( rc==SQLITE_OK ){ |
19178 | sqlite3_bind_null(pReplace, 1); |
19179 | sqlite3_bind_null(pReplace, 2); |
19180 | sqlite3_step(pReplace); |
19181 | rc = sqlite3_reset(pReplace); |
19182 | } |
19183 | if( rc==SQLITE_OK ){ |
19184 | *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); |
19185 | } |
19186 | } |
19187 | return rc; |
19188 | } |
19189 | |
19190 | /* |
19191 | ** Insert a new row into the FTS content table. |
19192 | */ |
19193 | static int sqlite3Fts5StorageContentInsert( |
19194 | Fts5Storage *p, |
19195 | sqlite3_value **apVal, |
19196 | i64 *piRowid |
19197 | ){ |
19198 | Fts5Config *pConfig = p->pConfig; |
19199 | int rc = SQLITE_OK; |
19200 | |
19201 | /* Insert the new row into the %_content table. */ |
19202 | if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ |
19203 | if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ |
19204 | *piRowid = sqlite3_value_int64(apVal[1]); |
19205 | }else{ |
19206 | rc = fts5StorageNewRowid(p, piRowid); |
19207 | } |
19208 | }else{ |
19209 | sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ |
19210 | int i; /* Counter variable */ |
19211 | rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0); |
19212 | for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ |
19213 | rc = sqlite3_bind_value(pInsert, i, apVal[i]); |
19214 | } |
19215 | if( rc==SQLITE_OK ){ |
19216 | sqlite3_step(pInsert); |
19217 | rc = sqlite3_reset(pInsert); |
19218 | } |
19219 | *piRowid = sqlite3_last_insert_rowid(pConfig->db); |
19220 | } |
19221 | |
19222 | return rc; |
19223 | } |
19224 | |
19225 | /* |
19226 | ** Insert new entries into the FTS index and %_docsize table. |
19227 | */ |
19228 | static int sqlite3Fts5StorageIndexInsert( |
19229 | Fts5Storage *p, |
19230 | sqlite3_value **apVal, |
19231 | i64 iRowid |
19232 | ){ |
19233 | Fts5Config *pConfig = p->pConfig; |
19234 | int rc = SQLITE_OK; /* Return code */ |
19235 | Fts5InsertCtx ctx; /* Tokenization callback context object */ |
19236 | Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ |
19237 | |
19238 | memset(&buf, 0, sizeof(Fts5Buffer)); |
19239 | ctx.pStorage = p; |
19240 | rc = fts5StorageLoadTotals(p, 1); |
19241 | |
19242 | if( rc==SQLITE_OK ){ |
19243 | rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); |
19244 | } |
19245 | for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ |
19246 | ctx.szCol = 0; |
19247 | if( pConfig->abUnindexed[ctx.iCol]==0 ){ |
19248 | const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]); |
19249 | int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]); |
19250 | rc = sqlite3Fts5Tokenize(pConfig, |
19251 | FTS5_TOKENIZE_DOCUMENT, |
19252 | zText, nText, |
19253 | (void*)&ctx, |
19254 | fts5StorageInsertCallback |
19255 | ); |
19256 | } |
19257 | sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); |
19258 | p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; |
19259 | } |
19260 | p->nTotalRow++; |
19261 | |
19262 | /* Write the %_docsize record */ |
19263 | if( rc==SQLITE_OK ){ |
19264 | rc = fts5StorageInsertDocsize(p, iRowid, &buf); |
19265 | } |
19266 | sqlite3_free(buf.p); |
19267 | |
19268 | return rc; |
19269 | } |
19270 | |
19271 | static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ |
19272 | Fts5Config *pConfig = p->pConfig; |
19273 | char *zSql; |
19274 | int rc; |
19275 | |
19276 | zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'" , |
19277 | pConfig->zDb, pConfig->zName, zSuffix |
19278 | ); |
19279 | if( zSql==0 ){ |
19280 | rc = SQLITE_NOMEM; |
19281 | }else{ |
19282 | sqlite3_stmt *pCnt = 0; |
19283 | rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); |
19284 | if( rc==SQLITE_OK ){ |
19285 | if( SQLITE_ROW==sqlite3_step(pCnt) ){ |
19286 | *pnRow = sqlite3_column_int64(pCnt, 0); |
19287 | } |
19288 | rc = sqlite3_finalize(pCnt); |
19289 | } |
19290 | } |
19291 | |
19292 | sqlite3_free(zSql); |
19293 | return rc; |
19294 | } |
19295 | |
/*
** Context object used by sqlite3Fts5StorageIntegrity().
**
** A pointer to one of these is passed as the context argument to
** fts5StorageIntegrityCallback() while each column of each row is
** tokenized. The callback folds a checksum for every index entry it
** expects to exist into the cksum field.
*/
typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
struct Fts5IntegrityCtx {
  i64 iRowid; /* Rowid of the row currently being tokenized */
  int iCol; /* Index of the column currently being tokenized */
  int szCol; /* Tokens counted so far in the current column */
  u64 cksum; /* XOR of all entry checksums computed so far */
  Fts5Termset *pTermset; /* Terms already checksummed; 0 if not in use */
  Fts5Config *pConfig; /* Configuration of the table being checked */
};
19308 | |
19309 | |
19310 | /* |
19311 | ** Tokenization callback used by integrity check. |
19312 | */ |
19313 | static int fts5StorageIntegrityCallback( |
19314 | void *pContext, /* Pointer to Fts5IntegrityCtx object */ |
19315 | int tflags, |
19316 | const char *pToken, /* Buffer containing token */ |
19317 | int nToken, /* Size of token in bytes */ |
19318 | int iUnused1, /* Start offset of token */ |
19319 | int iUnused2 /* End offset of token */ |
19320 | ){ |
19321 | Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; |
19322 | Fts5Termset *pTermset = pCtx->pTermset; |
19323 | int bPresent; |
19324 | int ii; |
19325 | int rc = SQLITE_OK; |
19326 | int iPos; |
19327 | int iCol; |
19328 | |
19329 | UNUSED_PARAM2(iUnused1, iUnused2); |
19330 | if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; |
19331 | |
19332 | if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ |
19333 | pCtx->szCol++; |
19334 | } |
19335 | |
19336 | switch( pCtx->pConfig->eDetail ){ |
19337 | case FTS5_DETAIL_FULL: |
19338 | iPos = pCtx->szCol-1; |
19339 | iCol = pCtx->iCol; |
19340 | break; |
19341 | |
19342 | case FTS5_DETAIL_COLUMNS: |
19343 | iPos = pCtx->iCol; |
19344 | iCol = 0; |
19345 | break; |
19346 | |
19347 | default: |
19348 | assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE ); |
19349 | iPos = 0; |
19350 | iCol = 0; |
19351 | break; |
19352 | } |
19353 | |
19354 | rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); |
19355 | if( rc==SQLITE_OK && bPresent==0 ){ |
19356 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( |
19357 | pCtx->iRowid, iCol, iPos, 0, pToken, nToken |
19358 | ); |
19359 | } |
19360 | |
19361 | for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){ |
19362 | const int nChar = pCtx->pConfig->aPrefix[ii]; |
19363 | int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); |
19364 | if( nByte ){ |
19365 | rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); |
19366 | if( bPresent==0 ){ |
19367 | pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( |
19368 | pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte |
19369 | ); |
19370 | } |
19371 | } |
19372 | } |
19373 | |
19374 | return rc; |
19375 | } |
19376 | |
/*
** Check that the contents of the FTS index match that of the %_content
** table. Return SQLITE_OK if they do, or FTS5_CORRUPT if not. Return
** some other SQLite error code if an error occurs while attempting to
** determine this.
**
** The content-based checks (expected checksum, per-row column sizes,
** totals record, shadow table row counts) are only performed when the
** table has normal content, or when it has external content and iArg is
** non-zero. In every case the index module's own integrity check is
** invoked last, provided no earlier step has failed.
*/
static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){
  Fts5Config *pConfig = p->pConfig;
  int rc = SQLITE_OK; /* Return code */
  int *aColSize; /* Array of size pConfig->nCol */
  i64 *aTotalSize; /* Array of size pConfig->nCol */
  Fts5IntegrityCtx ctx;
  sqlite3_stmt *pScan;
  int bUseCksum;

  memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
  ctx.pConfig = p->pConfig;
  /* A single allocation holds both arrays: nCol i64 totals followed
  ** immediately by nCol int column sizes. Only the totals are zeroed
  ** here. */
  aTotalSize = (i64*)sqlite3_malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64)));
  if( !aTotalSize ) return SQLITE_NOMEM;
  aColSize = (int*)&aTotalSize[pConfig->nCol];
  memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);

  bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL
  || (pConfig->eContent==FTS5_CONTENT_EXTERNAL && iArg)
  );
  if( bUseCksum ){
    /* Generate the expected index checksum based on the contents of the
    ** %_content table. This block stores the checksum in ctx.cksum. */
    rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
    if( rc==SQLITE_OK ){
      int rc2;
      while( SQLITE_ROW==sqlite3_step(pScan) ){
        int i;
        ctx.iRowid = sqlite3_column_int64(pScan, 0);
        ctx.szCol = 0;
        /* Load the stored per-column sizes for this row so that they can
        ** be compared against the freshly computed token counts. */
        if( pConfig->bColumnsize ){
          rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
        }
        /* With detail=none a single termset spans the whole row; with
        ** detail=columns (below) a new termset is used for each column. */
        if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){
          rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
        }
        for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
          if( pConfig->abUnindexed[i] ) continue;
          ctx.iCol = i;
          ctx.szCol = 0;
          if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
            rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
          }
          if( rc==SQLITE_OK ){
            /* Scan column 0 is the rowid; user column i is at i+1. */
            const char *zText = (const char*)sqlite3_column_text(pScan, i+1);
            int nText = sqlite3_column_bytes(pScan, i+1);
            rc = sqlite3Fts5Tokenize(pConfig,
            FTS5_TOKENIZE_DOCUMENT,
            zText, nText,
            (void*)&ctx,
            fts5StorageIntegrityCallback
            );
          }
          /* The token count just computed must agree with the value
          ** stored in the %_docsize record. */
          if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
            rc = FTS5_CORRUPT;
          }
          aTotalSize[i] += ctx.szCol;
          if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
            sqlite3Fts5TermsetFree(ctx.pTermset);
            ctx.pTermset = 0;
          }
        }
        /* Releases the per-row termset used with detail=none. In the
        ** other modes ctx.pTermset is already 0 at this point. */
        sqlite3Fts5TermsetFree(ctx.pTermset);
        ctx.pTermset = 0;

        if( rc!=SQLITE_OK ) break;
      }
      /* The statement is always reset; its error code is only reported
      ** if nothing else has failed. */
      rc2 = sqlite3_reset(pScan);
      if( rc==SQLITE_OK ) rc = rc2;
    }

    /* Test that the "totals" (sometimes called "averages") record looks Ok */
    if( rc==SQLITE_OK ){
      int i;
      rc = fts5StorageLoadTotals(p, 0);
      for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
        if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
      }
    }

    /* Check that the %_docsize and %_content tables contain the expected
    ** number of rows. */
    if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
      i64 nRow = 0;
      rc = fts5StorageCount(p, "content" , &nRow);
      if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
    }
    if( rc==SQLITE_OK && pConfig->bColumnsize ){
      i64 nRow = 0;
      rc = fts5StorageCount(p, "docsize" , &nRow);
      if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
    }
  }

  /* Pass the expected checksum down to the FTS index module. It will
  ** verify, amongst other things, that it matches the checksum generated by
  ** inspecting the index itself. */
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum);
  }

  /* aColSize lives inside the same allocation, so this frees both arrays. */
  sqlite3_free(aTotalSize);
  return rc;
}
19486 | |
19487 | /* |
19488 | ** Obtain an SQLite statement handle that may be used to read data from the |
19489 | ** %_content table. |
19490 | */ |
19491 | static int sqlite3Fts5StorageStmt( |
19492 | Fts5Storage *p, |
19493 | int eStmt, |
19494 | sqlite3_stmt **pp, |
19495 | char **pzErrMsg |
19496 | ){ |
19497 | int rc; |
19498 | assert( eStmt==FTS5_STMT_SCAN_ASC |
19499 | || eStmt==FTS5_STMT_SCAN_DESC |
19500 | || eStmt==FTS5_STMT_LOOKUP |
19501 | ); |
19502 | rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); |
19503 | if( rc==SQLITE_OK ){ |
19504 | assert( p->aStmt[eStmt]==*pp ); |
19505 | p->aStmt[eStmt] = 0; |
19506 | } |
19507 | return rc; |
19508 | } |
19509 | |
19510 | /* |
19511 | ** Release an SQLite statement handle obtained via an earlier call to |
19512 | ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function |
19513 | ** must match that passed to the sqlite3Fts5StorageStmt() call. |
19514 | */ |
19515 | static void sqlite3Fts5StorageStmtRelease( |
19516 | Fts5Storage *p, |
19517 | int eStmt, |
19518 | sqlite3_stmt *pStmt |
19519 | ){ |
19520 | assert( eStmt==FTS5_STMT_SCAN_ASC |
19521 | || eStmt==FTS5_STMT_SCAN_DESC |
19522 | || eStmt==FTS5_STMT_LOOKUP |
19523 | ); |
19524 | if( p->aStmt[eStmt]==0 ){ |
19525 | sqlite3_reset(pStmt); |
19526 | p->aStmt[eStmt] = pStmt; |
19527 | }else{ |
19528 | sqlite3_finalize(pStmt); |
19529 | } |
19530 | } |
19531 | |
19532 | static int fts5StorageDecodeSizeArray( |
19533 | int *aCol, int nCol, /* Array to populate */ |
19534 | const u8 *aBlob, int nBlob /* Record to read varints from */ |
19535 | ){ |
19536 | int i; |
19537 | int iOff = 0; |
19538 | for(i=0; i<nCol; i++){ |
19539 | if( iOff>=nBlob ) return 1; |
19540 | iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]); |
19541 | } |
19542 | return (iOff!=nBlob); |
19543 | } |
19544 | |
19545 | /* |
19546 | ** Argument aCol points to an array of integers containing one entry for |
19547 | ** each table column. This function reads the %_docsize record for the |
19548 | ** specified rowid and populates aCol[] with the results. |
19549 | ** |
19550 | ** An SQLite error code is returned if an error occurs, or SQLITE_OK |
19551 | ** otherwise. |
19552 | */ |
19553 | static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ |
19554 | int nCol = p->pConfig->nCol; /* Number of user columns in table */ |
19555 | sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ |
19556 | int rc; /* Return Code */ |
19557 | |
19558 | assert( p->pConfig->bColumnsize ); |
19559 | rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); |
19560 | if( pLookup ){ |
19561 | int bCorrupt = 1; |
19562 | assert( rc==SQLITE_OK ); |
19563 | sqlite3_bind_int64(pLookup, 1, iRowid); |
19564 | if( SQLITE_ROW==sqlite3_step(pLookup) ){ |
19565 | const u8 *aBlob = sqlite3_column_blob(pLookup, 0); |
19566 | int nBlob = sqlite3_column_bytes(pLookup, 0); |
19567 | if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ |
19568 | bCorrupt = 0; |
19569 | } |
19570 | } |
19571 | rc = sqlite3_reset(pLookup); |
19572 | if( bCorrupt && rc==SQLITE_OK ){ |
19573 | rc = FTS5_CORRUPT; |
19574 | } |
19575 | }else{ |
19576 | assert( rc!=SQLITE_OK ); |
19577 | } |
19578 | |
19579 | return rc; |
19580 | } |
19581 | |
19582 | static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ |
19583 | int rc = fts5StorageLoadTotals(p, 0); |
19584 | if( rc==SQLITE_OK ){ |
19585 | *pnToken = 0; |
19586 | if( iCol<0 ){ |
19587 | int i; |
19588 | for(i=0; i<p->pConfig->nCol; i++){ |
19589 | *pnToken += p->aTotalSize[i]; |
19590 | } |
19591 | }else if( iCol<p->pConfig->nCol ){ |
19592 | *pnToken = p->aTotalSize[iCol]; |
19593 | }else{ |
19594 | rc = SQLITE_RANGE; |
19595 | } |
19596 | } |
19597 | return rc; |
19598 | } |
19599 | |
19600 | static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ |
19601 | int rc = fts5StorageLoadTotals(p, 0); |
19602 | if( rc==SQLITE_OK ){ |
19603 | /* nTotalRow being zero does not necessarily indicate a corrupt |
19604 | ** database - it might be that the FTS5 table really does contain zero |
19605 | ** rows. However this function is only called from the xRowCount() API, |
19606 | ** and there is no way for that API to be invoked if the table contains |
19607 | ** no rows. Hence the FTS5_CORRUPT return. */ |
19608 | *pnRow = p->nTotalRow; |
19609 | if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT; |
19610 | } |
19611 | return rc; |
19612 | } |
19613 | |
19614 | /* |
19615 | ** Flush any data currently held in-memory to disk. |
19616 | */ |
19617 | static int sqlite3Fts5StorageSync(Fts5Storage *p){ |
19618 | int rc = SQLITE_OK; |
19619 | i64 iLastRowid = sqlite3_last_insert_rowid(p->pConfig->db); |
19620 | if( p->bTotalsValid ){ |
19621 | rc = fts5StorageSaveTotals(p); |
19622 | p->bTotalsValid = 0; |
19623 | } |
19624 | if( rc==SQLITE_OK ){ |
19625 | rc = sqlite3Fts5IndexSync(p->pIndex); |
19626 | } |
19627 | sqlite3_set_last_insert_rowid(p->pConfig->db, iLastRowid); |
19628 | return rc; |
19629 | } |
19630 | |
19631 | static int sqlite3Fts5StorageRollback(Fts5Storage *p){ |
19632 | p->bTotalsValid = 0; |
19633 | return sqlite3Fts5IndexRollback(p->pIndex); |
19634 | } |
19635 | |
19636 | static int sqlite3Fts5StorageConfigValue( |
19637 | Fts5Storage *p, |
19638 | const char *z, |
19639 | sqlite3_value *pVal, |
19640 | int iVal |
19641 | ){ |
19642 | sqlite3_stmt *pReplace = 0; |
19643 | int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); |
19644 | if( rc==SQLITE_OK ){ |
19645 | sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC); |
19646 | if( pVal ){ |
19647 | sqlite3_bind_value(pReplace, 2, pVal); |
19648 | }else{ |
19649 | sqlite3_bind_int(pReplace, 2, iVal); |
19650 | } |
19651 | sqlite3_step(pReplace); |
19652 | rc = sqlite3_reset(pReplace); |
19653 | sqlite3_bind_null(pReplace, 1); |
19654 | } |
19655 | if( rc==SQLITE_OK && pVal ){ |
19656 | int iNew = p->pConfig->iCookie + 1; |
19657 | rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); |
19658 | if( rc==SQLITE_OK ){ |
19659 | p->pConfig->iCookie = iNew; |
19660 | } |
19661 | } |
19662 | return rc; |
19663 | } |
19664 | |
19665 | #line 1 "fts5_tokenize.c" |
19666 | /* |
19667 | ** 2014 May 31 |
19668 | ** |
19669 | ** The author disclaims copyright to this source code. In place of |
19670 | ** a legal notice, here is a blessing: |
19671 | ** |
19672 | ** May you do good and not evil. |
19673 | ** May you find forgiveness for yourself and forgive others. |
19674 | ** May you share freely, never taking more than you give. |
19675 | ** |
19676 | ****************************************************************************** |
19677 | */ |
19678 | |
19679 | |
19680 | /* #include "fts5Int.h" */ |
19681 | |
19682 | /************************************************************************** |
19683 | ** Start of ascii tokenizer implementation. |
19684 | */ |
19685 | |
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters.
**
** The table is indexed by byte value (0x00..0x7F). An entry is 1 for the
** digits '0'..'9' and the letters 'A'..'Z' and 'a'..'z', and 0 for every
** other byte. It is the template copied into each new AsciiTokenizer.
*/
static unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */
};
19700 | |
/*
** State of an "ascii" tokenizer instance. aTokenChar[c] is non-zero if
** ASCII byte c (0x00..0x7F) is a token character and zero if it is a
** separator. It starts as a copy of aAsciiTokenChar[] and is then adjusted
** by any "tokenchars" / "separators" arguments.
*/
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];
};
19705 | |
19706 | static void fts5AsciiAddExceptions( |
19707 | AsciiTokenizer *p, |
19708 | const char *zArg, |
19709 | int bTokenChars |
19710 | ){ |
19711 | int i; |
19712 | for(i=0; zArg[i]; i++){ |
19713 | if( (zArg[i] & 0x80)==0 ){ |
19714 | p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; |
19715 | } |
19716 | } |
19717 | } |
19718 | |
19719 | /* |
19720 | ** Delete a "ascii" tokenizer. |
19721 | */ |
19722 | static void fts5AsciiDelete(Fts5Tokenizer *p){ |
19723 | sqlite3_free(p); |
19724 | } |
19725 | |
19726 | /* |
19727 | ** Create an "ascii" tokenizer. |
19728 | */ |
19729 | static int fts5AsciiCreate( |
19730 | void *pUnused, |
19731 | const char **azArg, int nArg, |
19732 | Fts5Tokenizer **ppOut |
19733 | ){ |
19734 | int rc = SQLITE_OK; |
19735 | AsciiTokenizer *p = 0; |
19736 | UNUSED_PARAM(pUnused); |
19737 | if( nArg%2 ){ |
19738 | rc = SQLITE_ERROR; |
19739 | }else{ |
19740 | p = sqlite3_malloc(sizeof(AsciiTokenizer)); |
19741 | if( p==0 ){ |
19742 | rc = SQLITE_NOMEM; |
19743 | }else{ |
19744 | int i; |
19745 | memset(p, 0, sizeof(AsciiTokenizer)); |
19746 | memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); |
19747 | for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ |
19748 | const char *zArg = azArg[i+1]; |
19749 | if( 0==sqlite3_stricmp(azArg[i], "tokenchars" ) ){ |
19750 | fts5AsciiAddExceptions(p, zArg, 1); |
19751 | }else |
19752 | if( 0==sqlite3_stricmp(azArg[i], "separators" ) ){ |
19753 | fts5AsciiAddExceptions(p, zArg, 0); |
19754 | }else{ |
19755 | rc = SQLITE_ERROR; |
19756 | } |
19757 | } |
19758 | if( rc!=SQLITE_OK ){ |
19759 | fts5AsciiDelete((Fts5Tokenizer*)p); |
19760 | p = 0; |
19761 | } |
19762 | } |
19763 | } |
19764 | |
19765 | *ppOut = (Fts5Tokenizer*)p; |
19766 | return rc; |
19767 | } |
19768 | |
19769 | |
/*
** Copy nByte bytes from aIn to aOut, converting the ASCII upper case
** letters 'A'..'Z' to lower case. All other bytes are copied unchanged.
** Nothing is written if nByte is zero or negative.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int iByte = 0;
  while( iByte<nByte ){
    char ch = aIn[iByte];
    aOut[iByte] = (ch>='A' && ch<='Z') ? (char)(ch + 32) : ch;
    iByte++;
  }
}
19778 | |
/*
** Tokenize some text using the ascii tokenizer.
**
** The nText bytes at pText are split into tokens. A token is a maximal
** run of bytes that either have the high bit set or are marked as token
** characters in the tokenizer's aTokenChar[] table. Each token is copied
** with ASCII upper case folded to lower case and passed to xToken()
** together with pCtx, a flags value of 0, and the byte offsets of the
** token's start and of the byte just past its end.
**
** Tokenization stops as soon as xToken() returns anything other than
** SQLITE_OK. A return of SQLITE_DONE from xToken() is reported to the
** caller as SQLITE_OK; any other value is returned unchanged. SQLITE_NOMEM
** is returned if the fold buffer cannot be grown.
*/
static int fts5AsciiTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int iUnused,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  int ie;
  int is = 0;

  /* Folded tokens are built in aFold[] on the stack. pFold/nFold track
  ** the buffer currently in use, which is switched to a heap allocation
  ** if a token longer than the current buffer is encountered. */
  char aFold[64];
  int nFold = sizeof(aFold);
  char *pFold = aFold;
  unsigned char *a = p->aTokenChar;

  UNUSED_PARAM(iUnused);

  while( is<nText && rc==SQLITE_OK ){
    int nByte;

    /* Skip any leading divider characters. */
    while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
      is++;
    }
    if( is==nText ) break;

    /* Count the token characters */
    ie = is+1;
    while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
      ie++;
    }

    /* Fold to lower case */
    nByte = ie-is;
    if( nByte>nFold ){
      /* Grow to twice the size needed for this token. The old heap
      ** buffer (if any) is freed rather than reallocated because its
      ** contents are not needed. */
      if( pFold!=aFold ) sqlite3_free(pFold);
      pFold = sqlite3_malloc64((sqlite3_int64)nByte*2);
      if( pFold==0 ){
        rc = SQLITE_NOMEM;
        break;
      }
      nFold = nByte*2;
    }
    asciiFold(pFold, &pText[is], nByte);

    /* Invoke the token callback */
    rc = xToken(pCtx, 0, pFold, nByte, is, ie);
    /* The byte at offset ie (if there is one) is a divider, so the next
    ** scan may start just after it. */
    is = ie+1;
  }

  if( pFold!=aFold ) sqlite3_free(pFold);
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
19838 | |
19839 | /************************************************************************** |
19840 | ** Start of unicode61 tokenizer implementation. |
19841 | */ |
19842 | |
19843 | |
/*
** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
** from the sqlite3 source file utf.c. If this file is compiled as part
** of the amalgamation, they are not required.
**
** sqlite3Utf8Trans1[] is indexed by (lead byte - 0xc0) and gives the
** initial value of the code point for a multi-byte sequence that starts
** with that lead byte.
**
** READ_UTF8(zIn, zTerm, c) reads one character starting at zIn into c
** and advances zIn past it. Continuation bytes (10xxxxxx) are consumed
** until one is missing or zIn reaches zTerm. A result below 0x80, in the
** range 0xD800..0xDFFF, or equal to 0xFFFE or 0xFFFF is replaced by
** 0xFFFD.
**
** WRITE_UTF8(zOut, c) writes c at zOut as a 1, 2, 3 or 4 byte sequence,
** chosen by the magnitude of c, and advances zOut past it.
**
** Both macros evaluate their arguments more than once.
*/
#ifndef SQLITE_AMALGAMATION

static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};

#define READ_UTF8(zIn, zTerm, c) \
  c = *(zIn++); \
  if( c>=0xc0 ){ \
    c = sqlite3Utf8Trans1[c-0xc0]; \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
      c = (c<<6) + (0x3f & *(zIn++)); \
    } \
    if( c<0x80 \
    || (c&0xFFFFF800)==0xD800 \
    || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
  }


#define WRITE_UTF8(zOut, c) { \
  if( c<0x00080 ){ \
    *zOut++ = (unsigned char)(c&0xFF); \
  } \
  else if( c<0x00800 ){ \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
  } \
  else if( c<0x10000 ){ \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
  }else{ \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
  } \
}

#endif /* ifndef SQLITE_AMALGAMATION */
19896 | |
/*
** State of a "unicode61" tokenizer instance.
**
** aiException[] holds nException code points, kept sorted in ascending
** order, that were added by "tokenchars" / "separators" arguments and lie
** outside the ASCII range. ASCII range exceptions are recorded directly
** in aTokenChar[] instead.
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128]; /* ASCII range token characters */
  char *aFold; /* Buffer to fold text into */
  int nFold; /* Size of aFold[] in bytes */
  int eRemoveDiacritic; /* One of the FTS5_REMOVE_DIACRITICS_* values */
  int nException; /* Number of entries in aiException[] */
  int *aiException; /* Sorted array of exception code points */

  unsigned char aCategory[32]; /* True for token char categories */
};

/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c).
** They correspond to the remove_diacritics=0, 1 and 2 tokenizer options. */
#define FTS5_REMOVE_DIACRITICS_NONE 0
#define FTS5_REMOVE_DIACRITICS_SIMPLE 1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
19913 | |
/*
** Process the argument of a "tokenchars" (bTokenChars==1) or "separators"
** (bTokenChars==0) option. z is a nul-terminated UTF-8 string; each code
** point in it is handled as follows:
**
**   * Code points below 128 have their p->aTokenChar[] entry set to
**     bTokenChars.
**
**   * Any other code point is inserted into the sorted p->aiException[]
**     array, but only if its default classification according to
**     p->aCategory[] differs from bTokenChars and it is not a diacritic.
**
** Returns SQLITE_OK on success or SQLITE_NOMEM if the exception array
** cannot be enlarged, in which case the tokenizer is left unmodified.
*/
static int fts5UnicodeAddExceptions(
  Unicode61Tokenizer *p, /* Tokenizer object */
  const char *z, /* Characters to treat as exceptions */
  int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */
){
  int rc = SQLITE_OK;
  int n = (int)strlen(z);
  int *aNew;

  if( n>0 ){
    /* Each code point occupies at least one byte of z, so n additional
    ** slots are always enough. */
    aNew = (int*)sqlite3_realloc64(p->aiException,
    (n+p->nException)*sizeof(int));
    if( aNew ){
      int nNew = p->nException;
      const unsigned char *zCsr = (const unsigned char*)z;
      const unsigned char *zTerm = (const unsigned char*)&z[n];
      while( zCsr<zTerm ){
        u32 iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
          assert( (bToken==0 || bToken==1) );
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            /* Find the first entry greater than iCode, shift it and
            ** everything after it up by one slot, and insert iCode there
            ** so that the array stays sorted. */
            int i;
            for(i=0; i<nNew; i++){
              if( (u32)aNew[i]>iCode ) break;
            }
            memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
            aNew[i] = iCode;
            nNew++;
          }
        }
      }
      p->aiException = aNew;
      p->nException = nNew;
    }else{
      rc = SQLITE_NOMEM;
    }
  }

  return rc;
}
19960 | |
19961 | /* |
19962 | ** Return true if the p->aiException[] array contains the value iCode. |
19963 | */ |
19964 | static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ |
19965 | if( p->nException>0 ){ |
19966 | int *a = p->aiException; |
19967 | int iLo = 0; |
19968 | int iHi = p->nException-1; |
19969 | |
19970 | while( iHi>=iLo ){ |
19971 | int iTest = (iHi + iLo) / 2; |
19972 | if( iCode==a[iTest] ){ |
19973 | return 1; |
19974 | }else if( iCode>a[iTest] ){ |
19975 | iLo = iTest+1; |
19976 | }else{ |
19977 | iHi = iTest-1; |
19978 | } |
19979 | } |
19980 | } |
19981 | |
19982 | return 0; |
19983 | } |
19984 | |
19985 | /* |
19986 | ** Delete a "unicode61" tokenizer. |
19987 | */ |
19988 | static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ |
19989 | if( pTok ){ |
19990 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; |
19991 | sqlite3_free(p->aiException); |
19992 | sqlite3_free(p->aFold); |
19993 | sqlite3_free(p); |
19994 | } |
19995 | return; |
19996 | } |
19997 | |
19998 | static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){ |
19999 | const char *z = zCat; |
20000 | |
20001 | while( *z ){ |
20002 | while( *z==' ' || *z=='\t' ) z++; |
20003 | if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){ |
20004 | return SQLITE_ERROR; |
20005 | } |
20006 | while( *z!=' ' && *z!='\t' && *z!='\0' ) z++; |
20007 | } |
20008 | |
20009 | sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar); |
20010 | return SQLITE_OK; |
20011 | } |
20012 | |
20013 | /* |
20014 | ** Create a "unicode61" tokenizer. |
20015 | */ |
20016 | static int fts5UnicodeCreate( |
20017 | void *pUnused, |
20018 | const char **azArg, int nArg, |
20019 | Fts5Tokenizer **ppOut |
20020 | ){ |
20021 | int rc = SQLITE_OK; /* Return code */ |
20022 | Unicode61Tokenizer *p = 0; /* New tokenizer object */ |
20023 | |
20024 | UNUSED_PARAM(pUnused); |
20025 | |
20026 | if( nArg%2 ){ |
20027 | rc = SQLITE_ERROR; |
20028 | }else{ |
20029 | p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer)); |
20030 | if( p ){ |
20031 | const char *zCat = "L* N* Co" ; |
20032 | int i; |
20033 | memset(p, 0, sizeof(Unicode61Tokenizer)); |
20034 | |
20035 | p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE; |
20036 | p->nFold = 64; |
20037 | p->aFold = sqlite3_malloc64(p->nFold * sizeof(char)); |
20038 | if( p->aFold==0 ){ |
20039 | rc = SQLITE_NOMEM; |
20040 | } |
20041 | |
20042 | /* Search for a "categories" argument */ |
20043 | for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ |
20044 | if( 0==sqlite3_stricmp(azArg[i], "categories" ) ){ |
20045 | zCat = azArg[i+1]; |
20046 | } |
20047 | } |
20048 | |
20049 | if( rc==SQLITE_OK ){ |
20050 | rc = unicodeSetCategories(p, zCat); |
20051 | } |
20052 | |
20053 | for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ |
20054 | const char *zArg = azArg[i+1]; |
20055 | if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics" ) ){ |
20056 | if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ |
20057 | rc = SQLITE_ERROR; |
20058 | }else{ |
20059 | p->eRemoveDiacritic = (zArg[0] - '0'); |
20060 | assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE |
20061 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE |
20062 | || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX |
20063 | ); |
20064 | } |
20065 | }else |
20066 | if( 0==sqlite3_stricmp(azArg[i], "tokenchars" ) ){ |
20067 | rc = fts5UnicodeAddExceptions(p, zArg, 1); |
20068 | }else |
20069 | if( 0==sqlite3_stricmp(azArg[i], "separators" ) ){ |
20070 | rc = fts5UnicodeAddExceptions(p, zArg, 0); |
20071 | }else |
20072 | if( 0==sqlite3_stricmp(azArg[i], "categories" ) ){ |
20073 | /* no-op */ |
20074 | }else{ |
20075 | rc = SQLITE_ERROR; |
20076 | } |
20077 | } |
20078 | |
20079 | }else{ |
20080 | rc = SQLITE_NOMEM; |
20081 | } |
20082 | if( rc!=SQLITE_OK ){ |
20083 | fts5UnicodeDelete((Fts5Tokenizer*)p); |
20084 | p = 0; |
20085 | } |
20086 | *ppOut = (Fts5Tokenizer*)p; |
20087 | } |
20088 | return rc; |
20089 | } |
20090 | |
20091 | /* |
20092 | ** Return true if, for the purposes of tokenizing with the tokenizer |
20093 | ** passed as the first argument, codepoint iCode is considered a token |
20094 | ** character (not a separator). |
20095 | */ |
20096 | static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ |
20097 | return ( |
20098 | p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)] |
20099 | ^ fts5UnicodeIsException(p, iCode) |
20100 | ); |
20101 | } |
20102 | |
20103 | static int fts5UnicodeTokenize( |
20104 | Fts5Tokenizer *pTokenizer, |
20105 | void *pCtx, |
20106 | int iUnused, |
20107 | const char *pText, int nText, |
20108 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) |
20109 | ){ |
20110 | Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; |
20111 | int rc = SQLITE_OK; |
20112 | unsigned char *a = p->aTokenChar; |
20113 | |
20114 | unsigned char *zTerm = (unsigned char*)&pText[nText]; |
20115 | unsigned char *zCsr = (unsigned char *)pText; |
20116 | |
20117 | /* Output buffer */ |
20118 | char *aFold = p->aFold; |
20119 | int nFold = p->nFold; |
20120 | const char *pEnd = &aFold[nFold-6]; |
20121 | |
20122 | UNUSED_PARAM(iUnused); |
20123 | |
20124 | /* Each iteration of this loop gobbles up a contiguous run of separators, |
20125 | ** then the next token. */ |
20126 | while( rc==SQLITE_OK ){ |
20127 | u32 iCode; /* non-ASCII codepoint read from input */ |
20128 | char *zOut = aFold; |
20129 | int is; |
20130 | int ie; |
20131 | |
20132 | /* Skip any separator characters. */ |
20133 | while( 1 ){ |
20134 | if( zCsr>=zTerm ) goto tokenize_done; |
20135 | if( *zCsr & 0x80 ) { |
20136 | /* A character outside of the ascii range. Skip past it if it is |
20137 | ** a separator character. Or break out of the loop if it is not. */ |
20138 | is = zCsr - (unsigned char*)pText; |
20139 | READ_UTF8(zCsr, zTerm, iCode); |
20140 | if( fts5UnicodeIsAlnum(p, iCode) ){ |
20141 | goto non_ascii_tokenchar; |
20142 | } |
20143 | }else{ |
20144 | if( a[*zCsr] ){ |
20145 | is = zCsr - (unsigned char*)pText; |
20146 | goto ascii_tokenchar; |
20147 | } |
20148 | zCsr++; |
20149 | } |
20150 | } |
20151 | |
20152 | /* Run through the tokenchars. Fold them into the output buffer along |
20153 | ** the way. */ |
20154 | while( zCsr<zTerm ){ |
20155 | |
20156 | /* Grow the output buffer so that there is sufficient space to fit the |
20157 | ** largest possible utf-8 character. */ |
20158 | if( zOut>pEnd ){ |
20159 | aFold = sqlite3_malloc64((sqlite3_int64)nFold*2); |
20160 | if( aFold==0 ){ |
20161 | rc = SQLITE_NOMEM; |
20162 | goto tokenize_done; |
20163 | } |
20164 | zOut = &aFold[zOut - p->aFold]; |
20165 | memcpy(aFold, p->aFold, nFold); |
20166 | sqlite3_free(p->aFold); |
20167 | p->aFold = aFold; |
20168 | p->nFold = nFold = nFold*2; |
20169 | pEnd = &aFold[nFold-6]; |
20170 | } |
20171 | |
20172 | if( *zCsr & 0x80 ){ |
20173 | /* An non-ascii-range character. Fold it into the output buffer if |
20174 | ** it is a token character, or break out of the loop if it is not. */ |
20175 | READ_UTF8(zCsr, zTerm, iCode); |
20176 | if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ |
20177 | non_ascii_tokenchar: |
20178 | iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic); |
20179 | if( iCode ) WRITE_UTF8(zOut, iCode); |
20180 | }else{ |
20181 | break; |
20182 | } |
20183 | }else if( a[*zCsr]==0 ){ |
20184 | /* An ascii-range separator character. End of token. */ |
20185 | break; |
20186 | }else{ |
20187 | ascii_tokenchar: |
20188 | if( *zCsr>='A' && *zCsr<='Z' ){ |
20189 | *zOut++ = *zCsr + 32; |
20190 | }else{ |
20191 | *zOut++ = *zCsr; |
20192 | } |
20193 | zCsr++; |
20194 | } |
20195 | ie = zCsr - (unsigned char*)pText; |
20196 | } |
20197 | |
20198 | /* Invoke the token callback */ |
20199 | rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); |
20200 | } |
20201 | |
20202 | tokenize_done: |
20203 | if( rc==SQLITE_DONE ) rc = SQLITE_OK; |
20204 | return rc; |
20205 | } |
20206 | |
20207 | /************************************************************************** |
20208 | ** Start of porter stemmer implementation. |
20209 | */ |
20210 | |
20211 | /* Any tokens larger than this (in bytes) are passed through without |
20212 | ** stemming. */ |
20213 | #define FTS5_PORTER_MAX_TOKEN 64 |
20214 | |
20215 | typedef struct PorterTokenizer PorterTokenizer; |
20216 | struct PorterTokenizer { |
20217 | fts5_tokenizer tokenizer; /* Parent tokenizer module */ |
20218 | Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ |
20219 | char aBuf[FTS5_PORTER_MAX_TOKEN + 64]; |
20220 | }; |
20221 | |
20222 | /* |
20223 | ** Delete a "porter" tokenizer. |
20224 | */ |
20225 | static void fts5PorterDelete(Fts5Tokenizer *pTok){ |
20226 | if( pTok ){ |
20227 | PorterTokenizer *p = (PorterTokenizer*)pTok; |
20228 | if( p->pTokenizer ){ |
20229 | p->tokenizer.xDelete(p->pTokenizer); |
20230 | } |
20231 | sqlite3_free(p); |
20232 | } |
20233 | } |
20234 | |
20235 | /* |
20236 | ** Create a "porter" tokenizer. |
20237 | */ |
20238 | static int fts5PorterCreate( |
20239 | void *pCtx, |
20240 | const char **azArg, int nArg, |
20241 | Fts5Tokenizer **ppOut |
20242 | ){ |
20243 | fts5_api *pApi = (fts5_api*)pCtx; |
20244 | int rc = SQLITE_OK; |
20245 | PorterTokenizer *pRet; |
20246 | void *pUserdata = 0; |
20247 | const char *zBase = "unicode61" ; |
20248 | |
20249 | if( nArg>0 ){ |
20250 | zBase = azArg[0]; |
20251 | } |
20252 | |
20253 | pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); |
20254 | if( pRet ){ |
20255 | memset(pRet, 0, sizeof(PorterTokenizer)); |
20256 | rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); |
20257 | }else{ |
20258 | rc = SQLITE_NOMEM; |
20259 | } |
20260 | if( rc==SQLITE_OK ){ |
20261 | int nArg2 = (nArg>0 ? nArg-1 : 0); |
20262 | const char **azArg2 = (nArg2 ? &azArg[1] : 0); |
20263 | rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer); |
20264 | } |
20265 | |
20266 | if( rc!=SQLITE_OK ){ |
20267 | fts5PorterDelete((Fts5Tokenizer*)pRet); |
20268 | pRet = 0; |
20269 | } |
20270 | *ppOut = (Fts5Tokenizer*)pRet; |
20271 | return rc; |
20272 | } |
20273 | |
/*
** Context handed to fts5PorterCb() while the parent tokenizer runs: the
** caller's real callback and its context, plus the scratch buffer in
** which tokens are stemmed.
*/
typedef struct PorterContext {
  void *pCtx;                                             /* Caller's context */
  int (*xToken)(void*, int, const char*, int, int, int);  /* Caller's callback */
  char *aBuf;                                             /* Stemming buffer */
} PorterContext;
20280 | |
/*
** One suffix-rewriting rule: when a token ends with zSuffix and xCond
** (if not NULL) accepts the remaining stem, the suffix is replaced by
** zOutput. nSuffix and nOutput are the byte lengths of the two strings.
*/
typedef struct PorterRule {
  const char *zSuffix;                    /* Suffix to look for */
  int nSuffix;                            /* Length of zSuffix in bytes */
  int (*xCond)(char *zStem, int nStem);   /* Optional condition on the stem */
  const char *zOutput;                    /* Replacement text */
  int nOutput;                            /* Length of zOutput in bytes */
} PorterRule;
20289 | |
#if 0
/*
** Disabled (compiled out) table-driven rule applier.
**
** aRule[] is terminated by an entry whose zSuffix is NULL. The first rule
** whose suffix matches the end of the nBuf-byte token in aBuf is selected.
** If that rule has no condition, or its condition accepts the stem, the
** suffix is replaced by the rule's output, *pnBuf is updated and the index
** of the rule is returned. Otherwise -1 is returned and nothing changes.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  const int nBuf = *pnBuf;
  PorterRule *pRule = aRule;
  int nStem;

  while( pRule->zSuffix ){
    assert( strlen(pRule->zSuffix)==pRule->nSuffix );
    assert( strlen(pRule->zOutput)==pRule->nOutput );
    if( nBuf>=pRule->nSuffix
     && 0==memcmp(&aBuf[nBuf - pRule->nSuffix], pRule->zSuffix, pRule->nSuffix)
    ){
      break;
    }
    pRule++;
  }

  /* No rule matched. */
  if( pRule->zSuffix==0 ) return -1;

  /* A rule matched, but its condition may still veto the rewrite. */
  nStem = nBuf - pRule->nSuffix;
  if( pRule->xCond!=0 && pRule->xCond(aBuf, nStem)==0 ) return -1;

  memcpy(&aBuf[nStem], pRule->zOutput, pRule->nOutput);
  *pnBuf = nStem + pRule->nOutput;
  return pRule - aRule;
}
#endif
20315 | |
/*
** Return 1 if c is one of the lower-case vowels a, e, i, o, u, or if c is
** 'y' and bYIsVowel is non-zero. Return 0 otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel!=0;
    default:
      return 0;
  }
}
20321 | |
/*
** Scan zStem (nStem bytes) for the first vowel and then for the first
** consonant that follows it. Return the offset just past that consonant,
** or 0 if no such vowel/consonant sequence exists.
**
** bPrevCons says whether the character before zStem[0] was a consonant.
** A 'y' is treated as a vowel only when the preceding character was a
** consonant.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bIsCons = bPrevCons;

  /* Scan for a vowel. */
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons==0 ) break;
    iPos++;
  }

  /* Scan for a consonant after it. */
  iPos++;
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons ) return iPos+1;
    iPos++;
  }
  return 0;
}
20337 | |
/*
** porter rule condition: (m > 0)
**
** Return 1 if the stem contains at least one vowel followed by a
** consonant, or 0 otherwise.
*/
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
20342 | |
/*
** porter rule condition: (m > 1)
**
** Return 1 if the stem contains at least two vowel-consonant sequences,
** or 0 otherwise.
*/
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  /* The first sequence ended on a consonant, hence bPrevCons==1. */
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
20352 | |
/*
** porter rule condition: (m = 1)
**
** Return 1 if the stem contains exactly one vowel-consonant sequence,
** or 0 otherwise.
*/
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  /* There must be no second sequence in the remainder of the stem. */
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
}
20362 | |
/*
** porter rule condition: (*o)
**
** Return 1 if the last three characters of the stem are
** consonant-vowel-consonant and the final character is not 'w', 'x' or
** 'y'. Return 0 otherwise, including for an empty stem.
**
** The whole stem is scanned from the start because whether a 'y' counts
** as a vowel depends on the character before it. Only the three most
** recent consonant/vowel flags are retained in the mask, which keeps the
** shift within range for stems of any length.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  int i;
  int mask = 0;                   /* Last three flags; bit set = consonant */
  int bCons = 0;
  char cLast;

  if( nStem<1 ) return 0;         /* Do not read zStem[-1] */
  cLast = zStem[nStem-1];
  if( cLast=='w' || cLast=='x' || cLast=='y' ) return 0;

  for(i=0; i<nStem; i++){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    assert( bCons==0 || bCons==1 );
    /* Discard older bits so the signed shift can never overflow. */
    mask = ((mask << 1) | bCons) & 0x0007;
  }
  return (mask==0x0005);
}
20379 | |
/*
** porter rule condition: (m > 1 and (*S or *T))
**
** Return 1 if the stem ends in 's' or 't' and also satisfies (m > 1).
** The stem must not be empty.
*/
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem)!=0;
}
20386 | |
/*
** porter rule condition: (*v*)
**
** Return 1 if the stem contains a vowel, 0 otherwise. A 'y' counts as a
** vowel anywhere except in the first position.
*/
static int fts5Porter_Vowel(char *zStem, int nStem){
  int iPos = 0;
  while( iPos<nStem ){
    if( fts5PorterIsVowel(zStem[iPos], iPos>0) ) return 1;
    iPos++;
  }
  return 0;
}
20397 | |
20398 | |
20399 | /************************************************************************** |
20400 | *************************************************************************** |
20401 | ** GENERATED CODE STARTS HERE (mkportersteps.tcl) |
20402 | */ |
20403 | |
/*
** Porter stemmer step 4: strip a final suffix when the remaining stem
** satisfies (m > 1). For the suffix "ion" the stem must additionally end
** in 's' or 't'.
**
** aBuf holds the token and *pnBuf its length in bytes; *pnBuf is reduced
** when a suffix is removed. The rules are dispatched on the second-to-last
** character of the token. Always returns 0.
**
** Tokens shorter than two bytes are returned untouched. No rule can match
** them, and the guard avoids reading aBuf[nBuf-2] before the start of the
** buffer. Earlier steps can legitimately leave a one-byte token.
**
** NOTE(review): this function sits in the region marked as generated by
** mkportersteps.tcl; mirror the guard there if the file is regenerated.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

  }
  return ret;
}
20536 | |
20537 | |
/*
** Porter stemmer step 1b, second part: after "ed" or "ing" has been
** removed, restore a trailing 'e' for tokens that now end in "at", "bl" or
** "iz" (giving "ate", "ble", "ize").
**
** aBuf holds the token and *pnBuf its length in bytes. When a rule fires,
** one byte is appended at aBuf[*pnBuf], *pnBuf is incremented and 1 is
** returned; the caller must provide room for the extra byte. Otherwise the
** token is left untouched and 0 is returned.
**
** Tokens shorter than two bytes are returned untouched. No rule can match
** them, and the guard avoids reading aBuf[nBuf-2] before the start of the
** buffer (step 1b can leave a one-byte token, e.g. "aed" -> "a").
**
** NOTE(review): this function sits in the region marked as generated by
** mkportersteps.tcl; mirror the guard there if the file is regenerated.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ate", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'b':
      if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ble", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ize", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

  }
  return ret;
}
20570 | |
20571 | |
/*
** Porter stemmer step 2: rewrite a longer derivational suffix to a shorter
** one (for example "ational" -> "ate", "izer" -> "ize") when the stem in
** front of the suffix satisfies (m > 0).
**
** aBuf holds the token and *pnBuf its length in bytes; both are updated in
** place when a rule fires. The rules are dispatched on the second-to-last
** character of the token, and within one case only the first suffix that
** matches is considered. Always returns 0.
**
** Tokens shorter than two bytes are returned untouched. No rule can match
** them, and the guard avoids reading aBuf[nBuf-2] before the start of the
** buffer. Earlier steps can legitimately leave a one-byte token.
**
** NOTE(review): this function sits in the region marked as generated by
** mkportersteps.tcl; mirror the guard there if the file is regenerated.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ate", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "tion", 4);
          *pnBuf = nBuf - 6 + 4;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ence", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ance", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }
      break;

    case 'e':
      if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ize", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'g':
      if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "log", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'l':
      if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ble", 3);
          *pnBuf = nBuf - 3 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "al", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ent", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "e", 1);
          *pnBuf = nBuf - 3 + 1;
        }
      }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ous", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }
      break;

    case 'o':
      if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ize", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ate", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ate", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 's':
      if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ive", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ful", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ous", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ive", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "ble", 3);
          *pnBuf = nBuf - 6 + 3;
        }
      }
      break;

  }
  return ret;
}
20717 | |
20718 | |
/*
** Porter stemmer step 3: shorten or remove a suffix (for example
** "icate" -> "ic", "ness" -> "") when the stem in front of the suffix
** satisfies (m > 0).
**
** aBuf holds the token and *pnBuf its length in bytes; both are updated in
** place when a rule fires. The rules are dispatched on the second-to-last
** character of the token. Always returns 0.
**
** Tokens shorter than two bytes are returned untouched. No rule can match
** them, and the guard avoids reading aBuf[nBuf-2] before the start of the
** buffer. Earlier steps can legitimately leave a one-byte token.
**
** NOTE(review): this function sits in the region marked as generated by
** mkportersteps.tcl; mirror the guard there if the file is regenerated.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

  }
  return ret;
}
20783 | |
20784 | |
/*
** Porter stemmer step 1b, first part:
**
**   "eed" -> "ee"   if the stem satisfies (m > 0)
**   "ed"  -> ""     if the stem contains a vowel
**   "ing" -> ""     if the stem contains a vowel
**
** A token ending in "eed" is never considered for the "ed" rule, even when
** the "eed" rule's condition fails.
**
** aBuf holds the token and *pnBuf its length in bytes; both are updated in
** place. Returns 1 if "ed" or "ing" was removed (the caller then applies
** the follow-up rules of step 1b), otherwise 0.
**
** Tokens shorter than two bytes are returned untouched. No rule can match
** them, and the guard keeps aBuf[nBuf-2] inside the buffer, consistent
** with the other step functions.
**
** NOTE(review): this function sits in the region marked as generated by
** mkportersteps.tcl; mirror the guard there if the file is regenerated.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  if( nBuf<2 ) return ret;
  switch( aBuf[nBuf-2] ){

    case 'e':
      if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ee", 2);
          *pnBuf = nBuf - 3 + 2;
        }
      }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_Vowel(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
          ret = 1;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_Vowel(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
          ret = 1;
        }
      }
      break;

  }
  return ret;
}
20816 | |
20817 | /* |
20818 | ** GENERATED CODE ENDS HERE (mkportersteps.tcl) |
20819 | *************************************************************************** |
20820 | **************************************************************************/ |
20821 | |
/*
** Porter stemmer step 1a: strip plural endings from the token in aBuf,
** whose length in bytes is *pnBuf.
**
**   "sses" -> "ss"      "ies" -> "i"      "ss" -> "ss"      "s" -> ""
**
** Only *pnBuf is modified; the buffer contents are left as they are. The
** token must be at least two bytes long.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int n = *pnBuf;
  int nDrop = 0;                  /* Number of trailing bytes to remove */

  if( aBuf[n-1]!='s' ) return;

  if( aBuf[n-2]=='e' ){
    int bSses = (n>4 && aBuf[n-4]=='s' && aBuf[n-3]=='s');
    int bIes = (n>3 && aBuf[n-3]=='i');
    nDrop = (bSses || bIes) ? 2 : 1;
  }else if( aBuf[n-2]!='s' ){
    nDrop = 1;
  }

  if( nDrop ) *pnBuf = n - nDrop;
}
20839 | |
20840 | static int fts5PorterCb( |
20841 | void *pCtx, |
20842 | int tflags, |
20843 | const char *pToken, |
20844 | int nToken, |
20845 | int iStart, |
20846 | int iEnd |
20847 | ){ |
20848 | PorterContext *p = (PorterContext*)pCtx; |
20849 | |
20850 | char *aBuf; |
20851 | int nBuf; |
20852 | |
20853 | if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through; |
20854 | aBuf = p->aBuf; |
20855 | nBuf = nToken; |
20856 | memcpy(aBuf, pToken, nBuf); |
20857 | |
20858 | /* Step 1. */ |
20859 | fts5PorterStep1A(aBuf, &nBuf); |
20860 | if( fts5PorterStep1B(aBuf, &nBuf) ){ |
20861 | if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ |
20862 | char c = aBuf[nBuf-1]; |
20863 | if( fts5PorterIsVowel(c, 0)==0 |
20864 | && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] |
20865 | ){ |
20866 | nBuf--; |
20867 | }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ |
20868 | aBuf[nBuf++] = 'e'; |
20869 | } |
20870 | } |
20871 | } |
20872 | |
20873 | /* Step 1C. */ |
20874 | if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ |
20875 | aBuf[nBuf-1] = 'i'; |
20876 | } |
20877 | |
20878 | /* Steps 2 through 4. */ |
20879 | fts5PorterStep2(aBuf, &nBuf); |
20880 | fts5PorterStep3(aBuf, &nBuf); |
20881 | fts5PorterStep4(aBuf, &nBuf); |
20882 | |
20883 | /* Step 5a. */ |
20884 | assert( nBuf>0 ); |
20885 | if( aBuf[nBuf-1]=='e' ){ |
20886 | if( fts5Porter_MGt1(aBuf, nBuf-1) |
20887 | || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) |
20888 | ){ |
20889 | nBuf--; |
20890 | } |
20891 | } |
20892 | |
20893 | /* Step 5b. */ |
20894 | if( nBuf>1 && aBuf[nBuf-1]=='l' |
20895 | && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) |
20896 | ){ |
20897 | nBuf--; |
20898 | } |
20899 | |
20900 | return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); |
20901 | |
20902 | pass_through: |
20903 | return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); |
20904 | } |
20905 | |
20906 | /* |
20907 | ** Tokenize using the porter tokenizer. |
20908 | */ |
20909 | static int fts5PorterTokenize( |
20910 | Fts5Tokenizer *pTokenizer, |
20911 | void *pCtx, |
20912 | int flags, |
20913 | const char *pText, int nText, |
20914 | int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) |
20915 | ){ |
20916 | PorterTokenizer *p = (PorterTokenizer*)pTokenizer; |
20917 | PorterContext sCtx; |
20918 | sCtx.xToken = xToken; |
20919 | sCtx.pCtx = pCtx; |
20920 | sCtx.aBuf = p->aBuf; |
20921 | return p->tokenizer.xTokenize( |
20922 | p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb |
20923 | ); |
20924 | } |
20925 | |
20926 | /************************************************************************** |
20927 | ** Start of trigram implementation. |
20928 | */ |
/*
** A "trigram" tokenizer instance. The only state is the case-folding flag.
*/
typedef struct TrigramTokenizer {
  int bFold;                      /* True to fold to lower-case */
} TrigramTokenizer;
20933 | |
20934 | /* |
20935 | ** Free a trigram tokenizer. |
20936 | */ |
20937 | static void fts5TriDelete(Fts5Tokenizer *p){ |
20938 | sqlite3_free(p); |
20939 | } |
20940 | |
20941 | /* |
20942 | ** Allocate a trigram tokenizer. |
20943 | */ |
20944 | static int fts5TriCreate( |
20945 | void *pUnused, |
20946 | const char **azArg, |
20947 | int nArg, |
20948 | Fts5Tokenizer **ppOut |
20949 | ){ |
20950 | int rc = SQLITE_OK; |
20951 | TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); |
20952 | UNUSED_PARAM(pUnused); |
20953 | if( pNew==0 ){ |
20954 | rc = SQLITE_NOMEM; |
20955 | }else{ |
20956 | int i; |
20957 | pNew->bFold = 1; |
20958 | for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ |
20959 | const char *zArg = azArg[i+1]; |
20960 | if( 0==sqlite3_stricmp(azArg[i], "case_sensitive" ) ){ |
20961 | if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ |
20962 | rc = SQLITE_ERROR; |
20963 | }else{ |
20964 | pNew->bFold = (zArg[0]=='0'); |
20965 | } |
20966 | }else{ |
20967 | rc = SQLITE_ERROR; |
20968 | } |
20969 | } |
20970 | if( rc!=SQLITE_OK ){ |
20971 | fts5TriDelete((Fts5Tokenizer*)pNew); |
20972 | pNew = 0; |
20973 | } |
20974 | } |
20975 | *ppOut = (Fts5Tokenizer*)pNew; |
20976 | return rc; |
20977 | } |
20978 | |
20979 | /* |
20980 | ** Trigram tokenizer tokenize routine. |
20981 | */ |
20982 | static int fts5TriTokenize( |
20983 | Fts5Tokenizer *pTok, |
20984 | void *pCtx, |
20985 | int unusedFlags, |
20986 | const char *pText, int nText, |
20987 | int (*xToken)(void*, int, const char*, int, int, int) |
20988 | ){ |
20989 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; |
20990 | int rc = SQLITE_OK; |
20991 | char aBuf[32]; |
20992 | const unsigned char *zIn = (const unsigned char*)pText; |
20993 | const unsigned char *zEof = &zIn[nText]; |
20994 | u32 iCode; |
20995 | |
20996 | UNUSED_PARAM(unusedFlags); |
20997 | while( 1 ){ |
20998 | char *zOut = aBuf; |
20999 | int iStart = zIn - (const unsigned char*)pText; |
21000 | const unsigned char *zNext; |
21001 | |
21002 | READ_UTF8(zIn, zEof, iCode); |
21003 | if( iCode==0 ) break; |
21004 | zNext = zIn; |
21005 | if( zIn<zEof ){ |
21006 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); |
21007 | WRITE_UTF8(zOut, iCode); |
21008 | READ_UTF8(zIn, zEof, iCode); |
21009 | if( iCode==0 ) break; |
21010 | }else{ |
21011 | break; |
21012 | } |
21013 | if( zIn<zEof ){ |
21014 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); |
21015 | WRITE_UTF8(zOut, iCode); |
21016 | READ_UTF8(zIn, zEof, iCode); |
21017 | if( iCode==0 ) break; |
21018 | if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); |
21019 | WRITE_UTF8(zOut, iCode); |
21020 | }else{ |
21021 | break; |
21022 | } |
21023 | rc = xToken(pCtx, 0, aBuf, zOut-aBuf, iStart, iStart + zOut-aBuf); |
21024 | if( rc!=SQLITE_OK ) break; |
21025 | zIn = zNext; |
21026 | } |
21027 | |
21028 | return rc; |
21029 | } |
21030 | |
21031 | /* |
21032 | ** Argument xCreate is a pointer to a constructor function for a tokenizer. |
21033 | ** pTok is a tokenizer previously created using the same method. This function |
21034 | ** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB |
21035 | ** indicating the style of pattern matching that the tokenizer can support. |
21036 | ** In practice, this is: |
21037 | ** |
21038 | ** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB |
21039 | ** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE |
21040 | ** all other tokenizers - FTS5_PATTERN_NONE |
21041 | */ |
21042 | static int sqlite3Fts5TokenizerPattern( |
21043 | int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), |
21044 | Fts5Tokenizer *pTok |
21045 | ){ |
21046 | if( xCreate==fts5TriCreate ){ |
21047 | TrigramTokenizer *p = (TrigramTokenizer*)pTok; |
21048 | return p->bFold ? FTS5_PATTERN_LIKE : FTS5_PATTERN_GLOB; |
21049 | } |
21050 | return FTS5_PATTERN_NONE; |
21051 | } |
21052 | |
21053 | /* |
21054 | ** Register all built-in tokenizers with FTS5. |
21055 | */ |
21056 | static int sqlite3Fts5TokenizerInit(fts5_api *pApi){ |
21057 | struct BuiltinTokenizer { |
21058 | const char *zName; |
21059 | fts5_tokenizer x; |
21060 | } aBuiltin[] = { |
21061 | { "unicode61" , {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, |
21062 | { "ascii" , {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, |
21063 | { "porter" , {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, |
21064 | { "trigram" , {fts5TriCreate, fts5TriDelete, fts5TriTokenize}}, |
21065 | }; |
21066 | |
21067 | int rc = SQLITE_OK; /* Return code */ |
21068 | int i; /* To iterate through builtin functions */ |
21069 | |
21070 | for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ |
21071 | rc = pApi->xCreateTokenizer(pApi, |
21072 | aBuiltin[i].zName, |
21073 | (void*)pApi, |
21074 | &aBuiltin[i].x, |
21075 | 0 |
21076 | ); |
21077 | } |
21078 | |
21079 | return rc; |
21080 | } |
21081 | |
21082 | #line 1 "fts5_unicode2.c" |
21083 | /* |
21084 | ** 2012-05-25 |
21085 | ** |
21086 | ** The author disclaims copyright to this source code. In place of |
21087 | ** a legal notice, here is a blessing: |
21088 | ** |
21089 | ** May you do good and not evil. |
21090 | ** May you find forgiveness for yourself and forgive others. |
21091 | ** May you share freely, never taking more than you give. |
21092 | ** |
21093 | ****************************************************************************** |
21094 | */ |
21095 | |
21096 | /* |
21097 | ** DO NOT EDIT THIS MACHINE GENERATED FILE. |
21098 | */ |
21099 | |
21100 | |
21101 | #include <assert.h> |
21102 | |
21103 | |
21104 | |
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Each entry of aDia[] packs a range of codepoints: the upper 13 bits
** are the first codepoint of the range and the low 3 bits are the number
** of further codepoints that follow it. aChar[] holds, at the same index,
** the ASCII letter the range maps to. If the 0x80 bit of an aChar[] entry
** is set, the mapping is only applied when bComplex is non-zero.
** Codepoints that fall outside every range are returned unchanged.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* The low three bits of the key are all set so that it compares greater
  ** than or equal to any aDia[] entry whose range starts at codepoint c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iFirst = 0;                               /* Search window start */
  int iEnd = (int)(sizeof(aDia)/sizeof(aDia[0]));  /* One past window end */
  int iRes;                                     /* Index of selected range */
  int iLast;                                    /* Last codepoint of range */
  unsigned char ch;                             /* aChar[] entry for range */

  /* Find the first entry that is greater than key. The entry before it is
  ** the last one that is less than or equal to key. Since aDia[0] is zero,
  ** such an entry always exists. */
  while( iFirst<iEnd ){
    int iMid = iFirst + (iEnd - iFirst)/2;
    if( aDia[iMid]<=key ){
      iFirst = iMid+1;
    }else{
      iEnd = iMid;
    }
  }
  iRes = iFirst - 1;
  assert( iRes>=0 && key>=aDia[iRes] );

  ch = aChar[iRes];
  if( bComplex==0 && (ch & 0x80) ) return c;

  iLast = (aDia[iRes]>>3) + (aDia[iRes]&0x07);
  if( c>iLast ) return c;
  return (int)ch & 0x7F;
}
21174 | |
21175 | |
/*
** Return true if the argument interpreted as a unicode codepoint
** is a diacritical modifier character.
**
** Only codepoints 768 through 817 are considered. Each is looked up in a
** 64-bit bitmap stored as two 32-bit words: bit (c-768) of the bitmap is
** set for codepoints that are reported as diacritics. The value returned
** for a match is the selected bit itself (non-zero, not necessarily 1).
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  static const unsigned int aMask[2] = { 0x08029FDF, 0x000361F8 };
  unsigned int iBit;

  if( c<768 || c>817 ) return 0;

  /* The range check above makes the subtraction safe and keeps iBit in
  ** the range 0..49. */
  iBit = (unsigned int)(c - 768);
  return aMask[iBit>>5] & ((unsigned int)1 << (iBit & 31));
}
21188 | |
21189 | |
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** If eRemoveDiacritic is non-zero and c is in the range 128..65535, the
** folded value is additionally passed through fts5_remove_diacritic(),
** with its bComplex argument set if eRemoveDiacritic==2.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint iCode.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
    {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
    {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
    {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
    {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
    {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
    {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
    {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
    {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
    {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
    {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
    {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
    {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
    {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
    {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
    {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
    {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
    {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
    {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
    {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
    {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
    {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
    {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
    {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
    {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
    {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
    {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
    {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
    {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
    {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
    {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
    {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
    {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
    {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
    {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
    {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
    {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
    {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
    {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
    {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
    {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
    {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
    {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
    {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
    {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
    {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
    {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
    {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
    {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
    {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
    {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
    {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
    {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
    {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  const struct TableEntry *pRule; /* Last rule with iCode<=c */
  int iFirst;                     /* Search window start */
  int iEnd;                       /* One past search window end */
  int ret;                        /* Value to return */

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII: only 'A'..'Z' are folded. No diacritic handling is done. */
  if( c<128 ){
    return (c>='A' && c<='Z') ? c + ('a' - 'A') : c;
  }

  /* Beyond the BMP only codepoints 66560..66599 are folded. */
  if( c>=65536 ){
    return (c>=66560 && c<66600) ? c + 40 : c;
  }

  /* 128 <= c < 65536. Locate the first rule with iCode greater than c.
  ** The rule before it is the last one with iCode<=c. As aEntry[0].iCode
  ** is smaller than 128, such a rule always exists. */
  assert( c>aEntry[0].iCode );
  iFirst = 0;
  iEnd = (int)(sizeof(aEntry)/sizeof(aEntry[0]));
  while( iFirst<iEnd ){
    int iMid = iFirst + (iEnd - iFirst)/2;
    if( c>=aEntry[iMid].iCode ){
      iFirst = iMid+1;
    }else{
      iEnd = iMid;
    }
  }
  assert( iFirst>0 && c>=aEntry[iFirst-1].iCode );
  pRule = &aEntry[iFirst-1];

  ret = c;
  if( c<(pRule->iCode + pRule->nRange) ){
    /* An "every second codepoint" rule skips codepoints whose parity
    ** differs from that of the first codepoint of the range. */
    int bSkip = (pRule->flags & 0x01) && ((pRule->iCode ^ c) & 0x01);
    if( !bSkip ){
      ret = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
      assert( ret>0 );
    }
  }

  if( eRemoveDiacritic ){
    ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2);
  }

  return ret;
}
21333 | |
21334 | |
21335 | static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ |
21336 | aArray[0] = 1; |
21337 | switch( zCat[0] ){ |
21338 | case 'C': |
21339 | switch( zCat[1] ){ |
21340 | case 'c': aArray[1] = 1; break; |
21341 | case 'f': aArray[2] = 1; break; |
21342 | case 'n': aArray[3] = 1; break; |
21343 | case 's': aArray[4] = 1; break; |
21344 | case 'o': aArray[31] = 1; break; |
21345 | case '*': |
21346 | aArray[1] = 1; |
21347 | aArray[2] = 1; |
21348 | aArray[3] = 1; |
21349 | aArray[4] = 1; |
21350 | aArray[31] = 1; |
21351 | break; |
21352 | default: return 1; } |
21353 | break; |
21354 | |
21355 | case 'L': |
21356 | switch( zCat[1] ){ |
21357 | case 'l': aArray[5] = 1; break; |
21358 | case 'm': aArray[6] = 1; break; |
21359 | case 'o': aArray[7] = 1; break; |
21360 | case 't': aArray[8] = 1; break; |
21361 | case 'u': aArray[9] = 1; break; |
21362 | case 'C': aArray[30] = 1; break; |
21363 | case '*': |
21364 | aArray[5] = 1; |
21365 | aArray[6] = 1; |
21366 | aArray[7] = 1; |
21367 | aArray[8] = 1; |
21368 | aArray[9] = 1; |
21369 | aArray[30] = 1; |
21370 | break; |
21371 | default: return 1; } |
21372 | break; |
21373 | |
21374 | case 'M': |
21375 | switch( zCat[1] ){ |
21376 | case 'c': aArray[10] = 1; break; |
21377 | case 'e': aArray[11] = 1; break; |
21378 | case 'n': aArray[12] = 1; break; |
21379 | case '*': |
21380 | aArray[10] = 1; |
21381 | aArray[11] = 1; |
21382 | aArray[12] = 1; |
21383 | break; |
21384 | default: return 1; } |
21385 | break; |
21386 | |
21387 | case 'N': |
21388 | switch( zCat[1] ){ |
21389 | case 'd': aArray[13] = 1; break; |
21390 | case 'l': aArray[14] = 1; break; |
21391 | case 'o': aArray[15] = 1; break; |
21392 | case '*': |
21393 | aArray[13] = 1; |
21394 | aArray[14] = 1; |
21395 | aArray[15] = 1; |
21396 | break; |
21397 | default: return 1; } |
21398 | break; |
21399 | |
21400 | case 'P': |
21401 | switch( zCat[1] ){ |
21402 | case 'c': aArray[16] = 1; break; |
21403 | case 'd': aArray[17] = 1; break; |
21404 | case 'e': aArray[18] = 1; break; |
21405 | case 'f': aArray[19] = 1; break; |
21406 | case 'i': aArray[20] = 1; break; |
21407 | case 'o': aArray[21] = 1; break; |
21408 | case 's': aArray[22] = 1; break; |
21409 | case '*': |
21410 | aArray[16] = 1; |
21411 | aArray[17] = 1; |
21412 | aArray[18] = 1; |
21413 | aArray[19] = 1; |
21414 | aArray[20] = 1; |
21415 | aArray[21] = 1; |
21416 | aArray[22] = 1; |
21417 | break; |
21418 | default: return 1; } |
21419 | break; |
21420 | |
21421 | case 'S': |
21422 | switch( zCat[1] ){ |
21423 | case 'c': aArray[23] = 1; break; |
21424 | case 'k': aArray[24] = 1; break; |
21425 | case 'm': aArray[25] = 1; break; |
21426 | case 'o': aArray[26] = 1; break; |
21427 | case '*': |
21428 | aArray[23] = 1; |
21429 | aArray[24] = 1; |
21430 | aArray[25] = 1; |
21431 | aArray[26] = 1; |
21432 | break; |
21433 | default: return 1; } |
21434 | break; |
21435 | |
21436 | case 'Z': |
21437 | switch( zCat[1] ){ |
21438 | case 'l': aArray[27] = 1; break; |
21439 | case 'p': aArray[28] = 1; break; |
21440 | case 's': aArray[29] = 1; break; |
21441 | case '*': |
21442 | aArray[27] = 1; |
21443 | aArray[28] = 1; |
21444 | aArray[29] = 1; |
21445 | break; |
21446 | default: return 1; } |
21447 | break; |
21448 | |
21449 | } |
21450 | return 0; |
21451 | } |
21452 | |
/*
** Seventeen non-decreasing offsets into aFts5UnicodeMap[]. Entry 0 is 0,
** the final entry (1765) equals the number of elements in
** aFts5UnicodeMap[], and the intermediate values (1471, 1753, 1760, 1763)
** are the positions at which the ascending sequence stored in
** aFts5UnicodeMap[] restarts from a small value.
**
** NOTE(review): presumably entry N is the start of the slice of
** aFts5UnicodeMap[] that covers codepoints N*65536 .. N*65536+65535. The
** lookup code is outside this view - confirm against it.
*/
static u16 aFts5UnicodeBlock[] = {
    0,     1471,  1753,  1760,  1760,  1760,  1760,  1760,  1760,  1760,
    1760,  1760,  1760,  1760,  1760,  1763,  1765,
  };
/*
** 1765 16-bit values arranged as consecutive ascending runs. A new run
** begins at indexes 1471, 1753, 1760 and 1763, which are the offsets
** stored in aFts5UnicodeBlock[].
**
** NOTE(review): presumably each value is the low 16 bits of the first
** codepoint of a range of codepoints that share a general category, with
** the per-range details held in aFts5UnicodeData[]. The lookup code is
** outside this view - confirm against it.
*/
static u16 aFts5UnicodeMap[] = {
    0,     32,    33,    36,    37,    40,    41,    42,    43,    44,
    45,    46,    48,    58,    60,    63,    65,    91,    92,    93,
    94,    95,    96,    97,    123,   124,   125,   126,   127,   160,
    161,   162,   166,   167,   168,   169,   170,   171,   172,   173,
    174,   175,   176,   177,   178,   180,   181,   182,   184,   185,
    186,   187,   188,   191,   192,   215,   216,   223,   247,   248,
    256,   312,   313,   329,   330,   377,   383,   385,   387,   388,
    391,   394,   396,   398,   402,   403,   405,   406,   409,   412,
    414,   415,   417,   418,   423,   427,   428,   431,   434,   436,
    437,   440,   442,   443,   444,   446,   448,   452,   453,   454,
    455,   456,   457,   458,   459,   460,   461,   477,   478,   496,
    497,   498,   499,   500,   503,   505,   506,   564,   570,   572,
    573,   575,   577,   580,   583,   584,   592,   660,   661,   688,
    706,   710,   722,   736,   741,   748,   749,   750,   751,   768,
    880,   884,   885,   886,   890,   891,   894,   900,   902,   903,
    904,   908,   910,   912,   913,   931,   940,   975,   977,   978,
    981,   984,   1008,  1012,  1014,  1015,  1018,  1020,  1021,  1072,
    1120,  1154,  1155,  1160,  1162,  1217,  1231,  1232,  1329,  1369,
    1370,  1377,  1417,  1418,  1423,  1425,  1470,  1471,  1472,  1473,
    1475,  1476,  1478,  1479,  1488,  1520,  1523,  1536,  1542,  1545,
    1547,  1548,  1550,  1552,  1563,  1566,  1568,  1600,  1601,  1611,
    1632,  1642,  1646,  1648,  1649,  1748,  1749,  1750,  1757,  1758,
    1759,  1765,  1767,  1769,  1770,  1774,  1776,  1786,  1789,  1791,
    1792,  1807,  1808,  1809,  1810,  1840,  1869,  1958,  1969,  1984,
    1994,  2027,  2036,  2038,  2039,  2042,  2048,  2070,  2074,  2075,
    2084,  2085,  2088,  2089,  2096,  2112,  2137,  2142,  2208,  2210,
    2276,  2304,  2307,  2308,  2362,  2363,  2364,  2365,  2366,  2369,
    2377,  2381,  2382,  2384,  2385,  2392,  2402,  2404,  2406,  2416,
    2417,  2418,  2425,  2433,  2434,  2437,  2447,  2451,  2474,  2482,
    2486,  2492,  2493,  2494,  2497,  2503,  2507,  2509,  2510,  2519,
    2524,  2527,  2530,  2534,  2544,  2546,  2548,  2554,  2555,  2561,
    2563,  2565,  2575,  2579,  2602,  2610,  2613,  2616,  2620,  2622,
    2625,  2631,  2635,  2641,  2649,  2654,  2662,  2672,  2674,  2677,
    2689,  2691,  2693,  2703,  2707,  2730,  2738,  2741,  2748,  2749,
    2750,  2753,  2759,  2761,  2763,  2765,  2768,  2784,  2786,  2790,
    2800,  2801,  2817,  2818,  2821,  2831,  2835,  2858,  2866,  2869,
    2876,  2877,  2878,  2879,  2880,  2881,  2887,  2891,  2893,  2902,
    2903,  2908,  2911,  2914,  2918,  2928,  2929,  2930,  2946,  2947,
    2949,  2958,  2962,  2969,  2972,  2974,  2979,  2984,  2990,  3006,
    3008,  3009,  3014,  3018,  3021,  3024,  3031,  3046,  3056,  3059,
    3065,  3066,  3073,  3077,  3086,  3090,  3114,  3125,  3133,  3134,
    3137,  3142,  3146,  3157,  3160,  3168,  3170,  3174,  3192,  3199,
    3202,  3205,  3214,  3218,  3242,  3253,  3260,  3261,  3262,  3263,
    3264,  3270,  3271,  3274,  3276,  3285,  3294,  3296,  3298,  3302,
    3313,  3330,  3333,  3342,  3346,  3389,  3390,  3393,  3398,  3402,
    3405,  3406,  3415,  3424,  3426,  3430,  3440,  3449,  3450,  3458,
    3461,  3482,  3507,  3517,  3520,  3530,  3535,  3538,  3542,  3544,
    3570,  3572,  3585,  3633,  3634,  3636,  3647,  3648,  3654,  3655,
    3663,  3664,  3674,  3713,  3716,  3719,  3722,  3725,  3732,  3737,
    3745,  3749,  3751,  3754,  3757,  3761,  3762,  3764,  3771,  3773,
    3776,  3782,  3784,  3792,  3804,  3840,  3841,  3844,  3859,  3860,
    3861,  3864,  3866,  3872,  3882,  3892,  3893,  3894,  3895,  3896,
    3897,  3898,  3899,  3900,  3901,  3902,  3904,  3913,  3953,  3967,
    3968,  3973,  3974,  3976,  3981,  3993,  4030,  4038,  4039,  4046,
    4048,  4053,  4057,  4096,  4139,  4141,  4145,  4146,  4152,  4153,
    4155,  4157,  4159,  4160,  4170,  4176,  4182,  4184,  4186,  4190,
    4193,  4194,  4197,  4199,  4206,  4209,  4213,  4226,  4227,  4229,
    4231,  4237,  4238,  4239,  4240,  4250,  4253,  4254,  4256,  4295,
    4301,  4304,  4347,  4348,  4349,  4682,  4688,  4696,  4698,  4704,
    4746,  4752,  4786,  4792,  4800,  4802,  4808,  4824,  4882,  4888,
    4957,  4960,  4969,  4992,  5008,  5024,  5120,  5121,  5741,  5743,
    5760,  5761,  5787,  5788,  5792,  5867,  5870,  5888,  5902,  5906,
    5920,  5938,  5941,  5952,  5970,  5984,  5998,  6002,  6016,  6068,
    6070,  6071,  6078,  6086,  6087,  6089,  6100,  6103,  6104,  6107,
    6108,  6109,  6112,  6128,  6144,  6150,  6151,  6155,  6158,  6160,
    6176,  6211,  6212,  6272,  6313,  6314,  6320,  6400,  6432,  6435,
    6439,  6441,  6448,  6450,  6451,  6457,  6464,  6468,  6470,  6480,
    6512,  6528,  6576,  6593,  6600,  6608,  6618,  6622,  6656,  6679,
    6681,  6686,  6688,  6741,  6742,  6743,  6744,  6752,  6753,  6754,
    6755,  6757,  6765,  6771,  6783,  6784,  6800,  6816,  6823,  6824,
    6912,  6916,  6917,  6964,  6965,  6966,  6971,  6972,  6973,  6978,
    6979,  6981,  6992,  7002,  7009,  7019,  7028,  7040,  7042,  7043,
    7073,  7074,  7078,  7080,  7082,  7083,  7084,  7086,  7088,  7098,
    7142,  7143,  7144,  7146,  7149,  7150,  7151,  7154,  7164,  7168,
    7204,  7212,  7220,  7222,  7227,  7232,  7245,  7248,  7258,  7288,
    7294,  7360,  7376,  7379,  7380,  7393,  7394,  7401,  7405,  7406,
    7410,  7412,  7413,  7424,  7468,  7531,  7544,  7545,  7579,  7616,
    7676,  7680,  7830,  7838,  7936,  7944,  7952,  7960,  7968,  7976,
    7984,  7992,  8000,  8008,  8016,  8025,  8027,  8029,  8031,  8033,
    8040,  8048,  8064,  8072,  8080,  8088,  8096,  8104,  8112,  8118,
    8120,  8124,  8125,  8126,  8127,  8130,  8134,  8136,  8140,  8141,
    8144,  8150,  8152,  8157,  8160,  8168,  8173,  8178,  8182,  8184,
    8188,  8189,  8192,  8203,  8208,  8214,  8216,  8217,  8218,  8219,
    8221,  8222,  8223,  8224,  8232,  8233,  8234,  8239,  8240,  8249,
    8250,  8251,  8255,  8257,  8260,  8261,  8262,  8263,  8274,  8275,
    8276,  8277,  8287,  8288,  8298,  8304,  8305,  8308,  8314,  8317,
    8318,  8319,  8320,  8330,  8333,  8334,  8336,  8352,  8400,  8413,
    8417,  8418,  8421,  8448,  8450,  8451,  8455,  8456,  8458,  8459,
    8462,  8464,  8467,  8468,  8469,  8470,  8472,  8473,  8478,  8484,
    8485,  8486,  8487,  8488,  8489,  8490,  8494,  8495,  8496,  8500,
    8501,  8505,  8506,  8508,  8510,  8512,  8517,  8519,  8522,  8523,
    8524,  8526,  8527,  8528,  8544,  8579,  8581,  8585,  8592,  8597,
    8602,  8604,  8608,  8609,  8611,  8612,  8614,  8615,  8622,  8623,
    8654,  8656,  8658,  8659,  8660,  8661,  8692,  8960,  8968,  8972,
    8992,  8994,  9001,  9002,  9003,  9084,  9085,  9115,  9140,  9180,
    9186,  9216,  9280,  9312,  9372,  9450,  9472,  9655,  9656,  9665,
    9666,  9720,  9728,  9839,  9840,  9985,  10088, 10089, 10090, 10091,
    10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101,
    10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217,
    10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627,
    10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637,
    10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647,
    10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750,
    11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365,
    11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393,
    11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520,
    11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696,
    11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780,
    11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800,
    11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812,
    11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904,
    11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296,
    12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306,
    12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317,
    12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347,
    12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449,
    12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736,
    12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938,
    12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981,
    40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528,
    42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624,
    42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800,
    42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912,
    43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043,
    43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136,
    43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264,
    43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395,
    43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472,
    43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588,
    43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643,
    43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713,
    43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762,
    43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003,
    44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203,
    55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112,
    64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320,
    64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020,
    65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075,
    65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086,
    65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097,
    65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118,
    65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279,
    65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294,
    65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343,
    65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378,
    65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490,
    65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529,
    65532, 0,     13,    40,    60,    63,    80,    128,   256,   263,
    311,   320,   373,   377,   394,   400,   464,   509,   640,   672,
    768,   800,   816,   833,   834,   842,   896,   927,   928,   968,
    976,   977,   1024,  1064,  1104,  1184,  2048,  2056,  2058,  2103,
    2108,  2111,  2135,  2136,  2304,  2326,  2335,  2336,  2367,  2432,
    2494,  2560,  2561,  2565,  2572,  2576,  2581,  2585,  2616,  2623,
    2624,  2640,  2656,  2685,  2687,  2816,  2873,  2880,  2904,  2912,
    2936,  3072,  3680,  4096,  4097,  4098,  4099,  4152,  4167,  4178,
    4198,  4224,  4226,  4227,  4272,  4275,  4279,  4281,  4283,  4285,
    4286,  4304,  4336,  4352,  4355,  4391,  4396,  4397,  4406,  4416,
    4480,  4482,  4483,  4531,  4534,  4543,  4545,  4549,  4560,  5760,
    5803,  5804,  5805,  5806,  5808,  5814,  5815,  5824,  8192,  9216,
    9328,  12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248,
    53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637,
    53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298,
    54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441,
    54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541,
    54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662,
    54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922,
    54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062,
    55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178,
    55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961,
    60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003,
    61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028,
    61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099,
    61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744,
    61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368,
    62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971,
    63045, 63104, 63232, 0,     42710, 42752, 46900, 46912, 47133, 63488,
    1,     32,    256,   0,     65533,
  };
21636 | static u16 aFts5UnicodeData[] = { |
21637 | 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53, |
21638 | 49, 85, 333, 85, 121, 85, 841, 54, 53, 50, |
21639 | 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61, |
21640 | 53, 151, 58, 53, 56, 58, 39, 52, 57, 34, |
21641 | 58, 56, 58, 57, 79, 56, 37, 85, 56, 47, |
21642 | 39, 51, 111, 53, 745, 57, 233, 773, 57, 261, |
21643 | 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126, |
21644 | 126, 73, 69, 137, 37, 73, 37, 105, 101, 73, |
21645 | 37, 73, 37, 190, 158, 37, 126, 126, 73, 37, |
21646 | 126, 94, 37, 39, 94, 69, 135, 41, 40, 37, |
21647 | 41, 40, 37, 41, 40, 37, 542, 37, 606, 37, |
21648 | 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37, |
21649 | 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582, |
21650 | 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596, |
21651 | 158, 38, 56, 94, 38, 101, 53, 88, 41, 53, |
21652 | 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105, |
21653 | 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541, |
21654 | 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38, |
21655 | 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76, |
21656 | 53, 76, 53, 44, 871, 103, 85, 162, 121, 85, |
21657 | 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684, |
21658 | 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58, |
21659 | 204, 70, 76, 58, 140, 71, 333, 103, 90, 39, |
21660 | 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333, |
21661 | 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300, |
21662 | 38, 108, 38, 172, 501, 807, 108, 53, 39, 359, |
21663 | 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268, |
21664 | 138, 44, 74, 39, 236, 327, 76, 85, 333, 53, |
21665 | 38, 199, 231, 44, 74, 263, 71, 711, 231, 39, |
21666 | 135, 44, 39, 106, 140, 74, 74, 44, 39, 42, |
21667 | 71, 103, 76, 333, 71, 87, 207, 58, 55, 76, |
21668 | 42, 199, 71, 711, 231, 71, 71, 71, 44, 106, |
21669 | 76, 76, 108, 44, 135, 39, 333, 76, 103, 44, |
21670 | 76, 42, 295, 103, 711, 231, 71, 167, 44, 39, |
21671 | 106, 172, 76, 42, 74, 44, 39, 71, 76, 333, |
21672 | 53, 55, 44, 74, 263, 71, 711, 231, 71, 167, |
21673 | 44, 39, 42, 44, 42, 140, 74, 74, 44, 44, |
21674 | 42, 71, 103, 76, 333, 58, 39, 207, 44, 39, |
21675 | 199, 103, 135, 71, 39, 71, 71, 103, 391, 74, |
21676 | 44, 74, 106, 106, 44, 39, 42, 333, 111, 218, |
21677 | 55, 58, 106, 263, 103, 743, 327, 167, 39, 108, |
21678 | 138, 108, 140, 76, 71, 71, 76, 333, 239, 58, |
21679 | 74, 263, 103, 743, 327, 167, 44, 39, 42, 44, |
21680 | 170, 44, 74, 74, 76, 74, 39, 71, 76, 333, |
21681 | 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106, |
21682 | 44, 39, 42, 71, 76, 333, 207, 58, 199, 74, |
21683 | 583, 775, 295, 39, 231, 44, 106, 108, 44, 266, |
21684 | 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268, |
21685 | 53, 333, 85, 71, 39, 71, 39, 39, 135, 231, |
21686 | 103, 39, 39, 71, 135, 44, 71, 204, 76, 39, |
21687 | 167, 38, 204, 333, 135, 39, 122, 501, 58, 53, |
21688 | 122, 76, 218, 333, 335, 58, 44, 58, 44, 58, |
21689 | 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42, |
21690 | 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90, |
21691 | 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76, |
21692 | 74, 76, 39, 333, 213, 199, 74, 76, 135, 108, |
21693 | 39, 106, 71, 234, 103, 140, 423, 44, 74, 76, |
21694 | 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41, |
21695 | 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319, |
21696 | 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151, |
21697 | 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551, |
21698 | 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108, |
21699 | 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76, |
21700 | 42, 236, 266, 44, 74, 364, 117, 38, 117, 55, |
21701 | 39, 44, 333, 335, 213, 49, 149, 108, 61, 333, |
21702 | 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138, |
21703 | 76, 106, 74, 44, 202, 108, 58, 85, 333, 967, |
21704 | 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76, |
21705 | 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44, |
21706 | 74, 268, 202, 332, 44, 333, 333, 245, 38, 213, |
21707 | 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44, |
21708 | 74, 231, 333, 245, 346, 300, 314, 76, 42, 967, |
21709 | 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415, |
21710 | 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159, |
21711 | 266, 268, 74, 76, 181, 333, 103, 333, 967, 198, |
21712 | 85, 277, 108, 53, 428, 42, 236, 135, 44, 135, |
21713 | 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260, |
21714 | 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265, |
21715 | 261, 265, 197, 201, 261, 41, 41, 41, 94, 229, |
21716 | 265, 453, 261, 264, 261, 264, 261, 264, 165, 69, |
21717 | 137, 40, 56, 37, 120, 101, 69, 137, 40, 120, |
21718 | 133, 69, 137, 120, 261, 169, 120, 101, 69, 137, |
21719 | 40, 88, 381, 162, 209, 85, 52, 51, 54, 84, |
21720 | 51, 54, 52, 277, 59, 60, 162, 61, 309, 52, |
21721 | 51, 149, 80, 117, 57, 54, 50, 373, 57, 53, |
21722 | 48, 341, 61, 162, 194, 47, 38, 207, 121, 54, |
21723 | 50, 38, 335, 121, 54, 50, 422, 855, 428, 139, |
21724 | 44, 107, 396, 90, 41, 154, 41, 90, 37, 105, |
21725 | 69, 105, 37, 58, 41, 90, 57, 169, 218, 41, |
21726 | 58, 41, 58, 41, 58, 137, 58, 37, 137, 37, |
21727 | 135, 37, 90, 69, 73, 185, 94, 101, 58, 57, |
21728 | 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186, |
21729 | 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018, |
21730 | 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666, |
21731 | 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217, |
21732 | 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57, |
21733 | 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50, |
21734 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, |
21735 | 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50, |
21736 | 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54, |
21737 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, |
21738 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, |
21739 | 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281, |
21740 | 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69, |
21741 | 254, 105, 37, 94, 37, 94, 165, 70, 105, 37, |
21742 | 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221, |
21743 | 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231, |
21744 | 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52, |
21745 | 51, 117, 52, 51, 53, 52, 51, 309, 49, 85, |
21746 | 49, 53, 52, 51, 85, 52, 51, 54, 50, 54, |
21747 | 50, 54, 50, 54, 50, 181, 38, 341, 81, 858, |
21748 | 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54, |
21749 | 50, 54, 50, 54, 50, 54, 50, 54, 50, 90, |
21750 | 54, 50, 54, 50, 54, 50, 54, 50, 49, 54, |
21751 | 82, 58, 302, 140, 74, 49, 166, 90, 110, 38, |
21752 | 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887, |
21753 | 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178, |
21754 | 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274, |
21755 | 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38, |
21756 | 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333, |
21757 | 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798, |
21758 | 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69, |
21759 | 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382, |
21760 | 70, 37, 231, 44, 103, 44, 135, 44, 743, 74, |
21761 | 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74, |
21762 | 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333, |
21763 | 903, 268, 85, 743, 364, 74, 53, 935, 108, 42, |
21764 | 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333, |
21765 | 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263, |
21766 | 44, 42, 333, 149, 519, 38, 199, 122, 39, 42, |
21767 | 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44, |
21768 | 39, 71, 38, 85, 359, 42, 76, 74, 85, 39, |
21769 | 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74, |
21770 | 44, 74, 44, 74, 53, 42, 44, 333, 39, 39, |
21771 | 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399, |
21772 | 229, 165, 39, 44, 327, 57, 423, 167, 39, 71, |
21773 | 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55, |
21774 | 58, 524, 245, 54, 50, 53, 236, 53, 81, 80, |
21775 | 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, |
21776 | 54, 50, 54, 50, 54, 50, 85, 54, 50, 149, |
21777 | 112, 117, 149, 49, 54, 50, 54, 50, 54, 50, |
21778 | 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34, |
21779 | 117, 55, 117, 54, 50, 53, 57, 53, 49, 85, |
21780 | 333, 85, 121, 85, 841, 54, 53, 50, 56, 48, |
21781 | 56, 837, 54, 57, 50, 57, 54, 50, 53, 54, |
21782 | 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199, |
21783 | 103, 87, 57, 56, 58, 87, 58, 153, 90, 98, |
21784 | 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455, |
21785 | 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575, |
21786 | 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263, |
21787 | 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71, |
21788 | 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799, |
21789 | 71, 39, 108, 76, 140, 135, 103, 871, 108, 44, |
21790 | 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615, |
21791 | 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655, |
21792 | 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34, |
21793 | 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149, |
21794 | 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383, |
21795 | 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182, |
21796 | 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898, |
21797 | 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236, |
21798 | 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837, |
21799 | 841, 229, 581, 841, 837, 41, 73, 41, 73, 137, |
21800 | 265, 133, 37, 229, 357, 841, 837, 73, 137, 265, |
21801 | 233, 837, 73, 137, 169, 41, 233, 837, 841, 837, |
21802 | 841, 837, 841, 837, 841, 837, 841, 837, 841, 901, |
21803 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, |
21804 | 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, |
21805 | 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71, |
21806 | 39, 39, 327, 135, 39, 39, 39, 39, 39, 39, |
21807 | 103, 71, 39, 39, 39, 39, 39, 39, 71, 39, |
21808 | 135, 231, 135, 135, 39, 327, 551, 103, 167, 551, |
21809 | 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, |
21810 | 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, |
21811 | 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, |
21812 | 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, |
21813 | 34, 3074, 7692, 63, 63, |
21814 | }; |
21815 | |
21816 | static int sqlite3Fts5UnicodeCategory(u32 iCode) { |
21817 | int iRes = -1; |
21818 | int iHi; |
21819 | int iLo; |
21820 | int ret; |
21821 | u16 iKey; |
21822 | |
21823 | if( iCode>=(1<<20) ){ |
21824 | return 0; |
21825 | } |
21826 | iLo = aFts5UnicodeBlock[(iCode>>16)]; |
21827 | iHi = aFts5UnicodeBlock[1+(iCode>>16)]; |
21828 | iKey = (iCode & 0xFFFF); |
21829 | while( iHi>iLo ){ |
21830 | int iTest = (iHi + iLo) / 2; |
21831 | assert( iTest>=iLo && iTest<iHi ); |
21832 | if( iKey>=aFts5UnicodeMap[iTest] ){ |
21833 | iRes = iTest; |
21834 | iLo = iTest+1; |
21835 | }else{ |
21836 | iHi = iTest; |
21837 | } |
21838 | } |
21839 | |
21840 | if( iRes<0 ) return 0; |
21841 | if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0; |
21842 | ret = aFts5UnicodeData[iRes] & 0x1F; |
21843 | if( ret!=30 ) return ret; |
21844 | return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9; |
21845 | } |
21846 | |
21847 | static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ |
21848 | int i = 0; |
21849 | int iTbl = 0; |
21850 | while( i<128 ){ |
21851 | int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; |
21852 | int n = (aFts5UnicodeData[iTbl] >> 5) + i; |
21853 | for(; i<128 && i<n; i++){ |
21854 | aAscii[i] = (u8)bToken; |
21855 | } |
21856 | iTbl++; |
21857 | } |
21858 | aAscii[0] = 0; /* 0x00 is never a token character */ |
21859 | } |
21860 | |
21861 | |
21862 | #line 1 "fts5_varint.c" |
21863 | /* |
21864 | ** 2015 May 30 |
21865 | ** |
21866 | ** The author disclaims copyright to this source code. In place of |
21867 | ** a legal notice, here is a blessing: |
21868 | ** |
21869 | ** May you do good and not evil. |
21870 | ** May you find forgiveness for yourself and forgive others. |
21871 | ** May you share freely, never taking more than you give. |
21872 | ** |
21873 | ****************************************************************************** |
21874 | ** |
21875 | ** Routines for varint serialization and deserialization. |
21876 | */ |
21877 | |
21878 | |
21879 | /* #include "fts5Int.h" */ |
21880 | |
21881 | /* |
21882 | ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. |
21883 | ** Except, this version does handle the single byte case that the core |
21884 | ** version depends on being handled before its function is called. |
21885 | */ |
21886 | static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ |
21887 | u32 a,b; |
21888 | |
21889 | /* The 1-byte case. Overwhelmingly the most common. */ |
21890 | a = *p; |
21891 | /* a: p0 (unmasked) */ |
21892 | if (!(a&0x80)) |
21893 | { |
21894 | /* Values between 0 and 127 */ |
21895 | *v = a; |
21896 | return 1; |
21897 | } |
21898 | |
21899 | /* The 2-byte case */ |
21900 | p++; |
21901 | b = *p; |
21902 | /* b: p1 (unmasked) */ |
21903 | if (!(b&0x80)) |
21904 | { |
21905 | /* Values between 128 and 16383 */ |
21906 | a &= 0x7f; |
21907 | a = a<<7; |
21908 | *v = a | b; |
21909 | return 2; |
21910 | } |
21911 | |
21912 | /* The 3-byte case */ |
21913 | p++; |
21914 | a = a<<14; |
21915 | a |= *p; |
21916 | /* a: p0<<14 | p2 (unmasked) */ |
21917 | if (!(a&0x80)) |
21918 | { |
21919 | /* Values between 16384 and 2097151 */ |
21920 | a &= (0x7f<<14)|(0x7f); |
21921 | b &= 0x7f; |
21922 | b = b<<7; |
21923 | *v = a | b; |
21924 | return 3; |
21925 | } |
21926 | |
21927 | /* A 32-bit varint is used to store size information in btrees. |
21928 | ** Objects are rarely larger than 2MiB limit of a 3-byte varint. |
21929 | ** A 3-byte varint is sufficient, for example, to record the size |
21930 | ** of a 1048569-byte BLOB or string. |
21931 | ** |
21932 | ** We only unroll the first 1-, 2-, and 3- byte cases. The very |
21933 | ** rare larger cases can be handled by the slower 64-bit varint |
21934 | ** routine. |
21935 | */ |
21936 | { |
21937 | u64 v64; |
21938 | u8 n; |
21939 | p -= 2; |
21940 | n = sqlite3Fts5GetVarint(p, &v64); |
21941 | *v = ((u32)v64) & 0x7FFFFFFF; |
21942 | assert( n>3 && n<=9 ); |
21943 | return n; |
21944 | } |
21945 | } |
21946 | |
21947 | |
/*
** Bitmasks used by sqlite3Fts5GetVarint().  These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
**   SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
**
**   SLOT_4_2_0   A mask for  (0xfU<<28) | SLOT_2_0
**
** sqlite3Fts5GetVarint() asserts that both values match these expressions.
*/
#define SLOT_2_0     0x001fc07f
#define SLOT_4_2_0   0xf01fc07f
21959 | |
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read (between 1 and 9).  The value is stored
** in *v.
**
** Bytes are examined one at a time; the first byte with bit 0x80 clear
** ends the varint.  If the first eight bytes all have 0x80 set, the ninth
** byte is read without testing its flag bit.
**
** Two 32-bit accumulators are used: "a" collects bytes 0, 2, 4, 6 and 8
** while "b" collects bytes 1, 3, 5 and 7.  "s" saves the bits that end up
** in the upper 32 bits of the result.  The statement order matters: each
** step relies on masks and shifts applied by the steps before it.
*/
static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  /* The 1-byte case */
  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    *v = a;
    return 1;
  }

  /* The 2-byte case */
  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  /* The 3-byte case */
  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* The 4-byte case */
  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  /* The 5-byte case. From here on the value no longer fits in 32 bits,
  ** so the high-order bits are taken from s. */
  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* we can skip these cause they were (effectively) done above in calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  /* The 6-byte case */
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  /* The 7-byte case */
  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* The 8-byte case */
  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* The 9-byte case. The final byte is not tested for a flag bit; it is
  ** shifted in with a 15-bit (rather than 14-bit) shift so that all eight
  ** of its bits are kept. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now addresses the ninth byte, so p[-4] is the fifth byte (p4). Its
  ** bits above the low three are folded into the bottom of s. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}
22122 | |
22123 | /* |
22124 | ** The variable-length integer encoding is as follows: |
22125 | ** |
22126 | ** KEY: |
22127 | ** A = 0xxxxxxx 7 bits of data and one flag bit |
22128 | ** B = 1xxxxxxx 7 bits of data and one flag bit |
22129 | ** C = xxxxxxxx 8 bits of data |
22130 | ** |
22131 | ** 7 bits - A |
22132 | ** 14 bits - BA |
22133 | ** 21 bits - BBA |
22134 | ** 28 bits - BBBA |
22135 | ** 35 bits - BBBBA |
22136 | ** 42 bits - BBBBBA |
22137 | ** 49 bits - BBBBBBA |
22138 | ** 56 bits - BBBBBBBA |
22139 | ** 64 bits - BBBBBBBBC |
22140 | */ |
22141 | |
/*
** FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is defined and
** to nothing otherwise.  It is applied to fts5PutVarint64() below.
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
22147 | |
22148 | /* |
22149 | ** Write a 64-bit variable-length integer to memory starting at p[0]. |
22150 | ** The length of data write will be between 1 and 9 bytes. The number |
22151 | ** of bytes written is returned. |
22152 | ** |
22153 | ** A variable-length integer consists of the lower 7 bits of each byte |
22154 | ** for all bytes that have the 8th bit set and one byte with the 8th |
22155 | ** bit clear. Except, if we get to the 9th byte, it stores the full |
22156 | ** 8 bits and is the last byte. |
22157 | */ |
22158 | static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ |
22159 | int i, j, n; |
22160 | u8 buf[10]; |
22161 | if( v & (((u64)0xff000000)<<32) ){ |
22162 | p[8] = (u8)v; |
22163 | v >>= 8; |
22164 | for(i=7; i>=0; i--){ |
22165 | p[i] = (u8)((v & 0x7f) | 0x80); |
22166 | v >>= 7; |
22167 | } |
22168 | return 9; |
22169 | } |
22170 | n = 0; |
22171 | do{ |
22172 | buf[n++] = (u8)((v & 0x7f) | 0x80); |
22173 | v >>= 7; |
22174 | }while( v!=0 ); |
22175 | buf[0] &= 0x7f; |
22176 | assert( n<=9 ); |
22177 | for(i=0, j=n-1; j>=0; j--, i++){ |
22178 | p[i] = buf[j]; |
22179 | } |
22180 | return n; |
22181 | } |
22182 | |
22183 | static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ |
22184 | if( v<=0x7f ){ |
22185 | p[0] = v&0x7f; |
22186 | return 1; |
22187 | } |
22188 | if( v<=0x3fff ){ |
22189 | p[0] = ((v>>7)&0x7f)|0x80; |
22190 | p[1] = v&0x7f; |
22191 | return 2; |
22192 | } |
22193 | return fts5PutVarint64(p,v); |
22194 | } |
22195 | |
22196 | |
22197 | static int sqlite3Fts5GetVarintLen(u32 iVal){ |
22198 | #if 0 |
22199 | if( iVal<(1 << 7 ) ) return 1; |
22200 | #endif |
22201 | assert( iVal>=(1 << 7) ); |
22202 | if( iVal<(1 << 14) ) return 2; |
22203 | if( iVal<(1 << 21) ) return 3; |
22204 | if( iVal<(1 << 28) ) return 4; |
22205 | return 5; |
22206 | } |
22207 | |
22208 | #line 1 "fts5_vocab.c" |
22209 | /* |
22210 | ** 2015 May 08 |
22211 | ** |
22212 | ** The author disclaims copyright to this source code. In place of |
22213 | ** a legal notice, here is a blessing: |
22214 | ** |
22215 | ** May you do good and not evil. |
22216 | ** May you find forgiveness for yourself and forgive others. |
22217 | ** May you share freely, never taking more than you give. |
22218 | ** |
22219 | ****************************************************************************** |
22220 | ** |
22221 | ** This is an SQLite virtual table module implementing direct access to an |
22222 | ** existing FTS5 index. The module may create several different types of |
22223 | ** tables: |
22224 | ** |
22225 | ** col: |
22226 | ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); |
22227 | ** |
22228 | ** One row for each term/column combination. The value of $doc is set to |
22229 | ** the number of fts5 rows that contain at least one instance of term |
22230 | ** $term within column $col. Field $cnt is set to the total number of |
22231 | ** instances of term $term in column $col (in any row of the fts5 table). |
22232 | ** |
22233 | ** row: |
22234 | ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); |
22235 | ** |
22236 | ** One row for each term in the database. The value of $doc is set to |
22237 | ** the number of fts5 rows that contain at least one instance of term |
22238 | ** $term. Field $cnt is set to the total number of instances of term |
22239 | ** $term in the database. |
22240 | ** |
22241 | ** instance: |
22242 | ** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>)); |
22243 | ** |
22244 | ** One row for each term instance in the database. |
22245 | */ |
22246 | |
22247 | |
22248 | /* #include "fts5Int.h" */ |
22249 | |
22250 | |
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** An fts5vocab virtual table object.
**
** fts5VocabInitVtab() allocates this structure and the two strings
** zFts5Tbl and zFts5Db as a single block, with the strings stored
** immediately after the structure.  Freeing the structure therefore
** releases the strings as well.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;              /* Base class.  Must be first */
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
  unsigned bBusy;                 /* True while fts5VocabOpenMethod() is
                                  ** stepping its lookup statement; used to
                                  ** report recursive definitions */
};
22263 | |
/*
** A cursor open on an fts5vocab table.
**
** fts5VocabOpenMethod() allocates the aCnt[] and aDoc[] arrays in the same
** block as this structure: aCnt[] begins immediately after the structure
** and aDoc[] immediately after aCnt[].  Each has one entry per column of
** the underlying fts5 table (pFts5->pConfig->nCol).
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;       /* Base class.  Must be first */
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Table *pFts5;               /* Associated FTS5 table */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */
  void *pStruct;                  /* From sqlite3Fts5StructureRef() */

  int nLeTerm;                    /* Size of zLeTerm in bytes (-1 after reset) */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* These are used by 'col' tables only */
  int iCol;                       /* NOTE(review): presumably the column the
                                  ** current row describes - confirm */
  i64 *aCnt;                      /* nCol values; presumably per-column
                                  ** instance counts ("cnt") - confirm */
  i64 *aDoc;                      /* nCol values; presumably per-column row
                                  ** counts ("doc") - confirm */

  /* Output values used by all tables. */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */

  /* Output values Used by 'instance' tables only */
  i64 iInstPos;                   /* NOTE(review): looks like the position of
                                  ** the current instance - confirm */
  int iInstOff;                   /* NOTE(review): looks like an offset used
                                  ** while iterating instances - confirm */
};
22289 | |
/*
** Values for Fts5VocabTable.eType.  fts5VocabInitVtab() also uses these
** values to index its array of schema declarations, so the numbering must
** stay in step with the order of that array.
*/
#define FTS5_VOCAB_COL      0
#define FTS5_VOCAB_ROW      1
#define FTS5_VOCAB_INSTANCE 2

/*
** Column lists for each table type.  fts5VocabInitVtab() wraps these in a
** CREATE TABLE statement and passes it to sqlite3_declare_vtab().
*/
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"

/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
*/
#define FTS5_VOCAB_TERM_EQ 0x01
#define FTS5_VOCAB_TERM_GE 0x02
#define FTS5_VOCAB_TERM_LE 0x04
22305 | |
22306 | /* |
22307 | ** Translate a string containing an fts5vocab table type to an |
22308 | ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output |
22309 | ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message |
22310 | ** and return SQLITE_ERROR. |
22311 | */ |
22312 | static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ |
22313 | int rc = SQLITE_OK; |
22314 | char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); |
22315 | if( rc==SQLITE_OK ){ |
22316 | sqlite3Fts5Dequote(zCopy); |
22317 | if( sqlite3_stricmp(zCopy, "col" )==0 ){ |
22318 | *peType = FTS5_VOCAB_COL; |
22319 | }else |
22320 | |
22321 | if( sqlite3_stricmp(zCopy, "row" )==0 ){ |
22322 | *peType = FTS5_VOCAB_ROW; |
22323 | }else |
22324 | if( sqlite3_stricmp(zCopy, "instance" )==0 ){ |
22325 | *peType = FTS5_VOCAB_INSTANCE; |
22326 | }else |
22327 | { |
22328 | *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q" , zCopy); |
22329 | rc = SQLITE_ERROR; |
22330 | } |
22331 | sqlite3_free(zCopy); |
22332 | } |
22333 | |
22334 | return rc; |
22335 | } |
22336 | |
22337 | |
22338 | /* |
22339 | ** The xDisconnect() virtual table method. |
22340 | */ |
22341 | static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ |
22342 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; |
22343 | sqlite3_free(pTab); |
22344 | return SQLITE_OK; |
22345 | } |
22346 | |
22347 | /* |
22348 | ** The xDestroy() virtual table method. |
22349 | */ |
22350 | static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ |
22351 | Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; |
22352 | sqlite3_free(pTab); |
22353 | return SQLITE_OK; |
22354 | } |
22355 | |
22356 | /* |
22357 | ** This function is the implementation of both the xConnect and xCreate |
22358 | ** methods of the FTS3 virtual table. |
22359 | ** |
22360 | ** The argv[] array contains the following: |
22361 | ** |
22362 | ** argv[0] -> module name ("fts5vocab") |
22363 | ** argv[1] -> database name |
22364 | ** argv[2] -> table name |
22365 | ** |
22366 | ** then: |
22367 | ** |
22368 | ** argv[3] -> name of fts5 table |
22369 | ** argv[4] -> type of fts5vocab table |
22370 | ** |
22371 | ** or, for tables in the TEMP schema only. |
22372 | ** |
22373 | ** argv[3] -> name of fts5 tables database |
22374 | ** argv[4] -> name of fts5 table |
22375 | ** argv[5] -> type of fts5vocab table |
22376 | */ |
22377 | static int fts5VocabInitVtab( |
22378 | sqlite3 *db, /* The SQLite database connection */ |
22379 | void *pAux, /* Pointer to Fts5Global object */ |
22380 | int argc, /* Number of elements in argv array */ |
22381 | const char * const *argv, /* xCreate/xConnect argument array */ |
22382 | sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ |
22383 | char **pzErr /* Write any error message here */ |
22384 | ){ |
22385 | const char *azSchema[] = { |
22386 | "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")" , |
22387 | "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")" , |
22388 | "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")" |
22389 | }; |
22390 | |
22391 | Fts5VocabTable *pRet = 0; |
22392 | int rc = SQLITE_OK; /* Return code */ |
22393 | int bDb; |
22394 | |
22395 | bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp" , argv[1], 4)==0); |
22396 | |
22397 | if( argc!=5 && bDb==0 ){ |
22398 | *pzErr = sqlite3_mprintf("wrong number of vtable arguments" ); |
22399 | rc = SQLITE_ERROR; |
22400 | }else{ |
22401 | int nByte; /* Bytes of space to allocate */ |
22402 | const char *zDb = bDb ? argv[3] : argv[1]; |
22403 | const char *zTab = bDb ? argv[4] : argv[3]; |
22404 | const char *zType = bDb ? argv[5] : argv[4]; |
22405 | int nDb = (int)strlen(zDb)+1; |
22406 | int nTab = (int)strlen(zTab)+1; |
22407 | int eType = 0; |
22408 | |
22409 | rc = fts5VocabTableType(zType, pzErr, &eType); |
22410 | if( rc==SQLITE_OK ){ |
22411 | assert( eType>=0 && eType<ArraySize(azSchema) ); |
22412 | rc = sqlite3_declare_vtab(db, azSchema[eType]); |
22413 | } |
22414 | |
22415 | nByte = sizeof(Fts5VocabTable) + nDb + nTab; |
22416 | pRet = sqlite3Fts5MallocZero(&rc, nByte); |
22417 | if( pRet ){ |
22418 | pRet->pGlobal = (Fts5Global*)pAux; |
22419 | pRet->eType = eType; |
22420 | pRet->db = db; |
22421 | pRet->zFts5Tbl = (char*)&pRet[1]; |
22422 | pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; |
22423 | memcpy(pRet->zFts5Tbl, zTab, nTab); |
22424 | memcpy(pRet->zFts5Db, zDb, nDb); |
22425 | sqlite3Fts5Dequote(pRet->zFts5Tbl); |
22426 | sqlite3Fts5Dequote(pRet->zFts5Db); |
22427 | } |
22428 | } |
22429 | |
22430 | *ppVTab = (sqlite3_vtab*)pRet; |
22431 | return rc; |
22432 | } |
22433 | |
22434 | |
22435 | /* |
22436 | ** The xConnect() and xCreate() methods for the virtual table. All the |
22437 | ** work is done in function fts5VocabInitVtab(). |
22438 | */ |
22439 | static int fts5VocabConnectMethod( |
22440 | sqlite3 *db, /* Database connection */ |
22441 | void *pAux, /* Pointer to tokenizer hash table */ |
22442 | int argc, /* Number of elements in argv array */ |
22443 | const char * const *argv, /* xCreate/xConnect argument array */ |
22444 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
22445 | char **pzErr /* OUT: sqlite3_malloc'd error message */ |
22446 | ){ |
22447 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); |
22448 | } |
22449 | static int fts5VocabCreateMethod( |
22450 | sqlite3 *db, /* Database connection */ |
22451 | void *pAux, /* Pointer to tokenizer hash table */ |
22452 | int argc, /* Number of elements in argv array */ |
22453 | const char * const *argv, /* xCreate/xConnect argument array */ |
22454 | sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ |
22455 | char **pzErr /* OUT: sqlite3_malloc'd error message */ |
22456 | ){ |
22457 | return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr); |
22458 | } |
22459 | |
22460 | /* |
22461 | ** Implementation of the xBestIndex method. |
22462 | ** |
22463 | ** Only constraints of the form: |
22464 | ** |
22465 | ** term <= ? |
22466 | ** term == ? |
22467 | ** term >= ? |
22468 | ** |
22469 | ** are interpreted. Less-than and less-than-or-equal are treated |
22470 | ** identically, as are greater-than and greater-than-or-equal. |
22471 | */ |
22472 | static int fts5VocabBestIndexMethod( |
22473 | sqlite3_vtab *pUnused, |
22474 | sqlite3_index_info *pInfo |
22475 | ){ |
22476 | int i; |
22477 | int iTermEq = -1; |
22478 | int iTermGe = -1; |
22479 | int iTermLe = -1; |
22480 | int idxNum = 0; |
22481 | int nArg = 0; |
22482 | |
22483 | UNUSED_PARAM(pUnused); |
22484 | |
22485 | for(i=0; i<pInfo->nConstraint; i++){ |
22486 | struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; |
22487 | if( p->usable==0 ) continue; |
22488 | if( p->iColumn==0 ){ /* term column */ |
22489 | if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i; |
22490 | if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i; |
22491 | if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i; |
22492 | if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i; |
22493 | if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i; |
22494 | } |
22495 | } |
22496 | |
22497 | if( iTermEq>=0 ){ |
22498 | idxNum |= FTS5_VOCAB_TERM_EQ; |
22499 | pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; |
22500 | pInfo->estimatedCost = 100; |
22501 | }else{ |
22502 | pInfo->estimatedCost = 1000000; |
22503 | if( iTermGe>=0 ){ |
22504 | idxNum |= FTS5_VOCAB_TERM_GE; |
22505 | pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; |
22506 | pInfo->estimatedCost = pInfo->estimatedCost / 2; |
22507 | } |
22508 | if( iTermLe>=0 ){ |
22509 | idxNum |= FTS5_VOCAB_TERM_LE; |
22510 | pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; |
22511 | pInfo->estimatedCost = pInfo->estimatedCost / 2; |
22512 | } |
22513 | } |
22514 | |
22515 | /* This virtual table always delivers results in ascending order of |
22516 | ** the "term" column (column 0). So if the user has requested this |
22517 | ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the |
22518 | ** sqlite3_index_info.orderByConsumed flag to tell the core the results |
22519 | ** are already in sorted order. */ |
22520 | if( pInfo->nOrderBy==1 |
22521 | && pInfo->aOrderBy[0].iColumn==0 |
22522 | && pInfo->aOrderBy[0].desc==0 |
22523 | ){ |
22524 | pInfo->orderByConsumed = 1; |
22525 | } |
22526 | |
22527 | pInfo->idxNum = idxNum; |
22528 | return SQLITE_OK; |
22529 | } |
22530 | |
/*
** Implementation of xOpen method.
**
** The underlying fts5 table is located by preparing and stepping a query
** against it and passing the integer the query returns to
** sqlite3Fts5TableFromCsrid().  The prepared statement is kept in the new
** cursor (Fts5VocabCursor.pStmt) until the cursor is closed.
**
** On success *ppCsr is set to the new cursor and SQLITE_OK is returned.
** On failure *ppCsr is set to NULL, the statement is finalized and an
** error code is returned.  pVTab->zErrMsg is set if the fts5 table cannot
** be found, or if this method is entered while a previous call on the
** same table object is still stepping its statement.
*/
static int fts5VocabOpenMethod(
  sqlite3_vtab *pVTab,
  sqlite3_vtab_cursor **ppCsr
){
  Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
  Fts5Table *pFts5 = 0;
  Fts5VocabCursor *pCsr = 0;
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt = 0;
  char *zSql = 0;

  /* bBusy is set only while the statement below is being stepped, so
  ** finding it set here means this call was reached from within that
  ** step. */
  if( pTab->bBusy ){
    pVTab->zErrMsg = sqlite3_mprintf(
        "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
    );
    return SQLITE_ERROR;
  }
  zSql = sqlite3Fts5Mprintf(&rc,
      "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
      pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
  );
  if( zSql ){
    rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
  }
  sqlite3_free(zSql);
  assert( rc==SQLITE_OK || pStmt==0 );
  /* A plain SQLITE_ERROR from the prepare is not returned as-is. With
  ** pStmt==0, pFts5 stays 0 and the "no such fts5 table" message below is
  ** reported instead. */
  if( rc==SQLITE_ERROR ) rc = SQLITE_OK;

  pTab->bBusy = 1;
  if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
    i64 iId = sqlite3_column_int64(pStmt, 0);
    pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId);
  }
  pTab->bBusy = 0;

  if( rc==SQLITE_OK ){
    if( pFts5==0 ){
      /* Finalizing returns any error hit while stepping; only if there
      ** was none is the generic "no such fts5 table" error produced. */
      rc = sqlite3_finalize(pStmt);
      pStmt = 0;
      if( rc==SQLITE_OK ){
        pVTab->zErrMsg = sqlite3_mprintf(
            "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
        );
        rc = SQLITE_ERROR;
      }
    }else{
      rc = sqlite3Fts5FlushToDisk(pFts5);
    }
  }

  if( rc==SQLITE_OK ){
    /* One allocation holds the cursor followed by two arrays of nCol
    ** i64 values each (aCnt[] and aDoc[]). */
    i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor);
    pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
  }

  if( pCsr ){
    pCsr->pFts5 = pFts5;
    pCsr->pStmt = pStmt;
    pCsr->aCnt = (i64*)&pCsr[1];
    pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol];
  }else{
    /* No cursor will take ownership of the statement. pStmt may already
    ** be 0 here, in which case this is a no-op. */
    sqlite3_finalize(pStmt);
  }

  *ppCsr = (sqlite3_vtab_cursor*)pCsr;
  return rc;
}
22601 | |
22602 | static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ |
22603 | pCsr->rowid = 0; |
22604 | sqlite3Fts5IterClose(pCsr->pIter); |
22605 | sqlite3Fts5StructureRelease(pCsr->pStruct); |
22606 | pCsr->pStruct = 0; |
22607 | pCsr->pIter = 0; |
22608 | sqlite3_free(pCsr->zLeTerm); |
22609 | pCsr->nLeTerm = -1; |
22610 | pCsr->zLeTerm = 0; |
22611 | pCsr->bEof = 0; |
22612 | } |
22613 | |
22614 | /* |
22615 | ** Close the cursor. For additional information see the documentation |
22616 | ** on the xClose method of the virtual table interface. |
22617 | */ |
22618 | static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ |
22619 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
22620 | fts5VocabResetCursor(pCsr); |
22621 | sqlite3Fts5BufferFree(&pCsr->term); |
22622 | sqlite3_finalize(pCsr->pStmt); |
22623 | sqlite3_free(pCsr); |
22624 | return SQLITE_OK; |
22625 | } |
22626 | |
22627 | static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){ |
22628 | int rc = SQLITE_OK; |
22629 | |
22630 | if( sqlite3Fts5IterEof(pCsr->pIter) ){ |
22631 | pCsr->bEof = 1; |
22632 | }else{ |
22633 | const char *zTerm; |
22634 | int nTerm; |
22635 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); |
22636 | if( pCsr->nLeTerm>=0 ){ |
22637 | int nCmp = MIN(nTerm, pCsr->nLeTerm); |
22638 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); |
22639 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ |
22640 | pCsr->bEof = 1; |
22641 | } |
22642 | } |
22643 | |
22644 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); |
22645 | } |
22646 | return rc; |
22647 | } |
22648 | |
22649 | static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){ |
22650 | int eDetail = pCsr->pFts5->pConfig->eDetail; |
22651 | int rc = SQLITE_OK; |
22652 | Fts5IndexIter *pIter = pCsr->pIter; |
22653 | i64 *pp = &pCsr->iInstPos; |
22654 | int *po = &pCsr->iInstOff; |
22655 | |
22656 | assert( sqlite3Fts5IterEof(pIter)==0 ); |
22657 | assert( pCsr->bEof==0 ); |
22658 | while( eDetail==FTS5_DETAIL_NONE |
22659 | || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) |
22660 | ){ |
22661 | pCsr->iInstPos = 0; |
22662 | pCsr->iInstOff = 0; |
22663 | |
22664 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); |
22665 | if( rc==SQLITE_OK ){ |
22666 | rc = fts5VocabInstanceNewTerm(pCsr); |
22667 | if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE ) break; |
22668 | } |
22669 | if( rc ){ |
22670 | pCsr->bEof = 1; |
22671 | break; |
22672 | } |
22673 | } |
22674 | |
22675 | return rc; |
22676 | } |
22677 | |
22678 | /* |
22679 | ** Advance the cursor to the next row in the table. |
22680 | */ |
22681 | static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ |
22682 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
22683 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; |
22684 | int nCol = pCsr->pFts5->pConfig->nCol; |
22685 | int rc; |
22686 | |
22687 | rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct); |
22688 | if( rc!=SQLITE_OK ) return rc; |
22689 | pCsr->rowid++; |
22690 | |
22691 | if( pTab->eType==FTS5_VOCAB_INSTANCE ){ |
22692 | return fts5VocabInstanceNext(pCsr); |
22693 | } |
22694 | |
22695 | if( pTab->eType==FTS5_VOCAB_COL ){ |
22696 | for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ |
22697 | if( pCsr->aDoc[pCsr->iCol] ) break; |
22698 | } |
22699 | } |
22700 | |
22701 | if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){ |
22702 | if( sqlite3Fts5IterEof(pCsr->pIter) ){ |
22703 | pCsr->bEof = 1; |
22704 | }else{ |
22705 | const char *zTerm; |
22706 | int nTerm; |
22707 | |
22708 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); |
22709 | assert( nTerm>=0 ); |
22710 | if( pCsr->nLeTerm>=0 ){ |
22711 | int nCmp = MIN(nTerm, pCsr->nLeTerm); |
22712 | int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); |
22713 | if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ |
22714 | pCsr->bEof = 1; |
22715 | return SQLITE_OK; |
22716 | } |
22717 | } |
22718 | |
22719 | sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); |
22720 | memset(pCsr->aCnt, 0, nCol * sizeof(i64)); |
22721 | memset(pCsr->aDoc, 0, nCol * sizeof(i64)); |
22722 | pCsr->iCol = 0; |
22723 | |
22724 | assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); |
22725 | while( rc==SQLITE_OK ){ |
22726 | int eDetail = pCsr->pFts5->pConfig->eDetail; |
22727 | const u8 *pPos; int nPos; /* Position list */ |
22728 | i64 iPos = 0; /* 64-bit position read from poslist */ |
22729 | int iOff = 0; /* Current offset within position list */ |
22730 | |
22731 | pPos = pCsr->pIter->pData; |
22732 | nPos = pCsr->pIter->nData; |
22733 | |
22734 | switch( pTab->eType ){ |
22735 | case FTS5_VOCAB_ROW: |
22736 | if( eDetail==FTS5_DETAIL_FULL ){ |
22737 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ |
22738 | pCsr->aCnt[0]++; |
22739 | } |
22740 | } |
22741 | pCsr->aDoc[0]++; |
22742 | break; |
22743 | |
22744 | case FTS5_VOCAB_COL: |
22745 | if( eDetail==FTS5_DETAIL_FULL ){ |
22746 | int iCol = -1; |
22747 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ |
22748 | int ii = FTS5_POS2COLUMN(iPos); |
22749 | if( iCol!=ii ){ |
22750 | if( ii>=nCol ){ |
22751 | rc = FTS5_CORRUPT; |
22752 | break; |
22753 | } |
22754 | pCsr->aDoc[ii]++; |
22755 | iCol = ii; |
22756 | } |
22757 | pCsr->aCnt[ii]++; |
22758 | } |
22759 | }else if( eDetail==FTS5_DETAIL_COLUMNS ){ |
22760 | while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ |
22761 | assert_nc( iPos>=0 && iPos<nCol ); |
22762 | if( iPos>=nCol ){ |
22763 | rc = FTS5_CORRUPT; |
22764 | break; |
22765 | } |
22766 | pCsr->aDoc[iPos]++; |
22767 | } |
22768 | }else{ |
22769 | assert( eDetail==FTS5_DETAIL_NONE ); |
22770 | pCsr->aDoc[0]++; |
22771 | } |
22772 | break; |
22773 | |
22774 | default: |
22775 | assert( pTab->eType==FTS5_VOCAB_INSTANCE ); |
22776 | break; |
22777 | } |
22778 | |
22779 | if( rc==SQLITE_OK ){ |
22780 | rc = sqlite3Fts5IterNextScan(pCsr->pIter); |
22781 | } |
22782 | if( pTab->eType==FTS5_VOCAB_INSTANCE ) break; |
22783 | |
22784 | if( rc==SQLITE_OK ){ |
22785 | zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); |
22786 | if( nTerm!=pCsr->term.n |
22787 | || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm)) |
22788 | ){ |
22789 | break; |
22790 | } |
22791 | if( sqlite3Fts5IterEof(pCsr->pIter) ) break; |
22792 | } |
22793 | } |
22794 | } |
22795 | } |
22796 | |
22797 | if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){ |
22798 | for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++); |
22799 | if( pCsr->iCol==nCol ){ |
22800 | rc = FTS5_CORRUPT; |
22801 | } |
22802 | } |
22803 | return rc; |
22804 | } |
22805 | |
22806 | /* |
22807 | ** This is the xFilter implementation for the virtual table. |
22808 | */ |
22809 | static int fts5VocabFilterMethod( |
22810 | sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ |
22811 | int idxNum, /* Strategy index */ |
22812 | const char *zUnused, /* Unused */ |
22813 | int nUnused, /* Number of elements in apVal */ |
22814 | sqlite3_value **apVal /* Arguments for the indexing scheme */ |
22815 | ){ |
22816 | Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; |
22817 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
22818 | int eType = pTab->eType; |
22819 | int rc = SQLITE_OK; |
22820 | |
22821 | int iVal = 0; |
22822 | int f = FTS5INDEX_QUERY_SCAN; |
22823 | const char *zTerm = 0; |
22824 | int nTerm = 0; |
22825 | |
22826 | sqlite3_value *pEq = 0; |
22827 | sqlite3_value *pGe = 0; |
22828 | sqlite3_value *pLe = 0; |
22829 | |
22830 | UNUSED_PARAM2(zUnused, nUnused); |
22831 | |
22832 | fts5VocabResetCursor(pCsr); |
22833 | if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++]; |
22834 | if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++]; |
22835 | if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++]; |
22836 | |
22837 | if( pEq ){ |
22838 | zTerm = (const char *)sqlite3_value_text(pEq); |
22839 | nTerm = sqlite3_value_bytes(pEq); |
22840 | f = 0; |
22841 | }else{ |
22842 | if( pGe ){ |
22843 | zTerm = (const char *)sqlite3_value_text(pGe); |
22844 | nTerm = sqlite3_value_bytes(pGe); |
22845 | } |
22846 | if( pLe ){ |
22847 | const char *zCopy = (const char *)sqlite3_value_text(pLe); |
22848 | if( zCopy==0 ) zCopy = "" ; |
22849 | pCsr->nLeTerm = sqlite3_value_bytes(pLe); |
22850 | pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1); |
22851 | if( pCsr->zLeTerm==0 ){ |
22852 | rc = SQLITE_NOMEM; |
22853 | }else{ |
22854 | memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); |
22855 | } |
22856 | } |
22857 | } |
22858 | |
22859 | if( rc==SQLITE_OK ){ |
22860 | Fts5Index *pIndex = pCsr->pFts5->pIndex; |
22861 | rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); |
22862 | if( rc==SQLITE_OK ){ |
22863 | pCsr->pStruct = sqlite3Fts5StructureRef(pIndex); |
22864 | } |
22865 | } |
22866 | if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){ |
22867 | rc = fts5VocabInstanceNewTerm(pCsr); |
22868 | } |
22869 | if( rc==SQLITE_OK && !pCsr->bEof |
22870 | && (eType!=FTS5_VOCAB_INSTANCE |
22871 | || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE) |
22872 | ){ |
22873 | rc = fts5VocabNextMethod(pCursor); |
22874 | } |
22875 | |
22876 | return rc; |
22877 | } |
22878 | |
22879 | /* |
22880 | ** This is the xEof method of the virtual table. SQLite calls this |
22881 | ** routine to find out if it has reached the end of a result set. |
22882 | */ |
22883 | static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ |
22884 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
22885 | return pCsr->bEof; |
22886 | } |
22887 | |
22888 | static int fts5VocabColumnMethod( |
22889 | sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ |
22890 | sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ |
22891 | int iCol /* Index of column to read value from */ |
22892 | ){ |
22893 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
22894 | int eDetail = pCsr->pFts5->pConfig->eDetail; |
22895 | int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; |
22896 | i64 iVal = 0; |
22897 | |
22898 | if( iCol==0 ){ |
22899 | sqlite3_result_text( |
22900 | pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT |
22901 | ); |
22902 | }else if( eType==FTS5_VOCAB_COL ){ |
22903 | assert( iCol==1 || iCol==2 || iCol==3 ); |
22904 | if( iCol==1 ){ |
22905 | if( eDetail!=FTS5_DETAIL_NONE ){ |
22906 | const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol]; |
22907 | sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); |
22908 | } |
22909 | }else if( iCol==2 ){ |
22910 | iVal = pCsr->aDoc[pCsr->iCol]; |
22911 | }else{ |
22912 | iVal = pCsr->aCnt[pCsr->iCol]; |
22913 | } |
22914 | }else if( eType==FTS5_VOCAB_ROW ){ |
22915 | assert( iCol==1 || iCol==2 ); |
22916 | if( iCol==1 ){ |
22917 | iVal = pCsr->aDoc[0]; |
22918 | }else{ |
22919 | iVal = pCsr->aCnt[0]; |
22920 | } |
22921 | }else{ |
22922 | assert( eType==FTS5_VOCAB_INSTANCE ); |
22923 | switch( iCol ){ |
22924 | case 1: |
22925 | sqlite3_result_int64(pCtx, pCsr->pIter->iRowid); |
22926 | break; |
22927 | case 2: { |
22928 | int ii = -1; |
22929 | if( eDetail==FTS5_DETAIL_FULL ){ |
22930 | ii = FTS5_POS2COLUMN(pCsr->iInstPos); |
22931 | }else if( eDetail==FTS5_DETAIL_COLUMNS ){ |
22932 | ii = (int)pCsr->iInstPos; |
22933 | } |
22934 | if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){ |
22935 | const char *z = pCsr->pFts5->pConfig->azCol[ii]; |
22936 | sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); |
22937 | } |
22938 | break; |
22939 | } |
22940 | default: { |
22941 | assert( iCol==3 ); |
22942 | if( eDetail==FTS5_DETAIL_FULL ){ |
22943 | int ii = FTS5_POS2OFFSET(pCsr->iInstPos); |
22944 | sqlite3_result_int(pCtx, ii); |
22945 | } |
22946 | break; |
22947 | } |
22948 | } |
22949 | } |
22950 | |
22951 | if( iVal>0 ) sqlite3_result_int64(pCtx, iVal); |
22952 | return SQLITE_OK; |
22953 | } |
22954 | |
22955 | /* |
22956 | ** This is the xRowid method. The SQLite core calls this routine to |
22957 | ** retrieve the rowid for the current row of the result set. The |
22958 | ** rowid should be written to *pRowid. |
22959 | */ |
22960 | static int fts5VocabRowidMethod( |
22961 | sqlite3_vtab_cursor *pCursor, |
22962 | sqlite_int64 *pRowid |
22963 | ){ |
22964 | Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; |
22965 | *pRowid = pCsr->rowid; |
22966 | return SQLITE_OK; |
22967 | } |
22968 | |
22969 | static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ |
22970 | static const sqlite3_module fts5Vocab = { |
22971 | /* iVersion */ 2, |
22972 | /* xCreate */ fts5VocabCreateMethod, |
22973 | /* xConnect */ fts5VocabConnectMethod, |
22974 | /* xBestIndex */ fts5VocabBestIndexMethod, |
22975 | /* xDisconnect */ fts5VocabDisconnectMethod, |
22976 | /* xDestroy */ fts5VocabDestroyMethod, |
22977 | /* xOpen */ fts5VocabOpenMethod, |
22978 | /* xClose */ fts5VocabCloseMethod, |
22979 | /* xFilter */ fts5VocabFilterMethod, |
22980 | /* xNext */ fts5VocabNextMethod, |
22981 | /* xEof */ fts5VocabEofMethod, |
22982 | /* xColumn */ fts5VocabColumnMethod, |
22983 | /* xRowid */ fts5VocabRowidMethod, |
22984 | /* xUpdate */ 0, |
22985 | /* xBegin */ 0, |
22986 | /* xSync */ 0, |
22987 | /* xCommit */ 0, |
22988 | /* xRollback */ 0, |
22989 | /* xFindFunction */ 0, |
22990 | /* xRename */ 0, |
22991 | /* xSavepoint */ 0, |
22992 | /* xRelease */ 0, |
22993 | /* xRollbackTo */ 0, |
22994 | /* xShadowName */ 0 |
22995 | }; |
22996 | void *p = (void*)pGlobal; |
22997 | |
22998 | return sqlite3_create_module_v2(db, "fts5vocab" , &fts5Vocab, p, 0); |
22999 | } |
23000 | |
23001 | |
23002 | |
23003 | #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ |
23004 | |