1
2
3#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5)
4
/*
** Reconcile NDEBUG with SQLITE_DEBUG. When SQLITE_DEBUG is defined any
** NDEBUG definition is removed. Otherwise NDEBUG is defined (as 1) unless
** it has already been defined.
*/
#if defined(SQLITE_DEBUG)
# undef NDEBUG
#elif !defined(NDEBUG)
# define NDEBUG 1
#endif
11
12#line 1 "fts5.h"
13/*
14** 2014 May 31
15**
16** The author disclaims copyright to this source code. In place of
17** a legal notice, here is a blessing:
18**
19** May you do good and not evil.
20** May you find forgiveness for yourself and forgive others.
21** May you share freely, never taking more than you give.
22**
23******************************************************************************
24**
25** Interfaces to extend FTS5. Using the interfaces defined in this file,
26** FTS5 may be extended with:
27**
28** * custom tokenizers, and
29** * custom auxiliary functions.
30*/
31
32
33#ifndef _FTS5_H
34#define _FTS5_H
35
36#include "sqlite3.h"
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
42/*************************************************************************
43** CUSTOM AUXILIARY FUNCTIONS
44**
45** Virtual table implementations may overload SQL functions by implementing
46** the sqlite3_module.xFindFunction() method.
47*/
48
49typedef struct Fts5ExtensionApi Fts5ExtensionApi;
50typedef struct Fts5Context Fts5Context;
51typedef struct Fts5PhraseIter Fts5PhraseIter;
52
53typedef void (*fts5_extension_function)(
54 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
55 Fts5Context *pFts, /* First arg to pass to pApi functions */
56 sqlite3_context *pCtx, /* Context for returning result/error */
57 int nVal, /* Number of values in apVal[] array */
58 sqlite3_value **apVal /* Array of trailing arguments */
59);
60
/*
** Iterator state used with the xPhraseFirst()/xPhraseNext() and
** xPhraseFirstColumn()/xPhraseNextColumn() API methods. Applications
** should not modify the members of this structure directly.
*/
struct Fts5PhraseIter {
  const unsigned char *a;         /* Private to FTS5 */
  const unsigned char *b;         /* Private to FTS5 */
};
65
66/*
67** EXTENSION API FUNCTIONS
68**
69** xUserData(pFts):
70** Return a copy of the context pointer the extension function was
71** registered with.
72**
** xColumnTotalSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the FTS5 table. Or, if iCol is
**   non-negative but less than the number of columns in the table, set
**   *pnToken to the total number of tokens in column iCol, considering
**   all rows in the FTS5 table.
79**
80** If parameter iCol is greater than or equal to the number of columns
81** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
82** an OOM condition or IO error), an appropriate SQLite error code is
83** returned.
84**
85** xColumnCount(pFts):
86** Return the number of columns in the table.
87**
88** xColumnSize(pFts, iCol, pnToken):
89** If parameter iCol is less than zero, set output variable *pnToken
90** to the total number of tokens in the current row. Or, if iCol is
91** non-negative but less than the number of columns in the table, set
92** *pnToken to the number of tokens in column iCol of the current row.
93**
94** If parameter iCol is greater than or equal to the number of columns
95** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
96** an OOM condition or IO error), an appropriate SQLite error code is
97** returned.
98**
99** This function may be quite inefficient if used with an FTS5 table
100** created with the "columnsize=0" option.
101**
102** xColumnText:
103** This function attempts to retrieve the text of column iCol of the
104** current document. If successful, (*pz) is set to point to a buffer
105** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
106** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
107** if an error occurs, an SQLite error code is returned and the final values
108** of (*pz) and (*pn) are undefined.
109**
110** xPhraseCount:
111** Returns the number of phrases in the current query expression.
112**
113** xPhraseSize:
114** Returns the number of tokens in phrase iPhrase of the query. Phrases
115** are numbered starting from zero.
116**
117** xInstCount:
**   Set *pnInst to the total number of occurrences of all phrases within
**   the query within the current row. Return SQLITE_OK if successful, or
**   an error code (e.g. SQLITE_NOMEM) if an error occurs.
121**
122** This API can be quite slow if used with an FTS5 table created with the
123** "detail=none" or "detail=column" option. If the FTS5 table is created
124** with either "detail=none" or "detail=column" and "content=" option
125** (i.e. if it is a contentless table), then this API always returns 0.
126**
127** xInst:
128** Query for the details of phrase match iIdx within the current row.
129** Phrase matches are numbered starting from zero, so the iIdx argument
130** should be greater than or equal to zero and smaller than the value
131** output by xInstCount().
132**
**   Usually, output parameter *piPhrase is set to the phrase number, *piCol
**   to the column in which it occurs and *piOff to the token offset of the
**   first token of the phrase. Returns SQLITE_OK if successful, or an error
**   code (e.g. SQLITE_NOMEM) if an error occurs.
137**
138** This API can be quite slow if used with an FTS5 table created with the
139** "detail=none" or "detail=column" option.
140**
141** xRowid:
142** Returns the rowid of the current row.
143**
144** xTokenize:
145** Tokenize text using the tokenizer belonging to the FTS5 table.
146**
147** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
148** This API function is used to query the FTS table for phrase iPhrase
149** of the current query. Specifically, a query equivalent to:
150**
151** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
152**
153** with $p set to a phrase equivalent to the phrase iPhrase of the
154** current query is executed. Any column filter that applies to
155** phrase iPhrase of the current query is included in $p. For each
156** row visited, the callback function passed as the fourth argument
157** is invoked. The context and API objects passed to the callback
158** function may be used to access the properties of each matched row.
**   Invoking Api.xUserData() returns a copy of the pointer passed as
**   the third argument (pUserData) to xQueryPhrase().
161**
162** If the callback function returns any value other than SQLITE_OK, the
163** query is abandoned and the xQueryPhrase function returns immediately.
164** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
165** Otherwise, the error code is propagated upwards.
166**
167** If the query runs to completion without incident, SQLITE_OK is returned.
168** Or, if some error occurs before the query completes or is aborted by
169** the callback, an SQLite error code is returned.
170**
171**
172** xSetAuxdata(pFts5, pAux, xDelete)
173**
174** Save the pointer passed as the second argument as the extension function's
175** "auxiliary data". The pointer may then be retrieved by the current or any
176** future invocation of the same fts5 extension function made as part of
177** the same MATCH query using the xGetAuxdata() API.
178**
179** Each extension function is allocated a single auxiliary data slot for
180** each FTS query (MATCH expression). If the extension function is invoked
181** more than once for a single FTS query, then all invocations share a
182** single auxiliary data context.
183**
184** If there is already an auxiliary data pointer when this function is
185** invoked, then it is replaced by the new pointer. If an xDelete callback
186** was specified along with the original pointer, it is invoked at this
187** point.
188**
189** The xDelete callback, if one is specified, is also invoked on the
190** auxiliary data pointer after the FTS5 query has finished.
191**
192** If an error (e.g. an OOM condition) occurs within this function,
193** the auxiliary data is set to NULL and an error code returned. If the
194** xDelete parameter was not NULL, it is invoked on the auxiliary data
195** pointer before returning.
196**
197**
198** xGetAuxdata(pFts5, bClear)
199**
200** Returns the current auxiliary data pointer for the fts5 extension
201** function. See the xSetAuxdata() method for details.
202**
203** If the bClear argument is non-zero, then the auxiliary data is cleared
204** (set to NULL) before this function returns. In this case the xDelete,
205** if any, is not invoked.
206**
207**
208** xRowCount(pFts5, pnRow)
209**
210** This function is used to retrieve the total number of rows in the table.
211** In other words, the same value that would be returned by:
212**
213** SELECT count(*) FROM ftstable;
214**
215** xPhraseFirst()
216** This function is used, along with type Fts5PhraseIter and the xPhraseNext
217** method, to iterate through all instances of a single query phrase within
218** the current row. This is the same information as is accessible via the
219** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
220** to use, this API may be faster under some circumstances. To iterate
221** through instances of phrase iPhrase, use the following code:
222**
223** Fts5PhraseIter iter;
224** int iCol, iOff;
225** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
226** iCol>=0;
227** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
228** ){
229** // An instance of phrase iPhrase at offset iOff of column iCol
230** }
231**
232** The Fts5PhraseIter structure is defined above. Applications should not
233** modify this structure directly - it should only be used as shown above
234** with the xPhraseFirst() and xPhraseNext() API methods (and by
235** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
236**
237** This API can be quite slow if used with an FTS5 table created with the
238** "detail=none" or "detail=column" option. If the FTS5 table is created
239** with either "detail=none" or "detail=column" and "content=" option
240** (i.e. if it is a contentless table), then this API always iterates
241** through an empty set (all calls to xPhraseFirst() set iCol to -1).
242**
243** xPhraseNext()
244** See xPhraseFirst above.
245**
246** xPhraseFirstColumn()
247** This function and xPhraseNextColumn() are similar to the xPhraseFirst()
248** and xPhraseNext() APIs described above. The difference is that instead
249** of iterating through all instances of a phrase in the current row, these
250** APIs are used to iterate through the set of columns in the current row
251** that contain one or more instances of a specified phrase. For example:
252**
253** Fts5PhraseIter iter;
254** int iCol;
255** for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
256** iCol>=0;
257** pApi->xPhraseNextColumn(pFts, &iter, &iCol)
258** ){
259** // Column iCol contains at least one instance of phrase iPhrase
260** }
261**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" option. If the FTS5 table is created with both the
**   "detail=none" and "content=" options (i.e. if it is a contentless
**   table), then this API always iterates through an empty set (all calls
**   to xPhraseFirstColumn() set iCol to -1).
267**
**   The information accessed using this API and its companion
**   xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
**   (or xInst/xInstCount). The chief advantage of this API is that it is
**   significantly more efficient than those alternatives when used with
**   "detail=column" tables.
273**
274** xPhraseNextColumn()
275** See xPhraseFirstColumn above.
276*/
277struct Fts5ExtensionApi {
278 int iVersion; /* Currently always set to 3 */
279
280 void *(*xUserData)(Fts5Context*);
281
282 int (*xColumnCount)(Fts5Context*);
283 int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
284 int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
285
286 int (*xTokenize)(Fts5Context*,
287 const char *pText, int nText, /* Text to tokenize */
288 void *pCtx, /* Context passed to xToken() */
289 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
290 );
291
292 int (*xPhraseCount)(Fts5Context*);
293 int (*xPhraseSize)(Fts5Context*, int iPhrase);
294
295 int (*xInstCount)(Fts5Context*, int *pnInst);
296 int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
297
298 sqlite3_int64 (*xRowid)(Fts5Context*);
299 int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
300 int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
301
302 int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
303 int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
304 );
305 int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
306 void *(*xGetAuxdata)(Fts5Context*, int bClear);
307
308 int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
309 void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
310
311 int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
312 void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
313};
314
315/*
** END OF CUSTOM AUXILIARY FUNCTIONS
317*************************************************************************/
318
319/*************************************************************************
320** CUSTOM TOKENIZERS
321**
322** Applications may also register custom tokenizer types. A tokenizer
323** is registered by providing fts5 with a populated instance of the
324** following structure. All structure methods must be defined, setting
325** any member of the fts5_tokenizer struct to NULL leads to undefined
326** behaviour. The structure methods are expected to function as follows:
327**
328** xCreate:
329** This function is used to allocate and initialize a tokenizer instance.
330** A tokenizer instance is required to actually tokenize text.
331**
332** The first argument passed to this function is a copy of the (void*)
333** pointer provided by the application when the fts5_tokenizer object
334** was registered with FTS5 (the third argument to xCreateTokenizer()).
335** The second and third arguments are an array of nul-terminated strings
336** containing the tokenizer arguments, if any, specified following the
337** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
338** to create the FTS5 table.
339**
340** The final argument is an output variable. If successful, (*ppOut)
341** should be set to point to the new tokenizer handle and SQLITE_OK
342** returned. If an error occurs, some value other than SQLITE_OK should
343** be returned. In this case, fts5 assumes that the final value of *ppOut
344** is undefined.
345**
346** xDelete:
347** This function is invoked to delete a tokenizer handle previously
348** allocated using xCreate(). Fts5 guarantees that this function will
349** be invoked exactly once for each successful call to xCreate().
350**
351** xTokenize:
352** This function is expected to tokenize the nText byte string indicated
353** by argument pText. pText may or may not be nul-terminated. The first
354** argument passed to this function is a pointer to an Fts5Tokenizer object
355** returned by an earlier call to xCreate().
356**
357** The second argument indicates the reason that FTS5 is requesting
358** tokenization of the supplied text. This is always one of the following
359** four values:
360**
361** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
362** or removed from the FTS table. The tokenizer is being invoked to
363** determine the set of tokens to add to (or delete from) the
364** FTS index.
365**
366** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
367** against the FTS index. The tokenizer is being called to tokenize
368** a bareword or quoted string specified as part of the query.
369**
370** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
371** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
372** followed by a "*" character, indicating that the last token
373** returned by the tokenizer will be treated as a token prefix.
374**
**   <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
**            satisfy an Fts5ExtensionApi.xTokenize() request made by an
**            auxiliary function. Or an Fts5ExtensionApi.xColumnSize()
**            request made by the same on a columnsize=0 database.
379** </ul>
380**
**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The third and fourth
**   arguments are a pointer to a buffer containing the token text, and the
**   size of the token in bytes. The fifth and sixth arguments are the byte
**   offsets of the first byte of and first byte immediately following the
**   text from which the token is derived within the input.
388**
389** The second argument passed to the xToken() callback ("tflags") should
390** normally be set to 0. The exception is if the tokenizer supports
391** synonyms. In this case see the discussion below for details.
392**
393** FTS5 assumes the xToken() callback is invoked for each token in the
394** order that they occur within the input text.
395**
396** If an xToken() callback returns any value other than SQLITE_OK, then
397** the tokenization should be abandoned and the xTokenize() method should
398** immediately return a copy of the xToken() return value. Or, if the
399** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
400** if an error occurs with the xTokenize() implementation itself, it
401** may abandon the tokenization and return any error code other than
402** SQLITE_OK or SQLITE_DONE.
403**
404** SYNONYM SUPPORT
405**
406** Custom tokenizers may also support synonyms. Consider a case in which a
407** user wishes to query for a phrase such as "first place". Using the
408** built-in tokenizers, the FTS5 query 'first + place' will match instances
409** of "first place" within the document set, but not alternative forms
410** such as "1st place". In some applications, it would be better to match
411** all instances of "first place" or "1st place" regardless of which form
412** the user specified in the MATCH query text.
413**
414** There are several ways to approach this in FTS5:
415**
416** <ol><li> By mapping all synonyms to a single token. In this case, using
417** the above example, this means that the tokenizer returns the
418** same token for inputs "first" and "1st". Say that token is in
419** fact "first", so that when the user inserts the document "I won
420** 1st place" entries are added to the index for tokens "i", "won",
421** "first" and "place". If the user then queries for '1st + place',
422** the tokenizer substitutes "first" for "1st" and the query works
423** as expected.
424**
425** <li> By querying the index for all synonyms of each query term
426** separately. In this case, when tokenizing query text, the
427** tokenizer may provide multiple synonyms for a single term
428** within the document. FTS5 then queries the index for each
429** synonym individually. For example, faced with the query:
430**
431** <codeblock>
432** ... MATCH 'first place'</codeblock>
433**
434** the tokenizer offers both "1st" and "first" as synonyms for the
435** first token in the MATCH query and FTS5 effectively runs a query
436** similar to:
437**
438** <codeblock>
439** ... MATCH '(first OR 1st) place'</codeblock>
440**
441** except that, for the purposes of auxiliary functions, the query
442** still appears to contain just two phrases - "(first OR 1st)"
443** being treated as a single phrase.
444**
445** <li> By adding multiple synonyms for a single term to the FTS index.
446** Using this method, when tokenizing document text, the tokenizer
447** provides multiple synonyms for each token. So that when a
448** document such as "I won first place" is tokenized, entries are
449** added to the FTS index for "i", "won", "first", "1st" and
450** "place".
451**
452** This way, even if the tokenizer does not provide synonyms
453** when tokenizing query text (it should not - to do so would be
454** inefficient), it doesn't matter if the user queries for
455** 'first + place' or '1st + place', as there are entries in the
456** FTS index corresponding to both forms of the first token.
457** </ol>
458**
459** Whether it is parsing document or query text, any call to xToken that
460** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
461** is considered to supply a synonym for the previous token. For example,
462** when parsing the document "I won first place", a tokenizer that supports
463** synonyms would call xToken() 5 times, as follows:
464**
465** <codeblock>
466** xToken(pCtx, 0, "i", 1, 0, 1);
467** xToken(pCtx, 0, "won", 3, 2, 5);
468** xToken(pCtx, 0, "first", 5, 6, 11);
469** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
470** xToken(pCtx, 0, "place", 5, 12, 17);
471**</codeblock>
472**
473** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
474** xToken() is called. Multiple synonyms may be specified for a single token
475** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
476** There is no limit to the number of synonyms that may be provided for a
477** single token.
478**
479** In many cases, method (1) above is the best approach. It does not add
480** extra data to the FTS index or require FTS5 to query for multiple terms,
481** so it is efficient in terms of disk space and query speed. However, it
482** does not support prefix queries very well. If, as suggested above, the
483** token "first" is substituted for "1st" by the tokenizer, then the query:
484**
485** <codeblock>
486** ... MATCH '1s*'</codeblock>
487**
488** will not match documents that contain the token "1st" (as the tokenizer
489** will probably not map "1s" to any prefix of "first").
490**
491** For full prefix support, method (3) may be preferred. In this case,
492** because the index contains entries for both "first" and "1st", prefix
493** queries such as 'fi*' or '1s*' will match correctly. However, because
494** extra entries are added to the FTS index, this method uses more space
495** within the database.
496**
497** Method (2) offers a midpoint between (1) and (3). Using this method,
498** a query such as '1s*' will match documents that contain the literal
499** token "1st", but not "first" (assuming the tokenizer is not able to
500** provide synonyms for prefixes). However, a non-prefix query like '1st'
501** will match against "1st" and "first". This method does not require
502** extra disk space, as no extra entries are added to the FTS index.
503** On the other hand, it may require more CPU cycles to run MATCH queries,
504** as separate queries of the FTS index are required for each synonym.
505**
**   When using methods (2) or (3), it is important that the tokenizer only
**   provide synonyms when tokenizing document text (method (3)) or query
**   text (method (2)), not both. Providing synonyms in both cases will not
**   cause any errors, but is inefficient.
510*/
typedef struct Fts5Tokenizer Fts5Tokenizer;
typedef struct fts5_tokenizer fts5_tokenizer;

/*
** Methods of a custom tokenizer. All three members must be non-NULL.
*/
struct fts5_tokenizer {
  /* Allocate and initialize a tokenizer instance, returned via *ppOut */
  int (*xCreate)(
    void *pContext,
    const char **azArg, int nArg,
    Fts5Tokenizer **ppOut
  );

  /* Delete a tokenizer handle previously allocated using xCreate() */
  void (*xDelete)(Fts5Tokenizer *pTok);

  /* Tokenize the nText byte string pText, calling xToken() per token */
  int (*xTokenize)(
    Fts5Tokenizer *pTok,
    void *pCtx,
    int flags,                    /* Mask of FTS5_TOKENIZE_* flags */
    const char *pText, int nText,
    int (*xToken)(
      void *pCtx,                 /* Copy of 2nd argument to xTokenize() */
      int tflags,                 /* Mask of FTS5_TOKEN_* flags */
      const char *pToken,         /* Pointer to buffer containing token */
      int nToken,                 /* Size of token in bytes */
      int iStart,                 /* Byte offset of token within input text */
      int iEnd                    /* Byte offset of end of token within input text */
    )
  );
};
530
/*
** Flags that may be passed as the third argument ("flags") to xTokenize().
*/
#define FTS5_TOKENIZE_QUERY     0x0001
#define FTS5_TOKENIZE_PREFIX    0x0002
#define FTS5_TOKENIZE_DOCUMENT  0x0004
#define FTS5_TOKENIZE_AUX       0x0008

/*
** Flags that may be passed by the tokenizer implementation back to FTS5
** as the second argument ("tflags") to the supplied xToken callback.
*/
#define FTS5_TOKEN_COLOCATED    0x0001      /* Same position as prev. token */
540
541/*
542** END OF CUSTOM TOKENIZERS
543*************************************************************************/
544
545/*************************************************************************
546** FTS5 EXTENSION REGISTRATION API
547*/
548typedef struct fts5_api fts5_api;
549struct fts5_api {
550 int iVersion; /* Currently always set to 2 */
551
552 /* Create a new tokenizer */
553 int (*xCreateTokenizer)(
554 fts5_api *pApi,
555 const char *zName,
556 void *pContext,
557 fts5_tokenizer *pTokenizer,
558 void (*xDestroy)(void*)
559 );
560
561 /* Find an existing tokenizer */
562 int (*xFindTokenizer)(
563 fts5_api *pApi,
564 const char *zName,
565 void **ppContext,
566 fts5_tokenizer *pTokenizer
567 );
568
569 /* Create a new auxiliary function */
570 int (*xCreateFunction)(
571 fts5_api *pApi,
572 const char *zName,
573 void *pContext,
574 fts5_extension_function xFunction,
575 void (*xDestroy)(void*)
576 );
577};
578
579/*
580** END OF REGISTRATION API
581*************************************************************************/
582
583#ifdef __cplusplus
584} /* end of the 'extern "C"' block */
585#endif
586
587#endif /* _FTS5_H */
588
589#line 1 "fts5Int.h"
590/*
591** 2014 May 31
592**
593** The author disclaims copyright to this source code. In place of
594** a legal notice, here is a blessing:
595**
596** May you do good and not evil.
597** May you find forgiveness for yourself and forgive others.
598** May you share freely, never taking more than you give.
599**
600******************************************************************************
601**
602*/
603#ifndef _FTS5INT_H
604#define _FTS5INT_H
605
606/* #include "fts5.h" */
607#include "sqlite3ext.h"
608SQLITE_EXTENSION_INIT1
609
610#include <string.h>
611#include <assert.h>
612
613#ifndef SQLITE_AMALGAMATION
614
615typedef unsigned char u8;
616typedef unsigned int u32;
617typedef unsigned short u16;
618typedef short i16;
619typedef sqlite3_int64 i64;
620typedef sqlite3_uint64 u64;
621
/* Number of elements in array x (x must be an array, not a pointer) */
#if !defined(ArraySize)
# define ArraySize(x) ((int)(sizeof(x) / sizeof((x)[0])))
#endif
625
/* testcase() expands to nothing in this build */
#define testcase(x)

/*
** ALWAYS(X) and NEVER(X) wrap conditions expected to be always true or
** always false:
**
**   * When auxiliary safety checks are omitted (as they are for coverage
**     and mutation testing) they are the constants 1 and 0.
**   * When NDEBUG is defined they evaluate to X itself.
**   * Otherwise they evaluate to 1 or 0 and assert() if X has the
**     unexpected value.
*/
#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST)
# define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1
#endif
#if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS)
# define ALWAYS(X)      (1)
# define NEVER(X)       (0)
#elif defined(NDEBUG)
# define ALWAYS(X)      (X)
# define NEVER(X)       (X)
#else
# define ALWAYS(X)      ((X)?1:(assert(0),0))
# define NEVER(X)       ((X)?(assert(0),1):0)
#endif
641
/* Smaller and larger of two values. Arguments may be evaluated twice. */
#define MIN(x,y) (((y) > (x)) ? (x) : (y))
#define MAX(x,y) (((y) < (x)) ? (x) : (y))
644
/*
** The largest and smallest values representable as a 64-bit signed
** integer (type i64).
*/
# define LARGEST_INT64  ((((i64)0x7fffffff)<<32)|0xffffffff)
# define SMALLEST_INT64 (-LARGEST_INT64 - 1)
650
651#endif
652
/*
** Very long tokens are truncated to this many bytes. The hard limit is
** (65536-1-1-4-9)==65521 bytes, the limiting factor being the 16-bit
** offset field that occurs at the start of each leaf page (see
** fts5_index.c).
*/
#define FTS5_MAX_TOKEN_SIZE 32768

/*
** Maximum number of prefix indexes on a single FTS5 table. This must be
** less than 32. If it is set to anything larger than that, an #error
** directive in fts5_index.c will cause the build to fail.
*/
#define FTS5_MAX_PREFIX_INDEXES 31

/*
** Maximum number of segments permitted in a single index.
*/
#define FTS5_MAX_SEGMENT 2000

/* NOTE(review): presumably the default NEAR distance and the default
** rank function name - confirm against their users. */
#define FTS5_DEFAULT_NEARDIST 10
#define FTS5_DEFAULT_RANK     "bm25"

/* Names of the rank and rowid columns */
#define FTS5_RANK_NAME  "rank"
#define FTS5_ROWID_NAME "rowid"
676
/*
** FTS5_CORRUPT is the error code used to report corruption. Normally it
** is simply SQLITE_CORRUPT_VTAB. In SQLITE_DEBUG builds the value is
** obtained by calling sqlite3Fts5Corrupt(), which is only declared here.
*/
#ifndef SQLITE_DEBUG
# define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
#else
static int sqlite3Fts5Corrupt(void);
# define FTS5_CORRUPT sqlite3Fts5Corrupt()
#endif
683
/*
** The assert_nc() macro is similar to the assert() macro, except that it
** is used for assert() conditions that are true only if it can be
** guaranteed that the database is not corrupt. In SQLITE_DEBUG builds the
** condition is not enforced while sqlite3_fts5_may_be_corrupt is non-zero.
*/
#ifndef SQLITE_DEBUG
# define assert_nc(x) assert(x)
#else
extern int sqlite3_fts5_may_be_corrupt;
# define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))
#endif
695
/*
** Wrapper around memcmp() that never calls it when the byte count is
** zero or negative (the result is then 0). This avoids asan errors when
** one of the pointers is NULL and there are no bytes to compare.
*/
#define fts5Memcmp(s1, s2, n) (((n)>0) ? memcmp((s1), (s2), (n)) : 0)
701
/*
** Mark one (UNUSED_PARAM) or two (UNUSED_PARAM2) function parameters as
** unused, to suppress nuisance compiler warnings.
*/
#if !defined(UNUSED_PARAM)
# define UNUSED_PARAM(X)  (void)(X)
#endif

#if !defined(UNUSED_PARAM2)
# define UNUSED_PARAM2(X, Y)  (void)(X), (void)(Y)
#endif
711
typedef struct Fts5Colset Fts5Colset;
typedef struct Fts5Global Fts5Global;

/*
** An Fts5Colset records the set of columns that a NEAR() clump or phrase
** is permitted to match, when it is restricted to specific columns. Each
** entry in aiCol[] is one column that may be matched.
**
** This object is used by fts5_expr.c and fts5_index.c.
*/
struct Fts5Colset {
  int nCol;                       /* Number of entries in aiCol[] */
  int aiCol[1];                   /* NOTE(review): declared with 1 element;
                                  ** presumably allocated with room for nCol
                                  ** entries - confirm at allocation sites */
};
725
726
727
728/**************************************************************************
** Interface to code in fts5_config.c. fts5_config.c contains code
** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
731*/
732
733typedef struct Fts5Config Fts5Config;
734
735/*
736** An instance of the following structure encodes all information that can
737** be gleaned from the CREATE VIRTUAL TABLE statement.
738**
739** And all information loaded from the %_config table.
740**
741** nAutomerge:
742** The minimum number of segments that an auto-merge operation should
743** attempt to merge together. A value of 1 sets the object to use the
744** compile time default. Zero disables auto-merge altogether.
745**
746** zContent:
747**
748** zContentRowid:
749** The value of the content_rowid= option, if one was specified. Or
750** the string "rowid" otherwise. This text is not quoted - if it is
751** used as part of an SQL statement it needs to be quoted appropriately.
752**
753** zContentExprlist:
754**
755** pzErrmsg:
756** This exists in order to allow the fts5_index.c module to return a
757** decent error message if it encounters a file-format version it does
758** not understand.
759**
760** bColumnsize:
761** True if the %_docsize table is created.
762**
763** bPrefixIndex:
764** This is only used for debugging. If set to false, any prefix indexes
765** are ignored. This value is configured using:
766**
767** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
768**
769*/
/* Configuration of one FTS5 table. Field semantics are described in the
** comment block immediately above this definition. */
struct Fts5Config {
 sqlite3 *db; /* Database handle */
 char *zDb; /* Database holding FTS index (e.g. "main") */
 char *zName; /* Name of FTS index */
 int nCol; /* Number of columns */
 char **azCol; /* Column names */
 u8 *abUnindexed; /* True for unindexed columns */
 int nPrefix; /* Number of prefix indexes */
 int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */
 int eContent; /* An FTS5_CONTENT value */
 char *zContent; /* content table */
 char *zContentRowid; /* "content_rowid=" option value */
 int bColumnsize; /* "columnsize=" option value (dflt==1) */
 int eDetail; /* FTS5_DETAIL_XXX value */
 char *zContentExprlist; /* NOTE(review): not described in this header */
 Fts5Tokenizer *pTok; /* Tokenizer object */
 fts5_tokenizer *pTokApi; /* Tokenizer API structure */
 int bLock; /* True when table is preparing statement */
 int ePattern; /* FTS5_PATTERN_XXX constant */

 /* Values loaded from the %_config table */
 int iCookie; /* Incremented when %_config is modified */
 int pgsz; /* Approximate page size used in %_data */
 int nAutomerge; /* 'automerge' setting */
 int nCrisisMerge; /* Maximum allowed segments per level */
 int nUsermerge; /* 'usermerge' setting */
 int nHashSize; /* Bytes of memory for in-memory hash */
 char *zRank; /* Name of rank function */
 char *zRankArgs; /* Arguments to rank function */

 /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
 char **pzErrmsg;

 /* The following field exists only in SQLITE_DEBUG builds. */
#ifdef SQLITE_DEBUG
 int bPrefixIndex; /* True to use prefix-indexes */
#endif
};
807
/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION 4

/* Values for Fts5Config.eContent */
#define FTS5_CONTENT_NORMAL 0
#define FTS5_CONTENT_NONE 1
#define FTS5_CONTENT_EXTERNAL 2

/* Values for Fts5Config.eDetail */
#define FTS5_DETAIL_FULL 0
#define FTS5_DETAIL_NONE 1
#define FTS5_DETAIL_COLUMNS 2

/* Values for Fts5Config.ePattern. The two non-zero values are chosen to be
** numerically equal to the corresponding SQLITE_INDEX_CONSTRAINT_XXX
** constants, as noted on each line. */
#define FTS5_PATTERN_NONE 0
#define FTS5_PATTERN_LIKE 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */
#define FTS5_PATTERN_GLOB 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */
822
/*
** Entry points implemented in fts5_config.c.
**
** sqlite3Fts5ConfigParse() has an Fts5Config** and a char** among its
** parameters - presumably the output configuration object and an error
** message; confirm against the definition. sqlite3Fts5ConfigFree() is
** passed the Fts5Config* it operates on.
*/
static int sqlite3Fts5ConfigParse(
 Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
);
static void sqlite3Fts5ConfigFree(Fts5Config*);

static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

/* Tokenize the nText bytes at pText, invoking xToken(pCtx, ...) as the
** callback. */
static int sqlite3Fts5Tokenize(
 Fts5Config *pConfig, /* FTS5 Configuration object */
 int flags, /* FTS5_TOKENIZE_* flags */
 const char *pText, int nText, /* Text to tokenize */
 void *pCtx, /* Context passed to xToken() */
 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);

/* Takes a non-const char*: presumably dequotes the string z in place -
** confirm against the definition. */
static void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
static int sqlite3Fts5ConfigLoad(Fts5Config*, int);

/* Set the value of a single config attribute */
static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);

/* NOTE(review): the two char** arguments look like outputs matching
** Fts5Config.zRank and Fts5Config.zRankArgs - confirm. */
static int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
847
848/*
849** End of interface to code in fts5_config.c.
850**************************************************************************/
851
852/**************************************************************************
853** Interface to code in fts5_buffer.c.
854*/
855
/*
** Buffer object for the incremental building of string data.
**
** The fts5BufferGrow() macro treats n as the number of bytes in use and
** nSpace as the space available: it only calls sqlite3Fts5BufferSize()
** when n plus the requested number of bytes exceeds nSpace.
*/
typedef struct Fts5Buffer Fts5Buffer;
struct Fts5Buffer {
 u8 *p; /* Buffer contents */
 int n; /* Bytes of p[] currently in use */
 int nSpace; /* Bytes of space available at p[] */
};
865
/*
** Fts5Buffer manipulation routines implemented in fts5_buffer.c. Most of
** them take a leading int* argument; sqlite3Fts5Mprintf() names the same
** argument pRc, so it is presumably an IN/OUT error code - confirm against
** the definitions.
*/
static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
static void sqlite3Fts5BufferFree(Fts5Buffer*);
static void sqlite3Fts5BufferZero(Fts5Buffer*);
static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);

/* printf-style formatting that returns a char* rather than appending to an
** Fts5Buffer. */
static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
876
/*
** Short aliases for the sqlite3Fts5BufferXXX() routines.
**
** fts5BufferAppendVarint() casts its third argument to i64. The cast is
** written "(i64)c" without parentheses around c, so if c is a compound
** expression the cast binds to its first operand only.
*/
#define fts5BufferZero(x) sqlite3Fts5BufferZero(x)
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c)
#define fts5BufferFree(a) sqlite3Fts5BufferFree(a)
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
#define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d)

/*
** Evaluates to 0 if buffer pBuf already has room for nn more bytes
** (n+nn <= nSpace, compared as u32). Otherwise evaluates to the return
** value of sqlite3Fts5BufferSize() called with the required total size.
** Both pBuf and nn may be evaluated more than once.
*/
#define fts5BufferGrow(pRc,pBuf,nn) ( \
 (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
 sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
)
887
/* Write and decode big-endian 32-bit integer values. Put32() is passed the
** destination buffer and the value; Get32() is passed the source buffer and
** returns the value. */
static void sqlite3Fts5Put32(u8*, int);
static int sqlite3Fts5Get32(const u8*);
891
/*
** Split a packed 64-bit position value into its two components. The column
** number is stored in the upper 32 bits and the token offset in the lower
** 31 bits.
**
** The argument and the whole expansion are parenthesized so that the macros
** behave correctly when passed a compound expression (for example one using
** "|" or "?:") and when used as an operand of a larger expression.
*/
#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0x7FFFFFFF))
894
/*
** Iterator over a position list stored in the buffer a[0..n-1]. The current
** position is exposed in iPos as a packed value ((iCol<<32) + iPos); use
** FTS5_POS2COLUMN() and FTS5_POS2OFFSET() to split it. bEof is set once the
** end of the list has been reached.
*/
typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
 /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
 const u8 *a; /* Position list to iterate through */
 int n; /* Size of buffer at a[] in bytes */
 int i; /* Current offset in a[] */

 u8 bFlag; /* For client use (any custom purpose) */

 /* Output variables */
 u8 bEof; /* Set to true at EOF */
 i64 iPos; /* (iCol<<32) + iPos */
};
/* Initialize iterator pIter to read the n byte poslist at a[]. */
static int sqlite3Fts5PoslistReaderInit(
 const u8 *a, int n, /* Poslist buffer to iterate through */
 Fts5PoslistReader *pIter /* Iterator object to initialize */
);
/* Step an initialized iterator. NOTE(review): presumably advances iPos and
** sets bEof at the end of the list - confirm against the definition. */
static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
913
/*
** State for appending positions to a poslist held in an Fts5Buffer.
** NOTE(review): iPrev is presumably the most recently written position,
** kept so that the next one can be delta-encoded - confirm in
** fts5_buffer.c.
*/
typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
 i64 iPrev;
};
static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);

/* Read the next entry from the n byte poslist at a[]. Both *pi and *piOff
** are IN/OUT parameters, so the caller keeps the iteration state. */
static int sqlite3Fts5PoslistNext64(
 const u8 *a, int n, /* Buffer containing poslist */
 int *pi, /* IN/OUT: Offset within a[] */
 i64 *piOff /* IN/OUT: Current offset */
);
926
/* Malloc utility. Both routines take an int *pRc argument in addition to
** the allocation parameters. NOTE(review): going by the names,
** MallocZero() returns zeroed memory and Strndup() copies nIn bytes of
** pIn - confirm against the definitions. */
static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte);
static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);

/* Character set tests (like isspace(), isalpha() etc.) */
static int sqlite3Fts5IsBareword(char t);


/* Bucket of terms object used by the integrity-check in offsets=0 mode.
** Fts5Termset is an opaque type here; objects are obtained through
** sqlite3Fts5TermsetNew() and passed to sqlite3Fts5TermsetFree(). */
typedef struct Fts5Termset Fts5Termset;
static int sqlite3Fts5TermsetNew(Fts5Termset**);
static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
static void sqlite3Fts5TermsetFree(Fts5Termset*);
940
941/*
942** End of interface to code in fts5_buffer.c.
943**************************************************************************/
944
945/**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
948*/
949
/* Fts5Index is opaque outside of fts5_index.c. Fts5IndexIter exposes the
** fields below to the users of an iterator. */
typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;

struct Fts5IndexIter {
 i64 iRowid; /* NOTE(review): presumably rowid of the current entry */
 const u8 *pData; /* NOTE(review): presumably data for the current entry */
 int nData; /* NOTE(review): presumably size of pData[] in bytes */
 u8 bEof; /* Value reported by sqlite3Fts5IterEof() */
};

/* Evaluates to the bEof field of iterator x. */
#define sqlite3Fts5IterEof(x) ((x)->bEof)
961
/*
** Values used as part of the flags argument passed to IndexQuery(). Each
** value is a distinct single bit, so they may be ORed together.
*/
#define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */
#define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */
#define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */
#define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */

/* The following are used internally by the fts5_index.c module. They are
** defined here only to make it easier to avoid clashes with the flags
** above. */
#define FTS5INDEX_QUERY_SKIPEMPTY 0x0010
#define FTS5INDEX_QUERY_NOOUTPUT 0x0020
975
/*
** Create/destroy an Fts5Index object. Open() is passed the configuration,
** a bCreate flag, and output pointers for the new object and (presumably)
** an error message.
*/
static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
static int sqlite3Fts5IndexClose(Fts5Index *p);

/*
** Return a simple checksum value based on the arguments.
*/
static u64 sqlite3Fts5IndexEntryCksum(
 i64 iRowid,
 int iCol,
 int iPos,
 int iIdx,
 const char *pTerm,
 int nTerm
);

/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
 const char *p,
 int nByte,
 int nChar
);
1004
/*
** Open a new iterator to iterate through all rowids that match the
** specified token or token prefix.
*/
static int sqlite3Fts5IndexQuery(
 Fts5Index *p, /* FTS index to query */
 const char *pToken, int nToken, /* Token (or prefix) to query for */
 int flags, /* Mask of FTS5INDEX_QUERY_X flags */
 Fts5Colset *pColset, /* Match these columns only */
 Fts5IndexIter **ppIter /* OUT: New iterator object */
);

/*
** The various operations on open token or token prefix iterators opened
** using sqlite3Fts5IndexQuery().
*/
static int sqlite3Fts5IterNext(Fts5IndexIter*);
static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);

/*
** Close an iterator opened by sqlite3Fts5IndexQuery().
*/
static void sqlite3Fts5IterClose(Fts5IndexIter*);

/*
** Close the reader blob handle, if it is open.
*/
static void sqlite3Fts5IndexCloseReader(Fts5Index*);

/*
** This interface is used by the fts5vocab module. StructureRef() returns
** an untyped pointer, and StructureRelease() and StructureTest() each take
** one as an argument.
*/
static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
static void *sqlite3Fts5StructureRef(Fts5Index*);
static void sqlite3Fts5StructureRelease(void*);
static int sqlite3Fts5StructureTest(Fts5Index*, void*);
1042
1043
/*
** Insert or remove data to or from the index. Each time a document is
** added to or removed from the index, this function is called one or more
** times.
**
** For an insert, it must be called once for each token in the new document.
** If the operation is a delete, it must be called (at least) once for each
** unique token in the document with an iCol value less than zero. The iPos
** argument is ignored for a delete.
**
** The document the tokens belong to is identified by a preceding call to
** sqlite3Fts5IndexBeginWrite().
*/
static int sqlite3Fts5IndexWrite(
 Fts5Index *p, /* Index to write to */
 int iCol, /* Column token appears in (-ve -> delete) */
 int iPos, /* Position of token within column */
 const char *pToken, int nToken /* Token to add or remove to or from index */
);

/*
** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
** document iDocid.
*/
static int sqlite3Fts5IndexBeginWrite(
 Fts5Index *p, /* Index to write to */
 int bDelete, /* True if current operation is a delete */
 i64 iDocid /* Docid to add or remove data from */
);
1070
/*
** Flush any data stored in the in-memory hash tables to the database.
** Also close any open blob handles.
*/
static int sqlite3Fts5IndexSync(Fts5Index *p);

/*
** Discard any data stored in the in-memory hash tables. Do not write it
** to the database. Additionally, assume that the contents of the %_data
** table may have changed on disk. So any in-memory caches of %_data
** records must be invalidated.
*/
static int sqlite3Fts5IndexRollback(Fts5Index *p);

/*
** Get or set the "averages" values. The getter writes through pnRow and
** anSize; the setter is passed a buffer and an int (presumably the buffer
** size in bytes - confirm against the definition).
*/
static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
1090
/*
** Functions called by the storage module as part of integrity-check.
*/
static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum);

/*
** Called during virtual module initialization to register UDF
** fts5_decode() with SQLite
*/
static int sqlite3Fts5IndexInit(sqlite3*);

/* NOTE(review): presumably stores the configuration cookie (see
** Fts5Config.iCookie) - confirm against the definition. */
static int sqlite3Fts5IndexSetCookie(Fts5Index*, int);

/*
** Return the total number of entries read from the %_data table by
** this connection since it was created.
*/
static int sqlite3Fts5IndexReads(Fts5Index *p);

/* Maintenance operations on an index. Their exact semantics are defined in
** fts5_index.c and are not described in this header. */
static int sqlite3Fts5IndexReinit(Fts5Index *p);
static int sqlite3Fts5IndexOptimize(Fts5Index *p);
static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
static int sqlite3Fts5IndexReset(Fts5Index *p);

static int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
1117/*
1118** End of interface to code in fts5_index.c.
1119**************************************************************************/
1120
1121/**************************************************************************
1122** Interface to code in fts5_varint.c.
1123*/
/* Varint encode/decode routines implemented in fts5_varint.c. */
static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
static int sqlite3Fts5GetVarintLen(u32 iVal);
static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
static int sqlite3Fts5PutVarint(unsigned char *p, u64 v);

/* fts5GetVarint32() takes the output variable b itself rather than a
** pointer to it: the macro takes its address and casts that to (u32*), so
** b must be an lvalue. */
#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint sqlite3Fts5GetVarint

/*
** Read a value from buffer a[] at offset iOff into nVal, advancing iOff.
**
** The byte at a[iOff] is loaded directly and iOff incremented. Only if that
** byte has bit 0x80 set is iOff stepped back and fts5GetVarint32() invoked,
** in which case its return value is added to iOff and it overwrites nVal.
**
** This expands to a brace-enclosed block, not an expression. iOff and nVal
** must be lvalues and are evaluated more than once.
*/
#define fts5FastGetVarint32(a, iOff, nVal) { \
 nVal = (a)[iOff++]; \
 if( nVal & 0x80 ){ \
 iOff--; \
 iOff += fts5GetVarint32(&(a)[iOff], nVal); \
 } \
}
1139
1140
1141/*
1142** End of interface to code in fts5_varint.c.
1143**************************************************************************/
1144
1145
1146/**************************************************************************
1147** Interface to code in fts5_main.c.
1148*/
1149
/*
** Virtual-table object. The sqlite3_vtab base is the first member, as the
** "base class" used by the SQLite core.
*/
typedef struct Fts5Table Fts5Table;
struct Fts5Table {
 sqlite3_vtab base; /* Base class used by SQLite core */
 Fts5Config *pConfig; /* Virtual table configuration */
 Fts5Index *pIndex; /* Full-text index */
};

/* Takes an array of nArg argument strings in azArg[] plus the Fts5Config
** and an error-message pointer. NOTE(review): presumably sets the
** tokenizer fields of the Fts5Config - confirm against the definition. */
static int sqlite3Fts5GetTokenizer(
 Fts5Global*,
 const char **azArg,
 int nArg,
 Fts5Config*,
 char **pzErr
);

/* Return an Fts5Table* given an i64 id (a cursor id, going by the name). */
static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);

static int sqlite3Fts5FlushToDisk(Fts5Table*);
1171
1172/*
** End of interface to code in fts5_main.c.
1174**************************************************************************/
1175
1176/**************************************************************************
1177** Interface to code in fts5_hash.c.
1178*/
/* Fts5Hash is opaque outside of fts5_hash.c. */
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
*/
static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
static void sqlite3Fts5HashFree(Fts5Hash*);

/* Add an entry to the hash table. The iCol, iPos and token arguments carry
** the same descriptions as those of sqlite3Fts5IndexWrite(). */
static int sqlite3Fts5HashWrite(
 Fts5Hash*,
 i64 iRowid, /* Rowid for this entry */
 int iCol, /* Column token appears in (-ve -> delete) */
 int iPos, /* Position of token within column */
 char bByte,
 const char *pToken, int nToken /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
static void sqlite3Fts5HashClear(Fts5Hash*);

/* Look up a single term. The results are returned through ppObj and
** pnDoclist. */
static int sqlite3Fts5HashQuery(
 Fts5Hash*, /* Hash table to query */
 int nPre,
 const char *pTerm, int nTerm, /* Query term */
 void **ppObj, /* OUT: Pointer to doclist for pTerm */
 int *pnDoclist /* OUT: Size of doclist in bytes */
);

/* Scan interface: ScanInit() starts a scan for a term prefix, ScanNext()
** and ScanEof() step and test it, and ScanEntry() reads the current
** entry. */
static int sqlite3Fts5HashScanInit(
 Fts5Hash*, /* Hash table to query */
 const char *pTerm, int nTerm /* Query prefix */
);
static void sqlite3Fts5HashScanNext(Fts5Hash*);
static int sqlite3Fts5HashScanEof(Fts5Hash*);
static void sqlite3Fts5HashScanEntry(Fts5Hash *,
 const char **pzTerm, /* OUT: term (nul-terminated) */
 const u8 **ppDoclist, /* OUT: pointer to doclist */
 int *pnDoclist /* OUT: size of doclist in bytes */
);
1220
1221
1222/*
1223** End of interface to code in fts5_hash.c.
1224**************************************************************************/
1225
1226/**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
1229*/
1230
/* Statement identifiers. These are the values passed as the eStmt argument
** of sqlite3Fts5StorageStmt() and sqlite3Fts5StorageStmtRelease(). */
#define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=? */

/* Fts5Storage is opaque outside of fts5_storage.c. */
typedef struct Fts5Storage Fts5Storage;

/* Open, close and rename a storage object. */
static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
static int sqlite3Fts5StorageClose(Fts5Storage *p);
static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);

static int sqlite3Fts5DropAll(Fts5Config*);
static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);

/* Row-level operations. ContentInsert() takes an i64* where IndexInsert()
** takes an i64 (presumably the rowid, output and input respectively -
** confirm against the definitions). */
static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);

static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg);

/* Obtain and release one of the FTS5_STMT_XXX statements. */
static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);

/* Size and row-count queries; results are returned via the final pointer
** argument of each function. */
static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);

static int sqlite3Fts5StorageSync(Fts5Storage *p);
static int sqlite3Fts5StorageRollback(Fts5Storage *p);

static int sqlite3Fts5StorageConfigValue(
 Fts5Storage *p, const char*, sqlite3_value*, int
);

static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
static int sqlite3Fts5StorageRebuild(Fts5Storage *p);
static int sqlite3Fts5StorageOptimize(Fts5Storage *p);
static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
static int sqlite3Fts5StorageReset(Fts5Storage *p);
1269
1270/*
1271** End of interface to code in fts5_storage.c.
1272**************************************************************************/
1273
1274
1275/**************************************************************************
1276** Interface to code in fts5_expr.c.
1277*/
/* Types used by the expression module. All except Fts5Token are opaque in
** this header. */
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;

/* A token: a pointer/length pair. The text is not nul-terminated, so n must
** always be used to find its end. This is also the type configured as
** sqlite3Fts5ParserFTS5TOKENTYPE for the generated parser. */
struct Fts5Token {
 const char *p; /* Token text (not NULL terminated) */
 int n; /* Size of buffer p in bytes */
};
1289
/* Parse a MATCH expression. The new expression object is returned via
** ppNew; pzErr is presumably set to an error message on failure. */
static int sqlite3Fts5ExprNew(
 Fts5Config *pConfig,
 int bPhraseToAnd,
 int iCol, /* Column on LHS of MATCH operator */
 const char *zExpr,
 Fts5Expr **ppNew,
 char **pzErr
);
/* Build an expression from pattern text zText, returned via pp. bGlob
** presumably selects between GLOB and LIKE patterns (see
** FTS5_PATTERN_GLOB / FTS5_PATTERN_LIKE) - confirm against the
** definition. */
static int sqlite3Fts5ExprPattern(
 Fts5Config *pConfig,
 int bGlob,
 int iCol,
 const char *zText,
 Fts5Expr **pp
);

/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
** rc = sqlite3Fts5ExprNext(pExpr, iMax)
** ){
** // The document with rowid iRowid matches the expression!
** i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
** }
*/
static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
static int sqlite3Fts5ExprEof(Fts5Expr*);
static i64 sqlite3Fts5ExprRowid(Fts5Expr*);

static void sqlite3Fts5ExprFree(Fts5Expr*);
static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2);

/* Called during startup to register a UDF with SQLite */
static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

/* Accessors for the phrases of an expression. */
static int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

/* Fts5PoslistPopulator is opaque here. An object of this type is returned
** by ExprClearPoslists() and passed to ExprPopulatePoslists(). */
typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
static int sqlite3Fts5ExprPopulatePoslists(
 Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
);
static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);

static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);

static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
1341
1342/*******************************************
1343** The fts5_expr.c API above this point is used by the other hand-written
1344** C code in this module. The interfaces below this point are called by
1345** the parser code in fts5parse.y. */
1346
/* Report a parse error using a printf-style format string. */
static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);

/* Constructors for the objects built while parsing. Each takes the objects
** built so far and returns the resulting object. */
static Fts5ExprNode *sqlite3Fts5ParseNode(
 Fts5Parse *pParse,
 int eType,
 Fts5ExprNode *pLeft,
 Fts5ExprNode *pRight,
 Fts5ExprNearset *pNear
);

static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
 Fts5Parse *pParse,
 Fts5ExprNode *pLeft,
 Fts5ExprNode *pRight
);

static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
 Fts5Parse *pParse,
 Fts5ExprPhrase *pPhrase,
 Fts5Token *pToken,
 int bPrefix
);

static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*);

static Fts5ExprNearset *sqlite3Fts5ParseNearset(
 Fts5Parse*,
 Fts5ExprNearset*,
 Fts5ExprPhrase*
);

static Fts5Colset *sqlite3Fts5ParseColset(
 Fts5Parse*,
 Fts5Colset*,
 Fts5Token *
);

/* Destructors for phrase, nearset and node objects. */
static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);

static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*);
static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*);
static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
1393
1394/*
1395** End of interface to code in fts5_expr.c.
1396**************************************************************************/
1397
1398
1399
1400/**************************************************************************
1401** Interface to code in fts5_aux.c.
1402*/
1403
/* Initialization entry point of fts5_aux.c; it is passed the fts5_api
** object. NOTE(review): presumably registers the built-in auxiliary
** functions - confirm against the definition. */
static int sqlite3Fts5AuxInit(fts5_api*);
1405/*
1406** End of interface to code in fts5_aux.c.
1407**************************************************************************/
1408
1409/**************************************************************************
1410** Interface to code in fts5_tokenizer.c.
1411*/
1412
/* Initialization entry point of fts5_tokenizer.c; it is passed the fts5_api
** object. */
static int sqlite3Fts5TokenizerInit(fts5_api*);
/* Takes a tokenizer xCreate function pointer and a tokenizer instance.
** NOTE(review): presumably returns one of the FTS5_PATTERN_XXX values for
** that tokenizer - confirm against the definition. */
static int sqlite3Fts5TokenizerPattern(
 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
 Fts5Tokenizer *pTok
);
1418/*
1419** End of interface to code in fts5_tokenizer.c.
1420**************************************************************************/
1421
1422/**************************************************************************
1423** Interface to code in fts5_vocab.c.
1424*/
1425
/* Initialization entry point of fts5_vocab.c; it is passed the Fts5Global
** object and the database handle. */
static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
1427
1428/*
1429** End of interface to code in fts5_vocab.c.
1430**************************************************************************/
1431
1432
1433/**************************************************************************
1434** Interface to automatically generated code in fts5_unicode2.c.
1435*/
/* Unicode helpers from the automatically generated fts5_unicode2.c. The
** first two take a codepoint c; UnicodeFold() additionally takes a
** bRemoveDiacritic flag. */
static int sqlite3Fts5UnicodeIsdiacritic(int c);
static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

/* Unicode category helpers. Their exact contracts are defined in
** fts5_unicode2.c and are not described in this header. */
static int sqlite3Fts5UnicodeCatParse(const char*, u8*);
static int sqlite3Fts5UnicodeCategory(u32 iCode);
static void sqlite3Fts5UnicodeAscii(u8*, u8*);
1442/*
1443** End of interface to code in fts5_unicode2.c.
1444**************************************************************************/
1445
1446#endif
1447
1448#line 1 "fts5parse.h"
/* Token codes from fts5parse.h. The same list is repeated, guarded by
** "#ifndef FTS5_OR", in the token-definitions section of fts5parse.c, so
** the two must be kept in step. */
#define FTS5_OR 1
#define FTS5_AND 2
#define FTS5_NOT 3
#define FTS5_TERM 4
#define FTS5_COLON 5
#define FTS5_MINUS 6
#define FTS5_LCP 7
#define FTS5_RCP 8
#define FTS5_STRING 9
#define FTS5_LP 10
#define FTS5_RP 11
#define FTS5_CARET 12
#define FTS5_COMMA 13
#define FTS5_PLUS 14
#define FTS5_STAR 15
1464
1465#line 1 "fts5parse.c"
1466/* This file is automatically generated by Lemon from input grammar
1467** source file "fts5parse.y". */
1468/*
1469** 2000-05-29
1470**
1471** The author disclaims copyright to this source code. In place of
1472** a legal notice, here is a blessing:
1473**
1474** May you do good and not evil.
1475** May you find forgiveness for yourself and forgive others.
1476** May you share freely, never taking more than you give.
1477**
1478*************************************************************************
1479** Driver template for the LEMON parser generator.
1480**
1481** The "lemon" program processes an LALR(1) input grammar file, then uses
1482** this template to construct a parser. The "lemon" program inserts text
** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the
** interstitial "-" characters) contained in this template is changed into
** the value of the %name directive from the grammar. Otherwise, the content
** of this template is copied straight through into the generated parser
** source file.
1488**
1489** The following is the concatenation of all %include directives from the
1490** input grammar file:
1491*/
1492/************ Begin %include sections from the grammar ************************/
1493#line 47 "fts5parse.y"
1494
1495/* #include "fts5Int.h" */
1496/* #include "fts5parse.h" */
1497
/*
** Disable all error recovery processing in the parser push-down
** automaton.
*/
#define fts5YYNOERRORRECOVERY 1

/*
** Make fts5yytestcase() the same as testcase(). Because the macro is
** defined here, the "#ifndef fts5yytestcase" fallback later in this file
** does not take effect.
*/
#define fts5yytestcase(X) testcase(X)

/*
** Indicate that sqlite3ParserFree() will never be called with a null
** pointer.
*/
#define fts5YYPARSEFREENOTNULL 1

/*
** Alternative datatype for the argument to the malloc() routine passed
** into sqlite3ParserAlloc(). The default is size_t.
*/
#define fts5YYMALLOCARGTYPE u64
1520
1521#line 57 "fts5parse.c"
1522/**************** End of %include directives **********************************/
1523/* These constants specify the various numeric values for terminal symbols.
1524***************** Begin token definitions *************************************/
/* Fallback token definitions, used only if FTS5_OR has not already been
** defined. In this file FTS5_OR is defined earlier by the fts5parse.h
** section, so this block is skipped by the preprocessor. */
#ifndef FTS5_OR
#define FTS5_OR 1
#define FTS5_AND 2
#define FTS5_NOT 3
#define FTS5_TERM 4
#define FTS5_COLON 5
#define FTS5_MINUS 6
#define FTS5_LCP 7
#define FTS5_RCP 8
#define FTS5_STRING 9
#define FTS5_LP 10
#define FTS5_RP 11
#define FTS5_CARET 12
#define FTS5_COMMA 13
#define FTS5_PLUS 14
#define FTS5_STAR 15
#endif
1542/**************** End token definitions ***************************************/
1543
/* The next section is a series of control #defines that determine
** various aspects of the generated parser.
1546** fts5YYCODETYPE is the data type used to store the integer codes
1547** that represent terminal and non-terminal symbols.
1548** "unsigned char" is used if there are fewer than
1549** 256 symbols. Larger types otherwise.
1550** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for
1551** any terminal or nonterminal symbol.
1552** fts5YYFALLBACK If defined, this indicates that one or more tokens
1553** (also known as: "terminal symbols") have fall-back
1554** values which should be used if the original symbol
1555** would not parse. This permits keywords to sometimes
1556** be used as identifiers, for example.
1557** fts5YYACTIONTYPE is the data type used for "action codes" - numbers
1558** that indicate what to do in response to the next
1559** token.
1560** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for minor type for terminal
1561** symbols. Background: A "minor type" is a semantic
1562** value associated with a terminal or non-terminal
1563** symbols. For example, for an "ID" terminal symbol,
1564** the minor type might be the name of the identifier.
1565** Each non-terminal can have a different minor type.
1566** Terminal symbols all have the same minor type, though.
** This macro defines the minor type for terminal
1568** symbols.
1569** fts5YYMINORTYPE is the data type used for all minor types.
1570** This is typically a union of many types, one of
1571** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union
1572** for terminal symbols is called "fts5yy0".
1573** fts5YYSTACKDEPTH is the maximum depth of the parser's stack. If
1574** zero the stack is dynamically sized using realloc()
1575** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument
1576** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument
1577** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter
1578** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser
1579** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser
1580** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context
1581** fts5YYERRORSYMBOL is the code number of the error symbol. If not
1582** defined, then do no error processing.
1583** fts5YYNSTATE the combined number of states.
1584** fts5YYNRULE the number of rules in the grammar
1585** fts5YYNFTS5TOKEN Number of terminal symbols
1586** fts5YY_MAX_SHIFT Maximum value for shift actions
1587** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1588** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1589** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error
1590** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept
1591** fts5YY_NO_ACTION The fts5yy_action[] code for no-op
1592** fts5YY_MIN_REDUCE Minimum value for reduce actions
1593** fts5YY_MAX_REDUCE Maximum value for reduce actions
1594*/
#ifndef INTERFACE
# define INTERFACE 1
#endif
/************* Begin control #defines *****************************************/
/* Symbol codes and action codes are both stored in an unsigned char. */
#define fts5YYCODETYPE unsigned char
#define fts5YYNOCODE 27
#define fts5YYACTIONTYPE unsigned char
/* Terminal symbols carry an Fts5Token as their minor type. */
#define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token
/* Union of all minor types. fts5yy0 is the entry for terminal symbols; the
** other numbered members are used for non-terminals. */
typedef union {
 int fts5yyinit;
 sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0;
 int fts5yy4;
 Fts5Colset* fts5yy11;
 Fts5ExprNode* fts5yy24;
 Fts5ExprNearset* fts5yy46;
 Fts5ExprPhrase* fts5yy53;
} fts5YYMINORTYPE;
/* Default parser stack depth; may be overridden at compile time. */
#ifndef fts5YYSTACKDEPTH
#define fts5YYSTACKDEPTH 100
#endif
/* The %extra_argument is "Fts5Parse *pParse", stored in and fetched from
** fts5yypParser->pParse. */
#define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse;
#define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse
#define sqlite3Fts5ParserARG_PARAM ,pParse
#define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse=fts5yypParser->pParse;
#define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse=pParse;
/* All %extra_context macros expand to nothing. */
#define sqlite3Fts5ParserCTX_SDECL
#define sqlite3Fts5ParserCTX_PDECL
#define sqlite3Fts5ParserCTX_PARAM
#define sqlite3Fts5ParserCTX_FETCH
#define sqlite3Fts5ParserCTX_STORE
/* Action-code layout. The shift-reduce range (52..79) and the reduce range
** (83..110) each span exactly fts5YYNRULE (28) values. */
#define fts5YYNSTATE 35
#define fts5YYNRULE 28
#define fts5YYNRULE_WITH_ACTION 28
#define fts5YYNFTS5TOKEN 16
#define fts5YY_MAX_SHIFT 34
#define fts5YY_MIN_SHIFTREDUCE 52
#define fts5YY_MAX_SHIFTREDUCE 79
#define fts5YY_ERROR_ACTION 80
#define fts5YY_ACCEPT_ACTION 81
#define fts5YY_NO_ACTION 82
#define fts5YY_MIN_REDUCE 83
#define fts5YY_MAX_REDUCE 110
/************* End control #defines *******************************************/
/* Number of entries in the fts5yy_lookahead[] table. */
#define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])))
1639
/* Define the fts5yytestcase() macro to be a no-op if it is not already
** defined otherwise.
1642**
1643** Applications can choose to define fts5yytestcase() in the %include section
1644** to a macro that can assist in verifying code coverage. For production
1645** code the fts5yytestcase() macro should be turned off. But it is useful
1646** for testing.
1647*/
/* No-op fallback. It has no effect when fts5yytestcase() has already been
** defined, as it is by the %include section earlier in this file. */
#ifndef fts5yytestcase
# define fts5yytestcase(X)
#endif
1651
1652
1653/* Next are the tables used to determine what action to take based on the
1654** current state and lookahead token. These tables are used to implement
1655** functions that take a state number and lookahead value and return an
1656** action integer.
1657**
1658** Suppose the action integer is N. Then the action is determined as
1659** follows
1660**
1661** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead
1662** token onto the stack and goto state N.
1663**
1664** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then
1665** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE.
1666**
1667** N == fts5YY_ERROR_ACTION A syntax error has occurred.
1668**
1669** N == fts5YY_ACCEPT_ACTION The parser accepts its input.
1670**
1671** N == fts5YY_NO_ACTION No such action. Denotes unused
1672** slots in the fts5yy_action[] table.
1673**
1674** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE
1675** and fts5YY_MAX_REDUCE
1676**
1677** The action table is constructed as a single large table named fts5yy_action[].
1678** Given state S and lookahead X, the action is computed as either:
1679**
1680** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ]
1681** (B) N = fts5yy_default[S]
1682**
1683** The (A) formula is preferred. The B formula is used instead if
1684** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X.
1685**
1686** The formulas above are for computing the action when the lookahead is
1687** a terminal symbol. If the lookahead is a non-terminal (as occurs after
1688** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of
1689** the fts5yy_shift_ofst[] array.
1690**
1691** The following are the tables generated in this section:
1692**
1693** fts5yy_action[] A single table containing all actions.
1694** fts5yy_lookahead[] A table containing the lookahead for each entry in
1695** fts5yy_action. Used to detect hash collisions.
1696** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for
1697** shifting terminals.
1698** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for
1699** shifting non-terminals after a reduce.
1700** fts5yy_default[] Default action for each state.
1701**
1702*********** Begin parsing tables **********************************************/
/* Number of entries in fts5yy_action[] */
#define fts5YY_ACTTAB_COUNT (105)
/* Action codes.  Indexed by a per-state offset plus the look-ahead symbol. */
static const fts5YYACTIONTYPE fts5yy_action[] = {
 /*     0 */    81,   20,   96,    6,   28,   99,   98,   26,   26,   18,
 /*    10 */    96,    6,   28,   17,   98,   56,   26,   19,   96,    6,
 /*    20 */    28,   14,   98,   14,   26,   31,   92,   96,    6,   28,
 /*    30 */   108,   98,   25,   26,   21,   96,    6,   28,   78,   98,
 /*    40 */    58,   26,   29,   96,    6,   28,  107,   98,   22,   26,
 /*    50 */    24,   16,   12,   11,    1,   13,   13,   24,   16,   23,
 /*    60 */    11,   33,   34,   13,   97,    8,   27,   32,   98,    7,
 /*    70 */    26,    3,    4,    5,    3,    4,    5,    3,   83,    4,
 /*    80 */     5,    3,   63,    5,    3,   62,   12,    2,   86,   13,
 /*    90 */     9,   30,   10,   10,   54,   57,   75,   78,   78,   53,
 /*   100 */    57,   15,   82,   82,   71,
};
/* For each slot of fts5yy_action[], the symbol that slot belongs to.  A
** lookup is valid only when the entry equals the look-ahead symbol.  The
** table is longer than fts5yy_action[]; the extra trailing entries hold 27,
** a value that names no symbol in fts5yyTokenName[]. */
static const fts5YYCODETYPE fts5yy_lookahead[] = {
 /*     0 */    16,   17,   18,   19,   20,   22,   22,   24,   24,   17,
 /*    10 */    18,   19,   20,    7,   22,    9,   24,   17,   18,   19,
 /*    20 */    20,    9,   22,    9,   24,   13,   17,   18,   19,   20,
 /*    30 */    26,   22,   24,   24,   17,   18,   19,   20,   15,   22,
 /*    40 */     9,   24,   17,   18,   19,   20,   26,   22,   21,   24,
 /*    50 */     6,    7,    9,    9,   10,   12,   12,    6,    7,   21,
 /*    60 */     9,   24,   25,   12,   18,    5,   20,   14,   22,    5,
 /*    70 */    24,    3,    1,    2,    3,    1,    2,    3,    0,    1,
 /*    80 */     2,    3,   11,    2,    3,   11,    9,   10,    5,   12,
 /*    90 */    23,   24,   10,   10,    8,    9,    9,   15,   15,    8,
 /*   100 */     9,    9,   27,   27,   11,   27,   27,   27,   27,   27,
 /*   110 */    27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
 /*   120 */    27,
};
/* fts5YY_SHIFT_COUNT is the largest valid index into fts5yy_shift_ofst[]
** (asserted in fts5yy_find_shift_action()).  fts5YY_SHIFT_MIN and
** fts5YY_SHIFT_MAX are generator-supplied bounds on the offsets. */
#define fts5YY_SHIFT_COUNT    (34)
#define fts5YY_SHIFT_MIN      (0)
#define fts5YY_SHIFT_MAX      (93)
/* Per-state offset into fts5yy_action[] used when the look-ahead is a
** terminal symbol */
static const unsigned char fts5yy_shift_ofst[] = {
 /*     0 */    44,   44,   44,   44,   44,   44,   51,   77,   43,   12,
 /*    10 */    14,   83,   82,   14,   23,   23,   31,   31,   71,   74,
 /*    20 */    78,   81,   86,   91,    6,   53,   53,   60,   64,   68,
 /*    30 */    53,   87,   92,   53,   93,
};
/* fts5YY_REDUCE_COUNT is the largest valid index into fts5yy_reduce_ofst[]
** (asserted in fts5yy_find_reduce_action()).  fts5YY_REDUCE_MIN and
** fts5YY_REDUCE_MAX bound the offsets stored in the table. */
#define fts5YY_REDUCE_COUNT (17)
#define fts5YY_REDUCE_MIN   (-17)
#define fts5YY_REDUCE_MAX   (67)
/* Per-state offset into fts5yy_action[] used when the look-ahead is a
** non-terminal symbol (after a reduce).  Offsets may be negative. */
static const signed char fts5yy_reduce_ofst[] = {
 /*     0 */   -16,   -8,    0,    9,   17,   25,   46,  -17,  -17,   37,
 /*    10 */    67,    4,    4,    8,    4,   20,   27,   38,
};
/* Default action for each state, used when the fts5yy_lookahead[] check
** does not match the look-ahead symbol */
static const fts5YYACTIONTYPE fts5yy_default[] = {
 /*     0 */    80,   80,   80,   80,   80,   80,   95,   80,   80,  105,
 /*    10 */    80,  110,  110,   80,  110,  110,   80,   80,   80,   80,
 /*    20 */    80,   91,   80,   80,   80,  101,  100,   80,   80,   90,
 /*    30 */   103,   80,   80,  104,   80,
};
/********** End of lemon-generated parsing tables *****************************/
1754/********** End of lemon-generated parsing tables *****************************/
1755
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
**      %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a
** language to revert to identifiers if the keyword does not apply in the
** context where it appears.
**
** The table below has no entries and is only compiled when fts5YYFALLBACK
** is defined.
*/
#ifdef fts5YYFALLBACK
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
1774
/* The following structure represents a single element of the
** parser's stack.  Information stored includes:
**
**   +  The state number for the parser at this level of the stack.
**
**   +  The value of the token stored at this level of the stack.
**      (In other words, the "major" token.)
**
**   +  The semantic value stored at this level of the stack.  This is
**      the information used by the action routines in the grammar.
**      It is sometimes called the "minor" token.  It is held in the
**      fts5YYMINORTYPE union; which member is meaningful depends on
**      the major symbol.
**
** After the "shift" half of a SHIFTREDUCE action, the stateno field
** actually contains the reduce action for the second half of the
** SHIFTREDUCE.
*/
struct fts5yyStackEntry {
  fts5YYACTIONTYPE stateno;  /* The state-number, or reduce action in SHIFTREDUCE */
  fts5YYCODETYPE major;      /* The major token value.  This is the code
                         ** number for the token at this stack level */
  fts5YYMINORTYPE minor;     /* The user-supplied minor token value.  This
                         ** is the value of the token  */
};
typedef struct fts5yyStackEntry fts5yyStackEntry;
1799
/* The state of the parser is completely contained in an instance of
** the following structure.
**
** The stack is either a fixed array embedded in the structure
** (fts5YYSTACKDEPTH>0) or a separately allocated array that starts out
** pointing at the single embedded entry fts5yystk0 (fts5YYSTACKDEPTH<=0).
*/
struct fts5yyParser {
  fts5yyStackEntry *fts5yytos;          /* Pointer to top element of the stack */
#ifdef fts5YYTRACKMAXSTACKDEPTH
  int fts5yyhwm;                    /* High-water mark of the stack */
#endif
#ifndef fts5YYNOERRORRECOVERY
  int fts5yyerrcnt;                 /* Shifts left before out of the error */
#endif
  sqlite3Fts5ParserARG_SDECL                /* A place to hold %extra_argument */
  sqlite3Fts5ParserCTX_SDECL                /* A place to hold %extra_context */
#if fts5YYSTACKDEPTH<=0
  int fts5yystksz;                  /* Current size of the stack, in entries */
  fts5yyStackEntry *fts5yystack;        /* The parser's stack */
  fts5yyStackEntry fts5yystk0;          /* First stack entry */
#else
  fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH];  /* The parser's stack */
  fts5yyStackEntry *fts5yystackEnd;            /* Last entry in the stack */
#endif
};
typedef struct fts5yyParser fts5yyParser;
1822
1823#include <assert.h>
#ifndef NDEBUG
#include <stdio.h>
/* Destination stream and line prefix for parser trace output.  Both are
** zero while tracing is disabled. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;
#endif /* NDEBUG */

#ifndef NDEBUG
/*
** Enable or disable parser tracing.
**
** Inputs:
** <ul>
** <li> TraceFILE: the FILE* that receives trace output.
** <li> zTracePrompt: a prefix written at the start of every trace line.
** </ul>
**
** Tracing is enabled only when both arguments are non-NULL.  If either
** one is NULL, tracing is switched off and both settings are cleared.
**
** Outputs:
** None.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    /* A missing stream or a missing prompt disables tracing entirely */
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
  }else{
    fts5yyTraceFILE = TraceFILE;
    fts5yyTracePrompt = zTracePrompt;
  }
}
#endif /* NDEBUG */
1855
#if defined(fts5YYCOVERAGE) || !defined(NDEBUG)
/* For tracing shifts, the names of all terminals and nonterminals
** are required.  The following table supplies these names, indexed by
** symbol code.  The upper-case names (codes 0 through 15) are the symbols
** that appear as tokens in the grammar rules; the lower-case names
** (codes 16 and up) are the rule left-hand sides. */
static const char *const fts5yyTokenName[] = {
  /*    0 */ "$",
  /*    1 */ "OR",
  /*    2 */ "AND",
  /*    3 */ "NOT",
  /*    4 */ "TERM",
  /*    5 */ "COLON",
  /*    6 */ "MINUS",
  /*    7 */ "LCP",
  /*    8 */ "RCP",
  /*    9 */ "STRING",
  /*   10 */ "LP",
  /*   11 */ "RP",
  /*   12 */ "CARET",
  /*   13 */ "COMMA",
  /*   14 */ "PLUS",
  /*   15 */ "STAR",
  /*   16 */ "input",
  /*   17 */ "expr",
  /*   18 */ "cnearset",
  /*   19 */ "exprlist",
  /*   20 */ "colset",
  /*   21 */ "colsetlist",
  /*   22 */ "nearset",
  /*   23 */ "nearphrases",
  /*   24 */ "phrase",
  /*   25 */ "neardist_opt",
  /*   26 */ "star_opt",
};
#endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */
1889
#ifndef NDEBUG
/* For tracing reduce actions, the names of all rules are required.
** The table is indexed by rule number, using the same numbering as
** fts5yyRuleInfoLhs[] and fts5yyRuleInfoNRhs[].
*/
static const char *const fts5yyRuleName[] = {
 /*   0 */ "input ::= expr",
 /*   1 */ "colset ::= MINUS LCP colsetlist RCP",
 /*   2 */ "colset ::= LCP colsetlist RCP",
 /*   3 */ "colset ::= STRING",
 /*   4 */ "colset ::= MINUS STRING",
 /*   5 */ "colsetlist ::= colsetlist STRING",
 /*   6 */ "colsetlist ::= STRING",
 /*   7 */ "expr ::= expr AND expr",
 /*   8 */ "expr ::= expr OR expr",
 /*   9 */ "expr ::= expr NOT expr",
 /*  10 */ "expr ::= colset COLON LP expr RP",
 /*  11 */ "expr ::= LP expr RP",
 /*  12 */ "expr ::= exprlist",
 /*  13 */ "exprlist ::= cnearset",
 /*  14 */ "exprlist ::= exprlist cnearset",
 /*  15 */ "cnearset ::= nearset",
 /*  16 */ "cnearset ::= colset COLON nearset",
 /*  17 */ "nearset ::= phrase",
 /*  18 */ "nearset ::= CARET phrase",
 /*  19 */ "nearset ::= STRING LP nearphrases neardist_opt RP",
 /*  20 */ "nearphrases ::= phrase",
 /*  21 */ "nearphrases ::= nearphrases phrase",
 /*  22 */ "neardist_opt ::=",
 /*  23 */ "neardist_opt ::= COMMA STRING",
 /*  24 */ "phrase ::= phrase PLUS STRING star_opt",
 /*  25 */ "phrase ::= STRING star_opt",
 /*  26 */ "star_opt ::= STAR",
 /*  27 */ "star_opt ::=",
};
#endif /* NDEBUG */
1924
1925
1926#if fts5YYSTACKDEPTH<=0
1927/*
1928** Try to increase the size of the parser stack. Return the number
1929** of errors. Return 0 on success.
1930*/
1931static int fts5yyGrowStack(fts5yyParser *p){
1932 int newSize;
1933 int idx;
1934 fts5yyStackEntry *pNew;
1935
1936 newSize = p->fts5yystksz*2 + 100;
1937 idx = p->fts5yytos ? (int)(p->fts5yytos - p->fts5yystack) : 0;
1938 if( p->fts5yystack==&p->fts5yystk0 ){
1939 pNew = malloc(newSize*sizeof(pNew[0]));
1940 if( pNew ) pNew[0] = p->fts5yystk0;
1941 }else{
1942 pNew = realloc(p->fts5yystack, newSize*sizeof(pNew[0]));
1943 }
1944 if( pNew ){
1945 p->fts5yystack = pNew;
1946 p->fts5yytos = &p->fts5yystack[idx];
1947#ifndef NDEBUG
1948 if( fts5yyTraceFILE ){
1949 fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n",
1950 fts5yyTracePrompt, p->fts5yystksz, newSize);
1951 }
1952#endif
1953 p->fts5yystksz = newSize;
1954 }
1955 return pNew==0;
1956}
1957#endif
1958
/* Datatype of the argument taken by the memory allocator that is passed
** to sqlite3Fts5ParserAlloc() below.  This can be changed by
** putting an appropriate #define in the %include section of the input
** grammar.
*/
#ifndef fts5YYMALLOCARGTYPE
# define fts5YYMALLOCARGTYPE size_t
#endif
1967
1968/* Initialize a new parser that has already been allocated.
1969*/
1970static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){
1971 fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser;
1972 sqlite3Fts5ParserCTX_STORE
1973#ifdef fts5YYTRACKMAXSTACKDEPTH
1974 fts5yypParser->fts5yyhwm = 0;
1975#endif
1976#if fts5YYSTACKDEPTH<=0
1977 fts5yypParser->fts5yytos = NULL;
1978 fts5yypParser->fts5yystack = NULL;
1979 fts5yypParser->fts5yystksz = 0;
1980 if( fts5yyGrowStack(fts5yypParser) ){
1981 fts5yypParser->fts5yystack = &fts5yypParser->fts5yystk0;
1982 fts5yypParser->fts5yystksz = 1;
1983 }
1984#endif
1985#ifndef fts5YYNOERRORRECOVERY
1986 fts5yypParser->fts5yyerrcnt = -1;
1987#endif
1988 fts5yypParser->fts5yytos = fts5yypParser->fts5yystack;
1989 fts5yypParser->fts5yystack[0].stateno = 0;
1990 fts5yypParser->fts5yystack[0].major = 0;
1991#if fts5YYSTACKDEPTH>0
1992 fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1];
1993#endif
1994}
1995
1996#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
1997/*
1998** This function allocates a new parser.
1999** The only argument is a pointer to a function which works like
2000** malloc.
2001**
2002** Inputs:
2003** A pointer to the function used to allocate memory.
2004**
2005** Outputs:
2006** A pointer to a parser. This pointer is used in subsequent calls
2007** to sqlite3Fts5Parser and sqlite3Fts5ParserFree.
2008*/
2009static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE) sqlite3Fts5ParserCTX_PDECL){
2010 fts5yyParser *fts5yypParser;
2011 fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyParser) );
2012 if( fts5yypParser ){
2013 sqlite3Fts5ParserCTX_STORE
2014 sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM);
2015 }
2016 return (void*)fts5yypParser;
2017}
2018#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2019
2020
/* The following function deletes the "minor type" or semantic value
** associated with a symbol.  The symbol can be either a terminal
** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is
** a pointer to the value to be deleted.  The code used to do the
** deletions is derived from the %destructor and/or %token_destructor
** directives of the input grammar.
**
** Only the nonterminals with codes 17 through 24 release anything here;
** every other symbol code falls through to the do-nothing default.
*/
static void fts5yy_destructor(
  fts5yyParser *fts5yypParser,    /* The parser */
  fts5YYCODETYPE fts5yymajor,     /* Type code for object to destroy */
  fts5YYMINORTYPE *fts5yypminor   /* The object to be destroyed */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
  switch( fts5yymajor ){
    /* Here is inserted the actions which take place when a
    ** terminal or non-terminal is destroyed.  This can happen
    ** when the symbol is popped from the stack during a
    ** reduce or during error processing or when a parser is
    ** being destroyed before it is finished parsing.
    **
    ** Note: during a reduce, the only symbols destroyed are those
    ** which appear on the RHS of the rule, but which are *not* used
    ** inside the C code.
    */
/********* Begin destructor definitions ***************************************/
    case 16: /* input */
{
#line 83 "fts5parse.y"
 (void)pParse; 
#line 586 "fts5parse.c"
}
      break;
    /* Expression nodes (Fts5ExprNode*) */
    case 17: /* expr */
    case 18: /* cnearset */
    case 19: /* exprlist */
{
#line 89 "fts5parse.y"
 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); 
#line 595 "fts5parse.c"
}
      break;
    /* Column sets (Fts5Colset*), released directly with sqlite3_free() */
    case 20: /* colset */
    case 21: /* colsetlist */
{
#line 93 "fts5parse.y"
 sqlite3_free((fts5yypminor->fts5yy11)); 
#line 603 "fts5parse.c"
}
      break;
    /* Near-sets (Fts5ExprNearset*) */
    case 22: /* nearset */
    case 23: /* nearphrases */
{
#line 148 "fts5parse.y"
 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); 
#line 611 "fts5parse.c"
}
      break;
    /* Phrases (Fts5ExprPhrase*) */
    case 24: /* phrase */
{
#line 183 "fts5parse.y"
 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); 
#line 618 "fts5parse.c"
}
      break;
/********* End destructor definitions *****************************************/
    default:  break;   /* If no destructor action specified: do nothing */
  }
}
2090
2091/*
2092** Pop the parser's stack once.
2093**
2094** If there is a destructor routine associated with the token which
2095** is popped from the stack, then call it.
2096*/
2097static void fts5yy_pop_parser_stack(fts5yyParser *pParser){
2098 fts5yyStackEntry *fts5yytos;
2099 assert( pParser->fts5yytos!=0 );
2100 assert( pParser->fts5yytos > pParser->fts5yystack );
2101 fts5yytos = pParser->fts5yytos--;
2102#ifndef NDEBUG
2103 if( fts5yyTraceFILE ){
2104 fprintf(fts5yyTraceFILE,"%sPopping %s\n",
2105 fts5yyTracePrompt,
2106 fts5yyTokenName[fts5yytos->major]);
2107 }
2108#endif
2109 fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor);
2110}
2111
2112/*
2113** Clear all secondary memory allocations from the parser
2114*/
2115static void sqlite3Fts5ParserFinalize(void *p){
2116 fts5yyParser *pParser = (fts5yyParser*)p;
2117 while( pParser->fts5yytos>pParser->fts5yystack ) fts5yy_pop_parser_stack(pParser);
2118#if fts5YYSTACKDEPTH<=0
2119 if( pParser->fts5yystack!=&pParser->fts5yystk0 ) free(pParser->fts5yystack);
2120#endif
2121}
2122
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser and release its memory.  sqlite3Fts5ParserFinalize()
** runs first, so destructors are invoked for all remaining stack entries
** before the parser object is handed to freeProc.
**
** A NULL parser pointer is ignored, unless the fts5YYPARSEFREENEVERNULL
** macro is defined (for example in a %include section of the input
** grammar), in which case the pointer is assumed to be non-NULL.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* The parser to be deleted */
  void (*freeProc)(void*)     /* Function used to reclaim memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( !p ){
    return;
  }
#endif
  sqlite3Fts5ParserFinalize(p);
  freeProc(p);
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2143
/*
** Report the high-water mark recorded for the stack of parser p.
** Only available when fts5YYTRACKMAXSTACKDEPTH is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  return ((fts5yyParser*)p)->fts5yyhwm;
}
#endif
2153
/* This array of booleans keeps track of the parser statement
** coverage.  The element fts5yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y.  In a well-tested
** system, every element of this matrix should end up being set.
*/
#if defined(fts5YYCOVERAGE)
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];
#endif
2162
/*
** Report on parser coverage.  A state/lookahead combination is considered
** only if it has its own entry in the action table; each such combination
** is counted as missed when its fts5yycoverage[][] flag was never set.
**
** If out is not NULL, one line is written to it for every combination
** considered, tagged "ok" or "missed".
**
** Returns the number of missed combinations.
*/
#if defined(fts5YYCOVERAGE)
static int sqlite3Fts5ParserCoverage(FILE *out){
  int nMissed = 0;
  int iState;
  for(iState=0; iState<fts5YYNSTATE; iState++){
    const int iBase = fts5yy_shift_ofst[iState];
    int iTok;
    for(iTok=0; iTok<fts5YYNFTS5TOKEN; iTok++){
      int bSeen;
      /* Skip combinations with no dedicated action-table slot */
      if( fts5yy_lookahead[iBase+iTok]!=iTok ) continue;
      bSeen = fts5yycoverage[iState][iTok]!=0;
      if( !bSeen ) nMissed++;
      if( out ){
        fprintf(out,"State %d lookahead %s %s\n", iState,
                fts5yyTokenName[iTok],
                bSeen ? "ok" : "missed");
      }
    }
  }
  return nMissed;
}
#endif
2190
/*
** Find the appropriate action for a parser given the terminal
** look-ahead token iLookAhead.
**
** If stateno is greater than fts5YY_MAX_SHIFT it is returned unchanged
** without any table lookup.  Otherwise the result is the fts5yy_action[]
** entry for (stateno, iLookAhead) when the fts5yy_lookahead[] check
** matches, else (when compiled in) the action found via a fallback token
** or the wildcard token, else fts5yy_default[stateno].
*/
static fts5YYACTIONTYPE fts5yy_find_shift_action(
  fts5YYCODETYPE iLookAhead,    /* The look-ahead token */
  fts5YYACTIONTYPE stateno      /* Current state number */
){
  int i;

  if( stateno>fts5YY_MAX_SHIFT ) return stateno;
  assert( stateno <= fts5YY_SHIFT_COUNT );
#if defined(fts5YYCOVERAGE)
  /* Record that this state/lookahead pair has been exercised */
  fts5yycoverage[stateno][iLookAhead] = 1;
#endif
  /* The loop repeats only via the "continue" in the fallback branch, after
  ** iLookAhead has been replaced by its fallback token. */
  do{
    i = fts5yy_shift_ofst[stateno];
    assert( i>=0 );
    assert( i<=fts5YY_ACTTAB_COUNT );
    assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD );
    assert( iLookAhead!=fts5YYNOCODE );
    assert( iLookAhead < fts5YYNFTS5TOKEN );
    i += iLookAhead;
    assert( i<(int)fts5YY_NLOOKAHEAD );
    if( fts5yy_lookahead[i]!=iLookAhead ){
      /* Slot i belongs to some other symbol: no explicit action exists
      ** for this state/lookahead pair. */
#ifdef fts5YYFALLBACK
      fts5YYCODETYPE iFallback;            /* Fallback token */
      assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) );
      iFallback = fts5yyFallback[iLookAhead];
      if( iFallback!=0 ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n",
             fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]);
        }
#endif
        assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
        iLookAhead = iFallback;
        continue;
      }
#endif
#ifdef fts5YYWILDCARD
      {
        /* Slot the wildcard token would occupy for this state */
        int j = i - iLookAhead + fts5YYWILDCARD;
        assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) );
        if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){
#ifndef NDEBUG
          if( fts5yyTraceFILE ){
            fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n",
               fts5yyTracePrompt, fts5yyTokenName[iLookAhead],
               fts5yyTokenName[fts5YYWILDCARD]);
          }
#endif /* NDEBUG */
          return fts5yy_action[j];
        }
      }
#endif /* fts5YYWILDCARD */
      return fts5yy_default[stateno];
    }else{
      assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) );
      return fts5yy_action[i];
    }
  }while(1);
}
2255
2256/*
2257** Find the appropriate action for a parser given the non-terminal
2258** look-ahead token iLookAhead.
2259*/
2260static fts5YYACTIONTYPE fts5yy_find_reduce_action(
2261 fts5YYACTIONTYPE stateno, /* Current state number */
2262 fts5YYCODETYPE iLookAhead /* The look-ahead token */
2263){
2264 int i;
2265#ifdef fts5YYERRORSYMBOL
2266 if( stateno>fts5YY_REDUCE_COUNT ){
2267 return fts5yy_default[stateno];
2268 }
2269#else
2270 assert( stateno<=fts5YY_REDUCE_COUNT );
2271#endif
2272 i = fts5yy_reduce_ofst[stateno];
2273 assert( iLookAhead!=fts5YYNOCODE );
2274 i += iLookAhead;
2275#ifdef fts5YYERRORSYMBOL
2276 if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){
2277 return fts5yy_default[stateno];
2278 }
2279#else
2280 assert( i>=0 && i<fts5YY_ACTTAB_COUNT );
2281 assert( fts5yy_lookahead[i]==iLookAhead );
2282#endif
2283 return fts5yy_action[i];
2284}
2285
2286/*
2287** The following routine is called if the stack overflows.
2288*/
2289static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
2290 sqlite3Fts5ParserARG_FETCH
2291 sqlite3Fts5ParserCTX_FETCH
2292#ifndef NDEBUG
2293 if( fts5yyTraceFILE ){
2294 fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
2295 }
2296#endif
2297 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
2298 /* Here code is inserted which will execute if the parser
2299 ** stack every overflows */
2300/******** Begin %stack_overflow code ******************************************/
2301#line 36 "fts5parse.y"
2302
2303 sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
2304#line 839 "fts5parse.c"
2305/******** End %stack_overflow code ********************************************/
2306 sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument var */
2307 sqlite3Fts5ParserCTX_STORE
2308}
2309
2310/*
2311** Print tracing information for a SHIFT action
2312*/
2313#ifndef NDEBUG
2314static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){
2315 if( fts5yyTraceFILE ){
2316 if( fts5yyNewState<fts5YYNSTATE ){
2317 fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n",
2318 fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
2319 fts5yyNewState);
2320 }else{
2321 fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n",
2322 fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
2323 fts5yyNewState - fts5YY_MIN_REDUCE);
2324 }
2325 }
2326}
2327#else
2328# define fts5yyTraceShift(X,Y,Z)
2329#endif
2330
/*
** Perform a shift action: push a new entry holding fts5yyNewState,
** fts5yyMajor and fts5yyMinor onto the parser's stack.
**
** If there is no room for the new entry (and, for a heap-allocated stack,
** the stack cannot be grown), fts5yyStackOverflow() is invoked and nothing
** is pushed.
*/
static void fts5yy_shift(
  fts5yyParser *fts5yypParser,          /* The parser to be shifted */
  fts5YYACTIONTYPE fts5yyNewState,      /* The new state to shift in */
  fts5YYCODETYPE fts5yyMajor,           /* The major token to shift in */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor        /* The minor token to shift in */
){
  fts5yyStackEntry *fts5yytos;
  /* Advance first; the pointer is backed out again below on overflow */
  fts5yypParser->fts5yytos++;
#ifdef fts5YYTRACKMAXSTACKDEPTH
  if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
    fts5yypParser->fts5yyhwm++;
    assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) );
  }
#endif
#if fts5YYSTACKDEPTH>0 
  if( fts5yypParser->fts5yytos>fts5yypParser->fts5yystackEnd ){
    fts5yypParser->fts5yytos--;
    fts5yyStackOverflow(fts5yypParser);
    return;
  }
#else
  if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz] ){
    if( fts5yyGrowStack(fts5yypParser) ){
      fts5yypParser->fts5yytos--;
      fts5yyStackOverflow(fts5yypParser);
      return;
    }
  }
#endif
  /* A value above fts5YY_MAX_SHIFT is a SHIFTREDUCE action.  Convert it to
  ** the corresponding REDUCE action so that the stateno field holds the
  ** pending reduce. */
  if( fts5yyNewState > fts5YY_MAX_SHIFT ){
    fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
  }
  /* Re-read the top pointer: fts5yyGrowStack() may have moved the stack */
  fts5yytos = fts5yypParser->fts5yytos;
  fts5yytos->stateno = fts5yyNewState;
  fts5yytos->major = fts5yyMajor;
  fts5yytos->minor.fts5yy0 = fts5yyMinor;
  fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift");
}
2372
/* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side
** of that rule.  The values are symbol codes, as listed in
** fts5yyTokenName[]. */
static const fts5YYCODETYPE fts5yyRuleInfoLhs[] = {
    16,  /* (0) input ::= expr */
    20,  /* (1) colset ::= MINUS LCP colsetlist RCP */
    20,  /* (2) colset ::= LCP colsetlist RCP */
    20,  /* (3) colset ::= STRING */
    20,  /* (4) colset ::= MINUS STRING */
    21,  /* (5) colsetlist ::= colsetlist STRING */
    21,  /* (6) colsetlist ::= STRING */
    17,  /* (7) expr ::= expr AND expr */
    17,  /* (8) expr ::= expr OR expr */
    17,  /* (9) expr ::= expr NOT expr */
    17,  /* (10) expr ::= colset COLON LP expr RP */
    17,  /* (11) expr ::= LP expr RP */
    17,  /* (12) expr ::= exprlist */
    19,  /* (13) exprlist ::= cnearset */
    19,  /* (14) exprlist ::= exprlist cnearset */
    18,  /* (15) cnearset ::= nearset */
    18,  /* (16) cnearset ::= colset COLON nearset */
    22,  /* (17) nearset ::= phrase */
    22,  /* (18) nearset ::= CARET phrase */
    22,  /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
    23,  /* (20) nearphrases ::= phrase */
    23,  /* (21) nearphrases ::= nearphrases phrase */
    25,  /* (22) neardist_opt ::= */
    25,  /* (23) neardist_opt ::= COMMA STRING */
    24,  /* (24) phrase ::= phrase PLUS STRING star_opt */
    24,  /* (25) phrase ::= STRING star_opt */
    26,  /* (26) star_opt ::= STAR */
    26,  /* (27) star_opt ::= */
};
2405
/* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number
** of symbols on the right-hand side of that rule.  Rules with an empty
** right-hand side therefore have the value 0. */
static const signed char fts5yyRuleInfoNRhs[] = {
   -1,  /* (0) input ::= expr */
   -4,  /* (1) colset ::= MINUS LCP colsetlist RCP */
   -3,  /* (2) colset ::= LCP colsetlist RCP */
   -1,  /* (3) colset ::= STRING */
   -2,  /* (4) colset ::= MINUS STRING */
   -2,  /* (5) colsetlist ::= colsetlist STRING */
   -1,  /* (6) colsetlist ::= STRING */
   -3,  /* (7) expr ::= expr AND expr */
   -3,  /* (8) expr ::= expr OR expr */
   -3,  /* (9) expr ::= expr NOT expr */
   -5,  /* (10) expr ::= colset COLON LP expr RP */
   -3,  /* (11) expr ::= LP expr RP */
   -1,  /* (12) expr ::= exprlist */
   -1,  /* (13) exprlist ::= cnearset */
   -2,  /* (14) exprlist ::= exprlist cnearset */
   -1,  /* (15) cnearset ::= nearset */
   -3,  /* (16) cnearset ::= colset COLON nearset */
   -1,  /* (17) nearset ::= phrase */
   -2,  /* (18) nearset ::= CARET phrase */
   -5,  /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
   -1,  /* (20) nearphrases ::= phrase */
   -2,  /* (21) nearphrases ::= nearphrases phrase */
    0,  /* (22) neardist_opt ::= */
   -2,  /* (23) neardist_opt ::= COMMA STRING */
   -4,  /* (24) phrase ::= phrase PLUS STRING star_opt */
   -2,  /* (25) phrase ::= STRING star_opt */
   -1,  /* (26) star_opt ::= STAR */
    0,  /* (27) star_opt ::= */
};
2438
2439static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */
2440
2441/*
2442** Perform a reduce action and the shift that must immediately
2443** follow the reduce.
2444**
2445** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions
2446** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE
2447** if the lookahead token has already been consumed. As this procedure is
2448** only called from one place, optimizing compilers will in-line it, which
2449** means that the extra parameters have no performance impact.
2450*/
2451static fts5YYACTIONTYPE fts5yy_reduce(
2452 fts5yyParser *fts5yypParser, /* The parser */
2453 unsigned int fts5yyruleno, /* Number of the rule by which to reduce */
2454 int fts5yyLookahead, /* Lookahead token, or fts5YYNOCODE if none */
2455 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyLookaheadToken /* Value of the lookahead token */
2456 sqlite3Fts5ParserCTX_PDECL /* %extra_context */
2457){
2458 int fts5yygoto; /* The next state */
2459 fts5YYACTIONTYPE fts5yyact; /* The next action */
2460 fts5yyStackEntry *fts5yymsp; /* The top of the parser's stack */
2461 int fts5yysize; /* Amount to pop the stack */
2462 sqlite3Fts5ParserARG_FETCH
2463 (void)fts5yyLookahead;
2464 (void)fts5yyLookaheadToken;
2465 fts5yymsp = fts5yypParser->fts5yytos;
2466
2467 switch( fts5yyruleno ){
2468 /* Beginning here are the reduction cases. A typical example
2469 ** follows:
2470 ** case 0:
2471 ** #line <lineno> <grammarfile>
2472 ** { ... } // User supplied code
2473 ** #line <lineno> <thisfile>
2474 ** break;
2475 */
2476/********** Begin reduce actions **********************************************/
2477 fts5YYMINORTYPE fts5yylhsminor;
2478 case 0: /* input ::= expr */
2479#line 82 "fts5parse.y"
2480{ sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); }
2481#line 1016 "fts5parse.c"
2482 break;
2483 case 1: /* colset ::= MINUS LCP colsetlist RCP */
2484#line 97 "fts5parse.y"
2485{
2486 fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
2487}
2488#line 1023 "fts5parse.c"
2489 break;
2490 case 2: /* colset ::= LCP colsetlist RCP */
2491#line 100 "fts5parse.y"
2492{ fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; }
2493#line 1028 "fts5parse.c"
2494 break;
2495 case 3: /* colset ::= STRING */
2496#line 101 "fts5parse.y"
2497{
2498 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
2499}
2500#line 1035 "fts5parse.c"
2501 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2502 break;
2503 case 4: /* colset ::= MINUS STRING */
2504#line 104 "fts5parse.y"
2505{
2506 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
2507 fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
2508}
2509#line 1044 "fts5parse.c"
2510 break;
2511 case 5: /* colsetlist ::= colsetlist STRING */
2512#line 109 "fts5parse.y"
2513{
2514 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); }
2515#line 1050 "fts5parse.c"
2516 fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2517 break;
2518 case 6: /* colsetlist ::= STRING */
2519#line 111 "fts5parse.y"
2520{
2521 fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
2522}
2523#line 1058 "fts5parse.c"
2524 fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
2525 break;
2526 case 7: /* expr ::= expr AND expr */
2527#line 115 "fts5parse.y"
2528{
2529 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2530}
2531#line 1066 "fts5parse.c"
2532 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2533 break;
2534 case 8: /* expr ::= expr OR expr */
2535#line 118 "fts5parse.y"
2536{
2537 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2538}
2539#line 1074 "fts5parse.c"
2540 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2541 break;
2542 case 9: /* expr ::= expr NOT expr */
2543#line 121 "fts5parse.y"
2544{
2545 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
2546}
2547#line 1082 "fts5parse.c"
2548 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2549 break;
2550 case 10: /* expr ::= colset COLON LP expr RP */
2551#line 125 "fts5parse.y"
2552{
2553 sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11);
2554 fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;
2555}
2556#line 1091 "fts5parse.c"
2557 fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2558 break;
2559 case 11: /* expr ::= LP expr RP */
2560#line 129 "fts5parse.y"
2561{fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;}
2562#line 1097 "fts5parse.c"
2563 break;
2564 case 12: /* expr ::= exprlist */
2565 case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13);
2566#line 130 "fts5parse.y"
2567{fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;}
2568#line 1103 "fts5parse.c"
2569 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2570 break;
2571 case 14: /* exprlist ::= exprlist cnearset */
2572#line 133 "fts5parse.y"
2573{
2574 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24);
2575}
2576#line 1111 "fts5parse.c"
2577 fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2578 break;
2579 case 15: /* cnearset ::= nearset */
2580#line 137 "fts5parse.y"
2581{
2582 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
2583}
2584#line 1119 "fts5parse.c"
2585 fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2586 break;
2587 case 16: /* cnearset ::= colset COLON nearset */
2588#line 140 "fts5parse.y"
2589{
2590 fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
2591 sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11);
2592}
2593#line 1128 "fts5parse.c"
2594 fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
2595 break;
2596 case 17: /* nearset ::= phrase */
2597#line 151 "fts5parse.y"
2598{ fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); }
2599#line 1134 "fts5parse.c"
2600 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2601 break;
2602 case 18: /* nearset ::= CARET phrase */
2603#line 152 "fts5parse.y"
2604{
2605 sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53);
2606 fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
2607}
2608#line 1143 "fts5parse.c"
2609 break;
2610 case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */
2611#line 156 "fts5parse.y"
2612{
2613 sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
2614 sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0);
2615 fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46;
2616}
2617#line 1152 "fts5parse.c"
2618 fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2619 break;
2620 case 20: /* nearphrases ::= phrase */
2621#line 162 "fts5parse.y"
2622{
2623 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
2624}
2625#line 1160 "fts5parse.c"
2626 fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2627 break;
2628 case 21: /* nearphrases ::= nearphrases phrase */
2629#line 165 "fts5parse.y"
2630{
2631 fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53);
2632}
2633#line 1168 "fts5parse.c"
2634 fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
2635 break;
2636 case 22: /* neardist_opt ::= */
2637#line 172 "fts5parse.y"
2638{ fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
2639#line 1174 "fts5parse.c"
2640 break;
2641 case 23: /* neardist_opt ::= COMMA STRING */
2642#line 173 "fts5parse.y"
2643{ fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
2644#line 1179 "fts5parse.c"
2645 break;
2646 case 24: /* phrase ::= phrase PLUS STRING star_opt */
2647#line 185 "fts5parse.y"
2648{
2649 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
2650}
2651#line 1186 "fts5parse.c"
2652 fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
2653 break;
2654 case 25: /* phrase ::= STRING star_opt */
2655#line 188 "fts5parse.y"
2656{
2657 fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
2658}
2659#line 1194 "fts5parse.c"
2660 fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
2661 break;
2662 case 26: /* star_opt ::= STAR */
2663#line 196 "fts5parse.y"
2664{ fts5yymsp[0].minor.fts5yy4 = 1; }
2665#line 1200 "fts5parse.c"
2666 break;
2667 case 27: /* star_opt ::= */
2668#line 197 "fts5parse.y"
2669{ fts5yymsp[1].minor.fts5yy4 = 0; }
2670#line 1205 "fts5parse.c"
2671 break;
2672 default:
2673 break;
2674/********** End reduce actions ************************************************/
2675 };
2676 assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) );
2677 fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno];
2678 fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
2679 fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPE)fts5yygoto);
2680
2681 /* There are no SHIFTREDUCE actions on nonterminals because the table
2682 ** generator has simplified them to pure REDUCE actions. */
2683 assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) );
2684
2685 /* It is not possible for a REDUCE to be followed by an error */
2686 assert( fts5yyact!=fts5YY_ERROR_ACTION );
2687
2688 fts5yymsp += fts5yysize+1;
2689 fts5yypParser->fts5yytos = fts5yymsp;
2690 fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact;
2691 fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto;
2692 fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift");
2693 return fts5yyact;
2694}
2695
/*
** The following code executes when the parse fails.
**
** Every entry above the base of the parser stack is popped with
** fts5yy_pop_parser_stack(), after which the (empty, for this grammar)
** %parse_failure code would run. The function is compiled only when
** fts5YYNOERRORRECOVERY is not defined.
*/
#ifndef fts5YYNOERRORRECOVERY
static void fts5yy_parse_failed(
  fts5yyParser *fts5yypParser           /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  /* Optional trace output, only in builds with assert() enabled */
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
  }
#endif
  /* Unwind the stack down to (but not including) its base entry */
  while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
  /* Here code is inserted which will be executed whenever the
  ** parser fails */
/************ Begin %parse_failure code ***************************************/
/************ End %parse_failure code *****************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
#endif /* fts5YYNOERRORRECOVERY */
2719
/*
** The following code executes when a syntax error first occurs.
**
** The grammar's %syntax_error code reports the problem by calling
** sqlite3Fts5ParseError() with a message that quotes the text of the
** offending token (FTS5TOKEN.n bytes starting at FTS5TOKEN.p). The major
** token code is not used. NOTE(review): pParse is presumably brought into
** scope by the sqlite3Fts5ParserARG_FETCH macro - confirm against its
** definition.
*/
static void fts5yy_syntax_error(
  fts5yyParser *fts5yypParser,           /* The parser */
  int fts5yymajor,                   /* The major type of the error token */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor         /* The minor type of the error token */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#define FTS5TOKEN fts5yyminor
/************ Begin %syntax_error code ****************************************/
#line 30 "fts5parse.y"

  UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
  sqlite3Fts5ParseError(
    pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p
  );
#line 1273 "fts5parse.c"
/************ End %syntax_error code ******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
2743
/*
** The following is executed when the parser accepts.
**
** When error recovery is compiled in, the error counter is reset to -1.
** The stack is expected to have been unwound to its base entry before this
** function is called (see the assert() below). The %parse_accept section
** is empty for this grammar.
*/
static void fts5yy_accept(
  fts5yyParser *fts5yypParser           /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  /* Optional trace output, only in builds with assert() enabled */
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
  }
#endif
#ifndef fts5YYNOERRORRECOVERY
  fts5yypParser->fts5yyerrcnt = -1;
#endif
  assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack );
  /* Here code is inserted which will be executed whenever the
  ** parser accepts */
/*********** Begin %parse_accept code *****************************************/
/*********** End %parse_accept code *******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
2768
/* The main parser program.
** The first argument is a pointer to a structure obtained from
** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
** The second argument is the major token number.  The third is
** the minor token.  The fourth optional argument is whatever the
** user wants (and specified in the grammar) and is available for
** use by the action routines.
**
** Inputs:
** <ul>
** <li> A pointer to the parser (an opaque structure.)
** <li> The major token number.
** <li> The minor token number.
** <li> An optional argument of a grammar-specified type.
** </ul>
**
** Outputs:
** None.
**
** Each call feeds a single token to the parser. The main loop looks up the
** action for the (token, current state) pair and then either:
**
**   * reduces by a rule and loops again with the state returned by
**     fts5yy_reduce(),
**   * shifts the token and leaves the loop,
**   * accepts the input and returns, or
**   * handles a syntax error in one of three ways selected at compile time
**     by fts5YYERRORSYMBOL and fts5YYNOERRORRECOVERY.
*/
static void sqlite3Fts5Parser(
  void *fts5yyp,                   /* The parser */
  int fts5yymajor,                 /* The major token code number */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor       /* The value for the token */
  sqlite3Fts5ParserARG_PDECL               /* Optional %extra_argument parameter */
){
  fts5YYMINORTYPE fts5yyminorunion;
  fts5YYACTIONTYPE fts5yyact;   /* The parser action. */
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  int fts5yyendofinput;     /* True if we are at the end of input */
#endif
#ifdef fts5YYERRORSYMBOL
  int fts5yyerrorhit = 0;   /* True if fts5yymajor has invoked an error */
#endif
  fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp;  /* The parser */
  sqlite3Fts5ParserCTX_FETCH
  sqlite3Fts5ParserARG_STORE

  assert( fts5yypParser->fts5yytos!=0 );
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  /* Major token code 0 marks the end of the input */
  fts5yyendofinput = (fts5yymajor==0);
#endif

  /* Start from the state stored in the top-of-stack entry */
  fts5yyact = fts5yypParser->fts5yytos->stateno;
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    if( fts5yyact < fts5YY_MIN_REDUCE ){
      fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact);
    }else{
      fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE);
    }
  }
#endif

  while(1){ /* Exit by "break" */
    assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack );
    assert( fts5yyact==fts5yypParser->fts5yytos->stateno );
    fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPE)fts5yymajor,fts5yyact);
    if( fts5yyact >= fts5YY_MIN_REDUCE ){
      /* Action is a reduce: apply the rule, then loop again so that the
      ** same input token is re-examined in the resulting state. */
      unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE; /* Reduce by this rule */
#ifndef NDEBUG
      assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) );
      if( fts5yyTraceFILE ){
        int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
        if( fts5yysize ){
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
            fts5yyTracePrompt,
            fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action",
            fts5yypParser->fts5yytos[fts5yysize].stateno);
        }else{
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n",
            fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action");
        }
      }
#endif /* NDEBUG */

      /* Check that the stack is large enough to grow by a single entry
      ** if the RHS of the rule is empty.  This ensures that there is room
      ** enough on the stack to push the LHS value */
      if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){
#ifdef fts5YYTRACKMAXSTACKDEPTH
        if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
          fts5yypParser->fts5yyhwm++;
          assert( fts5yypParser->fts5yyhwm ==
                  (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack));
        }
#endif
#if fts5YYSTACKDEPTH>0
        /* Fixed-size stack: overflow abandons processing of this token */
        if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){
          fts5yyStackOverflow(fts5yypParser);
          break;
        }
#else
        /* Growable stack: overflow is reported only if growing fails */
        if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz-1] ){
          if( fts5yyGrowStack(fts5yypParser) ){
            fts5yyStackOverflow(fts5yypParser);
            break;
          }
        }
#endif
      }
      fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM);
    }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
      /* Action is a shift (or shift-reduce): consume the token and stop */
      fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPE)fts5yymajor,fts5yyminor);
#ifndef fts5YYNOERRORRECOVERY
      /* One more token shifted successfully since the last error */
      fts5yypParser->fts5yyerrcnt--;
#endif
      break;
    }else if( fts5yyact==fts5YY_ACCEPT_ACTION ){
      /* Pop the final entry so that fts5yy_accept() sees an empty stack */
      fts5yypParser->fts5yytos--;
      fts5yy_accept(fts5yypParser);
      return;
    }else{
      assert( fts5yyact == fts5YY_ERROR_ACTION );
      /* Copy the token value into a union so that it can be handed to
      ** fts5yy_destructor() by the error handling code below. */
      fts5yyminorunion.fts5yy0 = fts5yyminor;
#ifdef fts5YYERRORSYMBOL
      int fts5yymx;
#endif
#ifndef NDEBUG
      if( fts5yyTraceFILE ){
        fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
      }
#endif
#ifdef fts5YYERRORSYMBOL
      /* A syntax error has occurred.
      ** The response to an error depends upon whether or not the
      ** grammar defines an error token "ERROR".
      **
      ** This is what we do if the grammar does define ERROR:
      **
      **  * Call the %syntax_error function.
      **
      **  * Begin popping the stack until we enter a state where
      **    it is legal to shift the error symbol, then shift
      **    the error symbol.
      **
      **  * Set the error count to three.
      **
      **  * Begin accepting and shifting new tokens.  No new error
      **    processing will occur until three tokens have been
      **    shifted successfully.
      **
      */
      if( fts5yypParser->fts5yyerrcnt<0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
      }
      fts5yymx = fts5yypParser->fts5yytos->major;
      if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
             fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
        }
#endif
        fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yyminorunion);
        fts5yymajor = fts5YYNOCODE;
      }else{
        while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){
          fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno,
                                        fts5YYERRORSYMBOL);
          if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE ) break;
          fts5yy_pop_parser_stack(fts5yypParser);
        }
        if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){
          fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
          fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
          fts5yypParser->fts5yyerrcnt = -1;
#endif
          fts5yymajor = fts5YYNOCODE;
        }else if( fts5yymx!=fts5YYERRORSYMBOL ){
          fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
        }
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yyerrorhit = 1;
      if( fts5yymajor==fts5YYNOCODE ) break;
      fts5yyact = fts5yypParser->fts5yytos->stateno;
#elif defined(fts5YYNOERRORRECOVERY)
      /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
      ** do any kind of error recovery.  Instead, simply invoke the syntax
      ** error routine and continue going as if nothing had happened.
      **
      ** Applications can set this macro (for example inside %include) if
      ** they intend to abandon the parse upon the first syntax error seen.
      */
      fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      break;
#else  /* fts5YYERRORSYMBOL is not defined */
      /* This is what we do if the grammar does not define ERROR:
      **
      **  * Report an error message, and throw away the input token.
      **
      **  * If the input token is $, then fail the parse.
      **
      ** As before, subsequent error messages are suppressed until
      ** three input tokens have been successfully shifted.
      */
      if( fts5yypParser->fts5yyerrcnt<=0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      if( fts5yyendofinput ){
        fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
        fts5yypParser->fts5yyerrcnt = -1;
#endif
      }
      break;
#endif
    }
  }
#ifndef NDEBUG
  /* Trace the symbols left on the stack, skipping the base entry */
  if( fts5yyTraceFILE ){
    fts5yyStackEntry *i;
    char cDiv = '[';
    fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
    for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
      fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]);
      cDiv = ' ';
    }
    fprintf(fts5yyTraceFILE,"]\n");
  }
#endif
  return;
}
3000
/*
** Look up the fallback token for canonical token iToken.
**
** When the parser was generated with a fallback table (fts5YYFALLBACK
** defined) the table entry for iToken is returned. Otherwise there are no
** fallbacks and the result is always 0.
*/
static int sqlite3Fts5ParserFallback(int iToken){
  int iFallback = 0;              /* Result; 0 means "no fallback" */
#ifdef fts5YYFALLBACK
  assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
  iFallback = fts5yyFallback[iToken];
#else
  (void)iToken;
#endif
  return iFallback;
}
3014
3015#line 1 "fts5_aux.c"
3016/*
3017** 2014 May 31
3018**
3019** The author disclaims copyright to this source code. In place of
3020** a legal notice, here is a blessing:
3021**
3022** May you do good and not evil.
3023** May you find forgiveness for yourself and forgive others.
3024** May you share freely, never taking more than you give.
3025**
3026******************************************************************************
3027*/
3028
3029
3030/* #include "fts5Int.h" */
3031#include <math.h> /* amalgamator: keep */
3032
3033/*
3034** Object used to iterate through all "coalesced phrase instances" in
3035** a single column of the current row. If the phrase instances in the
3036** column being considered do not overlap, this object simply iterates
3037** through them. Or, if they do overlap (share one or more tokens in
3038** common), each set of overlapping instances is treated as a single
3039** match. See documentation for the highlight() auxiliary function for
3040** details.
3041**
3042** Usage is:
3043**
**   for(rc = fts5CInstIterInit(pApi, pFts, iCol, &iter);
**       (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter));
**       rc = fts5CInstIterNext(&iter)
3047** ){
3048** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
3049** }
3050**
3051*/
/*
** Iterator state for coalesced phrase instances. It is set up by
** fts5CInstIterInit() and advanced by fts5CInstIterNext(). After either
** call, iStart and iEnd describe the current coalesced instance, or are
** both -1 if there are no further instances in column iCol.
*/
typedef struct CInstIter CInstIter;
struct CInstIter {
  const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
  Fts5Context *pFts;              /* First arg to pass to pApi functions */
  int iCol;                       /* Column to search */
  int iInst;                      /* Next phrase instance index */
  int nInst;                      /* Total number of phrase instances */

  /* Output variables */
  int iStart;                     /* First token in coalesced phrase instance */
  int iEnd;                       /* Last token in coalesced phrase instance */
};
3064
3065/*
3066** Advance the iterator to the next coalesced phrase instance. Return
3067** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
3068*/
3069static int fts5CInstIterNext(CInstIter *pIter){
3070 int rc = SQLITE_OK;
3071 pIter->iStart = -1;
3072 pIter->iEnd = -1;
3073
3074 while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){
3075 int ip; int ic; int io;
3076 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
3077 if( rc==SQLITE_OK ){
3078 if( ic==pIter->iCol ){
3079 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
3080 if( pIter->iStart<0 ){
3081 pIter->iStart = io;
3082 pIter->iEnd = iEnd;
3083 }else if( io<=pIter->iEnd ){
3084 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
3085 }else{
3086 break;
3087 }
3088 }
3089 pIter->iInst++;
3090 }
3091 }
3092
3093 return rc;
3094}
3095
3096/*
3097** Initialize the iterator object indicated by the final parameter to
3098** iterate through coalesced phrase instances in column iCol.
3099*/
3100static int fts5CInstIterInit(
3101 const Fts5ExtensionApi *pApi,
3102 Fts5Context *pFts,
3103 int iCol,
3104 CInstIter *pIter
3105){
3106 int rc;
3107
3108 memset(pIter, 0, sizeof(CInstIter));
3109 pIter->pApi = pApi;
3110 pIter->pFts = pFts;
3111 pIter->iCol = iCol;
3112 rc = pApi->xInstCount(pFts, &pIter->nInst);
3113
3114 if( rc==SQLITE_OK ){
3115 rc = fts5CInstIterNext(pIter);
3116 }
3117
3118 return rc;
3119}
3120
3121
3122
3123/*************************************************************************
3124** Start of highlight() implementation.
3125*/
/*
** State shared between the highlight()/snippet() implementations and the
** fts5HighlightCb() tokenizer callback. The output string zOut is built up
** incrementally by fts5HighlightAppend().
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  CInstIter iter;                 /* Coalesced Instance Iterator */
  int iPos;                       /* Current token offset in zIn[] */
  int iRangeStart;                /* First token to include */
  int iRangeEnd;                  /* If non-zero, last token to include */
  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */
  int iOff;                       /* Current offset within zIn[] */
  char *zOut;                     /* Output value */
};
3139
3140/*
3141** Append text to the HighlightContext output string - p->zOut. Argument
3142** z points to a buffer containing n bytes of text to append. If n is
3143** negative, everything up until the first '\0' is appended to the output.
3144**
3145** If *pRc is set to any value other than SQLITE_OK when this function is
3146** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
3147** *pRc is set to an error code before returning.
3148*/
3149static void fts5HighlightAppend(
3150 int *pRc,
3151 HighlightContext *p,
3152 const char *z, int n
3153){
3154 if( *pRc==SQLITE_OK && z ){
3155 if( n<0 ) n = (int)strlen(z);
3156 p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
3157 if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
3158 }
3159}
3160
/*
** Tokenizer callback used by implementation of highlight() function.
**
** It is invoked once per token of the input text p->zIn. Text is copied to
** the output in order, with p->zOpen and p->zClose inserted around each
** coalesced phrase instance reported by p->iter. p->iOff records how much
** of the input has been copied so far.
**
** If p->iRangeEnd is greater than zero, only tokens whose positions lie
** between p->iRangeStart and p->iRangeEnd (inclusive) produce output.
**
** Returns SQLITE_OK, or an error code from fts5HighlightAppend() or
** fts5CInstIterNext().
*/
static int fts5HighlightCb(
  void *pContext,                 /* Pointer to HighlightContext object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iStartOff,                  /* Start offset of token */
  int iEndOff                     /* End offset of token */
){
  HighlightContext *p = (HighlightContext*)pContext;
  int rc = SQLITE_OK;
  int iPos;

  UNUSED_PARAM2(pToken, nToken);

  /* Colocated tokens do not occupy a token position of their own */
  if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
  iPos = p->iPos++;

  if( p->iRangeEnd>0 ){
    /* Ignore tokens outside the requested range */
    if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
    /* On a non-zero range start, begin copying from this token */
    if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
  }

  /* First token of the current instance: flush the text that precedes it,
  ** then emit the opening markup. */
  if( iPos==p->iter.iStart ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zOpen, -1);
    p->iOff = iStartOff;
  }

  /* Last token of the current instance: copy the text up to the end of the
  ** token, emit the closing markup and advance the iterator. */
  if( iPos==p->iter.iEnd ){
    if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
      /* The instance began before the range start, so the branch above did
      ** not emit the opening markup for it. */
      fts5HighlightAppend(&rc, p, p->zOpen, -1);
    }
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    fts5HighlightAppend(&rc, p, p->zClose, -1);
    p->iOff = iEndOff;
    if( rc==SQLITE_OK ){
      rc = fts5CInstIterNext(&p->iter);
    }
  }

  /* Last token of the range: copy the remaining text up to the end of the
  ** token, and emit the closing markup if the range ends part-way through
  ** the instance the iterator currently points at. */
  if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
    fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
    p->iOff = iEndOff;
    if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
      fts5HighlightAppend(&rc, p, p->zClose, -1);
    }
  }

  return rc;
}
3214
3215/*
3216** Implementation of highlight() function.
3217*/
3218static void fts5HighlightFunction(
3219 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3220 Fts5Context *pFts, /* First arg to pass to pApi functions */
3221 sqlite3_context *pCtx, /* Context for returning result/error */
3222 int nVal, /* Number of values in apVal[] array */
3223 sqlite3_value **apVal /* Array of trailing arguments */
3224){
3225 HighlightContext ctx;
3226 int rc;
3227 int iCol;
3228
3229 if( nVal!=3 ){
3230 const char *zErr = "wrong number of arguments to function highlight()";
3231 sqlite3_result_error(pCtx, zErr, -1);
3232 return;
3233 }
3234
3235 iCol = sqlite3_value_int(apVal[0]);
3236 memset(&ctx, 0, sizeof(HighlightContext));
3237 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
3238 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
3239 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
3240
3241 if( ctx.zIn ){
3242 if( rc==SQLITE_OK ){
3243 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
3244 }
3245
3246 if( rc==SQLITE_OK ){
3247 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
3248 }
3249 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3250
3251 if( rc==SQLITE_OK ){
3252 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
3253 }
3254 sqlite3_free(ctx.zOut);
3255 }
3256 if( rc!=SQLITE_OK ){
3257 sqlite3_result_error_code(pCtx, rc);
3258 }
3259}
3260/*
3261** End of highlight() implementation.
3262**************************************************************************/
3263
/*
** Context object passed to the fts5SentenceFinderCb() function.
**
** The callback appends to aFirst[] the token position of each token that
** it judges to begin a sentence. aFirst[] is grown on demand by
** fts5SentenceFinderAdd().
*/
typedef struct Fts5SFinder Fts5SFinder;
struct Fts5SFinder {
  int iPos;                       /* Current token position */
  int nFirstAlloc;                /* Allocated size of aFirst[] */
  int nFirst;                     /* Number of entries in aFirst[] */
  int *aFirst;                    /* Array of first token in each sentence */
  const char *zDoc;               /* Document being tokenized */
};
3275
3276/*
3277** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
3278** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
3279** error occurs.
3280*/
3281static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
3282 if( p->nFirstAlloc==p->nFirst ){
3283 int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
3284 int *aNew;
3285
3286 aNew = (int*)sqlite3_realloc64(p->aFirst, nNew*sizeof(int));
3287 if( aNew==0 ) return SQLITE_NOMEM;
3288 p->aFirst = aNew;
3289 p->nFirstAlloc = nNew;
3290 }
3291 p->aFirst[p->nFirst++] = iAdd;
3292 return SQLITE_OK;
3293}
3294
3295/*
3296** This function is an xTokenize() callback used by the auxiliary snippet()
3297** function. Its job is to identify tokens that are the first in a sentence.
3298** For each such token, an entry is added to the SFinder.aFirst[] array.
3299*/
3300static int fts5SentenceFinderCb(
3301 void *pContext, /* Pointer to HighlightContext object */
3302 int tflags, /* Mask of FTS5_TOKEN_* flags */
3303 const char *pToken, /* Buffer containing token */
3304 int nToken, /* Size of token in bytes */
3305 int iStartOff, /* Start offset of token */
3306 int iEndOff /* End offset of token */
3307){
3308 int rc = SQLITE_OK;
3309
3310 UNUSED_PARAM2(pToken, nToken);
3311 UNUSED_PARAM(iEndOff);
3312
3313 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
3314 Fts5SFinder *p = (Fts5SFinder*)pContext;
3315 if( p->iPos>0 ){
3316 int i;
3317 char c = 0;
3318 for(i=iStartOff-1; i>=0; i--){
3319 c = p->zDoc[i];
3320 if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
3321 }
3322 if( i!=iStartOff-1 && (c=='.' || c==':') ){
3323 rc = fts5SentenceFinderAdd(p, p->iPos);
3324 }
3325 }else{
3326 rc = fts5SentenceFinderAdd(p, 0);
3327 }
3328 p->iPos++;
3329 }
3330 return rc;
3331}
3332
3333static int fts5SnippetScore(
3334 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3335 Fts5Context *pFts, /* First arg to pass to pApi functions */
3336 int nDocsize, /* Size of column in tokens */
3337 unsigned char *aSeen, /* Array with one element per query phrase */
3338 int iCol, /* Column to score */
3339 int iPos, /* Starting offset to score */
3340 int nToken, /* Max tokens per snippet */
3341 int *pnScore, /* OUT: Score */
3342 int *piPos /* OUT: Adjusted offset */
3343){
3344 int rc;
3345 int i;
3346 int ip = 0;
3347 int ic = 0;
3348 int iOff = 0;
3349 int iFirst = -1;
3350 int nInst;
3351 int nScore = 0;
3352 int iLast = 0;
3353 sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;
3354
3355 rc = pApi->xInstCount(pFts, &nInst);
3356 for(i=0; i<nInst && rc==SQLITE_OK; i++){
3357 rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
3358 if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<iEnd ){
3359 nScore += (aSeen[ip] ? 1 : 1000);
3360 aSeen[ip] = 1;
3361 if( iFirst<0 ) iFirst = iOff;
3362 iLast = iOff + pApi->xPhraseSize(pFts, ip);
3363 }
3364 }
3365
3366 *pnScore = nScore;
3367 if( piPos ){
3368 sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
3369 if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
3370 if( iAdj<0 ) iAdj = 0;
3371 *piPos = (int)iAdj;
3372 }
3373
3374 return rc;
3375}
3376
3377/*
3378** Return the value in pVal interpreted as utf-8 text. Except, if pVal
3379** contains a NULL value, return a pointer to a static string zero
3380** bytes in length instead of a NULL pointer.
3381*/
3382static const char *fts5ValueToText(sqlite3_value *pVal){
3383 const char *zRet = (const char*)sqlite3_value_text(pVal);
3384 return zRet ? zRet : "";
3385}
3386
3387/*
3388** Implementation of snippet() function.
3389*/
3390static void fts5SnippetFunction(
3391 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3392 Fts5Context *pFts, /* First arg to pass to pApi functions */
3393 sqlite3_context *pCtx, /* Context for returning result/error */
3394 int nVal, /* Number of values in apVal[] array */
3395 sqlite3_value **apVal /* Array of trailing arguments */
3396){
3397 HighlightContext ctx;
3398 int rc = SQLITE_OK; /* Return code */
3399 int iCol; /* 1st argument to snippet() */
3400 const char *zEllips; /* 4th argument to snippet() */
3401 int nToken; /* 5th argument to snippet() */
3402 int nInst = 0; /* Number of instance matches this row */
3403 int i; /* Used to iterate through instances */
3404 int nPhrase; /* Number of phrases in query */
3405 unsigned char *aSeen; /* Array of "seen instance" flags */
3406 int iBestCol; /* Column containing best snippet */
3407 int iBestStart = 0; /* First token of best snippet */
3408 int nBestScore = 0; /* Score of best snippet */
3409 int nColSize = 0; /* Total size of iBestCol in tokens */
3410 Fts5SFinder sFinder; /* Used to find the beginnings of sentences */
3411 int nCol;
3412
3413 if( nVal!=5 ){
3414 const char *zErr = "wrong number of arguments to function snippet()";
3415 sqlite3_result_error(pCtx, zErr, -1);
3416 return;
3417 }
3418
3419 nCol = pApi->xColumnCount(pFts);
3420 memset(&ctx, 0, sizeof(HighlightContext));
3421 iCol = sqlite3_value_int(apVal[0]);
3422 ctx.zOpen = fts5ValueToText(apVal[1]);
3423 ctx.zClose = fts5ValueToText(apVal[2]);
3424 zEllips = fts5ValueToText(apVal[3]);
3425 nToken = sqlite3_value_int(apVal[4]);
3426
3427 iBestCol = (iCol>=0 ? iCol : 0);
3428 nPhrase = pApi->xPhraseCount(pFts);
3429 aSeen = sqlite3_malloc(nPhrase);
3430 if( aSeen==0 ){
3431 rc = SQLITE_NOMEM;
3432 }
3433 if( rc==SQLITE_OK ){
3434 rc = pApi->xInstCount(pFts, &nInst);
3435 }
3436
3437 memset(&sFinder, 0, sizeof(Fts5SFinder));
3438 for(i=0; i<nCol; i++){
3439 if( iCol<0 || iCol==i ){
3440 int nDoc;
3441 int nDocsize;
3442 int ii;
3443 sFinder.iPos = 0;
3444 sFinder.nFirst = 0;
3445 rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
3446 if( rc!=SQLITE_OK ) break;
3447 rc = pApi->xTokenize(pFts,
3448 sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
3449 );
3450 if( rc!=SQLITE_OK ) break;
3451 rc = pApi->xColumnSize(pFts, i, &nDocsize);
3452 if( rc!=SQLITE_OK ) break;
3453
3454 for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
3455 int ip, ic, io;
3456 int iAdj;
3457 int nScore;
3458 int jj;
3459
3460 rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
3461 if( ic!=i ) continue;
3462 if( io>nDocsize ) rc = FTS5_CORRUPT;
3463 if( rc!=SQLITE_OK ) continue;
3464 memset(aSeen, 0, nPhrase);
3465 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3466 io, nToken, &nScore, &iAdj
3467 );
3468 if( rc==SQLITE_OK && nScore>nBestScore ){
3469 nBestScore = nScore;
3470 iBestCol = i;
3471 iBestStart = iAdj;
3472 nColSize = nDocsize;
3473 }
3474
3475 if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){
3476 for(jj=0; jj<(sFinder.nFirst-1); jj++){
3477 if( sFinder.aFirst[jj+1]>io ) break;
3478 }
3479
3480 if( sFinder.aFirst[jj]<io ){
3481 memset(aSeen, 0, nPhrase);
3482 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3483 sFinder.aFirst[jj], nToken, &nScore, 0
3484 );
3485
3486 nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
3487 if( rc==SQLITE_OK && nScore>nBestScore ){
3488 nBestScore = nScore;
3489 iBestCol = i;
3490 iBestStart = sFinder.aFirst[jj];
3491 nColSize = nDocsize;
3492 }
3493 }
3494 }
3495 }
3496 }
3497 }
3498
3499 if( rc==SQLITE_OK ){
3500 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
3501 }
3502 if( rc==SQLITE_OK && nColSize==0 ){
3503 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
3504 }
3505 if( ctx.zIn ){
3506 if( rc==SQLITE_OK ){
3507 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
3508 }
3509
3510 ctx.iRangeStart = iBestStart;
3511 ctx.iRangeEnd = iBestStart + nToken - 1;
3512
3513 if( iBestStart>0 ){
3514 fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3515 }
3516
3517 /* Advance iterator ctx.iter so that it points to the first coalesced
3518 ** phrase instance at or following position iBestStart. */
3519 while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
3520 rc = fts5CInstIterNext(&ctx.iter);
3521 }
3522
3523 if( rc==SQLITE_OK ){
3524 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
3525 }
3526 if( ctx.iRangeEnd>=(nColSize-1) ){
3527 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3528 }else{
3529 fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3530 }
3531 }
3532 if( rc==SQLITE_OK ){
3533 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
3534 }else{
3535 sqlite3_result_error_code(pCtx, rc);
3536 }
3537 sqlite3_free(ctx.zOut);
3538 sqlite3_free(aSeen);
3539 sqlite3_free(sFinder.aFirst);
3540}
3541
3542/************************************************************************/
3543
3544/*
3545** The first time the bm25() function is called for a query, an instance
3546** of the following structure is allocated and populated.
3547*/
3548typedef struct Fts5Bm25Data Fts5Bm25Data;
3549struct Fts5Bm25Data {
3550 int nPhrase; /* Number of phrases in query */
3551 double avgdl; /* Average number of tokens in each row */
3552 double *aIDF; /* IDF for each phrase */
3553 double *aFreq; /* Array used to calculate phrase freq. */
3554};
3555
3556/*
3557** Callback used by fts5Bm25GetData() to count the number of rows in the
3558** table matched by each individual phrase within the query.
3559*/
3560static int fts5CountCb(
3561 const Fts5ExtensionApi *pApi,
3562 Fts5Context *pFts,
3563 void *pUserData /* Pointer to sqlite3_int64 variable */
3564){
3565 sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
3566 UNUSED_PARAM2(pApi, pFts);
3567 (*pn)++;
3568 return SQLITE_OK;
3569}
3570
3571/*
3572** Set *ppData to point to the Fts5Bm25Data object for the current query.
3573** If the object has not already been allocated, allocate and populate it
3574** now.
3575*/
3576static int fts5Bm25GetData(
3577 const Fts5ExtensionApi *pApi,
3578 Fts5Context *pFts,
3579 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */
3580){
3581 int rc = SQLITE_OK; /* Return code */
3582 Fts5Bm25Data *p; /* Object to return */
3583
3584 p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0);
3585 if( p==0 ){
3586 int nPhrase; /* Number of phrases in query */
3587 sqlite3_int64 nRow = 0; /* Number of rows in table */
3588 sqlite3_int64 nToken = 0; /* Number of tokens in table */
3589 sqlite3_int64 nByte; /* Bytes of space to allocate */
3590 int i;
3591
3592 /* Allocate the Fts5Bm25Data object */
3593 nPhrase = pApi->xPhraseCount(pFts);
3594 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
3595 p = (Fts5Bm25Data*)sqlite3_malloc64(nByte);
3596 if( p==0 ){
3597 rc = SQLITE_NOMEM;
3598 }else{
3599 memset(p, 0, (size_t)nByte);
3600 p->nPhrase = nPhrase;
3601 p->aIDF = (double*)&p[1];
3602 p->aFreq = &p->aIDF[nPhrase];
3603 }
3604
3605 /* Calculate the average document length for this FTS5 table */
3606 if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
3607 assert( rc!=SQLITE_OK || nRow>0 );
3608 if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
3609 if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow;
3610
3611 /* Calculate an IDF for each phrase in the query */
3612 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
3613 sqlite3_int64 nHit = 0;
3614 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
3615 if( rc==SQLITE_OK ){
3616 /* Calculate the IDF (Inverse Document Frequency) for phrase i.
3617 ** This is done using the standard BM25 formula as found on wikipedia:
3618 **
3619 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
3620 **
3621 ** where "N" is the total number of documents in the set and nHit
3622 ** is the number that contain at least one instance of the phrase
3623 ** under consideration.
3624 **
3625 ** The problem with this is that if (N < 2*nHit), the IDF is
3626 ** negative. Which is undesirable. So the mimimum allowable IDF is
3627 ** (1e-6) - roughly the same as a term that appears in just over
3628 ** half of set of 5,000,000 documents. */
3629 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
3630 if( idf<=0.0 ) idf = 1e-6;
3631 p->aIDF[i] = idf;
3632 }
3633 }
3634
3635 if( rc!=SQLITE_OK ){
3636 sqlite3_free(p);
3637 }else{
3638 rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
3639 }
3640 if( rc!=SQLITE_OK ) p = 0;
3641 }
3642 *ppData = p;
3643 return rc;
3644}
3645
3646/*
3647** Implementation of bm25() function.
3648*/
3649static void fts5Bm25Function(
3650 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
3651 Fts5Context *pFts, /* First arg to pass to pApi functions */
3652 sqlite3_context *pCtx, /* Context for returning result/error */
3653 int nVal, /* Number of values in apVal[] array */
3654 sqlite3_value **apVal /* Array of trailing arguments */
3655){
3656 const double k1 = 1.2; /* Constant "k1" from BM25 formula */
3657 const double b = 0.75; /* Constant "b" from BM25 formula */
3658 int rc; /* Error code */
3659 double score = 0.0; /* SQL function return value */
3660 Fts5Bm25Data *pData; /* Values allocated/calculated once only */
3661 int i; /* Iterator variable */
3662 int nInst = 0; /* Value returned by xInstCount() */
3663 double D = 0.0; /* Total number of tokens in row */
3664 double *aFreq = 0; /* Array of phrase freq. for current row */
3665
3666 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
3667 ** for each phrase in the query for the current row. */
3668 rc = fts5Bm25GetData(pApi, pFts, &pData);
3669 if( rc==SQLITE_OK ){
3670 aFreq = pData->aFreq;
3671 memset(aFreq, 0, sizeof(double) * pData->nPhrase);
3672 rc = pApi->xInstCount(pFts, &nInst);
3673 }
3674 for(i=0; rc==SQLITE_OK && i<nInst; i++){
3675 int ip; int ic; int io;
3676 rc = pApi->xInst(pFts, i, &ip, &ic, &io);
3677 if( rc==SQLITE_OK ){
3678 double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
3679 aFreq[ip] += w;
3680 }
3681 }
3682
3683 /* Figure out the total size of the current row in tokens. */
3684 if( rc==SQLITE_OK ){
3685 int nTok;
3686 rc = pApi->xColumnSize(pFts, -1, &nTok);
3687 D = (double)nTok;
3688 }
3689
3690 /* Determine and return the BM25 score for the current row. Or, if an
3691 ** error has occurred, throw an exception. */
3692 if( rc==SQLITE_OK ){
3693 for(i=0; i<pData->nPhrase; i++){
3694 score += pData->aIDF[i] * (
3695 ( aFreq[i] * (k1 + 1.0) ) /
3696 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
3697 );
3698 }
3699 sqlite3_result_double(pCtx, -1.0 * score);
3700 }else{
3701 sqlite3_result_error_code(pCtx, rc);
3702 }
3703}
3704
3705static int sqlite3Fts5AuxInit(fts5_api *pApi){
3706 struct Builtin {
3707 const char *zFunc; /* Function name (nul-terminated) */
3708 void *pUserData; /* User-data pointer */
3709 fts5_extension_function xFunc;/* Callback function */
3710 void (*xDestroy)(void*); /* Destructor function */
3711 } aBuiltin [] = {
3712 { "snippet", 0, fts5SnippetFunction, 0 },
3713 { "highlight", 0, fts5HighlightFunction, 0 },
3714 { "bm25", 0, fts5Bm25Function, 0 },
3715 };
3716 int rc = SQLITE_OK; /* Return code */
3717 int i; /* To iterate through builtin functions */
3718
3719 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
3720 rc = pApi->xCreateFunction(pApi,
3721 aBuiltin[i].zFunc,
3722 aBuiltin[i].pUserData,
3723 aBuiltin[i].xFunc,
3724 aBuiltin[i].xDestroy
3725 );
3726 }
3727
3728 return rc;
3729}
3730
3731#line 1 "fts5_buffer.c"
3732/*
3733** 2014 May 31
3734**
3735** The author disclaims copyright to this source code. In place of
3736** a legal notice, here is a blessing:
3737**
3738** May you do good and not evil.
3739** May you find forgiveness for yourself and forgive others.
3740** May you share freely, never taking more than you give.
3741**
3742******************************************************************************
3743*/
3744
3745
3746
3747/* #include "fts5Int.h" */
3748
3749static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
3750 if( (u32)pBuf->nSpace<nByte ){
3751 u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
3752 u8 *pNew;
3753 while( nNew<nByte ){
3754 nNew = nNew * 2;
3755 }
3756 pNew = sqlite3_realloc64(pBuf->p, nNew);
3757 if( pNew==0 ){
3758 *pRc = SQLITE_NOMEM;
3759 return 1;
3760 }else{
3761 pBuf->nSpace = (int)nNew;
3762 pBuf->p = pNew;
3763 }
3764 }
3765 return 0;
3766}
3767
3768
3769/*
3770** Encode value iVal as an SQLite varint and append it to the buffer object
3771** pBuf. If an OOM error occurs, set the error code in p.
3772*/
3773static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
3774 if( fts5BufferGrow(pRc, pBuf, 9) ) return;
3775 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
3776}
3777
3778static void sqlite3Fts5Put32(u8 *aBuf, int iVal){
3779 aBuf[0] = (iVal>>24) & 0x00FF;
3780 aBuf[1] = (iVal>>16) & 0x00FF;
3781 aBuf[2] = (iVal>> 8) & 0x00FF;
3782 aBuf[3] = (iVal>> 0) & 0x00FF;
3783}
3784
3785static int sqlite3Fts5Get32(const u8 *aBuf){
3786 return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
3787}
3788
3789/*
3790** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
3791** the error code in p. If an error has already occurred when this function
3792** is called, it is a no-op.
3793*/
3794static void sqlite3Fts5BufferAppendBlob(
3795 int *pRc,
3796 Fts5Buffer *pBuf,
3797 u32 nData,
3798 const u8 *pData
3799){
3800 if( nData ){
3801 if( fts5BufferGrow(pRc, pBuf, nData) ) return;
3802 memcpy(&pBuf->p[pBuf->n], pData, nData);
3803 pBuf->n += nData;
3804 }
3805}
3806
3807/*
3808** Append the nul-terminated string zStr to the buffer pBuf. This function
3809** ensures that the byte following the buffer data is set to 0x00, even
3810** though this byte is not included in the pBuf->n count.
3811*/
3812static void sqlite3Fts5BufferAppendString(
3813 int *pRc,
3814 Fts5Buffer *pBuf,
3815 const char *zStr
3816){
3817 int nStr = (int)strlen(zStr);
3818 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
3819 pBuf->n--;
3820}
3821
3822/*
3823** Argument zFmt is a printf() style format string. This function performs
3824** the printf() style processing, then appends the results to buffer pBuf.
3825**
3826** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
3827** following the buffer data is set to 0x00, even though this byte is not
3828** included in the pBuf->n count.
3829*/
3830static void sqlite3Fts5BufferAppendPrintf(
3831 int *pRc,
3832 Fts5Buffer *pBuf,
3833 char *zFmt, ...
3834){
3835 if( *pRc==SQLITE_OK ){
3836 char *zTmp;
3837 va_list ap;
3838 va_start(ap, zFmt);
3839 zTmp = sqlite3_vmprintf(zFmt, ap);
3840 va_end(ap);
3841
3842 if( zTmp==0 ){
3843 *pRc = SQLITE_NOMEM;
3844 }else{
3845 sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
3846 sqlite3_free(zTmp);
3847 }
3848 }
3849}
3850
3851static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
3852 char *zRet = 0;
3853 if( *pRc==SQLITE_OK ){
3854 va_list ap;
3855 va_start(ap, zFmt);
3856 zRet = sqlite3_vmprintf(zFmt, ap);
3857 va_end(ap);
3858 if( zRet==0 ){
3859 *pRc = SQLITE_NOMEM;
3860 }
3861 }
3862 return zRet;
3863}
3864
3865
3866/*
3867** Free any buffer allocated by pBuf. Zero the structure before returning.
3868*/
3869static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
3870 sqlite3_free(pBuf->p);
3871 memset(pBuf, 0, sizeof(Fts5Buffer));
3872}
3873
3874/*
3875** Zero the contents of the buffer object. But do not free the associated
3876** memory allocation.
3877*/
3878static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
3879 pBuf->n = 0;
3880}
3881
3882/*
3883** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
3884** the error code in p. If an error has already occurred when this function
3885** is called, it is a no-op.
3886*/
3887static void sqlite3Fts5BufferSet(
3888 int *pRc,
3889 Fts5Buffer *pBuf,
3890 int nData,
3891 const u8 *pData
3892){
3893 pBuf->n = 0;
3894 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
3895}
3896
3897static int sqlite3Fts5PoslistNext64(
3898 const u8 *a, int n, /* Buffer containing poslist */
3899 int *pi, /* IN/OUT: Offset within a[] */
3900 i64 *piOff /* IN/OUT: Current offset */
3901){
3902 int i = *pi;
3903 if( i>=n ){
3904 /* EOF */
3905 *piOff = -1;
3906 return 1;
3907 }else{
3908 i64 iOff = *piOff;
3909 u32 iVal;
3910 fts5FastGetVarint32(a, i, iVal);
3911 if( iVal<=1 ){
3912 if( iVal==0 ){
3913 *pi = i;
3914 return 0;
3915 }
3916 fts5FastGetVarint32(a, i, iVal);
3917 iOff = ((i64)iVal) << 32;
3918 assert( iOff>=0 );
3919 fts5FastGetVarint32(a, i, iVal);
3920 if( iVal<2 ){
3921 /* This is a corrupt record. So stop parsing it here. */
3922 *piOff = -1;
3923 return 1;
3924 }
3925 *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
3926 }else{
3927 *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
3928 }
3929 *pi = i;
3930 assert_nc( *piOff>=iOff );
3931 return 0;
3932 }
3933}
3934
3935
3936/*
3937** Advance the iterator object passed as the only argument. Return true
3938** if the iterator reaches EOF, or false otherwise.
3939*/
3940static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
3941 if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
3942 pIter->bEof = 1;
3943 }
3944 return pIter->bEof;
3945}
3946
3947static int sqlite3Fts5PoslistReaderInit(
3948 const u8 *a, int n, /* Poslist buffer to iterate through */
3949 Fts5PoslistReader *pIter /* Iterator object to initialize */
3950){
3951 memset(pIter, 0, sizeof(*pIter));
3952 pIter->a = a;
3953 pIter->n = n;
3954 sqlite3Fts5PoslistReaderNext(pIter);
3955 return pIter->bEof;
3956}
3957
3958/*
3959** Append position iPos to the position list being accumulated in buffer
3960** pBuf, which must be already be large enough to hold the new data.
3961** The previous position written to this list is *piPrev. *piPrev is set
3962** to iPos before returning.
3963*/
3964static void sqlite3Fts5PoslistSafeAppend(
3965 Fts5Buffer *pBuf,
3966 i64 *piPrev,
3967 i64 iPos
3968){
3969 if( iPos>=*piPrev ){
3970 static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
3971 if( (iPos & colmask) != (*piPrev & colmask) ){
3972 pBuf->p[pBuf->n++] = 1;
3973 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
3974 *piPrev = (iPos & colmask);
3975 }
3976 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
3977 *piPrev = iPos;
3978 }
3979}
3980
3981static int sqlite3Fts5PoslistWriterAppend(
3982 Fts5Buffer *pBuf,
3983 Fts5PoslistWriter *pWriter,
3984 i64 iPos
3985){
3986 int rc = 0; /* Initialized only to suppress erroneous warning from Clang */
3987 if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
3988 sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
3989 return SQLITE_OK;
3990}
3991
3992static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
3993 void *pRet = 0;
3994 if( *pRc==SQLITE_OK ){
3995 pRet = sqlite3_malloc64(nByte);
3996 if( pRet==0 ){
3997 if( nByte>0 ) *pRc = SQLITE_NOMEM;
3998 }else{
3999 memset(pRet, 0, (size_t)nByte);
4000 }
4001 }
4002 return pRet;
4003}
4004
4005/*
4006** Return a nul-terminated copy of the string indicated by pIn. If nIn
4007** is non-negative, then it is the length of the string in bytes. Otherwise,
4008** the length of the string is determined using strlen().
4009**
4010** It is the responsibility of the caller to eventually free the returned
4011** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
4012*/
4013static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
4014 char *zRet = 0;
4015 if( *pRc==SQLITE_OK ){
4016 if( nIn<0 ){
4017 nIn = (int)strlen(pIn);
4018 }
4019 zRet = (char*)sqlite3_malloc(nIn+1);
4020 if( zRet ){
4021 memcpy(zRet, pIn, nIn);
4022 zRet[nIn] = '\0';
4023 }else{
4024 *pRc = SQLITE_NOMEM;
4025 }
4026 }
4027 return zRet;
4028}
4029
4030
4031/*
4032** Return true if character 't' may be part of an FTS5 bareword, or false
4033** otherwise. Characters that may be part of barewords:
4034**
4035** * All non-ASCII characters,
4036** * The 52 upper and lower case ASCII characters, and
4037** * The 10 integer ASCII characters.
4038** * The underscore character "_" (0x5F).
4039** * The unicode "subsitute" character (0x1A).
4040*/
4041static int sqlite3Fts5IsBareword(char t){
4042 u8 aBareword[128] = {
4043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */
4044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */
4045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */
4046 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */
4047 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */
4048 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */
4049 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */
4050 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 0x7F */
4051 };
4052
4053 return (t & 0x80) || aBareword[(int)t];
4054}
4055
4056
4057/*************************************************************************
4058*/
/*
** An Fts5Termset is a hash table containing a set of (iIdx, term) pairs.
** Each pair is stored in an Fts5TermsetEntry. Entries that hash to the
** same apHash[] slot are linked together via their pNext pointers.
**
** sqlite3Fts5TermsetAdd() allocates each entry and the space for its term
** as a single allocation, so pTerm points into the same allocation as the
** entry itself and must not be freed separately.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term text (not nul-terminated) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in same hash slot */
};

struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Hash table slots */
};
4070
4071static int sqlite3Fts5TermsetNew(Fts5Termset **pp){
4072 int rc = SQLITE_OK;
4073 *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
4074 return rc;
4075}
4076
4077static int sqlite3Fts5TermsetAdd(
4078 Fts5Termset *p,
4079 int iIdx,
4080 const char *pTerm, int nTerm,
4081 int *pbPresent
4082){
4083 int rc = SQLITE_OK;
4084 *pbPresent = 0;
4085 if( p ){
4086 int i;
4087 u32 hash = 13;
4088 Fts5TermsetEntry *pEntry;
4089
4090 /* Calculate a hash value for this term. This is the same hash checksum
4091 ** used by the fts5_hash.c module. This is not important for correct
4092 ** operation of the module, but is necessary to ensure that some tests
4093 ** designed to produce hash table collisions really do work. */
4094 for(i=nTerm-1; i>=0; i--){
4095 hash = (hash << 3) ^ hash ^ pTerm[i];
4096 }
4097 hash = (hash << 3) ^ hash ^ iIdx;
4098 hash = hash % ArraySize(p->apHash);
4099
4100 for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
4101 if( pEntry->iIdx==iIdx
4102 && pEntry->nTerm==nTerm
4103 && memcmp(pEntry->pTerm, pTerm, nTerm)==0
4104 ){
4105 *pbPresent = 1;
4106 break;
4107 }
4108 }
4109
4110 if( pEntry==0 ){
4111 pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
4112 if( pEntry ){
4113 pEntry->pTerm = (char*)&pEntry[1];
4114 pEntry->nTerm = nTerm;
4115 pEntry->iIdx = iIdx;
4116 memcpy(pEntry->pTerm, pTerm, nTerm);
4117 pEntry->pNext = p->apHash[hash];
4118 p->apHash[hash] = pEntry;
4119 }
4120 }
4121 }
4122
4123 return rc;
4124}
4125
4126static void sqlite3Fts5TermsetFree(Fts5Termset *p){
4127 if( p ){
4128 u32 i;
4129 for(i=0; i<ArraySize(p->apHash); i++){
4130 Fts5TermsetEntry *pEntry = p->apHash[i];
4131 while( pEntry ){
4132 Fts5TermsetEntry *pDel = pEntry;
4133 pEntry = pEntry->pNext;
4134 sqlite3_free(pDel);
4135 }
4136 }
4137 sqlite3_free(p);
4138 }
4139}
4140
4141#line 1 "fts5_config.c"
4142/*
4143** 2014 Jun 09
4144**
4145** The author disclaims copyright to this source code. In place of
4146** a legal notice, here is a blessing:
4147**
4148** May you do good and not evil.
4149** May you find forgiveness for yourself and forgive others.
4150** May you share freely, never taking more than you give.
4151**
4152******************************************************************************
4153**
4154** This is an SQLite module implementing full-text search.
4155*/
4156
4157
4158/* #include "fts5Int.h" */
4159
/*
** Default values for configuration parameters.
** NOTE(review): presumably these are the defaults for the "pgsz",
** "automerge", "usermerge", "crisismerge" and "hashsize" options
** respectively - confirm against the code that uses each macro.
*/
#define FTS5_DEFAULT_PAGE_SIZE   4050
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_USERMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (64*1024)
4168
/*
** Return 1 if x is a white-space character, or 0 otherwise. The only
** character treated as white-space is the ASCII space (0x20).
*/
static int fts5_iswhitespace(char x){
  if( x==' ' ){
    return 1;
  }
  return 0;
}
4172
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - double-quote, single-quote, '[' or back-tick. Otherwise 0.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
4176
4177/*
4178** Argument pIn points to a character that is part of a nul-terminated
4179** string. Return a pointer to the first character following *pIn in
4180** the string that is not a white-space character.
4181*/
4182static const char *fts5ConfigSkipWhitespace(const char *pIn){
4183 const char *p = pIn;
4184 if( p ){
4185 while( fts5_iswhitespace(*p) ){ p++; }
4186 }
4187 return p;
4188}
4189
4190/*
4191** Argument pIn points to a character that is part of a nul-terminated
4192** string. Return a pointer to the first character following *pIn in
4193** the string that is not a "bareword" character.
4194*/
4195static const char *fts5ConfigSkipBareword(const char *pIn){
4196 const char *p = pIn;
4197 while ( sqlite3Fts5IsBareword(*p) ) p++;
4198 if( p==pIn ) p = 0;
4199 return p;
4200}
4201
/*
** Return 1 if a is an ASCII decimal digit ('0'..'9'), or 0 otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  if( a>'9' ) return 0;
  return 1;
}
4205
4206
4207
/*
** Argument pIn points to the first character of what may be an SQL
** literal within a nul-terminated string. The following are recognized:
**
**   * the keyword NULL (any case),
**   * a blob literal - X'...' containing an even number of hex digits,
**   * a string literal enclosed in single quotes, within which a quote
**     character is escaped by doubling it, and
**   * a number - an optional sign and digits, optionally followed by a
**     decimal point and at least one more digit.
**
** If a literal is recognized, a pointer to the first character following
** it is returned. Otherwise, NULL is returned.
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
        ){
          p++;
        }
        /* (p-pIn) counts the 'x', the open quote and the hex digits. It
        ** is therefore even if and only if the number of digits is. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      for(;;){
        if( *p==0 ){
          /* Reached the end of the input without finding the closing
          ** quote. Testing for this before advancing ensures that p is
          ** never moved past the nul-terminator. */
          p = 0;
          break;
        }
        if( *p=='\'' ){
          p++;
          if( *p!='\'' ) break;   /* Closing quote. p now points past it. */
          /* Otherwise p points to the 2nd quote of an escaped '' pair. */
        }
        p++;
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by further digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }
      if( p==pIn ) p = 0;

      break;
  }

  return p;
}
4270
4271/*
4272** The first character of the string pointed to by argument z is guaranteed
4273** to be an open-quote character (see function fts5_isopenquote()).
4274**
4275** This function searches for the corresponding close-quote character within
4276** the string and, if found, dequotes the string in place and adds a new
4277** nul-terminator byte.
4278**
4279** If the close-quote is found, the value returned is the byte offset of
4280** the character immediately following it. Or, if the close-quote is not
4281** found, -1 is returned. If -1 is returned, the buffer is left in an
4282** undefined state.
4283*/
4284static int fts5Dequote(char *z){
4285 char q;
4286 int iIn = 1;
4287 int iOut = 0;
4288 q = z[0];
4289
4290 /* Set stack variable q to the close-quote character */
4291 assert( q=='[' || q=='\'' || q=='"' || q=='`' );
4292 if( q=='[' ) q = ']';
4293
4294 while( z[iIn] ){
4295 if( z[iIn]==q ){
4296 if( z[iIn+1]!=q ){
4297 /* Character iIn was the close quote. */
4298 iIn++;
4299 break;
4300 }else{
4301 /* Character iIn and iIn+1 form an escaped quote character. Skip
4302 ** the input cursor past both and copy a single quote character
4303 ** to the output buffer. */
4304 iIn += 2;
4305 z[iOut++] = q;
4306 }
4307 }else{
4308 z[iOut++] = z[iIn++];
4309 }
4310 }
4311
4312 z[iOut] = '\0';
4313 return iIn;
4314}
4315
4316/*
4317** Convert an SQL-style quoted string into a normal string by removing
4318** the quote characters. The conversion is done in-place. If the
4319** input does not begin with a quote character, then this routine
4320** is a no-op.
4321**
4322** Examples:
4323**
4324** "abc" becomes abc
4325** 'xyz' becomes xyz
4326** [pqr] becomes pqr
4327** `mno` becomes mno
4328*/
4329static void sqlite3Fts5Dequote(char *z){
4330 char quote; /* Quote character (if any ) */
4331
4332 assert( 0==fts5_iswhitespace(z[0]) );
4333 quote = z[0];
4334 if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){
4335 fts5Dequote(z);
4336 }
4337}
4338
4339
/*
** A single name/value pair belonging to an enumeration. Arrays of these
** are searched by fts5ConfigSetEnum(), which stops at the first entry for
** which zName is NULL.
*/
struct Fts5Enum {
  const char *zName;              /* Name of enumeration value */
  int eVal;                       /* Corresponding integer value */
};
typedef struct Fts5Enum Fts5Enum;
4345
4346static int fts5ConfigSetEnum(
4347 const Fts5Enum *aEnum,
4348 const char *zEnum,
4349 int *peVal
4350){
4351 int nEnum = (int)strlen(zEnum);
4352 int i;
4353 int iVal = -1;
4354
4355 for(i=0; aEnum[i].zName; i++){
4356 if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
4357 if( iVal>=0 ) return SQLITE_ERROR;
4358 iVal = aEnum[i].eVal;
4359 }
4360 }
4361
4362 *peVal = iVal;
4363 return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
4364}
4365
4366/*
4367** Parse a "special" CREATE VIRTUAL TABLE directive and update
4368** configuration object pConfig as appropriate.
4369**
4370** If successful, object pConfig is updated and SQLITE_OK returned. If
4371** an error occurs, an SQLite error code is returned and an error message
4372** may be left in *pzErr. It is the responsibility of the caller to
4373** eventually free any such error message using sqlite3_free().
4374*/
4375static int fts5ConfigParseSpecial(
4376 Fts5Global *pGlobal,
4377 Fts5Config *pConfig, /* Configuration object to update */
4378 const char *zCmd, /* Special command to parse */
4379 const char *zArg, /* Argument to parse */
4380 char **pzErr /* OUT: Error message */
4381){
4382 int rc = SQLITE_OK;
4383 int nCmd = (int)strlen(zCmd);
4384 if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
4385 const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
4386 const char *p;
4387 int bFirst = 1;
4388 if( pConfig->aPrefix==0 ){
4389 pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
4390 if( rc ) return rc;
4391 }
4392
4393 p = zArg;
4394 while( 1 ){
4395 int nPre = 0;
4396
4397 while( p[0]==' ' ) p++;
4398 if( bFirst==0 && p[0]==',' ){
4399 p++;
4400 while( p[0]==' ' ) p++;
4401 }else if( p[0]=='\0' ){
4402 break;
4403 }
4404 if( p[0]<'0' || p[0]>'9' ){
4405 *pzErr = sqlite3_mprintf("malformed prefix=... directive");
4406 rc = SQLITE_ERROR;
4407 break;
4408 }
4409
4410 if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
4411 *pzErr = sqlite3_mprintf(
4412 "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
4413 );
4414 rc = SQLITE_ERROR;
4415 break;
4416 }
4417
4418 while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
4419 nPre = nPre*10 + (p[0] - '0');
4420 p++;
4421 }
4422
4423 if( nPre<=0 || nPre>=1000 ){
4424 *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
4425 rc = SQLITE_ERROR;
4426 break;
4427 }
4428
4429 pConfig->aPrefix[pConfig->nPrefix] = nPre;
4430 pConfig->nPrefix++;
4431 bFirst = 0;
4432 }
4433 assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
4434 return rc;
4435 }
4436
4437 if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
4438 const char *p = (const char*)zArg;
4439 sqlite3_int64 nArg = strlen(zArg) + 1;
4440 char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
4441 char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
4442 char *pSpace = pDel;
4443
4444 if( azArg && pSpace ){
4445 if( pConfig->pTok ){
4446 *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
4447 rc = SQLITE_ERROR;
4448 }else{
4449 for(nArg=0; p && *p; nArg++){
4450 const char *p2 = fts5ConfigSkipWhitespace(p);
4451 if( *p2=='\'' ){
4452 p = fts5ConfigSkipLiteral(p2);
4453 }else{
4454 p = fts5ConfigSkipBareword(p2);
4455 }
4456 if( p ){
4457 memcpy(pSpace, p2, p-p2);
4458 azArg[nArg] = pSpace;
4459 sqlite3Fts5Dequote(pSpace);
4460 pSpace += (p - p2) + 1;
4461 p = fts5ConfigSkipWhitespace(p);
4462 }
4463 }
4464 if( p==0 ){
4465 *pzErr = sqlite3_mprintf("parse error in tokenize directive");
4466 rc = SQLITE_ERROR;
4467 }else{
4468 rc = sqlite3Fts5GetTokenizer(pGlobal,
4469 (const char**)azArg, (int)nArg, pConfig,
4470 pzErr
4471 );
4472 }
4473 }
4474 }
4475
4476 sqlite3_free(azArg);
4477 sqlite3_free(pDel);
4478 return rc;
4479 }
4480
4481 if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
4482 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
4483 *pzErr = sqlite3_mprintf("multiple content=... directives");
4484 rc = SQLITE_ERROR;
4485 }else{
4486 if( zArg[0] ){
4487 pConfig->eContent = FTS5_CONTENT_EXTERNAL;
4488 pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
4489 }else{
4490 pConfig->eContent = FTS5_CONTENT_NONE;
4491 }
4492 }
4493 return rc;
4494 }
4495
4496 if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
4497 if( pConfig->zContentRowid ){
4498 *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
4499 rc = SQLITE_ERROR;
4500 }else{
4501 pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
4502 }
4503 return rc;
4504 }
4505
4506 if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
4507 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4508 *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
4509 rc = SQLITE_ERROR;
4510 }else{
4511 pConfig->bColumnsize = (zArg[0]=='1');
4512 }
4513 return rc;
4514 }
4515
4516 if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
4517 const Fts5Enum aDetail[] = {
4518 { "none", FTS5_DETAIL_NONE },
4519 { "full", FTS5_DETAIL_FULL },
4520 { "columns", FTS5_DETAIL_COLUMNS },
4521 { 0, 0 }
4522 };
4523
4524 if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
4525 *pzErr = sqlite3_mprintf("malformed detail=... directive");
4526 }
4527 return rc;
4528 }
4529
4530 *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
4531 return SQLITE_ERROR;
4532}
4533
4534/*
4535** Allocate an instance of the default tokenizer ("simple") at
4536** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
4537** code if an error occurs.
4538*/
4539static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
4540 assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
4541 return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0);
4542}
4543
4544/*
4545** Gobble up the first bareword or quoted word from the input buffer zIn.
4546** Return a pointer to the character immediately following the last in
4547** the gobbled word if successful, or a NULL pointer otherwise (failed
4548** to find close-quote character).
4549**
4550** Before returning, set pzOut to point to a new buffer containing a
4551** nul-terminated, dequoted copy of the gobbled word. If the word was
4552** quoted, *pbQuoted is also set to 1 before returning.
4553**
4554** If *pRc is other than SQLITE_OK when this function is called, it is
4555** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
4556** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
4557** set if a parse error (failed to find close quote) occurs.
4558*/
4559static const char *fts5ConfigGobbleWord(
4560 int *pRc, /* IN/OUT: Error code */
4561 const char *zIn, /* Buffer to gobble string/bareword from */
4562 char **pzOut, /* OUT: malloc'd buffer containing str/bw */
4563 int *pbQuoted /* OUT: Set to true if dequoting required */
4564){
4565 const char *zRet = 0;
4566
4567 sqlite3_int64 nIn = strlen(zIn);
4568 char *zOut = sqlite3_malloc64(nIn+1);
4569
4570 assert( *pRc==SQLITE_OK );
4571 *pbQuoted = 0;
4572 *pzOut = 0;
4573
4574 if( zOut==0 ){
4575 *pRc = SQLITE_NOMEM;
4576 }else{
4577 memcpy(zOut, zIn, (size_t)(nIn+1));
4578 if( fts5_isopenquote(zOut[0]) ){
4579 int ii = fts5Dequote(zOut);
4580 zRet = &zIn[ii];
4581 *pbQuoted = 1;
4582 }else{
4583 zRet = fts5ConfigSkipBareword(zIn);
4584 if( zRet ){
4585 zOut[zRet-zIn] = '\0';
4586 }
4587 }
4588 }
4589
4590 if( zRet==0 ){
4591 sqlite3_free(zOut);
4592 }else{
4593 *pzOut = zOut;
4594 }
4595
4596 return zRet;
4597}
4598
4599static int fts5ConfigParseColumn(
4600 Fts5Config *p,
4601 char *zCol,
4602 char *zArg,
4603 char **pzErr
4604){
4605 int rc = SQLITE_OK;
4606 if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
4607 || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
4608 ){
4609 *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
4610 rc = SQLITE_ERROR;
4611 }else if( zArg ){
4612 if( 0==sqlite3_stricmp(zArg, "unindexed") ){
4613 p->abUnindexed[p->nCol] = 1;
4614 }else{
4615 *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
4616 rc = SQLITE_ERROR;
4617 }
4618 }
4619
4620 p->azCol[p->nCol++] = zCol;
4621 return rc;
4622}
4623
4624/*
4625** Populate the Fts5Config.zContentExprlist string.
4626*/
4627static int fts5ConfigMakeExprlist(Fts5Config *p){
4628 int i;
4629 int rc = SQLITE_OK;
4630 Fts5Buffer buf = {0, 0, 0};
4631
4632 sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
4633 if( p->eContent!=FTS5_CONTENT_NONE ){
4634 for(i=0; i<p->nCol; i++){
4635 if( p->eContent==FTS5_CONTENT_EXTERNAL ){
4636 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
4637 }else{
4638 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
4639 }
4640 }
4641 }
4642
4643 assert( p->zContentExprlist==0 );
4644 p->zContentExprlist = (char*)buf.p;
4645 return rc;
4646}
4647
4648/*
4649** Arguments nArg/azArg contain the string arguments passed to the xCreate
4650** or xConnect method of the virtual table. This function attempts to
4651** allocate an instance of Fts5Config containing the results of parsing
4652** those arguments.
4653**
4654** If successful, SQLITE_OK is returned and *ppOut is set to point to the
4655** new Fts5Config object. If an error occurs, an SQLite error code is
4656** returned, *ppOut is set to NULL and an error message may be left in
4657** *pzErr. It is the responsibility of the caller to eventually free any
4658** such error message using sqlite3_free().
4659*/
4660static int sqlite3Fts5ConfigParse(
4661 Fts5Global *pGlobal,
4662 sqlite3 *db,
4663 int nArg, /* Number of arguments */
4664 const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */
4665 Fts5Config **ppOut, /* OUT: Results of parse */
4666 char **pzErr /* OUT: Error message */
4667){
4668 int rc = SQLITE_OK; /* Return code */
4669 Fts5Config *pRet; /* New object to return */
4670 int i;
4671 sqlite3_int64 nByte;
4672
4673 *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
4674 if( pRet==0 ) return SQLITE_NOMEM;
4675 memset(pRet, 0, sizeof(Fts5Config));
4676 pRet->db = db;
4677 pRet->iCookie = -1;
4678
4679 nByte = nArg * (sizeof(char*) + sizeof(u8));
4680 pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
4681 pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
4682 pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
4683 pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
4684 pRet->bColumnsize = 1;
4685 pRet->eDetail = FTS5_DETAIL_FULL;
4686#ifdef SQLITE_DEBUG
4687 pRet->bPrefixIndex = 1;
4688#endif
4689 if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
4690 *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
4691 rc = SQLITE_ERROR;
4692 }
4693
4694 for(i=3; rc==SQLITE_OK && i<nArg; i++){
4695 const char *zOrig = azArg[i];
4696 const char *z;
4697 char *zOne = 0;
4698 char *zTwo = 0;
4699 int bOption = 0;
4700 int bMustBeCol = 0;
4701
4702 z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
4703 z = fts5ConfigSkipWhitespace(z);
4704 if( z && *z=='=' ){
4705 bOption = 1;
4706 assert( zOne!=0 );
4707 z++;
4708 if( bMustBeCol ) z = 0;
4709 }
4710 z = fts5ConfigSkipWhitespace(z);
4711 if( z && z[0] ){
4712 int bDummy;
4713 z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
4714 if( z && z[0] ) z = 0;
4715 }
4716
4717 if( rc==SQLITE_OK ){
4718 if( z==0 ){
4719 *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
4720 rc = SQLITE_ERROR;
4721 }else{
4722 if( bOption ){
4723 rc = fts5ConfigParseSpecial(pGlobal, pRet,
4724 ALWAYS(zOne)?zOne:"",
4725 zTwo?zTwo:"",
4726 pzErr
4727 );
4728 }else{
4729 rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
4730 zOne = 0;
4731 }
4732 }
4733 }
4734
4735 sqlite3_free(zOne);
4736 sqlite3_free(zTwo);
4737 }
4738
4739 /* If a tokenizer= option was successfully parsed, the tokenizer has
4740 ** already been allocated. Otherwise, allocate an instance of the default
4741 ** tokenizer (unicode61) now. */
4742 if( rc==SQLITE_OK && pRet->pTok==0 ){
4743 rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
4744 }
4745
4746 /* If no zContent option was specified, fill in the default values. */
4747 if( rc==SQLITE_OK && pRet->zContent==0 ){
4748 const char *zTail = 0;
4749 assert( pRet->eContent==FTS5_CONTENT_NORMAL
4750 || pRet->eContent==FTS5_CONTENT_NONE
4751 );
4752 if( pRet->eContent==FTS5_CONTENT_NORMAL ){
4753 zTail = "content";
4754 }else if( pRet->bColumnsize ){
4755 zTail = "docsize";
4756 }
4757
4758 if( zTail ){
4759 pRet->zContent = sqlite3Fts5Mprintf(
4760 &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
4761 );
4762 }
4763 }
4764
4765 if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
4766 pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
4767 }
4768
4769 /* Formulate the zContentExprlist text */
4770 if( rc==SQLITE_OK ){
4771 rc = fts5ConfigMakeExprlist(pRet);
4772 }
4773
4774 if( rc!=SQLITE_OK ){
4775 sqlite3Fts5ConfigFree(pRet);
4776 *ppOut = 0;
4777 }
4778 return rc;
4779}
4780
4781/*
4782** Free the configuration object passed as the only argument.
4783*/
4784static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
4785 if( pConfig ){
4786 int i;
4787 if( pConfig->pTok ){
4788 pConfig->pTokApi->xDelete(pConfig->pTok);
4789 }
4790 sqlite3_free(pConfig->zDb);
4791 sqlite3_free(pConfig->zName);
4792 for(i=0; i<pConfig->nCol; i++){
4793 sqlite3_free(pConfig->azCol[i]);
4794 }
4795 sqlite3_free(pConfig->azCol);
4796 sqlite3_free(pConfig->aPrefix);
4797 sqlite3_free(pConfig->zRank);
4798 sqlite3_free(pConfig->zRankArgs);
4799 sqlite3_free(pConfig->zContent);
4800 sqlite3_free(pConfig->zContentRowid);
4801 sqlite3_free(pConfig->zContentExprlist);
4802 sqlite3_free(pConfig);
4803 }
4804}
4805
4806/*
4807** Call sqlite3_declare_vtab() based on the contents of the configuration
4808** object passed as the only argument. Return SQLITE_OK if successful, or
4809** an SQLite error code if an error occurs.
4810*/
4811static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
4812 int i;
4813 int rc = SQLITE_OK;
4814 char *zSql;
4815
4816 zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
4817 for(i=0; zSql && i<pConfig->nCol; i++){
4818 const char *zSep = (i==0?"":", ");
4819 zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
4820 }
4821 zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
4822 zSql, pConfig->zName, FTS5_RANK_NAME
4823 );
4824
4825 assert( zSql || rc==SQLITE_NOMEM );
4826 if( zSql ){
4827 rc = sqlite3_declare_vtab(pConfig->db, zSql);
4828 sqlite3_free(zSql);
4829 }
4830
4831 return rc;
4832}
4833
4834/*
4835** Tokenize the text passed via the second and third arguments.
4836**
4837** The callback is invoked once for each token in the input text. The
4838** arguments passed to it are, in order:
4839**
4840** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize()
4841** const char *pToken // Pointer to buffer containing token
4842** int nToken // Size of token in bytes
4843** int iStart // Byte offset of start of token within input text
4844** int iEnd // Byte offset of end of token within input text
4845** int iPos // Position of token in input (first token is 0)
4846**
4847** If the callback returns a non-zero value the tokenization is abandoned
4848** and no further callbacks are issued.
4849**
4850** This function returns SQLITE_OK if successful or an SQLite error code
4851** if an error occurs. If the tokenization was abandoned early because
4852** the callback returned SQLITE_DONE, this is not an error and this function
4853** still returns SQLITE_OK. Or, if the tokenization was abandoned early
4854** because the callback returned another non-zero value, it is assumed
4855** to be an SQLite error code and returned to the caller.
4856*/
4857static int sqlite3Fts5Tokenize(
4858 Fts5Config *pConfig, /* FTS5 Configuration object */
4859 int flags, /* FTS5_TOKENIZE_* flags */
4860 const char *pText, int nText, /* Text to tokenize */
4861 void *pCtx, /* Context passed to xToken() */
4862 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
4863){
4864 if( pText==0 ) return SQLITE_OK;
4865 return pConfig->pTokApi->xTokenize(
4866 pConfig->pTok, pCtx, flags, pText, nText, xToken
4867 );
4868}
4869
/*
** pIn should point at the start of a comma-separated list of SQL literals
** that is terminated by a ')' character. Whitespace is permitted around
** each literal.
**
** If the input has this form, return a pointer to the ')'. Otherwise
** (a literal fails to parse, or a literal is followed by something other
** than ',' or ')') return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *z = pIn;

  for(;;){
    z = fts5ConfigSkipWhitespace(z);
    z = fts5ConfigSkipLiteral(z);
    z = fts5ConfigSkipWhitespace(z);
    if( z==0 ) return 0;           /* Bad literal */
    if( *z==')' ) return z;        /* End of list */
    if( *z!=',' ) return 0;        /* Junk after literal */
    z++;                           /* Step over the ',' */
  }
}
4893
4894/*
4895** Parameter zIn contains a rank() function specification. The format of
4896** this is:
4897**
4898** + Bareword (function name)
4899** + Open parenthesis - "("
4900** + Zero or more SQL literals in a comma separated list
4901** + Close parenthesis - ")"
4902*/
4903static int sqlite3Fts5ConfigParseRank(
4904 const char *zIn, /* Input string */
4905 char **pzRank, /* OUT: Rank function name */
4906 char **pzRankArgs /* OUT: Rank function arguments */
4907){
4908 const char *p = zIn;
4909 const char *pRank;
4910 char *zRank = 0;
4911 char *zRankArgs = 0;
4912 int rc = SQLITE_OK;
4913
4914 *pzRank = 0;
4915 *pzRankArgs = 0;
4916
4917 if( p==0 ){
4918 rc = SQLITE_ERROR;
4919 }else{
4920 p = fts5ConfigSkipWhitespace(p);
4921 pRank = p;
4922 p = fts5ConfigSkipBareword(p);
4923
4924 if( p ){
4925 zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
4926 if( zRank ) memcpy(zRank, pRank, p-pRank);
4927 }else{
4928 rc = SQLITE_ERROR;
4929 }
4930
4931 if( rc==SQLITE_OK ){
4932 p = fts5ConfigSkipWhitespace(p);
4933 if( *p!='(' ) rc = SQLITE_ERROR;
4934 p++;
4935 }
4936 if( rc==SQLITE_OK ){
4937 const char *pArgs;
4938 p = fts5ConfigSkipWhitespace(p);
4939 pArgs = p;
4940 if( *p!=')' ){
4941 p = fts5ConfigSkipArgs(p);
4942 if( p==0 ){
4943 rc = SQLITE_ERROR;
4944 }else{
4945 zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
4946 if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
4947 }
4948 }
4949 }
4950 }
4951
4952 if( rc!=SQLITE_OK ){
4953 sqlite3_free(zRank);
4954 assert( zRankArgs==0 );
4955 }else{
4956 *pzRank = zRank;
4957 *pzRankArgs = zRankArgs;
4958 }
4959 return rc;
4960}
4961
4962static int sqlite3Fts5ConfigSetValue(
4963 Fts5Config *pConfig,
4964 const char *zKey,
4965 sqlite3_value *pVal,
4966 int *pbBadkey
4967){
4968 int rc = SQLITE_OK;
4969
4970 if( 0==sqlite3_stricmp(zKey, "pgsz") ){
4971 int pgsz = 0;
4972 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4973 pgsz = sqlite3_value_int(pVal);
4974 }
4975 if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE ){
4976 *pbBadkey = 1;
4977 }else{
4978 pConfig->pgsz = pgsz;
4979 }
4980 }
4981
4982 else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
4983 int nHashSize = -1;
4984 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4985 nHashSize = sqlite3_value_int(pVal);
4986 }
4987 if( nHashSize<=0 ){
4988 *pbBadkey = 1;
4989 }else{
4990 pConfig->nHashSize = nHashSize;
4991 }
4992 }
4993
4994 else if( 0==sqlite3_stricmp(zKey, "automerge") ){
4995 int nAutomerge = -1;
4996 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4997 nAutomerge = sqlite3_value_int(pVal);
4998 }
4999 if( nAutomerge<0 || nAutomerge>64 ){
5000 *pbBadkey = 1;
5001 }else{
5002 if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
5003 pConfig->nAutomerge = nAutomerge;
5004 }
5005 }
5006
5007 else if( 0==sqlite3_stricmp(zKey, "usermerge") ){
5008 int nUsermerge = -1;
5009 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
5010 nUsermerge = sqlite3_value_int(pVal);
5011 }
5012 if( nUsermerge<2 || nUsermerge>16 ){
5013 *pbBadkey = 1;
5014 }else{
5015 pConfig->nUsermerge = nUsermerge;
5016 }
5017 }
5018
5019 else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
5020 int nCrisisMerge = -1;
5021 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
5022 nCrisisMerge = sqlite3_value_int(pVal);
5023 }
5024 if( nCrisisMerge<0 ){
5025 *pbBadkey = 1;
5026 }else{
5027 if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
5028 if( nCrisisMerge>=FTS5_MAX_SEGMENT ) nCrisisMerge = FTS5_MAX_SEGMENT-1;
5029 pConfig->nCrisisMerge = nCrisisMerge;
5030 }
5031 }
5032
5033 else if( 0==sqlite3_stricmp(zKey, "rank") ){
5034 const char *zIn = (const char*)sqlite3_value_text(pVal);
5035 char *zRank;
5036 char *zRankArgs;
5037 rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
5038 if( rc==SQLITE_OK ){
5039 sqlite3_free(pConfig->zRank);
5040 sqlite3_free(pConfig->zRankArgs);
5041 pConfig->zRank = zRank;
5042 pConfig->zRankArgs = zRankArgs;
5043 }else if( rc==SQLITE_ERROR ){
5044 rc = SQLITE_OK;
5045 *pbBadkey = 1;
5046 }
5047 }else{
5048 *pbBadkey = 1;
5049 }
5050 return rc;
5051}
5052
5053/*
5054** Load the contents of the %_config table into memory.
5055*/
5056static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
5057 const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
5058 char *zSql;
5059 sqlite3_stmt *p = 0;
5060 int rc = SQLITE_OK;
5061 int iVersion = 0;
5062
5063 /* Set default values */
5064 pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
5065 pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
5066 pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE;
5067 pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
5068 pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
5069
5070 zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
5071 if( zSql ){
5072 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
5073 sqlite3_free(zSql);
5074 }
5075
5076 assert( rc==SQLITE_OK || p==0 );
5077 if( rc==SQLITE_OK ){
5078 while( SQLITE_ROW==sqlite3_step(p) ){
5079 const char *zK = (const char*)sqlite3_column_text(p, 0);
5080 sqlite3_value *pVal = sqlite3_column_value(p, 1);
5081 if( 0==sqlite3_stricmp(zK, "version") ){
5082 iVersion = sqlite3_value_int(pVal);
5083 }else{
5084 int bDummy = 0;
5085 sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
5086 }
5087 }
5088 rc = sqlite3_finalize(p);
5089 }
5090
5091 if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){
5092 rc = SQLITE_ERROR;
5093 if( pConfig->pzErrmsg ){
5094 assert( 0==*pConfig->pzErrmsg );
5095 *pConfig->pzErrmsg = sqlite3_mprintf(
5096 "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
5097 iVersion, FTS5_CURRENT_VERSION
5098 );
5099 }
5100 }
5101
5102 if( rc==SQLITE_OK ){
5103 pConfig->iCookie = iCookie;
5104 }
5105 return rc;
5106}
5107
5108#line 1 "fts5_expr.c"
5109/*
5110** 2014 May 31
5111**
5112** The author disclaims copyright to this source code. In place of
5113** a legal notice, here is a blessing:
5114**
5115** May you do good and not evil.
5116** May you find forgiveness for yourself and forgive others.
5117** May you share freely, never taking more than you give.
5118**
5119******************************************************************************
5120**
5121*/
5122
5123
5124
5125/* #include "fts5Int.h" */
5126/* #include "fts5parse.h" */
5127
/*
** All token types in the generated fts5parse.h file are greater than 0.
** The value 0 is therefore used as the end-of-input token code. It is
** also the value returned by fts5ExprGetToken() after reporting a
** tokenization error.
*/
#define FTS5_EOF 0

/*
** The largest value that fits in a signed 64-bit integer
** (0x7fffffffffffffff), assembled from two 32-bit halves.
*/
#define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32))

typedef struct Fts5ExprTerm Fts5ExprTerm;

/*
** Functions generated by lemon from fts5parse.y. The Trace function is
** only declared when NDEBUG is not defined, which is also the only
** configuration in which <stdio.h> is included here.
*/
static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
#ifndef NDEBUG
#include <stdio.h>
static void sqlite3Fts5ParserTrace(FILE*, char*);
#endif
static int sqlite3Fts5ParserFallback(int);
5148
5149
/*
** A compiled FTS5 expression, as created by sqlite3Fts5ExprNew().
**
** sqlite3Fts5ExprNew() initializes pIndex to NULL and bDesc to 0, copies
** its pConfig argument into pConfig and takes nPhrase/apExprPhrase from
** the parse context. sqlite3Fts5ExprFree() releases the tree rooted at
** pRoot, the apExprPhrase array and the object itself.
*/
struct Fts5Expr {
  Fts5Index *pIndex;
  Fts5Config *pConfig;
  Fts5ExprNode *pRoot;            /* Root of the expression tree */
  int bDesc;                      /* Iterate in descending rowid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};
5158
/*
** A single node in an FTS5 expression tree.
**
** eType:
**   Expression node type. Always one of:
**
**     FTS5_AND                 (nChild, apChild valid)
**     FTS5_OR                  (nChild, apChild valid)
**     FTS5_NOT                 (nChild, apChild valid)
**     FTS5_STRING              (pNear valid)
**     FTS5_TERM                (pNear valid)
**
** sqlite3Fts5ParseNodeFree() releases a node by freeing each of its
** nChild children, then pNear, then the node itself.
*/
struct Fts5ExprNode {
  int eType;                      /* Node type */
  int bEof;                       /* True at EOF */
  int bNomatch;                   /* True if entry is not a match */

  /* Next method for this node. */
  int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);

  i64 iRowid;                     /* Current rowid */
  Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */

  /* Child nodes. For a NOT node, this array always contains 2 entries. For
  ** AND or OR nodes, it contains 2 or more entries.
  ** NOTE(review): declared with one element; presumably the node is
  ** over-allocated to make room for nChild entries - confirm at the
  ** allocation site. */
  int nChild;                     /* Number of child nodes */
  Fts5ExprNode *apChild[1];       /* Array of child nodes */
};
5185
/*
** Evaluates to true if node p is of type FTS5_TERM or FTS5_STRING. The
** argument is evaluated twice.
*/
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
** Argument (b), the node, is evaluated twice.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
5193
/*
** An instance of the following structure represents a single search term
** or term prefix. Terms that are synonyms of one another are chained
** together through the pSynonym pointer; the functions
** fts5ExprSynonymRowid() and fts5ExprSynonymList() walk that chain
** starting from the term they are passed.
*/
struct Fts5ExprTerm {
  u8 bPrefix;                     /* True for a prefix term */
  u8 bFirst;                      /* True if token must be first in column */
  char *zTerm;                    /* nul-terminated term */
  Fts5IndexIter *pIter;           /* Iterator for this term */
  Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
};
5205
/*
** A phrase. One or more terms that must appear in a contiguous sequence
** within a document for it to match.
**
** NOTE(review): aTerm[] is declared with one element; presumably the
** object is over-allocated to hold nTerm entries - confirm at the
** allocation site.
*/
struct Fts5ExprPhrase {
  Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
  Fts5Buffer poslist;             /* Current position list */
  int nTerm;                      /* Number of entries in aTerm[] */
  Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
};
5216
/*
** One or more phrases that must appear within a certain token distance of
** each other within each matching document.
**
** NOTE(review): apPhrase[] is declared with one element; presumably the
** object is over-allocated to hold nPhrase entries - confirm at the
** allocation site.
*/
struct Fts5ExprNearset {
  int nNear;                      /* NEAR parameter */
  Fts5Colset *pColset;            /* Columns to search (NULL -> all columns) */
  int nPhrase;                    /* Number of entries in aPhrase[] array */
  Fts5ExprPhrase *apPhrase[1];    /* Array of phrase pointers */
};
5227
5228
/*
** Parse context. One of these is zeroed and then passed to the parser by
** sqlite3Fts5ExprNew() for each expression that is compiled.
**
** The first error is recorded by sqlite3Fts5ParseError(), which sets rc
** to SQLITE_ERROR and zErr to the formatted message. While rc is
** SQLITE_OK, zErr is NULL.
*/
struct Fts5Parse {
  Fts5Config *pConfig;
  char *zErr;                     /* Error message, or NULL */
  int rc;                         /* SQLITE_OK, or error code */
  int nPhrase;                    /* Size of apPhrase array */
  Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
  Fts5ExprNode *pExpr;            /* Result of a successful parse */
  int bPhraseToAnd;               /* Convert "a+b" to "a AND b" */
};
5241
5242static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
5243 va_list ap;
5244 va_start(ap, zFmt);
5245 if( pParse->rc==SQLITE_OK ){
5246 assert( pParse->zErr==0 );
5247 pParse->zErr = sqlite3_vmprintf(zFmt, ap);
5248 pParse->rc = SQLITE_ERROR;
5249 }
5250 va_end(ap);
5251}
5252
/*
** Return 1 if character t is one of the four whitespace characters
** recognized by the expression tokenizer (space, tab, newline or
** carriage return), or 0 otherwise.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
5256
5257/*
5258** Read the first token from the nul-terminated string at *pz.
5259*/
5260static int fts5ExprGetToken(
5261 Fts5Parse *pParse,
5262 const char **pz, /* IN/OUT: Pointer into buffer */
5263 Fts5Token *pToken
5264){
5265 const char *z = *pz;
5266 int tok;
5267
5268 /* Skip past any whitespace */
5269 while( fts5ExprIsspace(*z) ) z++;
5270
5271 pToken->p = z;
5272 pToken->n = 1;
5273 switch( *z ){
5274 case '(': tok = FTS5_LP; break;
5275 case ')': tok = FTS5_RP; break;
5276 case '{': tok = FTS5_LCP; break;
5277 case '}': tok = FTS5_RCP; break;
5278 case ':': tok = FTS5_COLON; break;
5279 case ',': tok = FTS5_COMMA; break;
5280 case '+': tok = FTS5_PLUS; break;
5281 case '*': tok = FTS5_STAR; break;
5282 case '-': tok = FTS5_MINUS; break;
5283 case '^': tok = FTS5_CARET; break;
5284 case '\0': tok = FTS5_EOF; break;
5285
5286 case '"': {
5287 const char *z2;
5288 tok = FTS5_STRING;
5289
5290 for(z2=&z[1]; 1; z2++){
5291 if( z2[0]=='"' ){
5292 z2++;
5293 if( z2[0]!='"' ) break;
5294 }
5295 if( z2[0]=='\0' ){
5296 sqlite3Fts5ParseError(pParse, "unterminated string");
5297 return FTS5_EOF;
5298 }
5299 }
5300 pToken->n = (z2 - z);
5301 break;
5302 }
5303
5304 default: {
5305 const char *z2;
5306 if( sqlite3Fts5IsBareword(z[0])==0 ){
5307 sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
5308 return FTS5_EOF;
5309 }
5310 tok = FTS5_STRING;
5311 for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
5312 pToken->n = (z2 - z);
5313 if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR;
5314 if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
5315 if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
5316 break;
5317 }
5318 }
5319
5320 *pz = &pToken->p[pToken->n];
5321 return tok;
5322}
5323
5324static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64((sqlite3_int64)t);}
5325static void fts5ParseFree(void *p){ sqlite3_free(p); }
5326
5327static int sqlite3Fts5ExprNew(
5328 Fts5Config *pConfig, /* FTS5 Configuration */
5329 int bPhraseToAnd,
5330 int iCol,
5331 const char *zExpr, /* Expression text */
5332 Fts5Expr **ppNew,
5333 char **pzErr
5334){
5335 Fts5Parse sParse;
5336 Fts5Token token;
5337 const char *z = zExpr;
5338 int t; /* Next token type */
5339 void *pEngine;
5340 Fts5Expr *pNew;
5341
5342 *ppNew = 0;
5343 *pzErr = 0;
5344 memset(&sParse, 0, sizeof(sParse));
5345 sParse.bPhraseToAnd = bPhraseToAnd;
5346 pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
5347 if( pEngine==0 ){ return SQLITE_NOMEM; }
5348 sParse.pConfig = pConfig;
5349
5350 do {
5351 t = fts5ExprGetToken(&sParse, &z, &token);
5352 sqlite3Fts5Parser(pEngine, t, token, &sParse);
5353 }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
5354 sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
5355
5356 /* If the LHS of the MATCH expression was a user column, apply the
5357 ** implicit column-filter. */
5358 if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){
5359 int n = sizeof(Fts5Colset);
5360 Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n);
5361 if( pColset ){
5362 pColset->nCol = 1;
5363 pColset->aiCol[0] = iCol;
5364 sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset);
5365 }
5366 }
5367
5368 assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
5369 if( sParse.rc==SQLITE_OK ){
5370 *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
5371 if( pNew==0 ){
5372 sParse.rc = SQLITE_NOMEM;
5373 sqlite3Fts5ParseNodeFree(sParse.pExpr);
5374 }else{
5375 if( !sParse.pExpr ){
5376 const int nByte = sizeof(Fts5ExprNode);
5377 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
5378 if( pNew->pRoot ){
5379 pNew->pRoot->bEof = 1;
5380 }
5381 }else{
5382 pNew->pRoot = sParse.pExpr;
5383 }
5384 pNew->pIndex = 0;
5385 pNew->pConfig = pConfig;
5386 pNew->apExprPhrase = sParse.apPhrase;
5387 pNew->nPhrase = sParse.nPhrase;
5388 pNew->bDesc = 0;
5389 sParse.apPhrase = 0;
5390 }
5391 }else{
5392 sqlite3Fts5ParseNodeFree(sParse.pExpr);
5393 }
5394
5395 sqlite3_free(sParse.apPhrase);
5396 *pzErr = sParse.zErr;
5397 return sParse.rc;
5398}
5399
5400/*
5401** This function is only called when using the special 'trigram' tokenizer.
5402** Argument zText contains the text of a LIKE or GLOB pattern matched
5403** against column iCol. This function creates and compiles an FTS5 MATCH
5404** expression that will match a superset of the rows matched by the LIKE or
5405** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
5406** code.
5407*/
5408static int sqlite3Fts5ExprPattern(
5409 Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp
5410){
5411 i64 nText = strlen(zText);
5412 char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1);
5413 int rc = SQLITE_OK;
5414
5415 if( zExpr==0 ){
5416 rc = SQLITE_NOMEM;
5417 }else{
5418 char aSpec[3];
5419 int iOut = 0;
5420 int i = 0;
5421 int iFirst = 0;
5422
5423 if( bGlob==0 ){
5424 aSpec[0] = '_';
5425 aSpec[1] = '%';
5426 aSpec[2] = 0;
5427 }else{
5428 aSpec[0] = '*';
5429 aSpec[1] = '?';
5430 aSpec[2] = '[';
5431 }
5432
5433 while( i<=nText ){
5434 if( i==nText
5435 || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2]
5436 ){
5437 if( i-iFirst>=3 ){
5438 int jj;
5439 zExpr[iOut++] = '"';
5440 for(jj=iFirst; jj<i; jj++){
5441 zExpr[iOut++] = zText[jj];
5442 if( zText[jj]=='"' ) zExpr[iOut++] = '"';
5443 }
5444 zExpr[iOut++] = '"';
5445 zExpr[iOut++] = ' ';
5446 }
5447 if( zText[i]==aSpec[2] ){
5448 i += 2;
5449 if( zText[i-1]=='^' ) i++;
5450 while( i<nText && zText[i]!=']' ) i++;
5451 }
5452 iFirst = i+1;
5453 }
5454 i++;
5455 }
5456 if( iOut>0 ){
5457 int bAnd = 0;
5458 if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
5459 bAnd = 1;
5460 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
5461 iCol = pConfig->nCol;
5462 }
5463 }
5464 zExpr[iOut] = '\0';
5465 rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
5466 }else{
5467 *pp = 0;
5468 }
5469 sqlite3_free(zExpr);
5470 }
5471
5472 return rc;
5473}
5474
5475/*
5476** Free the expression node object passed as the only argument.
5477*/
5478static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
5479 if( p ){
5480 int i;
5481 for(i=0; i<p->nChild; i++){
5482 sqlite3Fts5ParseNodeFree(p->apChild[i]);
5483 }
5484 sqlite3Fts5ParseNearsetFree(p->pNear);
5485 sqlite3_free(p);
5486 }
5487}
5488
5489/*
5490** Free the expression object passed as the only argument.
5491*/
5492static void sqlite3Fts5ExprFree(Fts5Expr *p){
5493 if( p ){
5494 sqlite3Fts5ParseNodeFree(p->pRoot);
5495 sqlite3_free(p->apExprPhrase);
5496 sqlite3_free(p);
5497 }
5498}
5499
5500static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){
5501 Fts5Parse sParse;
5502 memset(&sParse, 0, sizeof(sParse));
5503
5504 if( *pp1 ){
5505 Fts5Expr *p1 = *pp1;
5506 int nPhrase = p1->nPhrase + p2->nPhrase;
5507
5508 p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND, p1->pRoot, p2->pRoot,0);
5509 p2->pRoot = 0;
5510
5511 if( sParse.rc==SQLITE_OK ){
5512 Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_realloc(
5513 p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*)
5514 );
5515 if( ap==0 ){
5516 sParse.rc = SQLITE_NOMEM;
5517 }else{
5518 int i;
5519 memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*));
5520 for(i=0; i<p2->nPhrase; i++){
5521 ap[i] = p2->apExprPhrase[i];
5522 }
5523 p1->nPhrase = nPhrase;
5524 p1->apExprPhrase = ap;
5525 }
5526 }
5527 sqlite3_free(p2->apExprPhrase);
5528 sqlite3_free(p2);
5529 }else{
5530 *pp1 = p2;
5531 }
5532
5533 return sParse.rc;
5534}
5535
5536/*
5537** Argument pTerm must be a synonym iterator. Return the current rowid
5538** that it points to.
5539*/
5540static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
5541 i64 iRet = 0;
5542 int bRetValid = 0;
5543 Fts5ExprTerm *p;
5544
5545 assert( pTerm );
5546 assert( pTerm->pSynonym );
5547 assert( bDesc==0 || bDesc==1 );
5548 for(p=pTerm; p; p=p->pSynonym){
5549 if( 0==sqlite3Fts5IterEof(p->pIter) ){
5550 i64 iRowid = p->pIter->iRowid;
5551 if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
5552 iRet = iRowid;
5553 bRetValid = 1;
5554 }
5555 }
5556 }
5557
5558 if( pbEof && bRetValid==0 ) *pbEof = 1;
5559 return iRet;
5560}
5561
5562/*
5563** Argument pTerm must be a synonym iterator.
5564*/
5565static int fts5ExprSynonymList(
5566 Fts5ExprTerm *pTerm,
5567 i64 iRowid,
5568 Fts5Buffer *pBuf, /* Use this buffer for space if required */
5569 u8 **pa, int *pn
5570){
5571 Fts5PoslistReader aStatic[4];
5572 Fts5PoslistReader *aIter = aStatic;
5573 int nIter = 0;
5574 int nAlloc = 4;
5575 int rc = SQLITE_OK;
5576 Fts5ExprTerm *p;
5577
5578 assert( pTerm->pSynonym );
5579 for(p=pTerm; p; p=p->pSynonym){
5580 Fts5IndexIter *pIter = p->pIter;
5581 if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){
5582 if( pIter->nData==0 ) continue;
5583 if( nIter==nAlloc ){
5584 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
5585 Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
5586 if( aNew==0 ){
5587 rc = SQLITE_NOMEM;
5588 goto synonym_poslist_out;
5589 }
5590 memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
5591 nAlloc = nAlloc*2;
5592 if( aIter!=aStatic ) sqlite3_free(aIter);
5593 aIter = aNew;
5594 }
5595 sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
5596 assert( aIter[nIter].bEof==0 );
5597 nIter++;
5598 }
5599 }
5600
5601 if( nIter==1 ){
5602 *pa = (u8*)aIter[0].a;
5603 *pn = aIter[0].n;
5604 }else{
5605 Fts5PoslistWriter writer = {0};
5606 i64 iPrev = -1;
5607 fts5BufferZero(pBuf);
5608 while( 1 ){
5609 int i;
5610 i64 iMin = FTS5_LARGEST_INT64;
5611 for(i=0; i<nIter; i++){
5612 if( aIter[i].bEof==0 ){
5613 if( aIter[i].iPos==iPrev ){
5614 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
5615 }
5616 if( aIter[i].iPos<iMin ){
5617 iMin = aIter[i].iPos;
5618 }
5619 }
5620 }
5621 if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
5622 rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
5623 iPrev = iMin;
5624 }
5625 if( rc==SQLITE_OK ){
5626 *pa = pBuf->p;
5627 *pn = pBuf->n;
5628 }
5629 }
5630
5631 synonym_poslist_out:
5632 if( aIter!=aStatic ) sqlite3_free(aIter);
5633 return rc;
5634}
5635
5636
5637/*
5638** All individual term iterators in pPhrase are guaranteed to be valid and
5639** pointing to the same rowid when this function is called. This function
5640** checks if the current rowid really is a match, and if so populates
5641** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
5642** is set to true if this is really a match, or false otherwise.
5643**
5644** SQLITE_OK is returned if an error occurs, or an SQLite error code
5645** otherwise. It is not considered an error code if the current rowid is
5646** not a match.
5647*/
5648static int fts5ExprPhraseIsMatch(
5649 Fts5ExprNode *pNode, /* Node pPhrase belongs to */
5650 Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */
5651 int *pbMatch /* OUT: Set to true if really a match */
5652){
5653 Fts5PoslistWriter writer = {0};
5654 Fts5PoslistReader aStatic[4];
5655 Fts5PoslistReader *aIter = aStatic;
5656 int i;
5657 int rc = SQLITE_OK;
5658 int bFirst = pPhrase->aTerm[0].bFirst;
5659
5660 fts5BufferZero(&pPhrase->poslist);
5661
5662 /* If the aStatic[] array is not large enough, allocate a large array
5663 ** using sqlite3_malloc(). This approach could be improved upon. */
5664 if( pPhrase->nTerm>ArraySize(aStatic) ){
5665 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
5666 aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
5667 if( !aIter ) return SQLITE_NOMEM;
5668 }
5669 memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
5670
5671 /* Initialize a term iterator for each term in the phrase */
5672 for(i=0; i<pPhrase->nTerm; i++){
5673 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
5674 int n = 0;
5675 int bFlag = 0;
5676 u8 *a = 0;
5677 if( pTerm->pSynonym ){
5678 Fts5Buffer buf = {0, 0, 0};
5679 rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
5680 if( rc ){
5681 sqlite3_free(a);
5682 goto ismatch_out;
5683 }
5684 if( a==buf.p ) bFlag = 1;
5685 }else{
5686 a = (u8*)pTerm->pIter->pData;
5687 n = pTerm->pIter->nData;
5688 }
5689 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
5690 aIter[i].bFlag = (u8)bFlag;
5691 if( aIter[i].bEof ) goto ismatch_out;
5692 }
5693
5694 while( 1 ){
5695 int bMatch;
5696 i64 iPos = aIter[0].iPos;
5697 do {
5698 bMatch = 1;
5699 for(i=0; i<pPhrase->nTerm; i++){
5700 Fts5PoslistReader *pPos = &aIter[i];
5701 i64 iAdj = iPos + i;
5702 if( pPos->iPos!=iAdj ){
5703 bMatch = 0;
5704 while( pPos->iPos<iAdj ){
5705 if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
5706 }
5707 if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
5708 }
5709 }
5710 }while( bMatch==0 );
5711
5712 /* Append position iPos to the output */
5713 if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){
5714 rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
5715 if( rc!=SQLITE_OK ) goto ismatch_out;
5716 }
5717
5718 for(i=0; i<pPhrase->nTerm; i++){
5719 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
5720 }
5721 }
5722
5723 ismatch_out:
5724 *pbMatch = (pPhrase->poslist.n>0);
5725 for(i=0; i<pPhrase->nTerm; i++){
5726 if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
5727 }
5728 if( aIter!=aStatic ) sqlite3_free(aIter);
5729 return rc;
5730}
5731
/*
** An object used to read a position list while also exposing the position
** that follows the current one. After each call to
** fts5LookaheadReaderNext(), iPos holds the current position and
** iLookahead the position that the next call will make current.
*/
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};

/* Value stored in Fts5LookaheadReader.iLookahead (and, one step later, in
** Fts5LookaheadReader.iPos) once the end of the position list is reached. */
#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
5742
5743static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
5744 p->iPos = p->iLookahead;
5745 if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
5746 p->iLookahead = FTS5_LOOKAHEAD_EOF;
5747 }
5748 return (p->iPos==FTS5_LOOKAHEAD_EOF);
5749}
5750
5751static int fts5LookaheadReaderInit(
5752 const u8 *a, int n, /* Buffer to read position list from */
5753 Fts5LookaheadReader *p /* Iterator object to initialize */
5754){
5755 memset(p, 0, sizeof(Fts5LookaheadReader));
5756 p->a = a;
5757 p->n = n;
5758 fts5LookaheadReaderNext(p);
5759 return fts5LookaheadReaderNext(p);
5760}
5761
/*
** Per-phrase state used by fts5ExprNearIsMatch(). Each object pairs a
** reader on a phrase's position list with a writer used to write the
** trimmed position list to buffer pOut.
*/
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
  Fts5LookaheadReader reader;     /* Input iterator */
  Fts5PoslistWriter writer;       /* Writer context */
  Fts5Buffer *pOut;               /* Output poslist */
};
5768
/*
** The near-set object passed as the second argument contains more than
** one phrase. All phrases currently point to the same row. The
** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
** tests if the current row contains instances of each phrase sufficiently
** close together to meet the NEAR constraint. Non-zero is returned if it
** does, or zero otherwise.
**
** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
** occurs within this function (*pRc) is set accordingly before returning.
** The return value is zero in both these cases.
**
** If no error occurs and non-zero (a match) is returned, the position-list
** of each phrase object is edited to contain only those entries that
** meet the constraint before returning.
*/
static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
  Fts5NearTrimmer aStatic[4];
  Fts5NearTrimmer *a = aStatic;
  Fts5ExprPhrase **apPhrase = pNear->apPhrase;

  int i;
  int rc = *pRc;
  int bMatch;

  assert( pNear->nPhrase>1 );

  /* If the aStatic[] array is not large enough, allocate a large array
  ** using sqlite3_malloc(). This approach could be improved upon. */
  if( pNear->nPhrase>ArraySize(aStatic) ){
    sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
    a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
  }else{
    memset(aStatic, 0, sizeof(aStatic));
  }
  /* This branch is taken both if an error was passed in via (*pRc) and if
  ** the allocation above failed. */
  if( rc!=SQLITE_OK ){
    *pRc = rc;
    return 0;
  }

  /* Initialize a lookahead iterator for each phrase. After passing the
  ** buffer and buffer size to the lookahead-reader init function, zero
  ** the phrase poslist buffer. The new poslist for the phrase (containing
  ** the same entries as the original with some entries removed on account
  ** of the NEAR constraint) is written over the original even as it is
  ** being read. This is safe as the entries for the new poslist are a
  ** subset of the old, so it is not possible for data yet to be read to
  ** be overwritten. */
  for(i=0; i<pNear->nPhrase; i++){
    Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
    fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
    pPoslist->n = 0;
    a[i].pOut = pPoslist;
  }

  while( 1 ){
    int iAdv;
    i64 iMin;
    i64 iMax;

    /* This block advances the phrase iterators until they point to a set of
    ** entries that together comprise a match. For phrase i, the acceptable
    ** range of positions is from (iMax - nTerm(i) - nNear) to iMax,
    ** inclusive. If any reader reaches EOF, there are no further matches. */
    iMax = a[0].reader.iPos;
    do {
      bMatch = 1;
      for(i=0; i<pNear->nPhrase; i++){
        Fts5LookaheadReader *pPos = &a[i].reader;
        iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
        if( pPos->iPos<iMin || pPos->iPos>iMax ){
          bMatch = 0;
          while( pPos->iPos<iMin ){
            if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
          }
          if( pPos->iPos>iMax ) iMax = pPos->iPos;
        }
      }
    }while( bMatch==0 );

    /* Add an entry to each output position list. An entry is skipped if it
    ** is the same as the one most recently written to that list. The return
    ** value of the append is ignored.
    ** NOTE(review): presumably the append cannot fail here as the output
    ** buffer already holds the (larger) input list - confirm. */
    for(i=0; i<pNear->nPhrase; i++){
      i64 iPos = a[i].reader.iPos;
      Fts5PoslistWriter *pWriter = &a[i].writer;
      if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
        sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
      }
    }

    /* Advance the reader with the smallest lookahead position (the first
    ** such reader if there is a tie). If it is at EOF, the search is over. */
    iAdv = 0;
    iMin = a[0].reader.iLookahead;
    for(i=0; i<pNear->nPhrase; i++){
      if( a[i].reader.iLookahead < iMin ){
        iMin = a[i].reader.iLookahead;
        iAdv = i;
      }
    }
    if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
  }

  /* The row is a match if at least one entry was written to the output
  ** position list of the first phrase. */
  ismatch_out: {
    int bRet = a[0].pOut->n>0;
    *pRc = rc;
    if( a!=aStatic ) sqlite3_free(a);
    return bRet;
  }
}
5875
5876/*
5877** Advance iterator pIter until it points to a value equal to or laster
5878** than the initial value of *piLast. If this means the iterator points
5879** to a value laster than *piLast, update *piLast to the new lastest value.
5880**
5881** If the iterator reaches EOF, set *pbEof to true before returning. If
5882** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
5883** are set, return a non-zero value. Otherwise, return zero.
5884*/
5885static int fts5ExprAdvanceto(
5886 Fts5IndexIter *pIter, /* Iterator to advance */
5887 int bDesc, /* True if iterator is "rowid DESC" */
5888 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
5889 int *pRc, /* OUT: Error code */
5890 int *pbEof /* OUT: Set to true if EOF */
5891){
5892 i64 iLast = *piLast;
5893 i64 iRowid;
5894
5895 iRowid = pIter->iRowid;
5896 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
5897 int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
5898 if( rc || sqlite3Fts5IterEof(pIter) ){
5899 *pRc = rc;
5900 *pbEof = 1;
5901 return 1;
5902 }
5903 iRowid = pIter->iRowid;
5904 assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
5905 }
5906 *piLast = iRowid;
5907
5908 return 0;
5909}
5910
5911static int fts5ExprSynonymAdvanceto(
5912 Fts5ExprTerm *pTerm, /* Term iterator to advance */
5913 int bDesc, /* True if iterator is "rowid DESC" */
5914 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */
5915 int *pRc /* OUT: Error code */
5916){
5917 int rc = SQLITE_OK;
5918 i64 iLast = *piLast;
5919 Fts5ExprTerm *p;
5920 int bEof = 0;
5921
5922 for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
5923 if( sqlite3Fts5IterEof(p->pIter)==0 ){
5924 i64 iRowid = p->pIter->iRowid;
5925 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
5926 rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
5927 }
5928 }
5929 }
5930
5931 if( rc!=SQLITE_OK ){
5932 *pRc = rc;
5933 bEof = 1;
5934 }else{
5935 *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
5936 }
5937 return bEof;
5938}
5939
5940
5941static int fts5ExprNearTest(
5942 int *pRc,
5943 Fts5Expr *pExpr, /* Expression that pNear is a part of */
5944 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */
5945){
5946 Fts5ExprNearset *pNear = pNode->pNear;
5947 int rc = *pRc;
5948
5949 if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
5950 Fts5ExprTerm *pTerm;
5951 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
5952 pPhrase->poslist.n = 0;
5953 for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
5954 Fts5IndexIter *pIter = pTerm->pIter;
5955 if( sqlite3Fts5IterEof(pIter)==0 ){
5956 if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
5957 pPhrase->poslist.n = 1;
5958 }
5959 }
5960 }
5961 return pPhrase->poslist.n;
5962 }else{
5963 int i;
5964
5965 /* Check that each phrase in the nearset matches the current row.
5966 ** Populate the pPhrase->poslist buffers at the same time. If any
5967 ** phrase is not a match, break out of the loop early. */
5968 for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
5969 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
5970 if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym
5971 || pNear->pColset || pPhrase->aTerm[0].bFirst
5972 ){
5973 int bMatch = 0;
5974 rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
5975 if( bMatch==0 ) break;
5976 }else{
5977 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
5978 fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
5979 }
5980 }
5981
5982 *pRc = rc;
5983 if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
5984 return 1;
5985 }
5986 return 0;
5987 }
5988}
5989
5990
5991/*
5992** Initialize all term iterators in the pNear object. If any term is found
5993** to match no documents at all, return immediately without initializing any
5994** further iterators.
5995**
5996** If an error occurs, return an SQLite error code. Otherwise, return
5997** SQLITE_OK. It is not considered an error if some term matches zero
5998** documents.
5999*/
6000static int fts5ExprNearInitAll(
6001 Fts5Expr *pExpr,
6002 Fts5ExprNode *pNode
6003){
6004 Fts5ExprNearset *pNear = pNode->pNear;
6005 int i;
6006
6007 assert( pNode->bNomatch==0 );
6008 for(i=0; i<pNear->nPhrase; i++){
6009 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6010 if( pPhrase->nTerm==0 ){
6011 pNode->bEof = 1;
6012 return SQLITE_OK;
6013 }else{
6014 int j;
6015 for(j=0; j<pPhrase->nTerm; j++){
6016 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
6017 Fts5ExprTerm *p;
6018 int bHit = 0;
6019
6020 for(p=pTerm; p; p=p->pSynonym){
6021 int rc;
6022 if( p->pIter ){
6023 sqlite3Fts5IterClose(p->pIter);
6024 p->pIter = 0;
6025 }
6026 rc = sqlite3Fts5IndexQuery(
6027 pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
6028 (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
6029 (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
6030 pNear->pColset,
6031 &p->pIter
6032 );
6033 assert( (rc==SQLITE_OK)==(p->pIter!=0) );
6034 if( rc!=SQLITE_OK ) return rc;
6035 if( 0==sqlite3Fts5IterEof(p->pIter) ){
6036 bHit = 1;
6037 }
6038 }
6039
6040 if( bHit==0 ){
6041 pNode->bEof = 1;
6042 return SQLITE_OK;
6043 }
6044 }
6045 }
6046 }
6047
6048 pNode->bEof = 0;
6049 return SQLITE_OK;
6050}
6051
6052/*
6053** If pExpr is an ASC iterator, this function returns a value with the
6054** same sign as:
6055**
6056** (iLhs - iRhs)
6057**
6058** Otherwise, if this is a DESC iterator, the opposite is returned:
6059**
6060** (iRhs - iLhs)
6061*/
6062static int fts5RowidCmp(
6063 Fts5Expr *pExpr,
6064 i64 iLhs,
6065 i64 iRhs
6066){
6067 assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
6068 if( pExpr->bDesc==0 ){
6069 if( iLhs<iRhs ) return -1;
6070 return (iLhs > iRhs);
6071 }else{
6072 if( iLhs>iRhs ) return -1;
6073 return (iLhs < iRhs);
6074 }
6075}
6076
6077static void fts5ExprSetEof(Fts5ExprNode *pNode){
6078 int i;
6079 pNode->bEof = 1;
6080 pNode->bNomatch = 0;
6081 for(i=0; i<pNode->nChild; i++){
6082 fts5ExprSetEof(pNode->apChild[i]);
6083 }
6084}
6085
6086static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
6087 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
6088 Fts5ExprNearset *pNear = pNode->pNear;
6089 int i;
6090 for(i=0; i<pNear->nPhrase; i++){
6091 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6092 pPhrase->poslist.n = 0;
6093 }
6094 }else{
6095 int i;
6096 for(i=0; i<pNode->nChild; i++){
6097 fts5ExprNodeZeroPoslist(pNode->apChild[i]);
6098 }
6099 }
6100}
6101
6102
6103
6104/*
6105** Compare the values currently indicated by the two nodes as follows:
6106**
6107** res = (*p1) - (*p2)
6108**
6109** Nodes that point to values that come later in the iteration order are
6110** considered to be larger. Nodes at EOF are the largest of all.
6111**
6112** This means that if the iteration order is ASC, then numerically larger
6113** rowids are considered larger. Or if it is the default DESC, numerically
6114** smaller rowids are larger.
6115*/
6116static int fts5NodeCompare(
6117 Fts5Expr *pExpr,
6118 Fts5ExprNode *p1,
6119 Fts5ExprNode *p2
6120){
6121 if( p2->bEof ) return -1;
6122 if( p1->bEof ) return +1;
6123 return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
6124}
6125
/*
** All individual term iterators in pNear are guaranteed to be valid when
** this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, pNode->bEof is set to true
** before returning.
**
** Once all iterators point to the same rowid, pNode->iRowid is set to that
** rowid and fts5ExprNearTest() is used to determine whether or not the row
** is really a match. If it is not (and no error occurred), the
** pNode->bNomatch flag is set.
**
** SQLITE_OK is returned if no error occurs, or an SQLite error code
** otherwise. It is not considered an error if an iterator reaches
** EOF.
*/
static int fts5ExprNodeTest_STRING(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */
  const int bDesc = pExpr->bDesc;

  /* Check that this node should not be FTS5_TERM */
  assert( pNear->nPhrase>1
       || pNear->apPhrase[0]->nTerm>1
       || pNear->apPhrase[0]->aTerm[0].pSynonym
       || pNear->apPhrase[0]->aTerm[0].bFirst
  );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid. */
  if( pLeft->aTerm[0].pSynonym ){
    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
  }else{
    iLast = pLeft->aTerm[0].pIter->iRowid;
  }

  /* Repeatedly loop through all terms, advancing any that do not point to
  ** iLast (which may move iLast itself further on), until a complete pass
  ** is made in which no iterator has to be advanced. */
  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
        if( pTerm->pSynonym ){
          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
          if( iRowid==iLast ) continue;
          bMatch = 0;
          /* A non-zero return means all synonyms are at EOF or an error
          ** occurred (in which case rc has been set). */
          if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
            pNode->bNomatch = 0;
            pNode->bEof = 1;
            return rc;
          }
        }else{
          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
          if( pIter->iRowid==iLast || pIter->bEof ) continue;
          bMatch = 0;
          /* On EOF or error, fts5ExprAdvanceto() sets pNode->bEof and rc. */
          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
            return rc;
          }
        }
      }
    }
  }while( bMatch==0 );

  pNode->iRowid = iLast;
  pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
  assert( pNode->bEof==0 || pNode->bNomatch==0 );

  return rc;
}
6199
/*
** Advance the first term iterator in the first phrase of pNear (or, if
** that term has synonyms, the relevant synonym iterators). If parameter
** bFromValid is true, sqlite3Fts5IterNextFrom() is used with rowid iFrom.
** Otherwise, sqlite3Fts5IterNext() is used. Set pNode->bEof to true if
** the term reaches EOF or if an error occurs.
**
** If the node is not at EOF following the advance,
** fts5ExprNodeTest_STRING() is invoked to line the other term iterators
** up with the new rowid and test it.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5ExprNodeNext_STRING(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,
  i64 iFrom
){
  Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
  int rc = SQLITE_OK;

  pNode->bNomatch = 0;
  if( pTerm->pSynonym ){
    int bEof = 1;                 /* Cleared if any synonym is not at EOF */
    Fts5ExprTerm *p;

    /* Find the firstest rowid any synonym points to. */
    i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);

    /* Advance each iterator that currently points to iRowid. Or, if iFrom
    ** is valid - each iterator that points to a rowid before iFrom. */
    for(p=pTerm; p; p=p->pSynonym){
      if( sqlite3Fts5IterEof(p->pIter)==0 ){
        i64 ii = p->pIter->iRowid;
        if( ii==iRowid
         || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
        ){
          if( bFromValid ){
            rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
          }else{
            rc = sqlite3Fts5IterNext(p->pIter);
          }
          if( rc!=SQLITE_OK ) break;
          if( sqlite3Fts5IterEof(p->pIter)==0 ){
            bEof = 0;
          }
        }else{
          /* This iterator was not advanced and is not at EOF. */
          bEof = 0;
        }
      }
    }

    /* Set the EOF flag if either all synonym iterators are at EOF or an
    ** error has occurred. */
    pNode->bEof = (rc || bEof);
  }else{
    Fts5IndexIter *pIter = pTerm->pIter;

    assert( Fts5NodeIsString(pNode) );
    if( bFromValid ){
      rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
    }else{
      rc = sqlite3Fts5IterNext(pIter);
    }

    pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  }

  if( pNode->bEof==0 ){
    assert( rc==SQLITE_OK );
    rc = fts5ExprNodeTest_STRING(pExpr, pNode);
  }

  return rc;
}
6270
6271
6272static int fts5ExprNodeTest_TERM(
6273 Fts5Expr *pExpr, /* Expression that pNear is a part of */
6274 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */
6275){
6276 /* As this "NEAR" object is actually a single phrase that consists
6277 ** of a single term only, grab pointers into the poslist managed by the
6278 ** fts5_index.c iterator object. This is much faster than synthesizing
6279 ** a new poslist the way we have to for more complicated phrase or NEAR
6280 ** expressions. */
6281 Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
6282 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
6283
6284 assert( pNode->eType==FTS5_TERM );
6285 assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 );
6286 assert( pPhrase->aTerm[0].pSynonym==0 );
6287
6288 pPhrase->poslist.n = pIter->nData;
6289 if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
6290 pPhrase->poslist.p = (u8*)pIter->pData;
6291 }
6292 pNode->iRowid = pIter->iRowid;
6293 pNode->bNomatch = (pPhrase->poslist.n==0);
6294 return SQLITE_OK;
6295}
6296
6297/*
6298** xNext() method for a node of type FTS5_TERM.
6299*/
6300static int fts5ExprNodeNext_TERM(
6301 Fts5Expr *pExpr,
6302 Fts5ExprNode *pNode,
6303 int bFromValid,
6304 i64 iFrom
6305){
6306 int rc;
6307 Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
6308
6309 assert( pNode->bEof==0 );
6310 if( bFromValid ){
6311 rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
6312 }else{
6313 rc = sqlite3Fts5IterNext(pIter);
6314 }
6315 if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
6316 rc = fts5ExprNodeTest_TERM(pExpr, pNode);
6317 }else{
6318 pNode->bEof = 1;
6319 pNode->bNomatch = 0;
6320 }
6321 return rc;
6322}
6323
6324static void fts5ExprNodeTest_OR(
6325 Fts5Expr *pExpr, /* Expression of which pNode is a part */
6326 Fts5ExprNode *pNode /* Expression node to test */
6327){
6328 Fts5ExprNode *pNext = pNode->apChild[0];
6329 int i;
6330
6331 for(i=1; i<pNode->nChild; i++){
6332 Fts5ExprNode *pChild = pNode->apChild[i];
6333 int cmp = fts5NodeCompare(pExpr, pNext, pChild);
6334 if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
6335 pNext = pChild;
6336 }
6337 }
6338 pNode->iRowid = pNext->iRowid;
6339 pNode->bEof = pNext->bEof;
6340 pNode->bNomatch = pNext->bNomatch;
6341}
6342
6343static int fts5ExprNodeNext_OR(
6344 Fts5Expr *pExpr,
6345 Fts5ExprNode *pNode,
6346 int bFromValid,
6347 i64 iFrom
6348){
6349 int i;
6350 i64 iLast = pNode->iRowid;
6351
6352 for(i=0; i<pNode->nChild; i++){
6353 Fts5ExprNode *p1 = pNode->apChild[i];
6354 assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
6355 if( p1->bEof==0 ){
6356 if( (p1->iRowid==iLast)
6357 || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
6358 ){
6359 int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
6360 if( rc!=SQLITE_OK ){
6361 pNode->bNomatch = 0;
6362 return rc;
6363 }
6364 }
6365 }
6366 }
6367
6368 fts5ExprNodeTest_OR(pExpr, pNode);
6369 return SQLITE_OK;
6370}
6371
/*
** Argument pAnd is an FTS5_AND node that is not at EOF.
**
** Starting with rowid pAnd->iRowid, advance the child nodes until either
** they all point to the same rowid or one of them reaches EOF. In the
** latter case fts5ExprSetEof() is invoked on pAnd. Before returning
** successfully, pAnd->iRowid is set to the lastest rowid seen.
**
** If, on the final pass through the children, any child has its bNomatch
** flag set, then pAnd->bNomatch is set as well (unless EOF was reached).
** And, if pAnd is not the root node of the expression, the position lists
** of all phrases in the sub-tree are zeroed.
**
** SQLITE_OK is returned if successful. If an error occurs while advancing
** a child node, pAnd->bNomatch is cleared and the error code returned.
*/
static int fts5ExprNodeTest_AND(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pAnd              /* FTS5_AND node to advance */
){
  int iChild;
  i64 iLast = pAnd->iRowid;
  int rc = SQLITE_OK;
  int bMatch;

  assert( pAnd->bEof==0 );
  do {
    /* bNomatch is recalculated from scratch on each pass. */
    pAnd->bNomatch = 0;
    bMatch = 1;
    for(iChild=0; iChild<pAnd->nChild; iChild++){
      Fts5ExprNode *pChild = pAnd->apChild[iChild];
      int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
      if( cmp>0 ){
        /* Advance pChild until it points to iLast or laster */
        rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
        if( rc!=SQLITE_OK ){
          pAnd->bNomatch = 0;
          return rc;
        }
      }

      /* If the child node is now at EOF, so is the parent AND node. Otherwise,
      ** the child node is guaranteed to have advanced at least as far as
      ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
      ** new lastest rowid seen so far. */
      assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
      if( pChild->bEof ){
        fts5ExprSetEof(pAnd);
        /* Set bMatch so that the outer do-while loop exits as well. */
        bMatch = 1;
        break;
      }else if( iLast!=pChild->iRowid ){
        bMatch = 0;
        iLast = pChild->iRowid;
      }

      if( pChild->bNomatch ){
        pAnd->bNomatch = 1;
      }
    }
  }while( bMatch==0 );

  if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
    fts5ExprNodeZeroPoslist(pAnd);
  }
  pAnd->iRowid = iLast;
  return SQLITE_OK;
}
6426
6427static int fts5ExprNodeNext_AND(
6428 Fts5Expr *pExpr,
6429 Fts5ExprNode *pNode,
6430 int bFromValid,
6431 i64 iFrom
6432){
6433 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
6434 if( rc==SQLITE_OK ){
6435 rc = fts5ExprNodeTest_AND(pExpr, pNode);
6436 }else{
6437 pNode->bNomatch = 0;
6438 }
6439 return rc;
6440}
6441
6442static int fts5ExprNodeTest_NOT(
6443 Fts5Expr *pExpr, /* Expression pPhrase belongs to */
6444 Fts5ExprNode *pNode /* FTS5_NOT node to advance */
6445){
6446 int rc = SQLITE_OK;
6447 Fts5ExprNode *p1 = pNode->apChild[0];
6448 Fts5ExprNode *p2 = pNode->apChild[1];
6449 assert( pNode->nChild==2 );
6450
6451 while( rc==SQLITE_OK && p1->bEof==0 ){
6452 int cmp = fts5NodeCompare(pExpr, p1, p2);
6453 if( cmp>0 ){
6454 rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
6455 cmp = fts5NodeCompare(pExpr, p1, p2);
6456 }
6457 assert( rc!=SQLITE_OK || cmp<=0 );
6458 if( cmp || p2->bNomatch ) break;
6459 rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
6460 }
6461 pNode->bEof = p1->bEof;
6462 pNode->bNomatch = p1->bNomatch;
6463 pNode->iRowid = p1->iRowid;
6464 if( p1->bEof ){
6465 fts5ExprNodeZeroPoslist(p2);
6466 }
6467 return rc;
6468}
6469
6470static int fts5ExprNodeNext_NOT(
6471 Fts5Expr *pExpr,
6472 Fts5ExprNode *pNode,
6473 int bFromValid,
6474 i64 iFrom
6475){
6476 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
6477 if( rc==SQLITE_OK ){
6478 rc = fts5ExprNodeTest_NOT(pExpr, pNode);
6479 }
6480 if( rc!=SQLITE_OK ){
6481 pNode->bNomatch = 0;
6482 }
6483 return rc;
6484}
6485
6486/*
6487** If pNode currently points to a match, this function returns SQLITE_OK
6488** without modifying it. Otherwise, pNode is advanced until it does point
6489** to a match or EOF is reached.
6490*/
6491static int fts5ExprNodeTest(
6492 Fts5Expr *pExpr, /* Expression of which pNode is a part */
6493 Fts5ExprNode *pNode /* Expression node to test */
6494){
6495 int rc = SQLITE_OK;
6496 if( pNode->bEof==0 ){
6497 switch( pNode->eType ){
6498
6499 case FTS5_STRING: {
6500 rc = fts5ExprNodeTest_STRING(pExpr, pNode);
6501 break;
6502 }
6503
6504 case FTS5_TERM: {
6505 rc = fts5ExprNodeTest_TERM(pExpr, pNode);
6506 break;
6507 }
6508
6509 case FTS5_AND: {
6510 rc = fts5ExprNodeTest_AND(pExpr, pNode);
6511 break;
6512 }
6513
6514 case FTS5_OR: {
6515 fts5ExprNodeTest_OR(pExpr, pNode);
6516 break;
6517 }
6518
6519 default: assert( pNode->eType==FTS5_NOT ); {
6520 rc = fts5ExprNodeTest_NOT(pExpr, pNode);
6521 break;
6522 }
6523 }
6524 }
6525 return rc;
6526}
6527
6528
6529/*
6530** Set node pNode, which is part of expression pExpr, to point to the first
6531** match. If there are no matches, set the Node.bEof flag to indicate EOF.
6532**
6533** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
6534** It is not an error if there are no matches.
6535*/
6536static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
6537 int rc = SQLITE_OK;
6538 pNode->bEof = 0;
6539 pNode->bNomatch = 0;
6540
6541 if( Fts5NodeIsString(pNode) ){
6542 /* Initialize all term iterators in the NEAR object. */
6543 rc = fts5ExprNearInitAll(pExpr, pNode);
6544 }else if( pNode->xNext==0 ){
6545 pNode->bEof = 1;
6546 }else{
6547 int i;
6548 int nEof = 0;
6549 for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
6550 Fts5ExprNode *pChild = pNode->apChild[i];
6551 rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
6552 assert( pChild->bEof==0 || pChild->bEof==1 );
6553 nEof += pChild->bEof;
6554 }
6555 pNode->iRowid = pNode->apChild[0]->iRowid;
6556
6557 switch( pNode->eType ){
6558 case FTS5_AND:
6559 if( nEof>0 ) fts5ExprSetEof(pNode);
6560 break;
6561
6562 case FTS5_OR:
6563 if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
6564 break;
6565
6566 default:
6567 assert( pNode->eType==FTS5_NOT );
6568 pNode->bEof = pNode->apChild[0]->bEof;
6569 break;
6570 }
6571 }
6572
6573 if( rc==SQLITE_OK ){
6574 rc = fts5ExprNodeTest(pExpr, pNode);
6575 }
6576 return rc;
6577}
6578
6579
6580/*
6581** Begin iterating through the set of documents in index pIdx matched by
6582** the MATCH expression passed as the first argument. If the "bDesc"
6583** parameter is passed a non-zero value, iteration is in descending rowid
6584** order. Or, if it is zero, in ascending order.
6585**
6586** If iterating in ascending rowid order (bDesc==0), the first document
6587** visited is that with the smallest rowid that is larger than or equal
6588** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
6589** then the first document visited must have a rowid smaller than or
6590** equal to iFirst.
6591**
6592** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
6593** is not considered an error if the query does not match any documents.
6594*/
6595static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
6596 Fts5ExprNode *pRoot = p->pRoot;
6597 int rc; /* Return code */
6598
6599 p->pIndex = pIdx;
6600 p->bDesc = bDesc;
6601 rc = fts5ExprNodeFirst(p, pRoot);
6602
6603 /* If not at EOF but the current rowid occurs earlier than iFirst in
6604 ** the iteration order, move to document iFirst or later. */
6605 if( rc==SQLITE_OK
6606 && 0==pRoot->bEof
6607 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
6608 ){
6609 rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
6610 }
6611
6612 /* If the iterator is not at a real match, skip forward until it is. */
6613 while( pRoot->bNomatch && rc==SQLITE_OK ){
6614 assert( pRoot->bEof==0 );
6615 rc = fts5ExprNodeNext(p, pRoot, 0, 0);
6616 }
6617 return rc;
6618}
6619
6620/*
6621** Move to the next document
6622**
6623** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
6624** is not considered an error if the query does not match any documents.
6625*/
6626static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
6627 int rc;
6628 Fts5ExprNode *pRoot = p->pRoot;
6629 assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
6630 do {
6631 rc = fts5ExprNodeNext(p, pRoot, 0, 0);
6632 assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
6633 }while( pRoot->bNomatch );
6634 if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
6635 pRoot->bEof = 1;
6636 }
6637 return rc;
6638}
6639
6640static int sqlite3Fts5ExprEof(Fts5Expr *p){
6641 return p->pRoot->bEof;
6642}
6643
6644static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
6645 return p->pRoot->iRowid;
6646}
6647
6648static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
6649 int rc = SQLITE_OK;
6650 *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
6651 return rc;
6652}
6653
6654/*
6655** Free the phrase object passed as the only argument.
6656*/
6657static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
6658 if( pPhrase ){
6659 int i;
6660 for(i=0; i<pPhrase->nTerm; i++){
6661 Fts5ExprTerm *pSyn;
6662 Fts5ExprTerm *pNext;
6663 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
6664 sqlite3_free(pTerm->zTerm);
6665 sqlite3Fts5IterClose(pTerm->pIter);
6666 for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
6667 pNext = pSyn->pSynonym;
6668 sqlite3Fts5IterClose(pSyn->pIter);
6669 fts5BufferFree((Fts5Buffer*)&pSyn[1]);
6670 sqlite3_free(pSyn);
6671 }
6672 }
6673 if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
6674 sqlite3_free(pPhrase);
6675 }
6676}
6677
6678/*
6679** Set the "bFirst" flag on the first token of the phrase passed as the
6680** only argument.
6681*/
6682static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){
6683 if( pPhrase && pPhrase->nTerm ){
6684 pPhrase->aTerm[0].bFirst = 1;
6685 }
6686}
6687
/*
** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
** appended to it and the results returned.
**
** If no error has occurred and pPhrase is NULL, pNear is returned
** unchanged.
**
** If the nearset already holds at least one phrase and either the new
** phrase or the most recently added phrase contains no terms, the empty
** one is freed and dropped from both the nearset and pParse->apPhrase[],
** so that only the non-empty (or surviving) phrase occupies the slot.
**
** If an OOM error occurs, or if pParse->rc is already set to an error when
** this function is called, both the pNear and pPhrase objects are freed and
** NULL returned.
*/
static Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNearset *pNear,         /* Existing nearset, or NULL */
  Fts5ExprPhrase *pPhrase         /* Recently parsed phrase */
){
  const int SZALLOC = 8;          /* apPhrase[] grows in steps of this size */
  Fts5ExprNearset *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    if( pPhrase==0 ){
      return pNear;
    }
    if( pNear==0 ){
      /* First phrase: allocate a zeroed nearset with SZALLOC extra slots */
      sqlite3_int64 nByte;
      nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
      pRet = sqlite3_malloc64(nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }else{
        memset(pRet, 0, (size_t)nByte);
      }
    }else if( (pNear->nPhrase % SZALLOC)==0 ){
      /* The phrase count has reached a multiple of SZALLOC: enlarge the
      ** allocation by another SZALLOC slots. If the realloc fails pNear is
      ** still valid and is released by the cleanup code below. */
      int nNew = pNear->nPhrase + SZALLOC;
      sqlite3_int64 nByte;

      nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
      pRet = (Fts5ExprNearset*)sqlite3_realloc64(pNear, nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }
    }else{
      /* There is still a free slot in the existing allocation */
      pRet = pNear;
    }
  }

  if( pRet==0 ){
    /* Error path: this function owns both arguments, so free them */
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNearsetFree(pNear);
    sqlite3Fts5ParsePhraseFree(pPhrase);
  }else{
    if( pRet->nPhrase>0 ){
      Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
      assert( pParse!=0 );
      assert( pParse->apPhrase!=0 );
      assert( pParse->nPhrase>=2 );
      assert( pLast==pParse->apPhrase[pParse->nPhrase-2] );
      if( pPhrase->nTerm==0 ){
        /* New phrase is empty: discard it and re-append the previous one */
        fts5ExprPhraseFree(pPhrase);
        pRet->nPhrase--;
        pParse->nPhrase--;
        pPhrase = pLast;
      }else if( pLast->nTerm==0 ){
        /* Previous phrase is empty: discard it and let the new phrase take
        ** over its slot in both arrays */
        fts5ExprPhraseFree(pLast);
        pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
        pParse->nPhrase--;
        pRet->nPhrase--;
      }
    }
    pRet->apPhrase[pRet->nPhrase++] = pPhrase;
  }
  return pRet;
}
6758
/*
** Context object handed to the fts5ParseTokenize() tokenizer callback.
*/
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;        /* Phrase under construction (NULL if none) */
  int rc;                         /* Error code; callback is a no-op once set */
};
6764
6765/*
6766** Callback for tokenizing terms used by ParseTerm().
6767*/
6768static int fts5ParseTokenize(
6769 void *pContext, /* Pointer to Fts5InsertCtx object */
6770 int tflags, /* Mask of FTS5_TOKEN_* flags */
6771 const char *pToken, /* Buffer containing token */
6772 int nToken, /* Size of token in bytes */
6773 int iUnused1, /* Start offset of token */
6774 int iUnused2 /* End offset of token */
6775){
6776 int rc = SQLITE_OK;
6777 const int SZALLOC = 8;
6778 TokenCtx *pCtx = (TokenCtx*)pContext;
6779 Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
6780
6781 UNUSED_PARAM2(iUnused1, iUnused2);
6782
6783 /* If an error has already occurred, this is a no-op */
6784 if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
6785 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
6786
6787 if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
6788 Fts5ExprTerm *pSyn;
6789 sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
6790 pSyn = (Fts5ExprTerm*)sqlite3_malloc64(nByte);
6791 if( pSyn==0 ){
6792 rc = SQLITE_NOMEM;
6793 }else{
6794 memset(pSyn, 0, (size_t)nByte);
6795 pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
6796 memcpy(pSyn->zTerm, pToken, nToken);
6797 pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
6798 pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
6799 }
6800 }else{
6801 Fts5ExprTerm *pTerm;
6802 if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
6803 Fts5ExprPhrase *pNew;
6804 int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
6805
6806 pNew = (Fts5ExprPhrase*)sqlite3_realloc64(pPhrase,
6807 sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
6808 );
6809 if( pNew==0 ){
6810 rc = SQLITE_NOMEM;
6811 }else{
6812 if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
6813 pCtx->pPhrase = pPhrase = pNew;
6814 pNew->nTerm = nNew - SZALLOC;
6815 }
6816 }
6817
6818 if( rc==SQLITE_OK ){
6819 pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
6820 memset(pTerm, 0, sizeof(Fts5ExprTerm));
6821 pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
6822 }
6823 }
6824
6825 pCtx->rc = rc;
6826 return rc;
6827}
6828
6829
/*
** Free the phrase object passed as the only argument. This is a thin
** wrapper around fts5ExprPhraseFree().
*/
static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
  fts5ExprPhraseFree(pPhrase);
}
6836
6837/*
6838** Free the phrase object passed as the second argument.
6839*/
6840static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
6841 if( pNear ){
6842 int i;
6843 for(i=0; i<pNear->nPhrase; i++){
6844 fts5ExprPhraseFree(pNear->apPhrase[i]);
6845 }
6846 sqlite3_free(pNear->pColset);
6847 sqlite3_free(pNear);
6848 }
6849}
6850
/*
** Store expression tree p in pParse->pExpr. The field must not already
** have been set when this function is called.
*/
static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
  assert( pParse->pExpr==0 );
  pParse->pExpr = p;
}
6855
6856static int parseGrowPhraseArray(Fts5Parse *pParse){
6857 if( (pParse->nPhrase % 8)==0 ){
6858 sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
6859 Fts5ExprPhrase **apNew;
6860 apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte);
6861 if( apNew==0 ){
6862 pParse->rc = SQLITE_NOMEM;
6863 return SQLITE_NOMEM;
6864 }
6865 pParse->apPhrase = apNew;
6866 }
6867 return SQLITE_OK;
6868}
6869
/*
** This function is called by the parser to process a string token. The
** string may or may not be quoted. In any case it is tokenized and a
** phrase object consisting of all tokens returned.
**
** If pAppend is not NULL, the new tokens are appended to that phrase.
** Otherwise a new phrase is started and a slot for it is claimed in
** pParse->apPhrase[]. Either way the last slot of pParse->apPhrase[] is
** updated to point at the returned phrase, since appending may have moved
** it to a new allocation.
**
** If bPrefix is true, the final term of the phrase is flagged as a prefix
** term.
**
** If an error occurs, pParse->rc is set, the phrase (including pAppend,
** if any) is freed and NULL is returned.
*/
static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprPhrase *pAppend,        /* Phrase to append to */
  Fts5Token *pToken,              /* String to tokenize */
  int bPrefix                     /* True if there is a trailing "*" */
){
  Fts5Config *pConfig = pParse->pConfig;
  TokenCtx sCtx;                  /* Context object passed to callback */
  int rc;                         /* Tokenize return code */
  char *z = 0;                    /* Dequoted copy of the token text */

  memset(&sCtx, 0, sizeof(TokenCtx));
  sCtx.pPhrase = pAppend;

  /* Copy the token text, strip any quoting and run the tokenizer over it.
  ** fts5ParseTokenize() adds each resulting token to sCtx.pPhrase. */
  rc = fts5ParseStringFromToken(pToken, &z);
  if( rc==SQLITE_OK ){
    int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
    int n;
    sqlite3Fts5Dequote(z);
    n = (int)strlen(z);
    rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
  }
  sqlite3_free(z);
  /* An error may come from the tokenizer call itself or from the callback */
  if( rc || (rc = sCtx.rc) ){
    pParse->rc = rc;
    fts5ExprPhraseFree(sCtx.pPhrase);
    sCtx.pPhrase = 0;
  }else{

    if( pAppend==0 ){
      /* Starting a new phrase: reserve a slot in pParse->apPhrase[] */
      if( parseGrowPhraseArray(pParse) ){
        fts5ExprPhraseFree(sCtx.pPhrase);
        return 0;
      }
      pParse->nPhrase++;
    }

    if( sCtx.pPhrase==0 ){
      /* This happens when parsing a token or quoted phrase that contains
      ** no token characters at all. (e.g ... MATCH '""'). */
      sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
    }else if( sCtx.pPhrase->nTerm ){
      sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
    }
    pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
  }

  return sCtx.pPhrase;
}
6924
6925/*
6926** Create a new FTS5 expression by cloning phrase iPhrase of the
6927** expression passed as the second argument.
6928*/
6929static int sqlite3Fts5ExprClonePhrase(
6930 Fts5Expr *pExpr,
6931 int iPhrase,
6932 Fts5Expr **ppNew
6933){
6934 int rc = SQLITE_OK; /* Return code */
6935 Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */
6936 Fts5Expr *pNew = 0; /* Expression to return via *ppNew */
6937 TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */
6938
6939 pOrig = pExpr->apExprPhrase[iPhrase];
6940 pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
6941 if( rc==SQLITE_OK ){
6942 pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
6943 sizeof(Fts5ExprPhrase*));
6944 }
6945 if( rc==SQLITE_OK ){
6946 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
6947 sizeof(Fts5ExprNode));
6948 }
6949 if( rc==SQLITE_OK ){
6950 pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
6951 sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
6952 }
6953 if( rc==SQLITE_OK ){
6954 Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset;
6955 if( pColsetOrig ){
6956 sqlite3_int64 nByte;
6957 Fts5Colset *pColset;
6958 nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int);
6959 pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte);
6960 if( pColset ){
6961 memcpy(pColset, pColsetOrig, (size_t)nByte);
6962 }
6963 pNew->pRoot->pNear->pColset = pColset;
6964 }
6965 }
6966
6967 if( pOrig->nTerm ){
6968 int i; /* Used to iterate through phrase terms */
6969 for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
6970 int tflags = 0;
6971 Fts5ExprTerm *p;
6972 for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
6973 const char *zTerm = p->zTerm;
6974 rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
6975 0, 0);
6976 tflags = FTS5_TOKEN_COLOCATED;
6977 }
6978 if( rc==SQLITE_OK ){
6979 sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
6980 sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst;
6981 }
6982 }
6983 }else{
6984 /* This happens when parsing a token or quoted phrase that contains
6985 ** no token characters at all. (e.g ... MATCH '""'). */
6986 sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase));
6987 }
6988
6989 if( rc==SQLITE_OK && ALWAYS(sCtx.pPhrase) ){
6990 /* All the allocations succeeded. Put the expression object together. */
6991 pNew->pIndex = pExpr->pIndex;
6992 pNew->pConfig = pExpr->pConfig;
6993 pNew->nPhrase = 1;
6994 pNew->apExprPhrase[0] = sCtx.pPhrase;
6995 pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
6996 pNew->pRoot->pNear->nPhrase = 1;
6997 sCtx.pPhrase->pNode = pNew->pRoot;
6998
6999 if( pOrig->nTerm==1
7000 && pOrig->aTerm[0].pSynonym==0
7001 && pOrig->aTerm[0].bFirst==0
7002 ){
7003 pNew->pRoot->eType = FTS5_TERM;
7004 pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
7005 }else{
7006 pNew->pRoot->eType = FTS5_STRING;
7007 pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
7008 }
7009 }else{
7010 sqlite3Fts5ExprFree(pNew);
7011 fts5ExprPhraseFree(sCtx.pPhrase);
7012 pNew = 0;
7013 }
7014
7015 *ppNew = pNew;
7016 return rc;
7017}
7018
7019
7020/*
7021** Token pTok has appeared in a MATCH expression where the NEAR operator
7022** is expected. If token pTok does not contain "NEAR", store an error
7023** in the pParse object.
7024*/
7025static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
7026 if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
7027 sqlite3Fts5ParseError(
7028 pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
7029 );
7030 }
7031}
7032
7033static void sqlite3Fts5ParseSetDistance(
7034 Fts5Parse *pParse,
7035 Fts5ExprNearset *pNear,
7036 Fts5Token *p
7037){
7038 if( pNear ){
7039 int nNear = 0;
7040 int i;
7041 if( p->n ){
7042 for(i=0; i<p->n; i++){
7043 char c = (char)p->p[i];
7044 if( c<'0' || c>'9' ){
7045 sqlite3Fts5ParseError(
7046 pParse, "expected integer, got \"%.*s\"", p->n, p->p
7047 );
7048 return;
7049 }
7050 nNear = nNear * 10 + (p->p[i] - '0');
7051 }
7052 }else{
7053 nNear = FTS5_DEFAULT_NEARDIST;
7054 }
7055 pNear->nNear = nNear;
7056 }
7057}
7058
7059/*
7060** The second argument passed to this function may be NULL, or it may be
7061** an existing Fts5Colset object. This function returns a pointer to
7062** a new colset object containing the contents of (p) with new value column
7063** number iCol appended.
7064**
7065** If an OOM error occurs, store an error code in pParse and return NULL.
7066** The old colset object (if any) is not freed in this case.
7067*/
7068static Fts5Colset *fts5ParseColset(
7069 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
7070 Fts5Colset *p, /* Existing colset object */
7071 int iCol /* New column to add to colset object */
7072){
7073 int nCol = p ? p->nCol : 0; /* Num. columns already in colset object */
7074 Fts5Colset *pNew; /* New colset object to return */
7075
7076 assert( pParse->rc==SQLITE_OK );
7077 assert( iCol>=0 && iCol<pParse->pConfig->nCol );
7078
7079 pNew = sqlite3_realloc64(p, sizeof(Fts5Colset) + sizeof(int)*nCol);
7080 if( pNew==0 ){
7081 pParse->rc = SQLITE_NOMEM;
7082 }else{
7083 int *aiCol = pNew->aiCol;
7084 int i, j;
7085 for(i=0; i<nCol; i++){
7086 if( aiCol[i]==iCol ) return pNew;
7087 if( aiCol[i]>iCol ) break;
7088 }
7089 for(j=nCol; j>i; j--){
7090 aiCol[j] = aiCol[j-1];
7091 }
7092 aiCol[i] = iCol;
7093 pNew->nCol = nCol+1;
7094
7095#ifndef NDEBUG
7096 /* Check that the array is in order and contains no duplicate entries. */
7097 for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
7098#endif
7099 }
7100
7101 return pNew;
7102}
7103
7104/*
7105** Allocate and return an Fts5Colset object specifying the inverse of
7106** the colset passed as the second argument. Free the colset passed
7107** as the second argument before returning.
7108*/
7109static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){
7110 Fts5Colset *pRet;
7111 int nCol = pParse->pConfig->nCol;
7112
7113 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc,
7114 sizeof(Fts5Colset) + sizeof(int)*nCol
7115 );
7116 if( pRet ){
7117 int i;
7118 int iOld = 0;
7119 for(i=0; i<nCol; i++){
7120 if( iOld>=p->nCol || p->aiCol[iOld]!=i ){
7121 pRet->aiCol[pRet->nCol++] = i;
7122 }else{
7123 iOld++;
7124 }
7125 }
7126 }
7127
7128 sqlite3_free(p);
7129 return pRet;
7130}
7131
7132static Fts5Colset *sqlite3Fts5ParseColset(
7133 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */
7134 Fts5Colset *pColset, /* Existing colset object */
7135 Fts5Token *p
7136){
7137 Fts5Colset *pRet = 0;
7138 int iCol;
7139 char *z; /* Dequoted copy of token p */
7140
7141 z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
7142 if( pParse->rc==SQLITE_OK ){
7143 Fts5Config *pConfig = pParse->pConfig;
7144 sqlite3Fts5Dequote(z);
7145 for(iCol=0; iCol<pConfig->nCol; iCol++){
7146 if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
7147 }
7148 if( iCol==pConfig->nCol ){
7149 sqlite3Fts5ParseError(pParse, "no such column: %s", z);
7150 }else{
7151 pRet = fts5ParseColset(pParse, pColset, iCol);
7152 }
7153 sqlite3_free(z);
7154 }
7155
7156 if( pRet==0 ){
7157 assert( pParse->rc!=SQLITE_OK );
7158 sqlite3_free(pColset);
7159 }
7160
7161 return pRet;
7162}
7163
7164/*
7165** If argument pOrig is NULL, or if (*pRc) is set to anything other than
7166** SQLITE_OK when this function is called, NULL is returned.
7167**
7168** Otherwise, a copy of (*pOrig) is made into memory obtained from
7169** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation
7170** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned.
7171*/
7172static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){
7173 Fts5Colset *pRet;
7174 if( pOrig ){
7175 sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int);
7176 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte);
7177 if( pRet ){
7178 memcpy(pRet, pOrig, (size_t)nByte);
7179 }
7180 }else{
7181 pRet = 0;
7182 }
7183 return pRet;
7184}
7185
7186/*
7187** Remove from colset pColset any columns that are not also in colset pMerge.
7188*/
7189static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){
7190 int iIn = 0; /* Next input in pColset */
7191 int iMerge = 0; /* Next input in pMerge */
7192 int iOut = 0; /* Next output slot in pColset */
7193
7194 while( iIn<pColset->nCol && iMerge<pMerge->nCol ){
7195 int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge];
7196 if( iDiff==0 ){
7197 pColset->aiCol[iOut++] = pMerge->aiCol[iMerge];
7198 iMerge++;
7199 iIn++;
7200 }else if( iDiff>0 ){
7201 iMerge++;
7202 }else{
7203 iIn++;
7204 }
7205 }
7206 pColset->nCol = iOut;
7207}
7208
7209/*
7210** Recursively apply colset pColset to expression node pNode and all of
7211** its decendents. If (*ppFree) is not NULL, it contains a spare copy
7212** of pColset. This function may use the spare copy and set (*ppFree) to
7213** zero, or it may create copies of pColset using fts5CloneColset().
7214*/
7215static void fts5ParseSetColset(
7216 Fts5Parse *pParse,
7217 Fts5ExprNode *pNode,
7218 Fts5Colset *pColset,
7219 Fts5Colset **ppFree
7220){
7221 if( pParse->rc==SQLITE_OK ){
7222 assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING
7223 || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR
7224 || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF
7225 );
7226 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
7227 Fts5ExprNearset *pNear = pNode->pNear;
7228 if( pNear->pColset ){
7229 fts5MergeColset(pNear->pColset, pColset);
7230 if( pNear->pColset->nCol==0 ){
7231 pNode->eType = FTS5_EOF;
7232 pNode->xNext = 0;
7233 }
7234 }else if( *ppFree ){
7235 pNear->pColset = pColset;
7236 *ppFree = 0;
7237 }else{
7238 pNear->pColset = fts5CloneColset(&pParse->rc, pColset);
7239 }
7240 }else{
7241 int i;
7242 assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 );
7243 for(i=0; i<pNode->nChild; i++){
7244 fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree);
7245 }
7246 }
7247 }
7248}
7249
7250/*
7251** Apply colset pColset to expression node pExpr and all of its descendents.
7252*/
7253static void sqlite3Fts5ParseSetColset(
7254 Fts5Parse *pParse,
7255 Fts5ExprNode *pExpr,
7256 Fts5Colset *pColset
7257){
7258 Fts5Colset *pFree = pColset;
7259 if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){
7260 sqlite3Fts5ParseError(pParse,
7261 "fts5: column queries are not supported (detail=none)"
7262 );
7263 }else{
7264 fts5ParseSetColset(pParse, pExpr, pColset, &pFree);
7265 }
7266 sqlite3_free(pFree);
7267}
7268
7269static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
7270 switch( pNode->eType ){
7271 case FTS5_STRING: {
7272 Fts5ExprNearset *pNear = pNode->pNear;
7273 if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1
7274 && pNear->apPhrase[0]->aTerm[0].pSynonym==0
7275 && pNear->apPhrase[0]->aTerm[0].bFirst==0
7276 ){
7277 pNode->eType = FTS5_TERM;
7278 pNode->xNext = fts5ExprNodeNext_TERM;
7279 }else{
7280 pNode->xNext = fts5ExprNodeNext_STRING;
7281 }
7282 break;
7283 };
7284
7285 case FTS5_OR: {
7286 pNode->xNext = fts5ExprNodeNext_OR;
7287 break;
7288 };
7289
7290 case FTS5_AND: {
7291 pNode->xNext = fts5ExprNodeNext_AND;
7292 break;
7293 };
7294
7295 default: assert( pNode->eType==FTS5_NOT ); {
7296 pNode->xNext = fts5ExprNodeNext_NOT;
7297 break;
7298 };
7299 }
7300}
7301
7302static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
7303 if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
7304 int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
7305 memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
7306 p->nChild += pSub->nChild;
7307 sqlite3_free(pSub);
7308 }else{
7309 p->apChild[p->nChild++] = pSub;
7310 }
7311}
7312
/*
** This function is used when parsing LIKE or GLOB patterns against
** trigram indexes that specify either detail=column or detail=none.
** It converts a phrase:
**
**     abc + def + ghi
**
** into an AND tree:
**
**     abc AND def AND ghi
**
** Argument pNear must contain exactly one phrase. Each term of that phrase
** becomes a new single-term phrase wrapped in its own FTS5_STRING node,
** and those nodes are the children of the returned FTS5_AND node. The
** new phrases replace the original phrase in pParse->apPhrase[].
**
** On success pNear is freed and the new node returned. If an error
** occurs, NULL is returned with pParse->rc set; pNear is not freed by
** this function in that case.
*/
static Fts5ExprNode *fts5ParsePhraseToAnd(
  Fts5Parse *pParse,
  Fts5ExprNearset *pNear
){
  int nTerm = pNear->apPhrase[0]->nTerm;
  int ii;
  int nByte;
  Fts5ExprNode *pRet;

  assert( pNear->nPhrase==1 );
  assert( pParse->bPhraseToAnd );

  nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*);
  pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
  if( pRet ){
    pRet->eType = FTS5_AND;
    pRet->nChild = nTerm;
    fts5ExprAssignXNext(pRet);
    /* Give up the slot in pParse->apPhrase[] held by the original phrase.
    ** The loop below refills the array with the single-term phrases. */
    pParse->nPhrase--;
    for(ii=0; ii<nTerm; ii++){
      Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
          &pParse->rc, sizeof(Fts5ExprPhrase)
      );
      if( pPhrase ){
        if( parseGrowPhraseArray(pParse) ){
          fts5ExprPhraseFree(pPhrase);
        }else{
          pParse->apPhrase[pParse->nPhrase++] = pPhrase;
          pPhrase->nTerm = 1;
          pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
              &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
          );
          pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
              0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
          );
        }
      }
    }

    if( pParse->rc ){
      /* Something failed part way through. Discard the partial tree. */
      sqlite3Fts5ParseNodeFree(pRet);
      pRet = 0;
    }else{
      /* The original nearset and its phrase are no longer required */
      sqlite3Fts5ParseNearsetFree(pNear);
    }
  }

  return pRet;
}
7373
/*
** Allocate and return a new expression object. If anything goes wrong (i.e.
** OOM error), leave an error code in pParse and return NULL.
**
** For eType==FTS5_STRING, pNear is the nearset for the new node and pLeft
** and pRight must both be NULL. A NULL pNear yields a NULL return with no
** error. For the other types pNear must be NULL; if either of pLeft or
** pRight is NULL, the other one is returned as is.
**
** If pParse->bPhraseToAnd is set and the first phrase of pNear has more
** than one term, the work is handed to fts5ParsePhraseToAnd().
**
** Unless it is returned directly as described above, this function takes
** over pLeft, pRight and pNear: they either become part of the new node
** or, when NULL is returned, are freed.
*/
static Fts5ExprNode *sqlite3Fts5ParseNode(
  Fts5Parse *pParse,              /* Parse context */
  int eType,                      /* FTS5_STRING, AND, OR or NOT */
  Fts5ExprNode *pLeft,            /* Left hand child expression */
  Fts5ExprNode *pRight,           /* Right hand child expression */
  Fts5ExprNearset *pNear          /* For STRING expressions, the near cluster */
){
  Fts5ExprNode *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    int nChild = 0;               /* Number of children of returned node */
    sqlite3_int64 nByte;          /* Bytes of space to allocate for this node */

    assert( (eType!=FTS5_STRING && !pNear)
         || (eType==FTS5_STRING && !pLeft && !pRight)
    );
    if( eType==FTS5_STRING && pNear==0 ) return 0;
    if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
    if( eType!=FTS5_STRING && pRight==0 ) return pLeft;

    if( eType==FTS5_STRING
     && pParse->bPhraseToAnd
     && pNear->apPhrase[0]->nTerm>1
    ){
      pRet = fts5ParsePhraseToAnd(pParse, pNear);
    }else{
      if( eType==FTS5_NOT ){
        nChild = 2;
      }else if( eType==FTS5_AND || eType==FTS5_OR ){
        /* A child of the same type is flattened into the new node by
        ** fts5ExprAddChildren(), so count its children, not the child. */
        nChild = 2;
        if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
        if( pRight->eType==eType ) nChild += pRight->nChild-1;
      }

      nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1);
      pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);

      if( pRet ){
        pRet->eType = eType;
        pRet->pNear = pNear;
        fts5ExprAssignXNext(pRet);
        if( eType==FTS5_STRING ){
          int iPhrase;
          for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
            pNear->apPhrase[iPhrase]->pNode = pRet;
            /* Any phrase with no terms turns the node into an EOF node */
            if( pNear->apPhrase[iPhrase]->nTerm==0 ){
              pRet->xNext = 0;
              pRet->eType = FTS5_EOF;
            }
          }

          /* Unless detail=full, only a single phrase of at most one term,
          ** without the bFirst flag, is accepted. */
          if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){
            Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
            if( pNear->nPhrase!=1
             || pPhrase->nTerm>1
             || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst)
            ){
              sqlite3Fts5ParseError(pParse,
                  "fts5: %s queries are not supported (detail!=full)",
                  pNear->nPhrase==1 ? "phrase": "NEAR"
              );
              /* Free only the node here. pNear is released by the cleanup
              ** code at the end of this function. */
              sqlite3_free(pRet);
              pRet = 0;
            }
          }
        }else{
          fts5ExprAddChildren(pRet, pLeft);
          fts5ExprAddChildren(pRet, pRight);
        }
      }
    }
  }

  if( pRet==0 ){
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
    sqlite3Fts5ParseNearsetFree(pNear);
  }
  return pRet;
}
7459
/*
** Combine expressions pLeft and pRight, which appeared side by side with
** no operator between them, and return the result.
**
** pRight must be a STRING, TERM or EOF node. pLeft may also be an AND
** node, in which case its last child is treated as the "previous" node.
**
**   * If pRight is an EOF node it is freed, its phrase is dropped from the
**     end of pParse->apPhrase[] and pLeft is returned.
**
**   * Otherwise, if the previous node is an EOF node, pRight takes its
**     place, its phrase is removed from pParse->apPhrase[] and it is
**     freed.
**
**   * Otherwise a new FTS5_AND node is built by sqlite3Fts5ParseNode().
**
** If pParse->rc is already set, both arguments are freed and NULL is
** returned.
*/
static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNode *pLeft,            /* Left hand child expression */
  Fts5ExprNode *pRight            /* Right hand child expression */
){
  Fts5ExprNode *pRet = 0;
  Fts5ExprNode *pPrev;            /* Node immediately preceding pRight */

  if( pParse->rc ){
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
  }else{

    assert( pLeft->eType==FTS5_STRING
        || pLeft->eType==FTS5_TERM
        || pLeft->eType==FTS5_EOF
        || pLeft->eType==FTS5_AND
    );
    assert( pRight->eType==FTS5_STRING
        || pRight->eType==FTS5_TERM
        || pRight->eType==FTS5_EOF
    );

    if( pLeft->eType==FTS5_AND ){
      pPrev = pLeft->apChild[pLeft->nChild-1];
    }else{
      pPrev = pLeft;
    }
    assert( pPrev->eType==FTS5_STRING
        || pPrev->eType==FTS5_TERM
        || pPrev->eType==FTS5_EOF
    );

    if( pRight->eType==FTS5_EOF ){
      /* The phrase belonging to pRight is the last one in the array */
      assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
      sqlite3Fts5ParseNodeFree(pRight);
      pRet = pLeft;
      pParse->nPhrase--;
    }
    else if( pPrev->eType==FTS5_EOF ){
      Fts5ExprPhrase **ap;

      if( pPrev==pLeft ){
        pRet = pRight;
      }else{
        pLeft->apChild[pLeft->nChild-1] = pRight;
        pRet = pLeft;
      }

      /* Close the gap left in pParse->apPhrase[] by the phrase of pPrev,
      ** by moving the phrases of pRight down one slot. */
      ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
      assert( ap[0]==pPrev->pNear->apPhrase[0] );
      memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
      pParse->nPhrase--;

      sqlite3Fts5ParseNodeFree(pPrev);
    }
    else{
      pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
    }
  }

  return pRet;
}
7523
7524#ifdef SQLITE_TEST
7525static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
7526 sqlite3_int64 nByte = 0;
7527 Fts5ExprTerm *p;
7528 char *zQuoted;
7529
7530 /* Determine the maximum amount of space required. */
7531 for(p=pTerm; p; p=p->pSynonym){
7532 nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
7533 }
7534 zQuoted = sqlite3_malloc64(nByte);
7535
7536 if( zQuoted ){
7537 int i = 0;
7538 for(p=pTerm; p; p=p->pSynonym){
7539 char *zIn = p->zTerm;
7540 zQuoted[i++] = '"';
7541 while( *zIn ){
7542 if( *zIn=='"' ) zQuoted[i++] = '"';
7543 zQuoted[i++] = *zIn++;
7544 }
7545 zQuoted[i++] = '"';
7546 if( p->pSynonym ) zQuoted[i++] = '|';
7547 }
7548 if( pTerm->bPrefix ){
7549 zQuoted[i++] = ' ';
7550 zQuoted[i++] = '*';
7551 }
7552 zQuoted[i++] = '\0';
7553 }
7554 return zQuoted;
7555}
7556
/*
** Format zFmt and the arguments that follow it using sqlite3_vmprintf().
** If zApp is not NULL, return a newly allocated string consisting of zApp
** followed by the formatted text. Otherwise return the formatted text
** alone.
**
** zApp is always freed. NULL is returned if the text cannot be formatted
** or, when zApp is not NULL, if the combined string cannot be allocated.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  va_list ap;
  char *zTail;                    /* Newly formatted text */
  char *zOut;                     /* Value to return */

  va_start(ap, zFmt);
  zTail = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zOut = zTail;
  if( zApp!=0 && zTail!=0 ){
    zOut = sqlite3_mprintf("%s%s", zApp, zTail);
    sqlite3_free(zTail);
  }
  sqlite3_free(zApp);
  return zOut;
}
7571
/*
** Compose a tcl-readable representation of expression pExpr. Return a
** pointer to a buffer containing that representation. It is the
** responsibility of the caller to at some point free the buffer using
** sqlite3_free().
**
** A STRING or TERM node is rendered as:
**
**     <zNearsetCmd> ?-col COLS? ?-near N? -- {PHRASE} ...
**
** where "-near N" appears only if the nearset has more than one phrase,
** and each PHRASE is its space-separated term text with "*" following
** any prefix term. Other nodes are rendered as the operator name (AND,
** OR or NOT) followed by each child in square brackets.
**
** NULL is returned if a memory allocation fails.
*/
static char *fts5ExprPrintTcl(
  Fts5Config *pConfig,
  const char *zNearsetCmd,
  Fts5ExprNode *pExpr
){
  char *zRet = 0;
  if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pExpr->pNear;
    int i;
    int iTerm;

    zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
    if( zRet==0 ) return 0;
    if( pNear->pColset ){
      /* A single column is written bare, several as a brace-quoted list */
      int *aiCol = pNear->pColset->aiCol;
      int nCol = pNear->pColset->nCol;
      if( nCol==1 ){
        zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
      }else{
        zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
        for(i=1; i<pNear->pColset->nCol; i++){
          zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
        }
        zRet = fts5PrintfAppend(zRet, "} ");
      }
      if( zRet==0 ) return 0;
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
      if( zRet==0 ) return 0;
    }

    zRet = fts5PrintfAppend(zRet, "--");
    if( zRet==0 ) return 0;

    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];

      zRet = fts5PrintfAppend(zRet, " {");
      for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
        char *zTerm = pPhrase->aTerm[iTerm].zTerm;
        zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
        if( pPhrase->aTerm[iTerm].bPrefix ){
          zRet = fts5PrintfAppend(zRet, "*");
        }
      }

      if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
      if( zRet==0 ) return 0;
    }

  }else{
    char const *zOp = 0;
    int i;
    switch( pExpr->eType ){
      case FTS5_AND: zOp = "AND"; break;
      case FTS5_NOT: zOp = "NOT"; break;
      default:
        assert( pExpr->eType==FTS5_OR );
        zOp = "OR";
        break;
    }

    zRet = sqlite3_mprintf("%s", zOp);
    for(i=0; zRet && i<pExpr->nChild; i++){
      char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
      if( !z ){
        sqlite3_free(zRet);
        zRet = 0;
      }else{
        /* The %z conversion makes the formatter free string z */
        zRet = fts5PrintfAppend(zRet, " [%z]", z);
      }
    }
  }

  return zRet;
}
7656
/*
** Compose a textual representation of expression pExpr in FTS5 query
** syntax. Return a pointer to a buffer containing that representation,
** or NULL if a memory allocation fails. The caller is responsible for
** freeing the buffer using sqlite3_free().
**
** A node with an eType of 0 is rendered as an empty quoted string.
** A STRING or TERM node is rendered as an optional column filter
** ("col : " or "{col1 col2} : "), followed by its phrases with terms
** joined by " + ". If there is more than one phrase they are wrapped as
** "NEAR(... , N)". Other nodes are rendered as their children separated
** by " AND ", " OR " or " NOT ", with parentheses around any child that
** is not a STRING, TERM or EOF node.
*/
static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
  char *zRet = 0;
  if( pExpr->eType==0 ){
    /* NOTE(review): 0 is presumably the value of FTS5_EOF - confirm */
    return sqlite3_mprintf("\"\"");
  }else
  if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pExpr->pNear;
    int i;
    int iTerm;

    if( pNear->pColset ){
      /* Column filter. Braces are used only if there are several columns. */
      int ii;
      Fts5Colset *pColset = pNear->pColset;
      if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{");
      for(ii=0; ii<pColset->nCol; ii++){
        zRet = fts5PrintfAppend(zRet, "%s%s",
            pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " "
        );
      }
      if( zRet ){
        zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : "");
      }
      if( zRet==0 ) return 0;
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, "NEAR(");
      if( zRet==0 ) return 0;
    }

    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( i!=0 ){
        zRet = fts5PrintfAppend(zRet, " ");
        if( zRet==0 ) return 0;
      }
      for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
        char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
        if( zTerm ){
          zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
          sqlite3_free(zTerm);
        }
        if( zTerm==0 || zRet==0 ){
          sqlite3_free(zRet);
          return 0;
        }
      }
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
      if( zRet==0 ) return 0;
    }

  }else{
    char const *zOp = 0;
    int i;

    switch( pExpr->eType ){
      case FTS5_AND: zOp = " AND "; break;
      case FTS5_NOT: zOp = " NOT "; break;
      default:
        assert( pExpr->eType==FTS5_OR );
        zOp = " OR ";
        break;
    }

    for(i=0; i<pExpr->nChild; i++){
      char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
      if( z==0 ){
        sqlite3_free(zRet);
        zRet = 0;
      }else{
        /* Parenthesize the child unless it is a STRING, TERM or EOF node.
        ** The %z conversion makes the formatter free string z. */
        int e = pExpr->apChild[i]->eType;
        int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF);
        zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
            (i==0 ? "" : zOp),
            (b?"(":""), z, (b?")":"")
        );
      }
      if( zRet==0 ) break;
    }
  }

  return zRet;
}
7743
/*
** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
** and fts5_expr_tcl() (bTcl!=0).
**
** The first SQL argument is the FTS5 expression to parse. For
** fts5_expr_tcl(), a second argument, if present, is the nearset command
** name to use in place of the default "nearset". All remaining arguments
** are passed to sqlite3Fts5ConfigParse() as the trailing configuration
** arguments, following the fixed values 0, "main" and "tbl".
**
** The result is the text representation of the parsed expression, or an
** empty string if the root node of the expression has no xNext method.
** Errors are reported through pCtx.
*/
static void fts5ExprFunction(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal,          /* Function arguments */
  int bTcl
){
  Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
  sqlite3 *db = sqlite3_context_db_handle(pCtx);
  const char *zExpr = 0;
  char *zErr = 0;
  Fts5Expr *pExpr = 0;
  int rc;
  int i;

  const char **azConfig;          /* Array of arguments for Fts5Config */
  const char *zNearsetCmd = "nearset";
  int nConfig;                    /* Size of azConfig[] */
  Fts5Config *pConfig = 0;
  int iArg = 1;                   /* Index of first config argument in apVal[] */

  if( nArg<1 ){
    zErr = sqlite3_mprintf("wrong number of arguments to function %s",
        bTcl ? "fts5_expr_tcl" : "fts5_expr"
    );
    sqlite3_result_error(pCtx, zErr, -1);
    sqlite3_free(zErr);
    return;
  }

  if( bTcl && nArg>1 ){
    zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
    iArg = 2;
  }

  /* Three fixed entries plus one for each remaining SQL argument */
  nConfig = 3 + (nArg-iArg);
  azConfig = (const char**)sqlite3_malloc64(sizeof(char*) * nConfig);
  if( azConfig==0 ){
    sqlite3_result_error_nomem(pCtx);
    return;
  }
  azConfig[0] = 0;
  azConfig[1] = "main";
  azConfig[2] = "tbl";
  for(i=3; iArg<nArg; iArg++){
    const char *z = (const char*)sqlite3_value_text(apVal[iArg]);
    azConfig[i++] = (z ? z : "");
  }

  zExpr = (const char*)sqlite3_value_text(apVal[0]);
  if( zExpr==0 ) zExpr = "";

  rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
  if( rc==SQLITE_OK ){
    rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
  }
  if( rc==SQLITE_OK ){
    char *zText;
    if( pExpr->pRoot->xNext==0 ){
      zText = sqlite3_mprintf("");
    }else if( bTcl ){
      zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
    }else{
      zText = fts5ExprPrint(pConfig, pExpr->pRoot);
    }
    if( zText==0 ){
      rc = SQLITE_NOMEM;
    }else{
      /* SQLITE_TRANSIENT: SQLite takes its own copy, so zText can be freed */
      sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
      sqlite3_free(zText);
    }
  }

  if( rc!=SQLITE_OK ){
    /* Prefer the error message, if there is one, to the bare error code */
    if( zErr ){
      sqlite3_result_error(pCtx, zErr, -1);
      sqlite3_free(zErr);
    }else{
      sqlite3_result_error_code(pCtx, rc);
    }
  }
  sqlite3_free((void *)azConfig);
  sqlite3Fts5ConfigFree(pConfig);
  sqlite3Fts5ExprFree(pExpr);
}
7832
7833static void fts5ExprFunctionHr(
7834 sqlite3_context *pCtx, /* Function call context */
7835 int nArg, /* Number of args */
7836 sqlite3_value **apVal /* Function arguments */
7837){
7838 fts5ExprFunction(pCtx, nArg, apVal, 0);
7839}
7840static void fts5ExprFunctionTcl(
7841 sqlite3_context *pCtx, /* Function call context */
7842 int nArg, /* Number of args */
7843 sqlite3_value **apVal /* Function arguments */
7844){
7845 fts5ExprFunction(pCtx, nArg, apVal, 1);
7846}
7847
7848/*
7849** The implementation of an SQLite user-defined-function that accepts a
7850** single integer as an argument. If the integer is an alpha-numeric
7851** unicode code point, 1 is returned. Otherwise 0.
7852*/
7853static void fts5ExprIsAlnum(
7854 sqlite3_context *pCtx, /* Function call context */
7855 int nArg, /* Number of args */
7856 sqlite3_value **apVal /* Function arguments */
7857){
7858 int iCode;
7859 u8 aArr[32];
7860 if( nArg!=1 ){
7861 sqlite3_result_error(pCtx,
7862 "wrong number of arguments to function fts5_isalnum", -1
7863 );
7864 return;
7865 }
7866 memset(aArr, 0, sizeof(aArr));
7867 sqlite3Fts5UnicodeCatParse("L*", aArr);
7868 sqlite3Fts5UnicodeCatParse("N*", aArr);
7869 sqlite3Fts5UnicodeCatParse("Co", aArr);
7870 iCode = sqlite3_value_int(apVal[0]);
7871 sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
7872}
7873
7874static void fts5ExprFold(
7875 sqlite3_context *pCtx, /* Function call context */
7876 int nArg, /* Number of args */
7877 sqlite3_value **apVal /* Function arguments */
7878){
7879 if( nArg!=1 && nArg!=2 ){
7880 sqlite3_result_error(pCtx,
7881 "wrong number of arguments to function fts5_fold", -1
7882 );
7883 }else{
7884 int iCode;
7885 int bRemoveDiacritics = 0;
7886 iCode = sqlite3_value_int(apVal[0]);
7887 if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
7888 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
7889 }
7890}
7891#endif /* ifdef SQLITE_TEST */
7892
7893/*
7894** This is called during initialization to register the fts5_expr() scalar
7895** UDF with the SQLite handle passed as the only argument.
7896*/
7897static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
7898#ifdef SQLITE_TEST
7899 struct Fts5ExprFunc {
7900 const char *z;
7901 void (*x)(sqlite3_context*,int,sqlite3_value**);
7902 } aFunc[] = {
7903 { "fts5_expr", fts5ExprFunctionHr },
7904 { "fts5_expr_tcl", fts5ExprFunctionTcl },
7905 { "fts5_isalnum", fts5ExprIsAlnum },
7906 { "fts5_fold", fts5ExprFold },
7907 };
7908 int i;
7909 int rc = SQLITE_OK;
7910 void *pCtx = (void*)pGlobal;
7911
7912 for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
7913 struct Fts5ExprFunc *p = &aFunc[i];
7914 rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
7915 }
7916#else
7917 int rc = SQLITE_OK;
7918 UNUSED_PARAM2(pGlobal,db);
7919#endif
7920
7921 /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and
7922 ** sqlite3Fts5ParserFallback() are unused */
7923#ifndef NDEBUG
7924 (void)sqlite3Fts5ParserTrace;
7925#endif
7926 (void)sqlite3Fts5ParserFallback;
7927
7928 return rc;
7929}
7930
7931/*
7932** Return the number of phrases in expression pExpr.
7933*/
7934static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
7935 return (pExpr ? pExpr->nPhrase : 0);
7936}
7937
7938/*
7939** Return the number of terms in the iPhrase'th phrase in pExpr.
7940*/
7941static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
7942 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
7943 return pExpr->apExprPhrase[iPhrase]->nTerm;
7944}
7945
7946/*
7947** This function is used to access the current position list for phrase
7948** iPhrase.
7949*/
7950static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
7951 int nRet;
7952 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
7953 Fts5ExprNode *pNode = pPhrase->pNode;
7954 if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
7955 *pa = pPhrase->poslist.p;
7956 nRet = pPhrase->poslist.n;
7957 }else{
7958 *pa = 0;
7959 nRet = 0;
7960 }
7961 return nRet;
7962}
7963
/*
** Per-phrase state used while position lists are rebuilt by tokenizing
** document text. sqlite3Fts5ExprClearPoslists() allocates one of these
** for each phrase in the expression.
*/
struct Fts5PoslistPopulator {
  Fts5PoslistWriter writer;       /* Passed to sqlite3Fts5PoslistWriterAppend() */
  int bOk;                        /* True if ok to populate */
  int bMiss;                      /* Set by sqlite3Fts5ExprClearPoslists(); a
                                  ** phrase with this set is never populated */
};
7969
7970/*
7971** Clear the position lists associated with all phrases in the expression
7972** passed as the first argument. Argument bLive is true if the expression
7973** might be pointing to a real entry, otherwise it has just been reset.
7974**
7975** At present this function is only used for detail=col and detail=none
7976** fts5 tables. This implies that all phrases must be at most 1 token
7977** in size, as phrase matches are not supported without detail=full.
7978*/
7979static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
7980 Fts5PoslistPopulator *pRet;
7981 pRet = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
7982 if( pRet ){
7983 int i;
7984 memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
7985 for(i=0; i<pExpr->nPhrase; i++){
7986 Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist;
7987 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
7988 assert( pExpr->apExprPhrase[i]->nTerm<=1 );
7989 if( bLive &&
7990 (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof)
7991 ){
7992 pRet[i].bMiss = 1;
7993 }else{
7994 pBuf->n = 0;
7995 }
7996 }
7997 }
7998 return pRet;
7999}
8000
/*
** Context object passed by sqlite3Fts5ExprPopulatePoslists() through the
** tokenizer to the fts5ExprPopulatePoslistsCb() callback.
*/
struct Fts5ExprCtx {
  Fts5Expr *pExpr;                    /* Expression being populated */
  Fts5PoslistPopulator *aPopulator;   /* One entry per phrase in pExpr */
  i64 iOff;                           /* Offset of the current token. Starts at
                                      ** (iCol<<32)-1 and is incremented for
                                      ** each token that is not colocated */
};
typedef struct Fts5ExprCtx Fts5ExprCtx;
8007
8008/*
8009** TODO: Make this more efficient!
8010*/
8011static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
8012 int i;
8013 for(i=0; i<pColset->nCol; i++){
8014 if( pColset->aiCol[i]==iCol ) return 1;
8015 }
8016 return 0;
8017}
8018
8019static int fts5ExprPopulatePoslistsCb(
8020 void *pCtx, /* Copy of 2nd argument to xTokenize() */
8021 int tflags, /* Mask of FTS5_TOKEN_* flags */
8022 const char *pToken, /* Pointer to buffer containing token */
8023 int nToken, /* Size of token in bytes */
8024 int iUnused1, /* Byte offset of token within input text */
8025 int iUnused2 /* Byte offset of end of token within input text */
8026){
8027 Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
8028 Fts5Expr *pExpr = p->pExpr;
8029 int i;
8030
8031 UNUSED_PARAM2(iUnused1, iUnused2);
8032
8033 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
8034 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
8035 for(i=0; i<pExpr->nPhrase; i++){
8036 Fts5ExprTerm *pTerm;
8037 if( p->aPopulator[i].bOk==0 ) continue;
8038 for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
8039 int nTerm = (int)strlen(pTerm->zTerm);
8040 if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
8041 && memcmp(pTerm->zTerm, pToken, nTerm)==0
8042 ){
8043 int rc = sqlite3Fts5PoslistWriterAppend(
8044 &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
8045 );
8046 if( rc ) return rc;
8047 break;
8048 }
8049 }
8050 }
8051 return SQLITE_OK;
8052}
8053
8054static int sqlite3Fts5ExprPopulatePoslists(
8055 Fts5Config *pConfig,
8056 Fts5Expr *pExpr,
8057 Fts5PoslistPopulator *aPopulator,
8058 int iCol,
8059 const char *z, int n
8060){
8061 int i;
8062 Fts5ExprCtx sCtx;
8063 sCtx.pExpr = pExpr;
8064 sCtx.aPopulator = aPopulator;
8065 sCtx.iOff = (((i64)iCol) << 32) - 1;
8066
8067 for(i=0; i<pExpr->nPhrase; i++){
8068 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
8069 Fts5Colset *pColset = pNode->pNear->pColset;
8070 if( (pColset && 0==fts5ExprColsetTest(pColset, iCol))
8071 || aPopulator[i].bMiss
8072 ){
8073 aPopulator[i].bOk = 0;
8074 }else{
8075 aPopulator[i].bOk = 1;
8076 }
8077 }
8078
8079 return sqlite3Fts5Tokenize(pConfig,
8080 FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
8081 );
8082}
8083
8084static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
8085 if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
8086 pNode->pNear->apPhrase[0]->poslist.n = 0;
8087 }else{
8088 int i;
8089 for(i=0; i<pNode->nChild; i++){
8090 fts5ExprClearPoslists(pNode->apChild[i]);
8091 }
8092 }
8093}
8094
8095static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
8096 pNode->iRowid = iRowid;
8097 pNode->bEof = 0;
8098 switch( pNode->eType ){
8099 case FTS5_TERM:
8100 case FTS5_STRING:
8101 return (pNode->pNear->apPhrase[0]->poslist.n>0);
8102
8103 case FTS5_AND: {
8104 int i;
8105 for(i=0; i<pNode->nChild; i++){
8106 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
8107 fts5ExprClearPoslists(pNode);
8108 return 0;
8109 }
8110 }
8111 break;
8112 }
8113
8114 case FTS5_OR: {
8115 int i;
8116 int bRet = 0;
8117 for(i=0; i<pNode->nChild; i++){
8118 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
8119 bRet = 1;
8120 }
8121 }
8122 return bRet;
8123 }
8124
8125 default: {
8126 assert( pNode->eType==FTS5_NOT );
8127 if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
8128 || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
8129 ){
8130 fts5ExprClearPoslists(pNode);
8131 return 0;
8132 }
8133 break;
8134 }
8135 }
8136 return 1;
8137}
8138
8139static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
8140 fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
8141}
8142
8143/*
8144** This function is only called for detail=columns tables.
8145*/
8146static int sqlite3Fts5ExprPhraseCollist(
8147 Fts5Expr *pExpr,
8148 int iPhrase,
8149 const u8 **ppCollist,
8150 int *pnCollist
8151){
8152 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
8153 Fts5ExprNode *pNode = pPhrase->pNode;
8154 int rc = SQLITE_OK;
8155
8156 assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
8157 assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
8158
8159 if( pNode->bEof==0
8160 && pNode->iRowid==pExpr->pRoot->iRowid
8161 && pPhrase->poslist.n>0
8162 ){
8163 Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
8164 if( pTerm->pSynonym ){
8165 Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
8166 rc = fts5ExprSynonymList(
8167 pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
8168 );
8169 }else{
8170 *ppCollist = pPhrase->aTerm[0].pIter->pData;
8171 *pnCollist = pPhrase->aTerm[0].pIter->nData;
8172 }
8173 }else{
8174 *ppCollist = 0;
8175 *pnCollist = 0;
8176 }
8177
8178 return rc;
8179}
8180
8181#line 1 "fts5_hash.c"
8182/*
8183** 2014 August 11
8184**
8185** The author disclaims copyright to this source code. In place of
8186** a legal notice, here is a blessing:
8187**
8188** May you do good and not evil.
8189** May you find forgiveness for yourself and forgive others.
8190** May you share freely, never taking more than you give.
8191**
8192******************************************************************************
8193**
8194*/
8195
8196
8197
8198/* #include "fts5Int.h" */
8199
8200typedef struct Fts5HashEntry Fts5HashEntry;
8201
/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/
8207
8208
/*
** The in-memory hash table object. Entries that hash to the same slot are
** chained together through Fts5HashEntry.pHashNext.
*/
struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Pointer to bytes counter. Updated by
                                  ** sqlite3Fts5HashWrite() */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
8217
8218/*
8219** Each entry in the hash table is represented by an object of the
8220** following type. Each object, its key (a nul-terminated string) and
8221** its current data are stored in a single memory allocation. The
8222** key immediately follows the object in memory. The position list
8223** data immediately follows the key data in memory.
8224**
8225** The data that follows the key is in a similar, but not identical format
8226** to the doclist data stored in the database. It is:
8227**
8228** * Rowid, as a varint
8229** * Position list, without 0x00 terminator.
8230** * Size of previous position list and rowid, as a 4 byte
8231** big-endian integer.
8232**
8233** iRowidOff:
8234** Offset of last rowid written to data area. Relative to first byte of
8235** structure.
8236**
8237** nData:
8238** Bytes of data written since iRowidOff.
8239*/
struct Fts5HashEntry {
  Fts5HashEntry *pHashNext;       /* Next hash entry with same hash-key */
  Fts5HashEntry *pScanNext;       /* Next entry in sorted order */

  int nAlloc;                     /* Total size of allocation */
  int iSzPoslist;                 /* Offset of space for 4-byte poslist size.
                                  ** Zero if there is no poslist waiting to
                                  ** have its size written */
  int nData;                      /* Total bytes of data (incl. structure) */
  int nKey;                       /* Length of key in bytes. This is the size
                                  ** of the token only - the leading prefix
                                  ** byte of the key is not counted */
  u8 bDel;                        /* Set delete-flag @ iSzPoslist */
  u8 bContent;                    /* Set content-flag (detail=none mode) */
  i16 iCol;                       /* Column of last value written */
  int iPos;                       /* Position of last value written */
  i64 iRowid;                     /* Rowid of last value written */
};
8254
/*
** Equivalent to:
**
**   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
**
** where zKey is the key stored immediately after the Fts5HashEntry object
** in the same allocation.
*/
#define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
8261
8262
8263/*
8264** Allocate a new hash table.
8265*/
8266static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
8267 int rc = SQLITE_OK;
8268 Fts5Hash *pNew;
8269
8270 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
8271 if( pNew==0 ){
8272 rc = SQLITE_NOMEM;
8273 }else{
8274 sqlite3_int64 nByte;
8275 memset(pNew, 0, sizeof(Fts5Hash));
8276 pNew->pnByte = pnByte;
8277 pNew->eDetail = pConfig->eDetail;
8278
8279 pNew->nSlot = 1024;
8280 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
8281 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64(nByte);
8282 if( pNew->aSlot==0 ){
8283 sqlite3_free(pNew);
8284 *ppNew = 0;
8285 rc = SQLITE_NOMEM;
8286 }else{
8287 memset(pNew->aSlot, 0, (size_t)nByte);
8288 }
8289 }
8290 return rc;
8291}
8292
8293/*
8294** Free a hash table object.
8295*/
8296static void sqlite3Fts5HashFree(Fts5Hash *pHash){
8297 if( pHash ){
8298 sqlite3Fts5HashClear(pHash);
8299 sqlite3_free(pHash->aSlot);
8300 sqlite3_free(pHash);
8301 }
8302}
8303
8304/*
8305** Empty (but do not delete) a hash table.
8306*/
8307static void sqlite3Fts5HashClear(Fts5Hash *pHash){
8308 int i;
8309 for(i=0; i<pHash->nSlot; i++){
8310 Fts5HashEntry *pNext;
8311 Fts5HashEntry *pSlot;
8312 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
8313 pNext = pSlot->pHashNext;
8314 sqlite3_free(pSlot);
8315 }
8316 }
8317 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
8318 pHash->nEntry = 0;
8319}
8320
/*
** Return the hash slot, in the range 0 to nSlot-1, for the n byte key at
** p. The bytes of the key are mixed into the hash from last to first.
*/
static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
  unsigned int uHash = 13;
  int iByte = n;
  while( iByte>0 ){
    iByte--;
    uHash = (uHash << 3) ^ uHash ^ p[iByte];
  }
  return (uHash % nSlot);
}
8329
/*
** Return the hash slot, in the range 0 to nSlot-1, for the key formed by
** byte b followed by the n bytes at p. The bytes at p are mixed into the
** hash from last to first, and b is mixed in last of all.
*/
static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
  unsigned int uHash = 13;
  int iByte = n;
  while( iByte>0 ){
    iByte--;
    uHash = (uHash << 3) ^ uHash ^ p[iByte];
  }
  uHash = (uHash << 3) ^ uHash ^ b;
  return (uHash % nSlot);
}
8339
8340/*
8341** Resize the hash table by doubling the number of slots.
8342*/
8343static int fts5HashResize(Fts5Hash *pHash){
8344 int nNew = pHash->nSlot*2;
8345 int i;
8346 Fts5HashEntry **apNew;
8347 Fts5HashEntry **apOld = pHash->aSlot;
8348
8349 apNew = (Fts5HashEntry**)sqlite3_malloc64(nNew*sizeof(Fts5HashEntry*));
8350 if( !apNew ) return SQLITE_NOMEM;
8351 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
8352
8353 for(i=0; i<pHash->nSlot; i++){
8354 while( apOld[i] ){
8355 unsigned int iHash;
8356 Fts5HashEntry *p = apOld[i];
8357 apOld[i] = p->pHashNext;
8358 iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p),
8359 (int)strlen(fts5EntryKey(p)));
8360 p->pHashNext = apNew[iHash];
8361 apNew[iHash] = p;
8362 }
8363 }
8364
8365 sqlite3_free(apOld);
8366 pHash->nSlot = nNew;
8367 pHash->aSlot = apNew;
8368 return SQLITE_OK;
8369}
8370
/*
** If entry p has a position list waiting to have its size written (that
** is, if p->iSzPoslist is non-zero), write the size field for it now.
**
** The bytes are written relative to p2 if p2 is not NULL, or relative to
** p itself otherwise. When p2 is NULL, the bookkeeping fields of p are
** updated to show that the position list is finished. When p2 is not
** NULL, p is left unmodified.
**
** Returns the number of bytes by which the data grew as a result. This
** is 0 if p->iSzPoslist is zero.
*/
static int fts5HashAddPoslistSize(
  Fts5Hash *pHash,
  Fts5HashEntry *p,
  Fts5HashEntry *p2
){
  int nRet = 0;
  if( p->iSzPoslist ){
    u8 *pPtr = p2 ? (u8*)p2 : (u8*)p;
    int nData = p->nData;
    if( pHash->eDetail==FTS5_DETAIL_NONE ){
      /* In detail=none mode no byte is reserved for a size field. A
      ** delete is recorded as a 0x00 byte, followed by a second 0x00 byte
      ** if the content flag is also set. */
      assert( nData==p->iSzPoslist );
      if( p->bDel ){
        pPtr[nData++] = 0x00;
        if( p->bContent ){
          pPtr[nData++] = 0x00;
        }
      }
    }else{
      int nSz = (nData - p->iSzPoslist - 1);       /* Size in bytes */
      int nPos = nSz*2 + p->bDel;                  /* Value of nPos field */

      assert( p->bDel==0 || p->bDel==1 );
      if( nPos<=127 ){
        /* The value fits in the single byte reserved for it */
        pPtr[p->iSzPoslist] = (u8)nPos;
      }else{
        /* The varint needs more than the one reserved byte. Shift the
        ** position list along to make room for it. */
        int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
        memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
        sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
        nData += (nByte-1);
      }
    }

    nRet = nData - p->nData;
    if( p2==0 ){
      /* Writing in place: mark the position list as finished */
      p->iSzPoslist = 0;
      p->bDel = 0;
      p->bContent = 0;
      p->nData = nData;
    }
  }
  return nRet;
}
8413
/*
** Add an entry to the in-memory hash table. The key is the concatenation
** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
**
**     (bByte || pToken) -> (iRowid,iCol,iPos)
**
** Or, if iCol is negative, then the value is a delete marker.
**
** Returns SQLITE_OK if successful, or SQLITE_NOMEM if a required
** allocation fails. On success, (*pHash->pnByte) is increased by the
** number of bytes by which the data of the entry grew, or by the full
** value of nData if a new entry was created.
*/
static int sqlite3Fts5HashWrite(
  Fts5Hash *pHash,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,                     /* First byte of token */
  const char *pToken, int nToken  /* Token to add or remove to or from index */
){
  unsigned int iHash;
  Fts5HashEntry *p;
  u8 *pPtr;
  int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */
  int bNew;                       /* If non-delete entry should be written */

  /* In detail=full mode a position is written on every call. In the other
  ** modes one is written only for a new rowid or a new column (below). */
  bNew = (pHash->eDetail==FTS5_DETAIL_FULL);

  /* Attempt to locate an existing hash entry */
  iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
  for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
    char *zKey = fts5EntryKey(p);
    if( zKey[0]==bByte
     && p->nKey==nToken
     && memcmp(&zKey[1], pToken, nToken)==0
    ){
      break;
    }
  }

  /* If an existing hash entry cannot be found, create a new one. */
  if( p==0 ){
    /* Figure out how much space to allocate */
    char *zKey;
    sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
    if( nByte<128 ) nByte = 128;

    /* Grow the Fts5Hash.aSlot[] array if necessary. The slot number must
    ** be recalculated if the array changes size. */
    if( (pHash->nEntry*2)>=pHash->nSlot ){
      int rc = fts5HashResize(pHash);
      if( rc!=SQLITE_OK ) return rc;
      iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
    }

    /* Allocate new Fts5HashEntry and add it to the hash table. */
    p = (Fts5HashEntry*)sqlite3_malloc64(nByte);
    if( !p ) return SQLITE_NOMEM;
    memset(p, 0, sizeof(Fts5HashEntry));
    p->nAlloc = (int)nByte;
    zKey = fts5EntryKey(p);
    zKey[0] = bByte;
    memcpy(&zKey[1], pToken, nToken);
    assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
    p->nKey = nToken;
    zKey[nToken+1] = '\0';
    /* The data area begins after the structure, the prefix byte, the
    ** token and the nul-terminator */
    p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
    p->pHashNext = pHash->aSlot[iHash];
    pHash->aSlot[iHash] = p;
    pHash->nEntry++;

    /* Add the first rowid field to the hash-entry */
    p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
    p->iRowid = iRowid;

    /* Except in detail=none mode, reserve one byte for the poslist size */
    p->iSzPoslist = p->nData;
    if( pHash->eDetail!=FTS5_DETAIL_NONE ){
      p->nData += 1;
      p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
    }

  }else{

    /* Appending to an existing hash-entry. Check that there is enough
    ** space to append the largest possible new entry. Worst case scenario
    ** is:
    **
    **     + 9 bytes for a new rowid,
    **     + 4 byte reserved for the "poslist size" varint.
    **     + 1 byte for a "new column" byte,
    **     + 3 bytes for a new column number (16-bit max) as a varint,
    **     + 5 bytes for the new position offset (32-bit max).
    */
    if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
      sqlite3_int64 nNew = p->nAlloc * 2;
      Fts5HashEntry *pNew;
      Fts5HashEntry **pp;
      pNew = (Fts5HashEntry*)sqlite3_realloc64(p, nNew);
      if( pNew==0 ) return SQLITE_NOMEM;
      pNew->nAlloc = (int)nNew;
      /* The entry may have moved. Find the pointer in the hash chain that
      ** refers to the old location and update it. */
      for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
      *pp = pNew;
      p = pNew;
    }
    /* Subtract the old size, so that nIncr ends up as the growth only */
    nIncr -= p->nData;
  }
  assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );

  pPtr = (u8*)p;

  /* If this is a new rowid, write the size field for the previous entry
  ** (see fts5HashAddPoslistSize()), then append the new rowid for this
  ** entry as a delta from the previous one. */
  if( iRowid!=p->iRowid ){
    u64 iDiff = (u64)iRowid - (u64)p->iRowid;
    fts5HashAddPoslistSize(pHash, p, 0);
    p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff);
    p->iRowid = iRowid;
    bNew = 1;
    p->iSzPoslist = p->nData;
    if( pHash->eDetail!=FTS5_DETAIL_NONE ){
      p->nData += 1;
      p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
      p->iPos = 0;
    }
  }

  if( iCol>=0 ){
    if( pHash->eDetail==FTS5_DETAIL_NONE ){
      p->bContent = 1;
    }else{
      /* Append a new column value, if necessary */
      assert_nc( iCol>=p->iCol );
      if( iCol!=p->iCol ){
        if( pHash->eDetail==FTS5_DETAIL_FULL ){
          pPtr[p->nData++] = 0x01;
          p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
          p->iCol = (i16)iCol;
          p->iPos = 0;
        }else{
          /* Not detail=full: the column number is written in place of a
          ** position, once for each distinct column */
          bNew = 1;
          p->iCol = (i16)(iPos = iCol);
        }
      }

      /* Append the new position offset, if necessary */
      if( bNew ){
        p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
        p->iPos = iPos;
      }
    }
  }else{
    /* This is a delete. Set the delete flag. */
    p->bDel = 1;
  }

  nIncr += p->nData;
  *pHash->pnByte += nIncr;
  return SQLITE_OK;
}
8568
8569
8570/*
8571** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
8572** each sorted in key order. This function merges the two lists into a
8573** single list and returns a pointer to its first element.
8574*/
8575static Fts5HashEntry *fts5HashEntryMerge(
8576 Fts5HashEntry *pLeft,
8577 Fts5HashEntry *pRight
8578){
8579 Fts5HashEntry *p1 = pLeft;
8580 Fts5HashEntry *p2 = pRight;
8581 Fts5HashEntry *pRet = 0;
8582 Fts5HashEntry **ppOut = &pRet;
8583
8584 while( p1 || p2 ){
8585 if( p1==0 ){
8586 *ppOut = p2;
8587 p2 = 0;
8588 }else if( p2==0 ){
8589 *ppOut = p1;
8590 p1 = 0;
8591 }else{
8592 int i = 0;
8593 char *zKey1 = fts5EntryKey(p1);
8594 char *zKey2 = fts5EntryKey(p2);
8595 while( zKey1[i]==zKey2[i] ) i++;
8596
8597 if( ((u8)zKey1[i])>((u8)zKey2[i]) ){
8598 /* p2 is smaller */
8599 *ppOut = p2;
8600 ppOut = &p2->pScanNext;
8601 p2 = p2->pScanNext;
8602 }else{
8603 /* p1 is smaller */
8604 *ppOut = p1;
8605 ppOut = &p1->pScanNext;
8606 p1 = p1->pScanNext;
8607 }
8608 *ppOut = 0;
8609 }
8610 }
8611
8612 return pRet;
8613}
8614
8615/*
8616** Extract all tokens from hash table iHash and link them into a list
8617** in sorted order. The hash table is cleared before returning. It is
8618** the responsibility of the caller to free the elements of the returned
8619** list.
8620*/
8621static int fts5HashEntrySort(
8622 Fts5Hash *pHash,
8623 const char *pTerm, int nTerm, /* Query prefix, if any */
8624 Fts5HashEntry **ppSorted
8625){
8626 const int nMergeSlot = 32;
8627 Fts5HashEntry **ap;
8628 Fts5HashEntry *pList;
8629 int iSlot;
8630 int i;
8631
8632 *ppSorted = 0;
8633 ap = sqlite3_malloc64(sizeof(Fts5HashEntry*) * nMergeSlot);
8634 if( !ap ) return SQLITE_NOMEM;
8635 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
8636
8637 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
8638 Fts5HashEntry *pIter;
8639 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
8640 if( pTerm==0
8641 || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
8642 ){
8643 Fts5HashEntry *pEntry = pIter;
8644 pEntry->pScanNext = 0;
8645 for(i=0; ap[i]; i++){
8646 pEntry = fts5HashEntryMerge(pEntry, ap[i]);
8647 ap[i] = 0;
8648 }
8649 ap[i] = pEntry;
8650 }
8651 }
8652 }
8653
8654 pList = 0;
8655 for(i=0; i<nMergeSlot; i++){
8656 pList = fts5HashEntryMerge(pList, ap[i]);
8657 }
8658
8659 pHash->nEntry = 0;
8660 sqlite3_free(ap);
8661 *ppSorted = pList;
8662 return SQLITE_OK;
8663}
8664
8665/*
8666** Query the hash table for a doclist associated with term pTerm/nTerm.
8667*/
8668static int sqlite3Fts5HashQuery(
8669 Fts5Hash *pHash, /* Hash table to query */
8670 int nPre,
8671 const char *pTerm, int nTerm, /* Query term */
8672 void **ppOut, /* OUT: Pointer to new object */
8673 int *pnDoclist /* OUT: Size of doclist in bytes */
8674){
8675 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
8676 char *zKey = 0;
8677 Fts5HashEntry *p;
8678
8679 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
8680 zKey = fts5EntryKey(p);
8681 assert( p->nKey+1==(int)strlen(zKey) );
8682 if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break;
8683 }
8684
8685 if( p ){
8686 int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1;
8687 int nList = p->nData - nHashPre;
8688 u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10));
8689 if( pRet ){
8690 Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre];
8691 memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList);
8692 nList += fts5HashAddPoslistSize(pHash, p, pFaux);
8693 *pnDoclist = nList;
8694 }else{
8695 *pnDoclist = 0;
8696 return SQLITE_NOMEM;
8697 }
8698 }else{
8699 *ppOut = 0;
8700 *pnDoclist = 0;
8701 }
8702
8703 return SQLITE_OK;
8704}
8705
8706static int sqlite3Fts5HashScanInit(
8707 Fts5Hash *p, /* Hash table to query */
8708 const char *pTerm, int nTerm /* Query prefix */
8709){
8710 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
8711}
8712
8713static void sqlite3Fts5HashScanNext(Fts5Hash *p){
8714 assert( !sqlite3Fts5HashScanEof(p) );
8715 p->pScan = p->pScan->pScanNext;
8716}
8717
8718static int sqlite3Fts5HashScanEof(Fts5Hash *p){
8719 return (p->pScan==0);
8720}
8721
8722static void sqlite3Fts5HashScanEntry(
8723 Fts5Hash *pHash,
8724 const char **pzTerm, /* OUT: term (nul-terminated) */
8725 const u8 **ppDoclist, /* OUT: pointer to doclist */
8726 int *pnDoclist /* OUT: size of doclist in bytes */
8727){
8728 Fts5HashEntry *p;
8729 if( (p = pHash->pScan) ){
8730 char *zKey = fts5EntryKey(p);
8731 int nTerm = (int)strlen(zKey);
8732 fts5HashAddPoslistSize(pHash, p, 0);
8733 *pzTerm = zKey;
8734 *ppDoclist = (const u8*)&zKey[nTerm+1];
8735 *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1);
8736 }else{
8737 *pzTerm = 0;
8738 *ppDoclist = 0;
8739 *pnDoclist = 0;
8740 }
8741}
8742
8743#line 1 "fts5_index.c"
8744/*
8745** 2014 May 31
8746**
8747** The author disclaims copyright to this source code. In place of
8748** a legal notice, here is a blessing:
8749**
8750** May you do good and not evil.
8751** May you find forgiveness for yourself and forgive others.
8752** May you share freely, never taking more than you give.
8753**
8754******************************************************************************
8755**
** Low level access to the FTS index stored in the database file. The
** routines in this file implement all read and write access to the
** %_data table. Other parts of the system access this functionality via
** the interface defined in fts5Int.h.
8760*/
8761
8762
8763/* #include "fts5Int.h" */
8764
8765/*
8766** Overview:
8767**
8768** The %_data table contains all the FTS indexes for an FTS5 virtual table.
8769** As well as the main term index, there may be up to 31 prefix indexes.
8770** The format is similar to FTS3/4, except that:
8771**
8772** * all segment b-tree leaf data is stored in fixed size page records
8773** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
8774** taken to ensure it is possible to iterate in either direction through
8775** the entries in a doclist, or to seek to a specific entry within a
8776** doclist, without loading it into memory.
8777**
8778** * large doclists that span many pages have associated "doclist index"
8779** records that contain a copy of the first rowid on each page spanned by
8780** the doclist. This is used to speed up seek operations, and merges of
8781** large doclists with very small doclists.
8782**
8783** * extra fields in the "structure record" record the state of ongoing
8784** incremental merge operations.
8785**
8786*/
8787
8788
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the prefix byte that identifies keys belonging
** to the main (non-prefix) term index. Its use is not visible in this part
** of the file - confirm before relying on this. */
#define FTS5_MAIN_PREFIX '0'

/* As described in the overview above, there may be at most 31 prefix
** indexes. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
8799
8800/*
8801** Details:
8802**
8803** The %_data table managed by this module,
8804**
8805** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
8806**
** , contains the following 5 types of records. See the comments surrounding
** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
8810**
8811** 1. Structure Records:
8812**
8813** The set of segments that make up an index - the index structure - are
8814** recorded in a single record within the %_data table. The record consists
8815** of a single 32-bit configuration cookie value followed by a list of
8816** SQLite varints. If the FTS table features more than one index (because
8817** there are one or more prefix indexes), it is guaranteed that all share
8818** the same cookie value.
8819**
8820** Immediately following the configuration cookie, the record begins with
8821** three varints:
8822**
8823** + number of levels,
8824** + total number of segments on all levels,
8825** + value of write counter.
8826**
8827** Then, for each level from 0 to nMax:
8828**
8829** + number of input segments in ongoing merge.
8830** + total number of segments in level.
8831** + for each segment from oldest to newest:
8832** + segment id (always > 0)
8833** + first leaf page number (often 1, always greater than 0)
8834** + final leaf page number
8835**
8836** 2. The Averages Record:
8837**
8838** A single record within the %_data table. The data is a list of varints.
8839** The first value is the number of rows in the index. Then, for each column
8840** from left to right, the total number of tokens in the column for all
8841** rows of the table.
8842**
8843** 3. Segment leaves:
8844**
8845** TERM/DOCLIST FORMAT:
8846**
8847** Most of each segment leaf is taken up by term/doclist data. The
8848** general format of term/doclist, starting with the first term
8849** on the leaf page, is:
8850**
8851** varint : size of first term
8852** blob: first term data
8853** doclist: first doclist
8854** zero-or-more {
8855** varint: number of bytes in common with previous term
8856** varint: number of bytes of new term data (nNew)
8857** blob: nNew bytes of new term data
8858** doclist: next doclist
8859** }
8860**
8861** doclist format:
8862**
8863** varint: first rowid
8864** poslist: first poslist
8865** zero-or-more {
8866** varint: rowid delta (always > 0)
8867** poslist: next poslist
8868** }
8869**
8870** poslist format:
8871**
8872** varint: size of poslist in bytes multiplied by 2, not including
8873** this field. Plus 1 if this entry carries the "delete" flag.
8874** collist: collist for column 0
8875** zero-or-more {
8876** 0x01 byte
8877** varint: column number (I)
8878** collist: collist for column I
8879** }
8880**
8881** collist format:
8882**
8883** varint: first offset + 2
8884** zero-or-more {
8885** varint: offset delta + 2
8886** }
8887**
8888** PAGE FORMAT
8889**
8890** Each leaf page begins with a 4-byte header containing 2 16-bit
8891** unsigned integer fields in big-endian format. They are:
8892**
8893** * The byte offset of the first rowid on the page, if it exists
8894** and occurs before the first term (otherwise 0).
8895**
8896** * The byte offset of the start of the page footer. If the page
8897** footer is 0 bytes in size, then this field is the same as the
8898** size of the leaf page in bytes.
8899**
8900** The page footer consists of a single varint for each term located
8901** on the page. Each varint is the byte offset of the current term
8902** within the page, delta-compressed against the previous value. In
8903** other words, the first varint in the footer is the byte offset of
8904** the first term, the second is the byte offset of the second less that
8905** of the first, and so on.
8906**
8907** The term/doclist format described above is accurate if the entire
8908** term/doclist data fits on a single leaf page. If this is not the case,
8909** the format is changed in two ways:
8910**
8911** + if the first rowid on a page occurs before the first term, it
8912** is stored as a literal value:
8913**
8914** varint: first rowid
8915**
8916** + the first term on each page is stored in the same way as the
8917** very first term of the segment:
8918**
8919** varint : size of first term
8920** blob: first term data
8921**
** 4. Segment doclist indexes:
8923**
8924** Doclist indexes are themselves b-trees, however they usually consist of
8925** a single leaf record only. The format of each doclist index leaf page
8926** is:
8927**
8928** * Flags byte. Bits are:
8929** 0x01: Clear if leaf is also the root page, otherwise set.
8930**
8931** * Page number of fts index leaf page. As a varint.
8932**
8933** * First rowid on page indicated by previous field. As a varint.
8934**
8935** * A list of varints, one for each subsequent termless page. A
8936** positive delta if the termless page contains at least one rowid,
8937** or an 0x00 byte otherwise.
8938**
8939** Internal doclist index nodes are:
8940**
8941** * Flags byte. Bits are:
8942** 0x01: Clear for root page, otherwise set.
8943**
8944** * Page number of first child page. As a varint.
8945**
8946** * Copy of first rowid on page indicated by previous field. As a varint.
8947**
8948** * A list of delta-encoded varints - the first rowid on each subsequent
8949** child page.
8950**
8951*/
8952
/*
** Rowids for the averages and structure records in the %_data table.
** Each of these records exists exactly once per table, so each is stored
** under a fixed, small rowid.
*/
#define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */
#define FTS5_STRUCTURE_ROWID 10 /* The structure record */
8958
/*
** Macros determining the rowids used by segment leaves and dlidx leaves
** and nodes. All nodes and leaves are stored in the %_data table with large
** positive rowids.
**
** Each segment has a unique non-zero 16-bit id.
**
** The rowid for each segment leaf is found by passing the segment id and
** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
** sequentially starting from 1.
**
** The FTS5_DATA_*_B macros give the width in bits of each field packed into
** a rowid by fts5_dri(). From least to most significant the fields are:
**
**     page number (31 bits) | dlidx height (5 bits) | dlidx flag (1 bit) |
**     segment id (remaining high bits)
*/
#define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */
#define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */
#define FTS5_DATA_HEIGHT_B 5 /* Dlidx tree height field (5 bits) */
#define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483647 (31 bits) */

/*
** Compose a %_data rowid from its four fields. The fields are combined by
** addition, not bitwise-or, so an argument wider than its field carries
** into the next field up.
*/
#define fts5_dri(segid, dlidx, height, pgno) ( \
  ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \
  ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \
  ((i64)(height) << (FTS5_DATA_PAGE_B)) + \
  ((i64)(pgno)) \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height zero). */
#define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno)
/* Rowid of doclist-index page pgno at the given tree height in segid. */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
8984
#ifdef SQLITE_DEBUG
/*
** Debug-build helper that simply returns SQLITE_CORRUPT_VTAB.
** NOTE(review): presumably this exists as a function so that a debugger
** breakpoint can be set where corruption is first reported - confirm
** against the definition of FTS5_CORRUPT.
**
** The parameter list is spelled (void) so that the definition is a proper
** prototype rather than an old-style unprototyped function.
*/
static int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif
8988
8989
/*
** Each time a blob is read from the %_data table, it is padded with this
** many zero bytes. This makes it easier to decode the various record formats
** without overreading if the records are corrupt.
**
** FTS5_DATA_PADDING is the number of extra bytes that fts5DataRead() adds
** to the allocation following the record data.
** NOTE(review): FTS5_DATA_ZERO_PADDING is not referenced in this part of
** the file - confirm where (or whether) it is still used.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
8997
/* Forward declarations of the object types used by the index module, so
** that the struct definitions below may refer to one another. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
9010
/*
** In-memory copy of a single %_data record, as returned by fts5DataRead().
** fts5DataRead() obtains the object and the record buffer with a single
** allocation (p points just past the struct), so freeing the object with
** fts5DataRelease() frees the buffer as well.
*/
struct Fts5Data {
  u8 *p; /* Pointer to buffer containing record */
  int nn; /* Size of record in bytes */
  int szLeaf; /* Size of leaf without page-index */
};
9016
/*
** One object per %_data table.
**
** The rc field holds the object's sticky error code. The data-access
** functions in this file return without doing any work if rc is already
** set, and store any new error code there.
*/
struct Fts5Index {
  Fts5Config *pConfig; /* Virtual table configuration */
  char *zDataTbl; /* Name of %_data table */
  int nWorkUnit; /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash; /* Hash table for in-memory data */
  int nPendingData; /* Current bytes of pending data */
  i64 iWriteRowid; /* Rowid for current doc being written */
  int bDelete; /* Current write is a delete */

  /* Error state. */
  int rc; /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect; /* NOTE(review): presumably a SELECT on %_idx;
                            ** the SQL is not visible here - confirm */
  int nRead; /* Total number of blocks read */

  sqlite3_stmt *pDataVersion; /* "PRAGMA <db>.data_version" */
  i64 iStructVersion; /* data_version when pStruct read */
  Fts5Structure *pStruct; /* Current db structure (or NULL) */
};
9050
/*
** Iterator over the entries of a doclist held in memory. Once the end of
** the doclist is reached, aPoslist is set to 0.
*/
struct Fts5DoclistIter {
  u8 *aEof; /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid; /* NOTE(review): presumably rowid of current entry */
  u8 *aPoslist; /* NOTE(review): presumably current position list */
  int nPoslist; /* NOTE(review): presumably size of aPoslist in bytes */
  int nSize; /* NOTE(review): meaning not evident from here - confirm */
};
9060
/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
**
** Fts5Structure objects are reference counted (see fts5StructureRef() and
** fts5StructureRelease()). aLevel[] is declared with one element but the
** object is allocated with room for nLevel entries; each level owns a
** separately allocated aSeg[] array.
*/
struct Fts5StructureSegment {
  int iSegid; /* Segment id */
  int pgnoFirst; /* First leaf page number in segment */
  int pgnoLast; /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge; /* Number of segments in incr-merge */
  int nSeg; /* Total number of segments on level */
  Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  int nRef; /* Object reference count */
  u64 nWriteCounter; /* Total leaves written to level 0 */
  int nSegment; /* Total segments in this structure */
  int nLevel; /* Number of levels in this index */
  Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */
};
9083
/*
** An object of type Fts5SegWriter is used to write to segments.
**
** It embeds one Fts5PageWriter, which holds the leaf page currently being
** assembled, and owns an array of Fts5DlidxWriter objects that hold
** doclist-index pages.
*/
struct Fts5PageWriter {
  int pgno; /* Page number for this page */
  int iPrevPgidx; /* Previous value written into pgidx */
  Fts5Buffer buf; /* Buffer containing leaf data */
  Fts5Buffer pgidx; /* Buffer containing page-index */
  Fts5Buffer term; /* Buffer containing previous term on page */
};
struct Fts5DlidxWriter {
  int pgno; /* Page number for this page */
  int bPrevValid; /* True if iPrev is valid */
  i64 iPrev; /* Previous rowid value written to page */
  Fts5Buffer buf; /* Buffer containing page data */
};
struct Fts5SegWriter {
  int iSegid; /* Segid to write to */
  Fts5PageWriter writer; /* PageWriter object */
  i64 iPrevRowid; /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage; /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage; /* True if next term will be first in leaf */
  int nLeafWritten; /* Number of leaf pages written */
  int nEmpty; /* Number of contiguous term-less nodes */

  int nDlidx; /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm; /* Next term to insert into %_idx table */
  int iBtPage; /* Page number corresponding to btterm */
};
9118
/*
** Result of one comparison between segment iterators. Fts5Iter.aFirst[] is
** an array of these objects.
*/
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst; /* aSeg[] index of firstest iterator */
  u8 bTermEq; /* True if the terms are equal */
};
9124
/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
**
** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**   NOTE(review): the original comment continued with a fragment that
**   seems to describe an earlier meaning of this field: "rowid field of
**   the current entry. Usually this is the size field of the position list
**   data. The exception is if the rowid for the current entry is the last
**   thing on the leaf page." Confirm which description is current.
**
** pLeaf:
**   Buffer containing current leaf page data. Set to NULL at EOF.
**
** iTermLeafPgno, iTermLeafOffset:
**   Leaf page number containing the last term read from the segment. And
**   the offset immediately following the term data.
**
** flags:
**   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
**
**   FTS5_SEGITER_ONETERM:
**     If set, set the iterator to point to EOF after the current doclist
**     has been exhausted. Do not proceed to the next term in the segment.
**
**   FTS5_SEGITER_REVERSE:
**     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
**     it is set, iterate through rowid in descending order instead of the
**     default ascending order.
**
** iRowidOffset/nRowidOffset/aRowidOffset:
**   These are used if the FTS5_SEGITER_REVERSE flag is set.
**
**   For each rowid on the page corresponding to the current term, the
**   corresponding aRowidOffset[] entry is set to the byte offset of the
**   start of the "position-list-size" field within the page.
**
** iTermIdx:
**   Index of current term on iTermLeafPgno.
**   NOTE(review): the struct below has no iTermIdx field; this entry looks
**   stale - confirm.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg; /* Segment to iterate through */
  int flags; /* Mask of configuration flags */
  int iLeafPgno; /* Current leaf page number */
  Fts5Data *pLeaf; /* Current leaf data */
  Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */
  i64 iLeafOffset; /* Byte offset within current leaf */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist. */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff; /* Next offset in pgidx */
  int iEndofDoclist; /* NOTE(review): presumably the offset at which the
                     ** current doclist ends on this leaf - confirm */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset; /* Current entry in aRowidOffset[] */
  int nRowidOffset; /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset; /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx; /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term; /* Current term */
  i64 iRowid; /* Current rowid */
  int nPos; /* Number of bytes in current position list */
  u8 bDel; /* True if the delete flag is set */
};
9203
/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page.
**
** Asserts that the cached szLeaf value either equals the record size (nn)
** or equals the 16-bit big-endian value stored at byte offset 2 of the
** page. Note that the argument x is evaluated more than once.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)
9211
/* Bit values that may be set in Fts5SegIter.flags. */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02
9214
/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
**
** The test is whether szLeaf is at least as large as the record size nn.
*/
#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)

/* Read the 16-bit big-endian value stored 2*i bytes after offset szLeaf.
** NOTE(review): this assumes fixed-width 2-byte entries after szLeaf,
** whereas fts5LeafFirstTermOff() decodes a varint at that offset - confirm
** whether this macro is still in use. */
#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

/* Read the first 16-bit header field of the page: the byte offset of the
** first rowid on the page, or 0. */
#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
9225
/*
** Object for iterating through the merged results of one or more segments,
** visiting each term/rowid pair in the merged data.
**
** nSeg is always a power of two greater than or equal to the number of
** segments that this object is merging data from. Both the aSeg[] and
** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
** with zeroed objects - these are handled as if they were iterators opened
** on empty segments.
**
** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
** comparison in this context is the index of the iterator that currently
** points to the smaller term/rowid combination. Iterators at EOF are
** considered to be greater than all other iterators.
**
** aFirst[1] contains the index in aSeg[] of the iterator that points to
** the smallest key overall. aFirst[0] is unused.
**
** poslist:
**   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
**   There is no way to tell if this is populated or not.
**
** The "base" member comes first in the struct and is described as the base
** class. aSeg[] is declared with one element; per the description above
** the object holds nSeg entries.
*/
struct Fts5Iter {
  Fts5IndexIter base; /* Base class containing output vars */

  Fts5Index *pIndex; /* Index that owns this iterator */
  Fts5Buffer poslist; /* Buffer containing current poslist */
  Fts5Colset *pColset; /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg; /* Size of aSeg[] array */
  int bRev; /* True to iterate in reverse order */
  u8 bSkipEmpty; /* True to skip deleted entries */

  i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst; /* Current merge state (see above) */
  Fts5SegIter aSeg[1]; /* Array of segment iterators */
};
9267
9268
/*
** An instance of the following type is used to iterate through the contents
** of a doclist-index record.
**
** pData:
**   Record containing the doclist-index data.
**
** bEof:
**   Set to true once iterator has reached EOF.
**
** iOff:
**   Set to the current offset within record pData.
**
** An Fts5DlidxIter holds nLvl Fts5DlidxLvl objects in its aLvl[] array,
** which is declared with a single element.
*/
struct Fts5DlidxLvl {
  Fts5Data *pData; /* Data for current page of this level */
  int iOff; /* Current offset into pData */
  int bEof; /* At EOF already */
  int iFirstOff; /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno; /* Page number of current leaf page */
  i64 iRowid; /* First rowid on leaf iLeafPgno */
};
struct Fts5DlidxIter {
  int nLvl; /* Number of entries in aLvl[] */
  int iSegid; /* NOTE(review): presumably id of the segment this
              ** doclist-index belongs to - confirm */
  Fts5DlidxLvl aLvl[1]; /* Per-level iteration state */
};
9297
9298static void fts5PutU16(u8 *aOut, u16 iVal){
9299 aOut[0] = (iVal>>8);
9300 aOut[1] = (iVal&0xFF);
9301}
9302
9303static u16 fts5GetU16(const u8 *aIn){
9304 return ((u16)aIn[0] << 8) + aIn[1];
9305}
9306
9307/*
9308** Allocate and return a buffer at least nByte bytes in size.
9309**
9310** If an OOM error is encountered, return NULL and set the error code in
9311** the Fts5Index handle passed as the first argument.
9312*/
9313static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
9314 return sqlite3Fts5MallocZero(&p->rc, nByte);
9315}
9316
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** If one side is a prefix of the other, the shorter one is the lesser.
**
** memcmp() is only invoked when there is at least one byte to compare.
** An empty Fts5Buffer may have a NULL data pointer, and passing NULL (or a
** non-positive length) to memcmp() is undefined behaviour.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = (nCmp>0) ? memcmp(pLeft->p, pRight, nCmp) : 0;
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
9335
9336/*
9337** Compare the contents of the two buffers using memcmp(). If one buffer
9338** is a prefix of the other, it is considered the lesser.
9339**
9340** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
9341** +ve if pRight is smaller than pLeft. In other words:
9342**
9343** res = *pLeft - *pRight
9344*/
9345static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
9346 int nCmp, res;
9347 nCmp = MIN(pLeft->n, pRight->n);
9348 assert( nCmp<=0 || pLeft->p!=0 );
9349 assert( nCmp<=0 || pRight->p!=0 );
9350 res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
9351 return (res==0 ? (pLeft->n - pRight->n) : res);
9352}
9353
9354static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
9355 int ret;
9356 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
9357 return ret;
9358}
9359
9360/*
9361** Close the read-only blob handle, if it is open.
9362*/
9363static void sqlite3Fts5IndexCloseReader(Fts5Index *p){
9364 if( p->pReader ){
9365 sqlite3_blob *pReader = p->pReader;
9366 p->pReader = 0;
9367 sqlite3_blob_close(pReader);
9368 }
9369}
9370
9371/*
9372** Retrieve a record from the %_data table.
9373**
9374** If an error occurs, NULL is returned and an error left in the
9375** Fts5Index object.
9376*/
9377static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
9378 Fts5Data *pRet = 0;
9379 if( p->rc==SQLITE_OK ){
9380 int rc = SQLITE_OK;
9381
9382 if( p->pReader ){
9383 /* This call may return SQLITE_ABORT if there has been a savepoint
9384 ** rollback since it was last used. In this case a new blob handle
9385 ** is required. */
9386 sqlite3_blob *pBlob = p->pReader;
9387 p->pReader = 0;
9388 rc = sqlite3_blob_reopen(pBlob, iRowid);
9389 assert( p->pReader==0 );
9390 p->pReader = pBlob;
9391 if( rc!=SQLITE_OK ){
9392 sqlite3Fts5IndexCloseReader(p);
9393 }
9394 if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
9395 }
9396
9397 /* If the blob handle is not open at this point, open it and seek
9398 ** to the requested entry. */
9399 if( p->pReader==0 && rc==SQLITE_OK ){
9400 Fts5Config *pConfig = p->pConfig;
9401 rc = sqlite3_blob_open(pConfig->db,
9402 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
9403 );
9404 }
9405
9406 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
9407 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
9408 ** All the reasons those functions might return SQLITE_ERROR - missing
9409 ** table, missing row, non-blob/text in block column - indicate
9410 ** backing store corruption. */
9411 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
9412
9413 if( rc==SQLITE_OK ){
9414 u8 *aOut = 0; /* Read blob data into this buffer */
9415 int nByte = sqlite3_blob_bytes(p->pReader);
9416 sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
9417 pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
9418 if( pRet ){
9419 pRet->nn = nByte;
9420 aOut = pRet->p = (u8*)&pRet[1];
9421 }else{
9422 rc = SQLITE_NOMEM;
9423 }
9424
9425 if( rc==SQLITE_OK ){
9426 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
9427 }
9428 if( rc!=SQLITE_OK ){
9429 sqlite3_free(pRet);
9430 pRet = 0;
9431 }else{
9432 /* TODO1: Fix this */
9433 pRet->p[nByte] = 0x00;
9434 pRet->p[nByte+1] = 0x00;
9435 pRet->szLeaf = fts5GetU16(&pRet->p[2]);
9436 }
9437 }
9438 p->rc = rc;
9439 p->nRead++;
9440 }
9441
9442 assert( (pRet==0)==(p->rc!=SQLITE_OK) );
9443 return pRet;
9444}
9445
9446
9447/*
9448** Release a reference to data record returned by an earlier call to
9449** fts5DataRead().
9450*/
9451static void fts5DataRelease(Fts5Data *pData){
9452 sqlite3_free(pData);
9453}
9454
9455static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
9456 Fts5Data *pRet = fts5DataRead(p, iRowid);
9457 if( pRet ){
9458 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
9459 p->rc = FTS5_CORRUPT;
9460 fts5DataRelease(pRet);
9461 pRet = 0;
9462 }
9463 }
9464 return pRet;
9465}
9466
9467static int fts5IndexPrepareStmt(
9468 Fts5Index *p,
9469 sqlite3_stmt **ppStmt,
9470 char *zSql
9471){
9472 if( p->rc==SQLITE_OK ){
9473 if( zSql ){
9474 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
9475 SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
9476 ppStmt, 0);
9477 }else{
9478 p->rc = SQLITE_NOMEM;
9479 }
9480 }
9481 sqlite3_free(zSql);
9482 return p->rc;
9483}
9484
9485
9486/*
9487** INSERT OR REPLACE a record into the %_data table.
9488*/
9489static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
9490 if( p->rc!=SQLITE_OK ) return;
9491
9492 if( p->pWriter==0 ){
9493 Fts5Config *pConfig = p->pConfig;
9494 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
9495 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
9496 pConfig->zDb, pConfig->zName
9497 ));
9498 if( p->rc ) return;
9499 }
9500
9501 sqlite3_bind_int64(p->pWriter, 1, iRowid);
9502 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
9503 sqlite3_step(p->pWriter);
9504 p->rc = sqlite3_reset(p->pWriter);
9505 sqlite3_bind_null(p->pWriter, 2);
9506}
9507
9508/*
9509** Execute the following SQL:
9510**
9511** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
9512*/
9513static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
9514 if( p->rc!=SQLITE_OK ) return;
9515
9516 if( p->pDeleter==0 ){
9517 Fts5Config *pConfig = p->pConfig;
9518 char *zSql = sqlite3_mprintf(
9519 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
9520 pConfig->zDb, pConfig->zName
9521 );
9522 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
9523 }
9524
9525 sqlite3_bind_int64(p->pDeleter, 1, iFirst);
9526 sqlite3_bind_int64(p->pDeleter, 2, iLast);
9527 sqlite3_step(p->pDeleter);
9528 p->rc = sqlite3_reset(p->pDeleter);
9529}
9530
9531/*
9532** Remove all records associated with segment iSegid.
9533*/
9534static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
9535 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
9536 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
9537 fts5DataDelete(p, iFirst, iLast);
9538 if( p->pIdxDeleter==0 ){
9539 Fts5Config *pConfig = p->pConfig;
9540 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
9541 "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
9542 pConfig->zDb, pConfig->zName
9543 ));
9544 }
9545 if( p->rc==SQLITE_OK ){
9546 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
9547 sqlite3_step(p->pIdxDeleter);
9548 p->rc = sqlite3_reset(p->pIdxDeleter);
9549 }
9550}
9551
9552/*
9553** Release a reference to an Fts5Structure object returned by an earlier
9554** call to fts5StructureRead() or fts5StructureDecode().
9555*/
9556static void fts5StructureRelease(Fts5Structure *pStruct){
9557 if( pStruct && 0>=(--pStruct->nRef) ){
9558 int i;
9559 assert( pStruct->nRef==0 );
9560 for(i=0; i<pStruct->nLevel; i++){
9561 sqlite3_free(pStruct->aLevel[i].aSeg);
9562 }
9563 sqlite3_free(pStruct);
9564 }
9565}
9566
9567static void fts5StructureRef(Fts5Structure *pStruct){
9568 pStruct->nRef++;
9569}
9570
9571static void *sqlite3Fts5StructureRef(Fts5Index *p){
9572 fts5StructureRef(p->pStruct);
9573 return (void*)p->pStruct;
9574}
9575static void sqlite3Fts5StructureRelease(void *p){
9576 if( p ){
9577 fts5StructureRelease((Fts5Structure*)p);
9578 }
9579}
9580static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
9581 if( p->pStruct!=(Fts5Structure*)pStruct ){
9582 return SQLITE_ABORT;
9583 }
9584 return SQLITE_OK;
9585}
9586
9587/*
9588** Ensure that structure object (*pp) is writable.
9589**
9590** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
9591** an error occurs, (*pRc) is set to an SQLite error code before returning.
9592*/
9593static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
9594 Fts5Structure *p = *pp;
9595 if( *pRc==SQLITE_OK && p->nRef>1 ){
9596 i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
9597 Fts5Structure *pNew;
9598 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
9599 if( pNew ){
9600 int i;
9601 memcpy(pNew, p, nByte);
9602 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
9603 for(i=0; i<p->nLevel; i++){
9604 Fts5StructureLevel *pLvl = &pNew->aLevel[i];
9605 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
9606 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
9607 if( pLvl->aSeg==0 ){
9608 for(i=0; i<p->nLevel; i++){
9609 sqlite3_free(pNew->aLevel[i].aSeg);
9610 }
9611 sqlite3_free(pNew);
9612 return;
9613 }
9614 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
9615 }
9616 p->nRef--;
9617 pNew->nRef = 1;
9618 }
9619 *pp = pNew;
9620 }
9621}
9622
9623/*
9624** Deserialize and return the structure record currently stored in serialized
9625** form within buffer pData/nData.
9626**
9627** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
9628** are over-allocated by one slot. This allows the structure contents
9629** to be more easily edited.
9630**
9631** If an error occurs, *ppOut is set to NULL and an SQLite error code
9632** returned. Otherwise, *ppOut is set to point to the new object and
9633** SQLITE_OK returned.
9634*/
9635static int fts5StructureDecode(
9636 const u8 *pData, /* Buffer containing serialized structure */
9637 int nData, /* Size of buffer pData in bytes */
9638 int *piCookie, /* Configuration cookie value */
9639 Fts5Structure **ppOut /* OUT: Deserialized object */
9640){
9641 int rc = SQLITE_OK;
9642 int i = 0;
9643 int iLvl;
9644 int nLevel = 0;
9645 int nSegment = 0;
9646 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */
9647 Fts5Structure *pRet = 0; /* Structure object to return */
9648
9649 /* Grab the cookie value */
9650 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
9651 i = 4;
9652
9653 /* Read the total number of levels and segments from the start of the
9654 ** structure record. */
9655 i += fts5GetVarint32(&pData[i], nLevel);
9656 i += fts5GetVarint32(&pData[i], nSegment);
9657 if( nLevel>FTS5_MAX_SEGMENT || nLevel<0
9658 || nSegment>FTS5_MAX_SEGMENT || nSegment<0
9659 ){
9660 return FTS5_CORRUPT;
9661 }
9662 nByte = (
9663 sizeof(Fts5Structure) + /* Main structure */
9664 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */
9665 );
9666 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
9667
9668 if( pRet ){
9669 pRet->nRef = 1;
9670 pRet->nLevel = nLevel;
9671 pRet->nSegment = nSegment;
9672 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
9673
9674 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
9675 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
9676 int nTotal = 0;
9677 int iSeg;
9678
9679 if( i>=nData ){
9680 rc = FTS5_CORRUPT;
9681 }else{
9682 i += fts5GetVarint32(&pData[i], pLvl->nMerge);
9683 i += fts5GetVarint32(&pData[i], nTotal);
9684 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
9685 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
9686 nTotal * sizeof(Fts5StructureSegment)
9687 );
9688 nSegment -= nTotal;
9689 }
9690
9691 if( rc==SQLITE_OK ){
9692 pLvl->nSeg = nTotal;
9693 for(iSeg=0; iSeg<nTotal; iSeg++){
9694 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
9695 if( i>=nData ){
9696 rc = FTS5_CORRUPT;
9697 break;
9698 }
9699 i += fts5GetVarint32(&pData[i], pSeg->iSegid);
9700 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
9701 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
9702 if( pSeg->pgnoLast<pSeg->pgnoFirst ){
9703 rc = FTS5_CORRUPT;
9704 break;
9705 }
9706 }
9707 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
9708 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
9709 }
9710 }
9711 if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
9712
9713 if( rc!=SQLITE_OK ){
9714 fts5StructureRelease(pRet);
9715 pRet = 0;
9716 }
9717 }
9718
9719 *ppOut = pRet;
9720 return rc;
9721}
9722
9723/*
9724** Add a level to the Fts5Structure.aLevel[] array of structure object
9725** (*ppStruct).
9726*/
9727static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
9728 fts5StructureMakeWritable(pRc, ppStruct);
9729 if( *pRc==SQLITE_OK ){
9730 Fts5Structure *pStruct = *ppStruct;
9731 int nLevel = pStruct->nLevel;
9732 sqlite3_int64 nByte = (
9733 sizeof(Fts5Structure) + /* Main structure */
9734 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */
9735 );
9736
9737 pStruct = sqlite3_realloc64(pStruct, nByte);
9738 if( pStruct ){
9739 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
9740 pStruct->nLevel++;
9741 *ppStruct = pStruct;
9742 }else{
9743 *pRc = SQLITE_NOMEM;
9744 }
9745 }
9746}
9747
9748/*
9749** Extend level iLvl so that there is room for at least nExtra more
9750** segments.
9751*/
9752static void fts5StructureExtendLevel(
9753 int *pRc,
9754 Fts5Structure *pStruct,
9755 int iLvl,
9756 int nExtra,
9757 int bInsert
9758){
9759 if( *pRc==SQLITE_OK ){
9760 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
9761 Fts5StructureSegment *aNew;
9762 sqlite3_int64 nByte;
9763
9764 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
9765 aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
9766 if( aNew ){
9767 if( bInsert==0 ){
9768 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
9769 }else{
9770 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
9771 memmove(&aNew[nExtra], aNew, nMove);
9772 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
9773 }
9774 pLvl->aSeg = aNew;
9775 }else{
9776 *pRc = SQLITE_NOMEM;
9777 }
9778 }
9779}
9780
9781static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
9782 Fts5Structure *pRet = 0;
9783 Fts5Config *pConfig = p->pConfig;
9784 int iCookie; /* Configuration cookie */
9785 Fts5Data *pData;
9786
9787 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
9788 if( p->rc==SQLITE_OK ){
9789 /* TODO: Do we need this if the leaf-index is appended? Probably... */
9790 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
9791 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
9792 if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
9793 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
9794 }
9795 fts5DataRelease(pData);
9796 if( p->rc!=SQLITE_OK ){
9797 fts5StructureRelease(pRet);
9798 pRet = 0;
9799 }
9800 }
9801
9802 return pRet;
9803}
9804
9805static i64 fts5IndexDataVersion(Fts5Index *p){
9806 i64 iVersion = 0;
9807
9808 if( p->rc==SQLITE_OK ){
9809 if( p->pDataVersion==0 ){
9810 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
9811 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
9812 );
9813 if( p->rc ) return 0;
9814 }
9815
9816 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
9817 iVersion = sqlite3_column_int64(p->pDataVersion, 0);
9818 }
9819 p->rc = sqlite3_reset(p->pDataVersion);
9820 }
9821
9822 return iVersion;
9823}
9824
9825/*
9826** Read, deserialize and return the structure record.
9827**
9828** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
9829** are over-allocated as described for function fts5StructureDecode()
9830** above.
9831**
9832** If an error occurs, NULL is returned and an error code left in the
9833** Fts5Index handle. If an error has already occurred when this function
9834** is called, it is a no-op.
9835*/
9836static Fts5Structure *fts5StructureRead(Fts5Index *p){
9837
9838 if( p->pStruct==0 ){
9839 p->iStructVersion = fts5IndexDataVersion(p);
9840 if( p->rc==SQLITE_OK ){
9841 p->pStruct = fts5StructureReadUncached(p);
9842 }
9843 }
9844
9845#if 0
9846 else{
9847 Fts5Structure *pTest = fts5StructureReadUncached(p);
9848 if( pTest ){
9849 int i, j;
9850 assert_nc( p->pStruct->nSegment==pTest->nSegment );
9851 assert_nc( p->pStruct->nLevel==pTest->nLevel );
9852 for(i=0; i<pTest->nLevel; i++){
9853 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
9854 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
9855 for(j=0; j<pTest->aLevel[i].nSeg; j++){
9856 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
9857 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
9858 assert_nc( p1->iSegid==p2->iSegid );
9859 assert_nc( p1->pgnoFirst==p2->pgnoFirst );
9860 assert_nc( p1->pgnoLast==p2->pgnoLast );
9861 }
9862 }
9863 fts5StructureRelease(pTest);
9864 }
9865 }
9866#endif
9867
9868 if( p->rc!=SQLITE_OK ) return 0;
9869 assert( p->iStructVersion!=0 );
9870 assert( p->pStruct!=0 );
9871 fts5StructureRef(p->pStruct);
9872 return p->pStruct;
9873}
9874
9875static void fts5StructureInvalidate(Fts5Index *p){
9876 if( p->pStruct ){
9877 fts5StructureRelease(p->pStruct);
9878 p->pStruct = 0;
9879 }
9880}
9881
/*
** Return the sum of the nSeg values of all levels of index structure
** pStruct, or zero if pStruct is NULL. This function is compiled only
** when SQLITE_DEBUG is defined and is used as part of assert()
** conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running segment count */
  int i;                          /* Level index */

  if( pStruct==0 ) return 0;
  i = 0;
  while( i<pStruct->nLevel ){
    nTotal += pStruct->aLevel[i].nSeg;
    i++;
  }
  return nTotal;
}
#endif
9899
/*
** Append the nBlob bytes at pBlob to buffer pBuf. The buffer is NOT
** grown: the caller must already have reserved enough space, which is
** checked only by an assert(). pBuf must be a pointer to an object with
** members p (byte array), n (bytes used) and nSpace (bytes allocated).
**
** Every macro argument is parenthesized in the expansion so that
** expressions such as (c ? a : b) may be passed safely. The arguments
** are still evaluated more than once, so they must be free of side
** effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {       \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));           \
  (pBuf)->n += (nBlob);                                      \
}

/*
** Append integer iVal to buffer pBuf as a varint written by
** sqlite3Fts5PutVarint(). As with fts5BufferSafeAppendBlob(), the buffer
** is not grown; that enough space was available is only verified, after
** the write, by an assert(). pBuf is evaluated more than once.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                  \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \
  assert( (pBuf)->nSpace>=(pBuf)->n );                            \
}
9910
9911
9912/*
9913** Serialize and store the "structure" record.
9914**
9915** If an error occurs, leave an error code in the Fts5Index object. If an
9916** error has already occurred, this function is a no-op.
9917*/
9918static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
9919 if( p->rc==SQLITE_OK ){
9920 Fts5Buffer buf; /* Buffer to serialize record into */
9921 int iLvl; /* Used to iterate through levels */
9922 int iCookie; /* Cookie value to store */
9923
9924 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
9925 memset(&buf, 0, sizeof(Fts5Buffer));
9926
9927 /* Append the current configuration cookie */
9928 iCookie = p->pConfig->iCookie;
9929 if( iCookie<0 ) iCookie = 0;
9930
9931 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
9932 sqlite3Fts5Put32(buf.p, iCookie);
9933 buf.n = 4;
9934 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
9935 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
9936 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
9937 }
9938
9939 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
9940 int iSeg; /* Used to iterate through segments */
9941 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
9942 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
9943 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
9944 assert( pLvl->nMerge<=pLvl->nSeg );
9945
9946 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
9947 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
9948 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
9949 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
9950 }
9951 }
9952
9953 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
9954 fts5BufferFree(&buf);
9955 }
9956}
9957
/*
** Debugging aid. As shipped, the "#if 0" branch is compiled out and
** fts5PrintStructure(x,y) expands to nothing, so calls to it have no
** effect. Changing the condition to "#if 1" enables a version that
** formats pStruct with fts5DebugStructure() and prints it to stdout,
** prefixed by zCaption.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
9972
9973static int fts5SegmentSize(Fts5StructureSegment *pSeg){
9974 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
9975}
9976
9977/*
9978** Return a copy of index structure pStruct. Except, promote as many
9979** segments as possible to level iPromote. If an OOM occurs, NULL is
9980** returned.
9981*/
9982static void fts5StructurePromoteTo(
9983 Fts5Index *p,
9984 int iPromote,
9985 int szPromote,
9986 Fts5Structure *pStruct
9987){
9988 int il, is;
9989 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
9990
9991 if( pOut->nMerge==0 ){
9992 for(il=iPromote+1; il<pStruct->nLevel; il++){
9993 Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
9994 if( pLvl->nMerge ) return;
9995 for(is=pLvl->nSeg-1; is>=0; is--){
9996 int sz = fts5SegmentSize(&pLvl->aSeg[is]);
9997 if( sz>szPromote ) return;
9998 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
9999 if( p->rc ) return;
10000 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
10001 pOut->nSeg++;
10002 pLvl->nSeg--;
10003 }
10004 }
10005 }
10006}
10007
10008/*
10009** A new segment has just been written to level iLvl of index structure
10010** pStruct. This function determines if any segments should be promoted
10011** as a result. Segments are promoted in two scenarios:
10012**
10013** a) If the segment just written is smaller than one or more segments
10014** within the previous populated level, it is promoted to the previous
10015** populated level.
10016**
10017** b) If the segment just written is larger than the newest segment on
10018** the next populated level, then that segment, and any other adjacent
10019** segments that are also smaller than the one just written, are
10020** promoted.
10021**
10022** If one or more segments are promoted, the structure object is updated
10023** to reflect this.
10024*/
10025static void fts5StructurePromote(
10026 Fts5Index *p, /* FTS5 backend object */
10027 int iLvl, /* Index level just updated */
10028 Fts5Structure *pStruct /* Index structure */
10029){
10030 if( p->rc==SQLITE_OK ){
10031 int iTst;
10032 int iPromote = -1;
10033 int szPromote = 0; /* Promote anything this size or smaller */
10034 Fts5StructureSegment *pSeg; /* Segment just written */
10035 int szSeg; /* Size of segment just written */
10036 int nSeg = pStruct->aLevel[iLvl].nSeg;
10037
10038 if( nSeg==0 ) return;
10039 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
10040 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
10041
10042 /* Check for condition (a) */
10043 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
10044 if( iTst>=0 ){
10045 int i;
10046 int szMax = 0;
10047 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
10048 assert( pTst->nMerge==0 );
10049 for(i=0; i<pTst->nSeg; i++){
10050 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
10051 if( sz>szMax ) szMax = sz;
10052 }
10053 if( szMax>=szSeg ){
10054 /* Condition (a) is true. Promote the newest segment on level
10055 ** iLvl to level iTst. */
10056 iPromote = iTst;
10057 szPromote = szMax;
10058 }
10059 }
10060
10061 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
10062 ** is a no-op if it is not. */
10063 if( iPromote<0 ){
10064 iPromote = iLvl;
10065 szPromote = szSeg;
10066 }
10067 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
10068 }
10069}
10070
10071
10072/*
10073** Advance the iterator passed as the only argument. If the end of the
10074** doclist-index page is reached, return non-zero.
10075*/
10076static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
10077 Fts5Data *pData = pLvl->pData;
10078
10079 if( pLvl->iOff==0 ){
10080 assert( pLvl->bEof==0 );
10081 pLvl->iOff = 1;
10082 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
10083 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
10084 pLvl->iFirstOff = pLvl->iOff;
10085 }else{
10086 int iOff;
10087 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
10088 if( pData->p[iOff] ) break;
10089 }
10090
10091 if( iOff<pData->nn ){
10092 i64 iVal;
10093 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
10094 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
10095 pLvl->iRowid += iVal;
10096 pLvl->iOff = iOff;
10097 }else{
10098 pLvl->bEof = 1;
10099 }
10100 }
10101
10102 return pLvl->bEof;
10103}
10104
10105/*
10106** Advance the iterator passed as the only argument.
10107*/
10108static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
10109 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
10110
10111 assert( iLvl<pIter->nLvl );
10112 if( fts5DlidxLvlNext(pLvl) ){
10113 if( (iLvl+1) < pIter->nLvl ){
10114 fts5DlidxIterNextR(p, pIter, iLvl+1);
10115 if( pLvl[1].bEof==0 ){
10116 fts5DataRelease(pLvl->pData);
10117 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
10118 pLvl->pData = fts5DataRead(p,
10119 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
10120 );
10121 if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
10122 }
10123 }
10124 }
10125
10126 return pIter->aLvl[0].bEof;
10127}
10128static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
10129 return fts5DlidxIterNextR(p, pIter, 0);
10130}
10131
10132/*
10133** The iterator passed as the first argument has the following fields set
10134** as follows. This function sets up the rest of the iterator so that it
10135** points to the first rowid in the doclist-index.
10136**
10137** pData:
10138** pointer to doclist-index record,
10139**
10140** When this function is called pIter->iLeafPgno is the page number the
10141** doclist is associated with (the one featuring the term).
10142*/
10143static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
10144 int i;
10145 for(i=0; i<pIter->nLvl; i++){
10146 fts5DlidxLvlNext(&pIter->aLvl[i]);
10147 }
10148 return pIter->aLvl[0].bEof;
10149}
10150
10151
10152static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
10153 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
10154}
10155
10156static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
10157 int i;
10158
10159 /* Advance each level to the last entry on the last page */
10160 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
10161 Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
10162 while( fts5DlidxLvlNext(pLvl)==0 );
10163 pLvl->bEof = 0;
10164
10165 if( i>0 ){
10166 Fts5DlidxLvl *pChild = &pLvl[-1];
10167 fts5DataRelease(pChild->pData);
10168 memset(pChild, 0, sizeof(Fts5DlidxLvl));
10169 pChild->pData = fts5DataRead(p,
10170 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
10171 );
10172 }
10173 }
10174}
10175
/*
** Move the iterator passed as the only argument to the previous entry.
**
** Returns the value of pLvl->bEof. If the iterator is already positioned
** at or before the first entry on the page (iOff<=iFirstOff), bEof is
** set to 1 and the position is left unchanged. Otherwise iRowid,
** iLeafPgno and iOff are wound back to describe the previous entry.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory. */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the delta that was added when this entry was stepped onto,
    ** and step back one leaf page for the entry itself. */
    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }

    /* Each 0x00 varint skipped accounts for one further leaf page */
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}
10227
10228static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
10229 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
10230
10231 assert( iLvl<pIter->nLvl );
10232 if( fts5DlidxLvlPrev(pLvl) ){
10233 if( (iLvl+1) < pIter->nLvl ){
10234 fts5DlidxIterPrevR(p, pIter, iLvl+1);
10235 if( pLvl[1].bEof==0 ){
10236 fts5DataRelease(pLvl->pData);
10237 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
10238 pLvl->pData = fts5DataRead(p,
10239 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
10240 );
10241 if( pLvl->pData ){
10242 while( fts5DlidxLvlNext(pLvl)==0 );
10243 pLvl->bEof = 0;
10244 }
10245 }
10246 }
10247 }
10248
10249 return pIter->aLvl[0].bEof;
10250}
10251static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
10252 return fts5DlidxIterPrevR(p, pIter, 0);
10253}
10254
10255/*
10256** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
10257*/
10258static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
10259 if( pIter ){
10260 int i;
10261 for(i=0; i<pIter->nLvl; i++){
10262 fts5DataRelease(pIter->aLvl[i].pData);
10263 }
10264 sqlite3_free(pIter);
10265 }
10266}
10267
10268static Fts5DlidxIter *fts5DlidxIterInit(
10269 Fts5Index *p, /* Fts5 Backend to iterate within */
10270 int bRev, /* True for ORDER BY ASC */
10271 int iSegid, /* Segment id */
10272 int iLeafPg /* Leaf page number to load dlidx for */
10273){
10274 Fts5DlidxIter *pIter = 0;
10275 int i;
10276 int bDone = 0;
10277
10278 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
10279 sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
10280 Fts5DlidxIter *pNew;
10281
10282 pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
10283 if( pNew==0 ){
10284 p->rc = SQLITE_NOMEM;
10285 }else{
10286 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
10287 Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
10288 pIter = pNew;
10289 memset(pLvl, 0, sizeof(Fts5DlidxLvl));
10290 pLvl->pData = fts5DataRead(p, iRowid);
10291 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
10292 bDone = 1;
10293 }
10294 pIter->nLvl = i+1;
10295 }
10296 }
10297
10298 if( p->rc==SQLITE_OK ){
10299 pIter->iSegid = iSegid;
10300 if( bRev==0 ){
10301 fts5DlidxIterFirst(pIter);
10302 }else{
10303 fts5DlidxIterLast(p, pIter);
10304 }
10305 }
10306
10307 if( p->rc!=SQLITE_OK ){
10308 fts5DlidxIterFree(pIter);
10309 pIter = 0;
10310 }
10311
10312 return pIter;
10313}
10314
10315static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
10316 return pIter->aLvl[0].iRowid;
10317}
10318static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
10319 return pIter->aLvl[0].iLeafPgno;
10320}
10321
10322/*
10323** Load the next leaf page into the segment iterator.
10324*/
10325static void fts5SegIterNextPage(
10326 Fts5Index *p, /* FTS5 backend object */
10327 Fts5SegIter *pIter /* Iterator to advance to next page */
10328){
10329 Fts5Data *pLeaf;
10330 Fts5StructureSegment *pSeg = pIter->pSeg;
10331 fts5DataRelease(pIter->pLeaf);
10332 pIter->iLeafPgno++;
10333 if( pIter->pNextLeaf ){
10334 pIter->pLeaf = pIter->pNextLeaf;
10335 pIter->pNextLeaf = 0;
10336 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
10337 pIter->pLeaf = fts5LeafRead(p,
10338 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
10339 );
10340 }else{
10341 pIter->pLeaf = 0;
10342 }
10343 pLeaf = pIter->pLeaf;
10344
10345 if( pLeaf ){
10346 pIter->iPgidxOff = pLeaf->szLeaf;
10347 if( fts5LeafIsTermless(pLeaf) ){
10348 pIter->iEndofDoclist = pLeaf->nn+1;
10349 }else{
10350 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
10351 pIter->iEndofDoclist
10352 );
10353 }
10354 }
10355}
10356
10357/*
10358** Argument p points to a buffer containing a varint to be interpreted as a
10359** position list size field. Read the varint and return the number of bytes
10360** read. Before returning, set *pnSz to the number of bytes in the position
10361** list, and *pbDel to true if the delete flag is set, or false otherwise.
10362*/
10363static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
10364 int nSz;
10365 int n = 0;
10366 fts5FastGetVarint32(p, n, nSz);
10367 assert_nc( nSz>=0 );
10368 *pnSz = nSz/2;
10369 *pbDel = nSz & 0x0001;
10370 return n;
10371}
10372
10373/*
10374** Fts5SegIter.iLeafOffset currently points to the first byte of a
10375** position-list size field. Read the value of the field and store it
10376** in the following variables:
10377**
10378** Fts5SegIter.nPos
10379** Fts5SegIter.bDel
10380**
10381** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
10382** position list content (if any).
10383*/
10384static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
10385 if( p->rc==SQLITE_OK ){
10386 int iOff = pIter->iLeafOffset; /* Offset to read at */
10387 ASSERT_SZLEAF_OK(pIter->pLeaf);
10388 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
10389 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
10390 pIter->bDel = 0;
10391 pIter->nPos = 1;
10392 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
10393 pIter->bDel = 1;
10394 iOff++;
10395 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
10396 pIter->nPos = 1;
10397 iOff++;
10398 }else{
10399 pIter->nPos = 0;
10400 }
10401 }
10402 }else{
10403 int nSz;
10404 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
10405 pIter->bDel = (nSz & 0x0001);
10406 pIter->nPos = nSz>>1;
10407 assert_nc( pIter->nPos>=0 );
10408 }
10409 pIter->iLeafOffset = iOff;
10410 }
10411}
10412
10413static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
10414 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
10415 i64 iOff = pIter->iLeafOffset;
10416
10417 ASSERT_SZLEAF_OK(pIter->pLeaf);
10418 if( iOff>=pIter->pLeaf->szLeaf ){
10419 fts5SegIterNextPage(p, pIter);
10420 if( pIter->pLeaf==0 ){
10421 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
10422 return;
10423 }
10424 iOff = 4;
10425 a = pIter->pLeaf->p;
10426 }
10427 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
10428 pIter->iLeafOffset = iOff;
10429}
10430
10431/*
10432** Fts5SegIter.iLeafOffset currently points to the first byte of the
10433** "nSuffix" field of a term. Function parameter nKeep contains the value
10434** of the "nPrefix" field (if there was one - it is passed 0 if this is
10435** the first term in the segment).
10436**
10437** This function populates:
10438**
10439** Fts5SegIter.term
10440** Fts5SegIter.rowid
10441**
10442** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
10443** the first position list. The position list belonging to document
10444** (Fts5SegIter.iRowid).
10445*/
10446static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
10447 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */
10448 i64 iOff = pIter->iLeafOffset; /* Offset to read at */
10449 int nNew; /* Bytes of new data */
10450
10451 iOff += fts5GetVarint32(&a[iOff], nNew);
10452 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
10453 p->rc = FTS5_CORRUPT;
10454 return;
10455 }
10456 pIter->term.n = nKeep;
10457 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
10458 assert( pIter->term.n<=pIter->term.nSpace );
10459 iOff += nNew;
10460 pIter->iTermLeafOffset = iOff;
10461 pIter->iTermLeafPgno = pIter->iLeafPgno;
10462 pIter->iLeafOffset = iOff;
10463
10464 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
10465 pIter->iEndofDoclist = pIter->pLeaf->nn+1;
10466 }else{
10467 int nExtra;
10468 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
10469 pIter->iEndofDoclist += nExtra;
10470 }
10471
10472 fts5SegIterLoadRowid(p, pIter);
10473}
10474
10475static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
10476static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
10477static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
10478
10479static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
10480 if( pIter->flags & FTS5_SEGITER_REVERSE ){
10481 pIter->xNext = fts5SegIterNext_Reverse;
10482 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
10483 pIter->xNext = fts5SegIterNext_None;
10484 }else{
10485 pIter->xNext = fts5SegIterNext;
10486 }
10487}
10488
10489/*
10490** Initialize the iterator object pIter to iterate through the entries in
10491** segment pSeg. The iterator is left pointing to the first entry when
10492** this function returns.
10493**
10494** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
10495** an error has already occurred when this function is called, it is a no-op.
10496*/
10497static void fts5SegIterInit(
10498 Fts5Index *p, /* FTS index object */
10499 Fts5StructureSegment *pSeg, /* Description of segment */
10500 Fts5SegIter *pIter /* Object to populate */
10501){
10502 if( pSeg->pgnoFirst==0 ){
10503 /* This happens if the segment is being used as an input to an incremental
10504 ** merge and all data has already been "trimmed". See function
10505 ** fts5TrimSegments() for details. In this case leave the iterator empty.
10506 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
10507 ** at EOF already. */
10508 assert( pIter->pLeaf==0 );
10509 return;
10510 }
10511
10512 if( p->rc==SQLITE_OK ){
10513 memset(pIter, 0, sizeof(*pIter));
10514 fts5SegIterSetNext(p, pIter);
10515 pIter->pSeg = pSeg;
10516 pIter->iLeafPgno = pSeg->pgnoFirst-1;
10517 fts5SegIterNextPage(p, pIter);
10518 }
10519
10520 if( p->rc==SQLITE_OK ){
10521 pIter->iLeafOffset = 4;
10522 assert( pIter->pLeaf!=0 );
10523 assert_nc( pIter->pLeaf->nn>4 );
10524 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
10525 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
10526 fts5SegIterLoadTerm(p, pIter, 0);
10527 fts5SegIterLoadNPos(p, pIter);
10528 }
10529}
10530
/*
** This function is only ever called on iterators created by calls to
** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
**
** The iterator is in an unusual state when this function is called: the
** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
** the position-list size field for the first relevant rowid on the page.
** Fts5SegIter.rowid is set, but nPos and bDel are not.
**
** This function advances the iterator so that it points to the last
** relevant rowid on the page and, if necessary, initializes the
** aRowidOffset[] and iRowidOffset variables. At this point the iterator
** is in its regular state - Fts5SegIter.iLeafOffset points to the first
** byte of the position list content associated with said rowid.
**
** If the aRowidOffset[] array cannot be grown, p->rc is set to
** SQLITE_NOMEM and the scan stops at the entry reached so far.
*/
static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
  int eDetail = p->pConfig->eDetail;
  int n = pIter->pLeaf->szLeaf;
  int i = pIter->iLeafOffset;
  u8 *a = pIter->pLeaf->p;
  int iRowidOffset = 0;

  /* Do not scan beyond the end of the current doclist */
  if( n>pIter->iEndofDoclist ){
    n = pIter->iEndofDoclist;
  }

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  while( 1 ){
    u64 iDelta = 0;

    /* Step i past the position data of the current entry */
    if( eDetail==FTS5_DETAIL_NONE ){
      /* todo */
      if( i<n && a[i]==0 ){
        i++;
        if( i<n && a[i]==0 ) i++;
      }
    }else{
      int nPos;
      int bDummy;
      i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
      i += nPos;
    }
    if( i>=n ) break;

    /* Another entry follows: read its rowid delta */
    i += fts5GetVarint(&a[i], &iDelta);
    pIter->iRowid += iDelta;

    /* If necessary, grow the pIter->aRowidOffset[] array. */
    if( iRowidOffset>=pIter->nRowidOffset ){
      int nNew = pIter->nRowidOffset + 8;
      int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
      if( aNew==0 ){
        p->rc = SQLITE_NOMEM;
        break;
      }
      pIter->aRowidOffset = aNew;
      pIter->nRowidOffset = nNew;
    }

    /* Remember where the entry just left begins, so that the reverse
    ** "next" operation can return to it later. */
    pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
    pIter->iLeafOffset = i;
  }
  pIter->iRowidOffset = iRowidOffset;
  fts5SegIterLoadNPos(p, pIter);
}
10595
/*
** Move reverse iterator pIter to the closest preceding leaf page that
** holds a rowid for the current term, going no further back than the
** page on which the term itself is stored (iTermLeafPgno).
**
** The current leaf is released. Pages are then loaded in descending
** page-number order. A page is accepted if it is the term's own page and
** the doclist starts before szLeaf, or if it is a later page with a
** non-zero first-rowid offset. Once a page is accepted, its first rowid
** is read and fts5SegIterReverseInitPage() positions the iterator on the
** last entry of that page.
**
** If no page is accepted, pIter->pLeaf is left NULL. If a first-rowid
** offset lies at or beyond szLeaf, p->rc is set to FTS5_CORRUPT.
*/
static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
  assert( pIter->flags & FTS5_SEGITER_REVERSE );
  assert( pIter->flags & FTS5_SEGITER_ONETERM );

  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
  while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
    Fts5Data *pNew;
    pIter->iLeafPgno--;
    pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
          pIter->pSeg->iSegid, pIter->iLeafPgno
    ));
    if( pNew ){
      /* iTermLeafOffset may be equal to szLeaf if the term is the last
      ** thing on the page - i.e. the first rowid is on the following page.
      ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
      if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
        assert( pIter->pLeaf==0 );
        if( pIter->iTermLeafOffset<pNew->szLeaf ){
          pIter->pLeaf = pNew;
          pIter->iLeafOffset = pIter->iTermLeafOffset;
        }
      }else{
        int iRowidOff;
        iRowidOff = fts5LeafFirstRowidOff(pNew);
        if( iRowidOff ){
          if( iRowidOff>=pNew->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            pIter->pLeaf = pNew;
            pIter->iLeafOffset = iRowidOff;
          }
        }
      }

      if( pIter->pLeaf ){
        /* Page accepted: load its first rowid and stop searching */
        u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
        pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
        break;
      }else{
        /* Page holds nothing usable: drop it and try the one before */
        fts5DataRelease(pNew);
      }
    }
  }

  if( pIter->pLeaf ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
    fts5SegIterReverseInitPage(p, pIter);
  }
}
10649
10650/*
10651** Return true if the iterator passed as the second argument currently
10652** points to a delete marker. A delete marker is an entry with a 0 byte
10653** position-list.
10654*/
10655static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
10656 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
10657 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
10658}
10659
10660/*
10661** Advance iterator pIter to the next entry.
10662**
10663** This version of fts5SegIterNext() is only used by reverse iterators.
10664*/
10665static void fts5SegIterNext_Reverse(
10666 Fts5Index *p, /* FTS5 backend object */
10667 Fts5SegIter *pIter, /* Iterator to advance */
10668 int *pbUnused /* Unused */
10669){
10670 assert( pIter->flags & FTS5_SEGITER_REVERSE );
10671 assert( pIter->pNextLeaf==0 );
10672 UNUSED_PARAM(pbUnused);
10673
10674 if( pIter->iRowidOffset>0 ){
10675 u8 *a = pIter->pLeaf->p;
10676 int iOff;
10677 u64 iDelta;
10678
10679 pIter->iRowidOffset--;
10680 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
10681 fts5SegIterLoadNPos(p, pIter);
10682 iOff = pIter->iLeafOffset;
10683 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
10684 iOff += pIter->nPos;
10685 }
10686 fts5GetVarint(&a[iOff], &iDelta);
10687 pIter->iRowid -= iDelta;
10688 }else{
10689 fts5SegIterReverseNewPage(p, pIter);
10690 }
10691}
10692
/*
** Advance iterator pIter to the next entry.
**
** This version of fts5SegIterNext() is only used if detail=none and the
** iterator is not a reverse direction iterator.
**
** If the iterator moves on to a new term and pbNewTerm is not NULL,
** *pbNewTerm is set to 1. When there are no further entries the current
** leaf is released and pIter->pLeaf set to NULL.
*/
static void fts5SegIterNext_None(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  int iOff;

  assert( p->rc==SQLITE_OK );
  assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
  assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );

  ASSERT_SZLEAF_OK(pIter->pLeaf);
  iOff = pIter->iLeafOffset;

  /* Next entry is on the next page */
  if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    if( p->rc || pIter->pLeaf==0 ) return;
    pIter->iRowid = 0;
    iOff = 4;
  }

  if( iOff<pIter->iEndofDoclist ){
    /* Next entry is on the current page */
    i64 iDelta;
    iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
    pIter->iLeafOffset = iOff;
    pIter->iRowid += iDelta;
  }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
    /* End of the current doclist: move on to the next term */
    if( pIter->pSeg ){
      /* Reading from a segment. A prefix-length field is present unless
      ** this is the first term on the leaf. */
      int nKeep = 0;
      if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
        iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
      }
      pIter->iLeafOffset = iOff;
      fts5SegIterLoadTerm(p, pIter, nKeep);
    }else{
      /* No segment (pSeg==0): fetch the next entry from the hash table
      ** scan and point the leaf object directly at its doclist. */
      const u8 *pList = 0;
      const char *zTerm = 0;
      int nList;
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
      if( pList==0 ) goto next_none_eof;
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList;
      sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
    }

    if( pbNewTerm ) *pbNewTerm = 1;
  }else{
    /* Single-term iterator that has reached the end of its doclist */
    goto next_none_eof;
  }

  fts5SegIterLoadNPos(p, pIter);

  return;
 next_none_eof:
  fts5DataRelease(pIter->pLeaf);
  pIter->pLeaf = 0;
}
10762
10763
/*
** Advance iterator pIter to the next entry.
**
** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
** is not considered an error if the iterator reaches EOF; EOF is indicated
** by leaving pIter->pLeaf set to NULL.
**
** NOTE(review): this function does not itself test p->rc on entry. The
** callers visible in this file only invoke it while p->rc==SQLITE_OK.
**
** Parameter pbNewTerm may be NULL. If it is not NULL, it must point to a
** zero value on entry and is set to 1 if the iterator moves on to a new
** term. When iterating the in-memory hash table (pIter->pSeg==0) without
** the FTS5_SEGITER_ONETERM flag, pbNewTerm must be non-NULL, as it is
** written to unconditionally in that case (see the assert() below).
*/
static void fts5SegIterNext(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegIter *pIter,             /* Iterator to advance */
  int *pbNewTerm                  /* OUT: Set for new term */
){
  Fts5Data *pLeaf = pIter->pLeaf;
  int iOff;
  int bNewTerm = 0;               /* True if a new term was reached on-page */
  int nKeep = 0;                  /* Value passed to fts5SegIterLoadTerm() */
  u8 *a;
  int n;

  assert( pbNewTerm==0 || *pbNewTerm==0 );
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  /* Search for the end of the position list within the current page. */
  a = pLeaf->p;
  n = pLeaf->szLeaf;

  ASSERT_SZLEAF_OK(pLeaf);
  iOff = pIter->iLeafOffset + pIter->nPos;

  if( iOff<n ){
    /* The next entry is on the current page. */
    assert_nc( iOff<=pIter->iEndofDoclist );
    if( iOff>=pIter->iEndofDoclist ){
      /* The current doclist is finished, so the next thing on the page is
      ** a term. Unless this is the first term on the page, a varint is
      ** read into nKeep before the term itself. */
      bNewTerm = 1;
      if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
        iOff += fts5GetVarint32(&a[iOff], nKeep);
      }
    }else{
      /* Same doclist: the next value is a delta added to the rowid. */
      u64 iDelta;
      iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
      pIter->iRowid += iDelta;
      assert_nc( iDelta>0 );
    }
    pIter->iLeafOffset = iOff;

  }else if( pIter->pSeg==0 ){
    /* No segment is attached to this iterator. Entries are read from the
    ** hash table p->pHash instead. Unless restricted to a single term,
    ** step the hash scan and repoint pLeaf at the next doclist. */
    const u8 *pList = 0;
    const char *zTerm = 0;
    int nList = 0;
    assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
    if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
      sqlite3Fts5HashScanNext(p->pHash);
      sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
    }
    if( pList==0 ){
      /* Nothing further to visit: EOF. */
      fts5DataRelease(pIter->pLeaf);
      pIter->pLeaf = 0;
    }else{
      pIter->pLeaf->p = (u8*)pList;
      pIter->pLeaf->nn = nList;
      pIter->pLeaf->szLeaf = nList;
      pIter->iEndofDoclist = nList+1;
      sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
          (u8*)zTerm);
      pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
      /* Only reachable when ONETERM is clear, in which case the assert()
      ** above requires pbNewTerm to be non-NULL. */
      *pbNewTerm = 1;
    }
  }else{
    iOff = 0;
    /* Next entry is not on the current page */
    while( iOff==0 ){
      fts5SegIterNextPage(p, pIter);
      pLeaf = pIter->pLeaf;
      if( pLeaf==0 ) break;
      ASSERT_SZLEAF_OK(pLeaf);
      if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
        /* The page has a first-rowid offset inside the leaf data. Read the
        ** rowid stored there (it overwrites iRowid rather than being added
        ** to it) and, if the page has a footer (nn>szLeaf), read its first
        ** varint into iEndofDoclist. */
        iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
        pIter->iLeafOffset = iOff;

        if( pLeaf->nn>pLeaf->szLeaf ){
          pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
              &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
          );
        }
      }
      else if( pLeaf->nn>pLeaf->szLeaf ){
        /* No usable first rowid, but the page has a footer. Its first
        ** varint is taken as the offset of a term, which both ends the
        ** current doclist and becomes the new position. */
        pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
            &pLeaf->p[pLeaf->szLeaf], iOff
        );
        pIter->iLeafOffset = iOff;
        pIter->iEndofDoclist = iOff;
        bNewTerm = 1;
      }
      assert_nc( iOff<pLeaf->szLeaf );
      if( iOff>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
        return;
      }
    }
  }

  /* Check if the iterator is now at EOF. If so, return early. */
  if( pIter->pLeaf ){
    if( bNewTerm ){
      if( pIter->flags & FTS5_SEGITER_ONETERM ){
        /* A single-term iterator treats the start of the next term as
        ** EOF. */
        fts5DataRelease(pIter->pLeaf);
        pIter->pLeaf = 0;
      }else{
        fts5SegIterLoadTerm(p, pIter, nKeep);
        fts5SegIterLoadNPos(p, pIter);
        if( pbNewTerm ) *pbNewTerm = 1;
      }
    }else{
      /* The following could be done by calling fts5SegIterLoadNPos(). But
      ** this block is particularly performance critical, so equivalent
      ** code is inlined. The varint at iLeafOffset holds the delete flag
      ** in its least significant bit and the poslist size in the rest. */
      int nSz;
      assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
      fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
      pIter->bDel = (nSz & 0x0001);
      pIter->nPos = nSz>>1;
      assert_nc( pIter->nPos>=0 );
    }
  }
}
10889
/*
** Exchange the values of lvalues a and b, both of which have type T. Each
** argument is evaluated more than once, so neither may have side effects.
*/
#define SWAPVAL(T, a, b) {   \
  T tmp = a;                 \
  a = b;                     \
  b = tmp;                   \
}
10891
/*
** Advance integer lvalue iOff past the varint stored at a[iOff]. Bytes are
** consumed until one without the 0x80 continuation bit has been read, or
** until 9 bytes have been consumed, whichever happens first. At least one
** byte is always consumed. Both arguments are evaluated multiple times.
*/
#define fts5IndexSkipVarint(a, iOff) {          \
  int iEnd = iOff+9;                            \
  for(;;){                                      \
    int bMore = ((a[iOff] & 0x80)!=0);          \
    iOff++;                                     \
    if( !bMore || iOff>=iEnd ) break;           \
  }                                             \
}
10896
10897/*
10898** Iterator pIter currently points to the first rowid in a doclist. This
10899** function sets the iterator up so that iterates in reverse order through
10900** the doclist.
10901*/
10902static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
10903 Fts5DlidxIter *pDlidx = pIter->pDlidx;
10904 Fts5Data *pLast = 0;
10905 int pgnoLast = 0;
10906
10907 if( pDlidx ){
10908 int iSegid = pIter->pSeg->iSegid;
10909 pgnoLast = fts5DlidxIterPgno(pDlidx);
10910 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
10911 }else{
10912 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
10913
10914 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
10915 ** position-list content for the current rowid. Back it up so that it
10916 ** points to the start of the position-list size field. */
10917 int iPoslist;
10918 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
10919 iPoslist = pIter->iTermLeafOffset;
10920 }else{
10921 iPoslist = 4;
10922 }
10923 fts5IndexSkipVarint(pLeaf->p, iPoslist);
10924 pIter->iLeafOffset = iPoslist;
10925
10926 /* If this condition is true then the largest rowid for the current
10927 ** term may not be stored on the current page. So search forward to
10928 ** see where said rowid really is. */
10929 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
10930 int pgno;
10931 Fts5StructureSegment *pSeg = pIter->pSeg;
10932
10933 /* The last rowid in the doclist may not be on the current page. Search
10934 ** forward to find the page containing the last rowid. */
10935 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
10936 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
10937 Fts5Data *pNew = fts5LeafRead(p, iAbs);
10938 if( pNew ){
10939 int iRowid, bTermless;
10940 iRowid = fts5LeafFirstRowidOff(pNew);
10941 bTermless = fts5LeafIsTermless(pNew);
10942 if( iRowid ){
10943 SWAPVAL(Fts5Data*, pNew, pLast);
10944 pgnoLast = pgno;
10945 }
10946 fts5DataRelease(pNew);
10947 if( bTermless==0 ) break;
10948 }
10949 }
10950 }
10951 }
10952
10953 /* If pLast is NULL at this point, then the last rowid for this doclist
10954 ** lies on the page currently indicated by the iterator. In this case
10955 ** pIter->iLeafOffset is already set to point to the position-list size
10956 ** field associated with the first relevant rowid on the page.
10957 **
10958 ** Or, if pLast is non-NULL, then it is the page that contains the last
10959 ** rowid. In this case configure the iterator so that it points to the
10960 ** first rowid on this page.
10961 */
10962 if( pLast ){
10963 int iOff;
10964 fts5DataRelease(pIter->pLeaf);
10965 pIter->pLeaf = pLast;
10966 pIter->iLeafPgno = pgnoLast;
10967 iOff = fts5LeafFirstRowidOff(pLast);
10968 if( iOff>pLast->szLeaf ){
10969 p->rc = FTS5_CORRUPT;
10970 return;
10971 }
10972 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
10973 pIter->iLeafOffset = iOff;
10974
10975 if( fts5LeafIsTermless(pLast) ){
10976 pIter->iEndofDoclist = pLast->nn+1;
10977 }else{
10978 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
10979 }
10980 }
10981
10982 fts5SegIterReverseInitPage(p, pIter);
10983}
10984
10985/*
10986** Iterator pIter currently points to the first rowid of a doclist.
10987** There is a doclist-index associated with the final term on the current
10988** page. If the current term is the last term on the page, load the
10989** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
10990*/
10991static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
10992 int iSeg = pIter->pSeg->iSegid;
10993 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
10994 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
10995
10996 assert( pIter->flags & FTS5_SEGITER_ONETERM );
10997 assert( pIter->pDlidx==0 );
10998
10999 /* Check if the current doclist ends on this page. If it does, return
11000 ** early without loading the doclist-index (as it belongs to a different
11001 ** term. */
11002 if( pIter->iTermLeafPgno==pIter->iLeafPgno
11003 && pIter->iEndofDoclist<pLeaf->szLeaf
11004 ){
11005 return;
11006 }
11007
11008 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
11009}
11010
/*
** The iterator object passed as the second argument currently contains
** no valid values except for the Fts5SegIter.pLeaf member variable. This
** function searches the leaf page for a term matching (pTerm/nTerm).
**
** If the specified term is found on the page, then the iterator is left
** pointing to it. If argument bGe is zero and the term is not found,
** the iterator is left pointing at EOF (pIter->pLeaf set to NULL).
**
** If bGe is non-zero and the specified term is not found, then the
** iterator is left pointing to the smallest term in the segment that
** is larger than the specified term, even if this term is not on the
** current page.
**
** p->rc must be SQLITE_OK on entry. It is set to FTS5_CORRUPT if an
** offset read from the page lies outside the page.
*/
static void fts5LeafSeek(
  Fts5Index *p,                   /* Leave any error code here */
  int bGe,                        /* True for a >= search */
  Fts5SegIter *pIter,             /* Iterator to seek */
  const u8 *pTerm, int nTerm      /* Term to search for */
){
  u32 iOff;                       /* Current read offset within the page */
  const u8 *a = pIter->pLeaf->p;
  u32 n = (u32)pIter->pLeaf->nn;

  u32 nMatch = 0;                 /* Leading bytes of pTerm matched so far */
  u32 nKeep = 0;                  /* "keep" count read for the current term */
  u32 nNew = 0;                   /* New bytes stored for the current term */
  u32 iTermOff;                   /* Offset of the current term's record */
  u32 iPgidx;                     /* Current offset in pgidx */
  int bEndOfPage = 0;             /* Set if the search ran off the page */

  assert( p->rc==SQLITE_OK );

  /* The pgidx begins at offset szLeaf. Its first varint is the offset of
  ** the first term on the page. */
  iPgidx = (u32)pIter->pLeaf->szLeaf;
  iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
  iOff = iTermOff;
  if( iOff>n ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  while( 1 ){

    /* Figure out how many new bytes are in this term */
    fts5FastGetVarint32(a, iOff, nNew);
    if( nKeep<nMatch ){
      /* This term keeps fewer bytes than have been matched so far. */
      goto search_failed;
    }

    assert( nKeep>=nMatch );
    if( nKeep==nMatch ){
      /* Compare the new bytes of this term against the unmatched part of
      ** pTerm, extending nMatch by the number of bytes that agree. */
      u32 nCmp;
      u32 i;
      nCmp = (u32)MIN(nNew, nTerm-nMatch);
      for(i=0; i<nCmp; i++){
        if( a[iOff+i]!=pTerm[nMatch+i] ) break;
      }
      nMatch += i;

      if( (u32)nTerm==nMatch ){
        /* All of pTerm matched. It is an exact hit only if this term has
        ** no further bytes. */
        if( i==nNew ){
          goto search_success;
        }else{
          goto search_failed;
        }
      }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
        /* First differing byte of this term is larger than pTerm's. */
        goto search_failed;
      }
    }

    if( iPgidx>=n ){
      bEndOfPage = 1;
      break;
    }

    /* Advance to the next term. The pgidx varint is a delta from the
    ** previous term offset. nKeep is used as scratch space for it here and
    ** is overwritten with the real value just below. */
    iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
    iTermOff += nKeep;
    iOff = iTermOff;

    if( iOff>=n ){
      p->rc = FTS5_CORRUPT;
      return;
    }

    /* Read the nKeep field of the next term. */
    fts5FastGetVarint32(a, iOff, nKeep);
  }

  /* Control arrives here either by goto or by breaking out of the loop
  ** above with bEndOfPage set. */
 search_failed:
  if( bGe==0 ){
    /* Exact-match search: no such term, so leave the iterator at EOF. */
    fts5DataRelease(pIter->pLeaf);
    pIter->pLeaf = 0;
    return;
  }else if( bEndOfPage ){
    /* >= search that ran off the end of the page. Step through following
    ** pages until one that contains a term is found, and position on its
    ** first term. If there are no more pages, return with pLeaf==0. */
    do {
      fts5SegIterNextPage(p, pIter);
      if( pIter->pLeaf==0 ) return;
      a = pIter->pLeaf->p;
      if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
        iPgidx = (u32)pIter->pLeaf->szLeaf;
        iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
        if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
          p->rc = FTS5_CORRUPT;
          return;
        }else{
          nKeep = 0;
          iTermOff = iOff;
          n = (u32)pIter->pLeaf->nn;
          iOff += fts5GetVarint32(&a[iOff], nNew);
          break;
        }
      }
    }while( 1 );
  }
  /* Otherwise (bGe set, still on the page) fall through with the iterator
  ** positioned on the term at which the search stopped. */

 search_success:
  if( (i64)iOff+nNew>n || nNew<1 ){
    p->rc = FTS5_CORRUPT;
    return;
  }
  pIter->iLeafOffset = iOff + nNew;
  pIter->iTermLeafOffset = pIter->iLeafOffset;
  pIter->iTermLeafPgno = pIter->iLeafPgno;

  /* The term is the first nKeep bytes of pTerm followed by the nNew bytes
  ** stored on the page. */
  fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);

  /* If the pgidx has another entry, it gives the offset of the next term,
  ** which is where this doclist ends. Otherwise the doclist extends past
  ** the end of the page. */
  if( iPgidx>=n ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
    pIter->iEndofDoclist = iTermOff + nExtra;
  }
  pIter->iPgidxOff = iPgidx;

  fts5SegIterLoadRowid(p, pIter);
  fts5SegIterLoadNPos(p, pIter);
}
11150
11151static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
11152 if( p->pIdxSelect==0 ){
11153 Fts5Config *pConfig = p->pConfig;
11154 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
11155 "SELECT pgno FROM '%q'.'%q_idx' WHERE "
11156 "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
11157 pConfig->zDb, pConfig->zName
11158 ));
11159 }
11160 return p->pIdxSelect;
11161}
11162
11163/*
11164** Initialize the object pIter to point to term pTerm/nTerm within segment
11165** pSeg. If there is no such term in the index, the iterator is set to EOF.
11166**
11167** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
11168** an error has already occurred when this function is called, it is a no-op.
11169*/
11170static void fts5SegIterSeekInit(
11171 Fts5Index *p, /* FTS5 backend */
11172 const u8 *pTerm, int nTerm, /* Term to seek to */
11173 int flags, /* Mask of FTS5INDEX_XXX flags */
11174 Fts5StructureSegment *pSeg, /* Description of segment */
11175 Fts5SegIter *pIter /* Object to populate */
11176){
11177 int iPg = 1;
11178 int bGe = (flags & FTS5INDEX_QUERY_SCAN);
11179 int bDlidx = 0; /* True if there is a doclist-index */
11180 sqlite3_stmt *pIdxSelect = 0;
11181
11182 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
11183 assert( pTerm && nTerm );
11184 memset(pIter, 0, sizeof(*pIter));
11185 pIter->pSeg = pSeg;
11186
11187 /* This block sets stack variable iPg to the leaf page number that may
11188 ** contain term (pTerm/nTerm), if it is present in the segment. */
11189 pIdxSelect = fts5IdxSelectStmt(p);
11190 if( p->rc ) return;
11191 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
11192 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
11193 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
11194 i64 val = sqlite3_column_int(pIdxSelect, 0);
11195 iPg = (int)(val>>1);
11196 bDlidx = (val & 0x0001);
11197 }
11198 p->rc = sqlite3_reset(pIdxSelect);
11199 sqlite3_bind_null(pIdxSelect, 2);
11200
11201 if( iPg<pSeg->pgnoFirst ){
11202 iPg = pSeg->pgnoFirst;
11203 bDlidx = 0;
11204 }
11205
11206 pIter->iLeafPgno = iPg - 1;
11207 fts5SegIterNextPage(p, pIter);
11208
11209 if( pIter->pLeaf ){
11210 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
11211 }
11212
11213 if( p->rc==SQLITE_OK && bGe==0 ){
11214 pIter->flags |= FTS5_SEGITER_ONETERM;
11215 if( pIter->pLeaf ){
11216 if( flags & FTS5INDEX_QUERY_DESC ){
11217 pIter->flags |= FTS5_SEGITER_REVERSE;
11218 }
11219 if( bDlidx ){
11220 fts5SegIterLoadDlidx(p, pIter);
11221 }
11222 if( flags & FTS5INDEX_QUERY_DESC ){
11223 fts5SegIterReverse(p, pIter);
11224 }
11225 }
11226 }
11227
11228 fts5SegIterSetNext(p, pIter);
11229
11230 /* Either:
11231 **
11232 ** 1) an error has occurred, or
11233 ** 2) the iterator points to EOF, or
11234 ** 3) the iterator points to an entry with term (pTerm/nTerm), or
11235 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
11236 ** to an entry with a term greater than or equal to (pTerm/nTerm).
11237 */
11238 assert_nc( p->rc!=SQLITE_OK /* 1 */
11239 || pIter->pLeaf==0 /* 2 */
11240 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */
11241 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */
11242 );
11243}
11244
11245/*
11246** Initialize the object pIter to point to term pTerm/nTerm within the
11247** in-memory hash table. If there is no such term in the hash-table, the
11248** iterator is set to EOF.
11249**
11250** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
11251** an error has already occurred when this function is called, it is a no-op.
11252*/
11253static void fts5SegIterHashInit(
11254 Fts5Index *p, /* FTS5 backend */
11255 const u8 *pTerm, int nTerm, /* Term to seek to */
11256 int flags, /* Mask of FTS5INDEX_XXX flags */
11257 Fts5SegIter *pIter /* Object to populate */
11258){
11259 int nList = 0;
11260 const u8 *z = 0;
11261 int n = 0;
11262 Fts5Data *pLeaf = 0;
11263
11264 assert( p->pHash );
11265 assert( p->rc==SQLITE_OK );
11266
11267 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
11268 const u8 *pList = 0;
11269
11270 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
11271 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
11272 n = (z ? (int)strlen((const char*)z) : 0);
11273 if( pList ){
11274 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
11275 if( pLeaf ){
11276 pLeaf->p = (u8*)pList;
11277 }
11278 }
11279 }else{
11280 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
11281 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
11282 );
11283 if( pLeaf ){
11284 pLeaf->p = (u8*)&pLeaf[1];
11285 }
11286 z = pTerm;
11287 n = nTerm;
11288 pIter->flags |= FTS5_SEGITER_ONETERM;
11289 }
11290
11291 if( pLeaf ){
11292 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
11293 pLeaf->nn = pLeaf->szLeaf = nList;
11294 pIter->pLeaf = pLeaf;
11295 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
11296 pIter->iEndofDoclist = pLeaf->nn;
11297
11298 if( flags & FTS5INDEX_QUERY_DESC ){
11299 pIter->flags |= FTS5_SEGITER_REVERSE;
11300 fts5SegIterReverseInitPage(p, pIter);
11301 }else{
11302 fts5SegIterLoadNPos(p, pIter);
11303 }
11304 }
11305
11306 fts5SegIterSetNext(p, pIter);
11307}
11308
11309/*
11310** Zero the iterator passed as the only argument.
11311*/
11312static void fts5SegIterClear(Fts5SegIter *pIter){
11313 fts5BufferFree(&pIter->term);
11314 fts5DataRelease(pIter->pLeaf);
11315 fts5DataRelease(pIter->pNextLeaf);
11316 fts5DlidxIterFree(pIter->pDlidx);
11317 sqlite3_free(pIter->aRowidOffset);
11318 memset(pIter, 0, sizeof(Fts5SegIter));
11319}
11320
11321#ifdef SQLITE_DEBUG
11322
11323/*
11324** This function is used as part of the big assert() procedure implemented by
11325** fts5AssertMultiIterSetup(). It ensures that the result currently stored
11326** in *pRes is the correct result of comparing the current positions of the
11327** two iterators.
11328*/
11329static void fts5AssertComparisonResult(
11330 Fts5Iter *pIter,
11331 Fts5SegIter *p1,
11332 Fts5SegIter *p2,
11333 Fts5CResult *pRes
11334){
11335 int i1 = p1 - pIter->aSeg;
11336 int i2 = p2 - pIter->aSeg;
11337
11338 if( p1->pLeaf || p2->pLeaf ){
11339 if( p1->pLeaf==0 ){
11340 assert( pRes->iFirst==i2 );
11341 }else if( p2->pLeaf==0 ){
11342 assert( pRes->iFirst==i1 );
11343 }else{
11344 int nMin = MIN(p1->term.n, p2->term.n);
11345 int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
11346 if( res==0 ) res = p1->term.n - p2->term.n;
11347
11348 if( res==0 ){
11349 assert( pRes->bTermEq==1 );
11350 assert( p1->iRowid!=p2->iRowid );
11351 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
11352 }else{
11353 assert( pRes->bTermEq==0 );
11354 }
11355
11356 if( res<0 ){
11357 assert( pRes->iFirst==i1 );
11358 }else{
11359 assert( pRes->iFirst==i2 );
11360 }
11361 }
11362 }
11363}
11364
11365/*
11366** This function is a no-op unless SQLITE_DEBUG is defined when this module
11367** is compiled. In that case, this function is essentially an assert()
11368** statement used to verify that the contents of the pIter->aFirst[] array
11369** are correct.
11370*/
11371static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
11372 if( p->rc==SQLITE_OK ){
11373 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
11374 int i;
11375
11376 assert( (pFirst->pLeaf==0)==pIter->base.bEof );
11377
11378 /* Check that pIter->iSwitchRowid is set correctly. */
11379 for(i=0; i<pIter->nSeg; i++){
11380 Fts5SegIter *p1 = &pIter->aSeg[i];
11381 assert( p1==pFirst
11382 || p1->pLeaf==0
11383 || fts5BufferCompare(&pFirst->term, &p1->term)
11384 || p1->iRowid==pIter->iSwitchRowid
11385 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
11386 );
11387 }
11388
11389 for(i=0; i<pIter->nSeg; i+=2){
11390 Fts5SegIter *p1 = &pIter->aSeg[i];
11391 Fts5SegIter *p2 = &pIter->aSeg[i+1];
11392 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
11393 fts5AssertComparisonResult(pIter, p1, p2, pRes);
11394 }
11395
11396 for(i=1; i<(pIter->nSeg / 2); i+=2){
11397 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
11398 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
11399 Fts5CResult *pRes = &pIter->aFirst[i];
11400 fts5AssertComparisonResult(pIter, p1, p2, pRes);
11401 }
11402 }
11403}
11404#else
11405# define fts5AssertMultiIterSetup(x,y)
11406#endif
11407
11408/*
11409** Do the comparison necessary to populate pIter->aFirst[iOut].
11410**
11411** If the returned value is non-zero, then it is the index of an entry
11412** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
11413** to a key that is a duplicate of another, higher priority,
11414** segment-iterator in the pSeg->aSeg[] array.
11415*/
11416static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
11417 int i1; /* Index of left-hand Fts5SegIter */
11418 int i2; /* Index of right-hand Fts5SegIter */
11419 int iRes;
11420 Fts5SegIter *p1; /* Left-hand Fts5SegIter */
11421 Fts5SegIter *p2; /* Right-hand Fts5SegIter */
11422 Fts5CResult *pRes = &pIter->aFirst[iOut];
11423
11424 assert( iOut<pIter->nSeg && iOut>0 );
11425 assert( pIter->bRev==0 || pIter->bRev==1 );
11426
11427 if( iOut>=(pIter->nSeg/2) ){
11428 i1 = (iOut - pIter->nSeg/2) * 2;
11429 i2 = i1 + 1;
11430 }else{
11431 i1 = pIter->aFirst[iOut*2].iFirst;
11432 i2 = pIter->aFirst[iOut*2+1].iFirst;
11433 }
11434 p1 = &pIter->aSeg[i1];
11435 p2 = &pIter->aSeg[i2];
11436
11437 pRes->bTermEq = 0;
11438 if( p1->pLeaf==0 ){ /* If p1 is at EOF */
11439 iRes = i2;
11440 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */
11441 iRes = i1;
11442 }else{
11443 int res = fts5BufferCompare(&p1->term, &p2->term);
11444 if( res==0 ){
11445 assert_nc( i2>i1 );
11446 assert_nc( i2!=0 );
11447 pRes->bTermEq = 1;
11448 if( p1->iRowid==p2->iRowid ){
11449 p1->bDel = p2->bDel;
11450 return i2;
11451 }
11452 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
11453 }
11454 assert( res!=0 );
11455 if( res<0 ){
11456 iRes = i1;
11457 }else{
11458 iRes = i2;
11459 }
11460 }
11461
11462 pRes->iFirst = (u16)iRes;
11463 return 0;
11464}
11465
11466/*
11467** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
11468** It is an error if leaf iLeafPgno does not exist or contains no rowids.
11469*/
11470static void fts5SegIterGotoPage(
11471 Fts5Index *p, /* FTS5 backend object */
11472 Fts5SegIter *pIter, /* Iterator to advance */
11473 int iLeafPgno
11474){
11475 assert( iLeafPgno>pIter->iLeafPgno );
11476
11477 if( iLeafPgno>pIter->pSeg->pgnoLast ){
11478 p->rc = FTS5_CORRUPT;
11479 }else{
11480 fts5DataRelease(pIter->pNextLeaf);
11481 pIter->pNextLeaf = 0;
11482 pIter->iLeafPgno = iLeafPgno-1;
11483 fts5SegIterNextPage(p, pIter);
11484 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
11485
11486 if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){
11487 int iOff;
11488 u8 *a = pIter->pLeaf->p;
11489 int n = pIter->pLeaf->szLeaf;
11490
11491 iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
11492 if( iOff<4 || iOff>=n ){
11493 p->rc = FTS5_CORRUPT;
11494 }else{
11495 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
11496 pIter->iLeafOffset = iOff;
11497 fts5SegIterLoadNPos(p, pIter);
11498 }
11499 }
11500 }
11501}
11502
11503/*
11504** Advance the iterator passed as the second argument until it is at or
11505** past rowid iFrom. Regardless of the value of iFrom, the iterator is
11506** always advanced at least once.
11507*/
11508static void fts5SegIterNextFrom(
11509 Fts5Index *p, /* FTS5 backend object */
11510 Fts5SegIter *pIter, /* Iterator to advance */
11511 i64 iMatch /* Advance iterator at least this far */
11512){
11513 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
11514 Fts5DlidxIter *pDlidx = pIter->pDlidx;
11515 int iLeafPgno = pIter->iLeafPgno;
11516 int bMove = 1;
11517
11518 assert( pIter->flags & FTS5_SEGITER_ONETERM );
11519 assert( pIter->pDlidx );
11520 assert( pIter->pLeaf );
11521
11522 if( bRev==0 ){
11523 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
11524 iLeafPgno = fts5DlidxIterPgno(pDlidx);
11525 fts5DlidxIterNext(p, pDlidx);
11526 }
11527 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
11528 if( iLeafPgno>pIter->iLeafPgno ){
11529 fts5SegIterGotoPage(p, pIter, iLeafPgno);
11530 bMove = 0;
11531 }
11532 }else{
11533 assert( pIter->pNextLeaf==0 );
11534 assert( iMatch<pIter->iRowid );
11535 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
11536 fts5DlidxIterPrev(p, pDlidx);
11537 }
11538 iLeafPgno = fts5DlidxIterPgno(pDlidx);
11539
11540 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
11541
11542 if( iLeafPgno<pIter->iLeafPgno ){
11543 pIter->iLeafPgno = iLeafPgno+1;
11544 fts5SegIterReverseNewPage(p, pIter);
11545 bMove = 0;
11546 }
11547 }
11548
11549 do{
11550 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
11551 if( pIter->pLeaf==0 ) break;
11552 if( bRev==0 && pIter->iRowid>=iMatch ) break;
11553 if( bRev!=0 && pIter->iRowid<=iMatch ) break;
11554 bMove = 1;
11555 }while( p->rc==SQLITE_OK );
11556}
11557
11558
11559/*
11560** Free the iterator object passed as the second argument.
11561*/
11562static void fts5MultiIterFree(Fts5Iter *pIter){
11563 if( pIter ){
11564 int i;
11565 for(i=0; i<pIter->nSeg; i++){
11566 fts5SegIterClear(&pIter->aSeg[i]);
11567 }
11568 fts5BufferFree(&pIter->poslist);
11569 sqlite3_free(pIter);
11570 }
11571}
11572
11573static void fts5MultiIterAdvanced(
11574 Fts5Index *p, /* FTS5 backend to iterate within */
11575 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */
11576 int iChanged, /* Index of sub-iterator just advanced */
11577 int iMinset /* Minimum entry in aFirst[] to set */
11578){
11579 int i;
11580 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
11581 int iEq;
11582 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
11583 Fts5SegIter *pSeg = &pIter->aSeg[iEq];
11584 assert( p->rc==SQLITE_OK );
11585 pSeg->xNext(p, pSeg, 0);
11586 i = pIter->nSeg + iEq;
11587 }
11588 }
11589}
11590
/*
** Sub-iterator iChanged of iterator pIter has just been advanced. It still
** points to the same term though - just a different rowid. This function
** attempts to update the contents of the pIter->aFirst[] accordingly.
** If it does so successfully, 0 is returned. Otherwise 1.
**
** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
** on the iterator instead. That function does the same as this one, except
** that it deals with more complicated cases as well.
**
** On a zero return, *ppFirst is set to the sub-iterator that now comes
** first. *ppFirst is not written when 1 is returned.
*/
static int fts5MultiIterAdvanceRowid(
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  Fts5SegIter **ppFirst
){
  Fts5SegIter *pNew = &pIter->aSeg[iChanged];

  /* aFirst[] only needs attention if the new rowid has reached or passed
  ** iSwitchRowid (in the direction of iteration). Otherwise the advanced
  ** sub-iterator still comes first. */
  if( pNew->iRowid==pIter->iSwitchRowid
   || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
  ){
    int i;
    Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
    /* Reset iSwitchRowid to the extreme value for the iteration direction.
    ** It is tightened again as the loop visits competitors on the same
    ** term. */
    pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
    /* Walk from the entry directly above sub-iterator iChanged up to the
    ** root entry aFirst[1]. At each level pNew is the current winner and
    ** pOther is its competitor. */
    for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
      Fts5CResult *pRes = &pIter->aFirst[i];

      assert( pNew->pLeaf );
      assert( pRes->bTermEq==0 || pOther->pLeaf );

      /* Only a competitor on the same term can change the outcome. */
      if( pRes->bTermEq ){
        if( pNew->iRowid==pOther->iRowid ){
          /* Duplicate rowid: leave it to fts5MultiIterAdvanced(). */
          return 1;
        }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
          /* pOther now comes first. It becomes the winner carried up. */
          pIter->iSwitchRowid = pOther->iRowid;
          pNew = pOther;
        }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
          /* pNew still wins, but pOther is nearer than the current
          ** iSwitchRowid. */
          pIter->iSwitchRowid = pOther->iRowid;
        }
      }
      pRes->iFirst = (u16)(pNew - pIter->aSeg);
      if( i==1 ) break;

      /* The competitor at the next level up is the winner recorded in the
      ** sibling entry. */
      pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
    }
  }

  *ppFirst = pNew;
  return 0;
}
11640
11641/*
11642** Set the pIter->bEof variable based on the state of the sub-iterators.
11643*/
11644static void fts5MultiIterSetEof(Fts5Iter *pIter){
11645 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
11646 pIter->base.bEof = pSeg->pLeaf==0;
11647 pIter->iSwitchRowid = pSeg->iRowid;
11648}
11649
11650/*
11651** Move the iterator to the next entry.
11652**
11653** If an error occurs, an error code is left in Fts5Index.rc. It is not
11654** considered an error if the iterator reaches EOF, or if it is already at
11655** EOF when this function is called.
11656*/
11657static void fts5MultiIterNext(
11658 Fts5Index *p,
11659 Fts5Iter *pIter,
11660 int bFrom, /* True if argument iFrom is valid */
11661 i64 iFrom /* Advance at least as far as this */
11662){
11663 int bUseFrom = bFrom;
11664 assert( pIter->base.bEof==0 );
11665 while( p->rc==SQLITE_OK ){
11666 int iFirst = pIter->aFirst[1].iFirst;
11667 int bNewTerm = 0;
11668 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
11669 assert( p->rc==SQLITE_OK );
11670 if( bUseFrom && pSeg->pDlidx ){
11671 fts5SegIterNextFrom(p, pSeg, iFrom);
11672 }else{
11673 pSeg->xNext(p, pSeg, &bNewTerm);
11674 }
11675
11676 if( pSeg->pLeaf==0 || bNewTerm
11677 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
11678 ){
11679 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
11680 fts5MultiIterSetEof(pIter);
11681 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
11682 if( pSeg->pLeaf==0 ) return;
11683 }
11684
11685 fts5AssertMultiIterSetup(p, pIter);
11686 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
11687 if( pIter->bSkipEmpty==0 || pSeg->nPos ){
11688 pIter->xSetOutputs(pIter, pSeg);
11689 return;
11690 }
11691 bUseFrom = 0;
11692 }
11693}
11694
11695static void fts5MultiIterNext2(
11696 Fts5Index *p,
11697 Fts5Iter *pIter,
11698 int *pbNewTerm /* OUT: True if *might* be new term */
11699){
11700 assert( pIter->bSkipEmpty );
11701 if( p->rc==SQLITE_OK ){
11702 *pbNewTerm = 0;
11703 do{
11704 int iFirst = pIter->aFirst[1].iFirst;
11705 Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
11706 int bNewTerm = 0;
11707
11708 assert( p->rc==SQLITE_OK );
11709 pSeg->xNext(p, pSeg, &bNewTerm);
11710 if( pSeg->pLeaf==0 || bNewTerm
11711 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
11712 ){
11713 fts5MultiIterAdvanced(p, pIter, iFirst, 1);
11714 fts5MultiIterSetEof(pIter);
11715 *pbNewTerm = 1;
11716 }
11717 fts5AssertMultiIterSetup(p, pIter);
11718
11719 }while( fts5MultiIterIsEmpty(p, pIter) );
11720 }
11721}
11722
11723static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
11724 UNUSED_PARAM2(pUnused1, pUnused2);
11725}
11726
11727static Fts5Iter *fts5MultiIterAlloc(
11728 Fts5Index *p, /* FTS5 backend to iterate within */
11729 int nSeg
11730){
11731 Fts5Iter *pNew;
11732 int nSlot; /* Power of two >= nSeg */
11733
11734 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
11735 pNew = fts5IdxMalloc(p,
11736 sizeof(Fts5Iter) + /* pNew */
11737 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */
11738 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */
11739 );
11740 if( pNew ){
11741 pNew->nSeg = nSlot;
11742 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
11743 pNew->pIndex = p;
11744 pNew->xSetOutputs = fts5IterSetOutputs_Noop;
11745 }
11746 return pNew;
11747}
11748
11749static void fts5PoslistCallback(
11750 Fts5Index *pUnused,
11751 void *pContext,
11752 const u8 *pChunk, int nChunk
11753){
11754 UNUSED_PARAM(pUnused);
11755 assert_nc( nChunk>=0 );
11756 if( nChunk>0 ){
11757 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
11758 }
11759}
11760
11761typedef struct PoslistCallbackCtx PoslistCallbackCtx;
11762struct PoslistCallbackCtx {
11763 Fts5Buffer *pBuf; /* Append to this buffer */
11764 Fts5Colset *pColset; /* Restrict matches to this column */
11765 int eState; /* See above */
11766};
11767
11768typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
11769struct PoslistOffsetsCtx {
11770 Fts5Buffer *pBuf; /* Append to this buffer */
11771 Fts5Colset *pColset; /* Restrict matches to this column */
11772 int iRead;
11773 int iWrite;
11774};
11775
11776/*
11777** TODO: Make this more efficient!
11778*/
11779static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
11780 int i;
11781 for(i=0; i<pColset->nCol; i++){
11782 if( pColset->aiCol[i]==iCol ) return 1;
11783 }
11784 return 0;
11785}
11786
11787static void fts5PoslistOffsetsCallback(
11788 Fts5Index *pUnused,
11789 void *pContext,
11790 const u8 *pChunk, int nChunk
11791){
11792 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
11793 UNUSED_PARAM(pUnused);
11794 assert_nc( nChunk>=0 );
11795 if( nChunk>0 ){
11796 int i = 0;
11797 while( i<nChunk ){
11798 int iVal;
11799 i += fts5GetVarint32(&pChunk[i], iVal);
11800 iVal += pCtx->iRead - 2;
11801 pCtx->iRead = iVal;
11802 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
11803 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
11804 pCtx->iWrite = iVal;
11805 }
11806 }
11807 }
11808}
11809
/*
** xChunk callback for fts5ChunkIterate(), used by fts5SegiterPoslist() for
** detail=full tables when a column filter is in effect. pContext points
** to a PoslistCallbackCtx.
**
** Within the position list an 0x01 byte that is not part of a varint is
** followed by a column-number varint. Data belonging to columns that are
** part of pCtx->pColset is appended to pCtx->pBuf. Other data is skipped.
**
** pCtx->eState carries the parse state from one chunk to the next:
**
**   0: the current column is not part of the colset (skip),
**   1: the current column is part of the colset (copy),
**   2: the previous chunk ended directly after an 0x01 byte, so this
**      chunk begins with the column-number varint.
*/
static void fts5PoslistFilterCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk>0 ){
    /* Search through to find the first varint with value 1. This is the
    ** start of the next columns hits. */
    int i = 0;
    int iStart = 0;

    if( pCtx->eState==2 ){
      /* The 0x01 marker was the final byte of the previous chunk and was
      ** not copied at that point. Read the column number. If the column is
      ** wanted, write the marker now. The column number itself is copied
      ** by the loop below, as iStart is still 0. */
      int iCol;
      fts5FastGetVarint32(pChunk, i, iCol);
      if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
        pCtx->eState = 1;
        fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
      }else{
        pCtx->eState = 0;
      }
    }

    do {
      /* Advance i to the end of the chunk or to the next 0x01 byte that
      ** begins a varint, stepping over one whole varint at a time. */
      while( i<nChunk && pChunk[i]!=0x01 ){
        while( pChunk[i] & 0x80 ) i++;
        i++;
      }
      /* Copy the data scanned since iStart if the column is wanted. */
      if( pCtx->eState ){
        fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
      }
      if( i<nChunk ){
        /* pChunk[i] is an 0x01 column marker. */
        int iCol;
        iStart = i;
        i++;
        if( i>=nChunk ){
          /* The column number is in the next chunk. */
          pCtx->eState = 2;
        }else{
          fts5FastGetVarint32(pChunk, i, iCol);
          pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
          if( pCtx->eState ){
            /* Copy the marker and column number for a wanted column. */
            fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
            iStart = i;
          }
        }
      }
    }while( i<nChunk );
  }
}
11861
/*
** Invoke callback xChunk once for each contiguous chunk of the position
** list (pSeg->nPos bytes in total) that segment iterator pSeg currently
** points to. The first chunk is the part stored on the current leaf,
** starting at pSeg->iLeafOffset. If the list continues past that leaf,
** following leaves are loaded one at a time and the data starting at byte
** offset 4 of each is passed to xChunk.
**
** pCtx is passed through to xChunk as its second argument.
**
** If more data is required but pSeg->pSeg is NULL, p->rc is set to
** FTS5_CORRUPT. If a leaf cannot be loaded, iteration stops early.
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nRem = pSeg->nPos;          /* Number of bytes still to come */
  Fts5Data *pData = 0;
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = 0;

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  /* Unless this is a reverse iterator, the page following the current
  ** leaf is handed to pSeg->pNextLeaf below instead of being released. */
  if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
    pgnoSave = pgno+1;
  }

  while( 1 ){
    xChunk(p, pCtx, pChunk, nChunk);
    nRem -= nChunk;
    /* Release the page loaded by the previous iteration. pData is NULL on
    ** the first pass and whenever the page was handed to pSeg->pNextLeaf. */
    fts5DataRelease(pData);
    if( nRem<=0 ){
      break;
    }else if( pSeg->pSeg==0 ){
      p->rc = FTS5_CORRUPT;
      return;
    }else{
      pgno++;
      pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
      if( pData==0 ) break;
      pChunk = &pData->p[4];
      nChunk = MIN(nRem, pData->szLeaf - 4);
      if( pgno==pgnoSave ){
        /* pChunk still points into this page. pSeg->pNextLeaf now holds
        ** the reference, so it must not be released here. */
        assert( pSeg->pNextLeaf==0 );
        pSeg->pNextLeaf = pData;
        pData = 0;
      }
    }
  }
}
11905
11906/*
11907** Iterator pIter currently points to a valid entry (not EOF). This
11908** function appends the position list data for the current entry to
11909** buffer pBuf. It does not make a copy of the position-list size
11910** field.
11911*/
11912static void fts5SegiterPoslist(
11913 Fts5Index *p,
11914 Fts5SegIter *pSeg,
11915 Fts5Colset *pColset,
11916 Fts5Buffer *pBuf
11917){
11918 assert( pBuf!=0 );
11919 assert( pSeg!=0 );
11920 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
11921 assert( pBuf->p!=0 );
11922 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
11923 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
11924 if( pColset==0 ){
11925 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
11926 }else{
11927 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
11928 PoslistCallbackCtx sCtx;
11929 sCtx.pBuf = pBuf;
11930 sCtx.pColset = pColset;
11931 sCtx.eState = fts5IndexColsetTest(pColset, 0);
11932 assert( sCtx.eState==0 || sCtx.eState==1 );
11933 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
11934 }else{
11935 PoslistOffsetsCtx sCtx;
11936 memset(&sCtx, 0, sizeof(sCtx));
11937 sCtx.pBuf = pBuf;
11938 sCtx.pColset = pColset;
11939 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
11940 }
11941 }
11942 }
11943}
11944
/*
** Parameter pPos points to a buffer containing a position list, size nPos.
** This function filters it according to pColset (which must be non-NULL)
** and sets pIter->base.pData/nData to point to the new position list.
** If memory is required for the new position list, use buffer pIter->poslist.
** Or, if the new position list is a contiguous subset of the input, set
** pIter->base.pData/nData to point directly to it.
**
** This function is a no-op if *pRc is other than SQLITE_OK when it is
** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
** before returning.
**
** The search below only ever advances through pColset->aiCol[], so it
** relies on both the colset and the columns within the position list
** being in ascending order (NOTE(review): confirm with the code that
** builds the colset).
*/
static void fts5IndexExtractColset(
  int *pRc,
  Fts5Colset *pColset,            /* Colset to filter on */
  const u8 *pPos, int nPos,       /* Position list */
  Fts5Iter *pIter
){
  if( *pRc==SQLITE_OK ){
    const u8 *p = pPos;
    const u8 *aCopy = p;          /* Start of data for the current column */
    const u8 *pEnd = &p[nPos];    /* One byte past end of position list */
    int i = 0;                    /* Current index within pColset->aiCol[] */
    int iCurrent = 0;             /* Column that the data at aCopy is for */

    /* With a single-column colset the output always points directly into
    ** the input, so the buffer is only required when nCol>1. */
    if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
      return;
    }

    while( 1 ){
      /* Skip colset entries that are smaller than the current column. If
      ** the colset is exhausted, the output is whatever has been
      ** accumulated in pIter->poslist. */
      while( pColset->aiCol[i]<iCurrent ){
        i++;
        if( i==pColset->nCol ){
          pIter->base.pData = pIter->poslist.p;
          pIter->base.nData = pIter->poslist.n;
          return;
        }
      }

      /* Advance pointer p until it points to pEnd or an 0x01 byte that is
      ** not part of a varint */
      while( p<pEnd && *p!=0x01 ){
        while( *p++ & 0x80 );
      }

      if( pColset->aiCol[i]==iCurrent ){
        if( pColset->nCol==1 ){
          /* The only column required. Point directly at the input. */
          pIter->base.pData = aCopy;
          pIter->base.nData = p-aCopy;
          return;
        }
        fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
      }
      if( p>=pEnd ){
        pIter->base.pData = pIter->poslist.p;
        pIter->base.nData = pIter->poslist.n;
        return;
      }
      /* p points to an 0x01 column marker. Data for the next column starts
      ** at the marker. Read the column number that follows it, using the
      ** single-byte fast path where possible. */
      aCopy = p++;
      iCurrent = *p++;
      if( iCurrent & 0x80 ){
        p--;
        p += fts5GetVarint32(p, iCurrent);
      }
    }
  }

}
12013
12014/*
12015** xSetOutputs callback used by detail=none tables.
12016*/
12017static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
12018 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
12019 pIter->base.iRowid = pSeg->iRowid;
12020 pIter->base.nData = pSeg->nPos;
12021}
12022
12023/*
12024** xSetOutputs callback used by detail=full and detail=col tables when no
12025** column filters are specified.
12026*/
12027static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
12028 pIter->base.iRowid = pSeg->iRowid;
12029 pIter->base.nData = pSeg->nPos;
12030
12031 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
12032 assert( pIter->pColset==0 );
12033
12034 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
12035 /* All data is stored on the current page. Populate the output
12036 ** variables to point into the body of the page object. */
12037 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
12038 }else{
12039 /* The data is distributed over two or more pages. Copy it into the
12040 ** Fts5Iter.poslist buffer and then set the output pointer to point
12041 ** to this buffer. */
12042 fts5BufferZero(&pIter->poslist);
12043 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
12044 pIter->base.pData = pIter->poslist.p;
12045 }
12046}
12047
12048/*
12049** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
12050** against no columns at all).
12051*/
12052static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
12053 UNUSED_PARAM(pSeg);
12054 pIter->base.nData = 0;
12055}
12056
12057/*
12058** xSetOutputs callback used by detail=col when there is a column filter
12059** and there are 100 or more columns. Also called as a fallback from
12060** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
12061*/
12062static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
12063 fts5BufferZero(&pIter->poslist);
12064 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
12065 pIter->base.iRowid = pSeg->iRowid;
12066 pIter->base.pData = pIter->poslist.p;
12067 pIter->base.nData = pIter->poslist.n;
12068}
12069
12070/*
12071** xSetOutputs callback used when:
12072**
12073** * detail=col,
12074** * there is a column filter, and
12075** * the table contains 100 or fewer columns.
12076**
12077** The last point is to ensure all column numbers are stored as
12078** single-byte varints.
12079*/
12080static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
12081
12082 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
12083 assert( pIter->pColset );
12084
12085 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
12086 fts5IterSetOutputs_Col(pIter, pSeg);
12087 }else{
12088 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
12089 u8 *pEnd = (u8*)&a[pSeg->nPos];
12090 int iPrev = 0;
12091 int *aiCol = pIter->pColset->aiCol;
12092 int *aiColEnd = &aiCol[pIter->pColset->nCol];
12093
12094 u8 *aOut = pIter->poslist.p;
12095 int iPrevOut = 0;
12096
12097 pIter->base.iRowid = pSeg->iRowid;
12098
12099 while( a<pEnd ){
12100 iPrev += (int)a++[0] - 2;
12101 while( *aiCol<iPrev ){
12102 aiCol++;
12103 if( aiCol==aiColEnd ) goto setoutputs_col_out;
12104 }
12105 if( *aiCol==iPrev ){
12106 *aOut++ = (u8)((iPrev - iPrevOut) + 2);
12107 iPrevOut = iPrev;
12108 }
12109 }
12110
12111setoutputs_col_out:
12112 pIter->base.pData = pIter->poslist.p;
12113 pIter->base.nData = aOut - pIter->poslist.p;
12114 }
12115}
12116
12117/*
12118** xSetOutputs callback used by detail=full when there is a column filter.
12119*/
12120static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
12121 Fts5Colset *pColset = pIter->pColset;
12122 pIter->base.iRowid = pSeg->iRowid;
12123
12124 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
12125 assert( pColset );
12126
12127 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
12128 /* All data is stored on the current page. Populate the output
12129 ** variables to point into the body of the page object. */
12130 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
12131 int *pRc = &pIter->pIndex->rc;
12132 fts5BufferZero(&pIter->poslist);
12133 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
12134 }else{
12135 /* The data is distributed over two or more pages. Copy it into the
12136 ** Fts5Iter.poslist buffer and then set the output pointer to point
12137 ** to this buffer. */
12138 fts5BufferZero(&pIter->poslist);
12139 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
12140 pIter->base.pData = pIter->poslist.p;
12141 pIter->base.nData = pIter->poslist.n;
12142 }
12143}
12144
12145static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
12146 assert( pIter!=0 || (*pRc)!=SQLITE_OK );
12147 if( *pRc==SQLITE_OK ){
12148 Fts5Config *pConfig = pIter->pIndex->pConfig;
12149 if( pConfig->eDetail==FTS5_DETAIL_NONE ){
12150 pIter->xSetOutputs = fts5IterSetOutputs_None;
12151 }
12152
12153 else if( pIter->pColset==0 ){
12154 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
12155 }
12156
12157 else if( pIter->pColset->nCol==0 ){
12158 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
12159 }
12160
12161 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
12162 pIter->xSetOutputs = fts5IterSetOutputs_Full;
12163 }
12164
12165 else{
12166 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
12167 if( pConfig->nCol<=100 ){
12168 pIter->xSetOutputs = fts5IterSetOutputs_Col100;
12169 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
12170 }else{
12171 pIter->xSetOutputs = fts5IterSetOutputs_Col;
12172 }
12173 }
12174 }
12175}
12176
12177
/*
** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** When iLevel is -ve and p->pHash is not NULL, the contents of the hash
** table are included as an extra component iterator. A term to seek to
** (pTerm/nTerm) may only be specified when iLevel is -ve.
**
** The iterator initially points to the first term/rowid entry in the
** iterated data.
**
** On success *ppOut is set to point to the new object. If an error occurs
** or has already occurred, p->rc is not SQLITE_OK on return and *ppOut is
** set to NULL.
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* Next slot in aSeg[], then aFirst[] index */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5Iter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->pHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ){
    assert( p->rc!=SQLITE_OK );
    goto fts5MultiIterNew_post_check;
  }
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  pNew->pColset = pColset;
  /* Unless the caller requested no output, select the xSetOutputs
  ** callback to use with this iterator. */
  if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
      if( p->pHash ){
        /* Add a segment iterator for the current contents of the hash table. */
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
      }
      /* Levels are visited in ascending order. Within each level the
      ** segments are visited from the last array entry to the first. */
      for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pIter = &pNew->aSeg[iIter++];
          if( pTerm==0 ){
            fts5SegIterInit(p, pSeg, pIter);
          }else{
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
          }
        }
      }
    }else{
      pLvl = &pStruct->aLevel[iLevel];
      for(iSeg=nSeg-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
      }
    }
    assert( iIter==nSeg );
  }

  /* If the above was successful, each component iterators now points
  ** to the first entry in its segment. In this case initialize the
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL. */
  if( p->rc==SQLITE_OK ){
    for(iIter=pNew->nSeg-1; iIter>0; iIter--){
      int iEq;
      /* A non-zero return identifies a component iterator that must be
      ** advanced before the aFirst[] entries can be recomputed
      ** (NOTE(review): presumably because it is on the same entry as
      ** another component - confirm against fts5MultiIterDoCompare()). */
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        Fts5SegIter *pSeg = &pNew->aSeg[iEq];
        if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5MultiIterSetEof(pNew);
    fts5AssertMultiIterSetup(p, pNew);

    /* Either step past an initial empty entry (bSkipEmpty iterators only)
    ** or, if not at EOF, populate the outputs for the first entry. */
    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }else if( pNew->base.bEof==0 ){
      Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
      pNew->xSetOutputs(pNew, pSeg);
    }

  }else{
    fts5MultiIterFree(pNew);
    *ppOut = 0;
  }

fts5MultiIterNew_post_check:
  assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
  return;
}
12290
12291/*
12292** Create an Fts5Iter that iterates through the doclist provided
12293** as the second argument.
12294*/
12295static void fts5MultiIterNew2(
12296 Fts5Index *p, /* FTS5 backend to iterate within */
12297 Fts5Data *pData, /* Doclist to iterate through */
12298 int bDesc, /* True for descending rowid order */
12299 Fts5Iter **ppOut /* New object */
12300){
12301 Fts5Iter *pNew;
12302 pNew = fts5MultiIterAlloc(p, 2);
12303 if( pNew ){
12304 Fts5SegIter *pIter = &pNew->aSeg[1];
12305
12306 pIter->flags = FTS5_SEGITER_ONETERM;
12307 if( pData->szLeaf>0 ){
12308 pIter->pLeaf = pData;
12309 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
12310 pIter->iEndofDoclist = pData->nn;
12311 pNew->aFirst[1].iFirst = 1;
12312 if( bDesc ){
12313 pNew->bRev = 1;
12314 pIter->flags |= FTS5_SEGITER_REVERSE;
12315 fts5SegIterReverseInitPage(p, pIter);
12316 }else{
12317 fts5SegIterLoadNPos(p, pIter);
12318 }
12319 pData = 0;
12320 }else{
12321 pNew->base.bEof = 1;
12322 }
12323 fts5SegIterSetNext(p, pIter);
12324
12325 *ppOut = pNew;
12326 }
12327
12328 fts5DataRelease(pData);
12329}
12330
12331/*
12332** Return true if the iterator is at EOF or if an error has occurred.
12333** False otherwise.
12334*/
12335static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
12336 assert( pIter!=0 || p->rc!=SQLITE_OK );
12337 assert( p->rc!=SQLITE_OK
12338 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
12339 );
12340 return (p->rc || pIter->base.bEof);
12341}
12342
12343/*
12344** Return the rowid of the entry that the iterator currently points
12345** to. If the iterator points to EOF when this function is called the
12346** results are undefined.
12347*/
12348static i64 fts5MultiIterRowid(Fts5Iter *pIter){
12349 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
12350 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
12351}
12352
12353/*
12354** Move the iterator to the next entry at or following iMatch.
12355*/
12356static void fts5MultiIterNextFrom(
12357 Fts5Index *p,
12358 Fts5Iter *pIter,
12359 i64 iMatch
12360){
12361 while( 1 ){
12362 i64 iRowid;
12363 fts5MultiIterNext(p, pIter, 1, iMatch);
12364 if( fts5MultiIterEof(p, pIter) ) break;
12365 iRowid = fts5MultiIterRowid(pIter);
12366 if( pIter->bRev==0 && iRowid>=iMatch ) break;
12367 if( pIter->bRev!=0 && iRowid<=iMatch ) break;
12368 }
12369}
12370
12371/*
12372** Return a pointer to a buffer containing the term associated with the
12373** entry that the iterator currently points to.
12374*/
12375static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
12376 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
12377 *pn = p->term.n;
12378 return p->term.p;
12379}
12380
/*
** Allocate a new segment-id for the structure pStruct. The new segment
** id is between 1 and FTS5_MAX_SEGMENT inclusive, and is not used by
** any currently existing segment. If the structure already contains
** FTS5_MAX_SEGMENT or more segments, Fts5Index.rc is set to SQLITE_FULL
** and 0 is returned.
**
** If an error has already occurred, this function is a no-op. 0 is
** returned in this case.
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  int iSegid = 0;

  if( p->rc==SQLITE_OK ){
    if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
      p->rc = SQLITE_FULL;
    }else{
      /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
      ** array is 63 elements, or 252 bytes, in size. */
      u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
      int iLvl, iSeg;
      int i;
      u32 mask;
      /* Build a bitmap of the segment ids in use. Bit (iId-1) is set for
      ** each id in the range 1..FTS5_MAX_SEGMENT. Ids outside that range
      ** are ignored. */
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
            aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
          }
        }
      }

      /* Find the first array element with a clear bit, then the lowest
      ** clear bit within it. The id is the index of that bit plus one. */
      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      /* Check that no existing segment uses the new id, and that the
      ** id is within range. */
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
      }
      assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );

      /* Run the statement returned by fts5IdxSelectStmt() with the new id
      ** and an 0xFFFF blob bound to it. It is expected to return no row
      ** (NOTE(review): presumably a check that no index entries exist
      ** for the id - confirm against the statement's SQL). */
      {
        sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
        if( p->rc==SQLITE_OK ){
          u8 aBlob[2] = {0xff, 0xff};
          sqlite3_bind_int(pIdxSelect, 1, iSegid);
          sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
          assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
          p->rc = sqlite3_reset(pIdxSelect);
          sqlite3_bind_null(pIdxSelect, 2);
        }
      }
#endif
    }
  }

  return iSegid;
}
12443
12444/*
12445** Discard all data currently cached in the hash-tables.
12446*/
12447static void fts5IndexDiscardData(Fts5Index *p){
12448 assert( p->pHash || p->nPendingData==0 );
12449 if( p->pHash ){
12450 sqlite3Fts5HashClear(p->pHash);
12451 p->nPendingData = 0;
12452 }
12453}
12454
12455/*
12456** Return the size of the prefix, in bytes, that buffer
12457** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
12458**
12459** Buffer (pNew/<length-unknown>) is guaranteed to be greater
12460** than buffer (pOld/nOld).
12461*/
12462static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
12463 int i;
12464 for(i=0; i<nOld; i++){
12465 if( pOld[i]!=pNew[i] ) break;
12466 }
12467 return i;
12468}
12469
12470static void fts5WriteDlidxClear(
12471 Fts5Index *p,
12472 Fts5SegWriter *pWriter,
12473 int bFlush /* If true, write dlidx to disk */
12474){
12475 int i;
12476 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
12477 for(i=0; i<pWriter->nDlidx; i++){
12478 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
12479 if( pDlidx->buf.n==0 ) break;
12480 if( bFlush ){
12481 assert( pDlidx->pgno!=0 );
12482 fts5DataWrite(p,
12483 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
12484 pDlidx->buf.p, pDlidx->buf.n
12485 );
12486 }
12487 sqlite3Fts5BufferZero(&pDlidx->buf);
12488 pDlidx->bPrevValid = 0;
12489 }
12490}
12491
12492/*
12493** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
12494** Any new array elements are zeroed before returning.
12495*/
12496static int fts5WriteDlidxGrow(
12497 Fts5Index *p,
12498 Fts5SegWriter *pWriter,
12499 int nLvl
12500){
12501 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
12502 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
12503 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
12504 );
12505 if( aDlidx==0 ){
12506 p->rc = SQLITE_NOMEM;
12507 }else{
12508 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
12509 memset(&aDlidx[pWriter->nDlidx], 0, nByte);
12510 pWriter->aDlidx = aDlidx;
12511 pWriter->nDlidx = nLvl;
12512 }
12513 }
12514 return p->rc;
12515}
12516
12517/*
12518** If the current doclist-index accumulating in pWriter->aDlidx[] is large
12519** enough, flush it to disk and return 1. Otherwise discard it and return
12520** zero.
12521*/
12522static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
12523 int bFlag = 0;
12524
12525 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
12526 ** to the database, also write the doclist-index to disk. */
12527 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
12528 bFlag = 1;
12529 }
12530 fts5WriteDlidxClear(p, pWriter, bFlag);
12531 pWriter->nEmpty = 0;
12532 return bFlag;
12533}
12534
12535/*
12536** This function is called whenever processing of the doclist for the
12537** last term on leaf page (pWriter->iBtPage) is completed.
12538**
12539** The doclist-index for that term is currently stored in-memory within the
12540** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
12541** writes it out to disk. Or, if it is too small to bother with, discards
12542** it.
12543**
12544** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
12545*/
12546static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
12547 int bFlag;
12548
12549 assert( pWriter->iBtPage || pWriter->nEmpty==0 );
12550 if( pWriter->iBtPage==0 ) return;
12551 bFlag = fts5WriteFlushDlidx(p, pWriter);
12552
12553 if( p->rc==SQLITE_OK ){
12554 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
12555 /* The following was already done in fts5WriteInit(): */
12556 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
12557 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
12558 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
12559 sqlite3_step(p->pIdxWriter);
12560 p->rc = sqlite3_reset(p->pIdxWriter);
12561 sqlite3_bind_null(p->pIdxWriter, 2);
12562 }
12563 pWriter->iBtPage = 0;
12564}
12565
12566/*
12567** This is called once for each leaf page except the first that contains
12568** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
12569** is larger than all terms written to earlier leaves, and equal to or
12570** smaller than the first term on the new leaf.
12571**
12572** If an error occurs, an error code is left in Fts5Index.rc. If an error
12573** has already occurred when this function is called, it is a no-op.
12574*/
12575static void fts5WriteBtreeTerm(
12576 Fts5Index *p, /* FTS5 backend object */
12577 Fts5SegWriter *pWriter, /* Writer object */
12578 int nTerm, const u8 *pTerm /* First term on new page */
12579){
12580 fts5WriteFlushBtree(p, pWriter);
12581 if( p->rc==SQLITE_OK ){
12582 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
12583 pWriter->iBtPage = pWriter->writer.pgno;
12584 }
12585}
12586
12587/*
12588** This function is called when flushing a leaf page that contains no
12589** terms at all to disk.
12590*/
12591static void fts5WriteBtreeNoTerm(
12592 Fts5Index *p, /* FTS5 backend object */
12593 Fts5SegWriter *pWriter /* Writer object */
12594){
12595 /* If there were no rowids on the leaf page either and the doclist-index
12596 ** has already been started, append an 0x00 byte to it. */
12597 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
12598 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
12599 assert( pDlidx->bPrevValid );
12600 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
12601 }
12602
12603 /* Increment the "number of sequential leaves without a term" counter. */
12604 pWriter->nEmpty++;
12605}
12606
12607static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
12608 i64 iRowid;
12609 int iOff;
12610
12611 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
12612 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
12613 return iRowid;
12614}
12615
/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
**
** The entry is appended to level 0 (pWriter->aDlidx[0]). If the buffer
** for a level has reached the configured page size, that page is written
** to disk, a new page is started at that level, and iRowid is also
** appended to the next level up. This repeats until a level is found
** that is not full.
**
** This function is a no-op if an error has already occurred. If an error
** occurs, an error code is left in Fts5Index.rc.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      fts5WriteDlidxGrow(p, pWriter, i+2);
      /* The array may have been reallocated. Refresh the pointer. */
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Start a new, empty page at this level. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      /* This level has room, so no higher level needs updating. */
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Not the first rowid on this page. Store it as a delta. */
      iVal = iRowid - pDlidx->iPrev;
    }else{
      /* First entry on a new page. Write the page header: a flag varint
      ** (!bDone) followed by a page number - the current leaf page for
      ** level 0, or the page number of the level below otherwise. The
      ** rowid is then stored in full. */
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}
12680
12681static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
12682 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
12683 Fts5PageWriter *pPage = &pWriter->writer;
12684 i64 iRowid;
12685
12686 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
12687
12688 /* Set the szLeaf header field. */
12689 assert( 0==fts5GetU16(&pPage->buf.p[2]) );
12690 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
12691
12692 if( pWriter->bFirstTermInPage ){
12693 /* No term was written to this page. */
12694 assert( pPage->pgidx.n==0 );
12695 fts5WriteBtreeNoTerm(p, pWriter);
12696 }else{
12697 /* Append the pgidx to the page buffer. Set the szLeaf header field. */
12698 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
12699 }
12700
12701 /* Write the page out to disk */
12702 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
12703 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
12704
12705 /* Initialize the next page. */
12706 fts5BufferZero(&pPage->buf);
12707 fts5BufferZero(&pPage->pgidx);
12708 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
12709 pPage->iPrevPgidx = 0;
12710 pPage->pgno++;
12711
12712 /* Increase the leaves written counter */
12713 pWriter->nLeafWritten++;
12714
12715 /* The new leaf holds no terms or rowids */
12716 pWriter->bFirstTermInPage = 1;
12717 pWriter->bFirstRowidInPage = 1;
12718}
12719
/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** The term is stored in full if it is the first on its leaf page. Otherwise
** it is stored as the number of leading bytes shared with the previous
** term, followed by the remaining bytes. In both cases an entry holding
** the offset of the term within the page is added to the page-index.
**
** If an error occurs, set the Fts5Index.rc error code. This function must
** not be called if an error has already occurred (see the assert() below).
*/
static void fts5WriteAppendTerm(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
  int nMin = MIN(pPage->term.n, nTerm);

  assert( p->rc==SQLITE_OK );
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    /* A page holding only its 4-byte header is not flushed. */
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
      if( p->rc!=SQLITE_OK ) return;
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }

  /* TODO1: Updating pgidx here. */
  /* Append the offset of the new term, as a delta from the offset of the
  ** previous page-index entry. The varint is written directly into the
  ** pgidx buffer without growing it first (NOTE(review): this assumes the
  ** buffer was sized elsewhere - confirm against the writer init code). */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term.
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct. */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      if( p->rc!=SQLITE_OK ) return;
      pPage = &pWriter->writer;
    }
  }else{
    /* Not the first term on the page. Store the size of the prefix it
    ** shares with the previous term. */
    nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* A new doclist begins with this term, so the doclist-index must be
  ** empty. Record the page on which the doclist starts. */
  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;
}
12804
12805/*
12806** Append a rowid and position-list size field to the writers output.
12807*/
12808static void fts5WriteAppendRowid(
12809 Fts5Index *p,
12810 Fts5SegWriter *pWriter,
12811 i64 iRowid
12812){
12813 if( p->rc==SQLITE_OK ){
12814 Fts5PageWriter *pPage = &pWriter->writer;
12815
12816 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
12817 fts5WriteFlushLeaf(p, pWriter);
12818 }
12819
12820 /* If this is to be the first rowid written to the page, set the
12821 ** rowid-pointer in the page-header. Also append a value to the dlidx
12822 ** buffer, in case a doclist-index is required. */
12823 if( pWriter->bFirstRowidInPage ){
12824 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
12825 fts5WriteDlidxAppend(p, pWriter, iRowid);
12826 }
12827
12828 /* Write the rowid. */
12829 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
12830 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
12831 }else{
12832 assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
12833 fts5BufferAppendVarint(&p->rc, &pPage->buf,
12834 (u64)iRowid - (u64)pWriter->iPrevRowid
12835 );
12836 }
12837 pWriter->iPrevRowid = iRowid;
12838 pWriter->bFirstRowidInDoclist = 0;
12839 pWriter->bFirstRowidInPage = 0;
12840 }
12841}
12842
12843static void fts5WriteAppendPoslistData(
12844 Fts5Index *p,
12845 Fts5SegWriter *pWriter,
12846 const u8 *aData,
12847 int nData
12848){
12849 Fts5PageWriter *pPage = &pWriter->writer;
12850 const u8 *a = aData;
12851 int n = nData;
12852
12853 assert( p->pConfig->pgsz>0 );
12854 while( p->rc==SQLITE_OK
12855 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
12856 ){
12857 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
12858 int nCopy = 0;
12859 while( nCopy<nReq ){
12860 i64 dummy;
12861 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
12862 }
12863 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
12864 a += nCopy;
12865 n -= nCopy;
12866 fts5WriteFlushLeaf(p, pWriter);
12867 }
12868 if( n>0 ){
12869 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
12870 }
12871}
12872
12873/*
12874** Flush any data cached by the writer object to the database. Free any
12875** allocations associated with the writer.
12876*/
12877static void fts5WriteFinish(
12878 Fts5Index *p,
12879 Fts5SegWriter *pWriter, /* Writer object */
12880 int *pnLeaf /* OUT: Number of leaf pages in b-tree */
12881){
12882 int i;
12883 Fts5PageWriter *pLeaf = &pWriter->writer;
12884 if( p->rc==SQLITE_OK ){
12885 assert( pLeaf->pgno>=1 );
12886 if( pLeaf->buf.n>4 ){
12887 fts5WriteFlushLeaf(p, pWriter);
12888 }
12889 *pnLeaf = pLeaf->pgno-1;
12890 if( pLeaf->pgno>1 ){
12891 fts5WriteFlushBtree(p, pWriter);
12892 }
12893 }
12894 fts5BufferFree(&pLeaf->term);
12895 fts5BufferFree(&pLeaf->buf);
12896 fts5BufferFree(&pLeaf->pgidx);
12897 fts5BufferFree(&pWriter->btterm);
12898
12899 for(i=0; i<pWriter->nDlidx; i++){
12900 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
12901 }
12902 sqlite3_free(pWriter->aDlidx);
12903}
12904
12905static void fts5WriteInit(
12906 Fts5Index *p,
12907 Fts5SegWriter *pWriter,
12908 int iSegid
12909){
12910 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
12911
12912 memset(pWriter, 0, sizeof(Fts5SegWriter));
12913 pWriter->iSegid = iSegid;
12914
12915 fts5WriteDlidxGrow(p, pWriter, 1);
12916 pWriter->writer.pgno = 1;
12917 pWriter->bFirstTermInPage = 1;
12918 pWriter->iBtPage = 1;
12919
12920 assert( pWriter->writer.buf.n==0 );
12921 assert( pWriter->writer.pgidx.n==0 );
12922
12923 /* Grow the two buffers to pgsz + padding bytes in size. */
12924 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
12925 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
12926
12927 if( p->pIdxWriter==0 ){
12928 Fts5Config *pConfig = p->pConfig;
12929 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
12930 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
12931 pConfig->zDb, pConfig->zName
12932 ));
12933 }
12934
12935 if( p->rc==SQLITE_OK ){
12936 /* Initialize the 4-byte leaf-page header to 0x00. */
12937 memset(pWriter->writer.buf.p, 0, 4);
12938 pWriter->writer.buf.n = 4;
12939
12940 /* Bind the current output segment id to the index-writer. This is an
12941 ** optimization over binding the same value over and over as rows are
12942 ** inserted into %_idx by the current writer. */
12943 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
12944 }
12945}
12946
/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
**
** Each input segment that has been completely consumed has its first and
** last page numbers set to 0. For each of the others, the leaf page that
** holds the iterator's current term is rewritten so that it begins with
** that term, the pages before it are deleted from the database, and the
** segment's pgnoFirst value is updated to match.
**
** If an error occurs, the error code is left in Fts5Index.rc. The loop
** stops as soon as Fts5Index.rc is set.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;                 /* Used to assemble each rewritten page */
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transferred to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};

      /* Load the leaf page that contains the current term */
      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
      pData = fts5LeafRead(p, iLeafRowid);
      if( pData ){
        if( iOff>pData->szLeaf ){
          /* This can occur if the pages that the segments occupy overlap - if
          ** a single page has been assigned to more than one segment. In
          ** this case a prior iteration of this loop may have corrupted the
          ** segment currently being trimmed. */
          p->rc = FTS5_CORRUPT;
        }else{
          /* Build the new page: a zeroed 4 byte header, the current term
          ** stored in full, then everything from offset iOff up to the end
          ** of the leaf data of the old page. */
          fts5BufferZero(&buf);
          fts5BufferGrow(&p->rc, &buf, pData->nn);
          fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
          fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
          fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]);
          if( p->rc==SQLITE_OK ){
            /* Set the szLeaf field */
            fts5PutU16(&buf.p[2], (u16)buf.n);
          }

          /* Set up the new page-index array. The first entry is always 4,
          ** the offset of the term written immediately after the header.
          ** If the page contains further terms, the next entry is
          ** recomputed for the new layout and the remaining entries are
          ** copied from the old page unchanged. */
          fts5BufferAppendVarint(&p->rc, &buf, 4);
          if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
           && pSeg->iEndofDoclist<pData->szLeaf
           && pSeg->iPgidxOff<=pData->nn
          ){
            int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
            fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
            fts5BufferAppendBlob(&p->rc, &buf,
                pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
            );
          }

          /* Update the segment's first page number, delete the pages
          ** that precede the rewritten one and write the new page. */
          pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
          fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
          fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
        }
        fts5DataRelease(pData);
      }
    }
  }
  fts5BufferFree(&buf);
}
13017
13018static void fts5MergeChunkCallback(
13019 Fts5Index *p,
13020 void *pCtx,
13021 const u8 *pChunk, int nChunk
13022){
13023 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
13024 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
13025}
13026
/*
** Merge segments on level iLvl of the structure *ppStruct into a single
** segment on level iLvl+1.
**
** If level iLvl already has a merge in progress (nMerge!=0), the merge is
** resumed: its nMerge input segments are read and output is appended to
** the last segment on level iLvl+1. Otherwise a new segment id is
** allocated, the structure is extended as required (in which case
** *ppStruct may be replaced), and all segments on level iLvl are used as
** input.
**
** If pnRem is not NULL, then *pnRem is the number of output leaf pages to
** write. The merge stops at the first term boundary encountered after more
** than this many leaves have been written, and *pnRem is decremented by
** the number of leaves actually written. If pnRem is NULL the merge always
** runs to completion.
**
** If the input is exhausted, the input segments are removed from both the
** database and the structure. Otherwise they are trimmed by
** fts5TrimSegments() and the level is left marked as having a merge in
** progress.
**
** If an error occurs, the error code is left in Fts5Index.rc.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;            /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;                /* Copy of the term most recently visited */
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  int bTermWritten = 0;           /* True if current term already output */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    /* Resume an existing merge. The output segment is the last segment
    ** on the output level. Continue writing at the page following its
    ** current last page. */
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    /* Recompute the level pointers, since *ppStruct may have been
    ** replaced above. */
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  /* The output is the oldest segment in the index if it is the only
  ** segment on the output level and the output level is the last level. */
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
      /* The iterator has moved on to a new term. This is the only point
      ** at which a merge that has written its quota of leaves may stop. */
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bTermWritten =0;
    }

    /* Check for key annihilation. An entry with an empty position list is
    ** dropped if the output is the oldest segment, or if the entry is not
    ** flagged as a delete. */
    if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;

    if( p->rc==SQLITE_OK && bTermWritten==0 ){
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      bTermWritten = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      /* No position lists are stored. A delete is written as a single 0x00
      ** byte, or as two 0x00 bytes if the entry also has nPos>0. */
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output. The size field is
      ** twice the position-list size, plus one if the delete flag is set. */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time. */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  assert( pIter!=0 || p->rc!=SQLITE_OK );
  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    if( pSeg->pgnoLast==0 ){
      /* Nothing at all was written to the output segment. Remove it. */
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    /* The merge stopped early. Trim the consumed data from the input
    ** segments and record the number of inputs so that the merge can be
    ** resumed by a later call. */
    assert( pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}
13174
13175/*
13176** Do up to nPg pages of automerge work on the index.
13177**
13178** Return true if any changes were actually made, or false otherwise.
13179*/
13180static int fts5IndexMerge(
13181 Fts5Index *p, /* FTS5 backend object */
13182 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
13183 int nPg, /* Pages of work to do */
13184 int nMin /* Minimum number of segments to merge */
13185){
13186 int nRem = nPg;
13187 int bRet = 0;
13188 Fts5Structure *pStruct = *ppStruct;
13189 while( nRem>0 && p->rc==SQLITE_OK ){
13190 int iLvl; /* To iterate through levels */
13191 int iBestLvl = 0; /* Level offering the most input segments */
13192 int nBest = 0; /* Number of input segments on best level */
13193
13194 /* Set iBestLvl to the level to read input segments from. */
13195 assert( pStruct->nLevel>0 );
13196 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
13197 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
13198 if( pLvl->nMerge ){
13199 if( pLvl->nMerge>nBest ){
13200 iBestLvl = iLvl;
13201 nBest = pLvl->nMerge;
13202 }
13203 break;
13204 }
13205 if( pLvl->nSeg>nBest ){
13206 nBest = pLvl->nSeg;
13207 iBestLvl = iLvl;
13208 }
13209 }
13210
13211 /* If nBest is still 0, then the index must be empty. */
13212#ifdef SQLITE_DEBUG
13213 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
13214 assert( pStruct->aLevel[iLvl].nSeg==0 );
13215 }
13216#endif
13217
13218 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
13219 break;
13220 }
13221 bRet = 1;
13222 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
13223 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
13224 fts5StructurePromote(p, iBestLvl+1, pStruct);
13225 }
13226 }
13227 *ppStruct = pStruct;
13228 return bRet;
13229}
13230
13231/*
13232** A total of nLeaf leaf pages of data has just been flushed to a level-0
13233** segment. This function updates the write-counter accordingly and, if
13234** necessary, performs incremental merge work.
13235**
13236** If an error occurs, set the Fts5Index.rc error code. If an error has
13237** already occurred, this function is a no-op.
13238*/
13239static void fts5IndexAutomerge(
13240 Fts5Index *p, /* FTS5 backend object */
13241 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */
13242 int nLeaf /* Number of output leaves just written */
13243){
13244 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
13245 Fts5Structure *pStruct = *ppStruct;
13246 u64 nWrite; /* Initial value of write-counter */
13247 int nWork; /* Number of work-quanta to perform */
13248 int nRem; /* Number of leaf pages left to write */
13249
13250 /* Update the write-counter. While doing so, set nWork. */
13251 nWrite = pStruct->nWriteCounter;
13252 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
13253 pStruct->nWriteCounter += nLeaf;
13254 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
13255
13256 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
13257 }
13258}
13259
13260static void fts5IndexCrisismerge(
13261 Fts5Index *p, /* FTS5 backend object */
13262 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */
13263){
13264 const int nCrisis = p->pConfig->nCrisisMerge;
13265 Fts5Structure *pStruct = *ppStruct;
13266 int iLvl = 0;
13267
13268 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
13269 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
13270 fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
13271 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
13272 fts5StructurePromote(p, iLvl+1, pStruct);
13273 iLvl++;
13274 }
13275 *ppStruct = pStruct;
13276}
13277
13278static int fts5IndexReturn(Fts5Index *p){
13279 int rc = p->rc;
13280 p->rc = SQLITE_OK;
13281 return rc;
13282}
13283
/*
** A context object that pairs an index handle with a segment writer.
** NOTE(review): no user of this type is visible in this part of the
** file - confirm whether it is still required.
*/
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;                /* Index handle */
  Fts5SegWriter writer;           /* Segment writer */
};
13289
13290/*
13291** Buffer aBuf[] contains a list of varints, all small enough to fit
13292** in a 32-bit integer. Return the size of the largest prefix of this
13293** list nMax bytes or less in size.
13294*/
13295static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
13296 int ret;
13297 u32 dummy;
13298 ret = fts5GetVarint32(aBuf, dummy);
13299 if( ret<nMax ){
13300 while( 1 ){
13301 int i = fts5GetVarint32(&aBuf[ret], dummy);
13302 if( (ret + i) > nMax ) break;
13303 ret += i;
13304 }
13305 }
13306 return ret;
13307}
13308
/*
** Flush the contents of the in-memory hash table (Fts5Index.pHash) to a
** new level-0 segment on disk. Also update the corresponding structure
** record.
**
** Each hash table entry supplies a term and its doclist. If the doclist
** fits on the current leaf page it is copied in a single operation.
** Otherwise it is copied one rowid/position-list at a time, with leaves
** flushed as they fill up. Once the segment has been written it is added
** to level 0 of the structure, automerge and crisis-merge work is done,
** and the structure record is written back to the database.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;               /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment. */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);
  fts5StructureInvalidate(p);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */

      /* Write the term for this entry to disk. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
      if( p->rc!=SQLITE_OK ) break;

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;           /* Rowid of the current doclist entry */
        u64 iDelta = 0;           /* Rowid delta as read from the doclist */
        int iOff = 0;             /* Current offset within pDoclist[] */

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist. */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
          iRowid += iDelta;

          if( writer.bFirstRowidInPage ){
            /* The first rowid on a page is stored in full rather than as
            ** a delta. Its offset goes in the page header and the rowid
            ** is passed to the doclist-index writer. */
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
            if( p->rc!=SQLITE_OK ) break;
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            /* In detail=none mode a rowid may be followed by up to two
            ** 0x00 bytes. Copy whichever of them are present. */
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            /* nCopy is set to the size of the poslist-size field plus the
            ** size of the position list itself. */
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously.  */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;       /* Bytes of pPoslist[] copied so far */
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  n = nCopy - iPos;
                }else{
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below. */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* Do any merge work required following the write of pgnoLast leaves,
  ** then store the updated structure record and drop the reference. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
13466
13467/*
13468** Flush any data stored in the in-memory hash tables to the database.
13469*/
13470static void fts5IndexFlush(Fts5Index *p){
13471 /* Unless it is empty, flush the hash table to disk */
13472 if( p->nPendingData ){
13473 assert( p->pHash );
13474 p->nPendingData = 0;
13475 fts5FlushOneHash(p);
13476 }
13477}
13478
13479static Fts5Structure *fts5IndexOptimizeStruct(
13480 Fts5Index *p,
13481 Fts5Structure *pStruct
13482){
13483 Fts5Structure *pNew = 0;
13484 sqlite3_int64 nByte = sizeof(Fts5Structure);
13485 int nSeg = pStruct->nSegment;
13486 int i;
13487
13488 /* Figure out if this structure requires optimization. A structure does
13489 ** not require optimization if either:
13490 **
13491 ** + it consists of fewer than two segments, or
13492 ** + all segments are on the same level, or
13493 ** + all segments except one are currently inputs to a merge operation.
13494 **
13495 ** In the first case, return NULL. In the second, increment the ref-count
13496 ** on *pStruct and return a copy of the pointer to it.
13497 */
13498 if( nSeg<2 ) return 0;
13499 for(i=0; i<pStruct->nLevel; i++){
13500 int nThis = pStruct->aLevel[i].nSeg;
13501 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
13502 fts5StructureRef(pStruct);
13503 return pStruct;
13504 }
13505 assert( pStruct->aLevel[i].nMerge<=nThis );
13506 }
13507
13508 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
13509 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
13510
13511 if( pNew ){
13512 Fts5StructureLevel *pLvl;
13513 nByte = nSeg * sizeof(Fts5StructureSegment);
13514 pNew->nLevel = pStruct->nLevel+1;
13515 pNew->nRef = 1;
13516 pNew->nWriteCounter = pStruct->nWriteCounter;
13517 pLvl = &pNew->aLevel[pStruct->nLevel];
13518 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
13519 if( pLvl->aSeg ){
13520 int iLvl, iSeg;
13521 int iSegOut = 0;
13522 /* Iterate through all segments, from oldest to newest. Add them to
13523 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
13524 ** segment in the data structure. */
13525 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
13526 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
13527 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
13528 iSegOut++;
13529 }
13530 }
13531 pNew->nSegment = pLvl->nSeg = nSeg;
13532 }else{
13533 sqlite3_free(pNew);
13534 pNew = 0;
13535 }
13536 }
13537
13538 return pNew;
13539}
13540
13541static int sqlite3Fts5IndexOptimize(Fts5Index *p){
13542 Fts5Structure *pStruct;
13543 Fts5Structure *pNew = 0;
13544
13545 assert( p->rc==SQLITE_OK );
13546 fts5IndexFlush(p);
13547 pStruct = fts5StructureRead(p);
13548 fts5StructureInvalidate(p);
13549
13550 if( pStruct ){
13551 pNew = fts5IndexOptimizeStruct(p, pStruct);
13552 }
13553 fts5StructureRelease(pStruct);
13554
13555 assert( pNew==0 || pNew->nSegment>0 );
13556 if( pNew ){
13557 int iLvl;
13558 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
13559 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
13560 int nRem = FTS5_OPT_WORK_UNIT;
13561 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
13562 }
13563
13564 fts5StructureWrite(p, pNew);
13565 fts5StructureRelease(pNew);
13566 }
13567
13568 return fts5IndexReturn(p);
13569}
13570
13571/*
13572** This is called to implement the special "VALUES('merge', $nMerge)"
13573** INSERT command.
13574*/
13575static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
13576 Fts5Structure *pStruct = fts5StructureRead(p);
13577 if( pStruct ){
13578 int nMin = p->pConfig->nUsermerge;
13579 fts5StructureInvalidate(p);
13580 if( nMerge<0 ){
13581 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
13582 fts5StructureRelease(pStruct);
13583 pStruct = pNew;
13584 nMin = 2;
13585 nMerge = nMerge*-1;
13586 }
13587 if( pStruct && pStruct->nLevel ){
13588 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
13589 fts5StructureWrite(p, pStruct);
13590 }
13591 }
13592 fts5StructureRelease(pStruct);
13593 }
13594 return fts5IndexReturn(p);
13595}
13596
13597static void fts5AppendRowid(
13598 Fts5Index *p,
13599 u64 iDelta,
13600 Fts5Iter *pUnused,
13601 Fts5Buffer *pBuf
13602){
13603 UNUSED_PARAM(pUnused);
13604 fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
13605}
13606
13607static void fts5AppendPoslist(
13608 Fts5Index *p,
13609 u64 iDelta,
13610 Fts5Iter *pMulti,
13611 Fts5Buffer *pBuf
13612){
13613 int nData = pMulti->base.nData;
13614 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
13615 assert( nData>0 );
13616 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
13617 fts5BufferSafeAppendVarint(pBuf, iDelta);
13618 fts5BufferSafeAppendVarint(pBuf, nData*2);
13619 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
13620 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
13621 }
13622}
13623
13624
13625static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
13626 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
13627
13628 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
13629 if( p>=pIter->aEof ){
13630 pIter->aPoslist = 0;
13631 }else{
13632 i64 iDelta;
13633
13634 p += fts5GetVarint(p, (u64*)&iDelta);
13635 pIter->iRowid += iDelta;
13636
13637 /* Read position list size */
13638 if( p[0] & 0x80 ){
13639 int nPos;
13640 pIter->nSize = fts5GetVarint32(p, nPos);
13641 pIter->nPoslist = (nPos>>1);
13642 }else{
13643 pIter->nPoslist = ((int)(p[0])) >> 1;
13644 pIter->nSize = 1;
13645 }
13646
13647 pIter->aPoslist = p;
13648 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
13649 pIter->aPoslist = 0;
13650 }
13651 }
13652}
13653
13654static void fts5DoclistIterInit(
13655 Fts5Buffer *pBuf,
13656 Fts5DoclistIter *pIter
13657){
13658 memset(pIter, 0, sizeof(*pIter));
13659 if( pBuf->n>0 ){
13660 pIter->aPoslist = pBuf->p;
13661 pIter->aEof = &pBuf->p[pBuf->n];
13662 fts5DoclistIterNext(pIter);
13663 }
13664}
13665
/* The function version of fts5MergeAppendDocid() below is disabled. The
** macro that follows it is the version that is compiled. */
#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Append rowid iRowid to buffer pBuf, encoded as a varint holding the
** difference between iRowid and iLastRowid, then set iLastRowid to iRowid.
** Space for the varint must already be available in the buffer.
**
** The difference is calculated using unsigned arithmetic, so it is well
** defined for any pair of rowid values. Argument iLastRowid must be an
** lvalue. Both iLastRowid and iRowid are evaluated more than once. The
** macro expands to a brace-enclosed block, not an expression.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {                 \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                             \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid);                                               \
}
13689
13690/*
13691** Swap the contents of buffer *p1 with that of *p2.
13692*/
13693static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
13694 Fts5Buffer tmp = *p1;
13695 *p1 = *p2;
13696 *p2 = tmp;
13697}
13698
13699static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
13700 int i = *piOff;
13701 if( i>=pBuf->n ){
13702 *piOff = -1;
13703 }else{
13704 u64 iVal;
13705 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
13706 *piRowid += iVal;
13707 }
13708}
13709
13710/*
13711** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
13712** In this case the buffers consist of a delta-encoded list of rowids only.
13713*/
13714static void fts5MergeRowidLists(
13715 Fts5Index *p, /* FTS5 backend object */
13716 Fts5Buffer *p1, /* First list to merge */
13717 int nBuf, /* Number of entries in apBuf[] */
13718 Fts5Buffer *aBuf /* Array of other lists to merge into p1 */
13719){
13720 int i1 = 0;
13721 int i2 = 0;
13722 i64 iRowid1 = 0;
13723 i64 iRowid2 = 0;
13724 i64 iOut = 0;
13725 Fts5Buffer *p2 = &aBuf[0];
13726 Fts5Buffer out;
13727
13728 (void)nBuf;
13729 memset(&out, 0, sizeof(out));
13730 assert( nBuf==1 );
13731 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
13732 if( p->rc ) return;
13733
13734 fts5NextRowid(p1, &i1, &iRowid1);
13735 fts5NextRowid(p2, &i2, &iRowid2);
13736 while( i1>=0 || i2>=0 ){
13737 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
13738 assert( iOut==0 || iRowid1>iOut );
13739 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
13740 iOut = iRowid1;
13741 fts5NextRowid(p1, &i1, &iRowid1);
13742 }else{
13743 assert( iOut==0 || iRowid2>iOut );
13744 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
13745 iOut = iRowid2;
13746 if( i1>=0 && iRowid1==iRowid2 ){
13747 fts5NextRowid(p1, &i1, &iRowid1);
13748 }
13749 fts5NextRowid(p2, &i2, &iRowid2);
13750 }
13751 }
13752
13753 fts5BufferSwap(&out, p1);
13754 fts5BufferFree(&out);
13755}
13756
/*
** One PrefixMerger object is used for each input doclist merged by
** fts5MergePrefixLists(). The objects are linked together using pNext,
** sorted either by current rowid (see fts5PrefixMergerInsertByRowid()) or
** by current position (see fts5PrefixMergerInsertByPosition()).
*/
typedef struct PrefixMerger PrefixMerger;
struct PrefixMerger {
  Fts5DoclistIter iter;           /* Doclist iterator */
  i64 iPos;                       /* For iterating through a position list */
  int iOff;                       /* Current offset within aPos[] */
  u8 *aPos;                       /* Position list data for current rowid */
  PrefixMerger *pNext;            /* Next in docid/poslist order */
};
13765
13766static void fts5PrefixMergerInsertByRowid(
13767 PrefixMerger **ppHead,
13768 PrefixMerger *p
13769){
13770 if( p->iter.aPoslist ){
13771 PrefixMerger **pp = ppHead;
13772 while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
13773 pp = &(*pp)->pNext;
13774 }
13775 p->pNext = *pp;
13776 *pp = p;
13777 }
13778}
13779
13780static void fts5PrefixMergerInsertByPosition(
13781 PrefixMerger **ppHead,
13782 PrefixMerger *p
13783){
13784 if( p->iPos>=0 ){
13785 PrefixMerger **pp = ppHead;
13786 while( *pp && p->iPos>(*pp)->iPos ){
13787 pp = &(*pp)->pNext;
13788 }
13789 p->pNext = *pp;
13790 *pp = p;
13791 }
13792}
13793
13794
/*
** Array aBuf[] contains nBuf doclists. These are all merged in with the
** doclist in buffer p1.
**
** If the total size of the aBuf[] doclists is zero, p1 is left unchanged.
** Otherwise the old contents of p1 are freed and replaced by the merged
** doclist. Entries are written in ascending rowid order. If the same
** rowid appears in more than one input, the position lists are merged
** into a single list with duplicate positions removed.
**
** p->rc is set to FTS5_CORRUPT if the inputs are found to be inconsistent
** and to whatever error sqlite3Fts5BufferSize() reports if a buffer cannot
** be sized. The value of nBuf must be less than FTS5_MERGE_NLIST.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  int nBuf,                       /* Number of buffers in array aBuf[] */
  Fts5Buffer *aBuf                /* Other lists to merge in */
){
  /* Advance PrefixMerger p to the next position in its position list. */
#define fts5PrefixMergerNextPosition(p) \
  sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
  /* Size of the aMerger[] array: p1 plus up to (NLIST-1) other lists. */
#define FTS5_MERGE_NLIST 16
  PrefixMerger aMerger[FTS5_MERGE_NLIST];
  PrefixMerger *pHead = 0;
  int i;
  int nOut = 0;
  Fts5Buffer out = {0, 0, 0};
  Fts5Buffer tmp = {0, 0, 0};
  i64 iLastRowid = 0;

  /* Initialize a doclist-iterator for each input buffer. Arrange them in
  ** a linked-list starting at pHead in ascending order of rowid. Avoid
  ** linking any iterators already at EOF into the linked list at all. */
  assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) );
  memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
  pHead = &aMerger[nBuf];         /* The last slot is used for p1 */
  fts5DoclistIterInit(p1, &pHead->iter);
  for(i=0; i<nBuf; i++){
    fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
    fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
    nOut += aBuf[i].n;
  }
  if( nOut==0 ) return;           /* Nothing to merge into p1 */
  nOut += p1->n + 9 + 10*nBuf;

  /* The maximum size of the output is equal to the sum of the
  ** input sizes + 1 varint (9 bytes). The extra varint is because if the
  ** first rowid in one input is a large negative number, and the first in
  ** the other a non-negative number, the delta for the non-negative
  ** number will be larger on disk than the literal integer value
  ** was.
  **
  ** Or, if the input position-lists are corrupt, then the output might
  ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
  ** (the value PoslistNext64() uses for EOF) as a position and appending
  ** it to the output. This can happen at most once for each input
  ** position-list, hence (nBuf+1) 10 byte paddings.  */
  if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;

  /* Each iteration of this loop writes one rowid and its position list to
  ** the output. pHead always points to the entry with the smallest rowid. */
  while( pHead ){
    fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);

    if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
      /* Merge data from two or more poslists */
      i64 iPrev = 0;
      int nTmp = FTS5_DATA_ZERO_PADDING;
      int nMerge = 0;
      PrefixMerger *pSave = pHead;
      PrefixMerger *pThis = 0;
      int nTail = 0;

      /* Unlink every entry positioned on rowid iLastRowid from the rowid
      ** list, reset it to the start of its position list and link it into
      ** a new list, sorted by position, starting at pHead. When this loop
      ** is finished pSave points to the first entry with some other rowid,
      ** or is NULL if there is no such entry. */
      pHead = 0;
      while( pSave && pSave->iter.iRowid==iLastRowid ){
        PrefixMerger *pNext = pSave->pNext;
        pSave->iOff = 0;
        pSave->iPos = 0;
        pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
        fts5PrefixMergerNextPosition(pSave);
        nTmp += pSave->iter.nPoslist + 10;
        nMerge++;
        fts5PrefixMergerInsertByPosition(&pHead, pSave);
        pSave = pNext;
      }

      /* InsertByPosition() drops entries for which iPos is negative. If
      ** fewer than two entries made it into the list, treat the input
      ** as corrupt. */
      if( pHead==0 || pHead->pNext==0 ){
        p->rc = FTS5_CORRUPT;
        break;
      }

      /* See the earlier comment in this function for an explanation of why
      ** corrupt input position lists might cause the output to consume
      ** at most nMerge*10 bytes of unexpected space. */
      if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
        break;
      }
      fts5BufferZero(&tmp);

      /* Write the first (smallest) position to tmp, then advance the entry
      ** it came from and put that entry back in the by-position list. */
      pThis = pHead;
      pHead = pThis->pNext;
      sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
      fts5PrefixMergerNextPosition(pThis);
      fts5PrefixMergerInsertByPosition(&pHead, pThis);

      /* Continue until only a single entry remains in the list. A position
      ** equal to the previous one written (iPrev) is a duplicate and is
      ** not written again. */
      while( pHead->pNext ){
        pThis = pHead;
        if( pThis->iPos!=iPrev ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
        }
        fts5PrefixMergerNextPosition(pThis);
        pHead = pThis->pNext;
        fts5PrefixMergerInsertByPosition(&pHead, pThis);
      }

      /* Only one position list is left. Write its current position, then
      ** copy the nTail bytes that follow as a blob, without decoding. */
      if( pHead->iPos!=iPrev ){
        sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
      }
      nTail = pHead->iter.nPoslist - pHead->iOff;

      /* WRITEPOSLISTSIZE */
      assert_nc( tmp.n+nTail<=nTmp );
      assert( tmp.n+nTail<=nTmp+nMerge*10 );
      if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
        if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
        break;
      }
      /* The size field is written as twice the position-list byte count */
      fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
      fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
      if( nTail>0 ){
        fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
      }

      /* Restore the by-rowid list. Start with the entries that were not
      ** involved in this merge (pSave), then advance each entry that was
      ** positioned on iLastRowid and insert it back in rowid order. */
      pHead = pSave;
      for(i=0; i<nBuf+1; i++){
        PrefixMerger *pX = &aMerger[i];
        if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
          fts5DoclistIterNext(&pX->iter);
          fts5PrefixMergerInsertByRowid(&pHead, pX);
        }
      }

    }else{
      /* Copy poslist from pHead to output */
      PrefixMerger *pThis = pHead;
      Fts5DoclistIter *pI = &pThis->iter;
      fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
      fts5DoclistIterNext(pI);
      pHead = pThis->pNext;
      fts5PrefixMergerInsertByRowid(&pHead, pThis);
    }
  }

  /* Replace the contents of p1 with the output. This is done even if an
  ** error caused the loop above to exit early. */
  fts5BufferFree(p1);
  fts5BufferFree(&tmp);
  memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
  *p1 = out;
}
13942
/*
** Create an iterator for a prefix query by scanning the terms of an index
** and merging their doclists into a single in-memory doclist.
**
** Buffer pToken/nToken contains the prefix to match, preceded by one
** byte that this function overwrites with the index identifier
** (FTS5_MAIN_PREFIX plus an index number). If iIdx is non-zero, the token
** is first looked up in the main index (index 0) and the result used to
** seed the doclist. Then index iIdx is scanned for terms that begin with
** the nToken bytes of pToken.
**
** If successful, *ppIter is set to point to a new iterator over the merged
** doclist. If an error occurs, it is left in p->rc.
*/
static void fts5SetupPrefixIter(
  Fts5Index *p,                   /* Index to read from */
  int bDesc,                      /* True for "ORDER BY rowid DESC" */
  int iIdx,                       /* Index to scan for data */
  u8 *pToken,                     /* Buffer containing prefix to match */
  int nToken,                     /* Size of buffer pToken in bytes */
  Fts5Colset *pColset,            /* Restrict matches to these columns */
  Fts5Iter **ppIter               /* OUT: New iterator */
){
  Fts5Structure *pStruct;
  Fts5Buffer *aBuf;               /* Array of nBuf buffers of saved doclists */
  int nBuf = 32;
  int nMerge = 1;                 /* Number of aBuf[] entries merged at once */

  /* Select the functions used to merge doclists and to append entries to
  ** them, based on whether or not this is a detail=none table. */
  void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
  void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
  if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
    xMerge = fts5MergeRowidLists;
    xAppend = fts5AppendRowid;
  }else{
    nMerge = FTS5_MERGE_NLIST-1;
    nBuf = nMerge*8;   /* Sufficient to merge (16^8)==(2^32) lists */
    xMerge = fts5MergePrefixLists;
    xAppend = fts5AppendPoslist;
  }

  aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
  pStruct = fts5StructureRead(p);

  if( aBuf && pStruct ){
    const int flags = FTS5INDEX_QUERY_SCAN
                    | FTS5INDEX_QUERY_SKIPEMPTY
                    | FTS5INDEX_QUERY_NOOUTPUT;
    int i;
    i64 iLastRowid = 0;           /* Last rowid appended to doclist */
    Fts5Iter *p1 = 0;             /* Iterator used to gather data from index */
    Fts5Data *pData;
    Fts5Buffer doclist;           /* Doclist currently being accumulated */
    int bNewTerm = 1;

    memset(&doclist, 0, sizeof(doclist));
    if( iIdx!=0 ){
      /* Query the main index for the token itself (no SCAN flag) and
      ** append each non-empty entry to the doclist. */
      int dummy = 0;
      const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
      pToken[0] = FTS5_MAIN_PREFIX;
      fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
      fts5IterSetOutputCb(&p->rc, p1);
      for(;
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &dummy)
      ){
        Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
        p1->xSetOutputs(p1, pSeg);
        if( p1->base.nData ){
          xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
          iLastRowid = p1->base.iRowid;
        }
      }
      fts5MultiIterFree(p1);
    }

    /* Scan index iIdx, starting at the entry for pToken/nToken. */
    pToken[0] = FTS5_MAIN_PREFIX + iIdx;
    fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
    fts5IterSetOutputCb(&p->rc, p1);
    for( /* no-op */ ;
        fts5MultiIterEof(p, p1)==0;
        fts5MultiIterNext2(p, p1, &bNewTerm)
    ){
      Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
      int nTerm = pSeg->term.n;
      const u8 *pTerm = pSeg->term.p;
      p1->xSetOutputs(p1, pSeg);

      /* Stop scanning at the first term that does not begin with the
      ** nToken bytes in pToken. */
      assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
      if( bNewTerm ){
        if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
      }

      if( p1->base.nData==0 ) continue;

      if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
        /* The next rowid is not larger than the last one appended, so it
        ** cannot be appended to the current doclist. Save the doclist in
        ** aBuf[] and start a new one.
        **
        ** aBuf[] is treated as a series of groups of nMerge buffers each.
        ** The doclist is moved to the first empty buffer of group i. If
        ** group i is full, the doclist is instead merged with all buffers
        ** in the group, the group is emptied, and the loop continues with
        ** the merged doclist and the next group. */
        for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
          int i1 = i*nMerge;
          int iStore;
          assert( i1+nMerge<=nBuf );
          for(iStore=i1; iStore<i1+nMerge; iStore++){
            if( aBuf[iStore].n==0 ){
              fts5BufferSwap(&doclist, &aBuf[iStore]);
              fts5BufferZero(&doclist);
              break;
            }
          }
          if( iStore==i1+nMerge ){
            xMerge(p, &doclist, nMerge, &aBuf[i1]);
            for(iStore=i1; iStore<i1+nMerge; iStore++){
              fts5BufferZero(&aBuf[iStore]);
            }
          }
        }
        iLastRowid = 0;
      }

      xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
      iLastRowid = p1->base.iRowid;
    }

    /* Merge the contents of all aBuf[] groups into the doclist, one group
    ** at a time. The buffers are freed even if an error has occurred. */
    assert( (nBuf%nMerge)==0 );
    for(i=0; i<nBuf; i+=nMerge){
      int iFree;
      if( p->rc==SQLITE_OK ){
        xMerge(p, &doclist, nMerge, &aBuf[i]);
      }
      for(iFree=i; iFree<i+nMerge; iFree++){
        fts5BufferFree(&aBuf[iFree]);
      }
    }
    fts5MultiIterFree(p1);

    /* Copy the doclist into a new Fts5Data object, with the data stored
    ** in the same allocation immediately after the object itself, and
    ** open the output iterator on it. */
    pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
    if( pData ){
      pData->p = (u8*)&pData[1];
      pData->nn = pData->szLeaf = doclist.n;
      if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
      fts5MultiIterNew2(p, pData, bDesc, ppIter);
    }
    fts5BufferFree(&doclist);
  }

  fts5StructureRelease(pStruct);
  sqlite3_free(aBuf);
}
14074
14075
14076/*
14077** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
14078** to the document with rowid iRowid.
14079*/
14080static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
14081 assert( p->rc==SQLITE_OK );
14082
14083 /* Allocate the hash table if it has not already been allocated */
14084 if( p->pHash==0 ){
14085 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
14086 }
14087
14088 /* Flush the hash table to disk if required */
14089 if( iRowid<p->iWriteRowid
14090 || (iRowid==p->iWriteRowid && p->bDelete==0)
14091 || (p->nPendingData > p->pConfig->nHashSize)
14092 ){
14093 fts5IndexFlush(p);
14094 }
14095
14096 p->iWriteRowid = iRowid;
14097 p->bDelete = bDelete;
14098 return fts5IndexReturn(p);
14099}
14100
14101/*
14102** Commit data to disk.
14103*/
14104static int sqlite3Fts5IndexSync(Fts5Index *p){
14105 assert( p->rc==SQLITE_OK );
14106 fts5IndexFlush(p);
14107 sqlite3Fts5IndexCloseReader(p);
14108 return fts5IndexReturn(p);
14109}
14110
14111/*
14112** Discard any data stored in the in-memory hash tables. Do not write it
14113** to the database. Additionally, assume that the contents of the %_data
14114** table may have changed on disk. So any in-memory caches of %_data
14115** records must be invalidated.
14116*/
14117static int sqlite3Fts5IndexRollback(Fts5Index *p){
14118 sqlite3Fts5IndexCloseReader(p);
14119 fts5IndexDiscardData(p);
14120 fts5StructureInvalidate(p);
14121 /* assert( p->rc==SQLITE_OK ); */
14122 return SQLITE_OK;
14123}
14124
14125/*
14126** The %_data table is completely empty when this function is called. This
14127** function populates it with the initial structure objects for each index,
14128** and the initial version of the "averages" record (a zero-byte blob).
14129*/
14130static int sqlite3Fts5IndexReinit(Fts5Index *p){
14131 Fts5Structure s;
14132 fts5StructureInvalidate(p);
14133 fts5IndexDiscardData(p);
14134 memset(&s, 0, sizeof(Fts5Structure));
14135 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
14136 fts5StructureWrite(p, &s);
14137 return fts5IndexReturn(p);
14138}
14139
14140/*
14141** Open a new Fts5Index handle. If the bCreate argument is true, create
14142** and initialize the underlying %_data table.
14143**
14144** If successful, set *pp to point to the new object and return SQLITE_OK.
14145** Otherwise, set *pp to NULL and return an SQLite error code.
14146*/
14147static int sqlite3Fts5IndexOpen(
14148 Fts5Config *pConfig,
14149 int bCreate,
14150 Fts5Index **pp,
14151 char **pzErr
14152){
14153 int rc = SQLITE_OK;
14154 Fts5Index *p; /* New object */
14155
14156 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
14157 if( rc==SQLITE_OK ){
14158 p->pConfig = pConfig;
14159 p->nWorkUnit = FTS5_WORK_UNIT;
14160 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
14161 if( p->zDataTbl && bCreate ){
14162 rc = sqlite3Fts5CreateTable(
14163 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
14164 );
14165 if( rc==SQLITE_OK ){
14166 rc = sqlite3Fts5CreateTable(pConfig, "idx",
14167 "segid, term, pgno, PRIMARY KEY(segid, term)",
14168 1, pzErr
14169 );
14170 }
14171 if( rc==SQLITE_OK ){
14172 rc = sqlite3Fts5IndexReinit(p);
14173 }
14174 }
14175 }
14176
14177 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
14178 if( rc ){
14179 sqlite3Fts5IndexClose(p);
14180 *pp = 0;
14181 }
14182 return rc;
14183}
14184
14185/*
14186** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
14187*/
14188static int sqlite3Fts5IndexClose(Fts5Index *p){
14189 int rc = SQLITE_OK;
14190 if( p ){
14191 assert( p->pReader==0 );
14192 fts5StructureInvalidate(p);
14193 sqlite3_finalize(p->pWriter);
14194 sqlite3_finalize(p->pDeleter);
14195 sqlite3_finalize(p->pIdxWriter);
14196 sqlite3_finalize(p->pIdxDeleter);
14197 sqlite3_finalize(p->pIdxSelect);
14198 sqlite3_finalize(p->pDataVersion);
14199 sqlite3Fts5HashFree(p->pHash);
14200 sqlite3_free(p->zDataTbl);
14201 sqlite3_free(p);
14202 }
14203 return rc;
14204}
14205
/*
** Argument p points to a buffer containing utf-8 text that is nByte bytes
** in size. Return the number of bytes in the nChar character prefix of the
** buffer, or 0 if there are fewer than nChar characters in total.
**
** A byte with value 0xC0 or greater starts a multi-byte character, which
** extends over all immediately following bytes of the form 10xxxxxx. Zero
** is also returned if such a lead byte is the final byte of the buffer.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int iByte = 0;                  /* Offset of next unread byte in p[] */
  int nSeen = 0;                  /* Number of characters consumed so far */

  while( nSeen<nChar ){
    unsigned char c;
    if( iByte>=nByte ) return 0;  /* Input contains fewer than nChar chars */
    c = (unsigned char)p[iByte];
    iByte++;
    if( c>=0xc0 ){
      if( iByte>=nByte ) return 0;
      while( (p[iByte] & 0xc0)==0x80 ){
        iByte++;
        if( iByte>=nByte ){
          /* End of buffer reached. This is only acceptable if the
          ** current character is the last one requested. */
          if( nSeen+1!=nChar ) return 0;
          break;
        }
      }
    }
    nSeen++;
  }
  return iByte;
}
14233
/*
** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of
** unicode characters in the string.
**
** Each byte with value 0xC0 or greater, together with any bytes of the
** form 10xxxxxx that immediately follow it, counts as one character. Every
** other byte counts as one character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nChar = 0;
  int i;

  for(i=0; i<nIn; nChar++){
    unsigned char c = (unsigned char)pIn[i++];
    if( c<0xc0 ) continue;
    /* Skip over the continuation bytes of a multi-byte character */
    while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++;
  }
  return nChar;
}
14249
14250/*
14251** Insert or remove data to or from the index. Each time a document is
14252** added to or removed from the index, this function is called one or more
14253** times.
14254**
14255** For an insert, it must be called once for each token in the new document.
14256** If the operation is a delete, it must be called (at least) once for each
14257** unique token in the document with an iCol value less than zero. The iPos
14258** argument is ignored for a delete.
14259*/
14260static int sqlite3Fts5IndexWrite(
14261 Fts5Index *p, /* Index to write to */
14262 int iCol, /* Column token appears in (-ve -> delete) */
14263 int iPos, /* Position of token within column */
14264 const char *pToken, int nToken /* Token to add or remove to or from index */
14265){
14266 int i; /* Used to iterate through indexes */
14267 int rc = SQLITE_OK; /* Return code */
14268 Fts5Config *pConfig = p->pConfig;
14269
14270 assert( p->rc==SQLITE_OK );
14271 assert( (iCol<0)==p->bDelete );
14272
14273 /* Add the entry to the main terms index. */
14274 rc = sqlite3Fts5HashWrite(
14275 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
14276 );
14277
14278 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
14279 const int nChar = pConfig->aPrefix[i];
14280 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
14281 if( nByte ){
14282 rc = sqlite3Fts5HashWrite(p->pHash,
14283 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
14284 nByte
14285 );
14286 }
14287 }
14288
14289 return rc;
14290}
14291
/*
** Open a new iterator to iterate though all rowid that match the
** specified token or token prefix.
**
** The query is either evaluated as a direct lookup on the main index or
** on a prefix index, or else by scanning multiple terms and merging the
** results (see fts5SetupPrefixIter()).
**
** *ppIter is written only if the working buffer can be sized. In that
** case it is set to the new iterator, or to NULL if an error occurred.
** The return value is that of fts5IndexReturn().
*/
static int sqlite3Fts5IndexQuery(
  Fts5Index *p,                   /* FTS index to query */
  const char *pToken, int nToken, /* Token (or prefix) to query for */
  int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
  Fts5Colset *pColset,            /* Match these columns only */
  Fts5IndexIter **ppIter          /* OUT: New iterator object */
){
  Fts5Config *pConfig = p->pConfig;
  Fts5Iter *pRet = 0;
  Fts5Buffer buf = {0, 0, 0};

  /* If the QUERY_SCAN flag is set, all other flags must be clear. */
  assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );

  /* Buffer buf is populated with a copy of the token, leaving a single
  ** byte free at the start for the index identifier byte. */
  if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
    int iIdx = 0;                 /* Index to search */
    int iPrefixIdx = 0;           /* +1 prefix index */
    if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);

    /* Figure out which index to search and set iIdx accordingly. If this
    ** is a prefix query for which there is no prefix index, set iIdx to
    ** greater than pConfig->nPrefix to indicate that the query will be
    ** satisfied by scanning multiple terms in the main index.
    **
    ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
    ** prefix-query. Instead of using a prefix-index (if one exists),
    ** evaluate the prefix query using the main FTS index. This is used
    ** for internal sanity checking by the integrity-check in debug
    ** mode only.  */
#ifdef SQLITE_DEBUG
    if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
      assert( flags & FTS5INDEX_QUERY_PREFIX );
      iIdx = 1+pConfig->nPrefix;
    }else
#endif
    if( flags & FTS5INDEX_QUERY_PREFIX ){
      /* Search for a prefix index for prefixes of exactly nChar characters.
      ** If there is none, the loop leaves iIdx set to nPrefix+1. Along
      ** the way, record in iPrefixIdx any prefix index found for prefixes
      ** of nChar+1 characters. */
      int nChar = fts5IndexCharlen(pToken, nToken);
      for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
        int nIdxChar = pConfig->aPrefix[iIdx-1];
        if( nIdxChar==nChar ) break;
        if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
      }
    }

    if( iIdx<=pConfig->nPrefix ){
      /* Straight index lookup */
      Fts5Structure *pStruct = fts5StructureRead(p);
      buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
      if( pStruct ){
        fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
            pColset, buf.p, nToken+1, -1, 0, &pRet
        );
        fts5StructureRelease(pStruct);
      }
    }else{
      /* Scan multiple terms in the main index */
      int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
      fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
      if( pRet==0 ){
        assert( p->rc!=SQLITE_OK );
      }else{
        assert( pRet->pColset==0 );
        fts5IterSetOutputCb(&p->rc, pRet);
        if( p->rc==SQLITE_OK ){
          Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
          if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
        }
      }
    }

    /* If an error has occurred, discard any iterator that was created
    ** and close the reader. */
    if( p->rc ){
      sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
      pRet = 0;
      sqlite3Fts5IndexCloseReader(p);
    }

    *ppIter = (Fts5IndexIter*)pRet;
    sqlite3Fts5BufferFree(&buf);
  }
  return fts5IndexReturn(p);
}
14377
14378/*
14379** Return true if the iterator passed as the only argument is at EOF.
14380*/
14381/*
14382** Move to the next matching rowid.
14383*/
14384static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
14385 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14386 assert( pIter->pIndex->rc==SQLITE_OK );
14387 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
14388 return fts5IndexReturn(pIter->pIndex);
14389}
14390
14391/*
14392** Move to the next matching term/rowid. Used by the fts5vocab module.
14393*/
14394static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
14395 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14396 Fts5Index *p = pIter->pIndex;
14397
14398 assert( pIter->pIndex->rc==SQLITE_OK );
14399
14400 fts5MultiIterNext(p, pIter, 0, 0);
14401 if( p->rc==SQLITE_OK ){
14402 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
14403 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
14404 fts5DataRelease(pSeg->pLeaf);
14405 pSeg->pLeaf = 0;
14406 pIter->base.bEof = 1;
14407 }
14408 }
14409
14410 return fts5IndexReturn(pIter->pIndex);
14411}
14412
14413/*
14414** Move to the next matching rowid that occurs at or after iMatch. The
14415** definition of "at or after" depends on whether this iterator iterates
14416** in ascending or descending rowid order.
14417*/
14418static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
14419 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14420 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
14421 return fts5IndexReturn(pIter->pIndex);
14422}
14423
14424/*
14425** Return the current term.
14426*/
14427static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
14428 int n;
14429 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
14430 assert_nc( z || n<=1 );
14431 *pn = n-1;
14432 return (z ? &z[1] : 0);
14433}
14434
14435/*
14436** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
14437*/
14438static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
14439 if( pIndexIter ){
14440 Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14441 Fts5Index *pIndex = pIter->pIndex;
14442 fts5MultiIterFree(pIter);
14443 sqlite3Fts5IndexCloseReader(pIndex);
14444 }
14445}
14446
14447/*
14448** Read and decode the "averages" record from the database.
14449**
14450** Parameter anSize must point to an array of size nCol, where nCol is
14451** the number of user defined columns in the FTS table.
14452*/
14453static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
14454 int nCol = p->pConfig->nCol;
14455 Fts5Data *pData;
14456
14457 *pnRow = 0;
14458 memset(anSize, 0, sizeof(i64) * nCol);
14459 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
14460 if( p->rc==SQLITE_OK && pData->nn ){
14461 int i = 0;
14462 int iCol;
14463 i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
14464 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
14465 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
14466 }
14467 }
14468
14469 fts5DataRelease(pData);
14470 return fts5IndexReturn(p);
14471}
14472
14473/*
14474** Replace the current "averages" record with the contents of the buffer
14475** supplied as the second argument.
14476*/
14477static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
14478 assert( p->rc==SQLITE_OK );
14479 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
14480 return fts5IndexReturn(p);
14481}
14482
14483/*
14484** Return the total number of blocks this module has read from the %_data
14485** table since it was created.
14486*/
14487static int sqlite3Fts5IndexReads(Fts5Index *p){
14488 return p->nRead;
14489}
14490
14491/*
14492** Set the 32-bit cookie value stored at the start of all structure
14493** records to the value passed as the second argument.
14494**
14495** Return SQLITE_OK if successful, or an SQLite error code if an error
14496** occurs.
14497*/
14498static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
14499 int rc; /* Return code */
14500 Fts5Config *pConfig = p->pConfig; /* Configuration object */
14501 u8 aCookie[4]; /* Binary representation of iNew */
14502 sqlite3_blob *pBlob = 0;
14503
14504 assert( p->rc==SQLITE_OK );
14505 sqlite3Fts5Put32(aCookie, iNew);
14506
14507 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
14508 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
14509 );
14510 if( rc==SQLITE_OK ){
14511 sqlite3_blob_write(pBlob, aCookie, 4, 0);
14512 rc = sqlite3_blob_close(pBlob);
14513 }
14514
14515 return rc;
14516}
14517
14518static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
14519 Fts5Structure *pStruct;
14520 pStruct = fts5StructureRead(p);
14521 fts5StructureRelease(pStruct);
14522 return fts5IndexReturn(p);
14523}
14524
14525
14526/*************************************************************************
14527**************************************************************************
14528** Below this point is the implementation of the integrity-check
14529** functionality.
14530*/
14531
14532/*
14533** Return a simple checksum value based on the arguments.
14534*/
14535static u64 sqlite3Fts5IndexEntryCksum(
14536 i64 iRowid,
14537 int iCol,
14538 int iPos,
14539 int iIdx,
14540 const char *pTerm,
14541 int nTerm
14542){
14543 int i;
14544 u64 ret = iRowid;
14545 ret += (ret<<3) + iCol;
14546 ret += (ret<<3) + iPos;
14547 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
14548 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
14549 return ret;
14550}
14551
14552#ifdef SQLITE_DEBUG
14553/*
14554** This function is purely an internal test. It does not contribute to
14555** FTS functionality, or even the integrity-check, in any way.
14556**
14557** Instead, it tests that the same set of pgno/rowid combinations are
14558** visited regardless of whether the doclist-index identified by parameters
14559** iSegid/iLeaf is iterated in forwards or reverse order.
14560*/
14561static void fts5TestDlidxReverse(
14562 Fts5Index *p,
14563 int iSegid, /* Segment id to load from */
14564 int iLeaf /* Load doclist-index for this leaf */
14565){
14566 Fts5DlidxIter *pDlidx = 0;
14567 u64 cksum1 = 13;
14568 u64 cksum2 = 13;
14569
14570 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
14571 fts5DlidxIterEof(p, pDlidx)==0;
14572 fts5DlidxIterNext(p, pDlidx)
14573 ){
14574 i64 iRowid = fts5DlidxIterRowid(pDlidx);
14575 int pgno = fts5DlidxIterPgno(pDlidx);
14576 assert( pgno>iLeaf );
14577 cksum1 += iRowid + ((i64)pgno<<32);
14578 }
14579 fts5DlidxIterFree(pDlidx);
14580 pDlidx = 0;
14581
14582 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
14583 fts5DlidxIterEof(p, pDlidx)==0;
14584 fts5DlidxIterPrev(p, pDlidx)
14585 ){
14586 i64 iRowid = fts5DlidxIterRowid(pDlidx);
14587 int pgno = fts5DlidxIterPgno(pDlidx);
14588 assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
14589 cksum2 += iRowid + ((i64)pgno<<32);
14590 }
14591 fts5DlidxIterFree(pDlidx);
14592 pDlidx = 0;
14593
14594 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
14595}
14596
14597static int fts5QueryCksum(
14598 Fts5Index *p, /* Fts5 index object */
14599 int iIdx,
14600 const char *z, /* Index key to query for */
14601 int n, /* Size of index key in bytes */
14602 int flags, /* Flags for Fts5IndexQuery */
14603 u64 *pCksum /* IN/OUT: Checksum value */
14604){
14605 int eDetail = p->pConfig->eDetail;
14606 u64 cksum = *pCksum;
14607 Fts5IndexIter *pIter = 0;
14608 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
14609
14610 while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
14611 i64 rowid = pIter->iRowid;
14612
14613 if( eDetail==FTS5_DETAIL_NONE ){
14614 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
14615 }else{
14616 Fts5PoslistReader sReader;
14617 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
14618 sReader.bEof==0;
14619 sqlite3Fts5PoslistReaderNext(&sReader)
14620 ){
14621 int iCol = FTS5_POS2COLUMN(sReader.iPos);
14622 int iOff = FTS5_POS2OFFSET(sReader.iPos);
14623 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
14624 }
14625 }
14626 if( rc==SQLITE_OK ){
14627 rc = sqlite3Fts5IterNext(pIter);
14628 }
14629 }
14630 sqlite3Fts5IterClose(pIter);
14631
14632 *pCksum = cksum;
14633 return rc;
14634}
14635
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of the encoding is checked. The first byte of each
** character determines its length (1 to 4 bytes), and each of the
** remaining bytes must lie within the buffer and be of the form 10xxxxxx.
** Overlong encodings and out-of-range codepoints are not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    int nCont;                    /* Number of continuation bytes expected */
    int j;

    if( (z[i] & 0x80)==0x00 ){
      nCont = 0;                  /* 0xxxxxxx */
    }else if( (z[i] & 0xE0)==0xC0 ){
      nCont = 1;                  /* 110xxxxx */
    }else if( (z[i] & 0xF0)==0xE0 ){
      nCont = 2;                  /* 1110xxxx */
    }else if( (z[i] & 0xF8)==0xF0 ){
      nCont = 3;                  /* 11110xxx */
    }else{
      return 1;                   /* Not a valid first byte */
    }

    /* All continuation bytes must lie within the buffer. For a 4-byte
    ** character that includes z[i+3], and the character occupies 4 bytes
    ** of the buffer, not 3. */
    if( i+nCont>=n ) return 1;
    for(j=1; j<=nCont; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }
    i += nCont+1;
  }

  return 0;
}
14667
14668/*
14669** This function is also purely an internal test. It does not contribute to
14670** FTS functionality, or even the integrity-check, in any way.
14671*/
14672static void fts5TestTerm(
14673 Fts5Index *p,
14674 Fts5Buffer *pPrev, /* Previous term */
14675 const char *z, int n, /* Possibly new term to test */
14676 u64 expected,
14677 u64 *pCksum
14678){
14679 int rc = p->rc;
14680 if( pPrev->n==0 ){
14681 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
14682 }else
14683 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
14684 u64 cksum3 = *pCksum;
14685 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */
14686 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */
14687 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
14688 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
14689 u64 ck1 = 0;
14690 u64 ck2 = 0;
14691
14692 /* Check that the results returned for ASC and DESC queries are
14693 ** the same. If not, call this corruption. */
14694 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
14695 if( rc==SQLITE_OK ){
14696 int f = flags|FTS5INDEX_QUERY_DESC;
14697 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
14698 }
14699 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
14700
14701 /* If this is a prefix query, check that the results returned if the
14702 ** the index is disabled are the same. In both ASC and DESC order.
14703 **
14704 ** This check may only be performed if the hash table is empty. This
14705 ** is because the hash table only supports a single scan query at
14706 ** a time, and the multi-iter loop from which this function is called
14707 ** is already performing such a scan.
14708 **
14709 ** Also only do this if buffer zTerm contains nTerm bytes of valid
14710 ** utf-8. Otherwise, the last part of the buffer contents might contain
14711 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
14712 ** character stored in the main fts index, which will cause the
14713 ** test to fail. */
14714 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
14715 if( iIdx>0 && rc==SQLITE_OK ){
14716 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
14717 ck2 = 0;
14718 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
14719 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
14720 }
14721 if( iIdx>0 && rc==SQLITE_OK ){
14722 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
14723 ck2 = 0;
14724 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
14725 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
14726 }
14727 }
14728
14729 cksum3 ^= ck1;
14730 fts5BufferSet(&rc, pPrev, n, (const u8*)z);
14731
14732 if( rc==SQLITE_OK && cksum3!=expected ){
14733 rc = FTS5_CORRUPT;
14734 }
14735 *pCksum = cksum3;
14736 }
14737 p->rc = rc;
14738}
14739
14740#else
14741# define fts5TestDlidxReverse(x,y,z)
14742# define fts5TestTerm(u,v,w,x,y,z)
14743#endif
14744
14745/*
14746** Check that:
14747**
14748** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
14749** contain zero terms.
14750** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
14751** contain zero rowids.
14752*/
14753static void fts5IndexIntegrityCheckEmpty(
14754 Fts5Index *p,
14755 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */
14756 int iFirst,
14757 int iNoRowid,
14758 int iLast
14759){
14760 int i;
14761
14762 /* Now check that the iter.nEmpty leaves following the current leaf
14763 ** (a) exist and (b) contain no terms. */
14764 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
14765 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
14766 if( pLeaf ){
14767 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
14768 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
14769 }
14770 fts5DataRelease(pLeaf);
14771 }
14772}
14773
14774static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
14775 int iTermOff = 0;
14776 int ii;
14777
14778 Fts5Buffer buf1 = {0,0,0};
14779 Fts5Buffer buf2 = {0,0,0};
14780
14781 ii = pLeaf->szLeaf;
14782 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
14783 int res;
14784 int iOff;
14785 int nIncr;
14786
14787 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
14788 iTermOff += nIncr;
14789 iOff = iTermOff;
14790
14791 if( iOff>=pLeaf->szLeaf ){
14792 p->rc = FTS5_CORRUPT;
14793 }else if( iTermOff==nIncr ){
14794 int nByte;
14795 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
14796 if( (iOff+nByte)>pLeaf->szLeaf ){
14797 p->rc = FTS5_CORRUPT;
14798 }else{
14799 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
14800 }
14801 }else{
14802 int nKeep, nByte;
14803 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
14804 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
14805 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
14806 p->rc = FTS5_CORRUPT;
14807 }else{
14808 buf1.n = nKeep;
14809 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
14810 }
14811
14812 if( p->rc==SQLITE_OK ){
14813 res = fts5BufferCompare(&buf1, &buf2);
14814 if( res<=0 ) p->rc = FTS5_CORRUPT;
14815 }
14816 }
14817 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
14818 }
14819
14820 fts5BufferFree(&buf1);
14821 fts5BufferFree(&buf2);
14822}
14823
/*
** Check the internal consistency of a single segment, pSeg.
**
** The rows of the %_idx table that belong to the segment are visited in
** (segid, term) order. For each row that refers to a leaf at or after
** pSeg->pgnoFirst, the following checks are made:
**
**   * the leaf has data following offset szLeaf, its first term is not
**     smaller than the split-key stored in the %_idx row, and its
**     first-rowid offset is smaller than its first-term offset,
**   * the page-index of the leaf is well formed (see
**     fts5IntegrityCheckPgidx()),
**   * the leaves between the previous %_idx entry and this one satisfy
**     fts5IndexIntegrityCheckEmpty(), and
**   * if the row is flagged as having a doclist-index, each leaf named by
**     that index starts with the rowid the index says it does, and the
**     leaves in between have no first-rowid offset.
**
** Failures are reported by setting p->rc (to FTS5_CORRUPT, or to whatever
** error a callee stored there). The function is a no-op if
** pSeg->pgnoFirst is zero.
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;
  int rc2;
  /* Leaf page number of the previous %_idx entry processed by the loop */
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;
  /* Leaves after this page, up to the next %_idx entry, are required to
  ** have no first-rowid offset (passed +1 as iNoRowid below). */
  int iDlidxPrevLeaf = pSeg->pgnoLast;

  if( pSeg->pgnoFirst==0 ) return;

  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
      "ORDER BY 1, 2",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    /* Column 2 is the leaf page number (pgno>>1) and column 3 the
    ** doclist-index flag (pgno&1), as computed by the SELECT above. */
    const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm. Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        /* Compare the common prefix first; on a tie the longer key is
        ** the larger one. */
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      /* iPg still holds whatever value the final pass of the inner page
      ** loop above left in it (0 if that loop never ran). */
      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  /* Finalize the statement, keeping any earlier error in p->rc. */
  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
#if 0
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }
#endif
}
14957
14958
14959/*
14960** Run internal checks to ensure that the FTS index (a) is internally
14961** consistent and (b) contains entries for which the XOR of the checksums
14962** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
14963**
14964** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
14965** checksum does not match. Return SQLITE_OK if all checks pass without
14966** error, or some other SQLite error code if another error (e.g. OOM)
14967** occurs.
14968*/
14969static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
14970 int eDetail = p->pConfig->eDetail;
14971 u64 cksum2 = 0; /* Checksum based on contents of indexes */
14972 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */
14973 Fts5Iter *pIter; /* Used to iterate through entire index */
14974 Fts5Structure *pStruct; /* Index structure */
14975 int iLvl, iSeg;
14976
14977#ifdef SQLITE_DEBUG
14978 /* Used by extra internal tests only run if NDEBUG is not defined */
14979 u64 cksum3 = 0; /* Checksum based on contents of indexes */
14980 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */
14981#endif
14982 const int flags = FTS5INDEX_QUERY_NOOUTPUT;
14983
14984 /* Load the FTS index structure */
14985 pStruct = fts5StructureRead(p);
14986 if( pStruct==0 ){
14987 assert( p->rc!=SQLITE_OK );
14988 return fts5IndexReturn(p);
14989 }
14990
14991 /* Check that the internal nodes of each segment match the leaves */
14992 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
14993 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
14994 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
14995 fts5IndexIntegrityCheckSegment(p, pSeg);
14996 }
14997 }
14998
14999 /* The cksum argument passed to this function is a checksum calculated
15000 ** based on all expected entries in the FTS index (including prefix index
15001 ** entries). This block checks that a checksum calculated based on the
15002 ** actual contents of FTS index is identical.
15003 **
15004 ** Two versions of the same checksum are calculated. The first (stack
15005 ** variable cksum2) based on entries extracted from the full-text index
15006 ** while doing a linear scan of each individual index in turn.
15007 **
15008 ** As each term visited by the linear scans, a separate query for the
15009 ** same term is performed. cksum3 is calculated based on the entries
15010 ** extracted by these queries.
15011 */
15012 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
15013 fts5MultiIterEof(p, pIter)==0;
15014 fts5MultiIterNext(p, pIter, 0, 0)
15015 ){
15016 int n; /* Size of term in bytes */
15017 i64 iPos = 0; /* Position read from poslist */
15018 int iOff = 0; /* Offset within poslist */
15019 i64 iRowid = fts5MultiIterRowid(pIter);
15020 char *z = (char*)fts5MultiIterTerm(pIter, &n);
15021
15022 /* If this is a new term, query for it. Update cksum3 with the results. */
15023 fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
15024 if( p->rc ) break;
15025
15026 if( eDetail==FTS5_DETAIL_NONE ){
15027 if( 0==fts5MultiIterIsEmpty(p, pIter) ){
15028 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
15029 }
15030 }else{
15031 poslist.n = 0;
15032 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
15033 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
15034 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
15035 int iCol = FTS5_POS2COLUMN(iPos);
15036 int iTokOff = FTS5_POS2OFFSET(iPos);
15037 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
15038 }
15039 }
15040 }
15041 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
15042
15043 fts5MultiIterFree(pIter);
15044 if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
15045
15046 fts5StructureRelease(pStruct);
15047#ifdef SQLITE_DEBUG
15048 fts5BufferFree(&term);
15049#endif
15050 fts5BufferFree(&poslist);
15051 return fts5IndexReturn(p);
15052}
15053
15054/*************************************************************************
15055**************************************************************************
15056** Below this point is the implementation of the fts5_decode() scalar
15057** function only.
15058*/
15059
15060#ifdef SQLITE_TEST
15061/*
15062** Decode a segment-data rowid from the %_data table. This function is
15063** the opposite of macro FTS5_SEGMENT_ROWID().
15064*/
15065static void fts5DecodeRowid(
15066 i64 iRowid, /* Rowid from %_data table */
15067 int *piSegid, /* OUT: Segment id */
15068 int *pbDlidx, /* OUT: Dlidx flag */
15069 int *piHeight, /* OUT: Height */
15070 int *piPgno /* OUT: Page number */
15071){
15072 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
15073 iRowid >>= FTS5_DATA_PAGE_B;
15074
15075 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
15076 iRowid >>= FTS5_DATA_HEIGHT_B;
15077
15078 *pbDlidx = (int)(iRowid & 0x0001);
15079 iRowid >>= FTS5_DATA_DLI_B;
15080
15081 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
15082}
15083#endif /* SQLITE_TEST */
15084
15085#ifdef SQLITE_TEST
15086static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
15087 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */
15088 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
15089
15090 if( iSegid==0 ){
15091 if( iKey==FTS5_AVERAGES_ROWID ){
15092 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
15093 }else{
15094 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
15095 }
15096 }
15097 else{
15098 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
15099 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
15100 );
15101 }
15102}
15103#endif /* SQLITE_TEST */
15104
15105#ifdef SQLITE_TEST
15106static void fts5DebugStructure(
15107 int *pRc, /* IN/OUT: error code */
15108 Fts5Buffer *pBuf,
15109 Fts5Structure *p
15110){
15111 int iLvl, iSeg; /* Iterate through levels, segments */
15112
15113 for(iLvl=0; iLvl<p->nLevel; iLvl++){
15114 Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
15115 sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
15116 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
15117 );
15118 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
15119 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
15120 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
15121 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
15122 );
15123 }
15124 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
15125 }
15126}
15127#endif /* SQLITE_TEST */
15128
15129#ifdef SQLITE_TEST
15130/*
15131** This is part of the fts5_decode() debugging aid.
15132**
15133** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
15134** function appends a human-readable representation of the same object
15135** to the buffer passed as the second argument.
15136*/
15137static void fts5DecodeStructure(
15138 int *pRc, /* IN/OUT: error code */
15139 Fts5Buffer *pBuf,
15140 const u8 *pBlob, int nBlob
15141){
15142 int rc; /* Return code */
15143 Fts5Structure *p = 0; /* Decoded structure object */
15144
15145 rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
15146 if( rc!=SQLITE_OK ){
15147 *pRc = rc;
15148 return;
15149 }
15150
15151 fts5DebugStructure(pRc, pBuf, p);
15152 fts5StructureRelease(p);
15153}
15154#endif /* SQLITE_TEST */
15155
15156#ifdef SQLITE_TEST
15157/*
15158** This is part of the fts5_decode() debugging aid.
15159**
15160** Arguments pBlob/nBlob contain an "averages" record. This function
15161** appends a human-readable representation of record to the buffer passed
15162** as the second argument.
15163*/
15164static void fts5DecodeAverages(
15165 int *pRc, /* IN/OUT: error code */
15166 Fts5Buffer *pBuf,
15167 const u8 *pBlob, int nBlob
15168){
15169 int i = 0;
15170 const char *zSpace = "";
15171
15172 while( i<nBlob ){
15173 u64 iVal;
15174 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
15175 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
15176 zSpace = " ";
15177 }
15178}
15179#endif /* SQLITE_TEST */
15180
15181#ifdef SQLITE_TEST
15182/*
15183** Buffer (a/n) is assumed to contain a list of serialized varints. Read
15184** each varint and append its string representation to buffer pBuf. Return
15185** after either the input buffer is exhausted or a 0 value is read.
15186**
15187** The return value is the number of bytes read from the input buffer.
15188*/
15189static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
15190 int iOff = 0;
15191 while( iOff<n ){
15192 int iVal;
15193 iOff += fts5GetVarint32(&a[iOff], iVal);
15194 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
15195 }
15196 return iOff;
15197}
15198#endif /* SQLITE_TEST */
15199
15200#ifdef SQLITE_TEST
15201/*
15202** The start of buffer (a/n) contains the start of a doclist. The doclist
15203** may or may not finish within the buffer. This function appends a text
15204** representation of the part of the doclist that is present to buffer
15205** pBuf.
15206**
15207** The return value is the number of bytes read from the input buffer.
15208*/
15209static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
15210 i64 iDocid = 0;
15211 int iOff = 0;
15212
15213 if( n>0 ){
15214 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
15215 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
15216 }
15217 while( iOff<n ){
15218 int nPos;
15219 int bDel;
15220 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
15221 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
15222 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
15223 if( iOff<n ){
15224 i64 iDelta;
15225 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
15226 iDocid += iDelta;
15227 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
15228 }
15229 }
15230
15231 return iOff;
15232}
15233#endif /* SQLITE_TEST */
15234
15235#ifdef SQLITE_TEST
15236/*
15237** This function is part of the fts5_decode() debugging function. It is
15238** only ever used with detail=none tables.
15239**
15240** Buffer (pData/nData) contains a doclist in the format used by detail=none
15241** tables. This function appends a human-readable version of that list to
15242** buffer pBuf.
15243**
15244** If *pRc is other than SQLITE_OK when this function is called, it is a
15245** no-op. If an OOM or other error occurs within this function, *pRc is
15246** set to an SQLite error code before returning. The final state of buffer
15247** pBuf is undefined in this case.
15248*/
15249static void fts5DecodeRowidList(
15250 int *pRc, /* IN/OUT: Error code */
15251 Fts5Buffer *pBuf, /* Buffer to append text to */
15252 const u8 *pData, int nData /* Data to decode list-of-rowids from */
15253){
15254 int i = 0;
15255 i64 iRowid = 0;
15256
15257 while( i<nData ){
15258 const char *zApp = "";
15259 u64 iVal;
15260 i += sqlite3Fts5GetVarint(&pData[i], &iVal);
15261 iRowid += iVal;
15262
15263 if( i<nData && pData[i]==0x00 ){
15264 i++;
15265 if( i<nData && pData[i]==0x00 ){
15266 i++;
15267 zApp = "+";
15268 }else{
15269 zApp = "*";
15270 }
15271 }
15272
15273 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
15274 }
15275}
15276#endif /* SQLITE_TEST */
15277
15278#ifdef SQLITE_TEST
15279/*
15280** The implementation of user-defined scalar function fts5_decode().
15281*/
15282static void fts5DecodeFunction(
15283 sqlite3_context *pCtx, /* Function call context */
15284 int nArg, /* Number of args (always 2) */
15285 sqlite3_value **apVal /* Function arguments */
15286){
15287 i64 iRowid; /* Rowid for record being decoded */
15288 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
15289 const u8 *aBlob; int n; /* Record to decode */
15290 u8 *a = 0;
15291 Fts5Buffer s; /* Build up text to return here */
15292 int rc = SQLITE_OK; /* Return code */
15293 sqlite3_int64 nSpace = 0;
15294 int eDetailNone = (sqlite3_user_data(pCtx)!=0);
15295
15296 assert( nArg==2 );
15297 UNUSED_PARAM(nArg);
15298 memset(&s, 0, sizeof(Fts5Buffer));
15299 iRowid = sqlite3_value_int64(apVal[0]);
15300
15301 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
15302 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
15303 ** buffer overreads even if the record is corrupt. */
15304 n = sqlite3_value_bytes(apVal[1]);
15305 aBlob = sqlite3_value_blob(apVal[1]);
15306 nSpace = n + FTS5_DATA_ZERO_PADDING;
15307 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
15308 if( a==0 ) goto decode_out;
15309 if( n>0 ) memcpy(a, aBlob, n);
15310
15311 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
15312
15313 fts5DebugRowid(&rc, &s, iRowid);
15314 if( bDlidx ){
15315 Fts5Data dlidx;
15316 Fts5DlidxLvl lvl;
15317
15318 dlidx.p = a;
15319 dlidx.nn = n;
15320
15321 memset(&lvl, 0, sizeof(Fts5DlidxLvl));
15322 lvl.pData = &dlidx;
15323 lvl.iLeafPgno = iPgno;
15324
15325 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
15326 sqlite3Fts5BufferAppendPrintf(&rc, &s,
15327 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
15328 );
15329 }
15330 }else if( iSegid==0 ){
15331 if( iRowid==FTS5_AVERAGES_ROWID ){
15332 fts5DecodeAverages(&rc, &s, a, n);
15333 }else{
15334 fts5DecodeStructure(&rc, &s, a, n);
15335 }
15336 }else if( eDetailNone ){
15337 Fts5Buffer term; /* Current term read from page */
15338 int szLeaf;
15339 int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
15340 int iTermOff;
15341 int nKeep = 0;
15342 int iOff;
15343
15344 memset(&term, 0, sizeof(Fts5Buffer));
15345
15346 /* Decode any entries that occur before the first term. */
15347 if( szLeaf<n ){
15348 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
15349 }else{
15350 iTermOff = szLeaf;
15351 }
15352 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
15353
15354 iOff = iTermOff;
15355 while( iOff<szLeaf ){
15356 int nAppend;
15357
15358 /* Read the term data for the next term*/
15359 iOff += fts5GetVarint32(&a[iOff], nAppend);
15360 term.n = nKeep;
15361 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
15362 sqlite3Fts5BufferAppendPrintf(
15363 &rc, &s, " term=%.*s", term.n, (const char*)term.p
15364 );
15365 iOff += nAppend;
15366
15367 /* Figure out where the doclist for this term ends */
15368 if( iPgidxOff<n ){
15369 int nIncr;
15370 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
15371 iTermOff += nIncr;
15372 }else{
15373 iTermOff = szLeaf;
15374 }
15375
15376 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
15377 iOff = iTermOff;
15378 if( iOff<szLeaf ){
15379 iOff += fts5GetVarint32(&a[iOff], nKeep);
15380 }
15381 }
15382
15383 fts5BufferFree(&term);
15384 }else{
15385 Fts5Buffer term; /* Current term read from page */
15386 int szLeaf; /* Offset of pgidx in a[] */
15387 int iPgidxOff;
15388 int iPgidxPrev = 0; /* Previous value read from pgidx */
15389 int iTermOff = 0;
15390 int iRowidOff = 0;
15391 int iOff;
15392 int nDoclist;
15393
15394 memset(&term, 0, sizeof(Fts5Buffer));
15395
15396 if( n<4 ){
15397 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
15398 goto decode_out;
15399 }else{
15400 iRowidOff = fts5GetU16(&a[0]);
15401 iPgidxOff = szLeaf = fts5GetU16(&a[2]);
15402 if( iPgidxOff<n ){
15403 fts5GetVarint32(&a[iPgidxOff], iTermOff);
15404 }else if( iPgidxOff>n ){
15405 rc = FTS5_CORRUPT;
15406 goto decode_out;
15407 }
15408 }
15409
15410 /* Decode the position list tail at the start of the page */
15411 if( iRowidOff!=0 ){
15412 iOff = iRowidOff;
15413 }else if( iTermOff!=0 ){
15414 iOff = iTermOff;
15415 }else{
15416 iOff = szLeaf;
15417 }
15418 if( iOff>n ){
15419 rc = FTS5_CORRUPT;
15420 goto decode_out;
15421 }
15422 fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
15423
15424 /* Decode any more doclist data that appears on the page before the
15425 ** first term. */
15426 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
15427 if( nDoclist+iOff>n ){
15428 rc = FTS5_CORRUPT;
15429 goto decode_out;
15430 }
15431 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
15432
15433 while( iPgidxOff<n && rc==SQLITE_OK ){
15434 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */
15435 int nByte; /* Bytes of data */
15436 int iEnd;
15437
15438 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
15439 iPgidxPrev += nByte;
15440 iOff = iPgidxPrev;
15441
15442 if( iPgidxOff<n ){
15443 fts5GetVarint32(&a[iPgidxOff], nByte);
15444 iEnd = iPgidxPrev + nByte;
15445 }else{
15446 iEnd = szLeaf;
15447 }
15448 if( iEnd>szLeaf ){
15449 rc = FTS5_CORRUPT;
15450 break;
15451 }
15452
15453 if( bFirst==0 ){
15454 iOff += fts5GetVarint32(&a[iOff], nByte);
15455 if( nByte>term.n ){
15456 rc = FTS5_CORRUPT;
15457 break;
15458 }
15459 term.n = nByte;
15460 }
15461 iOff += fts5GetVarint32(&a[iOff], nByte);
15462 if( iOff+nByte>n ){
15463 rc = FTS5_CORRUPT;
15464 break;
15465 }
15466 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
15467 iOff += nByte;
15468
15469 sqlite3Fts5BufferAppendPrintf(
15470 &rc, &s, " term=%.*s", term.n, (const char*)term.p
15471 );
15472 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
15473 }
15474
15475 fts5BufferFree(&term);
15476 }
15477
15478 decode_out:
15479 sqlite3_free(a);
15480 if( rc==SQLITE_OK ){
15481 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
15482 }else{
15483 sqlite3_result_error_code(pCtx, rc);
15484 }
15485 fts5BufferFree(&s);
15486}
15487#endif /* SQLITE_TEST */
15488
15489#ifdef SQLITE_TEST
15490/*
15491** The implementation of user-defined scalar function fts5_rowid().
15492*/
15493static void fts5RowidFunction(
15494 sqlite3_context *pCtx, /* Function call context */
15495 int nArg, /* Number of args (always 2) */
15496 sqlite3_value **apVal /* Function arguments */
15497){
15498 const char *zArg;
15499 if( nArg==0 ){
15500 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
15501 }else{
15502 zArg = (const char*)sqlite3_value_text(apVal[0]);
15503 if( 0==sqlite3_stricmp(zArg, "segment") ){
15504 i64 iRowid;
15505 int segid, pgno;
15506 if( nArg!=3 ){
15507 sqlite3_result_error(pCtx,
15508 "should be: fts5_rowid('segment', segid, pgno))", -1
15509 );
15510 }else{
15511 segid = sqlite3_value_int(apVal[1]);
15512 pgno = sqlite3_value_int(apVal[2]);
15513 iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
15514 sqlite3_result_int64(pCtx, iRowid);
15515 }
15516 }else{
15517 sqlite3_result_error(pCtx,
15518 "first arg to fts5_rowid() must be 'segment'" , -1
15519 );
15520 }
15521 }
15522}
15523#endif /* SQLITE_TEST */
15524
15525/*
15526** This is called as part of registering the FTS5 module with database
15527** connection db. It registers several user-defined scalar functions useful
15528** with FTS5.
15529**
15530** If successful, SQLITE_OK is returned. If an error occurs, some other
15531** SQLite error code is returned instead.
15532*/
15533static int sqlite3Fts5IndexInit(sqlite3 *db){
15534#ifdef SQLITE_TEST
15535 int rc = sqlite3_create_function(
15536 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
15537 );
15538
15539 if( rc==SQLITE_OK ){
15540 rc = sqlite3_create_function(
15541 db, "fts5_decode_none", 2,
15542 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
15543 );
15544 }
15545
15546 if( rc==SQLITE_OK ){
15547 rc = sqlite3_create_function(
15548 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
15549 );
15550 }
15551 return rc;
15552#else
15553 return SQLITE_OK;
15554 UNUSED_PARAM(db);
15555#endif
15556}
15557
15558
15559static int sqlite3Fts5IndexReset(Fts5Index *p){
15560 assert( p->pStruct==0 || p->iStructVersion!=0 );
15561 if( fts5IndexDataVersion(p)!=p->iStructVersion ){
15562 fts5StructureInvalidate(p);
15563 }
15564 return fts5IndexReturn(p);
15565}
15566
15567#line 1 "fts5_main.c"
15568/*
15569** 2014 Jun 09
15570**
15571** The author disclaims copyright to this source code. In place of
15572** a legal notice, here is a blessing:
15573**
15574** May you do good and not evil.
15575** May you find forgiveness for yourself and forgive others.
15576** May you share freely, never taking more than you give.
15577**
15578******************************************************************************
15579**
15580** This is an SQLite module implementing full-text search.
15581*/
15582
15583
15584/* #include "fts5Int.h" */
15585
15586/*
15587** This variable is set to false when running tests for which the on disk
15588** structures should not be corrupt. Otherwise, true. If it is false, extra
15589** assert() conditions in the fts5 code are activated - conditions that are
15590** only true if it is guaranteed that the fts5 database is not corrupt.
15591*/
15592#ifdef SQLITE_DEBUG
15593int sqlite3_fts5_may_be_corrupt = 1;
15594#endif
15595
15596
/*
** Typedef names for the struct types used by this module. Declaring them
** up front allows the structure definitions that follow to refer to one
** another by pointer.
*/
typedef struct Fts5Auxdata Fts5Auxdata;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5FullTable Fts5FullTable;
typedef struct Fts5Sorter Fts5Sorter;
typedef struct Fts5TokenizerModule Fts5TokenizerModule;
15603
15604/*
15605** NOTES ON TRANSACTIONS:
15606**
15607** SQLite invokes the following virtual table methods as transactions are
15608** opened and closed by the user:
15609**
15610** xBegin(): Start of a new transaction.
15611** xSync(): Initial part of two-phase commit.
15612** xCommit(): Final part of two-phase commit.
15613** xRollback(): Rollback the transaction.
15614**
15615** Anything that is required as part of a commit that may fail is performed
15616** in the xSync() callback. Current versions of SQLite ignore any errors
15617** returned by xCommit().
15618**
15619** And as sub-transactions are opened/closed:
15620**
15621** xSavepoint(int S): Open savepoint S.
15622** xRelease(int S): Commit and close savepoint S.
15623** xRollbackTo(int S): Rollback to start of savepoint S.
15624**
15625** During a write-transaction the fts5_index.c module may cache some data
15626** in-memory. It is flushed to disk whenever xSync(), xRelease() or
15627** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo()
15628** is called.
15629**
15630** Additionally, if SQLITE_DEBUG is defined, an instance of the following
15631** structure is used to record the current transaction state. This information
15632** is not required, but it is used in the assert() statements executed by
15633** function fts5CheckTransactionState() (see below).
15634*/
/* Transaction-state record described under NOTES ON TRANSACTIONS above. */
struct Fts5TransactionState {
  int eState;                     /* 0==closed, 1==open, 2==synced */
  int iSavepoint;                 /* Number of open savepoints (0 -> none) */
};
15639
15640/*
15641** A single object of this type is allocated when the FTS5 module is
15642** registered with a database handle. It is used to store pointers to
15643** all registered FTS5 extensions - tokenizers and auxiliary functions.
15644*/
15645struct Fts5Global {
15646 fts5_api api; /* User visible part of object (see fts5.h) */
15647 sqlite3 *db; /* Associated database connection */
15648 i64 iNextId; /* Used to allocate unique cursor ids */
15649 Fts5Auxiliary *pAux; /* First in list of all aux. functions */
15650 Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */
15651 Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */
15652 Fts5Cursor *pCsr; /* First in list of all open cursors */
15653};
15654
15655/*
15656** Each auxiliary function registered with the FTS5 module is represented
15657** by an object of the following type. All such objects are stored as part
15658** of the Fts5Global.pAux list.
15659*/
15660struct Fts5Auxiliary {
15661 Fts5Global *pGlobal; /* Global context for this function */
15662 char *zFunc; /* Function name (nul-terminated) */
15663 void *pUserData; /* User-data pointer */
15664 fts5_extension_function xFunc; /* Callback function */
15665 void (*xDestroy)(void*); /* Destructor function */
15666 Fts5Auxiliary *pNext; /* Next registered auxiliary function */
15667};
15668
/*
** Each tokenizer module registered with the FTS5 module is represented
** by an object of the following type. All such objects are stored as part
** of the Fts5Global.pTok list, linked through pNext.
*/
struct Fts5TokenizerModule {
  char *zName;                    /* Name of tokenizer */
  void *pUserData;                /* User pointer passed to xCreate() */
  fts5_tokenizer x;               /* Tokenizer functions */
  void (*xDestroy)(void*);        /* Destructor function */
  Fts5TokenizerModule *pNext;     /* Next registered tokenizer module */
};
15681
/*
** The virtual table object. The Fts5Table member "p" is the first field,
** which is why code in this file casts freely between sqlite3_vtab*,
** Fts5Table* and Fts5FullTable*.
*/
struct Fts5FullTable {
  Fts5Table p;                    /* Public class members from fts5Int.h */
  Fts5Storage *pStorage;          /* Document store */
  Fts5Global *pGlobal;            /* Global (connection wide) data */
  Fts5Cursor *pSortCsr;           /* Sort data from this cursor. Set by
                                  ** fts5CursorFirstSorted() only for the
                                  ** duration of its first fts5SorterNext()
                                  ** call, then reset to 0 */
#ifdef SQLITE_DEBUG
  struct Fts5TransactionState ts; /* Used only by fts5CheckTransactionState() */
#endif
};
15691
/*
** Describes a single phrase: a pointer to its current position list and
** the number of terms in the phrase.
*/
struct Fts5MatchPhrase {
  Fts5Buffer *pPoslist;           /* Pointer to current poslist */
  int nTerm;                      /* Size of phrase in terms */
};
15696
/*
** State used by "ORDER BY rank" (FTS5_PLAN_SORTED_MATCH) cursors.
**
** pStmt:
**   SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
**
** aIdx[]:
**   There is one entry in the aIdx[] array for each phrase in the query,
**   the value of which is the offset within aPoslist[] following the last
**   byte of the position list for the corresponding phrase.
**
** The object is over-allocated by fts5CursorFirstSorted() so that aIdx[]
** really holds nIdx entries; the declared size of 1 is a placeholder.
*/
struct Fts5Sorter {
  sqlite3_stmt *pStmt;            /* Statement stepped by fts5SorterNext() */
  i64 iRowid;                     /* Current rowid */
  const u8 *aPoslist;             /* Position lists for current row */
  int nIdx;                       /* Number of entries in aIdx[] */
  int aIdx[1];                    /* Offsets into aPoslist for current row */
};
15713
15714
/*
** Virtual-table cursor object.
**
** iSpecial:
**   If this is a 'special' query (refer to function fts5SpecialMatch()),
**   then this variable contains the result of the query.
**
** iFirstRowid, iLastRowid:
**   These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
**   cursor iterates in ascending order of rowids, iFirstRowid is the lower
**   limit of rowids to return, and iLastRowid the upper. In other words, the
**   WHERE clause in the user's query might have been:
**
**       <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
**
**   If the cursor iterates in descending order of rowid, iFirstRowid
**   is the upper limit (i.e. the "first" rowid visited) and iLastRowid
**   the lower.
**
** Field order matters: fts5FreeCursorComponents() zeroes every byte from
** ePlan to the end of the structure, so fields that must survive a reset
** (base, pNext, aColumnSize, iCsrId) have to stay above ePlan.
*/
struct Fts5Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  Fts5Cursor *pNext;              /* Next cursor in Fts5Global.pCsr list */
  int *aColumnSize;               /* Values for xColumnSize(). Points at the
                                  ** extra space allocated directly after this
                                  ** object by fts5OpenMethod() */
  i64 iCsrId;                     /* Cursor id */

  /* Zero from this point onwards on cursor reset */
  int ePlan;                      /* FTS5_PLAN_XXX value */
  int bDesc;                      /* True for "ORDER BY rowid DESC" queries */
  i64 iFirstRowid;                /* Return no rowids earlier than this */
  i64 iLastRowid;                 /* Return no rowids later than this */
  sqlite3_stmt *pStmt;            /* Statement used to read %_content */
  Fts5Expr *pExpr;                /* Expression for MATCH queries */
  Fts5Sorter *pSorter;            /* Sorter for "ORDER BY rank" queries */
  int csrflags;                   /* Mask of cursor flags (FTS5CSR_XXX) */
  i64 iSpecial;                   /* Result of special query */

  /* "rank" function. Populated on demand from vtab.xColumn(). */
  char *zRank;                    /* Custom rank function */
  char *zRankArgs;                /* Custom rank function args */
  Fts5Auxiliary *pRank;           /* Rank callback (or NULL) */
  int nRankArg;                   /* Number of trailing arguments for rank() */
  sqlite3_value **apRankArg;      /* Array of trailing arguments */
  sqlite3_stmt *pRankArgStmt;     /* Origin of objects in apRankArg[] */

  /* Auxiliary data storage */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */

  /* Cache used by auxiliary functions xInst() and xInstCount() */
  Fts5PoslistReader *aInstIter;   /* One for each phrase */
  int nInstAlloc;                 /* Size of aInst[] array (entries / 3) */
  int nInstCount;                 /* Number of phrase instances */
  int *aInst;                     /* 3 integers per phrase instance */
};
15769
/*
** Bits that make up the "idxNum" parameter passed indirectly by
** xBestIndex() to xFilter().
**
** fts5BestIndexMethod() stores only the FTS5_BI_ORDER_XXX bits in idxNum;
** it encodes the WHERE-clause constraints in the idxStr string instead.
*/
#define FTS5_BI_MATCH        0x0001    /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002    /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004    /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008    /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010    /* rowid >= ? */

#define FTS5_BI_ORDER_RANK   0x0020    /* ORDER BY rank (requires a MATCH) */
#define FTS5_BI_ORDER_ROWID  0x0040    /* ORDER BY rowid */
#define FTS5_BI_ORDER_DESC   0x0080    /* The ORDER BY term is descending */
15783
/*
** Values for Fts5Cursor.csrflags
**
** FTS5CSR_EOF must remain 0x01: fts5NextMethod() passes the result of
** sqlite3Fts5ExprEof() straight to CsrFlagSet(), which presumably relies
** on that function returning 0 or 1 (its definition is elsewhere in the
** file - confirm before changing this value).
*/
#define FTS5CSR_EOF               0x01  /* Cursor has run off the end */
#define FTS5CSR_REQUIRE_CONTENT   0x02  /* Set on each new row */
#define FTS5CSR_REQUIRE_DOCSIZE   0x04  /* Set on each new row */
#define FTS5CSR_REQUIRE_INST      0x08  /* Set on each new row */
#define FTS5CSR_FREE_ZRANK        0x10  /* zRank/zRankArgs are freed with cursor */
#define FTS5CSR_REQUIRE_RESEEK    0x20  /* Set by fts5TripCursors() */
#define FTS5CSR_REQUIRE_POSLIST   0x40  /* Set on each new row */

/* True if every bit of mask y is set in x */
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
/* True if at least one bit of mask y is set in x */
#define BitFlagTest(x,y)    (((x) & (y))!=0)
15797
15798
/*
** Macros to Set(), Clear() and Test() cursor flags.
**
** CsrFlagTest() evaluates to the masked bits themselves (not to 0 or 1),
** so its result should only be used in a boolean context.
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
15805
/*
** One entry in the Fts5Cursor.pAuxdata list. When the cursor is reset,
** fts5FreeCursorComponents() walks the list, invokes xDelete(pPtr) on each
** entry that has a destructor, and frees the entry itself.
*/
struct Fts5Auxdata {
  Fts5Auxiliary *pAux;            /* Extension to which this belongs */
  void *pPtr;                     /* Pointer value */
  void(*xDelete)(void*);          /* Destructor (may be NULL) */
  Fts5Auxdata *pNext;             /* Next object in linked list */
};
15812
#ifdef SQLITE_DEBUG
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7
/*
** Debug-only sanity checker for the order in which SQLite invokes the
** transaction related virtual table methods. Parameter op is one of the
** FTS5_XXX constants above and iSavepoint is the savepoint index supplied
** with savepoint operations. Each call assert()s that the operation is
** legal given the state recorded in p->ts, then updates that state.
**
** In non-debug builds this is a macro that expands to nothing.
*/
static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){
  if( op==FTS5_BEGIN ){
    /* A transaction may only be opened when none is open */
    assert( p->ts.eState==0 );
    p->ts.eState = 1;
    p->ts.iSavepoint = -1;
  }else if( op==FTS5_SYNC ){
    assert( p->ts.eState==1 || p->ts.eState==2 );
    p->ts.eState = 2;
  }else if( op==FTS5_COMMIT ){
    /* Commit is only legal after a sync */
    assert( p->ts.eState==2 );
    p->ts.eState = 0;
  }else if( op==FTS5_ROLLBACK ){
    assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 );
    p->ts.eState = 0;
  }else if( op==FTS5_SAVEPOINT ){
    assert( p->ts.eState>=1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint>=p->ts.iSavepoint );
    p->ts.iSavepoint = iSavepoint;
  }else if( op==FTS5_RELEASE ){
    assert( p->ts.eState>=1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint<=p->ts.iSavepoint );
    p->ts.iSavepoint = iSavepoint-1;
  }else if( op==FTS5_ROLLBACKTO ){
    assert( p->ts.eState>=1 );
    assert( iSavepoint>=-1 );
    /* The following assert() can fail if another vtab strikes an error
    ** within an xSavepoint() call then SQLite calls xRollbackTo() - without
    ** having called xSavepoint() on this vtab.  */
    /* assert( iSavepoint<=p->ts.iSavepoint ); */
    p->ts.iSavepoint = iSavepoint;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
15872
15873/*
15874** Return true if pTab is a contentless table.
15875*/
15876static int fts5IsContentless(Fts5FullTable *pTab){
15877 return pTab->p.pConfig->eContent==FTS5_CONTENT_NONE;
15878}
15879
15880/*
15881** Delete a virtual table handle allocated by fts5InitVtab().
15882*/
15883static void fts5FreeVtab(Fts5FullTable *pTab){
15884 if( pTab ){
15885 sqlite3Fts5IndexClose(pTab->p.pIndex);
15886 sqlite3Fts5StorageClose(pTab->pStorage);
15887 sqlite3Fts5ConfigFree(pTab->p.pConfig);
15888 sqlite3_free(pTab);
15889 }
15890}
15891
15892/*
15893** The xDisconnect() virtual table method.
15894*/
15895static int fts5DisconnectMethod(sqlite3_vtab *pVtab){
15896 fts5FreeVtab((Fts5FullTable*)pVtab);
15897 return SQLITE_OK;
15898}
15899
15900/*
15901** The xDestroy() virtual table method.
15902*/
15903static int fts5DestroyMethod(sqlite3_vtab *pVtab){
15904 Fts5Table *pTab = (Fts5Table*)pVtab;
15905 int rc = sqlite3Fts5DropAll(pTab->pConfig);
15906 if( rc==SQLITE_OK ){
15907 fts5FreeVtab((Fts5FullTable*)pVtab);
15908 }
15909 return rc;
15910}
15911
15912/*
15913** This function is the implementation of both the xConnect and xCreate
15914** methods of the FTS3 virtual table.
15915**
15916** The argv[] array contains the following:
15917**
15918** argv[0] -> module name ("fts5")
15919** argv[1] -> database name
15920** argv[2] -> table name
15921** argv[...] -> "column name" and other module argument fields.
15922*/
15923static int fts5InitVtab(
15924 int bCreate, /* True for xCreate, false for xConnect */
15925 sqlite3 *db, /* The SQLite database connection */
15926 void *pAux, /* Hash table containing tokenizers */
15927 int argc, /* Number of elements in argv array */
15928 const char * const *argv, /* xCreate/xConnect argument array */
15929 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
15930 char **pzErr /* Write any error message here */
15931){
15932 Fts5Global *pGlobal = (Fts5Global*)pAux;
15933 const char **azConfig = (const char**)argv;
15934 int rc = SQLITE_OK; /* Return code */
15935 Fts5Config *pConfig = 0; /* Results of parsing argc/argv */
15936 Fts5FullTable *pTab = 0; /* New virtual table object */
15937
15938 /* Allocate the new vtab object and parse the configuration */
15939 pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable));
15940 if( rc==SQLITE_OK ){
15941 rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
15942 assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
15943 }
15944 if( rc==SQLITE_OK ){
15945 pTab->p.pConfig = pConfig;
15946 pTab->pGlobal = pGlobal;
15947 }
15948
15949 /* Open the index sub-system */
15950 if( rc==SQLITE_OK ){
15951 rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr);
15952 }
15953
15954 /* Open the storage sub-system */
15955 if( rc==SQLITE_OK ){
15956 rc = sqlite3Fts5StorageOpen(
15957 pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr
15958 );
15959 }
15960
15961 /* Call sqlite3_declare_vtab() */
15962 if( rc==SQLITE_OK ){
15963 rc = sqlite3Fts5ConfigDeclareVtab(pConfig);
15964 }
15965
15966 /* Load the initial configuration */
15967 if( rc==SQLITE_OK ){
15968 assert( pConfig->pzErrmsg==0 );
15969 pConfig->pzErrmsg = pzErr;
15970 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
15971 sqlite3Fts5IndexRollback(pTab->p.pIndex);
15972 pConfig->pzErrmsg = 0;
15973 }
15974
15975 if( rc!=SQLITE_OK ){
15976 fts5FreeVtab(pTab);
15977 pTab = 0;
15978 }else if( bCreate ){
15979 fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
15980 }
15981 *ppVTab = (sqlite3_vtab*)pTab;
15982 return rc;
15983}
15984
15985/*
15986** The xConnect() and xCreate() methods for the virtual table. All the
15987** work is done in function fts5InitVtab().
15988*/
15989static int fts5ConnectMethod(
15990 sqlite3 *db, /* Database connection */
15991 void *pAux, /* Pointer to tokenizer hash table */
15992 int argc, /* Number of elements in argv array */
15993 const char * const *argv, /* xCreate/xConnect argument array */
15994 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
15995 char **pzErr /* OUT: sqlite3_malloc'd error message */
15996){
15997 return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
15998}
15999static int fts5CreateMethod(
16000 sqlite3 *db, /* Database connection */
16001 void *pAux, /* Pointer to tokenizer hash table */
16002 int argc, /* Number of elements in argv array */
16003 const char * const *argv, /* xCreate/xConnect argument array */
16004 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
16005 char **pzErr /* OUT: sqlite3_malloc'd error message */
16006){
16007 return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
16008}
16009
/*
** The different query plans.
**
** The numeric values matter: fts5NextMethod() tests (ePlan<3) and
** asserts that this is true for exactly FTS5_PLAN_MATCH and
** FTS5_PLAN_SOURCE, so those two must remain the only values below 3.
*/
#define FTS5_PLAN_MATCH          1       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3       /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5       /* No usable constraint */
#define FTS5_PLAN_ROWID          6       /* (rowid = ?) */
16019
16020/*
16021** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
16022** extension is currently being used by a version of SQLite too old to
16023** support index-info flags. In that case this function is a no-op.
16024*/
16025static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){
16026#if SQLITE_VERSION_NUMBER>=3008012
16027#ifndef SQLITE_CORE
16028 if( sqlite3_libversion_number()>=3008012 )
16029#endif
16030 {
16031 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
16032 }
16033#endif
16034}
16035
16036static int fts5UsePatternMatch(
16037 Fts5Config *pConfig,
16038 struct sqlite3_index_constraint *p
16039){
16040 assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB );
16041 assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE );
16042 if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){
16043 return 1;
16044 }
16045 if( pConfig->ePattern==FTS5_PATTERN_LIKE
16046 && (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB)
16047 ){
16048 return 1;
16049 }
16050 return 0;
16051}
16052
16053/*
16054** Implementation of the xBestIndex method for FTS5 tables. Within the
16055** WHERE constraint, it searches for the following:
16056**
16057** 1. A MATCH constraint against the table column.
16058** 2. A MATCH constraint against the "rank" column.
16059** 3. A MATCH constraint against some other column.
16060** 4. An == constraint against the rowid column.
16061** 5. A < or <= constraint against the rowid column.
16062** 6. A > or >= constraint against the rowid column.
16063**
16064** Within the ORDER BY, the following are supported:
16065**
16066** 5. ORDER BY rank [ASC|DESC]
16067** 6. ORDER BY rowid [ASC|DESC]
16068**
16069** Information for the xFilter call is passed via both the idxNum and
16070** idxStr variables. Specifically, idxNum is a bitmask of the following
16071** flags used to encode the ORDER BY clause:
16072**
16073** FTS5_BI_ORDER_RANK
16074** FTS5_BI_ORDER_ROWID
16075** FTS5_BI_ORDER_DESC
16076**
16077** idxStr is used to encode data from the WHERE clause. For each argument
16078** passed to the xFilter method, the following is appended to idxStr:
16079**
16080** Match against table column: "m"
16081** Match against rank column: "r"
16082** Match against other column: "M<column-number>"
16083** LIKE against other column: "L<column-number>"
16084** GLOB against other column: "G<column-number>"
16085** Equality constraint against the rowid: "="
16086** A < or <= against the rowid: "<"
16087** A > or >= against the rowid: ">"
16088**
16089** This function ensures that there is at most one "r" or "=". And that if
16090** there exists an "=" then there is no "<" or ">".
16091**
16092** Costs are assigned as follows:
16093**
16094** a) If an unusable MATCH operator is present in the WHERE clause, the
16095** cost is unconditionally set to 1e50 (a really big number).
16096**
16097** a) If a MATCH operator is present, the cost depends on the other
16098** constraints also present. As follows:
16099**
16100** * No other constraints: cost=1000.0
16101** * One rowid range constraint: cost=750.0
16102** * Both rowid range constraints: cost=500.0
16103** * An == rowid constraint: cost=100.0
16104**
16105** b) Otherwise, if there is no MATCH:
16106**
16107** * No other constraints: cost=1000000.0
16108** * One rowid range constraint: cost=750000.0
16109** * Both rowid range constraints: cost=250000.0
16110** * An == rowid constraint: cost=10.0
16111**
16112** Costs are not modified by the ORDER BY clause.
16113*/
16114static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
16115 Fts5Table *pTab = (Fts5Table*)pVTab;
16116 Fts5Config *pConfig = pTab->pConfig;
16117 const int nCol = pConfig->nCol;
16118 int idxFlags = 0; /* Parameter passed through to xFilter() */
16119 int i;
16120
16121 char *idxStr;
16122 int iIdxStr = 0;
16123 int iCons = 0;
16124
16125 int bSeenEq = 0;
16126 int bSeenGt = 0;
16127 int bSeenLt = 0;
16128 int bSeenMatch = 0;
16129 int bSeenRank = 0;
16130
16131
16132 assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH );
16133 assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH );
16134 assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH );
16135 assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH );
16136 assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH );
16137
16138 if( pConfig->bLock ){
16139 pTab->base.zErrMsg = sqlite3_mprintf(
16140 "recursively defined fts5 content table"
16141 );
16142 return SQLITE_ERROR;
16143 }
16144
16145 idxStr = (char*)sqlite3_malloc(pInfo->nConstraint * 8 + 1);
16146 if( idxStr==0 ) return SQLITE_NOMEM;
16147 pInfo->idxStr = idxStr;
16148 pInfo->needToFreeIdxStr = 1;
16149
16150 for(i=0; i<pInfo->nConstraint; i++){
16151 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
16152 int iCol = p->iColumn;
16153 if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH
16154 || (p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol>=nCol)
16155 ){
16156 /* A MATCH operator or equivalent */
16157 if( p->usable==0 || iCol<0 ){
16158 /* As there exists an unusable MATCH constraint this is an
16159 ** unusable plan. Set a prohibitively high cost. */
16160 pInfo->estimatedCost = 1e50;
16161 assert( iIdxStr < pInfo->nConstraint*6 + 1 );
16162 idxStr[iIdxStr] = 0;
16163 return SQLITE_OK;
16164 }else{
16165 if( iCol==nCol+1 ){
16166 if( bSeenRank ) continue;
16167 idxStr[iIdxStr++] = 'r';
16168 bSeenRank = 1;
16169 }else if( iCol>=0 ){
16170 bSeenMatch = 1;
16171 idxStr[iIdxStr++] = 'M';
16172 sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol);
16173 idxStr += strlen(&idxStr[iIdxStr]);
16174 assert( idxStr[iIdxStr]=='\0' );
16175 }
16176 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
16177 pInfo->aConstraintUsage[i].omit = 1;
16178 }
16179 }else if( p->usable ){
16180 if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){
16181 assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB );
16182 idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE ? 'L' : 'G';
16183 sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol);
16184 idxStr += strlen(&idxStr[iIdxStr]);
16185 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
16186 assert( idxStr[iIdxStr]=='\0' );
16187 }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){
16188 idxStr[iIdxStr++] = '=';
16189 bSeenEq = 1;
16190 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
16191 }
16192 }
16193 }
16194
16195 if( bSeenEq==0 ){
16196 for(i=0; i<pInfo->nConstraint; i++){
16197 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
16198 if( p->iColumn<0 && p->usable ){
16199 int op = p->op;
16200 if( op==SQLITE_INDEX_CONSTRAINT_LT || op==SQLITE_INDEX_CONSTRAINT_LE ){
16201 if( bSeenLt ) continue;
16202 idxStr[iIdxStr++] = '<';
16203 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
16204 bSeenLt = 1;
16205 }else
16206 if( op==SQLITE_INDEX_CONSTRAINT_GT || op==SQLITE_INDEX_CONSTRAINT_GE ){
16207 if( bSeenGt ) continue;
16208 idxStr[iIdxStr++] = '>';
16209 pInfo->aConstraintUsage[i].argvIndex = ++iCons;
16210 bSeenGt = 1;
16211 }
16212 }
16213 }
16214 }
16215 idxStr[iIdxStr] = '\0';
16216
16217 /* Set idxFlags flags for the ORDER BY clause */
16218 if( pInfo->nOrderBy==1 ){
16219 int iSort = pInfo->aOrderBy[0].iColumn;
16220 if( iSort==(pConfig->nCol+1) && bSeenMatch ){
16221 idxFlags |= FTS5_BI_ORDER_RANK;
16222 }else if( iSort==-1 ){
16223 idxFlags |= FTS5_BI_ORDER_ROWID;
16224 }
16225 if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
16226 pInfo->orderByConsumed = 1;
16227 if( pInfo->aOrderBy[0].desc ){
16228 idxFlags |= FTS5_BI_ORDER_DESC;
16229 }
16230 }
16231 }
16232
16233 /* Calculate the estimated cost based on the flags set in idxFlags. */
16234 if( bSeenEq ){
16235 pInfo->estimatedCost = bSeenMatch ? 100.0 : 10.0;
16236 if( bSeenMatch==0 ) fts5SetUniqueFlag(pInfo);
16237 }else if( bSeenLt && bSeenGt ){
16238 pInfo->estimatedCost = bSeenMatch ? 500.0 : 250000.0;
16239 }else if( bSeenLt || bSeenGt ){
16240 pInfo->estimatedCost = bSeenMatch ? 750.0 : 750000.0;
16241 }else{
16242 pInfo->estimatedCost = bSeenMatch ? 1000.0 : 1000000.0;
16243 }
16244
16245 pInfo->idxNum = idxFlags;
16246 return SQLITE_OK;
16247}
16248
16249static int fts5NewTransaction(Fts5FullTable *pTab){
16250 Fts5Cursor *pCsr;
16251 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
16252 if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK;
16253 }
16254 return sqlite3Fts5StorageReset(pTab->pStorage);
16255}
16256
16257/*
16258** Implementation of xOpen method.
16259*/
16260static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
16261 Fts5FullTable *pTab = (Fts5FullTable*)pVTab;
16262 Fts5Config *pConfig = pTab->p.pConfig;
16263 Fts5Cursor *pCsr = 0; /* New cursor object */
16264 sqlite3_int64 nByte; /* Bytes of space to allocate */
16265 int rc; /* Return code */
16266
16267 rc = fts5NewTransaction(pTab);
16268 if( rc==SQLITE_OK ){
16269 nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
16270 pCsr = (Fts5Cursor*)sqlite3_malloc64(nByte);
16271 if( pCsr ){
16272 Fts5Global *pGlobal = pTab->pGlobal;
16273 memset(pCsr, 0, (size_t)nByte);
16274 pCsr->aColumnSize = (int*)&pCsr[1];
16275 pCsr->pNext = pGlobal->pCsr;
16276 pGlobal->pCsr = pCsr;
16277 pCsr->iCsrId = ++pGlobal->iNextId;
16278 }else{
16279 rc = SQLITE_NOMEM;
16280 }
16281 }
16282 *ppCsr = (sqlite3_vtab_cursor*)pCsr;
16283 return rc;
16284}
16285
16286static int fts5StmtType(Fts5Cursor *pCsr){
16287 if( pCsr->ePlan==FTS5_PLAN_SCAN ){
16288 return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC;
16289 }
16290 return FTS5_STMT_LOOKUP;
16291}
16292
16293/*
16294** This function is called after the cursor passed as the only argument
16295** is moved to point at a different row. It clears all cached data
16296** specific to the previous row stored by the cursor object.
16297*/
16298static void fts5CsrNewrow(Fts5Cursor *pCsr){
16299 CsrFlagSet(pCsr,
16300 FTS5CSR_REQUIRE_CONTENT
16301 | FTS5CSR_REQUIRE_DOCSIZE
16302 | FTS5CSR_REQUIRE_INST
16303 | FTS5CSR_REQUIRE_POSLIST
16304 );
16305}
16306
16307static void fts5FreeCursorComponents(Fts5Cursor *pCsr){
16308 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
16309 Fts5Auxdata *pData;
16310 Fts5Auxdata *pNext;
16311
16312 sqlite3_free(pCsr->aInstIter);
16313 sqlite3_free(pCsr->aInst);
16314 if( pCsr->pStmt ){
16315 int eStmt = fts5StmtType(pCsr);
16316 sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
16317 }
16318 if( pCsr->pSorter ){
16319 Fts5Sorter *pSorter = pCsr->pSorter;
16320 sqlite3_finalize(pSorter->pStmt);
16321 sqlite3_free(pSorter);
16322 }
16323
16324 if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){
16325 sqlite3Fts5ExprFree(pCsr->pExpr);
16326 }
16327
16328 for(pData=pCsr->pAuxdata; pData; pData=pNext){
16329 pNext = pData->pNext;
16330 if( pData->xDelete ) pData->xDelete(pData->pPtr);
16331 sqlite3_free(pData);
16332 }
16333
16334 sqlite3_finalize(pCsr->pRankArgStmt);
16335 sqlite3_free(pCsr->apRankArg);
16336
16337 if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
16338 sqlite3_free(pCsr->zRank);
16339 sqlite3_free(pCsr->zRankArgs);
16340 }
16341
16342 sqlite3Fts5IndexCloseReader(pTab->p.pIndex);
16343 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr));
16344}
16345
16346
16347/*
16348** Close the cursor. For additional information see the documentation
16349** on the xClose method of the virtual table interface.
16350*/
16351static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
16352 if( pCursor ){
16353 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
16354 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16355 Fts5Cursor **pp;
16356
16357 fts5FreeCursorComponents(pCsr);
16358 /* Remove the cursor from the Fts5Global.pCsr list */
16359 for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
16360 *pp = pCsr->pNext;
16361
16362 sqlite3_free(pCsr);
16363 }
16364 return SQLITE_OK;
16365}
16366
16367static int fts5SorterNext(Fts5Cursor *pCsr){
16368 Fts5Sorter *pSorter = pCsr->pSorter;
16369 int rc;
16370
16371 rc = sqlite3_step(pSorter->pStmt);
16372 if( rc==SQLITE_DONE ){
16373 rc = SQLITE_OK;
16374 CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT);
16375 }else if( rc==SQLITE_ROW ){
16376 const u8 *a;
16377 const u8 *aBlob;
16378 int nBlob;
16379 int i;
16380 int iOff = 0;
16381 rc = SQLITE_OK;
16382
16383 pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0);
16384 nBlob = sqlite3_column_bytes(pSorter->pStmt, 1);
16385 aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1);
16386
16387 /* nBlob==0 in detail=none mode. */
16388 if( nBlob>0 ){
16389 for(i=0; i<(pSorter->nIdx-1); i++){
16390 int iVal;
16391 a += fts5GetVarint32(a, iVal);
16392 iOff += iVal;
16393 pSorter->aIdx[i] = iOff;
16394 }
16395 pSorter->aIdx[i] = &aBlob[nBlob] - a;
16396 pSorter->aPoslist = a;
16397 }
16398
16399 fts5CsrNewrow(pCsr);
16400 }
16401
16402 return rc;
16403}
16404
16405
16406/*
16407** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors
16408** open on table pTab.
16409*/
16410static void fts5TripCursors(Fts5FullTable *pTab){
16411 Fts5Cursor *pCsr;
16412 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
16413 if( pCsr->ePlan==FTS5_PLAN_MATCH
16414 && pCsr->base.pVtab==(sqlite3_vtab*)pTab
16415 ){
16416 CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK);
16417 }
16418 }
16419}
16420
16421/*
16422** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
16423** argument, close and reopen all Fts5IndexIter iterators that the cursor
16424** is using. Then attempt to move the cursor to a rowid equal to or laster
16425** (in the cursors sort order - ASC or DESC) than the current rowid.
16426**
16427** If the new rowid is not equal to the old, set output parameter *pbSkip
16428** to 1 before returning. Otherwise, leave it unchanged.
16429**
16430** Return SQLITE_OK if successful or if no reseek was required, or an
16431** error code if an error occurred.
16432*/
16433static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
16434 int rc = SQLITE_OK;
16435 assert( *pbSkip==0 );
16436 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){
16437 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
16438 int bDesc = pCsr->bDesc;
16439 i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr);
16440
16441 rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc);
16442 if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){
16443 *pbSkip = 1;
16444 }
16445
16446 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK);
16447 fts5CsrNewrow(pCsr);
16448 if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
16449 CsrFlagSet(pCsr, FTS5CSR_EOF);
16450 *pbSkip = 1;
16451 }
16452 }
16453 return rc;
16454}
16455
16456
16457/*
16458** Advance the cursor to the next row in the table that matches the
16459** search criteria.
16460**
16461** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned
16462** even if we reach end-of-file. The fts5EofMethod() will be called
16463** subsequently to determine whether or not an EOF was hit.
16464*/
16465static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
16466 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16467 int rc;
16468
16469 assert( (pCsr->ePlan<3)==
16470 (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)
16471 );
16472 assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
16473
16474 if( pCsr->ePlan<3 ){
16475 int bSkip = 0;
16476 if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
16477 rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid);
16478 CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr));
16479 fts5CsrNewrow(pCsr);
16480 }else{
16481 switch( pCsr->ePlan ){
16482 case FTS5_PLAN_SPECIAL: {
16483 CsrFlagSet(pCsr, FTS5CSR_EOF);
16484 rc = SQLITE_OK;
16485 break;
16486 }
16487
16488 case FTS5_PLAN_SORTED_MATCH: {
16489 rc = fts5SorterNext(pCsr);
16490 break;
16491 }
16492
16493 default: {
16494 Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig;
16495 pConfig->bLock++;
16496 rc = sqlite3_step(pCsr->pStmt);
16497 pConfig->bLock--;
16498 if( rc!=SQLITE_ROW ){
16499 CsrFlagSet(pCsr, FTS5CSR_EOF);
16500 rc = sqlite3_reset(pCsr->pStmt);
16501 if( rc!=SQLITE_OK ){
16502 pCursor->pVtab->zErrMsg = sqlite3_mprintf(
16503 "%s", sqlite3_errmsg(pConfig->db)
16504 );
16505 }
16506 }else{
16507 rc = SQLITE_OK;
16508 }
16509 break;
16510 }
16511 }
16512 }
16513
16514 return rc;
16515}
16516
16517
16518static int fts5PrepareStatement(
16519 sqlite3_stmt **ppStmt,
16520 Fts5Config *pConfig,
16521 const char *zFmt,
16522 ...
16523){
16524 sqlite3_stmt *pRet = 0;
16525 int rc;
16526 char *zSql;
16527 va_list ap;
16528
16529 va_start(ap, zFmt);
16530 zSql = sqlite3_vmprintf(zFmt, ap);
16531 if( zSql==0 ){
16532 rc = SQLITE_NOMEM;
16533 }else{
16534 rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
16535 SQLITE_PREPARE_PERSISTENT, &pRet, 0);
16536 if( rc!=SQLITE_OK ){
16537 *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db));
16538 }
16539 sqlite3_free(zSql);
16540 }
16541
16542 va_end(ap);
16543 *ppStmt = pRet;
16544 return rc;
16545}
16546
16547static int fts5CursorFirstSorted(
16548 Fts5FullTable *pTab,
16549 Fts5Cursor *pCsr,
16550 int bDesc
16551){
16552 Fts5Config *pConfig = pTab->p.pConfig;
16553 Fts5Sorter *pSorter;
16554 int nPhrase;
16555 sqlite3_int64 nByte;
16556 int rc;
16557 const char *zRank = pCsr->zRank;
16558 const char *zRankArgs = pCsr->zRankArgs;
16559
16560 nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
16561 nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1);
16562 pSorter = (Fts5Sorter*)sqlite3_malloc64(nByte);
16563 if( pSorter==0 ) return SQLITE_NOMEM;
16564 memset(pSorter, 0, (size_t)nByte);
16565 pSorter->nIdx = nPhrase;
16566
16567 /* TODO: It would be better to have some system for reusing statement
16568 ** handles here, rather than preparing a new one for each query. But that
16569 ** is not possible as SQLite reference counts the virtual table objects.
16570 ** And since the statement required here reads from this very virtual
16571 ** table, saving it creates a circular reference.
16572 **
16573 ** If SQLite a built-in statement cache, this wouldn't be a problem. */
16574 rc = fts5PrepareStatement(&pSorter->pStmt, pConfig,
16575 "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s",
16576 pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
16577 (zRankArgs ? ", " : ""),
16578 (zRankArgs ? zRankArgs : ""),
16579 bDesc ? "DESC" : "ASC"
16580 );
16581
16582 pCsr->pSorter = pSorter;
16583 if( rc==SQLITE_OK ){
16584 assert( pTab->pSortCsr==0 );
16585 pTab->pSortCsr = pCsr;
16586 rc = fts5SorterNext(pCsr);
16587 pTab->pSortCsr = 0;
16588 }
16589
16590 if( rc!=SQLITE_OK ){
16591 sqlite3_finalize(pSorter->pStmt);
16592 sqlite3_free(pSorter);
16593 pCsr->pSorter = 0;
16594 }
16595
16596 return rc;
16597}
16598
16599static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){
16600 int rc;
16601 Fts5Expr *pExpr = pCsr->pExpr;
16602 rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc);
16603 if( sqlite3Fts5ExprEof(pExpr) ){
16604 CsrFlagSet(pCsr, FTS5CSR_EOF);
16605 }
16606 fts5CsrNewrow(pCsr);
16607 return rc;
16608}
16609
16610/*
16611** Process a "special" query. A special query is identified as one with a
16612** MATCH expression that begins with a '*' character. The remainder of
16613** the text passed to the MATCH operator are used as the special query
16614** parameters.
16615*/
16616static int fts5SpecialMatch(
16617 Fts5FullTable *pTab,
16618 Fts5Cursor *pCsr,
16619 const char *zQuery
16620){
16621 int rc = SQLITE_OK; /* Return code */
16622 const char *z = zQuery; /* Special query text */
16623 int n; /* Number of bytes in text at z */
16624
16625 while( z[0]==' ' ) z++;
16626 for(n=0; z[n] && z[n]!=' '; n++);
16627
16628 assert( pTab->p.base.zErrMsg==0 );
16629 pCsr->ePlan = FTS5_PLAN_SPECIAL;
16630
16631 if( n==5 && 0==sqlite3_strnicmp("reads", z, n) ){
16632 pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex);
16633 }
16634 else if( n==2 && 0==sqlite3_strnicmp("id", z, n) ){
16635 pCsr->iSpecial = pCsr->iCsrId;
16636 }
16637 else{
16638 /* An unrecognized directive. Return an error message. */
16639 pTab->p.base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z);
16640 rc = SQLITE_ERROR;
16641 }
16642
16643 return rc;
16644}
16645
16646/*
16647** Search for an auxiliary function named zName that can be used with table
16648** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
16649** structure. Otherwise, if no such function exists, return NULL.
16650*/
16651static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){
16652 Fts5Auxiliary *pAux;
16653
16654 for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){
16655 if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux;
16656 }
16657
16658 /* No function of the specified name was found. Return 0. */
16659 return 0;
16660}
16661
16662
16663static int fts5FindRankFunction(Fts5Cursor *pCsr){
16664 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
16665 Fts5Config *pConfig = pTab->p.pConfig;
16666 int rc = SQLITE_OK;
16667 Fts5Auxiliary *pAux = 0;
16668 const char *zRank = pCsr->zRank;
16669 const char *zRankArgs = pCsr->zRankArgs;
16670
16671 if( zRankArgs ){
16672 char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
16673 if( zSql ){
16674 sqlite3_stmt *pStmt = 0;
16675 rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
16676 SQLITE_PREPARE_PERSISTENT, &pStmt, 0);
16677 sqlite3_free(zSql);
16678 assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
16679 if( rc==SQLITE_OK ){
16680 if( SQLITE_ROW==sqlite3_step(pStmt) ){
16681 sqlite3_int64 nByte;
16682 pCsr->nRankArg = sqlite3_column_count(pStmt);
16683 nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
16684 pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
16685 if( rc==SQLITE_OK ){
16686 int i;
16687 for(i=0; i<pCsr->nRankArg; i++){
16688 pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i);
16689 }
16690 }
16691 pCsr->pRankArgStmt = pStmt;
16692 }else{
16693 rc = sqlite3_finalize(pStmt);
16694 assert( rc!=SQLITE_OK );
16695 }
16696 }
16697 }
16698 }
16699
16700 if( rc==SQLITE_OK ){
16701 pAux = fts5FindAuxiliary(pTab, zRank);
16702 if( pAux==0 ){
16703 assert( pTab->p.base.zErrMsg==0 );
16704 pTab->p.base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank);
16705 rc = SQLITE_ERROR;
16706 }
16707 }
16708
16709 pCsr->pRank = pAux;
16710 return rc;
16711}
16712
16713
16714static int fts5CursorParseRank(
16715 Fts5Config *pConfig,
16716 Fts5Cursor *pCsr,
16717 sqlite3_value *pRank
16718){
16719 int rc = SQLITE_OK;
16720 if( pRank ){
16721 const char *z = (const char*)sqlite3_value_text(pRank);
16722 char *zRank = 0;
16723 char *zRankArgs = 0;
16724
16725 if( z==0 ){
16726 if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR;
16727 }else{
16728 rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
16729 }
16730 if( rc==SQLITE_OK ){
16731 pCsr->zRank = zRank;
16732 pCsr->zRankArgs = zRankArgs;
16733 CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
16734 }else if( rc==SQLITE_ERROR ){
16735 pCsr->base.pVtab->zErrMsg = sqlite3_mprintf(
16736 "parse error in rank function: %s", z
16737 );
16738 }
16739 }else{
16740 if( pConfig->zRank ){
16741 pCsr->zRank = (char*)pConfig->zRank;
16742 pCsr->zRankArgs = (char*)pConfig->zRankArgs;
16743 }else{
16744 pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
16745 pCsr->zRankArgs = 0;
16746 }
16747 }
16748 return rc;
16749}
16750
16751static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){
16752 if( pVal ){
16753 int eType = sqlite3_value_numeric_type(pVal);
16754 if( eType==SQLITE_INTEGER ){
16755 return sqlite3_value_int64(pVal);
16756 }
16757 }
16758 return iDefault;
16759}
16760
16761/*
16762** This is the xFilter interface for the virtual table. See
16763** the virtual table xFilter method documentation for additional
16764** information.
16765**
16766** There are three possible query strategies:
16767**
16768** 1. Full-text search using a MATCH operator.
16769** 2. A by-rowid lookup.
16770** 3. A full-table scan.
16771*/
16772static int fts5FilterMethod(
16773 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
16774 int idxNum, /* Strategy index */
16775 const char *idxStr, /* Unused */
16776 int nVal, /* Number of elements in apVal */
16777 sqlite3_value **apVal /* Arguments for the indexing scheme */
16778){
16779 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
16780 Fts5Config *pConfig = pTab->p.pConfig;
16781 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16782 int rc = SQLITE_OK; /* Error code */
16783 int bDesc; /* True if ORDER BY [rank|rowid] DESC */
16784 int bOrderByRank; /* True if ORDER BY rank */
16785 sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */
16786 sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */
16787 sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */
16788 sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */
16789 int iCol; /* Column on LHS of MATCH operator */
16790 char **pzErrmsg = pConfig->pzErrmsg;
16791 int i;
16792 int iIdxStr = 0;
16793 Fts5Expr *pExpr = 0;
16794
16795 if( pConfig->bLock ){
16796 pTab->p.base.zErrMsg = sqlite3_mprintf(
16797 "recursively defined fts5 content table"
16798 );
16799 return SQLITE_ERROR;
16800 }
16801
16802 if( pCsr->ePlan ){
16803 fts5FreeCursorComponents(pCsr);
16804 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
16805 }
16806
16807 assert( pCsr->pStmt==0 );
16808 assert( pCsr->pExpr==0 );
16809 assert( pCsr->csrflags==0 );
16810 assert( pCsr->pRank==0 );
16811 assert( pCsr->zRank==0 );
16812 assert( pCsr->zRankArgs==0 );
16813 assert( pTab->pSortCsr==0 || nVal==0 );
16814
16815 assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg );
16816 pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
16817
16818 /* Decode the arguments passed through to this function. */
16819 for(i=0; i<nVal; i++){
16820 switch( idxStr[iIdxStr++] ){
16821 case 'r':
16822 pRank = apVal[i];
16823 break;
16824 case 'M': {
16825 const char *zText = (const char*)sqlite3_value_text(apVal[i]);
16826 if( zText==0 ) zText = "";
16827 iCol = 0;
16828 do{
16829 iCol = iCol*10 + (idxStr[iIdxStr]-'0');
16830 iIdxStr++;
16831 }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
16832
16833 if( zText[0]=='*' ){
16834 /* The user has issued a query of the form "MATCH '*...'". This
16835 ** indicates that the MATCH expression is not a full text query,
16836 ** but a request for an internal parameter. */
16837 rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
16838 goto filter_out;
16839 }else{
16840 char **pzErr = &pTab->p.base.zErrMsg;
16841 rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
16842 if( rc==SQLITE_OK ){
16843 rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
16844 pExpr = 0;
16845 }
16846 if( rc!=SQLITE_OK ) goto filter_out;
16847 }
16848
16849 break;
16850 }
16851 case 'L':
16852 case 'G': {
16853 int bGlob = (idxStr[iIdxStr-1]=='G');
16854 const char *zText = (const char*)sqlite3_value_text(apVal[i]);
16855 iCol = 0;
16856 do{
16857 iCol = iCol*10 + (idxStr[iIdxStr]-'0');
16858 iIdxStr++;
16859 }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
16860 if( zText ){
16861 rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr);
16862 }
16863 if( rc==SQLITE_OK ){
16864 rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
16865 pExpr = 0;
16866 }
16867 if( rc!=SQLITE_OK ) goto filter_out;
16868 break;
16869 }
16870 case '=':
16871 pRowidEq = apVal[i];
16872 break;
16873 case '<':
16874 pRowidLe = apVal[i];
16875 break;
16876 default: assert( idxStr[iIdxStr-1]=='>' );
16877 pRowidGe = apVal[i];
16878 break;
16879 }
16880 }
16881 bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0);
16882 pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0);
16883
16884 /* Set the cursor upper and lower rowid limits. Only some strategies
16885 ** actually use them. This is ok, as the xBestIndex() method leaves the
16886 ** sqlite3_index_constraint.omit flag clear for range constraints
16887 ** on the rowid field. */
16888 if( pRowidEq ){
16889 pRowidLe = pRowidGe = pRowidEq;
16890 }
16891 if( bDesc ){
16892 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
16893 pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
16894 }else{
16895 pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
16896 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
16897 }
16898
16899 if( pTab->pSortCsr ){
16900 /* If pSortCsr is non-NULL, then this call is being made as part of
16901 ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
16902 ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
16903 ** return results to the user for this query. The current cursor
16904 ** (pCursor) is used to execute the query issued by function
16905 ** fts5CursorFirstSorted() above. */
16906 assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 );
16907 assert( nVal==0 && bOrderByRank==0 && bDesc==0 );
16908 assert( pCsr->iLastRowid==LARGEST_INT64 );
16909 assert( pCsr->iFirstRowid==SMALLEST_INT64 );
16910 if( pTab->pSortCsr->bDesc ){
16911 pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid;
16912 pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid;
16913 }else{
16914 pCsr->iLastRowid = pTab->pSortCsr->iLastRowid;
16915 pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid;
16916 }
16917 pCsr->ePlan = FTS5_PLAN_SOURCE;
16918 pCsr->pExpr = pTab->pSortCsr->pExpr;
16919 rc = fts5CursorFirst(pTab, pCsr, bDesc);
16920 }else if( pCsr->pExpr ){
16921 rc = fts5CursorParseRank(pConfig, pCsr, pRank);
16922 if( rc==SQLITE_OK ){
16923 if( bOrderByRank ){
16924 pCsr->ePlan = FTS5_PLAN_SORTED_MATCH;
16925 rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
16926 }else{
16927 pCsr->ePlan = FTS5_PLAN_MATCH;
16928 rc = fts5CursorFirst(pTab, pCsr, bDesc);
16929 }
16930 }
16931 }else if( pConfig->zContent==0 ){
16932 *pConfig->pzErrmsg = sqlite3_mprintf(
16933 "%s: table does not support scanning", pConfig->zName
16934 );
16935 rc = SQLITE_ERROR;
16936 }else{
16937 /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
16938 ** by rowid (ePlan==FTS5_PLAN_ROWID). */
16939 pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN);
16940 rc = sqlite3Fts5StorageStmt(
16941 pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg
16942 );
16943 if( rc==SQLITE_OK ){
16944 if( pRowidEq!=0 ){
16945 assert( pCsr->ePlan==FTS5_PLAN_ROWID );
16946 sqlite3_bind_value(pCsr->pStmt, 1, pRowidEq);
16947 }else{
16948 sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
16949 sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
16950 }
16951 rc = fts5NextMethod(pCursor);
16952 }
16953 }
16954
16955 filter_out:
16956 sqlite3Fts5ExprFree(pExpr);
16957 pConfig->pzErrmsg = pzErrmsg;
16958 return rc;
16959}
16960
16961/*
16962** This is the xEof method of the virtual table. SQLite calls this
16963** routine to find out if it has reached the end of a result set.
16964*/
16965static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){
16966 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16967 return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0);
16968}
16969
16970/*
16971** Return the rowid that the cursor currently points to.
16972*/
16973static i64 fts5CursorRowid(Fts5Cursor *pCsr){
16974 assert( pCsr->ePlan==FTS5_PLAN_MATCH
16975 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
16976 || pCsr->ePlan==FTS5_PLAN_SOURCE
16977 );
16978 if( pCsr->pSorter ){
16979 return pCsr->pSorter->iRowid;
16980 }else{
16981 return sqlite3Fts5ExprRowid(pCsr->pExpr);
16982 }
16983}
16984
16985/*
16986** This is the xRowid method. The SQLite core calls this routine to
16987** retrieve the rowid for the current row of the result set. fts5
16988** exposes %_content.rowid as the rowid for the virtual table. The
16989** rowid should be written to *pRowid.
16990*/
16991static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
16992 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16993 int ePlan = pCsr->ePlan;
16994
16995 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
16996 switch( ePlan ){
16997 case FTS5_PLAN_SPECIAL:
16998 *pRowid = 0;
16999 break;
17000
17001 case FTS5_PLAN_SOURCE:
17002 case FTS5_PLAN_MATCH:
17003 case FTS5_PLAN_SORTED_MATCH:
17004 *pRowid = fts5CursorRowid(pCsr);
17005 break;
17006
17007 default:
17008 *pRowid = sqlite3_column_int64(pCsr->pStmt, 0);
17009 break;
17010 }
17011
17012 return SQLITE_OK;
17013}
17014
17015/*
17016** If the cursor requires seeking (bSeekRequired flag is set), seek it.
17017** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise.
17018**
17019** If argument bErrormsg is true and an error occurs, an error message may
17020** be left in sqlite3_vtab.zErrMsg.
17021*/
17022static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){
17023 int rc = SQLITE_OK;
17024
17025 /* If the cursor does not yet have a statement handle, obtain one now. */
17026 if( pCsr->pStmt==0 ){
17027 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17028 int eStmt = fts5StmtType(pCsr);
17029 rc = sqlite3Fts5StorageStmt(
17030 pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0)
17031 );
17032 assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 );
17033 assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) );
17034 }
17035
17036 if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){
17037 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
17038 assert( pCsr->pExpr );
17039 sqlite3_reset(pCsr->pStmt);
17040 sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr));
17041 pTab->pConfig->bLock++;
17042 rc = sqlite3_step(pCsr->pStmt);
17043 pTab->pConfig->bLock--;
17044 if( rc==SQLITE_ROW ){
17045 rc = SQLITE_OK;
17046 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
17047 }else{
17048 rc = sqlite3_reset(pCsr->pStmt);
17049 if( rc==SQLITE_OK ){
17050 rc = FTS5_CORRUPT;
17051 }else if( pTab->pConfig->pzErrmsg ){
17052 *pTab->pConfig->pzErrmsg = sqlite3_mprintf(
17053 "%s", sqlite3_errmsg(pTab->pConfig->db)
17054 );
17055 }
17056 }
17057 }
17058 return rc;
17059}
17060
17061static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){
17062 va_list ap; /* ... printf arguments */
17063 va_start(ap, zFormat);
17064 assert( p->p.base.zErrMsg==0 );
17065 p->p.base.zErrMsg = sqlite3_vmprintf(zFormat, ap);
17066 va_end(ap);
17067}
17068
17069/*
17070** This function is called to handle an FTS INSERT command. In other words,
17071** an INSERT statement of the form:
17072**
17073** INSERT INTO fts(fts) VALUES($pCmd)
17074** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal)
17075**
17076** Argument pVal is the value assigned to column "fts" by the INSERT
17077** statement. This function returns SQLITE_OK if successful, or an SQLite
17078** error code if an error occurs.
17079**
17080** The commands implemented by this function are documented in the "Special
17081** INSERT Directives" section of the documentation. It should be updated if
17082** more commands are added to this function.
17083*/
17084static int fts5SpecialInsert(
17085 Fts5FullTable *pTab, /* Fts5 table object */
17086 const char *zCmd, /* Text inserted into table-name column */
17087 sqlite3_value *pVal /* Value inserted into rank column */
17088){
17089 Fts5Config *pConfig = pTab->p.pConfig;
17090 int rc = SQLITE_OK;
17091 int bError = 0;
17092
17093 if( 0==sqlite3_stricmp("delete-all", zCmd) ){
17094 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17095 fts5SetVtabError(pTab,
17096 "'delete-all' may only be used with a "
17097 "contentless or external content fts5 table"
17098 );
17099 rc = SQLITE_ERROR;
17100 }else{
17101 rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage);
17102 }
17103 }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){
17104 if( pConfig->eContent==FTS5_CONTENT_NONE ){
17105 fts5SetVtabError(pTab,
17106 "'rebuild' may not be used with a contentless fts5 table"
17107 );
17108 rc = SQLITE_ERROR;
17109 }else{
17110 rc = sqlite3Fts5StorageRebuild(pTab->pStorage);
17111 }
17112 }else if( 0==sqlite3_stricmp("optimize", zCmd) ){
17113 rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
17114 }else if( 0==sqlite3_stricmp("merge", zCmd) ){
17115 int nMerge = sqlite3_value_int(pVal);
17116 rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
17117 }else if( 0==sqlite3_stricmp("integrity-check", zCmd) ){
17118 int iArg = sqlite3_value_int(pVal);
17119 rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg);
17120#ifdef SQLITE_DEBUG
17121 }else if( 0==sqlite3_stricmp("prefix-index", zCmd) ){
17122 pConfig->bPrefixIndex = sqlite3_value_int(pVal);
17123#endif
17124 }else{
17125 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
17126 if( rc==SQLITE_OK ){
17127 rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError);
17128 }
17129 if( rc==SQLITE_OK ){
17130 if( bError ){
17131 rc = SQLITE_ERROR;
17132 }else{
17133 rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0);
17134 }
17135 }
17136 }
17137 return rc;
17138}
17139
17140static int fts5SpecialDelete(
17141 Fts5FullTable *pTab,
17142 sqlite3_value **apVal
17143){
17144 int rc = SQLITE_OK;
17145 int eType1 = sqlite3_value_type(apVal[1]);
17146 if( eType1==SQLITE_INTEGER ){
17147 sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]);
17148 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]);
17149 }
17150 return rc;
17151}
17152
17153static void fts5StorageInsert(
17154 int *pRc,
17155 Fts5FullTable *pTab,
17156 sqlite3_value **apVal,
17157 i64 *piRowid
17158){
17159 int rc = *pRc;
17160 if( rc==SQLITE_OK ){
17161 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid);
17162 }
17163 if( rc==SQLITE_OK ){
17164 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid);
17165 }
17166 *pRc = rc;
17167}
17168
17169/*
17170** This function is the implementation of the xUpdate callback used by
17171** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
17172** inserted, updated or deleted.
17173**
17174** A delete specifies a single argument - the rowid of the row to remove.
17175**
17176** Update and insert operations pass:
17177**
17178** 1. The "old" rowid, or NULL.
17179** 2. The "new" rowid.
17180** 3. Values for each of the nCol matchable columns.
17181** 4. Values for the two hidden columns (<tablename> and "rank").
17182*/
17183static int fts5UpdateMethod(
17184 sqlite3_vtab *pVtab, /* Virtual table handle */
17185 int nArg, /* Size of argument array */
17186 sqlite3_value **apVal, /* Array of arguments */
17187 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
17188){
17189 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
17190 Fts5Config *pConfig = pTab->p.pConfig;
17191 int eType0; /* value_type() of apVal[0] */
17192 int rc = SQLITE_OK; /* Return code */
17193
17194 /* A transaction must be open when this is called. */
17195 assert( pTab->ts.eState==1 || pTab->ts.eState==2 );
17196
17197 assert( pVtab->zErrMsg==0 );
17198 assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
17199 assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER
17200 || sqlite3_value_type(apVal[0])==SQLITE_NULL
17201 );
17202 assert( pTab->p.pConfig->pzErrmsg==0 );
17203 pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
17204
17205 /* Put any active cursors into REQUIRE_SEEK state. */
17206 fts5TripCursors(pTab);
17207
17208 eType0 = sqlite3_value_type(apVal[0]);
17209 if( eType0==SQLITE_NULL
17210 && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL
17211 ){
17212 /* A "special" INSERT op. These are handled separately. */
17213 const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]);
17214 if( pConfig->eContent!=FTS5_CONTENT_NORMAL
17215 && 0==sqlite3_stricmp("delete", z)
17216 ){
17217 rc = fts5SpecialDelete(pTab, apVal);
17218 }else{
17219 rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
17220 }
17221 }else{
17222 /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
17223 ** any conflict on the rowid value must be detected before any
17224 ** modifications are made to the database file. There are 4 cases:
17225 **
17226 ** 1) DELETE
17227 ** 2) UPDATE (rowid not modified)
17228 ** 3) UPDATE (rowid modified)
17229 ** 4) INSERT
17230 **
17231 ** Cases 3 and 4 may violate the rowid constraint.
17232 */
17233 int eConflict = SQLITE_ABORT;
17234 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17235 eConflict = sqlite3_vtab_on_conflict(pConfig->db);
17236 }
17237
17238 assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
17239 assert( nArg!=1 || eType0==SQLITE_INTEGER );
17240
17241 /* Filter out attempts to run UPDATE or DELETE on contentless tables.
17242 ** This is not suported. */
17243 if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){
17244 pTab->p.base.zErrMsg = sqlite3_mprintf(
17245 "cannot %s contentless fts5 table: %s",
17246 (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName
17247 );
17248 rc = SQLITE_ERROR;
17249 }
17250
17251 /* DELETE */
17252 else if( nArg==1 ){
17253 i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */
17254 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
17255 }
17256
17257 /* INSERT or UPDATE */
17258 else{
17259 int eType1 = sqlite3_value_numeric_type(apVal[1]);
17260
17261 if( eType1!=SQLITE_INTEGER && eType1!=SQLITE_NULL ){
17262 rc = SQLITE_MISMATCH;
17263 }
17264
17265 else if( eType0!=SQLITE_INTEGER ){
17266 /* If this is a REPLACE, first remove the current entry (if any) */
17267 if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){
17268 i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */
17269 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
17270 }
17271 fts5StorageInsert(&rc, pTab, apVal, pRowid);
17272 }
17273
17274 /* UPDATE */
17275 else{
17276 i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */
17277 i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */
17278 if( eType1==SQLITE_INTEGER && iOld!=iNew ){
17279 if( eConflict==SQLITE_REPLACE ){
17280 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
17281 if( rc==SQLITE_OK ){
17282 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
17283 }
17284 fts5StorageInsert(&rc, pTab, apVal, pRowid);
17285 }else{
17286 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
17287 if( rc==SQLITE_OK ){
17288 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
17289 }
17290 if( rc==SQLITE_OK ){
17291 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid);
17292 }
17293 }
17294 }else{
17295 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
17296 fts5StorageInsert(&rc, pTab, apVal, pRowid);
17297 }
17298 }
17299 }
17300 }
17301
17302 pTab->p.pConfig->pzErrmsg = 0;
17303 return rc;
17304}
17305
17306/*
17307** Implementation of xSync() method.
17308*/
17309static int fts5SyncMethod(sqlite3_vtab *pVtab){
17310 int rc;
17311 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
17312 fts5CheckTransactionState(pTab, FTS5_SYNC, 0);
17313 pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
17314 fts5TripCursors(pTab);
17315 rc = sqlite3Fts5StorageSync(pTab->pStorage);
17316 pTab->p.pConfig->pzErrmsg = 0;
17317 return rc;
17318}
17319
17320/*
17321** Implementation of xBegin() method.
17322*/
17323static int fts5BeginMethod(sqlite3_vtab *pVtab){
17324 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0);
17325 fts5NewTransaction((Fts5FullTable*)pVtab);
17326 return SQLITE_OK;
17327}
17328
17329/*
17330** Implementation of xCommit() method. This is a no-op. The contents of
17331** the pending-terms hash-table have already been flushed into the database
17332** by fts5SyncMethod().
17333*/
17334static int fts5CommitMethod(sqlite3_vtab *pVtab){
17335 UNUSED_PARAM(pVtab); /* Call below is a no-op for NDEBUG builds */
17336 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0);
17337 return SQLITE_OK;
17338}
17339
17340/*
17341** Implementation of xRollback(). Discard the contents of the pending-terms
17342** hash-table. Any changes made to the database are reverted by SQLite.
17343*/
17344static int fts5RollbackMethod(sqlite3_vtab *pVtab){
17345 int rc;
17346 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
17347 fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0);
17348 rc = sqlite3Fts5StorageRollback(pTab->pStorage);
17349 return rc;
17350}
17351
17352static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*);
17353
17354static void *fts5ApiUserData(Fts5Context *pCtx){
17355 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17356 return pCsr->pAux->pUserData;
17357}
17358
17359static int fts5ApiColumnCount(Fts5Context *pCtx){
17360 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17361 return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol;
17362}
17363
17364static int fts5ApiColumnTotalSize(
17365 Fts5Context *pCtx,
17366 int iCol,
17367 sqlite3_int64 *pnToken
17368){
17369 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17370 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17371 return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
17372}
17373
17374static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
17375 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17376 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17377 return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
17378}
17379
17380static int fts5ApiTokenize(
17381 Fts5Context *pCtx,
17382 const char *pText, int nText,
17383 void *pUserData,
17384 int (*xToken)(void*, int, const char*, int, int, int)
17385){
17386 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17387 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
17388 return sqlite3Fts5Tokenize(
17389 pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
17390 );
17391}
17392
17393static int fts5ApiPhraseCount(Fts5Context *pCtx){
17394 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17395 return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
17396}
17397
17398static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
17399 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17400 return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
17401}
17402
17403static int fts5ApiColumnText(
17404 Fts5Context *pCtx,
17405 int iCol,
17406 const char **pz,
17407 int *pn
17408){
17409 int rc = SQLITE_OK;
17410 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17411 if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab))
17412 || pCsr->ePlan==FTS5_PLAN_SPECIAL
17413 ){
17414 *pz = 0;
17415 *pn = 0;
17416 }else{
17417 rc = fts5SeekCursor(pCsr, 0);
17418 if( rc==SQLITE_OK ){
17419 *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1);
17420 *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
17421 }
17422 }
17423 return rc;
17424}
17425
17426static int fts5CsrPoslist(
17427 Fts5Cursor *pCsr,
17428 int iPhrase,
17429 const u8 **pa,
17430 int *pn
17431){
17432 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
17433 int rc = SQLITE_OK;
17434 int bLive = (pCsr->pSorter==0);
17435
17436 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){
17437
17438 if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
17439 Fts5PoslistPopulator *aPopulator;
17440 int i;
17441 aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive);
17442 if( aPopulator==0 ) rc = SQLITE_NOMEM;
17443 for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){
17444 int n; const char *z;
17445 rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n);
17446 if( rc==SQLITE_OK ){
17447 rc = sqlite3Fts5ExprPopulatePoslists(
17448 pConfig, pCsr->pExpr, aPopulator, i, z, n
17449 );
17450 }
17451 }
17452 sqlite3_free(aPopulator);
17453
17454 if( pCsr->pSorter ){
17455 sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid);
17456 }
17457 }
17458 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST);
17459 }
17460
17461 if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){
17462 Fts5Sorter *pSorter = pCsr->pSorter;
17463 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
17464 *pn = pSorter->aIdx[iPhrase] - i1;
17465 *pa = &pSorter->aPoslist[i1];
17466 }else{
17467 *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
17468 }
17469
17470 return rc;
17471}
17472
17473/*
17474** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
17475** correctly for the current view. Return SQLITE_OK if successful, or an
17476** SQLite error code otherwise.
17477*/
17478static int fts5CacheInstArray(Fts5Cursor *pCsr){
17479 int rc = SQLITE_OK;
17480 Fts5PoslistReader *aIter; /* One iterator for each phrase */
17481 int nIter; /* Number of iterators/phrases */
17482 int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol;
17483
17484 nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
17485 if( pCsr->aInstIter==0 ){
17486 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter;
17487 pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
17488 }
17489 aIter = pCsr->aInstIter;
17490
17491 if( aIter ){
17492 int nInst = 0; /* Number instances seen so far */
17493 int i;
17494
17495 /* Initialize all iterators */
17496 for(i=0; i<nIter && rc==SQLITE_OK; i++){
17497 const u8 *a;
17498 int n;
17499 rc = fts5CsrPoslist(pCsr, i, &a, &n);
17500 if( rc==SQLITE_OK ){
17501 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
17502 }
17503 }
17504
17505 if( rc==SQLITE_OK ){
17506 while( 1 ){
17507 int *aInst;
17508 int iBest = -1;
17509 for(i=0; i<nIter; i++){
17510 if( (aIter[i].bEof==0)
17511 && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos)
17512 ){
17513 iBest = i;
17514 }
17515 }
17516 if( iBest<0 ) break;
17517
17518 nInst++;
17519 if( nInst>=pCsr->nInstAlloc ){
17520 int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32;
17521 aInst = (int*)sqlite3_realloc64(
17522 pCsr->aInst, nNewSize*sizeof(int)*3
17523 );
17524 if( aInst ){
17525 pCsr->aInst = aInst;
17526 pCsr->nInstAlloc = nNewSize;
17527 }else{
17528 nInst--;
17529 rc = SQLITE_NOMEM;
17530 break;
17531 }
17532 }
17533
17534 aInst = &pCsr->aInst[3 * (nInst-1)];
17535 aInst[0] = iBest;
17536 aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
17537 aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
17538 if( aInst[1]<0 || aInst[1]>=nCol ){
17539 rc = FTS5_CORRUPT;
17540 break;
17541 }
17542 sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
17543 }
17544 }
17545
17546 pCsr->nInstCount = nInst;
17547 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST);
17548 }
17549 return rc;
17550}
17551
17552static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
17553 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17554 int rc = SQLITE_OK;
17555 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
17556 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){
17557 *pnInst = pCsr->nInstCount;
17558 }
17559 return rc;
17560}
17561
17562static int fts5ApiInst(
17563 Fts5Context *pCtx,
17564 int iIdx,
17565 int *piPhrase,
17566 int *piCol,
17567 int *piOff
17568){
17569 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17570 int rc = SQLITE_OK;
17571 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
17572 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr))
17573 ){
17574 if( iIdx<0 || iIdx>=pCsr->nInstCount ){
17575 rc = SQLITE_RANGE;
17576#if 0
17577 }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
17578 *piPhrase = pCsr->aInst[iIdx*3];
17579 *piCol = pCsr->aInst[iIdx*3 + 2];
17580 *piOff = -1;
17581#endif
17582 }else{
17583 *piPhrase = pCsr->aInst[iIdx*3];
17584 *piCol = pCsr->aInst[iIdx*3 + 1];
17585 *piOff = pCsr->aInst[iIdx*3 + 2];
17586 }
17587 }
17588 return rc;
17589}
17590
17591static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
17592 return fts5CursorRowid((Fts5Cursor*)pCtx);
17593}
17594
17595static int fts5ColumnSizeCb(
17596 void *pContext, /* Pointer to int */
17597 int tflags,
17598 const char *pUnused, /* Buffer containing token */
17599 int nUnused, /* Size of token in bytes */
17600 int iUnused1, /* Start offset of token */
17601 int iUnused2 /* End offset of token */
17602){
17603 int *pCnt = (int*)pContext;
17604 UNUSED_PARAM2(pUnused, nUnused);
17605 UNUSED_PARAM2(iUnused1, iUnused2);
17606 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
17607 (*pCnt)++;
17608 }
17609 return SQLITE_OK;
17610}
17611
17612static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
17613 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17614 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17615 Fts5Config *pConfig = pTab->p.pConfig;
17616 int rc = SQLITE_OK;
17617
17618 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){
17619 if( pConfig->bColumnsize ){
17620 i64 iRowid = fts5CursorRowid(pCsr);
17621 rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize);
17622 }else if( pConfig->zContent==0 ){
17623 int i;
17624 for(i=0; i<pConfig->nCol; i++){
17625 if( pConfig->abUnindexed[i]==0 ){
17626 pCsr->aColumnSize[i] = -1;
17627 }
17628 }
17629 }else{
17630 int i;
17631 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
17632 if( pConfig->abUnindexed[i]==0 ){
17633 const char *z; int n;
17634 void *p = (void*)(&pCsr->aColumnSize[i]);
17635 pCsr->aColumnSize[i] = 0;
17636 rc = fts5ApiColumnText(pCtx, i, &z, &n);
17637 if( rc==SQLITE_OK ){
17638 rc = sqlite3Fts5Tokenize(
17639 pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
17640 );
17641 }
17642 }
17643 }
17644 }
17645 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
17646 }
17647 if( iCol<0 ){
17648 int i;
17649 *pnToken = 0;
17650 for(i=0; i<pConfig->nCol; i++){
17651 *pnToken += pCsr->aColumnSize[i];
17652 }
17653 }else if( iCol<pConfig->nCol ){
17654 *pnToken = pCsr->aColumnSize[iCol];
17655 }else{
17656 *pnToken = 0;
17657 rc = SQLITE_RANGE;
17658 }
17659 return rc;
17660}
17661
17662/*
17663** Implementation of the xSetAuxdata() method.
17664*/
17665static int fts5ApiSetAuxdata(
17666 Fts5Context *pCtx, /* Fts5 context */
17667 void *pPtr, /* Pointer to save as auxdata */
17668 void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */
17669){
17670 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17671 Fts5Auxdata *pData;
17672
17673 /* Search through the cursors list of Fts5Auxdata objects for one that
17674 ** corresponds to the currently executing auxiliary function. */
17675 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
17676 if( pData->pAux==pCsr->pAux ) break;
17677 }
17678
17679 if( pData ){
17680 if( pData->xDelete ){
17681 pData->xDelete(pData->pPtr);
17682 }
17683 }else{
17684 int rc = SQLITE_OK;
17685 pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata));
17686 if( pData==0 ){
17687 if( xDelete ) xDelete(pPtr);
17688 return rc;
17689 }
17690 pData->pAux = pCsr->pAux;
17691 pData->pNext = pCsr->pAuxdata;
17692 pCsr->pAuxdata = pData;
17693 }
17694
17695 pData->xDelete = xDelete;
17696 pData->pPtr = pPtr;
17697 return SQLITE_OK;
17698}
17699
17700static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
17701 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17702 Fts5Auxdata *pData;
17703 void *pRet = 0;
17704
17705 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
17706 if( pData->pAux==pCsr->pAux ) break;
17707 }
17708
17709 if( pData ){
17710 pRet = pData->pPtr;
17711 if( bClear ){
17712 pData->pPtr = 0;
17713 pData->xDelete = 0;
17714 }
17715 }
17716
17717 return pRet;
17718}
17719
17720static void fts5ApiPhraseNext(
17721 Fts5Context *pUnused,
17722 Fts5PhraseIter *pIter,
17723 int *piCol, int *piOff
17724){
17725 UNUSED_PARAM(pUnused);
17726 if( pIter->a>=pIter->b ){
17727 *piCol = -1;
17728 *piOff = -1;
17729 }else{
17730 int iVal;
17731 pIter->a += fts5GetVarint32(pIter->a, iVal);
17732 if( iVal==1 ){
17733 pIter->a += fts5GetVarint32(pIter->a, iVal);
17734 *piCol = iVal;
17735 *piOff = 0;
17736 pIter->a += fts5GetVarint32(pIter->a, iVal);
17737 }
17738 *piOff += (iVal-2);
17739 }
17740}
17741
17742static int fts5ApiPhraseFirst(
17743 Fts5Context *pCtx,
17744 int iPhrase,
17745 Fts5PhraseIter *pIter,
17746 int *piCol, int *piOff
17747){
17748 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17749 int n;
17750 int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
17751 if( rc==SQLITE_OK ){
17752 assert( pIter->a || n==0 );
17753 pIter->b = (pIter->a ? &pIter->a[n] : 0);
17754 *piCol = 0;
17755 *piOff = 0;
17756 fts5ApiPhraseNext(pCtx, pIter, piCol, piOff);
17757 }
17758 return rc;
17759}
17760
17761static void fts5ApiPhraseNextColumn(
17762 Fts5Context *pCtx,
17763 Fts5PhraseIter *pIter,
17764 int *piCol
17765){
17766 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17767 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
17768
17769 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
17770 if( pIter->a>=pIter->b ){
17771 *piCol = -1;
17772 }else{
17773 int iIncr;
17774 pIter->a += fts5GetVarint32(&pIter->a[0], iIncr);
17775 *piCol += (iIncr-2);
17776 }
17777 }else{
17778 while( 1 ){
17779 int dummy;
17780 if( pIter->a>=pIter->b ){
17781 *piCol = -1;
17782 return;
17783 }
17784 if( pIter->a[0]==0x01 ) break;
17785 pIter->a += fts5GetVarint32(pIter->a, dummy);
17786 }
17787 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
17788 }
17789}
17790
17791static int fts5ApiPhraseFirstColumn(
17792 Fts5Context *pCtx,
17793 int iPhrase,
17794 Fts5PhraseIter *pIter,
17795 int *piCol
17796){
17797 int rc = SQLITE_OK;
17798 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17799 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
17800
17801 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
17802 Fts5Sorter *pSorter = pCsr->pSorter;
17803 int n;
17804 if( pSorter ){
17805 int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
17806 n = pSorter->aIdx[iPhrase] - i1;
17807 pIter->a = &pSorter->aPoslist[i1];
17808 }else{
17809 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n);
17810 }
17811 if( rc==SQLITE_OK ){
17812 assert( pIter->a || n==0 );
17813 pIter->b = (pIter->a ? &pIter->a[n] : 0);
17814 *piCol = 0;
17815 fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
17816 }
17817 }else{
17818 int n;
17819 rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
17820 if( rc==SQLITE_OK ){
17821 assert( pIter->a || n==0 );
17822 pIter->b = (pIter->a ? &pIter->a[n] : 0);
17823 if( n<=0 ){
17824 *piCol = -1;
17825 }else if( pIter->a[0]==0x01 ){
17826 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
17827 }else{
17828 *piCol = 0;
17829 }
17830 }
17831 }
17832
17833 return rc;
17834}
17835
17836
/*
** Forward declaration. fts5ApiQueryPhrase() is defined after sFts5Api
** because its body passes &sFts5Api to the user callback.
*/
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
    int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);

/*
** The extension API object handed to auxiliary functions. The initializer
** is positional, so the entries must appear in the same order as the
** members of struct Fts5ExtensionApi.
*/
static const Fts5ExtensionApi sFts5Api = {
  2,                            /* iVersion */
  fts5ApiUserData,              /* xUserData */
  fts5ApiColumnCount,           /* xColumnCount */
  fts5ApiRowCount,              /* xRowCount */
  fts5ApiColumnTotalSize,       /* xColumnTotalSize */
  fts5ApiTokenize,              /* xTokenize */
  fts5ApiPhraseCount,           /* xPhraseCount */
  fts5ApiPhraseSize,            /* xPhraseSize */
  fts5ApiInstCount,             /* xInstCount */
  fts5ApiInst,                  /* xInst */
  fts5ApiRowid,                 /* xRowid */
  fts5ApiColumnText,            /* xColumnText */
  fts5ApiColumnSize,            /* xColumnSize */
  fts5ApiQueryPhrase,           /* xQueryPhrase */
  fts5ApiSetAuxdata,            /* xSetAuxdata */
  fts5ApiGetAuxdata,            /* xGetAuxdata */
  fts5ApiPhraseFirst,           /* xPhraseFirst */
  fts5ApiPhraseNext,            /* xPhraseNext */
  fts5ApiPhraseFirstColumn,     /* xPhraseFirstColumn */
  fts5ApiPhraseNextColumn,      /* xPhraseNextColumn */
};
17863
17864/*
17865** Implementation of API function xQueryPhrase().
17866*/
17867static int fts5ApiQueryPhrase(
17868 Fts5Context *pCtx,
17869 int iPhrase,
17870 void *pUserData,
17871 int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
17872){
17873 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17874 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17875 int rc;
17876 Fts5Cursor *pNew = 0;
17877
17878 rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
17879 if( rc==SQLITE_OK ){
17880 pNew->ePlan = FTS5_PLAN_MATCH;
17881 pNew->iFirstRowid = SMALLEST_INT64;
17882 pNew->iLastRowid = LARGEST_INT64;
17883 pNew->base.pVtab = (sqlite3_vtab*)pTab;
17884 rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr);
17885 }
17886
17887 if( rc==SQLITE_OK ){
17888 for(rc = fts5CursorFirst(pTab, pNew, 0);
17889 rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
17890 rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
17891 ){
17892 rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
17893 if( rc!=SQLITE_OK ){
17894 if( rc==SQLITE_DONE ) rc = SQLITE_OK;
17895 break;
17896 }
17897 }
17898 }
17899
17900 fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
17901 return rc;
17902}
17903
17904static void fts5ApiInvoke(
17905 Fts5Auxiliary *pAux,
17906 Fts5Cursor *pCsr,
17907 sqlite3_context *context,
17908 int argc,
17909 sqlite3_value **argv
17910){
17911 assert( pCsr->pAux==0 );
17912 pCsr->pAux = pAux;
17913 pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
17914 pCsr->pAux = 0;
17915}
17916
17917static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
17918 Fts5Cursor *pCsr;
17919 for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
17920 if( pCsr->iCsrId==iCsrId ) break;
17921 }
17922 return pCsr;
17923}
17924
17925static void fts5ApiCallback(
17926 sqlite3_context *context,
17927 int argc,
17928 sqlite3_value **argv
17929){
17930
17931 Fts5Auxiliary *pAux;
17932 Fts5Cursor *pCsr;
17933 i64 iCsrId;
17934
17935 assert( argc>=1 );
17936 pAux = (Fts5Auxiliary*)sqlite3_user_data(context);
17937 iCsrId = sqlite3_value_int64(argv[0]);
17938
17939 pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
17940 if( pCsr==0 || pCsr->ePlan==0 ){
17941 char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId);
17942 sqlite3_result_error(context, zErr, -1);
17943 sqlite3_free(zErr);
17944 }else{
17945 fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
17946 }
17947}
17948
17949
17950/*
17951** Given cursor id iId, return a pointer to the corresponding Fts5Table
17952** object. Or NULL If the cursor id does not exist.
17953*/
17954static Fts5Table *sqlite3Fts5TableFromCsrid(
17955 Fts5Global *pGlobal, /* FTS5 global context for db handle */
17956 i64 iCsrId /* Id of cursor to find */
17957){
17958 Fts5Cursor *pCsr;
17959 pCsr = fts5CursorFromCsrid(pGlobal, iCsrId);
17960 if( pCsr ){
17961 return (Fts5Table*)pCsr->base.pVtab;
17962 }
17963 return 0;
17964}
17965
17966/*
17967** Return a "position-list blob" corresponding to the current position of
17968** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
17969** the current position-list for each phrase in the query associated with
17970** cursor pCsr.
17971**
17972** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
17973** the number of phrases in the query. Following the varints are the
17974** concatenated position lists for each phrase, in order.
17975**
17976** The first varint (if it exists) contains the size of the position list
17977** for phrase 0. The second (same disclaimer) contains the size of position
17978** list 1. And so on. There is no size field for the final position list,
17979** as it can be derived from the total size of the blob.
17980*/
17981static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
17982 int i;
17983 int rc = SQLITE_OK;
17984 int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
17985 Fts5Buffer val;
17986
17987 memset(&val, 0, sizeof(Fts5Buffer));
17988 switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){
17989 case FTS5_DETAIL_FULL:
17990
17991 /* Append the varints */
17992 for(i=0; i<(nPhrase-1); i++){
17993 const u8 *dummy;
17994 int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
17995 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
17996 }
17997
17998 /* Append the position lists */
17999 for(i=0; i<nPhrase; i++){
18000 const u8 *pPoslist;
18001 int nPoslist;
18002 nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
18003 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
18004 }
18005 break;
18006
18007 case FTS5_DETAIL_COLUMNS:
18008
18009 /* Append the varints */
18010 for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){
18011 const u8 *dummy;
18012 int nByte;
18013 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte);
18014 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
18015 }
18016
18017 /* Append the position lists */
18018 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
18019 const u8 *pPoslist;
18020 int nPoslist;
18021 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist);
18022 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
18023 }
18024 break;
18025
18026 default:
18027 break;
18028 }
18029
18030 sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free);
18031 return rc;
18032}
18033
18034/*
18035** This is the xColumn method, called by SQLite to request a value from
18036** the row that the supplied cursor currently points to.
18037*/
18038static int fts5ColumnMethod(
18039 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
18040 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
18041 int iCol /* Index of column to read value from */
18042){
18043 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
18044 Fts5Config *pConfig = pTab->p.pConfig;
18045 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
18046 int rc = SQLITE_OK;
18047
18048 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
18049
18050 if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){
18051 if( iCol==pConfig->nCol ){
18052 sqlite3_result_int64(pCtx, pCsr->iSpecial);
18053 }
18054 }else
18055
18056 if( iCol==pConfig->nCol ){
18057 /* User is requesting the value of the special column with the same name
18058 ** as the table. Return the cursor integer id number. This value is only
18059 ** useful in that it may be passed as the first argument to an FTS5
18060 ** auxiliary function. */
18061 sqlite3_result_int64(pCtx, pCsr->iCsrId);
18062 }else if( iCol==pConfig->nCol+1 ){
18063
18064 /* The value of the "rank" column. */
18065 if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
18066 fts5PoslistBlob(pCtx, pCsr);
18067 }else if(
18068 pCsr->ePlan==FTS5_PLAN_MATCH
18069 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
18070 ){
18071 if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){
18072 fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
18073 }
18074 }
18075 }else if( !fts5IsContentless(pTab) ){
18076 pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
18077 rc = fts5SeekCursor(pCsr, 1);
18078 if( rc==SQLITE_OK ){
18079 sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
18080 }
18081 pConfig->pzErrmsg = 0;
18082 }
18083 return rc;
18084}
18085
18086
18087/*
18088** This routine implements the xFindFunction method for the FTS3
18089** virtual table.
18090*/
18091static int fts5FindFunctionMethod(
18092 sqlite3_vtab *pVtab, /* Virtual table handle */
18093 int nUnused, /* Number of SQL function arguments */
18094 const char *zName, /* Name of SQL function */
18095 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
18096 void **ppArg /* OUT: User data for *pxFunc */
18097){
18098 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
18099 Fts5Auxiliary *pAux;
18100
18101 UNUSED_PARAM(nUnused);
18102 pAux = fts5FindAuxiliary(pTab, zName);
18103 if( pAux ){
18104 *pxFunc = fts5ApiCallback;
18105 *ppArg = (void*)pAux;
18106 return 1;
18107 }
18108
18109 /* No function of the specified name was found. Return 0. */
18110 return 0;
18111}
18112
18113/*
18114** Implementation of FTS5 xRename method. Rename an fts5 table.
18115*/
18116static int fts5RenameMethod(
18117 sqlite3_vtab *pVtab, /* Virtual table handle */
18118 const char *zName /* New name of table */
18119){
18120 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
18121 return sqlite3Fts5StorageRename(pTab->pStorage, zName);
18122}
18123
18124static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){
18125 fts5TripCursors((Fts5FullTable*)pTab);
18126 return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage);
18127}
18128
18129/*
18130** The xSavepoint() method.
18131**
18132** Flush the contents of the pending-terms table to disk.
18133*/
18134static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
18135 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */
18136 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_SAVEPOINT, iSavepoint);
18137 return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
18138}
18139
18140/*
18141** The xRelease() method.
18142**
18143** This is a no-op.
18144*/
18145static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
18146 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */
18147 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_RELEASE, iSavepoint);
18148 return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
18149}
18150
18151/*
18152** The xRollbackTo() method.
18153**
18154** Discard the contents of the pending terms table.
18155*/
18156static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
18157 Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
18158 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */
18159 fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
18160 fts5TripCursors(pTab);
18161 return sqlite3Fts5StorageRollback(pTab->pStorage);
18162}
18163
18164/*
18165** Register a new auxiliary function with global context pGlobal.
18166*/
18167static int fts5CreateAux(
18168 fts5_api *pApi, /* Global context (one per db handle) */
18169 const char *zName, /* Name of new function */
18170 void *pUserData, /* User data for aux. function */
18171 fts5_extension_function xFunc, /* Aux. function implementation */
18172 void(*xDestroy)(void*) /* Destructor for pUserData */
18173){
18174 Fts5Global *pGlobal = (Fts5Global*)pApi;
18175 int rc = sqlite3_overload_function(pGlobal->db, zName, -1);
18176 if( rc==SQLITE_OK ){
18177 Fts5Auxiliary *pAux;
18178 sqlite3_int64 nName; /* Size of zName in bytes, including \0 */
18179 sqlite3_int64 nByte; /* Bytes of space to allocate */
18180
18181 nName = strlen(zName) + 1;
18182 nByte = sizeof(Fts5Auxiliary) + nName;
18183 pAux = (Fts5Auxiliary*)sqlite3_malloc64(nByte);
18184 if( pAux ){
18185 memset(pAux, 0, (size_t)nByte);
18186 pAux->zFunc = (char*)&pAux[1];
18187 memcpy(pAux->zFunc, zName, nName);
18188 pAux->pGlobal = pGlobal;
18189 pAux->pUserData = pUserData;
18190 pAux->xFunc = xFunc;
18191 pAux->xDestroy = xDestroy;
18192 pAux->pNext = pGlobal->pAux;
18193 pGlobal->pAux = pAux;
18194 }else{
18195 rc = SQLITE_NOMEM;
18196 }
18197 }
18198
18199 return rc;
18200}
18201
18202/*
18203** Register a new tokenizer. This is the implementation of the
18204** fts5_api.xCreateTokenizer() method.
18205*/
18206static int fts5CreateTokenizer(
18207 fts5_api *pApi, /* Global context (one per db handle) */
18208 const char *zName, /* Name of new function */
18209 void *pUserData, /* User data for aux. function */
18210 fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
18211 void(*xDestroy)(void*) /* Destructor for pUserData */
18212){
18213 Fts5Global *pGlobal = (Fts5Global*)pApi;
18214 Fts5TokenizerModule *pNew;
18215 sqlite3_int64 nName; /* Size of zName and its \0 terminator */
18216 sqlite3_int64 nByte; /* Bytes of space to allocate */
18217 int rc = SQLITE_OK;
18218
18219 nName = strlen(zName) + 1;
18220 nByte = sizeof(Fts5TokenizerModule) + nName;
18221 pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte);
18222 if( pNew ){
18223 memset(pNew, 0, (size_t)nByte);
18224 pNew->zName = (char*)&pNew[1];
18225 memcpy(pNew->zName, zName, nName);
18226 pNew->pUserData = pUserData;
18227 pNew->x = *pTokenizer;
18228 pNew->xDestroy = xDestroy;
18229 pNew->pNext = pGlobal->pTok;
18230 pGlobal->pTok = pNew;
18231 if( pNew->pNext==0 ){
18232 pGlobal->pDfltTok = pNew;
18233 }
18234 }else{
18235 rc = SQLITE_NOMEM;
18236 }
18237
18238 return rc;
18239}
18240
18241static Fts5TokenizerModule *fts5LocateTokenizer(
18242 Fts5Global *pGlobal,
18243 const char *zName
18244){
18245 Fts5TokenizerModule *pMod = 0;
18246
18247 if( zName==0 ){
18248 pMod = pGlobal->pDfltTok;
18249 }else{
18250 for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){
18251 if( sqlite3_stricmp(zName, pMod->zName)==0 ) break;
18252 }
18253 }
18254
18255 return pMod;
18256}
18257
18258/*
18259** Find a tokenizer. This is the implementation of the
18260** fts5_api.xFindTokenizer() method.
18261*/
18262static int fts5FindTokenizer(
18263 fts5_api *pApi, /* Global context (one per db handle) */
18264 const char *zName, /* Name of new function */
18265 void **ppUserData,
18266 fts5_tokenizer *pTokenizer /* Populate this object */
18267){
18268 int rc = SQLITE_OK;
18269 Fts5TokenizerModule *pMod;
18270
18271 pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
18272 if( pMod ){
18273 *pTokenizer = pMod->x;
18274 *ppUserData = pMod->pUserData;
18275 }else{
18276 memset(pTokenizer, 0, sizeof(fts5_tokenizer));
18277 rc = SQLITE_ERROR;
18278 }
18279
18280 return rc;
18281}
18282
18283static int sqlite3Fts5GetTokenizer(
18284 Fts5Global *pGlobal,
18285 const char **azArg,
18286 int nArg,
18287 Fts5Config *pConfig,
18288 char **pzErr
18289){
18290 Fts5TokenizerModule *pMod;
18291 int rc = SQLITE_OK;
18292
18293 pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
18294 if( pMod==0 ){
18295 assert( nArg>0 );
18296 rc = SQLITE_ERROR;
18297 *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
18298 }else{
18299 rc = pMod->x.xCreate(
18300 pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok
18301 );
18302 pConfig->pTokApi = &pMod->x;
18303 if( rc!=SQLITE_OK ){
18304 if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor");
18305 }else{
18306 pConfig->ePattern = sqlite3Fts5TokenizerPattern(
18307 pMod->x.xCreate, pConfig->pTok
18308 );
18309 }
18310 }
18311
18312 if( rc!=SQLITE_OK ){
18313 pConfig->pTokApi = 0;
18314 pConfig->pTok = 0;
18315 }
18316
18317 return rc;
18318}
18319
18320static void fts5ModuleDestroy(void *pCtx){
18321 Fts5TokenizerModule *pTok, *pNextTok;
18322 Fts5Auxiliary *pAux, *pNextAux;
18323 Fts5Global *pGlobal = (Fts5Global*)pCtx;
18324
18325 for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){
18326 pNextAux = pAux->pNext;
18327 if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData);
18328 sqlite3_free(pAux);
18329 }
18330
18331 for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){
18332 pNextTok = pTok->pNext;
18333 if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData);
18334 sqlite3_free(pTok);
18335 }
18336
18337 sqlite3_free(pGlobal);
18338}
18339
18340static void fts5Fts5Func(
18341 sqlite3_context *pCtx, /* Function call context */
18342 int nArg, /* Number of args */
18343 sqlite3_value **apArg /* Function arguments */
18344){
18345 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
18346 fts5_api **ppApi;
18347 UNUSED_PARAM(nArg);
18348 assert( nArg==1 );
18349 ppApi = (fts5_api**)sqlite3_value_pointer(apArg[0], "fts5_api_ptr");
18350 if( ppApi ) *ppApi = &pGlobal->api;
18351}
18352
18353/*
18354** Implementation of fts5_source_id() function.
18355*/
18356static void fts5SourceIdFunc(
18357 sqlite3_context *pCtx, /* Function call context */
18358 int nArg, /* Number of args */
18359 sqlite3_value **apUnused /* Function arguments */
18360){
18361 assert( nArg==0 );
18362 UNUSED_PARAM2(nArg, apUnused);
18363 sqlite3_result_text(pCtx, "fts5: 2022-12-28 14:03:47 df5c253c0b3dd24916e4ec7cf77d3db5294cc9fd45ae7b9c5e82ad8197f38a24", -1, SQLITE_TRANSIENT);
18364}
18365
18366/*
18367** Return true if zName is the extension on one of the shadow tables used
18368** by this module.
18369*/
18370static int fts5ShadowName(const char *zName){
18371 static const char *azName[] = {
18372 "config", "content", "data", "docsize", "idx"
18373 };
18374 unsigned int i;
18375 for(i=0; i<sizeof(azName)/sizeof(azName[0]); i++){
18376 if( sqlite3_stricmp(zName, azName[i])==0 ) return 1;
18377 }
18378 return 0;
18379}
18380
18381static int fts5Init(sqlite3 *db){
18382 static const sqlite3_module fts5Mod = {
18383 /* iVersion */ 3,
18384 /* xCreate */ fts5CreateMethod,
18385 /* xConnect */ fts5ConnectMethod,
18386 /* xBestIndex */ fts5BestIndexMethod,
18387 /* xDisconnect */ fts5DisconnectMethod,
18388 /* xDestroy */ fts5DestroyMethod,
18389 /* xOpen */ fts5OpenMethod,
18390 /* xClose */ fts5CloseMethod,
18391 /* xFilter */ fts5FilterMethod,
18392 /* xNext */ fts5NextMethod,
18393 /* xEof */ fts5EofMethod,
18394 /* xColumn */ fts5ColumnMethod,
18395 /* xRowid */ fts5RowidMethod,
18396 /* xUpdate */ fts5UpdateMethod,
18397 /* xBegin */ fts5BeginMethod,
18398 /* xSync */ fts5SyncMethod,
18399 /* xCommit */ fts5CommitMethod,
18400 /* xRollback */ fts5RollbackMethod,
18401 /* xFindFunction */ fts5FindFunctionMethod,
18402 /* xRename */ fts5RenameMethod,
18403 /* xSavepoint */ fts5SavepointMethod,
18404 /* xRelease */ fts5ReleaseMethod,
18405 /* xRollbackTo */ fts5RollbackToMethod,
18406 /* xShadowName */ fts5ShadowName
18407 };
18408
18409 int rc;
18410 Fts5Global *pGlobal = 0;
18411
18412 pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
18413 if( pGlobal==0 ){
18414 rc = SQLITE_NOMEM;
18415 }else{
18416 void *p = (void*)pGlobal;
18417 memset(pGlobal, 0, sizeof(Fts5Global));
18418 pGlobal->db = db;
18419 pGlobal->api.iVersion = 2;
18420 pGlobal->api.xCreateFunction = fts5CreateAux;
18421 pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
18422 pGlobal->api.xFindTokenizer = fts5FindTokenizer;
18423 rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
18424 if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
18425 if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
18426 if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
18427 if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api);
18428 if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db);
18429 if( rc==SQLITE_OK ){
18430 rc = sqlite3_create_function(
18431 db, "fts5", 1, SQLITE_UTF8, p, fts5Fts5Func, 0, 0
18432 );
18433 }
18434 if( rc==SQLITE_OK ){
18435 rc = sqlite3_create_function(
18436 db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0
18437 );
18438 }
18439 }
18440
18441 /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
18442 ** fts5_test_mi.c is compiled and linked into the executable. And call
18443 ** its entry point to enable the matchinfo() demo. */
18444#ifdef SQLITE_FTS5_ENABLE_TEST_MI
18445 if( rc==SQLITE_OK ){
18446 extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*);
18447 rc = sqlite3Fts5TestRegisterMatchinfo(db);
18448 }
18449#endif
18450
18451 return rc;
18452}
18453
18454/*
18455** The following functions are used to register the module with SQLite. If
18456** this module is being built as part of the SQLite core (SQLITE_CORE is
18457** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
18458**
18459** Or, if this module is being built as a loadable extension,
18460** sqlite3Fts5Init() is omitted and the two standard entry points
18461** sqlite3_fts_init() and sqlite3_fts5_init() defined instead.
18462*/
18463#ifndef SQLITE_CORE
18464#ifdef _WIN32
18465__declspec(dllexport)
18466#endif
18467int sqlite3_fts_init(
18468 sqlite3 *db,
18469 char **pzErrMsg,
18470 const sqlite3_api_routines *pApi
18471){
18472 SQLITE_EXTENSION_INIT2(pApi);
18473 (void)pzErrMsg; /* Unused parameter */
18474 return fts5Init(db);
18475}
18476
18477#ifdef _WIN32
18478__declspec(dllexport)
18479#endif
18480int sqlite3_fts5_init(
18481 sqlite3 *db,
18482 char **pzErrMsg,
18483 const sqlite3_api_routines *pApi
18484){
18485 SQLITE_EXTENSION_INIT2(pApi);
18486 (void)pzErrMsg; /* Unused parameter */
18487 return fts5Init(db);
18488}
18489#else
18490int sqlite3Fts5Init(sqlite3 *db){
18491 return fts5Init(db);
18492}
18493#endif
18494
18495#line 1 "fts5_storage.c"
18496/*
18497** 2014 May 31
18498**
18499** The author disclaims copyright to this source code. In place of
18500** a legal notice, here is a blessing:
18501**
18502** May you do good and not evil.
18503** May you find forgiveness for yourself and forgive others.
18504** May you share freely, never taking more than you give.
18505**
18506******************************************************************************
18507**
18508*/
18509
18510
18511
18512/* #include "fts5Int.h" */
18513
/*
** State of the storage layer for a single FTS5 table.
**
** aStmt[] is a cache of prepared statements indexed by the FTS5_STMT_XXX
** constants. Entries are prepared on first use by fts5StorageGetStmt() and
** are NULL until then.
*/
struct Fts5Storage {
  Fts5Config *pConfig;            /* Configuration of the owning table */
  Fts5Index *pIndex;              /* Associated index object (NOTE(review): not used in the code visible here) */
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */
  sqlite3_stmt *aStmt[11];        /* Prepared statements, by FTS5_STMT_XXX */
};
18522
18523
/*
** The first three statement ids (FTS5_STMT_SCAN_ASC, FTS5_STMT_SCAN_DESC
** and FTS5_STMT_LOOKUP) are defined outside this section. The checks
** below fail the build unless they have the values 0, 1 and 2, because
** every FTS5_STMT_XXX value is used as an index into both
** Fts5Storage.aStmt[] and the SQL template array in fts5StorageGetStmt().
*/
#if FTS5_STMT_SCAN_ASC!=0
# error "FTS5_STMT_SCAN_ASC mismatch"
#endif
#if FTS5_STMT_SCAN_DESC!=1
# error "FTS5_STMT_SCAN_DESC mismatch"
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch"
#endif

/* Remaining statement ids. Their values must match the order of the SQL
** templates in fts5StorageGetStmt(). */
#define FTS5_STMT_INSERT_CONTENT  3
#define FTS5_STMT_REPLACE_CONTENT 4
#define FTS5_STMT_DELETE_CONTENT  5
#define FTS5_STMT_REPLACE_DOCSIZE  6
#define FTS5_STMT_DELETE_DOCSIZE  7
#define FTS5_STMT_LOOKUP_DOCSIZE  8
#define FTS5_STMT_REPLACE_CONFIG 9
#define FTS5_STMT_SCAN 10
18542
18543/*
18544** Prepare the two insert statements - Fts5Storage.pInsertContent and
18545** Fts5Storage.pInsertDocsize - if they have not already been prepared.
18546** Return SQLITE_OK if successful, or an SQLite error code if an error
18547** occurs.
18548*/
18549static int fts5StorageGetStmt(
18550 Fts5Storage *p, /* Storage handle */
18551 int eStmt, /* FTS5_STMT_XXX constant */
18552 sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */
18553 char **pzErrMsg /* OUT: Error message (if any) */
18554){
18555 int rc = SQLITE_OK;
18556
18557 /* If there is no %_docsize table, there should be no requests for
18558 ** statements to operate on it. */
18559 assert( p->pConfig->bColumnsize || (
18560 eStmt!=FTS5_STMT_REPLACE_DOCSIZE
18561 && eStmt!=FTS5_STMT_DELETE_DOCSIZE
18562 && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE
18563 ));
18564
18565 assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
18566 if( p->aStmt[eStmt]==0 ){
18567 const char *azStmt[] = {
18568 "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
18569 "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
18570 "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */
18571
18572 "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */
18573 "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */
18574 "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */
18575 "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* REPLACE_DOCSIZE */
18576 "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */
18577
18578 "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */
18579
18580 "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */
18581 "SELECT %s FROM %s AS T", /* SCAN */
18582 };
18583 Fts5Config *pC = p->pConfig;
18584 char *zSql = 0;
18585
18586 switch( eStmt ){
18587 case FTS5_STMT_SCAN:
18588 zSql = sqlite3_mprintf(azStmt[eStmt],
18589 pC->zContentExprlist, pC->zContent
18590 );
18591 break;
18592
18593 case FTS5_STMT_SCAN_ASC:
18594 case FTS5_STMT_SCAN_DESC:
18595 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist,
18596 pC->zContent, pC->zContentRowid, pC->zContentRowid,
18597 pC->zContentRowid
18598 );
18599 break;
18600
18601 case FTS5_STMT_LOOKUP:
18602 zSql = sqlite3_mprintf(azStmt[eStmt],
18603 pC->zContentExprlist, pC->zContent, pC->zContentRowid
18604 );
18605 break;
18606
18607 case FTS5_STMT_INSERT_CONTENT:
18608 case FTS5_STMT_REPLACE_CONTENT: {
18609 int nCol = pC->nCol + 1;
18610 char *zBind;
18611 int i;
18612
18613 zBind = sqlite3_malloc64(1 + nCol*2);
18614 if( zBind ){
18615 for(i=0; i<nCol; i++){
18616 zBind[i*2] = '?';
18617 zBind[i*2 + 1] = ',';
18618 }
18619 zBind[i*2-1] = '\0';
18620 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
18621 sqlite3_free(zBind);
18622 }
18623 break;
18624 }
18625
18626 default:
18627 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName);
18628 break;
18629 }
18630
18631 if( zSql==0 ){
18632 rc = SQLITE_NOMEM;
18633 }else{
18634 int f = SQLITE_PREPARE_PERSISTENT;
18635 if( eStmt>FTS5_STMT_LOOKUP ) f |= SQLITE_PREPARE_NO_VTAB;
18636 p->pConfig->bLock++;
18637 rc = sqlite3_prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0);
18638 p->pConfig->bLock--;
18639 sqlite3_free(zSql);
18640 if( rc!=SQLITE_OK && pzErrMsg ){
18641 *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db));
18642 }
18643 }
18644 }
18645
18646 *ppStmt = p->aStmt[eStmt];
18647 sqlite3_reset(*ppStmt);
18648 return rc;
18649}
18650
18651
18652static int fts5ExecPrintf(
18653 sqlite3 *db,
18654 char **pzErr,
18655 const char *zFormat,
18656 ...
18657){
18658 int rc;
18659 va_list ap; /* ... printf arguments */
18660 char *zSql;
18661
18662 va_start(ap, zFormat);
18663 zSql = sqlite3_vmprintf(zFormat, ap);
18664
18665 if( zSql==0 ){
18666 rc = SQLITE_NOMEM;
18667 }else{
18668 rc = sqlite3_exec(db, zSql, 0, 0, pzErr);
18669 sqlite3_free(zSql);
18670 }
18671
18672 va_end(ap);
18673 return rc;
18674}
18675
18676/*
18677** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error
18678** code otherwise.
18679*/
18680static int sqlite3Fts5DropAll(Fts5Config *pConfig){
18681 int rc = fts5ExecPrintf(pConfig->db, 0,
18682 "DROP TABLE IF EXISTS %Q.'%q_data';"
18683 "DROP TABLE IF EXISTS %Q.'%q_idx';"
18684 "DROP TABLE IF EXISTS %Q.'%q_config';",
18685 pConfig->zDb, pConfig->zName,
18686 pConfig->zDb, pConfig->zName,
18687 pConfig->zDb, pConfig->zName
18688 );
18689 if( rc==SQLITE_OK && pConfig->bColumnsize ){
18690 rc = fts5ExecPrintf(pConfig->db, 0,
18691 "DROP TABLE IF EXISTS %Q.'%q_docsize';",
18692 pConfig->zDb, pConfig->zName
18693 );
18694 }
18695 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
18696 rc = fts5ExecPrintf(pConfig->db, 0,
18697 "DROP TABLE IF EXISTS %Q.'%q_content';",
18698 pConfig->zDb, pConfig->zName
18699 );
18700 }
18701 return rc;
18702}
18703
18704static void fts5StorageRenameOne(
18705 Fts5Config *pConfig, /* Current FTS5 configuration */
18706 int *pRc, /* IN/OUT: Error code */
18707 const char *zTail, /* Tail of table name e.g. "data", "config" */
18708 const char *zName /* New name of FTS5 table */
18709){
18710 if( *pRc==SQLITE_OK ){
18711 *pRc = fts5ExecPrintf(pConfig->db, 0,
18712 "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
18713 pConfig->zDb, pConfig->zName, zTail, zName, zTail
18714 );
18715 }
18716}
18717
18718static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
18719 Fts5Config *pConfig = pStorage->pConfig;
18720 int rc = sqlite3Fts5StorageSync(pStorage);
18721
18722 fts5StorageRenameOne(pConfig, &rc, "data", zName);
18723 fts5StorageRenameOne(pConfig, &rc, "idx", zName);
18724 fts5StorageRenameOne(pConfig, &rc, "config", zName);
18725 if( pConfig->bColumnsize ){
18726 fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
18727 }
18728 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
18729 fts5StorageRenameOne(pConfig, &rc, "content", zName);
18730 }
18731 return rc;
18732}
18733
18734/*
18735** Create the shadow table named zPost, with definition zDefn. Return
18736** SQLITE_OK if successful, or an SQLite error code otherwise.
18737*/
18738static int sqlite3Fts5CreateTable(
18739 Fts5Config *pConfig, /* FTS5 configuration */
18740 const char *zPost, /* Shadow table to create (e.g. "content") */
18741 const char *zDefn, /* Columns etc. for shadow table */
18742 int bWithout, /* True for without rowid */
18743 char **pzErr /* OUT: Error message */
18744){
18745 int rc;
18746 char *zErr = 0;
18747
18748 rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s",
18749 pConfig->zDb, pConfig->zName, zPost, zDefn,
18750#ifndef SQLITE_FTS5_NO_WITHOUT_ROWID
18751 bWithout?" WITHOUT ROWID":
18752#endif
18753 ""
18754 );
18755 if( zErr ){
18756 *pzErr = sqlite3_mprintf(
18757 "fts5: error creating shadow table %q_%s: %s",
18758 pConfig->zName, zPost, zErr
18759 );
18760 sqlite3_free(zErr);
18761 }
18762
18763 return rc;
18764}
18765
18766/*
18767** Open a new Fts5Index handle. If the bCreate argument is true, create
18768** and initialize the underlying tables
18769**
18770** If successful, set *pp to point to the new object and return SQLITE_OK.
18771** Otherwise, set *pp to NULL and return an SQLite error code.
18772*/
18773static int sqlite3Fts5StorageOpen(
18774 Fts5Config *pConfig,
18775 Fts5Index *pIndex,
18776 int bCreate,
18777 Fts5Storage **pp,
18778 char **pzErr /* OUT: Error message */
18779){
18780 int rc = SQLITE_OK;
18781 Fts5Storage *p; /* New object */
18782 sqlite3_int64 nByte; /* Bytes of space to allocate */
18783
18784 nByte = sizeof(Fts5Storage) /* Fts5Storage object */
18785 + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */
18786 *pp = p = (Fts5Storage*)sqlite3_malloc64(nByte);
18787 if( !p ) return SQLITE_NOMEM;
18788
18789 memset(p, 0, (size_t)nByte);
18790 p->aTotalSize = (i64*)&p[1];
18791 p->pConfig = pConfig;
18792 p->pIndex = pIndex;
18793
18794 if( bCreate ){
18795 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
18796 int nDefn = 32 + pConfig->nCol*10;
18797 char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10);
18798 if( zDefn==0 ){
18799 rc = SQLITE_NOMEM;
18800 }else{
18801 int i;
18802 int iOff;
18803 sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY");
18804 iOff = (int)strlen(zDefn);
18805 for(i=0; i<pConfig->nCol; i++){
18806 sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i);
18807 iOff += (int)strlen(&zDefn[iOff]);
18808 }
18809 rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
18810 }
18811 sqlite3_free(zDefn);
18812 }
18813
18814 if( rc==SQLITE_OK && pConfig->bColumnsize ){
18815 rc = sqlite3Fts5CreateTable(
18816 pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr
18817 );
18818 }
18819 if( rc==SQLITE_OK ){
18820 rc = sqlite3Fts5CreateTable(
18821 pConfig, "config", "k PRIMARY KEY, v", 1, pzErr
18822 );
18823 }
18824 if( rc==SQLITE_OK ){
18825 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
18826 }
18827 }
18828
18829 if( rc ){
18830 sqlite3Fts5StorageClose(p);
18831 *pp = 0;
18832 }
18833 return rc;
18834}
18835
18836/*
18837** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
18838*/
18839static int sqlite3Fts5StorageClose(Fts5Storage *p){
18840 int rc = SQLITE_OK;
18841 if( p ){
18842 int i;
18843
18844 /* Finalize all SQL statements */
18845 for(i=0; i<ArraySize(p->aStmt); i++){
18846 sqlite3_finalize(p->aStmt[i]);
18847 }
18848
18849 sqlite3_free(p);
18850 }
18851 return rc;
18852}
18853
18854typedef struct Fts5InsertCtx Fts5InsertCtx;
18855struct Fts5InsertCtx {
18856 Fts5Storage *pStorage;
18857 int iCol;
18858 int szCol; /* Size of column value in tokens */
18859};
18860
18861/*
18862** Tokenization callback used when inserting tokens into the FTS index.
18863*/
18864static int fts5StorageInsertCallback(
18865 void *pContext, /* Pointer to Fts5InsertCtx object */
18866 int tflags,
18867 const char *pToken, /* Buffer containing token */
18868 int nToken, /* Size of token in bytes */
18869 int iUnused1, /* Start offset of token */
18870 int iUnused2 /* End offset of token */
18871){
18872 Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
18873 Fts5Index *pIdx = pCtx->pStorage->pIndex;
18874 UNUSED_PARAM2(iUnused1, iUnused2);
18875 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
18876 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
18877 pCtx->szCol++;
18878 }
18879 return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
18880}
18881
18882/*
18883** If a row with rowid iDel is present in the %_content table, add the
18884** delete-markers to the FTS index necessary to delete it. Do not actually
18885** remove the %_content row at this time though.
18886*/
18887static int fts5StorageDeleteFromIndex(
18888 Fts5Storage *p,
18889 i64 iDel,
18890 sqlite3_value **apVal
18891){
18892 Fts5Config *pConfig = p->pConfig;
18893 sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */
18894 int rc; /* Return code */
18895 int rc2; /* sqlite3_reset() return code */
18896 int iCol;
18897 Fts5InsertCtx ctx;
18898
18899 if( apVal==0 ){
18900 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0);
18901 if( rc!=SQLITE_OK ) return rc;
18902 sqlite3_bind_int64(pSeek, 1, iDel);
18903 if( sqlite3_step(pSeek)!=SQLITE_ROW ){
18904 return sqlite3_reset(pSeek);
18905 }
18906 }
18907
18908 ctx.pStorage = p;
18909 ctx.iCol = -1;
18910 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel);
18911 for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
18912 if( pConfig->abUnindexed[iCol-1]==0 ){
18913 const char *zText;
18914 int nText;
18915 assert( pSeek==0 || apVal==0 );
18916 assert( pSeek!=0 || apVal!=0 );
18917 if( pSeek ){
18918 zText = (const char*)sqlite3_column_text(pSeek, iCol);
18919 nText = sqlite3_column_bytes(pSeek, iCol);
18920 }else if( ALWAYS(apVal) ){
18921 zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
18922 nText = sqlite3_value_bytes(apVal[iCol-1]);
18923 }else{
18924 continue;
18925 }
18926 ctx.szCol = 0;
18927 rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
18928 zText, nText, (void*)&ctx, fts5StorageInsertCallback
18929 );
18930 p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
18931 if( p->aTotalSize[iCol-1]<0 ){
18932 rc = FTS5_CORRUPT;
18933 }
18934 }
18935 }
18936 if( rc==SQLITE_OK && p->nTotalRow<1 ){
18937 rc = FTS5_CORRUPT;
18938 }else{
18939 p->nTotalRow--;
18940 }
18941
18942 rc2 = sqlite3_reset(pSeek);
18943 if( rc==SQLITE_OK ) rc = rc2;
18944 return rc;
18945}
18946
18947
18948/*
18949** Insert a record into the %_docsize table. Specifically, do:
18950**
18951** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf);
18952**
18953** If there is no %_docsize table (as happens if the columnsize=0 option
18954** is specified when the FTS5 table is created), this function is a no-op.
18955*/
18956static int fts5StorageInsertDocsize(
18957 Fts5Storage *p, /* Storage module to write to */
18958 i64 iRowid, /* id value */
18959 Fts5Buffer *pBuf /* sz value */
18960){
18961 int rc = SQLITE_OK;
18962 if( p->pConfig->bColumnsize ){
18963 sqlite3_stmt *pReplace = 0;
18964 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
18965 if( rc==SQLITE_OK ){
18966 sqlite3_bind_int64(pReplace, 1, iRowid);
18967 sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
18968 sqlite3_step(pReplace);
18969 rc = sqlite3_reset(pReplace);
18970 sqlite3_bind_null(pReplace, 2);
18971 }
18972 }
18973 return rc;
18974}
18975
18976/*
18977** Load the contents of the "averages" record from disk into the
18978** p->nTotalRow and p->aTotalSize[] variables. If successful, and if
18979** argument bCache is true, set the p->bTotalsValid flag to indicate
18980** that the contents of aTotalSize[] and nTotalRow are valid until
18981** further notice.
18982**
18983** Return SQLITE_OK if successful, or an SQLite error code if an error
18984** occurs.
18985*/
18986static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
18987 int rc = SQLITE_OK;
18988 if( p->bTotalsValid==0 ){
18989 rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
18990 p->bTotalsValid = bCache;
18991 }
18992 return rc;
18993}
18994
18995/*
18996** Store the current contents of the p->nTotalRow and p->aTotalSize[]
18997** variables in the "averages" record on disk.
18998**
18999** Return SQLITE_OK if successful, or an SQLite error code if an error
19000** occurs.
19001*/
19002static int fts5StorageSaveTotals(Fts5Storage *p){
19003 int nCol = p->pConfig->nCol;
19004 int i;
19005 Fts5Buffer buf;
19006 int rc = SQLITE_OK;
19007 memset(&buf, 0, sizeof(buf));
19008
19009 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow);
19010 for(i=0; i<nCol; i++){
19011 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]);
19012 }
19013 if( rc==SQLITE_OK ){
19014 rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n);
19015 }
19016 sqlite3_free(buf.p);
19017
19018 return rc;
19019}
19020
19021/*
19022** Remove a row from the FTS table.
19023*/
19024static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){
19025 Fts5Config *pConfig = p->pConfig;
19026 int rc;
19027 sqlite3_stmt *pDel = 0;
19028
19029 assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 );
19030 rc = fts5StorageLoadTotals(p, 1);
19031
19032 /* Delete the index records */
19033 if( rc==SQLITE_OK ){
19034 rc = fts5StorageDeleteFromIndex(p, iDel, apVal);
19035 }
19036
19037 /* Delete the %_docsize record */
19038 if( rc==SQLITE_OK && pConfig->bColumnsize ){
19039 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0);
19040 if( rc==SQLITE_OK ){
19041 sqlite3_bind_int64(pDel, 1, iDel);
19042 sqlite3_step(pDel);
19043 rc = sqlite3_reset(pDel);
19044 }
19045 }
19046
19047 /* Delete the %_content record */
19048 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
19049 if( rc==SQLITE_OK ){
19050 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0);
19051 }
19052 if( rc==SQLITE_OK ){
19053 sqlite3_bind_int64(pDel, 1, iDel);
19054 sqlite3_step(pDel);
19055 rc = sqlite3_reset(pDel);
19056 }
19057 }
19058
19059 return rc;
19060}
19061
19062/*
19063** Delete all entries in the FTS5 index.
19064*/
19065static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){
19066 Fts5Config *pConfig = p->pConfig;
19067 int rc;
19068
19069 p->bTotalsValid = 0;
19070
19071 /* Delete the contents of the %_data and %_docsize tables. */
19072 rc = fts5ExecPrintf(pConfig->db, 0,
19073 "DELETE FROM %Q.'%q_data';"
19074 "DELETE FROM %Q.'%q_idx';",
19075 pConfig->zDb, pConfig->zName,
19076 pConfig->zDb, pConfig->zName
19077 );
19078 if( rc==SQLITE_OK && pConfig->bColumnsize ){
19079 rc = fts5ExecPrintf(pConfig->db, 0,
19080 "DELETE FROM %Q.'%q_docsize';",
19081 pConfig->zDb, pConfig->zName
19082 );
19083 }
19084
19085 /* Reinitialize the %_data table. This call creates the initial structure
19086 ** and averages records. */
19087 if( rc==SQLITE_OK ){
19088 rc = sqlite3Fts5IndexReinit(p->pIndex);
19089 }
19090 if( rc==SQLITE_OK ){
19091 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
19092 }
19093 return rc;
19094}
19095
19096static int sqlite3Fts5StorageRebuild(Fts5Storage *p){
19097 Fts5Buffer buf = {0,0,0};
19098 Fts5Config *pConfig = p->pConfig;
19099 sqlite3_stmt *pScan = 0;
19100 Fts5InsertCtx ctx;
19101 int rc, rc2;
19102
19103 memset(&ctx, 0, sizeof(Fts5InsertCtx));
19104 ctx.pStorage = p;
19105 rc = sqlite3Fts5StorageDeleteAll(p);
19106 if( rc==SQLITE_OK ){
19107 rc = fts5StorageLoadTotals(p, 1);
19108 }
19109
19110 if( rc==SQLITE_OK ){
19111 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
19112 }
19113
19114 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){
19115 i64 iRowid = sqlite3_column_int64(pScan, 0);
19116
19117 sqlite3Fts5BufferZero(&buf);
19118 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
19119 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
19120 ctx.szCol = 0;
19121 if( pConfig->abUnindexed[ctx.iCol]==0 ){
19122 const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1);
19123 int nText = sqlite3_column_bytes(pScan, ctx.iCol+1);
19124 rc = sqlite3Fts5Tokenize(pConfig,
19125 FTS5_TOKENIZE_DOCUMENT,
19126 zText, nText,
19127 (void*)&ctx,
19128 fts5StorageInsertCallback
19129 );
19130 }
19131 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
19132 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
19133 }
19134 p->nTotalRow++;
19135
19136 if( rc==SQLITE_OK ){
19137 rc = fts5StorageInsertDocsize(p, iRowid, &buf);
19138 }
19139 }
19140 sqlite3_free(buf.p);
19141 rc2 = sqlite3_reset(pScan);
19142 if( rc==SQLITE_OK ) rc = rc2;
19143
19144 /* Write the averages record */
19145 if( rc==SQLITE_OK ){
19146 rc = fts5StorageSaveTotals(p);
19147 }
19148 return rc;
19149}
19150
19151static int sqlite3Fts5StorageOptimize(Fts5Storage *p){
19152 return sqlite3Fts5IndexOptimize(p->pIndex);
19153}
19154
19155static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){
19156 return sqlite3Fts5IndexMerge(p->pIndex, nMerge);
19157}
19158
19159static int sqlite3Fts5StorageReset(Fts5Storage *p){
19160 return sqlite3Fts5IndexReset(p->pIndex);
19161}
19162
19163/*
19164** Allocate a new rowid. This is used for "external content" tables when
19165** a NULL value is inserted into the rowid column. The new rowid is allocated
19166** by inserting a dummy row into the %_docsize table. The dummy will be
19167** overwritten later.
19168**
19169** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In
19170** this case the user is required to provide a rowid explicitly.
19171*/
19172static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){
19173 int rc = SQLITE_MISMATCH;
19174 if( p->pConfig->bColumnsize ){
19175 sqlite3_stmt *pReplace = 0;
19176 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
19177 if( rc==SQLITE_OK ){
19178 sqlite3_bind_null(pReplace, 1);
19179 sqlite3_bind_null(pReplace, 2);
19180 sqlite3_step(pReplace);
19181 rc = sqlite3_reset(pReplace);
19182 }
19183 if( rc==SQLITE_OK ){
19184 *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
19185 }
19186 }
19187 return rc;
19188}
19189
19190/*
19191** Insert a new row into the FTS content table.
19192*/
19193static int sqlite3Fts5StorageContentInsert(
19194 Fts5Storage *p,
19195 sqlite3_value **apVal,
19196 i64 *piRowid
19197){
19198 Fts5Config *pConfig = p->pConfig;
19199 int rc = SQLITE_OK;
19200
19201 /* Insert the new row into the %_content table. */
19202 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
19203 if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
19204 *piRowid = sqlite3_value_int64(apVal[1]);
19205 }else{
19206 rc = fts5StorageNewRowid(p, piRowid);
19207 }
19208 }else{
19209 sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */
19210 int i; /* Counter variable */
19211 rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
19212 for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
19213 rc = sqlite3_bind_value(pInsert, i, apVal[i]);
19214 }
19215 if( rc==SQLITE_OK ){
19216 sqlite3_step(pInsert);
19217 rc = sqlite3_reset(pInsert);
19218 }
19219 *piRowid = sqlite3_last_insert_rowid(pConfig->db);
19220 }
19221
19222 return rc;
19223}
19224
19225/*
19226** Insert new entries into the FTS index and %_docsize table.
19227*/
19228static int sqlite3Fts5StorageIndexInsert(
19229 Fts5Storage *p,
19230 sqlite3_value **apVal,
19231 i64 iRowid
19232){
19233 Fts5Config *pConfig = p->pConfig;
19234 int rc = SQLITE_OK; /* Return code */
19235 Fts5InsertCtx ctx; /* Tokenization callback context object */
19236 Fts5Buffer buf; /* Buffer used to build up %_docsize blob */
19237
19238 memset(&buf, 0, sizeof(Fts5Buffer));
19239 ctx.pStorage = p;
19240 rc = fts5StorageLoadTotals(p, 1);
19241
19242 if( rc==SQLITE_OK ){
19243 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
19244 }
19245 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
19246 ctx.szCol = 0;
19247 if( pConfig->abUnindexed[ctx.iCol]==0 ){
19248 const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
19249 int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
19250 rc = sqlite3Fts5Tokenize(pConfig,
19251 FTS5_TOKENIZE_DOCUMENT,
19252 zText, nText,
19253 (void*)&ctx,
19254 fts5StorageInsertCallback
19255 );
19256 }
19257 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
19258 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
19259 }
19260 p->nTotalRow++;
19261
19262 /* Write the %_docsize record */
19263 if( rc==SQLITE_OK ){
19264 rc = fts5StorageInsertDocsize(p, iRowid, &buf);
19265 }
19266 sqlite3_free(buf.p);
19267
19268 return rc;
19269}
19270
19271static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
19272 Fts5Config *pConfig = p->pConfig;
19273 char *zSql;
19274 int rc;
19275
19276 zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'",
19277 pConfig->zDb, pConfig->zName, zSuffix
19278 );
19279 if( zSql==0 ){
19280 rc = SQLITE_NOMEM;
19281 }else{
19282 sqlite3_stmt *pCnt = 0;
19283 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
19284 if( rc==SQLITE_OK ){
19285 if( SQLITE_ROW==sqlite3_step(pCnt) ){
19286 *pnRow = sqlite3_column_int64(pCnt, 0);
19287 }
19288 rc = sqlite3_finalize(pCnt);
19289 }
19290 }
19291
19292 sqlite3_free(zSql);
19293 return rc;
19294}
19295
19296/*
19297** Context object used by sqlite3Fts5StorageIntegrity().
19298*/
19299typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
19300struct Fts5IntegrityCtx {
19301 i64 iRowid;
19302 int iCol;
19303 int szCol;
19304 u64 cksum;
19305 Fts5Termset *pTermset;
19306 Fts5Config *pConfig;
19307};
19308
19309
19310/*
19311** Tokenization callback used by integrity check.
19312*/
19313static int fts5StorageIntegrityCallback(
19314 void *pContext, /* Pointer to Fts5IntegrityCtx object */
19315 int tflags,
19316 const char *pToken, /* Buffer containing token */
19317 int nToken, /* Size of token in bytes */
19318 int iUnused1, /* Start offset of token */
19319 int iUnused2 /* End offset of token */
19320){
19321 Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
19322 Fts5Termset *pTermset = pCtx->pTermset;
19323 int bPresent;
19324 int ii;
19325 int rc = SQLITE_OK;
19326 int iPos;
19327 int iCol;
19328
19329 UNUSED_PARAM2(iUnused1, iUnused2);
19330 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
19331
19332 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
19333 pCtx->szCol++;
19334 }
19335
19336 switch( pCtx->pConfig->eDetail ){
19337 case FTS5_DETAIL_FULL:
19338 iPos = pCtx->szCol-1;
19339 iCol = pCtx->iCol;
19340 break;
19341
19342 case FTS5_DETAIL_COLUMNS:
19343 iPos = pCtx->iCol;
19344 iCol = 0;
19345 break;
19346
19347 default:
19348 assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE );
19349 iPos = 0;
19350 iCol = 0;
19351 break;
19352 }
19353
19354 rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
19355 if( rc==SQLITE_OK && bPresent==0 ){
19356 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
19357 pCtx->iRowid, iCol, iPos, 0, pToken, nToken
19358 );
19359 }
19360
19361 for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){
19362 const int nChar = pCtx->pConfig->aPrefix[ii];
19363 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
19364 if( nByte ){
19365 rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
19366 if( bPresent==0 ){
19367 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
19368 pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
19369 );
19370 }
19371 }
19372 }
19373
19374 return rc;
19375}
19376
19377/*
19378** Check that the contents of the FTS index match that of the %_content
19379** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
19380** some other SQLite error code if an error occurs while attempting to
19381** determine this.
19382*/
19383static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){
19384 Fts5Config *pConfig = p->pConfig;
19385 int rc = SQLITE_OK; /* Return code */
19386 int *aColSize; /* Array of size pConfig->nCol */
19387 i64 *aTotalSize; /* Array of size pConfig->nCol */
19388 Fts5IntegrityCtx ctx;
19389 sqlite3_stmt *pScan;
19390 int bUseCksum;
19391
19392 memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
19393 ctx.pConfig = p->pConfig;
19394 aTotalSize = (i64*)sqlite3_malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64)));
19395 if( !aTotalSize ) return SQLITE_NOMEM;
19396 aColSize = (int*)&aTotalSize[pConfig->nCol];
19397 memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);
19398
19399 bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL
19400 || (pConfig->eContent==FTS5_CONTENT_EXTERNAL && iArg)
19401 );
19402 if( bUseCksum ){
19403 /* Generate the expected index checksum based on the contents of the
19404 ** %_content table. This block stores the checksum in ctx.cksum. */
19405 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
19406 if( rc==SQLITE_OK ){
19407 int rc2;
19408 while( SQLITE_ROW==sqlite3_step(pScan) ){
19409 int i;
19410 ctx.iRowid = sqlite3_column_int64(pScan, 0);
19411 ctx.szCol = 0;
19412 if( pConfig->bColumnsize ){
19413 rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
19414 }
19415 if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){
19416 rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
19417 }
19418 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
19419 if( pConfig->abUnindexed[i] ) continue;
19420 ctx.iCol = i;
19421 ctx.szCol = 0;
19422 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
19423 rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
19424 }
19425 if( rc==SQLITE_OK ){
19426 const char *zText = (const char*)sqlite3_column_text(pScan, i+1);
19427 int nText = sqlite3_column_bytes(pScan, i+1);
19428 rc = sqlite3Fts5Tokenize(pConfig,
19429 FTS5_TOKENIZE_DOCUMENT,
19430 zText, nText,
19431 (void*)&ctx,
19432 fts5StorageIntegrityCallback
19433 );
19434 }
19435 if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
19436 rc = FTS5_CORRUPT;
19437 }
19438 aTotalSize[i] += ctx.szCol;
19439 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
19440 sqlite3Fts5TermsetFree(ctx.pTermset);
19441 ctx.pTermset = 0;
19442 }
19443 }
19444 sqlite3Fts5TermsetFree(ctx.pTermset);
19445 ctx.pTermset = 0;
19446
19447 if( rc!=SQLITE_OK ) break;
19448 }
19449 rc2 = sqlite3_reset(pScan);
19450 if( rc==SQLITE_OK ) rc = rc2;
19451 }
19452
19453 /* Test that the "totals" (sometimes called "averages") record looks Ok */
19454 if( rc==SQLITE_OK ){
19455 int i;
19456 rc = fts5StorageLoadTotals(p, 0);
19457 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
19458 if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
19459 }
19460 }
19461
19462 /* Check that the %_docsize and %_content tables contain the expected
19463 ** number of rows. */
19464 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
19465 i64 nRow = 0;
19466 rc = fts5StorageCount(p, "content", &nRow);
19467 if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
19468 }
19469 if( rc==SQLITE_OK && pConfig->bColumnsize ){
19470 i64 nRow = 0;
19471 rc = fts5StorageCount(p, "docsize", &nRow);
19472 if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
19473 }
19474 }
19475
19476 /* Pass the expected checksum down to the FTS index module. It will
19477 ** verify, amongst other things, that it matches the checksum generated by
19478 ** inspecting the index itself. */
19479 if( rc==SQLITE_OK ){
19480 rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum);
19481 }
19482
19483 sqlite3_free(aTotalSize);
19484 return rc;
19485}
19486
19487/*
19488** Obtain an SQLite statement handle that may be used to read data from the
19489** %_content table.
19490*/
19491static int sqlite3Fts5StorageStmt(
19492 Fts5Storage *p,
19493 int eStmt,
19494 sqlite3_stmt **pp,
19495 char **pzErrMsg
19496){
19497 int rc;
19498 assert( eStmt==FTS5_STMT_SCAN_ASC
19499 || eStmt==FTS5_STMT_SCAN_DESC
19500 || eStmt==FTS5_STMT_LOOKUP
19501 );
19502 rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg);
19503 if( rc==SQLITE_OK ){
19504 assert( p->aStmt[eStmt]==*pp );
19505 p->aStmt[eStmt] = 0;
19506 }
19507 return rc;
19508}
19509
19510/*
19511** Release an SQLite statement handle obtained via an earlier call to
19512** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function
19513** must match that passed to the sqlite3Fts5StorageStmt() call.
19514*/
19515static void sqlite3Fts5StorageStmtRelease(
19516 Fts5Storage *p,
19517 int eStmt,
19518 sqlite3_stmt *pStmt
19519){
19520 assert( eStmt==FTS5_STMT_SCAN_ASC
19521 || eStmt==FTS5_STMT_SCAN_DESC
19522 || eStmt==FTS5_STMT_LOOKUP
19523 );
19524 if( p->aStmt[eStmt]==0 ){
19525 sqlite3_reset(pStmt);
19526 p->aStmt[eStmt] = pStmt;
19527 }else{
19528 sqlite3_finalize(pStmt);
19529 }
19530}
19531
19532static int fts5StorageDecodeSizeArray(
19533 int *aCol, int nCol, /* Array to populate */
19534 const u8 *aBlob, int nBlob /* Record to read varints from */
19535){
19536 int i;
19537 int iOff = 0;
19538 for(i=0; i<nCol; i++){
19539 if( iOff>=nBlob ) return 1;
19540 iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]);
19541 }
19542 return (iOff!=nBlob);
19543}
19544
19545/*
19546** Argument aCol points to an array of integers containing one entry for
19547** each table column. This function reads the %_docsize record for the
19548** specified rowid and populates aCol[] with the results.
19549**
19550** An SQLite error code is returned if an error occurs, or SQLITE_OK
19551** otherwise.
19552*/
19553static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
19554 int nCol = p->pConfig->nCol; /* Number of user columns in table */
19555 sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */
19556 int rc; /* Return Code */
19557
19558 assert( p->pConfig->bColumnsize );
19559 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
19560 if( pLookup ){
19561 int bCorrupt = 1;
19562 assert( rc==SQLITE_OK );
19563 sqlite3_bind_int64(pLookup, 1, iRowid);
19564 if( SQLITE_ROW==sqlite3_step(pLookup) ){
19565 const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
19566 int nBlob = sqlite3_column_bytes(pLookup, 0);
19567 if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
19568 bCorrupt = 0;
19569 }
19570 }
19571 rc = sqlite3_reset(pLookup);
19572 if( bCorrupt && rc==SQLITE_OK ){
19573 rc = FTS5_CORRUPT;
19574 }
19575 }else{
19576 assert( rc!=SQLITE_OK );
19577 }
19578
19579 return rc;
19580}
19581
19582static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
19583 int rc = fts5StorageLoadTotals(p, 0);
19584 if( rc==SQLITE_OK ){
19585 *pnToken = 0;
19586 if( iCol<0 ){
19587 int i;
19588 for(i=0; i<p->pConfig->nCol; i++){
19589 *pnToken += p->aTotalSize[i];
19590 }
19591 }else if( iCol<p->pConfig->nCol ){
19592 *pnToken = p->aTotalSize[iCol];
19593 }else{
19594 rc = SQLITE_RANGE;
19595 }
19596 }
19597 return rc;
19598}
19599
19600static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){
19601 int rc = fts5StorageLoadTotals(p, 0);
19602 if( rc==SQLITE_OK ){
19603 /* nTotalRow being zero does not necessarily indicate a corrupt
19604 ** database - it might be that the FTS5 table really does contain zero
19605 ** rows. However this function is only called from the xRowCount() API,
19606 ** and there is no way for that API to be invoked if the table contains
19607 ** no rows. Hence the FTS5_CORRUPT return. */
19608 *pnRow = p->nTotalRow;
19609 if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT;
19610 }
19611 return rc;
19612}
19613
19614/*
19615** Flush any data currently held in-memory to disk.
19616*/
19617static int sqlite3Fts5StorageSync(Fts5Storage *p){
19618 int rc = SQLITE_OK;
19619 i64 iLastRowid = sqlite3_last_insert_rowid(p->pConfig->db);
19620 if( p->bTotalsValid ){
19621 rc = fts5StorageSaveTotals(p);
19622 p->bTotalsValid = 0;
19623 }
19624 if( rc==SQLITE_OK ){
19625 rc = sqlite3Fts5IndexSync(p->pIndex);
19626 }
19627 sqlite3_set_last_insert_rowid(p->pConfig->db, iLastRowid);
19628 return rc;
19629}
19630
19631static int sqlite3Fts5StorageRollback(Fts5Storage *p){
19632 p->bTotalsValid = 0;
19633 return sqlite3Fts5IndexRollback(p->pIndex);
19634}
19635
19636static int sqlite3Fts5StorageConfigValue(
19637 Fts5Storage *p,
19638 const char *z,
19639 sqlite3_value *pVal,
19640 int iVal
19641){
19642 sqlite3_stmt *pReplace = 0;
19643 int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0);
19644 if( rc==SQLITE_OK ){
19645 sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC);
19646 if( pVal ){
19647 sqlite3_bind_value(pReplace, 2, pVal);
19648 }else{
19649 sqlite3_bind_int(pReplace, 2, iVal);
19650 }
19651 sqlite3_step(pReplace);
19652 rc = sqlite3_reset(pReplace);
19653 sqlite3_bind_null(pReplace, 1);
19654 }
19655 if( rc==SQLITE_OK && pVal ){
19656 int iNew = p->pConfig->iCookie + 1;
19657 rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew);
19658 if( rc==SQLITE_OK ){
19659 p->pConfig->iCookie = iNew;
19660 }
19661 }
19662 return rc;
19663}
19664
19665#line 1 "fts5_tokenize.c"
19666/*
19667** 2014 May 31
19668**
19669** The author disclaims copyright to this source code. In place of
19670** a legal notice, here is a blessing:
19671**
19672** May you do good and not evil.
19673** May you find forgiveness for yourself and forgive others.
19674** May you share freely, never taking more than you give.
19675**
19676******************************************************************************
19677*/
19678
19679
19680/* #include "fts5Int.h" */
19681
19682/**************************************************************************
19683** Start of ascii tokenizer implementation.
19684*/
19685
/*
** For tokenizers with no "unicode" modifier, the set of token characters
** is the same as the set of ASCII range alphanumeric characters.
**
** Entry i is 1 if byte value i ('0'..'9', 'A'..'Z', 'a'..'z') is a token
** character by default and 0 otherwise. The table is a read-only
** template: each tokenizer instance takes its own copy and applies any
** "tokenchars"/"separators" exceptions to that copy.
*/
static const unsigned char aAsciiTokenChar[128] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F */
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F */
};
19700
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];  /* Per-byte flag: non-zero for token chars */
};

/*
** Apply a "tokenchars" or "separators" argument to tokenizer p.
**
** For every byte of the nul-terminated string zArg that lies in the ASCII
** range (high bit clear), the matching entry of p->aTokenChar[] is set to
** bTokenChars. Bytes with the high bit set are skipped.
*/
static void fts5AsciiAddExceptions(
  AsciiTokenizer *p,
  const char *zArg,
  int bTokenChars
){
  const unsigned char flag = (unsigned char)bTokenChars;
  const char *zCsr;
  for(zCsr=zArg; *zCsr; zCsr++){
    if( *zCsr & 0x80 ) continue;
    p->aTokenChar[(int)*zCsr] = flag;
  }
}
19718
19719/*
19720** Delete a "ascii" tokenizer.
19721*/
19722static void fts5AsciiDelete(Fts5Tokenizer *p){
19723 sqlite3_free(p);
19724}
19725
19726/*
19727** Create an "ascii" tokenizer.
19728*/
19729static int fts5AsciiCreate(
19730 void *pUnused,
19731 const char **azArg, int nArg,
19732 Fts5Tokenizer **ppOut
19733){
19734 int rc = SQLITE_OK;
19735 AsciiTokenizer *p = 0;
19736 UNUSED_PARAM(pUnused);
19737 if( nArg%2 ){
19738 rc = SQLITE_ERROR;
19739 }else{
19740 p = sqlite3_malloc(sizeof(AsciiTokenizer));
19741 if( p==0 ){
19742 rc = SQLITE_NOMEM;
19743 }else{
19744 int i;
19745 memset(p, 0, sizeof(AsciiTokenizer));
19746 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
19747 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
19748 const char *zArg = azArg[i+1];
19749 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
19750 fts5AsciiAddExceptions(p, zArg, 1);
19751 }else
19752 if( 0==sqlite3_stricmp(azArg[i], "separators") ){
19753 fts5AsciiAddExceptions(p, zArg, 0);
19754 }else{
19755 rc = SQLITE_ERROR;
19756 }
19757 }
19758 if( rc!=SQLITE_OK ){
19759 fts5AsciiDelete((Fts5Tokenizer*)p);
19760 p = 0;
19761 }
19762 }
19763 }
19764
19765 *ppOut = (Fts5Tokenizer*)p;
19766 return rc;
19767}
19768
19769
/*
** Copy nByte bytes from aIn[] to aOut[], converting the ASCII upper case
** letters 'A'..'Z' to lower case on the way. All other byte values are
** copied unchanged. Nothing is written if nByte is zero or negative.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int iByte = 0;
  while( iByte<nByte ){
    char ch = aIn[iByte];
    if( ch>='A' && ch<='Z' ){
      ch += 32;
    }
    aOut[iByte++] = ch;
  }
}
19778
19779/*
19780** Tokenize some text using the ascii tokenizer.
19781*/
19782static int fts5AsciiTokenize(
19783 Fts5Tokenizer *pTokenizer,
19784 void *pCtx,
19785 int iUnused,
19786 const char *pText, int nText,
19787 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
19788){
19789 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
19790 int rc = SQLITE_OK;
19791 int ie;
19792 int is = 0;
19793
19794 char aFold[64];
19795 int nFold = sizeof(aFold);
19796 char *pFold = aFold;
19797 unsigned char *a = p->aTokenChar;
19798
19799 UNUSED_PARAM(iUnused);
19800
19801 while( is<nText && rc==SQLITE_OK ){
19802 int nByte;
19803
19804 /* Skip any leading divider characters. */
19805 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
19806 is++;
19807 }
19808 if( is==nText ) break;
19809
19810 /* Count the token characters */
19811 ie = is+1;
19812 while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
19813 ie++;
19814 }
19815
19816 /* Fold to lower case */
19817 nByte = ie-is;
19818 if( nByte>nFold ){
19819 if( pFold!=aFold ) sqlite3_free(pFold);
19820 pFold = sqlite3_malloc64((sqlite3_int64)nByte*2);
19821 if( pFold==0 ){
19822 rc = SQLITE_NOMEM;
19823 break;
19824 }
19825 nFold = nByte*2;
19826 }
19827 asciiFold(pFold, &pText[is], nByte);
19828
19829 /* Invoke the token callback */
19830 rc = xToken(pCtx, 0, pFold, nByte, is, ie);
19831 is = ie+1;
19832 }
19833
19834 if( pFold!=aFold ) sqlite3_free(pFold);
19835 if( rc==SQLITE_DONE ) rc = SQLITE_OK;
19836 return rc;
19837}
19838
19839/**************************************************************************
19840** Start of unicode61 tokenizer implementation.
19841*/
19842
19843
19844/*
19845** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
19846** from the sqlite3 source file utf.c. If this file is compiled as part
19847** of the amalgamation, they are not required.
19848*/
19849#ifndef SQLITE_AMALGAMATION
19850
/*
** Lookup table used by READ_UTF8. It is indexed by (lead byte - 0xc0) and
** yields the starting value of the codepoint being decoded: the low 5
** bits of the lead byte for 0xc0..0xdf, the low 4 bits for 0xe0..0xef,
** the low 3 bits for 0xf0..0xf7, and the values listed in the final row
** for 0xf8..0xff.
*/
static const unsigned char sqlite3Utf8Trans1[] = {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
19861
/*
** READ_UTF8(zIn, zTerm, c): decode one UTF-8 character.
**
** Reads the byte at zIn into c and advances zIn. If that byte is 0xc0 or
** greater it is treated as a lead byte: c is seeded from
** sqlite3Utf8Trans1[] and each following continuation byte (of the form
** 10xxxxxx) contributes six more bits, stopping at zTerm. A decoded value
** that is less than 0x80, lies in the range 0xD800..0xDFFF, or is 0xFFFE
** or 0xFFFF is replaced by 0xFFFD.
**
** zIn must be a modifiable pointer to unsigned bytes and c a modifiable
** unsigned integer; both are evaluated several times. The expansion is a
** sequence of statements, not a single expression.
*/
#define READ_UTF8(zIn, zTerm, c)                           \
  c = *(zIn++);                                            \
  if( c>=0xc0 ){                                           \
    c = sqlite3Utf8Trans1[c-0xc0];                         \
    while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
      c = (c<<6) + (0x3f & *(zIn++));                      \
    }                                                      \
    if( c<0x80                                             \
        || (c&0xFFFFF800)==0xD800                          \
        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
  }
19873
19874
/*
** WRITE_UTF8(zOut, c): append codepoint c to the buffer at zOut as UTF-8
** and advance zOut past the bytes written.
**
** One byte is written for values below 0x80, two below 0x800, three
** below 0x10000 and four otherwise. No bounds checking is done, so the
** caller must ensure that at least four bytes are available at zOut.
** Both arguments are evaluated more than once.
*/
#define WRITE_UTF8(zOut, c) {                          \
  if( c<0x00080 ){                                     \
    *zOut++ = (unsigned char)(c&0xFF);                 \
  }                                                    \
  else if( c<0x00800 ){                                \
    *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
  else if( c<0x10000 ){                                \
    *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }else{                                               \
    *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
    *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
    *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
    *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
  }                                                    \
}
19894
19895#endif /* ifndef SQLITE_AMALGAMATION */
19896
/*
** State of a single "unicode61" tokenizer instance.
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int eRemoveDiacritic;           /* An FTS5_REMOVE_DIACRITICS_* value */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Codepoints classified opposite to their
                                  ** category, kept in ascending order */

  unsigned char aCategory[32];    /* True for token char categories */
};

/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
19913
19914static int fts5UnicodeAddExceptions(
19915 Unicode61Tokenizer *p, /* Tokenizer object */
19916 const char *z, /* Characters to treat as exceptions */
19917 int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */
19918){
19919 int rc = SQLITE_OK;
19920 int n = (int)strlen(z);
19921 int *aNew;
19922
19923 if( n>0 ){
19924 aNew = (int*)sqlite3_realloc64(p->aiException,
19925 (n+p->nException)*sizeof(int));
19926 if( aNew ){
19927 int nNew = p->nException;
19928 const unsigned char *zCsr = (const unsigned char*)z;
19929 const unsigned char *zTerm = (const unsigned char*)&z[n];
19930 while( zCsr<zTerm ){
19931 u32 iCode;
19932 int bToken;
19933 READ_UTF8(zCsr, zTerm, iCode);
19934 if( iCode<128 ){
19935 p->aTokenChar[iCode] = (unsigned char)bTokenChars;
19936 }else{
19937 bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
19938 assert( (bToken==0 || bToken==1) );
19939 assert( (bTokenChars==0 || bTokenChars==1) );
19940 if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
19941 int i;
19942 for(i=0; i<nNew; i++){
19943 if( (u32)aNew[i]>iCode ) break;
19944 }
19945 memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
19946 aNew[i] = iCode;
19947 nNew++;
19948 }
19949 }
19950 }
19951 p->aiException = aNew;
19952 p->nException = nNew;
19953 }else{
19954 rc = SQLITE_NOMEM;
19955 }
19956 }
19957
19958 return rc;
19959}
19960
19961/*
19962** Return true if the p->aiException[] array contains the value iCode.
19963*/
19964static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
19965 if( p->nException>0 ){
19966 int *a = p->aiException;
19967 int iLo = 0;
19968 int iHi = p->nException-1;
19969
19970 while( iHi>=iLo ){
19971 int iTest = (iHi + iLo) / 2;
19972 if( iCode==a[iTest] ){
19973 return 1;
19974 }else if( iCode>a[iTest] ){
19975 iLo = iTest+1;
19976 }else{
19977 iHi = iTest-1;
19978 }
19979 }
19980 }
19981
19982 return 0;
19983}
19984
19985/*
19986** Delete a "unicode61" tokenizer.
19987*/
19988static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
19989 if( pTok ){
19990 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
19991 sqlite3_free(p->aiException);
19992 sqlite3_free(p->aFold);
19993 sqlite3_free(p);
19994 }
19995 return;
19996}
19997
19998static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){
19999 const char *z = zCat;
20000
20001 while( *z ){
20002 while( *z==' ' || *z=='\t' ) z++;
20003 if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){
20004 return SQLITE_ERROR;
20005 }
20006 while( *z!=' ' && *z!='\t' && *z!='\0' ) z++;
20007 }
20008
20009 sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar);
20010 return SQLITE_OK;
20011}
20012
20013/*
20014** Create a "unicode61" tokenizer.
20015*/
20016static int fts5UnicodeCreate(
20017 void *pUnused,
20018 const char **azArg, int nArg,
20019 Fts5Tokenizer **ppOut
20020){
20021 int rc = SQLITE_OK; /* Return code */
20022 Unicode61Tokenizer *p = 0; /* New tokenizer object */
20023
20024 UNUSED_PARAM(pUnused);
20025
20026 if( nArg%2 ){
20027 rc = SQLITE_ERROR;
20028 }else{
20029 p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
20030 if( p ){
20031 const char *zCat = "L* N* Co";
20032 int i;
20033 memset(p, 0, sizeof(Unicode61Tokenizer));
20034
20035 p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE;
20036 p->nFold = 64;
20037 p->aFold = sqlite3_malloc64(p->nFold * sizeof(char));
20038 if( p->aFold==0 ){
20039 rc = SQLITE_NOMEM;
20040 }
20041
20042 /* Search for a "categories" argument */
20043 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
20044 if( 0==sqlite3_stricmp(azArg[i], "categories") ){
20045 zCat = azArg[i+1];
20046 }
20047 }
20048
20049 if( rc==SQLITE_OK ){
20050 rc = unicodeSetCategories(p, zCat);
20051 }
20052
20053 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
20054 const char *zArg = azArg[i+1];
20055 if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
20056 if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
20057 rc = SQLITE_ERROR;
20058 }else{
20059 p->eRemoveDiacritic = (zArg[0] - '0');
20060 assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE
20061 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE
20062 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX
20063 );
20064 }
20065 }else
20066 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
20067 rc = fts5UnicodeAddExceptions(p, zArg, 1);
20068 }else
20069 if( 0==sqlite3_stricmp(azArg[i], "separators") ){
20070 rc = fts5UnicodeAddExceptions(p, zArg, 0);
20071 }else
20072 if( 0==sqlite3_stricmp(azArg[i], "categories") ){
20073 /* no-op */
20074 }else{
20075 rc = SQLITE_ERROR;
20076 }
20077 }
20078
20079 }else{
20080 rc = SQLITE_NOMEM;
20081 }
20082 if( rc!=SQLITE_OK ){
20083 fts5UnicodeDelete((Fts5Tokenizer*)p);
20084 p = 0;
20085 }
20086 *ppOut = (Fts5Tokenizer*)p;
20087 }
20088 return rc;
20089}
20090
20091/*
20092** Return true if, for the purposes of tokenizing with the tokenizer
20093** passed as the first argument, codepoint iCode is considered a token
20094** character (not a separator).
20095*/
20096static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
20097 return (
20098 p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)]
20099 ^ fts5UnicodeIsException(p, iCode)
20100 );
20101}
20102
/*
** Tokenize some text using the unicode61 tokenizer.
**
** The nText bytes at pText are decoded as UTF-8 and split into runs of
** token characters. Each run is case-folded (and has diacritics handled
** as configured by p->eRemoveDiacritic) into the tokenizer's aFold[]
** buffer, which grows as required, and is then passed to xToken() along
** with the byte offsets of the start and end of the token within pText.
**
** Processing stops when the input is exhausted or xToken() returns
** something other than SQLITE_OK. SQLITE_DONE is reported to the caller
** as SQLITE_OK. SQLITE_NOMEM is returned if the fold buffer cannot be
** enlarged.
**
** Control flow note: the separator-skipping loop leaves via "goto"
** directly into the middle of the token-gathering loop, at label
** non_ascii_tokenchar or ascii_tokenchar, so that the first character of
** a token is not classified twice. The order of statements in the second
** loop matters for that reason.
*/
static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int iUnused,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer. pEnd is placed 6 bytes before the end of the buffer;
  ** the buffer is enlarged whenever the write position passes it. */
  char *aFold = p->aFold;
  int nFold = p->nFold;
  const char *pEnd = &aFold[nFold-6];

  UNUSED_PARAM(iUnused);

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token. */
  while( rc==SQLITE_OK ){
    u32 iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;
    int is;
    int ie;

    /* Skip any separator characters. */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;
      if( *zCsr & 0x80 ) {
        /* A character outside of the ascii range. Skip past it if it is
        ** a separator character. Or break out of the loop if it is not. */
        is = zCsr - (unsigned char*)pText;
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p, iCode) ){
          goto non_ascii_tokenchar;
        }
      }else{
        if( a[*zCsr] ){
          is = zCsr - (unsigned char*)pText;
          goto ascii_tokenchar;
        }
        zCsr++;
      }
    }

    /* Run through the tokenchars. Fold them into the output buffer along
    ** the way. */
    while( zCsr<zTerm ){

      /* Grow the output buffer so that there is sufficient space to fit the
      ** largest possible utf-8 character. */
      if( zOut>pEnd ){
        aFold = sqlite3_malloc64((sqlite3_int64)nFold*2);
        if( aFold==0 ){
          rc = SQLITE_NOMEM;
          goto tokenize_done;
        }
        /* Re-base the write position onto the new buffer before the old
        ** one is freed. */
        zOut = &aFold[zOut - p->aFold];
        memcpy(aFold, p->aFold, nFold);
        sqlite3_free(p->aFold);
        p->aFold = aFold;
        p->nFold = nFold = nFold*2;
        pEnd = &aFold[nFold-6];
      }

      if( *zCsr & 0x80 ){
        /* An non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic);
          /* A folded value of zero writes nothing to the output. */
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break;
      }else{
 ascii_tokenchar:
        if( *zCsr>='A' && *zCsr<='Z' ){
          *zOut++ = *zCsr + 32;
        }else{
          *zOut++ = *zCsr;
        }
        zCsr++;
      }
      /* Reached only after a character has been added to the token, so ie
      ** always ends up as the offset just past the last token character. */
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
  }

 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
20206
20207/**************************************************************************
20208** Start of porter stemmer implementation.
20209*/
20210
/* Any tokens larger than this (in bytes) are passed through without
** stemming. */
#define FTS5_PORTER_MAX_TOKEN 64

/*
** A "porter" tokenizer instance. It wraps an instance of another
** ("parent") tokenizer, created by fts5PorterCreate().
*/
typedef struct PorterTokenizer PorterTokenizer;
struct PorterTokenizer {
  fts5_tokenizer tokenizer;       /* Parent tokenizer module */
  Fts5Tokenizer *pTokenizer;      /* Parent tokenizer instance */
  char aBuf[FTS5_PORTER_MAX_TOKEN + 64];  /* Presumably the scratch buffer
                                          ** in which tokens are stemmed -
                                          ** TODO confirm against the
                                          ** xTokenize implementation */
};
20221
20222/*
20223** Delete a "porter" tokenizer.
20224*/
20225static void fts5PorterDelete(Fts5Tokenizer *pTok){
20226 if( pTok ){
20227 PorterTokenizer *p = (PorterTokenizer*)pTok;
20228 if( p->pTokenizer ){
20229 p->tokenizer.xDelete(p->pTokenizer);
20230 }
20231 sqlite3_free(p);
20232 }
20233}
20234
20235/*
20236** Create a "porter" tokenizer.
20237*/
20238static int fts5PorterCreate(
20239 void *pCtx,
20240 const char **azArg, int nArg,
20241 Fts5Tokenizer **ppOut
20242){
20243 fts5_api *pApi = (fts5_api*)pCtx;
20244 int rc = SQLITE_OK;
20245 PorterTokenizer *pRet;
20246 void *pUserdata = 0;
20247 const char *zBase = "unicode61";
20248
20249 if( nArg>0 ){
20250 zBase = azArg[0];
20251 }
20252
20253 pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer));
20254 if( pRet ){
20255 memset(pRet, 0, sizeof(PorterTokenizer));
20256 rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer);
20257 }else{
20258 rc = SQLITE_NOMEM;
20259 }
20260 if( rc==SQLITE_OK ){
20261 int nArg2 = (nArg>0 ? nArg-1 : 0);
20262 const char **azArg2 = (nArg2 ? &azArg[1] : 0);
20263 rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer);
20264 }
20265
20266 if( rc!=SQLITE_OK ){
20267 fts5PorterDelete((Fts5Tokenizer*)pRet);
20268 pRet = 0;
20269 }
20270 *ppOut = (Fts5Tokenizer*)pRet;
20271 return rc;
20272}
20273
/*
** Context object that fts5PorterCb() receives as its first argument.
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  void *pCtx;                     /* Presumably the context to pass on to
                                  ** xToken - confirm against the caller */
  int (*xToken)(void*, int, const char*, int, int, int);  /* Token callback */
  char *aBuf;                     /* Buffer in which tokens are stemmed */
};
20280
/*
** A single suffix-rewriting rule, as consumed by fts5PorterApply().
*/
typedef struct PorterRule PorterRule;
struct PorterRule {
  const char *zSuffix;            /* Suffix to match (NULL ends a rule list) */
  int nSuffix;                    /* Length of zSuffix in bytes */
  int (*xCond)(char *zStem, int nStem);  /* Condition on the stem, or NULL */
  const char *zOutput;            /* Replacement for the suffix */
  int nOutput;                    /* Length of zOutput in bytes */
};
20289
#if 0
/*
** NOTE: this function is compiled out by the surrounding "#if 0".
**
** Apply the first rule in aRule[] (a list terminated by an entry with a
** NULL zSuffix) whose suffix matches the end of the nBuf byte token in
** aBuf. If that rule has no condition, or its condition holds for the
** stem (the token with the suffix removed), the suffix is overwritten
** with the rule's output, *pnBuf is updated and the index of the rule is
** returned. Otherwise -1 is returned and the token is left unchanged.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int ret = -1;
  int nBuf = *pnBuf;
  PorterRule *p;

  /* Find the first rule whose suffix matches */
  for(p=aRule; p->zSuffix; p++){
    assert( strlen(p->zSuffix)==p->nSuffix );
    assert( strlen(p->zOutput)==p->nOutput );
    if( nBuf<p->nSuffix ) continue;
    if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break;
  }

  if( p->zSuffix ){
    int nStem = nBuf - p->nSuffix;
    if( p->xCond==0 || p->xCond(aBuf, nStem) ){
      memcpy(&aBuf[nStem], p->zOutput, p->nOutput);
      *pnBuf = nStem + p->nOutput;
      ret = p - aRule;
    }
  }

  return ret;
}
#endif
20315
/*
** Return 1 if character c is a vowel, or 0 otherwise. The vowels are
** 'a', 'e', 'i', 'o' and 'u', plus 'y' if bYIsVowel is non-zero. Only
** lower case letters are recognized.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel ? 1 : 0;
    default:
      return 0;
  }
}
20321
/*
** Scan the nStem byte stem zStem for the first vowel and then for the
** first consonant following it. If both are found, return the offset of
** the byte immediately after that consonant. Otherwise return 0.
**
** A 'y' counts as a vowel only when the character before it is a
** consonant. bPrevCons says whether the character preceding zStem[0]
** should be considered a consonant for this purpose.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bIsCons = bPrevCons;

  /* Scan for a vowel */
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons==0 ) break;
    iPos++;
  }

  /* Scan for a consonant, starting just after the vowel */
  iPos++;
  while( iPos<nStem ){
    bIsCons = !fts5PorterIsVowel(zStem[iPos], bIsCons);
    if( bIsCons ) return iPos+1;
    iPos++;
  }
  return 0;
}
20337
/* porter rule condition: (m > 0)
**
** True (1) if the stem contains at least one vowel that is followed,
** somewhere later, by a consonant. False (0) otherwise. */
static int fts5Porter_MGt0(char *zStem, int nStem){
  int nGobble = fts5PorterGobbleVC(zStem, nStem, 0);
  return (nGobble!=0);
}
20342
/* porter rule condition: (m > 1)
**
** True (1) if two successive vowel-then-consonant sequences can be
** consumed from the stem. False (0) otherwise. */
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  /* The first sequence ended on a consonant, hence bPrevCons==1. */
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
20352
/* porter rule condition: (m = 1)
**
** True (1) if exactly one vowel-then-consonant sequence can be consumed
** from the stem - that is, a first one is found but a second is not.
** False (0) otherwise. */
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  /* The first sequence ended on a consonant, hence bPrevCons==1. */
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
}
20362
/* porter rule condition: (*o)
**
** True (1) if the stem ends consonant-vowel-consonant and the final
** consonant is not 'w', 'x' or 'y'. False (0) otherwise, including for
** an empty stem.
**
** Every character of the stem has to be classified in order, because
** whether a 'y' is a vowel depends on the character before it. Only the
** last three classifications matter for the result. */
static int fts5Porter_Ostar(char *zStem, int nStem){
  int i;
  int mask = 0;                   /* Last three classifications, 1==consonant */
  int bCons = 0;

  /* Do not read zStem[-1] if the stem is empty. */
  if( nStem<1 ) return 0;

  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }
  for(i=0; i<nStem; i++){
    bCons = !fts5PorterIsVowel(zStem[i], bCons);
    assert( bCons==0 || bCons==1 );
    /* Keep only three bits. Letting the value grow by one bit per
    ** character would shift into the sign bit of a signed int (undefined
    ** behavior) for stems longer than 31 bytes. */
    mask = ((mask << 1) + bCons) & 0x0007;
  }
  return (mask==0x0005);
}
20379
/* porter rule condition: (m > 1 and (*S or *T))
**
** True (1) if the stem ends in 's' or 't' and also satisfies (m > 1).
** The stem must not be empty. */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem);
}
20386
/* porter rule condition: (*v*)
**
** True (1) if the stem contains a vowel. A 'y' is accepted as a vowel
** anywhere except at the very start of the stem. */
static int fts5Porter_Vowel(char *zStem, int nStem){
  int iPos = 0;
  while( iPos<nStem ){
    int bYIsVowel = (iPos>0);
    if( fts5PorterIsVowel(zStem[iPos], bYIsVowel) ) return 1;
    iPos++;
  }
  return 0;
}
20397
20398
20399/**************************************************************************
20400***************************************************************************
20401** GENERATED CODE STARTS HERE (mkportersteps.tcl)
20402*/
20403
20404static int fts5PorterStep4(char *aBuf, int *pnBuf){
20405 int ret = 0;
20406 int nBuf = *pnBuf;
20407 switch( aBuf[nBuf-2] ){
20408
20409 case 'a':
20410 if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
20411 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
20412 *pnBuf = nBuf - 2;
20413 }
20414 }
20415 break;
20416
20417 case 'c':
20418 if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
20419 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
20420 *pnBuf = nBuf - 4;
20421 }
20422 }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
20423 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
20424 *pnBuf = nBuf - 4;
20425 }
20426 }
20427 break;
20428
20429 case 'e':
20430 if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
20431 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
20432 *pnBuf = nBuf - 2;
20433 }
20434 }
20435 break;
20436
20437 case 'i':
20438 if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
20439 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
20440 *pnBuf = nBuf - 2;
20441 }
20442 }
20443 break;
20444
20445 case 'l':
20446 if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
20447 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
20448 *pnBuf = nBuf - 4;
20449 }
20450 }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
20451 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
20452 *pnBuf = nBuf - 4;
20453 }
20454 }
20455 break;
20456
20457 case 'n':
20458 if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
20459 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20460 *pnBuf = nBuf - 3;
20461 }
20462 }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
20463 if( fts5Porter_MGt1(aBuf, nBuf-5) ){
20464 *pnBuf = nBuf - 5;
20465 }
20466 }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
20467 if( fts5Porter_MGt1(aBuf, nBuf-4) ){
20468 *pnBuf = nBuf - 4;
20469 }
20470 }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
20471 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20472 *pnBuf = nBuf - 3;
20473 }
20474 }
20475 break;
20476
20477 case 'o':
20478 if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
20479 if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
20480 *pnBuf = nBuf - 3;
20481 }
20482 }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
20483 if( fts5Porter_MGt1(aBuf, nBuf-2) ){
20484 *pnBuf = nBuf - 2;
20485 }
20486 }
20487 break;
20488
20489 case 's':
20490 if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
20491 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20492 *pnBuf = nBuf - 3;
20493 }
20494 }
20495 break;
20496
20497 case 't':
20498 if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
20499 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20500 *pnBuf = nBuf - 3;
20501 }
20502 }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
20503 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20504 *pnBuf = nBuf - 3;
20505 }
20506 }
20507 break;
20508
20509 case 'u':
20510 if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
20511 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20512 *pnBuf = nBuf - 3;
20513 }
20514 }
20515 break;
20516
20517 case 'v':
20518 if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
20519 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20520 *pnBuf = nBuf - 3;
20521 }
20522 }
20523 break;
20524
20525 case 'z':
20526 if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
20527 if( fts5Porter_MGt1(aBuf, nBuf-3) ){
20528 *pnBuf = nBuf - 3;
20529 }
20530 }
20531 break;
20532
20533 }
20534 return ret;
20535}
20536
20537
/*
** Porter stemmer step 1b, second part: if the nBuf byte token in aBuf is
** longer than two bytes and ends in "at", "bl" or "iz", append an 'e'
** (giving "ate", "ble" or "ize"), add one to *pnBuf and return 1.
** Otherwise leave everything unchanged and return 0.
**
** The caller must ensure that there is room for one extra byte in aBuf.
**
** Tokens of fewer than three bytes are rejected before any byte is
** examined. The previous implementation switched on aBuf[nBuf-2] first,
** which lies outside the token when nBuf<2.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int nBuf = *pnBuf;
  const char *zNew = 0;           /* Three byte replacement, if any */

  if( nBuf>2 ){
    const char *zTail = &aBuf[nBuf-2];
    if( 0==memcmp("at", zTail, 2) ){
      zNew = "ate";
    }else if( 0==memcmp("bl", zTail, 2) ){
      zNew = "ble";
    }else if( 0==memcmp("iz", zTail, 2) ){
      zNew = "ize";
    }
  }
  if( zNew==0 ) return 0;

  memcpy(&aBuf[nBuf-2], zNew, 3);
  *pnBuf = nBuf - 2 + 3;
  return 1;
}
20570
20571
/*
** Porter stemmer step 2: replace one of a fixed list of suffixes of the
** nBuf byte token in aBuf with a shorter (or equal length) form,
** provided the stem that precedes the suffix satisfies (m > 0).
**
** The rules are tried in the order listed and only the first one whose
** suffix matches is considered: if the stem condition does not hold the
** token is left unchanged. A suffix matches only if the token is strictly
** longer than it. Where one suffix is itself a suffix of another (for
** example "ization" and "ation") the longer one must be listed first.
**
** On a successful match the suffix is overwritten in place and *pnBuf is
** set to the new token length. The return value is always 0.
**
** This is a table-driven form of the per-letter switch used previously.
** That version began by reading aBuf[nBuf-2], which lies outside the
** token when nBuf<2. No byte outside the token is read here. NOTE(review):
** this function sits in the region marked as generated by
** mkportersteps.tcl; regenerating that region will replace it.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  static const struct PorterStep2Rule {
    const char *zSuffix;          /* Suffix to match */
    int nSuffix;                  /* Length of zSuffix in bytes */
    const char *zOutput;          /* Replacement text */
    int nOutput;                  /* Length of zOutput in bytes */
  } aRule[] = {
    { "ational", 7, "ate",  3 },
    { "tional",  6, "tion", 4 },
    { "enci",    4, "ence", 4 },
    { "anci",    4, "ance", 4 },
    { "izer",    4, "ize",  3 },
    { "logi",    4, "log",  3 },
    { "bli",     3, "ble",  3 },
    { "alli",    4, "al",   2 },
    { "entli",   5, "ent",  3 },
    { "eli",     3, "e",    1 },
    { "ousli",   5, "ous",  3 },
    { "ization", 7, "ize",  3 },
    { "ation",   5, "ate",  3 },
    { "ator",    4, "ate",  3 },
    { "alism",   5, "al",   2 },
    { "iveness", 7, "ive",  3 },
    { "fulness", 7, "ful",  3 },
    { "ousness", 7, "ous",  3 },
    { "aliti",   5, "al",   2 },
    { "iviti",   5, "ive",  3 },
    { "biliti",  6, "ble",  3 },
  };
  int nBuf = *pnBuf;
  int i;

  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    int nStem = nBuf - aRule[i].nSuffix;
    if( nStem>0 && 0==memcmp(aRule[i].zSuffix, &aBuf[nStem], aRule[i].nSuffix) ){
      if( fts5Porter_MGt0(aBuf, nStem) ){
        memcpy(&aBuf[nStem], aRule[i].zOutput, aRule[i].nOutput);
        *pnBuf = nStem + aRule[i].nOutput;
      }
      break;
    }
  }
  return 0;
}
20717
20718
/*
** Porter stemmer step 3: shorten or remove one of a fixed list of
** suffixes of the nBuf byte token in aBuf, provided the stem that
** precedes the suffix satisfies (m > 0).
**
** The rules are tried in the order listed and only the first one whose
** suffix matches is considered: if the stem condition does not hold the
** token is left unchanged. A suffix matches only if the token is strictly
** longer than it. A rule with an empty output simply removes the suffix.
**
** On a successful match *pnBuf is set to the new token length. The return
** value is always 0.
**
** This is a table-driven form of the per-letter switch used previously.
** That version began by reading aBuf[nBuf-2], which lies outside the
** token when nBuf<2. No byte outside the token is read here. NOTE(review):
** this function sits in the region marked as generated by
** mkportersteps.tcl; regenerating that region will replace it.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  static const struct PorterStep3Rule {
    const char *zSuffix;          /* Suffix to match */
    int nSuffix;                  /* Length of zSuffix in bytes */
    const char *zOutput;          /* Replacement text (may be empty) */
    int nOutput;                  /* Length of zOutput in bytes */
  } aRule[] = {
    { "ical",  4, "ic", 2 },
    { "ness",  4, "",   0 },
    { "icate", 5, "ic", 2 },
    { "iciti", 5, "ic", 2 },
    { "ful",   3, "",   0 },
    { "ative", 5, "",   0 },
    { "alize", 5, "al", 2 },
  };
  int nBuf = *pnBuf;
  int i;

  for(i=0; i<(int)(sizeof(aRule)/sizeof(aRule[0])); i++){
    int nStem = nBuf - aRule[i].nSuffix;
    if( nStem>0 && 0==memcmp(aRule[i].zSuffix, &aBuf[nStem], aRule[i].nSuffix) ){
      if( fts5Porter_MGt0(aBuf, nStem) ){
        if( aRule[i].nOutput>0 ){
          memcpy(&aBuf[nStem], aRule[i].zOutput, aRule[i].nOutput);
        }
        *pnBuf = nStem + aRule[i].nOutput;
      }
      break;
    }
  }
  return 0;
}
20783
20784
/*
** Porter stemmer step 1b, first part. For the nBuf byte token in aBuf:
**
**   * If it ends in "eed" (and is longer than that) and the preceding
**     stem satisfies (m > 0), the suffix becomes "ee". Returns 0.
**
**   * Otherwise, if it ends in "ed" (and is longer than that) and the
**     preceding stem contains a vowel, the suffix is removed. Returns 1.
**
**   * Otherwise, if it ends in "ing" (and is longer than that) and the
**     preceding stem contains a vowel, the suffix is removed. Returns 1.
**
** In all other cases the token is left unchanged and 0 is returned. In
** particular, a token that ends in "eed" is never tested against "ed",
** even if the "eed" condition fails. *pnBuf is updated whenever the token
** length changes. A return value of 1 tells the caller that the follow-up
** rules of step 1b should be applied.
**
** Each suffix test checks the token length before any byte is read. The
** previous implementation switched on aBuf[nBuf-2] first, which lies
** outside the token when nBuf<2.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  int nBuf = *pnBuf;

  if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){
    if( fts5Porter_MGt0(aBuf, nBuf-3) ){
      memcpy(&aBuf[nBuf-3], "ee", 2);
      *pnBuf = nBuf - 3 + 2;
    }
    return 0;
  }

  if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){
    if( fts5Porter_Vowel(aBuf, nBuf-2) ){
      *pnBuf = nBuf - 2;
      return 1;
    }
    return 0;
  }

  if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){
    if( fts5Porter_Vowel(aBuf, nBuf-3) ){
      *pnBuf = nBuf - 3;
      return 1;
    }
  }
  return 0;
}
20816
20817/*
20818** GENERATED CODE ENDS HERE (mkportersteps.tcl)
20819***************************************************************************
20820**************************************************************************/
20821
/*
** Step 1a of the porter stemmer: strip plural suffixes. The token is the
** first *pnBuf bytes of aBuf[]; only *pnBuf is modified, never aBuf[].
**
**   "sses" -> "ss"    (token longer than 4 bytes)
**   "ies"  -> "i"     (token longer than 3 bytes)
**   "ss"   -> "ss"    (unchanged)
**   "s"    -> ""      (any other token ending in 's')
**
** aBuf[*pnBuf-2] is read whenever the token ends in 's', so *pnBuf must be
** at least 2.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int n = *pnBuf;
  char cPenult;

  if( aBuf[n-1]!='s' ) return;          /* Not a plural candidate */

  cPenult = aBuf[n-2];
  if( cPenult=='s' ) return;            /* "...ss" is left alone */

  if( cPenult=='e'
   && ( (n>4 && aBuf[n-4]=='s' && aBuf[n-3]=='s')
     || (n>3 && aBuf[n-3]=='i') )
  ){
    *pnBuf = n-2;                       /* "sses"->"ss", "ies"->"i" */
  }else{
    *pnBuf = n-1;                       /* Drop the trailing 's' only */
  }
}
20839
20840static int fts5PorterCb(
20841 void *pCtx,
20842 int tflags,
20843 const char *pToken,
20844 int nToken,
20845 int iStart,
20846 int iEnd
20847){
20848 PorterContext *p = (PorterContext*)pCtx;
20849
20850 char *aBuf;
20851 int nBuf;
20852
20853 if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
20854 aBuf = p->aBuf;
20855 nBuf = nToken;
20856 memcpy(aBuf, pToken, nBuf);
20857
20858 /* Step 1. */
20859 fts5PorterStep1A(aBuf, &nBuf);
20860 if( fts5PorterStep1B(aBuf, &nBuf) ){
20861 if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
20862 char c = aBuf[nBuf-1];
20863 if( fts5PorterIsVowel(c, 0)==0
20864 && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2]
20865 ){
20866 nBuf--;
20867 }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
20868 aBuf[nBuf++] = 'e';
20869 }
20870 }
20871 }
20872
20873 /* Step 1C. */
20874 if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
20875 aBuf[nBuf-1] = 'i';
20876 }
20877
20878 /* Steps 2 through 4. */
20879 fts5PorterStep2(aBuf, &nBuf);
20880 fts5PorterStep3(aBuf, &nBuf);
20881 fts5PorterStep4(aBuf, &nBuf);
20882
20883 /* Step 5a. */
20884 assert( nBuf>0 );
20885 if( aBuf[nBuf-1]=='e' ){
20886 if( fts5Porter_MGt1(aBuf, nBuf-1)
20887 || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
20888 ){
20889 nBuf--;
20890 }
20891 }
20892
20893 /* Step 5b. */
20894 if( nBuf>1 && aBuf[nBuf-1]=='l'
20895 && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1)
20896 ){
20897 nBuf--;
20898 }
20899
20900 return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
20901
20902 pass_through:
20903 return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
20904}
20905
20906/*
20907** Tokenize using the porter tokenizer.
20908*/
20909static int fts5PorterTokenize(
20910 Fts5Tokenizer *pTokenizer,
20911 void *pCtx,
20912 int flags,
20913 const char *pText, int nText,
20914 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
20915){
20916 PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
20917 PorterContext sCtx;
20918 sCtx.xToken = xToken;
20919 sCtx.pCtx = pCtx;
20920 sCtx.aBuf = p->aBuf;
20921 return p->tokenizer.xTokenize(
20922 p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
20923 );
20924}
20925
20926/**************************************************************************
20927** Start of trigram implementation.
20928*/
/*
** State of a trigram tokenizer instance. The only configuration is whether
** characters are folded before trigrams are emitted.
*/
typedef struct TrigramTokenizer {
  int bFold;                      /* True to fold to lower-case */
} TrigramTokenizer;
20933
/*
** Free a trigram tokenizer.
**
** p is handed directly to sqlite3_free(); there is no other per-tokenizer
** state to release. fts5TriCreate() obtains the object from
** sqlite3_malloc() and also calls this function to discard a partially
** configured tokenizer when option parsing fails.
*/
static void fts5TriDelete(Fts5Tokenizer *p){
  sqlite3_free(p);
}
20940
20941/*
20942** Allocate a trigram tokenizer.
20943*/
20944static int fts5TriCreate(
20945 void *pUnused,
20946 const char **azArg,
20947 int nArg,
20948 Fts5Tokenizer **ppOut
20949){
20950 int rc = SQLITE_OK;
20951 TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
20952 UNUSED_PARAM(pUnused);
20953 if( pNew==0 ){
20954 rc = SQLITE_NOMEM;
20955 }else{
20956 int i;
20957 pNew->bFold = 1;
20958 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
20959 const char *zArg = azArg[i+1];
20960 if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
20961 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
20962 rc = SQLITE_ERROR;
20963 }else{
20964 pNew->bFold = (zArg[0]=='0');
20965 }
20966 }else{
20967 rc = SQLITE_ERROR;
20968 }
20969 }
20970 if( rc!=SQLITE_OK ){
20971 fts5TriDelete((Fts5Tokenizer*)pNew);
20972 pNew = 0;
20973 }
20974 }
20975 *ppOut = (Fts5Tokenizer*)pNew;
20976 return rc;
20977}
20978
20979/*
20980** Trigram tokenizer tokenize routine.
20981*/
20982static int fts5TriTokenize(
20983 Fts5Tokenizer *pTok,
20984 void *pCtx,
20985 int unusedFlags,
20986 const char *pText, int nText,
20987 int (*xToken)(void*, int, const char*, int, int, int)
20988){
20989 TrigramTokenizer *p = (TrigramTokenizer*)pTok;
20990 int rc = SQLITE_OK;
20991 char aBuf[32];
20992 const unsigned char *zIn = (const unsigned char*)pText;
20993 const unsigned char *zEof = &zIn[nText];
20994 u32 iCode;
20995
20996 UNUSED_PARAM(unusedFlags);
20997 while( 1 ){
20998 char *zOut = aBuf;
20999 int iStart = zIn - (const unsigned char*)pText;
21000 const unsigned char *zNext;
21001
21002 READ_UTF8(zIn, zEof, iCode);
21003 if( iCode==0 ) break;
21004 zNext = zIn;
21005 if( zIn<zEof ){
21006 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0);
21007 WRITE_UTF8(zOut, iCode);
21008 READ_UTF8(zIn, zEof, iCode);
21009 if( iCode==0 ) break;
21010 }else{
21011 break;
21012 }
21013 if( zIn<zEof ){
21014 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0);
21015 WRITE_UTF8(zOut, iCode);
21016 READ_UTF8(zIn, zEof, iCode);
21017 if( iCode==0 ) break;
21018 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0);
21019 WRITE_UTF8(zOut, iCode);
21020 }else{
21021 break;
21022 }
21023 rc = xToken(pCtx, 0, aBuf, zOut-aBuf, iStart, iStart + zOut-aBuf);
21024 if( rc!=SQLITE_OK ) break;
21025 zIn = zNext;
21026 }
21027
21028 return rc;
21029}
21030
21031/*
21032** Argument xCreate is a pointer to a constructor function for a tokenizer.
21033** pTok is a tokenizer previously created using the same method. This function
21034** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB
21035** indicating the style of pattern matching that the tokenizer can support.
21036** In practice, this is:
21037**
21038** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB
21039** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE
21040** all other tokenizers - FTS5_PATTERN_NONE
21041*/
21042static int sqlite3Fts5TokenizerPattern(
21043 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
21044 Fts5Tokenizer *pTok
21045){
21046 if( xCreate==fts5TriCreate ){
21047 TrigramTokenizer *p = (TrigramTokenizer*)pTok;
21048 return p->bFold ? FTS5_PATTERN_LIKE : FTS5_PATTERN_GLOB;
21049 }
21050 return FTS5_PATTERN_NONE;
21051}
21052
21053/*
21054** Register all built-in tokenizers with FTS5.
21055*/
21056static int sqlite3Fts5TokenizerInit(fts5_api *pApi){
21057 struct BuiltinTokenizer {
21058 const char *zName;
21059 fts5_tokenizer x;
21060 } aBuiltin[] = {
21061 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
21062 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
21063 { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
21064 { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}},
21065 };
21066
21067 int rc = SQLITE_OK; /* Return code */
21068 int i; /* To iterate through builtin functions */
21069
21070 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
21071 rc = pApi->xCreateTokenizer(pApi,
21072 aBuiltin[i].zName,
21073 (void*)pApi,
21074 &aBuiltin[i].x,
21075 0
21076 );
21077 }
21078
21079 return rc;
21080}
21081
21082#line 1 "fts5_unicode2.c"
21083/*
21084** 2012-05-25
21085**
21086** The author disclaims copyright to this source code. In place of
21087** a legal notice, here is a blessing:
21088**
21089** May you do good and not evil.
21090** May you find forgiveness for yourself and forgive others.
21091** May you share freely, never taking more than you give.
21092**
21093******************************************************************************
21094*/
21095
21096/*
21097** DO NOT EDIT THIS MACHINE GENERATED FILE.
21098*/
21099
21100
21101#include <assert.h>
21102
21103
21104
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Codepoints that are not covered by the tables below are returned
** unchanged. Some table entries are flagged with HIBIT; those mappings
** are applied only if bComplex is non-zero, otherwise c is returned as is.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  /* Each aDia[] entry describes a run of consecutive codepoints that all
  ** map to the same ASCII letter. The upper 13 bits hold the first
  ** codepoint of the run and the lower 3 bits hold the number of further
  ** codepoints in it. The entries are sorted, so the run that may contain
  ** c can be located by binary search. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  /* aChar[i] is the ASCII replacement for the run described by aDia[i].
  ** If HIBIT is set the replacement is used only when bComplex is true. */
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  /* Setting the low 3 bits of the key makes it compare greater than or
  ** equal to any aDia[] entry whose run starts at codepoint c. */
  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
  int iRes = 0;
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* Find the last entry that is less than or equal to key. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* Mappings flagged with HIBIT are only applied in "complex" mode. */
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;

  /* c is mapped only if it lies inside the run; otherwise return it as is. */
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
21174
21175
/*
** Return non-zero if the argument interpreted as a unicode codepoint
** is a diacritical modifier character, or zero otherwise.
**
** Only codepoints 768 to 817 inclusive can qualify. Membership is stored
** as a 50-bit set split across two 32-bit words: bit (c-768) of the set
** is the flag for codepoint c. The value returned for a match is the
** selected bit itself, not necessarily 1.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  static const unsigned int aMask[2] = {
    0x08029FDF,                   /* Codepoints 768..799 */
    0x000361F8                    /* Codepoints 800..817 */
  };
  int iBit;

  if( c<768 || c>817 ) return 0;
  iBit = c - 768;
  return aMask[iBit>>5] & ((unsigned int)1 << (iBit & 0x1F));
}
21188
21189
/*
** Interpret the argument as a unicode codepoint. If the codepoint
** is an upper case character that has a lower case equivalent,
** return the codepoint corresponding to the lower case version.
** Otherwise, return a copy of the argument.
**
** For codepoints in the range 128..65535, a non-zero eRemoveDiacritic
** additionally passes the folded codepoint through fts5_remove_diacritic(),
** with its bComplex argument set if eRemoveDiacritic==2. Codepoints below
** 128 and above 65535 never have diacritics removed.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Each entry in the following array defines a rule for folding a range
  ** of codepoints to lower case. The rule applies to a range of nRange
  ** codepoints starting at codepoint iCode.
  **
  ** If the least significant bit in flags is clear, then the rule applies
  ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
  ** need to be folded). Or, if it is set, then the rule only applies to
  ** every second codepoint in the range, starting with codepoint C.
  **
  ** The 7 most significant bits in flags are an index into the aiOff[]
  ** array. If a specific codepoint C does require folding, then its lower
  ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the CaseFolding.txt
  ** file distributed as part of the "Unicode Character Database". See
  ** http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  const struct TableEntry *pRule; /* Candidate folding rule for c */
  int iFirst;                     /* Lower bound of binary search */
  int iLast;                      /* Upper bound of binary search */
  int iMatch = -1;                /* Last entry with iCode<=c */
  int iFolded = c;                /* Value to return */

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* ASCII range: only 'A'..'Z' change. */
  if( c<128 ){
    if( c>='A' && c<='Z' ) iFolded = c + ('a' - 'A');
    return iFolded;
  }

  /* Outside the 16-bit range, only one block of 40 codepoints is folded. */
  if( c>=65536 ){
    if( c>=66560 && c<66600 ) iFolded = c + 40;
    return iFolded;
  }

  /* 128..65535: binary search for the last rule starting at or before c. */
  assert( c>aEntry[0].iCode );
  iFirst = 0;
  iLast = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
  while( iFirst<=iLast ){
    int iMid = (iFirst + iLast) / 2;
    if( c>=aEntry[iMid].iCode ){
      iMatch = iMid;
      iFirst = iMid+1;
    }else{
      iLast = iMid-1;
    }
  }
  assert( iMatch>=0 && c>=aEntry[iMatch].iCode );
  pRule = &aEntry[iMatch];

  /* Apply the rule if c is inside its range and, for "every second
  ** codepoint" rules, c has the same parity as the first codepoint. */
  if( c<(pRule->iCode + pRule->nRange)
   && 0==(0x01 & pRule->flags & (pRule->iCode ^ c))
  ){
    iFolded = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
    assert( iFolded>0 );
  }

  if( eRemoveDiacritic ){
    iFolded = fts5_remove_diacritic(iFolded, eRemoveDiacritic==2);
  }
  return iFolded;
}
21333
21334
21335static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
21336 aArray[0] = 1;
21337 switch( zCat[0] ){
21338 case 'C':
21339 switch( zCat[1] ){
21340 case 'c': aArray[1] = 1; break;
21341 case 'f': aArray[2] = 1; break;
21342 case 'n': aArray[3] = 1; break;
21343 case 's': aArray[4] = 1; break;
21344 case 'o': aArray[31] = 1; break;
21345 case '*':
21346 aArray[1] = 1;
21347 aArray[2] = 1;
21348 aArray[3] = 1;
21349 aArray[4] = 1;
21350 aArray[31] = 1;
21351 break;
21352 default: return 1; }
21353 break;
21354
21355 case 'L':
21356 switch( zCat[1] ){
21357 case 'l': aArray[5] = 1; break;
21358 case 'm': aArray[6] = 1; break;
21359 case 'o': aArray[7] = 1; break;
21360 case 't': aArray[8] = 1; break;
21361 case 'u': aArray[9] = 1; break;
21362 case 'C': aArray[30] = 1; break;
21363 case '*':
21364 aArray[5] = 1;
21365 aArray[6] = 1;
21366 aArray[7] = 1;
21367 aArray[8] = 1;
21368 aArray[9] = 1;
21369 aArray[30] = 1;
21370 break;
21371 default: return 1; }
21372 break;
21373
21374 case 'M':
21375 switch( zCat[1] ){
21376 case 'c': aArray[10] = 1; break;
21377 case 'e': aArray[11] = 1; break;
21378 case 'n': aArray[12] = 1; break;
21379 case '*':
21380 aArray[10] = 1;
21381 aArray[11] = 1;
21382 aArray[12] = 1;
21383 break;
21384 default: return 1; }
21385 break;
21386
21387 case 'N':
21388 switch( zCat[1] ){
21389 case 'd': aArray[13] = 1; break;
21390 case 'l': aArray[14] = 1; break;
21391 case 'o': aArray[15] = 1; break;
21392 case '*':
21393 aArray[13] = 1;
21394 aArray[14] = 1;
21395 aArray[15] = 1;
21396 break;
21397 default: return 1; }
21398 break;
21399
21400 case 'P':
21401 switch( zCat[1] ){
21402 case 'c': aArray[16] = 1; break;
21403 case 'd': aArray[17] = 1; break;
21404 case 'e': aArray[18] = 1; break;
21405 case 'f': aArray[19] = 1; break;
21406 case 'i': aArray[20] = 1; break;
21407 case 'o': aArray[21] = 1; break;
21408 case 's': aArray[22] = 1; break;
21409 case '*':
21410 aArray[16] = 1;
21411 aArray[17] = 1;
21412 aArray[18] = 1;
21413 aArray[19] = 1;
21414 aArray[20] = 1;
21415 aArray[21] = 1;
21416 aArray[22] = 1;
21417 break;
21418 default: return 1; }
21419 break;
21420
21421 case 'S':
21422 switch( zCat[1] ){
21423 case 'c': aArray[23] = 1; break;
21424 case 'k': aArray[24] = 1; break;
21425 case 'm': aArray[25] = 1; break;
21426 case 'o': aArray[26] = 1; break;
21427 case '*':
21428 aArray[23] = 1;
21429 aArray[24] = 1;
21430 aArray[25] = 1;
21431 aArray[26] = 1;
21432 break;
21433 default: return 1; }
21434 break;
21435
21436 case 'Z':
21437 switch( zCat[1] ){
21438 case 'l': aArray[27] = 1; break;
21439 case 'p': aArray[28] = 1; break;
21440 case 's': aArray[29] = 1; break;
21441 case '*':
21442 aArray[27] = 1;
21443 aArray[28] = 1;
21444 aArray[29] = 1;
21445 break;
21446 default: return 1; }
21447 break;
21448
21449 }
21450 return 0;
21451}
21452
/*
** Machine-generated table of 17 non-decreasing 16-bit values.
**
** NOTE(review): the code that reads this table lies outside this part of
** the file. The values look like starting offsets into aFts5UnicodeMap[],
** one per block of the codepoint space - confirm against the consumer
** before relying on that.
*/
static u16 aFts5UnicodeBlock[] = {
    0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760,
    1760, 1760, 1760, 1760, 1760, 1763, 1765,
  };
/*
** Machine-generated table of 16-bit values. The table consists of several
** ascending runs laid end to end; each run restarts at a small value.
**
** NOTE(review): the code that reads this table lies outside this part of
** the file. The runs look like sorted range-start codepoints (low 16 bits)
** that are binary searched per block, with block boundaries given by
** aFts5UnicodeBlock[] - confirm against the consumer. Do not edit by hand.
*/
static u16 aFts5UnicodeMap[] = {
    0, 32, 33, 36, 37, 40, 41, 42, 43, 44,
    45, 46, 48, 58, 60, 63, 65, 91, 92, 93,
    94, 95, 96, 97, 123, 124, 125, 126, 127, 160,
    161, 162, 166, 167, 168, 169, 170, 171, 172, 173,
    174, 175, 176, 177, 178, 180, 181, 182, 184, 185,
    186, 187, 188, 191, 192, 215, 216, 223, 247, 248,
    256, 312, 313, 329, 330, 377, 383, 385, 387, 388,
    391, 394, 396, 398, 402, 403, 405, 406, 409, 412,
    414, 415, 417, 418, 423, 427, 428, 431, 434, 436,
    437, 440, 442, 443, 444, 446, 448, 452, 453, 454,
    455, 456, 457, 458, 459, 460, 461, 477, 478, 496,
    497, 498, 499, 500, 503, 505, 506, 564, 570, 572,
    573, 575, 577, 580, 583, 584, 592, 660, 661, 688,
    706, 710, 722, 736, 741, 748, 749, 750, 751, 768,
    880, 884, 885, 886, 890, 891, 894, 900, 902, 903,
    904, 908, 910, 912, 913, 931, 940, 975, 977, 978,
    981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072,
    1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369,
    1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473,
    1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545,
    1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611,
    1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758,
    1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791,
    1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984,
    1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075,
    2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210,
    2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369,
    2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416,
    2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482,
    2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519,
    2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561,
    2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622,
    2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677,
    2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749,
    2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790,
    2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869,
    2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902,
    2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947,
    2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006,
    3008, 3009, 3014, 3018, 3021, 3024, 3031, 3046, 3056, 3059,
    3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134,
    3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199,
    3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263,
    3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302,
    3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402,
    3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458,
    3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544,
    3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655,
    3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737,
    3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773,
    3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860,
    3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896,
    3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967,
    3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046,
    4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153,
    4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190,
    4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229,
    4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295,
    4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704,
    4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888,
    4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743,
    5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906,
    5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068,
    6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107,
    6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160,
    6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435,
    6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480,
    6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679,
    6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754,
    6755, 6757, 6765, 6771, 6783, 6784, 6800, 6816, 6823, 6824,
    6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978,
    6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043,
    7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098,
    7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168,
    7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288,
    7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406,
    7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616,
    7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976,
    7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033,
    8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118,
    8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141,
    8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184,
    8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219,
    8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249,
    8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275,
    8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317,
    8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413,
    8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459,
    8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484,
    8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500,
    8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523,
    8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597,
    8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623,
    8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972,
    8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180,
    9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665,
    9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091,
    10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101,
    10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217,
    10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627,
    10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637,
    10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647,
    10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750,
    11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365,
    11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393,
    11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520,
    11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696,
    11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780,
    11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800,
    11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812,
    11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904,
    11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296,
    12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306,
    12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317,
    12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347,
    12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449,
    12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736,
    12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938,
    12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981,
    40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528,
    42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624,
    42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800,
    42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912,
    43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043,
    43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136,
    43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264,
    43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395,
    43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472,
    43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588,
    43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643,
    43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713,
    43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762,
    43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003,
    44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203,
    55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112,
    64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320,
    64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020,
    65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075,
    65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086,
    65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097,
    65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118,
    65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279,
    65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294,
    65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343,
    65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378,
    65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490,
    65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529,
    65532, 0, 13, 40, 60, 63, 80, 128, 256, 263,
    311, 320, 373, 377, 394, 400, 464, 509, 640, 672,
    768, 800, 816, 833, 834, 842, 896, 927, 928, 968,
    976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103,
    2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432,
    2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623,
    2624, 2640, 2656, 2685, 2687, 2816, 2873, 2880, 2904, 2912,
    2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178,
    4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285,
    4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416,
    4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760,
    5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216,
    9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248,
    53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637,
    53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298,
    54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441,
    54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541,
    54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662,
    54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922,
    54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062,
    55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178,
    55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961,
    60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003,
    61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028,
    61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099,
    61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744,
    61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368,
    62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971,
    63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488,
    1, 32, 256, 0, 65533,
  };
/*
** Run-length table consulted by sqlite3Fts5UnicodeCategory() and
** sqlite3Fts5UnicodeAscii(). Each entry packs two fields:
**
**   * the low 5 bits (entry & 0x1F) hold a category code, and
**   * the remaining upper bits (entry >> 5) hold the number of consecutive
**     codepoints covered by the entry.
**
** sqlite3Fts5UnicodeCategory() pairs entry i with aFts5UnicodeMap[i], which
** supplies the first (16-bit) codepoint of the run, and expands the special
** category value 30 into alternating categories 9 and 5.
** sqlite3Fts5UnicodeAscii() walks the entries in order from index 0,
** treating them as back-to-back runs that begin at codepoint 0.
*/
static u16 aFts5UnicodeData[] = {
21637 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53,
21638 49, 85, 333, 85, 121, 85, 841, 54, 53, 50,
21639 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61,
21640 53, 151, 58, 53, 56, 58, 39, 52, 57, 34,
21641 58, 56, 58, 57, 79, 56, 37, 85, 56, 47,
21642 39, 51, 111, 53, 745, 57, 233, 773, 57, 261,
21643 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126,
21644 126, 73, 69, 137, 37, 73, 37, 105, 101, 73,
21645 37, 73, 37, 190, 158, 37, 126, 126, 73, 37,
21646 126, 94, 37, 39, 94, 69, 135, 41, 40, 37,
21647 41, 40, 37, 41, 40, 37, 542, 37, 606, 37,
21648 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37,
21649 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582,
21650 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596,
21651 158, 38, 56, 94, 38, 101, 53, 88, 41, 53,
21652 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105,
21653 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541,
21654 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38,
21655 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76,
21656 53, 76, 53, 44, 871, 103, 85, 162, 121, 85,
21657 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684,
21658 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58,
21659 204, 70, 76, 58, 140, 71, 333, 103, 90, 39,
21660 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333,
21661 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300,
21662 38, 108, 38, 172, 501, 807, 108, 53, 39, 359,
21663 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268,
21664 138, 44, 74, 39, 236, 327, 76, 85, 333, 53,
21665 38, 199, 231, 44, 74, 263, 71, 711, 231, 39,
21666 135, 44, 39, 106, 140, 74, 74, 44, 39, 42,
21667 71, 103, 76, 333, 71, 87, 207, 58, 55, 76,
21668 42, 199, 71, 711, 231, 71, 71, 71, 44, 106,
21669 76, 76, 108, 44, 135, 39, 333, 76, 103, 44,
21670 76, 42, 295, 103, 711, 231, 71, 167, 44, 39,
21671 106, 172, 76, 42, 74, 44, 39, 71, 76, 333,
21672 53, 55, 44, 74, 263, 71, 711, 231, 71, 167,
21673 44, 39, 42, 44, 42, 140, 74, 74, 44, 44,
21674 42, 71, 103, 76, 333, 58, 39, 207, 44, 39,
21675 199, 103, 135, 71, 39, 71, 71, 103, 391, 74,
21676 44, 74, 106, 106, 44, 39, 42, 333, 111, 218,
21677 55, 58, 106, 263, 103, 743, 327, 167, 39, 108,
21678 138, 108, 140, 76, 71, 71, 76, 333, 239, 58,
21679 74, 263, 103, 743, 327, 167, 44, 39, 42, 44,
21680 170, 44, 74, 74, 76, 74, 39, 71, 76, 333,
21681 71, 74, 263, 103, 1319, 39, 106, 140, 106, 106,
21682 44, 39, 42, 71, 76, 333, 207, 58, 199, 74,
21683 583, 775, 295, 39, 231, 44, 106, 108, 44, 266,
21684 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268,
21685 53, 333, 85, 71, 39, 71, 39, 39, 135, 231,
21686 103, 39, 39, 71, 135, 44, 71, 204, 76, 39,
21687 167, 38, 204, 333, 135, 39, 122, 501, 58, 53,
21688 122, 76, 218, 333, 335, 58, 44, 58, 44, 58,
21689 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42,
21690 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90,
21691 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76,
21692 74, 76, 39, 333, 213, 199, 74, 76, 135, 108,
21693 39, 106, 71, 234, 103, 140, 423, 44, 74, 76,
21694 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41,
21695 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319,
21696 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151,
21697 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551,
21698 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108,
21699 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76,
21700 42, 236, 266, 44, 74, 364, 117, 38, 117, 55,
21701 39, 44, 333, 335, 213, 49, 149, 108, 61, 333,
21702 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138,
21703 76, 106, 74, 44, 202, 108, 58, 85, 333, 967,
21704 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76,
21705 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44,
21706 74, 268, 202, 332, 44, 333, 333, 245, 38, 213,
21707 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44,
21708 74, 231, 333, 245, 346, 300, 314, 76, 42, 967,
21709 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415,
21710 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159,
21711 266, 268, 74, 76, 181, 333, 103, 333, 967, 198,
21712 85, 277, 108, 53, 428, 42, 236, 135, 44, 135,
21713 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260,
21714 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265,
21715 261, 265, 197, 201, 261, 41, 41, 41, 94, 229,
21716 265, 453, 261, 264, 261, 264, 261, 264, 165, 69,
21717 137, 40, 56, 37, 120, 101, 69, 137, 40, 120,
21718 133, 69, 137, 120, 261, 169, 120, 101, 69, 137,
21719 40, 88, 381, 162, 209, 85, 52, 51, 54, 84,
21720 51, 54, 52, 277, 59, 60, 162, 61, 309, 52,
21721 51, 149, 80, 117, 57, 54, 50, 373, 57, 53,
21722 48, 341, 61, 162, 194, 47, 38, 207, 121, 54,
21723 50, 38, 335, 121, 54, 50, 422, 855, 428, 139,
21724 44, 107, 396, 90, 41, 154, 41, 90, 37, 105,
21725 69, 105, 37, 58, 41, 90, 57, 169, 218, 41,
21726 58, 41, 58, 41, 58, 137, 58, 37, 137, 37,
21727 135, 37, 90, 69, 73, 185, 94, 101, 58, 57,
21728 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186,
21729 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018,
21730 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666,
21731 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217,
21732 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57,
21733 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50,
21734 54, 50, 54, 50, 54, 50, 54, 50, 54, 50,
21735 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50,
21736 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54,
21737 50, 54, 50, 54, 50, 54, 50, 54, 50, 54,
21738 50, 54, 50, 54, 50, 54, 50, 54, 50, 54,
21739 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281,
21740 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69,
21741 254, 105, 37, 94, 37, 94, 165, 70, 105, 37,
21742 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221,
21743 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231,
21744 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52,
21745 51, 117, 52, 51, 53, 52, 51, 309, 49, 85,
21746 49, 53, 52, 51, 85, 52, 51, 54, 50, 54,
21747 50, 54, 50, 54, 50, 181, 38, 341, 81, 858,
21748 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54,
21749 50, 54, 50, 54, 50, 54, 50, 54, 50, 90,
21750 54, 50, 54, 50, 54, 50, 54, 50, 49, 54,
21751 82, 58, 302, 140, 74, 49, 166, 90, 110, 38,
21752 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887,
21753 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178,
21754 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274,
21755 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38,
21756 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333,
21757 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798,
21758 44, 2247, 334, 76, 213, 760, 294, 88, 478, 69,
21759 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382,
21760 70, 37, 231, 44, 103, 44, 135, 44, 743, 74,
21761 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74,
21762 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333,
21763 903, 268, 85, 743, 364, 74, 53, 935, 108, 42,
21764 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333,
21765 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263,
21766 44, 42, 333, 149, 519, 38, 199, 122, 39, 42,
21767 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44,
21768 39, 71, 38, 85, 359, 42, 76, 74, 85, 39,
21769 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74,
21770 44, 74, 44, 74, 53, 42, 44, 333, 39, 39,
21771 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399,
21772 229, 165, 39, 44, 327, 57, 423, 167, 39, 71,
21773 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55,
21774 58, 524, 245, 54, 50, 53, 236, 53, 81, 80,
21775 54, 50, 54, 50, 54, 50, 54, 50, 54, 50,
21776 54, 50, 54, 50, 54, 50, 85, 54, 50, 149,
21777 112, 117, 149, 49, 54, 50, 54, 50, 54, 50,
21778 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34,
21779 117, 55, 117, 54, 50, 53, 57, 53, 49, 85,
21780 333, 85, 121, 85, 841, 54, 53, 50, 56, 48,
21781 56, 837, 54, 57, 50, 57, 54, 50, 53, 54,
21782 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199,
21783 103, 87, 57, 56, 58, 87, 58, 153, 90, 98,
21784 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455,
21785 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575,
21786 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263,
21787 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71,
21788 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799,
21789 71, 39, 108, 76, 140, 135, 103, 871, 108, 44,
21790 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615,
21791 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655,
21792 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34,
21793 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149,
21794 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383,
21795 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182,
21796 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898,
21797 1274, 1946, 74, 108, 122, 202, 258, 268, 90, 236,
21798 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837,
21799 841, 229, 581, 841, 837, 41, 73, 41, 73, 137,
21800 265, 133, 37, 229, 357, 841, 837, 73, 137, 265,
21801 233, 837, 73, 137, 169, 41, 233, 837, 841, 837,
21802 841, 837, 841, 837, 841, 837, 841, 837, 841, 901,
21803 809, 57, 805, 57, 197, 809, 57, 805, 57, 197,
21804 809, 57, 805, 57, 197, 809, 57, 805, 57, 197,
21805 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71,
21806 39, 39, 327, 135, 39, 39, 39, 39, 39, 39,
21807 103, 71, 39, 39, 39, 39, 39, 39, 71, 39,
21808 135, 231, 135, 135, 39, 327, 551, 103, 167, 551,
21809 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946,
21810 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210,
21811 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266,
21812 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351,
21813 34, 3074, 7692, 63, 63,
21814 };
21815
21816static int sqlite3Fts5UnicodeCategory(u32 iCode) {
21817 int iRes = -1;
21818 int iHi;
21819 int iLo;
21820 int ret;
21821 u16 iKey;
21822
21823 if( iCode>=(1<<20) ){
21824 return 0;
21825 }
21826 iLo = aFts5UnicodeBlock[(iCode>>16)];
21827 iHi = aFts5UnicodeBlock[1+(iCode>>16)];
21828 iKey = (iCode & 0xFFFF);
21829 while( iHi>iLo ){
21830 int iTest = (iHi + iLo) / 2;
21831 assert( iTest>=iLo && iTest<iHi );
21832 if( iKey>=aFts5UnicodeMap[iTest] ){
21833 iRes = iTest;
21834 iLo = iTest+1;
21835 }else{
21836 iHi = iTest;
21837 }
21838 }
21839
21840 if( iRes<0 ) return 0;
21841 if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0;
21842 ret = aFts5UnicodeData[iRes] & 0x1F;
21843 if( ret!=30 ) return ret;
21844 return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9;
21845}
21846
21847static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){
21848 int i = 0;
21849 int iTbl = 0;
21850 while( i<128 ){
21851 int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ];
21852 int n = (aFts5UnicodeData[iTbl] >> 5) + i;
21853 for(; i<128 && i<n; i++){
21854 aAscii[i] = (u8)bToken;
21855 }
21856 iTbl++;
21857 }
21858 aAscii[0] = 0; /* 0x00 is never a token character */
21859}
21860
21861
21862#line 1 "fts5_varint.c"
21863/*
21864** 2015 May 30
21865**
21866** The author disclaims copyright to this source code. In place of
21867** a legal notice, here is a blessing:
21868**
21869** May you do good and not evil.
21870** May you find forgiveness for yourself and forgive others.
21871** May you share freely, never taking more than you give.
21872**
21873******************************************************************************
21874**
21875** Routines for varint serialization and deserialization.
21876*/
21877
21878
21879/* #include "fts5Int.h" */
21880
21881/*
21882** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
21883** Except, this version does handle the single byte case that the core
21884** version depends on being handled before its function is called.
21885*/
21886static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
21887 u32 a,b;
21888
21889 /* The 1-byte case. Overwhelmingly the most common. */
21890 a = *p;
21891 /* a: p0 (unmasked) */
21892 if (!(a&0x80))
21893 {
21894 /* Values between 0 and 127 */
21895 *v = a;
21896 return 1;
21897 }
21898
21899 /* The 2-byte case */
21900 p++;
21901 b = *p;
21902 /* b: p1 (unmasked) */
21903 if (!(b&0x80))
21904 {
21905 /* Values between 128 and 16383 */
21906 a &= 0x7f;
21907 a = a<<7;
21908 *v = a | b;
21909 return 2;
21910 }
21911
21912 /* The 3-byte case */
21913 p++;
21914 a = a<<14;
21915 a |= *p;
21916 /* a: p0<<14 | p2 (unmasked) */
21917 if (!(a&0x80))
21918 {
21919 /* Values between 16384 and 2097151 */
21920 a &= (0x7f<<14)|(0x7f);
21921 b &= 0x7f;
21922 b = b<<7;
21923 *v = a | b;
21924 return 3;
21925 }
21926
21927 /* A 32-bit varint is used to store size information in btrees.
21928 ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
21929 ** A 3-byte varint is sufficient, for example, to record the size
21930 ** of a 1048569-byte BLOB or string.
21931 **
21932 ** We only unroll the first 1-, 2-, and 3- byte cases. The very
21933 ** rare larger cases can be handled by the slower 64-bit varint
21934 ** routine.
21935 */
21936 {
21937 u64 v64;
21938 u8 n;
21939 p -= 2;
21940 n = sqlite3Fts5GetVarint(p, &v64);
21941 *v = ((u32)v64) & 0x7FFFFFFF;
21942 assert( n>3 && n<=9 );
21943 return n;
21944 }
21945}
21946
21947
/*
** Bitmasks used by sqlite3Fts5GetVarint(). These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
**
** SLOT_4_2_0   A mask for  (0xfU<<28) | SLOT_2_0. This is what
**              (0x7f<<28) | SLOT_2_0 amounts to once truncated to 32 bits.
**
** Both values are verified by assert() statements inside
** sqlite3Fts5GetVarint().
*/
#define SLOT_2_0     0x001fc07f
#define SLOT_4_2_0   0xf01fc07f
21959
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read (between 1 and 9).  The value is stored
** in *v.
**
** No buffer length is passed in: bytes are consumed one at a time until
** either a byte with the high bit clear is found or nine bytes have been
** read. The first eight bytes each contribute their low 7 bits; if a
** ninth byte is reached, all 8 of its bits are used.
**
** Implementation notes: two 32-bit accumulators are used. Variable "a"
** gathers the even-numbered bytes (p0, p2, p4, ...) and "b" the
** odd-numbered ones (p1, p3, p5, ...), each new byte being merged in after
** a 14-bit shift. Variable "s" saves the high-order bits that would
** otherwise be shifted out of the 32-bit accumulators; it becomes the
** upper 32 bits of the result for encodings of five bytes or more.
*/
static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    /* 1-byte encoding */
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    /* 2-byte encoding */
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    /* 3-byte encoding */
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    /* 4-byte encoding */
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* 5-byte encoding */
    /* we can skip these cause they were (effectively) done above in calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* 6-byte encoding */
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    /* 7-byte encoding */
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    /* 8-byte encoding */
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* 9-byte encoding: the final byte supplies a full 8 bits, hence the
  ** shifts by 15 and 8 below instead of 14 and 7. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now points at p8, so p[-4] is p4. Its upper data bits belong in the
  ** high 32 bits of the result and are merged into s here. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}
22122
22123/*
22124** The variable-length integer encoding is as follows:
22125**
22126** KEY:
22127** A = 0xxxxxxx 7 bits of data and one flag bit
22128** B = 1xxxxxxx 7 bits of data and one flag bit
22129** C = xxxxxxxx 8 bits of data
22130**
22131** 7 bits - A
22132** 14 bits - BA
22133** 21 bits - BBA
22134** 28 bits - BBBA
22135** 35 bits - BBBBA
22136** 42 bits - BBBBBA
22137** 49 bits - BBBBBBA
22138** 56 bits - BBBBBBBA
22139** 64 bits - BBBBBBBBC
22140*/
22141
/*
** FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is defined, and
** to nothing otherwise. It is applied to fts5PutVarint64() below.
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
22147
22148/*
22149** Write a 64-bit variable-length integer to memory starting at p[0].
22150** The length of data write will be between 1 and 9 bytes. The number
22151** of bytes written is returned.
22152**
22153** A variable-length integer consists of the lower 7 bits of each byte
22154** for all bytes that have the 8th bit set and one byte with the 8th
22155** bit clear. Except, if we get to the 9th byte, it stores the full
22156** 8 bits and is the last byte.
22157*/
22158static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
22159 int i, j, n;
22160 u8 buf[10];
22161 if( v & (((u64)0xff000000)<<32) ){
22162 p[8] = (u8)v;
22163 v >>= 8;
22164 for(i=7; i>=0; i--){
22165 p[i] = (u8)((v & 0x7f) | 0x80);
22166 v >>= 7;
22167 }
22168 return 9;
22169 }
22170 n = 0;
22171 do{
22172 buf[n++] = (u8)((v & 0x7f) | 0x80);
22173 v >>= 7;
22174 }while( v!=0 );
22175 buf[0] &= 0x7f;
22176 assert( n<=9 );
22177 for(i=0, j=n-1; j>=0; j--, i++){
22178 p[i] = buf[j];
22179 }
22180 return n;
22181}
22182
22183static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
22184 if( v<=0x7f ){
22185 p[0] = v&0x7f;
22186 return 1;
22187 }
22188 if( v<=0x3fff ){
22189 p[0] = ((v>>7)&0x7f)|0x80;
22190 p[1] = v&0x7f;
22191 return 2;
22192 }
22193 return fts5PutVarint64(p,v);
22194}
22195
22196
22197static int sqlite3Fts5GetVarintLen(u32 iVal){
22198#if 0
22199 if( iVal<(1 << 7 ) ) return 1;
22200#endif
22201 assert( iVal>=(1 << 7) );
22202 if( iVal<(1 << 14) ) return 2;
22203 if( iVal<(1 << 21) ) return 3;
22204 if( iVal<(1 << 28) ) return 4;
22205 return 5;
22206}
22207
22208#line 1 "fts5_vocab.c"
22209/*
22210** 2015 May 08
22211**
22212** The author disclaims copyright to this source code. In place of
22213** a legal notice, here is a blessing:
22214**
22215** May you do good and not evil.
22216** May you find forgiveness for yourself and forgive others.
22217** May you share freely, never taking more than you give.
22218**
22219******************************************************************************
22220**
22221** This is an SQLite virtual table module implementing direct access to an
22222** existing FTS5 index. The module may create several different types of
22223** tables:
22224**
22225** col:
22226** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
22227**
22228** One row for each term/column combination. The value of $doc is set to
22229** the number of fts5 rows that contain at least one instance of term
22230** $term within column $col. Field $cnt is set to the total number of
22231** instances of term $term in column $col (in any row of the fts5 table).
22232**
22233** row:
22234** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
22235**
22236** One row for each term in the database. The value of $doc is set to
22237** the number of fts5 rows that contain at least one instance of term
22238** $term. Field $cnt is set to the total number of instances of term
22239** $term in the database.
22240**
22241** instance:
22242** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
22243**
22244** One row for each term instance in the database.
22245*/
22246
22247
22248/* #include "fts5Int.h" */
22249
22250
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** An fts5vocab virtual table object. Instances are allocated by
** fts5VocabInitVtab(), which places the zFts5Tbl and zFts5Db strings in
** the same allocation, immediately after the structure itself. A single
** sqlite3_free() of the structure therefore releases everything.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;              /* Base class. Must be first */
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
  unsigned bBusy;                 /* True while fts5VocabOpenMethod() is
                                  ** stepping its lookup statement. If set
                                  ** on entry to xOpen, a "recursive
                                  ** definition" error is reported. */
};
22263
/*
** A cursor open on an fts5vocab table. fts5VocabOpenMethod() allocates the
** aCnt[] and aDoc[] arrays in the same block of memory as the cursor,
** directly after the structure, with one entry per column of the
** underlying fts5 table in each array.
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;       /* Base class. Must be first */
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Table *pFts5;               /* Associated FTS5 table */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */
  void *pStruct;                  /* From sqlite3Fts5StructureRef() */

  int nLeTerm;                    /* Size of zLeTerm in bytes. Set to -1 by
                                  ** fts5VocabResetCursor(); a negative
                                  ** value disables the upper-bound check */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* These are used by 'col' tables only */
  int iCol;
  i64 *aCnt;                      /* Array of nCol values, follows cursor */
  i64 *aDoc;                      /* Array of nCol values, follows aCnt[] */

  /* Output values used by all tables. */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */

  /* Output values used by 'instance' tables only */
  i64 iInstPos;
  int iInstOff;
};
22289
/*
** Values for Fts5VocabTable.eType. These are also used as indexes into the
** azSchema[] array in fts5VocabInitVtab(), so they must remain 0, 1 and 2
** in this order.
*/
#define FTS5_VOCAB_COL      0
#define FTS5_VOCAB_ROW      1
#define FTS5_VOCAB_INSTANCE 2

/*
** Column lists for the three table types, spliced into the
** "CREATE TABLE" statements passed to sqlite3_declare_vtab().
*/
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"
22297
/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
**
** fts5VocabBestIndexMethod() sets FTS5_VOCAB_TERM_LE for both "<=" and "<"
** constraints on the term column, and FTS5_VOCAB_TERM_GE for both ">="
** and ">".
*/
#define FTS5_VOCAB_TERM_EQ 0x01
#define FTS5_VOCAB_TERM_GE 0x02
#define FTS5_VOCAB_TERM_LE 0x04
22304
22305
22306/*
22307** Translate a string containing an fts5vocab table type to an
22308** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
22309** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
22310** and return SQLITE_ERROR.
22311*/
22312static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
22313 int rc = SQLITE_OK;
22314 char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
22315 if( rc==SQLITE_OK ){
22316 sqlite3Fts5Dequote(zCopy);
22317 if( sqlite3_stricmp(zCopy, "col")==0 ){
22318 *peType = FTS5_VOCAB_COL;
22319 }else
22320
22321 if( sqlite3_stricmp(zCopy, "row")==0 ){
22322 *peType = FTS5_VOCAB_ROW;
22323 }else
22324 if( sqlite3_stricmp(zCopy, "instance")==0 ){
22325 *peType = FTS5_VOCAB_INSTANCE;
22326 }else
22327 {
22328 *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
22329 rc = SQLITE_ERROR;
22330 }
22331 sqlite3_free(zCopy);
22332 }
22333
22334 return rc;
22335}
22336
22337
22338/*
22339** The xDisconnect() virtual table method.
22340*/
22341static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
22342 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
22343 sqlite3_free(pTab);
22344 return SQLITE_OK;
22345}
22346
22347/*
22348** The xDestroy() virtual table method.
22349*/
22350static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
22351 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
22352 sqlite3_free(pTab);
22353 return SQLITE_OK;
22354}
22355
22356/*
22357** This function is the implementation of both the xConnect and xCreate
22358** methods of the FTS3 virtual table.
22359**
22360** The argv[] array contains the following:
22361**
22362** argv[0] -> module name ("fts5vocab")
22363** argv[1] -> database name
22364** argv[2] -> table name
22365**
22366** then:
22367**
22368** argv[3] -> name of fts5 table
22369** argv[4] -> type of fts5vocab table
22370**
22371** or, for tables in the TEMP schema only.
22372**
22373** argv[3] -> name of fts5 tables database
22374** argv[4] -> name of fts5 table
22375** argv[5] -> type of fts5vocab table
22376*/
22377static int fts5VocabInitVtab(
22378 sqlite3 *db, /* The SQLite database connection */
22379 void *pAux, /* Pointer to Fts5Global object */
22380 int argc, /* Number of elements in argv array */
22381 const char * const *argv, /* xCreate/xConnect argument array */
22382 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
22383 char **pzErr /* Write any error message here */
22384){
22385 const char *azSchema[] = {
22386 "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
22387 "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")",
22388 "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")"
22389 };
22390
22391 Fts5VocabTable *pRet = 0;
22392 int rc = SQLITE_OK; /* Return code */
22393 int bDb;
22394
22395 bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
22396
22397 if( argc!=5 && bDb==0 ){
22398 *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
22399 rc = SQLITE_ERROR;
22400 }else{
22401 int nByte; /* Bytes of space to allocate */
22402 const char *zDb = bDb ? argv[3] : argv[1];
22403 const char *zTab = bDb ? argv[4] : argv[3];
22404 const char *zType = bDb ? argv[5] : argv[4];
22405 int nDb = (int)strlen(zDb)+1;
22406 int nTab = (int)strlen(zTab)+1;
22407 int eType = 0;
22408
22409 rc = fts5VocabTableType(zType, pzErr, &eType);
22410 if( rc==SQLITE_OK ){
22411 assert( eType>=0 && eType<ArraySize(azSchema) );
22412 rc = sqlite3_declare_vtab(db, azSchema[eType]);
22413 }
22414
22415 nByte = sizeof(Fts5VocabTable) + nDb + nTab;
22416 pRet = sqlite3Fts5MallocZero(&rc, nByte);
22417 if( pRet ){
22418 pRet->pGlobal = (Fts5Global*)pAux;
22419 pRet->eType = eType;
22420 pRet->db = db;
22421 pRet->zFts5Tbl = (char*)&pRet[1];
22422 pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
22423 memcpy(pRet->zFts5Tbl, zTab, nTab);
22424 memcpy(pRet->zFts5Db, zDb, nDb);
22425 sqlite3Fts5Dequote(pRet->zFts5Tbl);
22426 sqlite3Fts5Dequote(pRet->zFts5Db);
22427 }
22428 }
22429
22430 *ppVTab = (sqlite3_vtab*)pRet;
22431 return rc;
22432}
22433
22434
22435/*
22436** The xConnect() and xCreate() methods for the virtual table. All the
22437** work is done in function fts5VocabInitVtab().
22438*/
22439static int fts5VocabConnectMethod(
22440 sqlite3 *db, /* Database connection */
22441 void *pAux, /* Pointer to tokenizer hash table */
22442 int argc, /* Number of elements in argv array */
22443 const char * const *argv, /* xCreate/xConnect argument array */
22444 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
22445 char **pzErr /* OUT: sqlite3_malloc'd error message */
22446){
22447 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
22448}
22449static int fts5VocabCreateMethod(
22450 sqlite3 *db, /* Database connection */
22451 void *pAux, /* Pointer to tokenizer hash table */
22452 int argc, /* Number of elements in argv array */
22453 const char * const *argv, /* xCreate/xConnect argument array */
22454 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */
22455 char **pzErr /* OUT: sqlite3_malloc'd error message */
22456){
22457 return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
22458}
22459
22460/*
22461** Implementation of the xBestIndex method.
22462**
22463** Only constraints of the form:
22464**
22465** term <= ?
22466** term == ?
22467** term >= ?
22468**
22469** are interpreted. Less-than and less-than-or-equal are treated
22470** identically, as are greater-than and greater-than-or-equal.
22471*/
22472static int fts5VocabBestIndexMethod(
22473 sqlite3_vtab *pUnused,
22474 sqlite3_index_info *pInfo
22475){
22476 int i;
22477 int iTermEq = -1;
22478 int iTermGe = -1;
22479 int iTermLe = -1;
22480 int idxNum = 0;
22481 int nArg = 0;
22482
22483 UNUSED_PARAM(pUnused);
22484
22485 for(i=0; i<pInfo->nConstraint; i++){
22486 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
22487 if( p->usable==0 ) continue;
22488 if( p->iColumn==0 ){ /* term column */
22489 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i;
22490 if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i;
22491 if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i;
22492 if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i;
22493 if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i;
22494 }
22495 }
22496
22497 if( iTermEq>=0 ){
22498 idxNum |= FTS5_VOCAB_TERM_EQ;
22499 pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg;
22500 pInfo->estimatedCost = 100;
22501 }else{
22502 pInfo->estimatedCost = 1000000;
22503 if( iTermGe>=0 ){
22504 idxNum |= FTS5_VOCAB_TERM_GE;
22505 pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg;
22506 pInfo->estimatedCost = pInfo->estimatedCost / 2;
22507 }
22508 if( iTermLe>=0 ){
22509 idxNum |= FTS5_VOCAB_TERM_LE;
22510 pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg;
22511 pInfo->estimatedCost = pInfo->estimatedCost / 2;
22512 }
22513 }
22514
22515 /* This virtual table always delivers results in ascending order of
22516 ** the "term" column (column 0). So if the user has requested this
22517 ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the
22518 ** sqlite3_index_info.orderByConsumed flag to tell the core the results
22519 ** are already in sorted order. */
22520 if( pInfo->nOrderBy==1
22521 && pInfo->aOrderBy[0].iColumn==0
22522 && pInfo->aOrderBy[0].desc==0
22523 ){
22524 pInfo->orderByConsumed = 1;
22525 }
22526
22527 pInfo->idxNum = idxNum;
22528 return SQLITE_OK;
22529}
22530
/*
** Implementation of xOpen method.
**
** Locate the fts5 table named by the fts5vocab table and allocate a new
** cursor on it. On success, *ppCsr is set to the new cursor and SQLITE_OK
** returned. Otherwise *ppCsr is set to NULL and an SQLite error code is
** returned. For the "recursive definition" and "no such fts5 table"
** errors, an error message is also stored in pVTab->zErrMsg.
**
** The fts5 table is found by preparing and stepping a SELECT against it.
** The integer in the single result column is handed to
** sqlite3Fts5TableFromCsrid() to obtain the Fts5Table object.
** NOTE(review): presumably the special MATCH '*id' expression makes fts5
** return the id of its own cursor - confirm against the fts5 main module.
** The statement is kept open for the lifetime of the cursor (stored in
** Fts5VocabCursor.pStmt) and finalized when the cursor is closed.
*/
static int fts5VocabOpenMethod(
  sqlite3_vtab *pVTab,
  sqlite3_vtab_cursor **ppCsr
){
  Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
  Fts5Table *pFts5 = 0;
  Fts5VocabCursor *pCsr = 0;
  int rc = SQLITE_OK;
  sqlite3_stmt *pStmt = 0;
  char *zSql = 0;

  /* bBusy is set while the lookup statement below is being stepped. If it
  ** is already set on entry, this xOpen call was reached from within that
  ** step, so the definition is reported as recursive. */
  if( pTab->bBusy ){
    pVTab->zErrMsg = sqlite3_mprintf(
        "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
    );
    return SQLITE_ERROR;
  }
  zSql = sqlite3Fts5Mprintf(&rc,
      "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
      pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
  );
  if( zSql ){
    rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
  }
  sqlite3_free(zSql);
  assert( rc==SQLITE_OK || pStmt==0 );
  /* A plain SQLITE_ERROR from the prepare is not returned as is. It is
  ** cleared here so that, with pStmt and pFts5 both left NULL, the
  ** "no such fts5 table" branch below reports the problem instead. */
  if( rc==SQLITE_ERROR ) rc = SQLITE_OK;

  pTab->bBusy = 1;
  if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
    i64 iId = sqlite3_column_int64(pStmt, 0);
    pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId);
  }
  pTab->bBusy = 0;

  if( rc==SQLITE_OK ){
    if( pFts5==0 ){
      /* Finalizing returns any error hit while stepping. Only if there
      ** was none is the generic "no such fts5 table" message used. */
      rc = sqlite3_finalize(pStmt);
      pStmt = 0;
      if( rc==SQLITE_OK ){
        pVTab->zErrMsg = sqlite3_mprintf(
            "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
        );
        rc = SQLITE_ERROR;
      }
    }else{
      rc = sqlite3Fts5FlushToDisk(pFts5);
    }
  }

  if( rc==SQLITE_OK ){
    /* Space for the cursor followed by the aCnt[] and aDoc[] arrays, each
    ** of which has one i64 per column of the fts5 table. */
    i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor);
    pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
  }

  if( pCsr ){
    pCsr->pFts5 = pFts5;
    pCsr->pStmt = pStmt;          /* Cursor now owns the statement */
    pCsr->aCnt = (i64*)&pCsr[1];
    pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol];
  }else{
    /* No cursor was created, so release the statement (may be NULL) */
    sqlite3_finalize(pStmt);
  }

  *ppCsr = (sqlite3_vtab_cursor*)pCsr;
  return rc;
}
22601
22602static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
22603 pCsr->rowid = 0;
22604 sqlite3Fts5IterClose(pCsr->pIter);
22605 sqlite3Fts5StructureRelease(pCsr->pStruct);
22606 pCsr->pStruct = 0;
22607 pCsr->pIter = 0;
22608 sqlite3_free(pCsr->zLeTerm);
22609 pCsr->nLeTerm = -1;
22610 pCsr->zLeTerm = 0;
22611 pCsr->bEof = 0;
22612}
22613
22614/*
22615** Close the cursor. For additional information see the documentation
22616** on the xClose method of the virtual table interface.
22617*/
22618static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
22619 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22620 fts5VocabResetCursor(pCsr);
22621 sqlite3Fts5BufferFree(&pCsr->term);
22622 sqlite3_finalize(pCsr->pStmt);
22623 sqlite3_free(pCsr);
22624 return SQLITE_OK;
22625}
22626
22627static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
22628 int rc = SQLITE_OK;
22629
22630 if( sqlite3Fts5IterEof(pCsr->pIter) ){
22631 pCsr->bEof = 1;
22632 }else{
22633 const char *zTerm;
22634 int nTerm;
22635 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
22636 if( pCsr->nLeTerm>=0 ){
22637 int nCmp = MIN(nTerm, pCsr->nLeTerm);
22638 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
22639 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
22640 pCsr->bEof = 1;
22641 }
22642 }
22643
22644 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
22645 }
22646 return rc;
22647}
22648
22649static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
22650 int eDetail = pCsr->pFts5->pConfig->eDetail;
22651 int rc = SQLITE_OK;
22652 Fts5IndexIter *pIter = pCsr->pIter;
22653 i64 *pp = &pCsr->iInstPos;
22654 int *po = &pCsr->iInstOff;
22655
22656 assert( sqlite3Fts5IterEof(pIter)==0 );
22657 assert( pCsr->bEof==0 );
22658 while( eDetail==FTS5_DETAIL_NONE
22659 || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp)
22660 ){
22661 pCsr->iInstPos = 0;
22662 pCsr->iInstOff = 0;
22663
22664 rc = sqlite3Fts5IterNextScan(pCsr->pIter);
22665 if( rc==SQLITE_OK ){
22666 rc = fts5VocabInstanceNewTerm(pCsr);
22667 if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE ) break;
22668 }
22669 if( rc ){
22670 pCsr->bEof = 1;
22671 break;
22672 }
22673 }
22674
22675 return rc;
22676}
22677
22678/*
22679** Advance the cursor to the next row in the table.
22680*/
22681static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
22682 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22683 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
22684 int nCol = pCsr->pFts5->pConfig->nCol;
22685 int rc;
22686
22687 rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct);
22688 if( rc!=SQLITE_OK ) return rc;
22689 pCsr->rowid++;
22690
22691 if( pTab->eType==FTS5_VOCAB_INSTANCE ){
22692 return fts5VocabInstanceNext(pCsr);
22693 }
22694
22695 if( pTab->eType==FTS5_VOCAB_COL ){
22696 for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
22697 if( pCsr->aDoc[pCsr->iCol] ) break;
22698 }
22699 }
22700
22701 if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){
22702 if( sqlite3Fts5IterEof(pCsr->pIter) ){
22703 pCsr->bEof = 1;
22704 }else{
22705 const char *zTerm;
22706 int nTerm;
22707
22708 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
22709 assert( nTerm>=0 );
22710 if( pCsr->nLeTerm>=0 ){
22711 int nCmp = MIN(nTerm, pCsr->nLeTerm);
22712 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
22713 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
22714 pCsr->bEof = 1;
22715 return SQLITE_OK;
22716 }
22717 }
22718
22719 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
22720 memset(pCsr->aCnt, 0, nCol * sizeof(i64));
22721 memset(pCsr->aDoc, 0, nCol * sizeof(i64));
22722 pCsr->iCol = 0;
22723
22724 assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
22725 while( rc==SQLITE_OK ){
22726 int eDetail = pCsr->pFts5->pConfig->eDetail;
22727 const u8 *pPos; int nPos; /* Position list */
22728 i64 iPos = 0; /* 64-bit position read from poslist */
22729 int iOff = 0; /* Current offset within position list */
22730
22731 pPos = pCsr->pIter->pData;
22732 nPos = pCsr->pIter->nData;
22733
22734 switch( pTab->eType ){
22735 case FTS5_VOCAB_ROW:
22736 if( eDetail==FTS5_DETAIL_FULL ){
22737 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
22738 pCsr->aCnt[0]++;
22739 }
22740 }
22741 pCsr->aDoc[0]++;
22742 break;
22743
22744 case FTS5_VOCAB_COL:
22745 if( eDetail==FTS5_DETAIL_FULL ){
22746 int iCol = -1;
22747 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
22748 int ii = FTS5_POS2COLUMN(iPos);
22749 if( iCol!=ii ){
22750 if( ii>=nCol ){
22751 rc = FTS5_CORRUPT;
22752 break;
22753 }
22754 pCsr->aDoc[ii]++;
22755 iCol = ii;
22756 }
22757 pCsr->aCnt[ii]++;
22758 }
22759 }else if( eDetail==FTS5_DETAIL_COLUMNS ){
22760 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
22761 assert_nc( iPos>=0 && iPos<nCol );
22762 if( iPos>=nCol ){
22763 rc = FTS5_CORRUPT;
22764 break;
22765 }
22766 pCsr->aDoc[iPos]++;
22767 }
22768 }else{
22769 assert( eDetail==FTS5_DETAIL_NONE );
22770 pCsr->aDoc[0]++;
22771 }
22772 break;
22773
22774 default:
22775 assert( pTab->eType==FTS5_VOCAB_INSTANCE );
22776 break;
22777 }
22778
22779 if( rc==SQLITE_OK ){
22780 rc = sqlite3Fts5IterNextScan(pCsr->pIter);
22781 }
22782 if( pTab->eType==FTS5_VOCAB_INSTANCE ) break;
22783
22784 if( rc==SQLITE_OK ){
22785 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
22786 if( nTerm!=pCsr->term.n
22787 || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm))
22788 ){
22789 break;
22790 }
22791 if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
22792 }
22793 }
22794 }
22795 }
22796
22797 if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
22798 for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++);
22799 if( pCsr->iCol==nCol ){
22800 rc = FTS5_CORRUPT;
22801 }
22802 }
22803 return rc;
22804}
22805
22806/*
22807** This is the xFilter implementation for the virtual table.
22808*/
22809static int fts5VocabFilterMethod(
22810 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */
22811 int idxNum, /* Strategy index */
22812 const char *zUnused, /* Unused */
22813 int nUnused, /* Number of elements in apVal */
22814 sqlite3_value **apVal /* Arguments for the indexing scheme */
22815){
22816 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
22817 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22818 int eType = pTab->eType;
22819 int rc = SQLITE_OK;
22820
22821 int iVal = 0;
22822 int f = FTS5INDEX_QUERY_SCAN;
22823 const char *zTerm = 0;
22824 int nTerm = 0;
22825
22826 sqlite3_value *pEq = 0;
22827 sqlite3_value *pGe = 0;
22828 sqlite3_value *pLe = 0;
22829
22830 UNUSED_PARAM2(zUnused, nUnused);
22831
22832 fts5VocabResetCursor(pCsr);
22833 if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
22834 if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
22835 if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
22836
22837 if( pEq ){
22838 zTerm = (const char *)sqlite3_value_text(pEq);
22839 nTerm = sqlite3_value_bytes(pEq);
22840 f = 0;
22841 }else{
22842 if( pGe ){
22843 zTerm = (const char *)sqlite3_value_text(pGe);
22844 nTerm = sqlite3_value_bytes(pGe);
22845 }
22846 if( pLe ){
22847 const char *zCopy = (const char *)sqlite3_value_text(pLe);
22848 if( zCopy==0 ) zCopy = "";
22849 pCsr->nLeTerm = sqlite3_value_bytes(pLe);
22850 pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1);
22851 if( pCsr->zLeTerm==0 ){
22852 rc = SQLITE_NOMEM;
22853 }else{
22854 memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1);
22855 }
22856 }
22857 }
22858
22859 if( rc==SQLITE_OK ){
22860 Fts5Index *pIndex = pCsr->pFts5->pIndex;
22861 rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
22862 if( rc==SQLITE_OK ){
22863 pCsr->pStruct = sqlite3Fts5StructureRef(pIndex);
22864 }
22865 }
22866 if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){
22867 rc = fts5VocabInstanceNewTerm(pCsr);
22868 }
22869 if( rc==SQLITE_OK && !pCsr->bEof
22870 && (eType!=FTS5_VOCAB_INSTANCE
22871 || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE)
22872 ){
22873 rc = fts5VocabNextMethod(pCursor);
22874 }
22875
22876 return rc;
22877}
22878
22879/*
22880** This is the xEof method of the virtual table. SQLite calls this
22881** routine to find out if it has reached the end of a result set.
22882*/
22883static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
22884 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22885 return pCsr->bEof;
22886}
22887
22888static int fts5VocabColumnMethod(
22889 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
22890 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
22891 int iCol /* Index of column to read value from */
22892){
22893 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22894 int eDetail = pCsr->pFts5->pConfig->eDetail;
22895 int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType;
22896 i64 iVal = 0;
22897
22898 if( iCol==0 ){
22899 sqlite3_result_text(
22900 pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
22901 );
22902 }else if( eType==FTS5_VOCAB_COL ){
22903 assert( iCol==1 || iCol==2 || iCol==3 );
22904 if( iCol==1 ){
22905 if( eDetail!=FTS5_DETAIL_NONE ){
22906 const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol];
22907 sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
22908 }
22909 }else if( iCol==2 ){
22910 iVal = pCsr->aDoc[pCsr->iCol];
22911 }else{
22912 iVal = pCsr->aCnt[pCsr->iCol];
22913 }
22914 }else if( eType==FTS5_VOCAB_ROW ){
22915 assert( iCol==1 || iCol==2 );
22916 if( iCol==1 ){
22917 iVal = pCsr->aDoc[0];
22918 }else{
22919 iVal = pCsr->aCnt[0];
22920 }
22921 }else{
22922 assert( eType==FTS5_VOCAB_INSTANCE );
22923 switch( iCol ){
22924 case 1:
22925 sqlite3_result_int64(pCtx, pCsr->pIter->iRowid);
22926 break;
22927 case 2: {
22928 int ii = -1;
22929 if( eDetail==FTS5_DETAIL_FULL ){
22930 ii = FTS5_POS2COLUMN(pCsr->iInstPos);
22931 }else if( eDetail==FTS5_DETAIL_COLUMNS ){
22932 ii = (int)pCsr->iInstPos;
22933 }
22934 if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){
22935 const char *z = pCsr->pFts5->pConfig->azCol[ii];
22936 sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
22937 }
22938 break;
22939 }
22940 default: {
22941 assert( iCol==3 );
22942 if( eDetail==FTS5_DETAIL_FULL ){
22943 int ii = FTS5_POS2OFFSET(pCsr->iInstPos);
22944 sqlite3_result_int(pCtx, ii);
22945 }
22946 break;
22947 }
22948 }
22949 }
22950
22951 if( iVal>0 ) sqlite3_result_int64(pCtx, iVal);
22952 return SQLITE_OK;
22953}
22954
22955/*
22956** This is the xRowid method. The SQLite core calls this routine to
22957** retrieve the rowid for the current row of the result set. The
22958** rowid should be written to *pRowid.
22959*/
22960static int fts5VocabRowidMethod(
22961 sqlite3_vtab_cursor *pCursor,
22962 sqlite_int64 *pRowid
22963){
22964 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22965 *pRowid = pCsr->rowid;
22966 return SQLITE_OK;
22967}
22968
22969static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
22970 static const sqlite3_module fts5Vocab = {
22971 /* iVersion */ 2,
22972 /* xCreate */ fts5VocabCreateMethod,
22973 /* xConnect */ fts5VocabConnectMethod,
22974 /* xBestIndex */ fts5VocabBestIndexMethod,
22975 /* xDisconnect */ fts5VocabDisconnectMethod,
22976 /* xDestroy */ fts5VocabDestroyMethod,
22977 /* xOpen */ fts5VocabOpenMethod,
22978 /* xClose */ fts5VocabCloseMethod,
22979 /* xFilter */ fts5VocabFilterMethod,
22980 /* xNext */ fts5VocabNextMethod,
22981 /* xEof */ fts5VocabEofMethod,
22982 /* xColumn */ fts5VocabColumnMethod,
22983 /* xRowid */ fts5VocabRowidMethod,
22984 /* xUpdate */ 0,
22985 /* xBegin */ 0,
22986 /* xSync */ 0,
22987 /* xCommit */ 0,
22988 /* xRollback */ 0,
22989 /* xFindFunction */ 0,
22990 /* xRename */ 0,
22991 /* xSavepoint */ 0,
22992 /* xRelease */ 0,
22993 /* xRollbackTo */ 0,
22994 /* xShadowName */ 0
22995 };
22996 void *p = (void*)pGlobal;
22997
22998 return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
22999}
23000
23001
23002
23003#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */
23004