modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/fts5.h (about)

     1  /*
     2  ** 2014 May 31
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  ******************************************************************************
    12  **
    13  ** Interfaces to extend FTS5. Using the interfaces defined in this file, 
    14  ** FTS5 may be extended with:
    15  **
    16  **     * custom tokenizers, and
    17  **     * custom auxiliary functions.
    18  */
    19  
    20  
    21  #ifndef _FTS5_H
    22  #define _FTS5_H
    23  
    24  #include "sqlite3.h"
    25  
    26  #ifdef __cplusplus
    27  extern "C" {
    28  #endif
    29  
    30  /*************************************************************************
    31  ** CUSTOM AUXILIARY FUNCTIONS
    32  **
    33  ** Virtual table implementations may overload SQL functions by implementing
    34  ** the sqlite3_module.xFindFunction() method.
    35  */
    36  
    37  typedef struct Fts5ExtensionApi Fts5ExtensionApi;  /* Method table offered to auxiliary functions */
    38  typedef struct Fts5Context Fts5Context;            /* Handle passed as first argument to the API methods; not defined in this header */
    39  typedef struct Fts5PhraseIter Fts5PhraseIter;      /* Iterator state for the xPhraseFirst()/xPhraseNext() family */
    40  
    41  typedef void (*fts5_extension_function)(  /* Auxiliary function callback type; see fts5_api.xCreateFunction */
    42    const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
    43    Fts5Context *pFts,              /* First arg to pass to pApi functions */
    44    sqlite3_context *pCtx,          /* Context for returning result/error */
    45    int nVal,                       /* Number of values in apVal[] array */
    46    sqlite3_value **apVal           /* Array of trailing arguments */
    47  );
    48  
    49  struct Fts5PhraseIter {          /* Iterator state for xPhraseFirst()/xPhraseNext() and the *Column() variants */
    50    const unsigned char *a;        /* Internal iterator state; applications should not modify it directly */
    51    const unsigned char *b;        /* Internal iterator state; applications should not modify it directly */
    52  };
    53  
    54  /*
    55  ** EXTENSION API FUNCTIONS
    56  **
    57  ** xUserData(pFts):
    58  **   Return a copy of the context pointer the extension function was 
    59  **   registered with.
    60  **
    61  ** xColumnTotalSize(pFts, iCol, pnToken):
    62  **   If parameter iCol is less than zero, set output variable *pnToken
    63  **   to the total number of tokens in the FTS5 table. Or, if iCol is
    64  **   non-negative but less than the number of columns in the table, set
    65  **   *pnToken to the total number of tokens in column iCol, considering all
    66  **   rows in the FTS5 table.
    67  **
    68  **   If parameter iCol is greater than or equal to the number of columns
    69  **   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
    70  **   an OOM condition or IO error), an appropriate SQLite error code is 
    71  **   returned.
    72  **
    73  ** xColumnCount(pFts):
    74  **   Return the number of columns in the table.
    75  **
    76  ** xColumnSize(pFts, iCol, pnToken):
    77  **   If parameter iCol is less than zero, set output variable *pnToken
    78  **   to the total number of tokens in the current row. Or, if iCol is
    79  **   non-negative but less than the number of columns in the table, set
    80  **   *pnToken to the number of tokens in column iCol of the current row.
    81  **
    82  **   If parameter iCol is greater than or equal to the number of columns
    83  **   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
    84  **   an OOM condition or IO error), an appropriate SQLite error code is 
    85  **   returned.
    86  **
    87  **   This function may be quite inefficient if used with an FTS5 table
    88  **   created with the "columnsize=0" option.
    89  **
    90  ** xColumnText:
    91  **   This function attempts to retrieve the text of column iCol of the
    92  **   current document. If successful, (*pz) is set to point to a buffer
    93  **   containing the text in utf-8 encoding, (*pn) is set to the size in bytes
    94  **   (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
    95  **   if an error occurs, an SQLite error code is returned and the final values
    96  **   of (*pz) and (*pn) are undefined.
    97  **
    98  ** xPhraseCount:
    99  **   Returns the number of phrases in the current query expression.
   100  **
   101  ** xPhraseSize:
   102  **   Returns the number of tokens in phrase iPhrase of the query. Phrases
   103  **   are numbered starting from zero.
   104  **
   105  ** xInstCount:
   106  **   Set *pnInst to the total number of occurrences of all phrases within
   107  **   the query within the current row. Return SQLITE_OK if successful, or
   108  **   an error code (e.g. SQLITE_NOMEM) if an error occurs.
   109  **
   110  **   This API can be quite slow if used with an FTS5 table created with the
   111  **   "detail=none" or "detail=column" option. If the FTS5 table is created 
   112  **   with either "detail=none" or "detail=column" and "content=" option 
   113  **   (i.e. if it is a contentless table), then this API always returns 0.
   114  **
   115  ** xInst:
   116  **   Query for the details of phrase match iIdx within the current row.
   117  **   Phrase matches are numbered starting from zero, so the iIdx argument
   118  **   should be greater than or equal to zero and smaller than the value
   119  **   output by xInstCount().
   120  **
   121  **   Usually, output parameter *piPhrase is set to the phrase number, *piCol
   122  **   to the column in which it occurs and *piOff the token offset of the
   123  **   first token of the phrase. The exception is if the table was created
   124  **   with the offsets=0 option specified. In this case *piOff is always
   125  **   set to -1.
   126  **
   127  **   Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM) 
   128  **   if an error occurs.
   129  **
   130  **   This API can be quite slow if used with an FTS5 table created with the
   131  **   "detail=none" or "detail=column" option. 
   132  **
   133  ** xRowid:
   134  **   Returns the rowid of the current row.
   135  **
   136  ** xTokenize:
   137  **   Tokenize text using the tokenizer belonging to the FTS5 table.
   138  **
   139  ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
   140  **   This API function is used to query the FTS table for phrase iPhrase
   141  **   of the current query. Specifically, a query equivalent to:
   142  **
   143  **       ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
   144  **
   145  **   with $p set to a phrase equivalent to the phrase iPhrase of the
   146  **   current query is executed. Any column filter that applies to
   147  **   phrase iPhrase of the current query is included in $p. For each 
   148  **   row visited, the callback function passed as the fourth argument 
   149  **   is invoked. The context and API objects passed to the callback 
   150  **   function may be used to access the properties of each matched row.
   151  **   Invoking Api.xUserData() returns a copy of the pointer passed as 
   152  **   the third argument (pUserData) to xQueryPhrase().
   153  **
   154  **   If the callback function returns any value other than SQLITE_OK, the
   155  **   query is abandoned and the xQueryPhrase function returns immediately.
   156  **   If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
   157  **   Otherwise, the error code is propagated upwards.
   158  **
   159  **   If the query runs to completion without incident, SQLITE_OK is returned.
   160  **   Or, if some error occurs before the query completes or is aborted by
   161  **   the callback, an SQLite error code is returned.
   162  **
   163  **
   164  ** xSetAuxdata(pFts5, pAux, xDelete)
   165  **
   166  **   Save the pointer passed as the second argument as the extension functions 
   167  **   "auxiliary data". The pointer may then be retrieved by the current or any
   168  **   future invocation of the same fts5 extension function made as part of
   169  **   the same MATCH query using the xGetAuxdata() API.
   170  **
   171  **   Each extension function is allocated a single auxiliary data slot for
   172  **   each FTS query (MATCH expression). If the extension function is invoked 
   173  **   more than once for a single FTS query, then all invocations share a 
   174  **   single auxiliary data context.
   175  **
   176  **   If there is already an auxiliary data pointer when this function is
   177  **   invoked, then it is replaced by the new pointer. If an xDelete callback
   178  **   was specified along with the original pointer, it is invoked at this
   179  **   point.
   180  **
   181  **   The xDelete callback, if one is specified, is also invoked on the
   182  **   auxiliary data pointer after the FTS5 query has finished.
   183  **
   184  **   If an error (e.g. an OOM condition) occurs within this function,
   185  **   the auxiliary data is set to NULL and an error code returned. If the
   186  **   xDelete parameter was not NULL, it is invoked on the auxiliary data
   187  **   pointer before returning.
   188  **
   189  **
   190  ** xGetAuxdata(pFts5, bClear)
   191  **
   192  **   Returns the current auxiliary data pointer for the fts5 extension 
   193  **   function. See the xSetAuxdata() method for details.
   194  **
   195  **   If the bClear argument is non-zero, then the auxiliary data is cleared
   196  **   (set to NULL) before this function returns. In this case the xDelete,
   197  **   if any, is not invoked.
   198  **
   199  **
   200  ** xRowCount(pFts5, pnRow)
   201  **
   202  **   This function is used to retrieve the total number of rows in the table.
   203  **   In other words, the same value that would be returned by:
   204  **
   205  **        SELECT count(*) FROM ftstable;
   206  **
   207  ** xPhraseFirst()
   208  **   This function is used, along with type Fts5PhraseIter and the xPhraseNext
   209  **   method, to iterate through all instances of a single query phrase within
   210  **   the current row. This is the same information as is accessible via the
   211  **   xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
   212  **   to use, this API may be faster under some circumstances. To iterate 
   213  **   through instances of phrase iPhrase, use the following code:
   214  **
   215  **       Fts5PhraseIter iter;
   216  **       int iCol, iOff;
   217  **       for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
   218  **           iCol>=0;
   219  **           pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
   220  **       ){
   221  **         // An instance of phrase iPhrase at offset iOff of column iCol
   222  **       }
   223  **
   224  **   The Fts5PhraseIter structure is defined above. Applications should not
   225  **   modify this structure directly - it should only be used as shown above
   226  **   with the xPhraseFirst() and xPhraseNext() API methods (and by
   227  **   xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
   228  **
   229  **   This API can be quite slow if used with an FTS5 table created with the
   230  **   "detail=none" or "detail=column" option. If the FTS5 table is created 
   231  **   with either "detail=none" or "detail=column" and "content=" option 
   232  **   (i.e. if it is a contentless table), then this API always iterates
   233  **   through an empty set (all calls to xPhraseFirst() set iCol to -1).
   234  **
   235  ** xPhraseNext()
   236  **   See xPhraseFirst above.
   237  **
   238  ** xPhraseFirstColumn()
   239  **   This function and xPhraseNextColumn() are similar to the xPhraseFirst()
   240  **   and xPhraseNext() APIs described above. The difference is that instead
   241  **   of iterating through all instances of a phrase in the current row, these
   242  **   APIs are used to iterate through the set of columns in the current row
   243  **   that contain one or more instances of a specified phrase. For example:
   244  **
   245  **       Fts5PhraseIter iter;
   246  **       int iCol;
   247  **       for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
   248  **           iCol>=0;
   249  **           pApi->xPhraseNextColumn(pFts, &iter, &iCol)
   250  **       ){
   251  **         // Column iCol contains at least one instance of phrase iPhrase
   252  **       }
   253  **
   254  **   This API can be quite slow if used with an FTS5 table created with the
   255  **   "detail=none" option. If the FTS5 table is created with both the 
   256  **   "detail=none" and "content=" options (i.e. if it is a contentless table), 
   257  **   then this API always iterates through an empty set (all calls to 
   258  **   xPhraseFirstColumn() set iCol to -1).
   259  **
   260  **   The information accessed using this API and its companion
   261  **   xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
   262  **   (or xInst/xInstCount). The chief advantage of this API is that it is
   263  **   significantly more efficient than those alternatives when used with
   264  **   "detail=column" tables.  
   265  **
   266  ** xPhraseNextColumn()
   267  **   See xPhraseFirstColumn above.
   268  */
   269  struct Fts5ExtensionApi {
   270    int iVersion;                   /* Currently always set to 3 */
   271    /* Copy of the context pointer the extension function was registered with */
   272    void *(*xUserData)(Fts5Context*);
   273    /* Table-wide figures: column count, row count, token totals */
   274    int (*xColumnCount)(Fts5Context*);
   275    int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
   276    int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
   277    /* Tokenize text using the tokenizer belonging to the FTS5 table */
   278    int (*xTokenize)(Fts5Context*, 
   279      const char *pText, int nText, /* Text to tokenize */
   280      void *pCtx,                   /* Context passed to xToken() */
   281      int (*xToken)(void*, int, const char*, int, int, int)       /* Callback */
   282    );
   283    /* Phrases in the current query, and tokens in phrase iPhrase */
   284    int (*xPhraseCount)(Fts5Context*);
   285    int (*xPhraseSize)(Fts5Context*, int iPhrase);
   286    /* Phrase matches (instances) within the current row */
   287    int (*xInstCount)(Fts5Context*, int *pnInst);
   288    int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
   289    /* Current row: rowid, text of column iCol, token count of column iCol */
   290    sqlite3_int64 (*xRowid)(Fts5Context*);
   291    int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
   292    int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
   293    /* Query the table for phrase iPhrase, invoking the callback for each row */
   294    int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
   295      int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
   296    );
   297    int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));  /* Save the per-query auxiliary data pointer */
   298    void *(*xGetAuxdata)(Fts5Context*, int bClear);  /* Fetch it; if bClear is non-zero, clear it without invoking xDelete */
   299    /* Iterate through all instances of a phrase within the current row */
   300    int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
   301    void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
   302    /* Iterate through the columns of the current row that contain a phrase */
   303    int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
   304    void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
   305  };
   306  
   307  /* 
   308  ** CUSTOM AUXILIARY FUNCTIONS
   309  *************************************************************************/
   310  
   311  /*************************************************************************
   312  ** CUSTOM TOKENIZERS
   313  **
   314  ** Applications may also register custom tokenizer types. A tokenizer 
   315  ** is registered by providing fts5 with a populated instance of the 
   316  ** following structure. All structure methods must be defined, setting
   317  ** any member of the fts5_tokenizer struct to NULL leads to undefined
   318  ** behaviour. The structure methods are expected to function as follows:
   319  **
   320  ** xCreate:
   321  **   This function is used to allocate and initialize a tokenizer instance.
   322  **   A tokenizer instance is required to actually tokenize text.
   323  **
   324  **   The first argument passed to this function is a copy of the (void*)
   325  **   pointer provided by the application when the fts5_tokenizer object
   326  **   was registered with FTS5 (the third argument to xCreateTokenizer()). 
   327  **   The second and third arguments are an array of nul-terminated strings
   328  **   containing the tokenizer arguments, if any, specified following the
   329  **   tokenizer name as part of the CREATE VIRTUAL TABLE statement used
   330  **   to create the FTS5 table.
   331  **
   332  **   The final argument is an output variable. If successful, (*ppOut) 
   333  **   should be set to point to the new tokenizer handle and SQLITE_OK
   334  **   returned. If an error occurs, some value other than SQLITE_OK should
   335  **   be returned. In this case, fts5 assumes that the final value of *ppOut 
   336  **   is undefined.
   337  **
   338  ** xDelete:
   339  **   This function is invoked to delete a tokenizer handle previously
   340  **   allocated using xCreate(). Fts5 guarantees that this function will
   341  **   be invoked exactly once for each successful call to xCreate().
   342  **
   343  ** xTokenize:
   344  **   This function is expected to tokenize the nText byte string indicated 
   345  **   by argument pText. pText may or may not be nul-terminated. The first
   346  **   argument passed to this function is a pointer to an Fts5Tokenizer object
   347  **   returned by an earlier call to xCreate().
   348  **
   349  **   The third argument indicates the reason that FTS5 is requesting
   350  **   tokenization of the supplied text. This is always one of the following
   351  **   four values:
   352  **
   353  **   <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
   354  **            or removed from the FTS table. The tokenizer is being invoked to
   355  **            determine the set of tokens to add to (or delete from) the
   356  **            FTS index.
   357  **
   358  **       <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed 
   359  **            against the FTS index. The tokenizer is being called to tokenize 
   360  **            a bareword or quoted string specified as part of the query.
   361  **
   362  **       <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
   363  **            FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
   364  **            followed by a "*" character, indicating that the last token
   365  **            returned by the tokenizer will be treated as a token prefix.
   366  **
   367  **       <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to 
   368  **            satisfy an Fts5ExtensionApi.xTokenize() request made by an
   369  **            auxiliary function. Or an Fts5ExtensionApi.xColumnSize() request
   370  **            made by the same on a columnsize=0 database.  
   371  **   </ul>
   372  **
   373  **   For each token in the input string, the supplied callback xToken() must
   374  **   be invoked. The first argument to it should be a copy of the pointer
   375  **   passed as the second argument to xTokenize(). The third and fourth
   376  **   arguments are a pointer to a buffer containing the token text, and the
   377  **   size of the token in bytes. The 5th and 6th arguments are the byte offsets
   378  **   of the first byte of and first byte immediately following the text from
   379  **   which the token is derived within the input.
   380  **
   381  **   The second argument passed to the xToken() callback ("tflags") should
   382  **   normally be set to 0. The exception is if the tokenizer supports 
   383  **   synonyms. In this case see the discussion below for details.
   384  **
   385  **   FTS5 assumes the xToken() callback is invoked for each token in the 
   386  **   order that they occur within the input text.
   387  **
   388  **   If an xToken() callback returns any value other than SQLITE_OK, then
   389  **   the tokenization should be abandoned and the xTokenize() method should
   390  **   immediately return a copy of the xToken() return value. Or, if the
   391  **   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
   392  **   if an error occurs with the xTokenize() implementation itself, it
   393  **   may abandon the tokenization and return any error code other than
   394  **   SQLITE_OK or SQLITE_DONE.
   395  **
   396  ** SYNONYM SUPPORT
   397  **
   398  **   Custom tokenizers may also support synonyms. Consider a case in which a
   399  **   user wishes to query for a phrase such as "first place". Using the 
   400  **   built-in tokenizers, the FTS5 query 'first + place' will match instances
   401  **   of "first place" within the document set, but not alternative forms
   402  **   such as "1st place". In some applications, it would be better to match
   403  **   all instances of "first place" or "1st place" regardless of which form
   404  **   the user specified in the MATCH query text.
   405  **
   406  **   There are several ways to approach this in FTS5:
   407  **
   408  **   <ol><li> By mapping all synonyms to a single token. In this case, using
   409  **            the above example, this means that the tokenizer returns the
   410  **            same token for inputs "first" and "1st". Say that token is in
   411  **            fact "first", so that when the user inserts the document "I won
   412  **            1st place" entries are added to the index for tokens "i", "won",
   413  **            "first" and "place". If the user then queries for '1st + place',
   414  **            the tokenizer substitutes "first" for "1st" and the query works
   415  **            as expected.
   416  **
   417  **       <li> By querying the FTS index separately for each synonym of a term.
   418  **            In this case, when tokenizing query text, the tokenizer may 
   419  **            provide multiple synonyms for a single term within the document.
   420  **            FTS5 then queries the index for each synonym individually. For
   421  **            example, faced with the query:
   422  **
   423  **   <codeblock>
   424  **     ... MATCH 'first place'</codeblock>
   425  **
   426  **            the tokenizer offers both "1st" and "first" as synonyms for the
   427  **            first token in the MATCH query and FTS5 effectively runs a query 
   428  **            similar to:
   429  **
   430  **   <codeblock>
   431  **     ... MATCH '(first OR 1st) place'</codeblock>
   432  **
   433  **            except that, for the purposes of auxiliary functions, the query
   434  **            still appears to contain just two phrases - "(first OR 1st)" 
   435  **            being treated as a single phrase.
   436  **
   437  **       <li> By adding multiple synonyms for a single term to the FTS index.
   438  **            Using this method, when tokenizing document text, the tokenizer
   439  **            provides multiple synonyms for each token. So that when a 
   440  **            document such as "I won first place" is tokenized, entries are
   441  **            added to the FTS index for "i", "won", "first", "1st" and
   442  **            "place".
   443  **
   444  **            This way, even if the tokenizer does not provide synonyms
   445  **            when tokenizing query text (it should not - to do so would be
   446  **            inefficient), it doesn't matter if the user queries for 
   447  **            'first + place' or '1st + place', as there are entries in the
   448  **            FTS index corresponding to both forms of the first token.
   449  **   </ol>
   450  **
   451  **   Whether it is parsing document or query text, any call to xToken that
   452  **   specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
   453  **   is considered to supply a synonym for the previous token. For example,
   454  **   when parsing the document "I won first place", a tokenizer that supports
   455  **   synonyms would call xToken() 5 times, as follows:
   456  **
   457  **   <codeblock>
   458  **       xToken(pCtx, 0, "i",                      1,  0,  1);
   459  **       xToken(pCtx, 0, "won",                    3,  2,  5);
   460  **       xToken(pCtx, 0, "first",                  5,  6, 11);
   461  **       xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3,  6, 11);
   462  **       xToken(pCtx, 0, "place",                  5, 12, 17);
   463  **</codeblock>
   464  **
   465  **   It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
   466  **   xToken() is called. Multiple synonyms may be specified for a single token
   467  **   by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. 
   468  **   There is no limit to the number of synonyms that may be provided for a
   469  **   single token.
   470  **
   471  **   In many cases, method (1) above is the best approach. It does not add 
   472  **   extra data to the FTS index or require FTS5 to query for multiple terms,
   473  **   so it is efficient in terms of disk space and query speed. However, it
   474  **   does not support prefix queries very well. If, as suggested above, the
   475  **   token "first" is substituted for "1st" by the tokenizer, then the query:
   476  **
   477  **   <codeblock>
   478  **     ... MATCH '1s*'</codeblock>
   479  **
   480  **   will not match documents that contain the token "1st" (as the tokenizer
   481  **   will probably not map "1s" to any prefix of "first").
   482  **
   483  **   For full prefix support, method (3) may be preferred. In this case, 
   484  **   because the index contains entries for both "first" and "1st", prefix
   485  **   queries such as 'fi*' or '1s*' will match correctly. However, because
   486  **   extra entries are added to the FTS index, this method uses more space
   487  **   within the database.
   488  **
   489  **   Method (2) offers a midpoint between (1) and (3). Using this method,
   490  **   a query such as '1s*' will match documents that contain the literal 
   491  **   token "1st", but not "first" (assuming the tokenizer is not able to
   492  **   provide synonyms for prefixes). However, a non-prefix query like '1st'
   493  **   will match against "1st" and "first". This method does not require
   494  **   extra disk space, as no extra entries are added to the FTS index. 
   495  **   On the other hand, it may require more CPU cycles to run MATCH queries,
   496  **   as separate queries of the FTS index are required for each synonym.
   497  **
   498  **   When using methods (2) or (3), it is important that the tokenizer only
   499  **   provide synonyms when tokenizing document text (method (3)) or query
   500  **   text (method (2)), not both. Doing so will not cause any errors, but is
   501  **   inefficient.
   502  */
   503  typedef struct Fts5Tokenizer Fts5Tokenizer;    /* Tokenizer instance handle produced by xCreate(); not defined in this header */
   504  typedef struct fts5_tokenizer fts5_tokenizer;  /* Tokenizer method table registered with FTS5 */
   505  struct fts5_tokenizer {
   506    int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);  /* Allocate and initialize a tokenizer instance */
   507    void (*xDelete)(Fts5Tokenizer*);  /* Delete a handle previously allocated by xCreate() */
   508    int (*xTokenize)(Fts5Tokenizer*,  /* Tokenize pText, invoking xToken() for each token */
   509        void *pCtx,           /* Passed as the first argument to xToken() */
   510        int flags,            /* Mask of FTS5_TOKENIZE_* flags */
   511        const char *pText, int nText,   /* Input text and its size in bytes; may or may not be nul-terminated */
   512        int (*xToken)(
   513          void *pCtx,         /* Copy of 2nd argument to xTokenize() */
   514          int tflags,         /* Mask of FTS5_TOKEN_* flags */
   515          const char *pToken, /* Pointer to buffer containing token */
   516          int nToken,         /* Size of token in bytes */
   517          int iStart,         /* Byte offset of token within input text */
   518          int iEnd            /* Byte offset of the first byte following the token's source text */
   519        )
   520    );
   521  };
   522  
   523  /* Flags that may be passed as the third argument to xTokenize() */
   524  #define FTS5_TOKENIZE_QUERY     0x0001      /* Tokenizing text of a MATCH query */
   525  #define FTS5_TOKENIZE_PREFIX    0x0002      /* With QUERY: last token is a prefix */
   526  #define FTS5_TOKENIZE_DOCUMENT  0x0004      /* Document being inserted or removed */
   527  #define FTS5_TOKENIZE_AUX       0x0008      /* Request from an auxiliary function */
   528  
   529  /* Flags that may be passed by the tokenizer implementation back to FTS5
   530  ** as the second argument (tflags) to the supplied xToken callback. */
   531  #define FTS5_TOKEN_COLOCATED    0x0001      /* Same position as prev. token */
   532  
   533  /*
   534  ** END OF CUSTOM TOKENIZERS
   535  *************************************************************************/
   536  
   537  /*************************************************************************
   538  ** FTS5 EXTENSION REGISTRATION API
   539  */
   540  typedef struct fts5_api fts5_api;   /* Registration API for tokenizers and auxiliary functions */
   541  struct fts5_api {
   542    int iVersion;                   /* Currently always set to 2 */
   543    /* Create a new tokenizer. pContext is the pointer later passed as the
   544    ** first argument to the tokenizer's xCreate() method. */
   545    int (*xCreateTokenizer)(
   546      fts5_api *pApi,
   547      const char *zName,
   548      void *pContext,
   549      fts5_tokenizer *pTokenizer,
   550      void (*xDestroy)(void*)       /* NOTE(review): presumably a destructor for pContext - confirm */
   551    );
   552    /* Find an existing tokenizer. NOTE(review): presumably *ppContext and
   553    ** *pTokenizer receive the values registered under zName - confirm. */
   554    int (*xFindTokenizer)(
   555      fts5_api *pApi,
   556      const char *zName,
   557      void **ppContext,
   558      fts5_tokenizer *pTokenizer
   559    );
   560    /* Create a new auxiliary function. pContext is the pointer later
   561    ** returned to the function by Fts5ExtensionApi.xUserData(). */
   562    int (*xCreateFunction)(
   563      fts5_api *pApi,
   564      const char *zName,
   565      void *pContext,
   566      fts5_extension_function xFunction,
   567      void (*xDestroy)(void*)       /* NOTE(review): presumably a destructor for pContext - confirm */
   568    );
   569  };
   570  
   571  /*
   572  ** END OF REGISTRATION API
   573  *************************************************************************/
   574  
   575  #ifdef __cplusplus
   576  }  /* end of the 'extern "C"' block */
   577  #endif
   578  
   579  #endif /* _FTS5_H */