modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts3/fts3_tokenize_vtab.c (about)

     1  /*
     2  ** 2013 Apr 22
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  ******************************************************************************
    12  **
    13  ** This file contains code for the "fts3tokenize" virtual table module.
    14  ** An fts3tokenize virtual table is created as follows:
    15  **
    16  **   CREATE VIRTUAL TABLE <tbl> USING fts3tokenize(
    17  **       <tokenizer-name>, <arg-1>, ...
    18  **   );
    19  **
    20  ** The table created has the following schema:
    21  **
    22  **   CREATE TABLE <tbl>(input, token, start, end, position)
    23  **
    24  ** When queried, the query must include a WHERE clause of type:
    25  **
    26  **   input = <string>
    27  **
** The virtual table module tokenizes this <string> using the FTS3
** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE
** statement, and returns one row for each token in the result, with
** fields set as follows:
    32  **
    33  **   input:   Always set to a copy of <string>
    34  **   token:   A token from the input.
    35  **   start:   Byte offset of the token within the input <string>.
    36  **   end:     Byte offset of the byte immediately following the end of the
    37  **            token within the input string.
**   position: Token offset of token within input.
    39  **
    40  */
    41  #include "fts3Int.h"
    42  #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
    43  
    44  #include <string.h>
    45  #include <assert.h>
    46  
typedef struct Fts3tokTable Fts3tokTable;
typedef struct Fts3tokCursor Fts3tokCursor;

/*
** Virtual table structure.
**
** One of these is allocated by fts3tokConnectMethod() for each fts3tokenize
** table and released by fts3tokDisconnectMethod(), which also passes pTok
** to pMod->xDestroy().
*/
struct Fts3tokTable {
  sqlite3_vtab base;              /* Base class used by SQLite core */
  /* Tokenizer module whose xCreate/xDestroy/xOpen/xNext/xClose are invoked */
  const sqlite3_tokenizer_module *pMod;
  /* Tokenizer instance obtained from pMod->xCreate() */
  sqlite3_tokenizer *pTok;
};
    58  
/*
** Virtual table cursor structure.
**
** A cursor is zeroed when opened and again by fts3tokResetCursor(). The
** xEof method reports end-of-file whenever zToken is NULL. zInput is
** allocated with sqlite3_malloc() by the xFilter method and released by
** fts3tokResetCursor(); zToken is set by the tokenizer's xNext method and
** is never freed by this file.
*/
struct Fts3tokCursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  char *zInput;                   /* Input string */
  sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */
  int iRowid;                     /* Current 'rowid' value */
  const char *zToken;             /* Current 'token' value */
  int nToken;                     /* Size of zToken in bytes */
  int iStart;                     /* Current 'start' value */
  int iEnd;                       /* Current 'end' value */
  int iPos;                       /* Current 'pos' value */
};
    73  
    74  /*
    75  ** Query FTS for the tokenizer implementation named zName.
    76  */
    77  static int fts3tokQueryTokenizer(
    78    Fts3Hash *pHash,
    79    const char *zName,
    80    const sqlite3_tokenizer_module **pp,
    81    char **pzErr
    82  ){
    83    sqlite3_tokenizer_module *p;
    84    int nName = (int)strlen(zName);
    85  
    86    p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
    87    if( !p ){
    88      sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
    89      return SQLITE_ERROR;
    90    }
    91  
    92    *pp = p;
    93    return SQLITE_OK;
    94  }
    95  
    96  /*
    97  ** The second argument, argv[], is an array of pointers to nul-terminated
    98  ** strings. This function makes a copy of the array and strings into a 
    99  ** single block of memory. It then dequotes any of the strings that appear
   100  ** to be quoted.
   101  **
   102  ** If successful, output parameter *pazDequote is set to point at the
   103  ** array of dequoted strings and SQLITE_OK is returned. The caller is
   104  ** responsible for eventually calling sqlite3_free() to free the array
   105  ** in this case. Or, if an error occurs, an SQLite error code is returned.
   106  ** The final value of *pazDequote is undefined in this case.
   107  */
   108  static int fts3tokDequoteArray(
   109    int argc,                       /* Number of elements in argv[] */
   110    const char * const *argv,       /* Input array */
   111    char ***pazDequote              /* Output array */
   112  ){
   113    int rc = SQLITE_OK;             /* Return code */
   114    if( argc==0 ){
   115      *pazDequote = 0;
   116    }else{
   117      int i;
   118      int nByte = 0;
   119      char **azDequote;
   120  
   121      for(i=0; i<argc; i++){
   122        nByte += (int)(strlen(argv[i]) + 1);
   123      }
   124  
   125      *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte);
   126      if( azDequote==0 ){
   127        rc = SQLITE_NOMEM;
   128      }else{
   129        char *pSpace = (char *)&azDequote[argc];
   130        for(i=0; i<argc; i++){
   131          int n = (int)strlen(argv[i]);
   132          azDequote[i] = pSpace;
   133          memcpy(pSpace, argv[i], n+1);
   134          sqlite3Fts3Dequote(pSpace);
   135          pSpace += (n+1);
   136        }
   137      }
   138    }
   139  
   140    return rc;
   141  }
   142  
/*
** Schema of the tokenizer table. This string is passed to
** sqlite3_declare_vtab() by fts3tokConnectMethod(). The column order
** matters: fts3tokColumnMethod() maps column indexes 0..4 to input, token,
** start, end and position, and fts3tokBestIndexMethod() looks for an
** equality constraint on column 0 (input).
*/
#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)"
   147  
   148  /*
   149  ** This function does all the work for both the xConnect and xCreate methods.
   150  ** These tables have no persistent representation of their own, so xConnect
   151  ** and xCreate are identical operations.
   152  **
   153  **   argv[0]: module name
   154  **   argv[1]: database name 
   155  **   argv[2]: table name
   156  **   argv[3]: first argument (tokenizer name)
   157  */
   158  static int fts3tokConnectMethod(
   159    sqlite3 *db,                    /* Database connection */
   160    void *pHash,                    /* Hash table of tokenizers */
   161    int argc,                       /* Number of elements in argv array */
   162    const char * const *argv,       /* xCreate/xConnect argument array */
   163    sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
   164    char **pzErr                    /* OUT: sqlite3_malloc'd error message */
   165  ){
   166    Fts3tokTable *pTab = 0;
   167    const sqlite3_tokenizer_module *pMod = 0;
   168    sqlite3_tokenizer *pTok = 0;
   169    int rc;
   170    char **azDequote = 0;
   171    int nDequote;
   172  
   173    rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA);
   174    if( rc!=SQLITE_OK ) return rc;
   175  
   176    nDequote = argc-3;
   177    rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);
   178  
   179    if( rc==SQLITE_OK ){
   180      const char *zModule;
   181      if( nDequote<1 ){
   182        zModule = "simple";
   183      }else{
   184        zModule = azDequote[0];
   185      }
   186      rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr);
   187    }
   188  
   189    assert( (rc==SQLITE_OK)==(pMod!=0) );
   190    if( rc==SQLITE_OK ){
   191      const char * const *azArg = (const char * const *)&azDequote[1];
   192      rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok);
   193    }
   194  
   195    if( rc==SQLITE_OK ){
   196      pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable));
   197      if( pTab==0 ){
   198        rc = SQLITE_NOMEM;
   199      }
   200    }
   201  
   202    if( rc==SQLITE_OK ){
   203      memset(pTab, 0, sizeof(Fts3tokTable));
   204      pTab->pMod = pMod;
   205      pTab->pTok = pTok;
   206      *ppVtab = &pTab->base;
   207    }else{
   208      if( pTok ){
   209        pMod->xDestroy(pTok);
   210      }
   211    }
   212  
   213    sqlite3_free(azDequote);
   214    return rc;
   215  }
   216  
   217  /*
   218  ** This function does the work for both the xDisconnect and xDestroy methods.
   219  ** These tables have no persistent representation of their own, so xDisconnect
   220  ** and xDestroy are identical operations.
   221  */
   222  static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){
   223    Fts3tokTable *pTab = (Fts3tokTable *)pVtab;
   224  
   225    pTab->pMod->xDestroy(pTab->pTok);
   226    sqlite3_free(pTab);
   227    return SQLITE_OK;
   228  }
   229  
   230  /*
   231  ** xBestIndex - Analyze a WHERE and ORDER BY clause.
   232  */
   233  static int fts3tokBestIndexMethod(
   234    sqlite3_vtab *pVTab, 
   235    sqlite3_index_info *pInfo
   236  ){
   237    int i;
   238    UNUSED_PARAMETER(pVTab);
   239  
   240    for(i=0; i<pInfo->nConstraint; i++){
   241      if( pInfo->aConstraint[i].usable 
   242       && pInfo->aConstraint[i].iColumn==0 
   243       && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ 
   244      ){
   245        pInfo->idxNum = 1;
   246        pInfo->aConstraintUsage[i].argvIndex = 1;
   247        pInfo->aConstraintUsage[i].omit = 1;
   248        pInfo->estimatedCost = 1;
   249        return SQLITE_OK;
   250      }
   251    }
   252  
   253    pInfo->idxNum = 0;
   254    assert( pInfo->estimatedCost>1000000.0 );
   255  
   256    return SQLITE_OK;
   257  }
   258  
   259  /*
   260  ** xOpen - Open a cursor.
   261  */
   262  static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
   263    Fts3tokCursor *pCsr;
   264    UNUSED_PARAMETER(pVTab);
   265  
   266    pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor));
   267    if( pCsr==0 ){
   268      return SQLITE_NOMEM;
   269    }
   270    memset(pCsr, 0, sizeof(Fts3tokCursor));
   271  
   272    *ppCsr = (sqlite3_vtab_cursor *)pCsr;
   273    return SQLITE_OK;
   274  }
   275  
   276  /*
   277  ** Reset the tokenizer cursor passed as the only argument. As if it had
   278  ** just been returned by fts3tokOpenMethod().
   279  */
   280  static void fts3tokResetCursor(Fts3tokCursor *pCsr){
   281    if( pCsr->pCsr ){
   282      Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab);
   283      pTab->pMod->xClose(pCsr->pCsr);
   284      pCsr->pCsr = 0;
   285    }
   286    sqlite3_free(pCsr->zInput);
   287    pCsr->zInput = 0;
   288    pCsr->zToken = 0;
   289    pCsr->nToken = 0;
   290    pCsr->iStart = 0;
   291    pCsr->iEnd = 0;
   292    pCsr->iPos = 0;
   293    pCsr->iRowid = 0;
   294  }
   295  
   296  /*
   297  ** xClose - Close a cursor.
   298  */
   299  static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){
   300    Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
   301  
   302    fts3tokResetCursor(pCsr);
   303    sqlite3_free(pCsr);
   304    return SQLITE_OK;
   305  }
   306  
   307  /*
   308  ** xNext - Advance the cursor to the next row, if any.
   309  */
   310  static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){
   311    Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
   312    Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
   313    int rc;                         /* Return code */
   314  
   315    pCsr->iRowid++;
   316    rc = pTab->pMod->xNext(pCsr->pCsr,
   317        &pCsr->zToken, &pCsr->nToken,
   318        &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos
   319    );
   320  
   321    if( rc!=SQLITE_OK ){
   322      fts3tokResetCursor(pCsr);
   323      if( rc==SQLITE_DONE ) rc = SQLITE_OK;
   324    }
   325  
   326    return rc;
   327  }
   328  
   329  /*
   330  ** xFilter - Initialize a cursor to point at the start of its data.
   331  */
   332  static int fts3tokFilterMethod(
   333    sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
   334    int idxNum,                     /* Strategy index */
   335    const char *idxStr,             /* Unused */
   336    int nVal,                       /* Number of elements in apVal */
   337    sqlite3_value **apVal           /* Arguments for the indexing scheme */
   338  ){
   339    int rc = SQLITE_ERROR;
   340    Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
   341    Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab);
   342    UNUSED_PARAMETER(idxStr);
   343    UNUSED_PARAMETER(nVal);
   344  
   345    fts3tokResetCursor(pCsr);
   346    if( idxNum==1 ){
   347      const char *zByte = (const char *)sqlite3_value_text(apVal[0]);
   348      int nByte = sqlite3_value_bytes(apVal[0]);
   349      pCsr->zInput = sqlite3_malloc(nByte+1);
   350      if( pCsr->zInput==0 ){
   351        rc = SQLITE_NOMEM;
   352      }else{
   353        memcpy(pCsr->zInput, zByte, nByte);
   354        pCsr->zInput[nByte] = 0;
   355        rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr);
   356        if( rc==SQLITE_OK ){
   357          pCsr->pCsr->pTokenizer = pTab->pTok;
   358        }
   359      }
   360    }
   361  
   362    if( rc!=SQLITE_OK ) return rc;
   363    return fts3tokNextMethod(pCursor);
   364  }
   365  
   366  /*
   367  ** xEof - Return true if the cursor is at EOF, or false otherwise.
   368  */
   369  static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){
   370    Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
   371    return (pCsr->zToken==0);
   372  }
   373  
   374  /*
   375  ** xColumn - Return a column value.
   376  */
   377  static int fts3tokColumnMethod(
   378    sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
   379    sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
   380    int iCol                        /* Index of column to read value from */
   381  ){
   382    Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
   383  
   384    /* CREATE TABLE x(input, token, start, end, position) */
   385    switch( iCol ){
   386      case 0:
   387        sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT);
   388        break;
   389      case 1:
   390        sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT);
   391        break;
   392      case 2:
   393        sqlite3_result_int(pCtx, pCsr->iStart);
   394        break;
   395      case 3:
   396        sqlite3_result_int(pCtx, pCsr->iEnd);
   397        break;
   398      default:
   399        assert( iCol==4 );
   400        sqlite3_result_int(pCtx, pCsr->iPos);
   401        break;
   402    }
   403    return SQLITE_OK;
   404  }
   405  
   406  /*
   407  ** xRowid - Return the current rowid for the cursor.
   408  */
   409  static int fts3tokRowidMethod(
   410    sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
   411    sqlite_int64 *pRowid            /* OUT: Rowid value */
   412  ){
   413    Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor;
   414    *pRowid = (sqlite3_int64)pCsr->iRowid;
   415    return SQLITE_OK;
   416  }
   417  
   418  /*
   419  ** Register the fts3tok module with database connection db. Return SQLITE_OK
   420  ** if successful or an error code if sqlite3_create_module() fails.
   421  */
   422  int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){
   423    static const sqlite3_module fts3tok_module = {
   424       0,                           /* iVersion      */
   425       fts3tokConnectMethod,        /* xCreate       */
   426       fts3tokConnectMethod,        /* xConnect      */
   427       fts3tokBestIndexMethod,      /* xBestIndex    */
   428       fts3tokDisconnectMethod,     /* xDisconnect   */
   429       fts3tokDisconnectMethod,     /* xDestroy      */
   430       fts3tokOpenMethod,           /* xOpen         */
   431       fts3tokCloseMethod,          /* xClose        */
   432       fts3tokFilterMethod,         /* xFilter       */
   433       fts3tokNextMethod,           /* xNext         */
   434       fts3tokEofMethod,            /* xEof          */
   435       fts3tokColumnMethod,         /* xColumn       */
   436       fts3tokRowidMethod,          /* xRowid        */
   437       0,                           /* xUpdate       */
   438       0,                           /* xBegin        */
   439       0,                           /* xSync         */
   440       0,                           /* xCommit       */
   441       0,                           /* xRollback     */
   442       0,                           /* xFindFunction */
   443       0,                           /* xRename       */
   444       0,                           /* xSavepoint    */
   445       0,                           /* xRelease      */
   446       0                            /* xRollbackTo   */
   447    };
   448    int rc;                         /* Return code */
   449  
   450    rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash);
   451    return rc;
   452  }
   453  
   454  #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */