modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts2/fts2_tokenizer.c (about)

     1  /*
     2  ** 2007 June 22
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  ******************************************************************************
    12  **
    13  ** This is part of an SQLite module implementing full-text search.
    14  ** This particular file implements the generic tokenizer interface.
    15  */
    16  
    17  /*
    18  ** The code in this file is only compiled if:
    19  **
    20  **     * The FTS2 module is being built as an extension
    21  **       (in which case SQLITE_CORE is not defined), or
    22  **
    23  **     * The FTS2 module is being built into the core of
    24  **       SQLite (in which case SQLITE_ENABLE_FTS2 is defined).
    25  */
    26  #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
    27  
    28  
    29  #include "sqlite3.h"
    30  #include "sqlite3ext.h"
    31  SQLITE_EXTENSION_INIT3
    32  
    33  #include "fts2_hash.h"
    34  #include "fts2_tokenizer.h"
    35  #include <assert.h>
    36  
    37  /*
    38  ** Implementation of the SQL scalar function for accessing the underlying 
    39  ** hash table. This function may be called as follows:
    40  **
    41  **   SELECT <function-name>(<key-name>);
    42  **   SELECT <function-name>(<key-name>, <pointer>);
    43  **
    44  ** where <function-name> is the name passed as the second argument
    45  ** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
    46  **
    47  ** If the <pointer> argument is specified, it must be a blob value
    48  ** containing a pointer to be stored as the hash data corresponding
    49  ** to the string <key-name>. If <pointer> is not specified, then
    50  ** the string <key-name> must already exist in the has table. Otherwise,
    51  ** an error is returned.
    52  **
    53  ** Whether or not the <pointer> argument is specified, the value returned
    54  ** is a blob containing the pointer stored as the hash data corresponding
    55  ** to string <key-name> (after the hash-table is updated, if applicable).
    56  */
    57  static void scalarFunc(
    58    sqlite3_context *context,
    59    int argc,
    60    sqlite3_value **argv
    61  ){
    62    fts2Hash *pHash;
    63    void *pPtr = 0;
    64    const unsigned char *zName;
    65    int nName;
    66  
    67    assert( argc==1 || argc==2 );
    68  
    69    pHash = (fts2Hash *)sqlite3_user_data(context);
    70  
    71    zName = sqlite3_value_text(argv[0]);
    72    nName = sqlite3_value_bytes(argv[0])+1;
    73  
    74    if( argc==2 ){
    75      void *pOld;
    76      int n = sqlite3_value_bytes(argv[1]);
    77      if( n!=sizeof(pPtr) ){
    78        sqlite3_result_error(context, "argument type mismatch", -1);
    79        return;
    80      }
    81      pPtr = *(void **)sqlite3_value_blob(argv[1]);
    82      pOld = sqlite3Fts2HashInsert(pHash, (void *)zName, nName, pPtr);
    83      if( pOld==pPtr ){
    84        sqlite3_result_error(context, "out of memory", -1);
    85        return;
    86      }
    87    }else{
    88      pPtr = sqlite3Fts2HashFind(pHash, zName, nName);
    89      if( !pPtr ){
    90        char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
    91        sqlite3_result_error(context, zErr, -1);
    92        sqlite3_free(zErr);
    93        return;
    94      }
    95    }
    96  
    97    sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT);
    98  }
    99  
   100  #ifdef SQLITE_TEST
   101  
   102  #if defined(INCLUDE_SQLITE_TCL_H)
   103  #  include "sqlite_tcl.h"
   104  #else
   105  #  include "tcl.h"
   106  #endif
   107  #include <string.h>
   108  
   109  /*
   110  ** Implementation of a special SQL scalar function for testing tokenizers 
   111  ** designed to be used in concert with the Tcl testing framework. This
   112  ** function must be called with two arguments:
   113  **
   114  **   SELECT <function-name>(<key-name>, <input-string>);
   115  **   SELECT <function-name>(<key-name>, <pointer>);
   116  **
   117  ** where <function-name> is the name passed as the second argument
   118  ** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
   119  ** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
   120  **
   121  ** The return value is a string that may be interpreted as a Tcl
   122  ** list. For each token in the <input-string>, three elements are
   123  ** added to the returned list. The first is the token position, the 
   124  ** second is the token text (folded, stemmed, etc.) and the third is the
   125  ** substring of <input-string> associated with the token. For example, 
   126  ** using the built-in "simple" tokenizer:
   127  **
   128  **   SELECT fts_tokenizer_test('simple', 'I don't see how');
   129  **
   130  ** will return the string:
   131  **
   132  **   "{0 i I 1 dont don't 2 see see 3 how how}"
   133  **   
   134  */
   135  static void testFunc(
   136    sqlite3_context *context,
   137    int argc,
   138    sqlite3_value **argv
   139  ){
   140    fts2Hash *pHash;
   141    sqlite3_tokenizer_module *p;
   142    sqlite3_tokenizer *pTokenizer = 0;
   143    sqlite3_tokenizer_cursor *pCsr = 0;
   144  
   145    const char *zErr = 0;
   146  
   147    const char *zName;
   148    int nName;
   149    const char *zInput;
   150    int nInput;
   151  
   152    const char *zArg = 0;
   153  
   154    const char *zToken;
   155    int nToken;
   156    int iStart;
   157    int iEnd;
   158    int iPos;
   159  
   160    Tcl_Obj *pRet;
   161  
   162    assert( argc==2 || argc==3 );
   163  
   164    nName = sqlite3_value_bytes(argv[0]);
   165    zName = (const char *)sqlite3_value_text(argv[0]);
   166    nInput = sqlite3_value_bytes(argv[argc-1]);
   167    zInput = (const char *)sqlite3_value_text(argv[argc-1]);
   168  
   169    if( argc==3 ){
   170      zArg = (const char *)sqlite3_value_text(argv[1]);
   171    }
   172  
   173    pHash = (fts2Hash *)sqlite3_user_data(context);
   174    p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
   175  
   176    if( !p ){
   177      char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName);
   178      sqlite3_result_error(context, zErr, -1);
   179      sqlite3_free(zErr);
   180      return;
   181    }
   182  
   183    pRet = Tcl_NewObj();
   184    Tcl_IncrRefCount(pRet);
   185  
   186    if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
   187      zErr = "error in xCreate()";
   188      goto finish;
   189    }
   190    pTokenizer->pModule = p;
   191    if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
   192      zErr = "error in xOpen()";
   193      goto finish;
   194    }
   195    pCsr->pTokenizer = pTokenizer;
   196  
   197    while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
   198      Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
   199      Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
   200      zToken = &zInput[iStart];
   201      nToken = iEnd-iStart;
   202      Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
   203    }
   204  
   205    if( SQLITE_OK!=p->xClose(pCsr) ){
   206      zErr = "error in xClose()";
   207      goto finish;
   208    }
   209    if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
   210      zErr = "error in xDestroy()";
   211      goto finish;
   212    }
   213  
   214  finish:
   215    if( zErr ){
   216      sqlite3_result_error(context, zErr, -1);
   217    }else{
   218      sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
   219    }
   220    Tcl_DecrRefCount(pRet);
   221  }
   222  
   223  static
   224  int registerTokenizer(
   225    sqlite3 *db, 
   226    char *zName, 
   227    const sqlite3_tokenizer_module *p
   228  ){
   229    int rc;
   230    sqlite3_stmt *pStmt;
   231    const char zSql[] = "SELECT fts2_tokenizer(?, ?)";
   232  
   233    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
   234    if( rc!=SQLITE_OK ){
   235      return rc;
   236    }
   237  
   238    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
   239    sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
   240    sqlite3_step(pStmt);
   241  
   242    return sqlite3_finalize(pStmt);
   243  }
   244  
   245  static
   246  int queryFts2Tokenizer(
   247    sqlite3 *db, 
   248    char *zName,  
   249    const sqlite3_tokenizer_module **pp
   250  ){
   251    int rc;
   252    sqlite3_stmt *pStmt;
   253    const char zSql[] = "SELECT fts2_tokenizer(?)";
   254  
   255    *pp = 0;
   256    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
   257    if( rc!=SQLITE_OK ){
   258      return rc;
   259    }
   260  
   261    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
   262    if( SQLITE_ROW==sqlite3_step(pStmt) ){
   263      if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
   264        memcpy(pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
   265      }
   266    }
   267  
   268    return sqlite3_finalize(pStmt);
   269  }
   270  
   271  void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
   272  
   273  /*
   274  ** Implementation of the scalar function fts2_tokenizer_internal_test().
   275  ** This function is used for testing only, it is not included in the
   276  ** build unless SQLITE_TEST is defined.
   277  **
   278  ** The purpose of this is to test that the fts2_tokenizer() function
   279  ** can be used as designed by the C-code in the queryFts2Tokenizer and
   280  ** registerTokenizer() functions above. These two functions are repeated
   281  ** in the README.tokenizer file as an example, so it is important to
   282  ** test them.
   283  **
   284  ** To run the tests, evaluate the fts2_tokenizer_internal_test() scalar
   285  ** function with no arguments. An assert() will fail if a problem is
   286  ** detected. i.e.:
   287  **
   288  **     SELECT fts2_tokenizer_internal_test();
   289  **
   290  */
   291  static void intTestFunc(
   292    sqlite3_context *context,
   293    int argc,
   294    sqlite3_value **argv
   295  ){
   296    int rc;
   297    const sqlite3_tokenizer_module *p1;
   298    const sqlite3_tokenizer_module *p2;
   299    sqlite3 *db = (sqlite3 *)sqlite3_user_data(context);
   300  
   301    /* Test the query function */
   302    sqlite3Fts2SimpleTokenizerModule(&p1);
   303    rc = queryFts2Tokenizer(db, "simple", &p2);
   304    assert( rc==SQLITE_OK );
   305    assert( p1==p2 );
   306    rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
   307    assert( rc==SQLITE_ERROR );
   308    assert( p2==0 );
   309    assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") );
   310  
   311    /* Test the storage function */
   312    rc = registerTokenizer(db, "nosuchtokenizer", p1);
   313    assert( rc==SQLITE_OK );
   314    rc = queryFts2Tokenizer(db, "nosuchtokenizer", &p2);
   315    assert( rc==SQLITE_OK );
   316    assert( p2==p1 );
   317  
   318    sqlite3_result_text(context, "ok", -1, SQLITE_STATIC);
   319  }
   320  
   321  #endif
   322  
   323  /*
   324  ** Set up SQL objects in database db used to access the contents of
   325  ** the hash table pointed to by argument pHash. The hash table must
   326  ** been initialized to use string keys, and to take a private copy 
   327  ** of the key when a value is inserted. i.e. by a call similar to:
   328  **
   329  **    sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
   330  **
   331  ** This function adds a scalar function (see header comment above
   332  ** scalarFunc() in this file for details) and, if ENABLE_TABLE is
   333  ** defined at compilation time, a temporary virtual table (see header 
   334  ** comment above struct HashTableVtab) to the database schema. Both 
   335  ** provide read/write access to the contents of *pHash.
   336  **
   337  ** The third argument to this function, zName, is used as the name
   338  ** of both the scalar and, if created, the virtual table.
   339  */
   340  int sqlite3Fts2InitHashTable(
   341    sqlite3 *db, 
   342    fts2Hash *pHash, 
   343    const char *zName
   344  ){
   345    int rc = SQLITE_OK;
   346    void *p = (void *)pHash;
   347    const int any = SQLITE_ANY;
   348    char *zTest = 0;
   349    char *zTest2 = 0;
   350  
   351  #ifdef SQLITE_TEST
   352    void *pdb = (void *)db;
   353    zTest = sqlite3_mprintf("%s_test", zName);
   354    zTest2 = sqlite3_mprintf("%s_internal_test", zName);
   355    if( !zTest || !zTest2 ){
   356      rc = SQLITE_NOMEM;
   357    }
   358  #endif
   359  
   360    if( rc!=SQLITE_OK
   361     || (rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0))
   362     || (rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0))
   363  #ifdef SQLITE_TEST
   364     || (rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0))
   365     || (rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0))
   366     || (rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0))
   367  #endif
   368    );
   369  
   370    sqlite3_free(zTest);
   371    sqlite3_free(zTest2);
   372    return rc;
   373  }
   374  
   375  #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */