modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts1/fts1.c (about)

     1  /* fts1 has a design flaw which can lead to database corruption (see
     2  ** below).  It is recommended not to use it any longer, instead use
     3  ** fts3 (or higher).  If you believe that your use of fts1 is safe,
     4  ** add -DSQLITE_ENABLE_BROKEN_FTS1=1 to your CFLAGS.
     5  */
     6  #if (!defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)) \
     7          && !defined(SQLITE_ENABLE_BROKEN_FTS1)
     8  #error fts1 has a design flaw and has been deprecated.
     9  #endif
    10  /* The flaw is that fts1 uses the content table's unaliased rowid as
    11  ** the unique docid.  fts1 embeds the rowid in the index it builds,
    12  ** and expects the rowid to not change.  The SQLite VACUUM operation
    13  ** will renumber such rowids, thereby breaking fts1.  If you are using
    14  ** fts1 in a system which has disabled VACUUM, then you can continue
    15  ** to use it safely.  Note that PRAGMA auto_vacuum does NOT disable
    16  ** VACUUM, though systems using auto_vacuum are unlikely to invoke
    17  ** VACUUM.
    18  **
    19  ** fts1 should be safe even across VACUUM if you only insert documents
    20  ** and never delete.
    21  */
    22  
    23  /* The author disclaims copyright to this source code.
    24   *
    25   * This is an SQLite module implementing full-text search.
    26   */
    27  
    28  /*
    29  ** The code in this file is only compiled if:
    30  **
    31  **     * The FTS1 module is being built as an extension
    32  **       (in which case SQLITE_CORE is not defined), or
    33  **
    34  **     * The FTS1 module is being built into the core of
    35  **       SQLite (in which case SQLITE_ENABLE_FTS1 is defined).
    36  */
    37  #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1)
    38  
    39  #if defined(SQLITE_ENABLE_FTS1) && !defined(SQLITE_CORE)
    40  # define SQLITE_CORE 1
    41  #endif
    42  
    43  #include <assert.h>
    44  #include <stdlib.h>
    45  #include <stdio.h>
    46  #include <string.h>
    47  #include <ctype.h>
    48  
    49  #include "fts1.h"
    50  #include "fts1_hash.h"
    51  #include "fts1_tokenizer.h"
    52  #include "sqlite3.h"
    53  #include "sqlite3ext.h"
    54  SQLITE_EXTENSION_INIT1
    55  
    56  
    57  #if 0
    58  # define TRACE(A)  printf A; fflush(stdout)
    59  #else
    60  # define TRACE(A)
    61  #endif
    62  
    63  /* utility functions */
    64  
    65  typedef struct StringBuffer {
    66    int len;      /* length, not including null terminator */
    67    int alloced;  /* Space allocated for s[] */ 
    68    char *s;      /* Content of the string */
    69  } StringBuffer;
    70  
    71  static void initStringBuffer(StringBuffer *sb){
    72    sb->len = 0;
    73    sb->alloced = 100;
    74    sb->s = malloc(100);
    75    sb->s[0] = '\0';
    76  }
    77  
    78  static void nappend(StringBuffer *sb, const char *zFrom, int nFrom){
    79    if( sb->len + nFrom >= sb->alloced ){
    80      sb->alloced = sb->len + nFrom + 100;
    81      sb->s = realloc(sb->s, sb->alloced+1);
    82      if( sb->s==0 ){
    83        initStringBuffer(sb);
    84        return;
    85      }
    86    }
    87    memcpy(sb->s + sb->len, zFrom, nFrom);
    88    sb->len += nFrom;
    89    sb->s[sb->len] = 0;
    90  }
    91  static void append(StringBuffer *sb, const char *zFrom){
    92    nappend(sb, zFrom, strlen(zFrom));
    93  }
    94  
    95  /* We encode variable-length integers in little-endian order using seven bits
    96   * per byte as follows:
    97  **
    98  ** KEY:
    99  **         A = 0xxxxxxx    7 bits of data and one flag bit
   100  **         B = 1xxxxxxx    7 bits of data and one flag bit
   101  **
   102  **  7 bits - A
   103  ** 14 bits - BA
   104  ** 21 bits - BBA
   105  ** and so on.
   106  */
   107  
   108  /* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */
   109  #define VARINT_MAX 10
   110  
   111  /* Write a 64-bit variable-length integer to memory starting at p[0].
   112   * The length of data written will be between 1 and VARINT_MAX bytes.
   113   * The number of bytes written is returned. */
   114  static int putVarint(char *p, sqlite_int64 v){
   115    unsigned char *q = (unsigned char *) p;
   116    sqlite_uint64 vu = v;
   117    do{
   118      *q++ = (unsigned char) ((vu & 0x7f) | 0x80);
   119      vu >>= 7;
   120    }while( vu!=0 );
   121    q[-1] &= 0x7f;  /* turn off high bit in final byte */
   122    assert( q - (unsigned char *)p <= VARINT_MAX );
   123    return (int) (q - (unsigned char *)p);
   124  }
   125  
   126  /* Read a 64-bit variable-length integer from memory starting at p[0].
   127   * Return the number of bytes read, or 0 on error.
   128   * The value is stored in *v. */
   129  static int getVarint(const char *p, sqlite_int64 *v){
   130    const unsigned char *q = (const unsigned char *) p;
   131    sqlite_uint64 x = 0, y = 1;
   132    while( (*q & 0x80) == 0x80 ){
   133      x += y * (*q++ & 0x7f);
   134      y <<= 7;
   135      if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
   136        assert( 0 );
   137        return 0;
   138      }
   139    }
   140    x += y * (*q++);
   141    *v = (sqlite_int64) x;
   142    return (int) (q - (unsigned char *)p);
   143  }
   144  
   145  static int getVarint32(const char *p, int *pi){
   146   sqlite_int64 i;
   147   int ret = getVarint(p, &i);
   148   *pi = (int) i;
   149   assert( *pi==i );
   150   return ret;
   151  }
   152  
   153  /*** Document lists ***
   154   *
   155   * A document list holds a sorted list of varint-encoded document IDs.
   156   *
   157   * A doclist with type DL_POSITIONS_OFFSETS is stored like this:
   158   *
   159   * array {
   160   *   varint docid;
   161   *   array {
   162   *     varint position;     (delta from previous position plus POS_BASE)
   163   *     varint startOffset;  (delta from previous startOffset)
   164   *     varint endOffset;    (delta from startOffset)
   165   *   }
   166   * }
   167   *
   168   * Here, array { X } means zero or more occurrences of X, adjacent in memory.
   169   *
   170   * A position list may hold positions for text in multiple columns.  A position
   171   * POS_COLUMN is followed by a varint containing the index of the column for
   172   * following positions in the list.  Any positions appearing before any
   173   * occurrences of POS_COLUMN are for column 0.
   174   *
   175   * A doclist with type DL_POSITIONS is like the above, but holds only docids
   176   * and positions without offset information.
   177   *
   178   * A doclist with type DL_DOCIDS is like the above, but holds only docids
   179   * without positions or offset information.
   180   *
   181   * On disk, every document list has positions and offsets, so we don't bother
   182   * to serialize a doclist's type.
   183   * 
   184   * We don't yet delta-encode document IDs; doing so will probably be a
   185   * modest win.
   186   *
   187   * NOTE(shess) I've thought of a slightly (1%) better offset encoding.
   188   * After the first offset, estimate the next offset by using the
   189   * current token position and the previous token position and offset,
   190   * offset to handle some variance.  So the estimate would be
   191   * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded
   192   * as normal.  Offsets more than 64 chars from the estimate are
   193   * encoded as the delta to the previous start offset + 128.  An
   194   * additional tiny increment can be gained by using the end offset of
   195   * the previous token to make the estimate a tiny bit more precise.
   196  */
   197  
   198  /* It is not safe to call isspace(), tolower(), or isalnum() on
   199  ** hi-bit-set characters.  This is the same solution used in the
   200  ** tokenizer.
   201  */
   202  /* TODO(shess) The snippet-generation code should be using the
   203  ** tokenizer-generated tokens rather than doing its own local
   204  ** tokenization.
   205  */
   206  /* TODO(shess) Is __isascii() a portable version of (c&0x80)==0? */
   207  static int safe_isspace(char c){
   208    return (c&0x80)==0 ? isspace((unsigned char)c) : 0;
   209  }
   210  static int safe_tolower(char c){
   211    return (c&0x80)==0 ? tolower((unsigned char)c) : c;
   212  }
   213  static int safe_isalnum(char c){
   214    return (c&0x80)==0 ? isalnum((unsigned char)c) : 0;
   215  }
   216  
   217  typedef enum DocListType {
   218    DL_DOCIDS,              /* docids only */
   219    DL_POSITIONS,           /* docids + positions */
   220    DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
   221  } DocListType;
   222  
   223  /*
   224  ** By default, only positions and not offsets are stored in the doclists.
   225  ** To change this so that offsets are stored too, compile with
   226  **
   227  **          -DDL_DEFAULT=DL_POSITIONS_OFFSETS
   228  **
   229  */
   230  #ifndef DL_DEFAULT
   231  # define DL_DEFAULT DL_POSITIONS
   232  #endif
   233  
   234  typedef struct DocList {
   235    char *pData;
   236    int nData;
   237    DocListType iType;
   238    int iLastColumn;    /* the last column written */
   239    int iLastPos;       /* the last position written */
   240    int iLastOffset;    /* the last start offset written */
   241  } DocList;
   242  
   243  enum {
   244    POS_END = 0,        /* end of this position list */
   245    POS_COLUMN,         /* followed by new column number */
   246    POS_BASE
   247  };
   248  
   249  /* Initialize a new DocList to hold the given data. */
   250  static void docListInit(DocList *d, DocListType iType,
   251                          const char *pData, int nData){
   252    d->nData = nData;
   253    if( nData>0 ){
   254      d->pData = malloc(nData);
   255      memcpy(d->pData, pData, nData);
   256    } else {
   257      d->pData = NULL;
   258    }
   259    d->iType = iType;
   260    d->iLastColumn = 0;
   261    d->iLastPos = d->iLastOffset = 0;
   262  }
   263  
   264  /* Create a new dynamically-allocated DocList. */
   265  static DocList *docListNew(DocListType iType){
   266    DocList *d = (DocList *) malloc(sizeof(DocList));
   267    docListInit(d, iType, 0, 0);
   268    return d;
   269  }
   270  
   271  static void docListDestroy(DocList *d){
   272    free(d->pData);
   273  #ifndef NDEBUG
   274    memset(d, 0x55, sizeof(*d));
   275  #endif
   276  }
   277  
   278  static void docListDelete(DocList *d){
   279    docListDestroy(d);
   280    free(d);
   281  }
   282  
   283  static char *docListEnd(DocList *d){
   284    return d->pData + d->nData;
   285  }
   286  
   287  /* Append a varint to a DocList's data. */
   288  static void appendVarint(DocList *d, sqlite_int64 i){
   289    char c[VARINT_MAX];
   290    int n = putVarint(c, i);
   291    d->pData = realloc(d->pData, d->nData + n);
   292    memcpy(d->pData + d->nData, c, n);
   293    d->nData += n;
   294  }
   295  
   296  static void docListAddDocid(DocList *d, sqlite_int64 iDocid){
   297    appendVarint(d, iDocid);
   298    if( d->iType>=DL_POSITIONS ){
   299      appendVarint(d, POS_END);  /* initially empty position list */
   300      d->iLastColumn = 0;
   301      d->iLastPos = d->iLastOffset = 0;
   302    }
   303  }
   304  
   305  /* helper function for docListAddPos and docListAddPosOffset */
   306  static void addPos(DocList *d, int iColumn, int iPos){
   307    assert( d->nData>0 );
   308    --d->nData;  /* remove previous terminator */
   309    if( iColumn!=d->iLastColumn ){
   310      assert( iColumn>d->iLastColumn );
   311      appendVarint(d, POS_COLUMN);
   312      appendVarint(d, iColumn);
   313      d->iLastColumn = iColumn;
   314      d->iLastPos = d->iLastOffset = 0;
   315    }
   316    assert( iPos>=d->iLastPos );
   317    appendVarint(d, iPos-d->iLastPos+POS_BASE);
   318    d->iLastPos = iPos;
   319  }
   320  
   321  /* Add a position to the last position list in a doclist. */
   322  static void docListAddPos(DocList *d, int iColumn, int iPos){
   323    assert( d->iType==DL_POSITIONS );
   324    addPos(d, iColumn, iPos);
   325    appendVarint(d, POS_END);  /* add new terminator */
   326  }
   327  
   328  /*
   329  ** Add a position and starting and ending offsets to a doclist.
   330  **
   331  ** If the doclist is setup to handle only positions, then insert
   332  ** the position only and ignore the offsets.
   333  */
   334  static void docListAddPosOffset(
   335    DocList *d,             /* Doclist under construction */
   336    int iColumn,            /* Column the inserted term is part of */
   337    int iPos,               /* Position of the inserted term */
   338    int iStartOffset,       /* Starting offset of inserted term */
   339    int iEndOffset          /* Ending offset of inserted term */
   340  ){
   341    assert( d->iType>=DL_POSITIONS );
   342    addPos(d, iColumn, iPos);
   343    if( d->iType==DL_POSITIONS_OFFSETS ){
   344      assert( iStartOffset>=d->iLastOffset );
   345      appendVarint(d, iStartOffset-d->iLastOffset);
   346      d->iLastOffset = iStartOffset;
   347      assert( iEndOffset>=iStartOffset );
   348      appendVarint(d, iEndOffset-iStartOffset);
   349    }
   350    appendVarint(d, POS_END);  /* add new terminator */
   351  }
   352  
   353  /*
   354  ** A DocListReader object is a cursor into a doclist.  Initialize
   355  ** the cursor to the beginning of the doclist by calling readerInit().
   356  ** Then use routines
   357  **
   358  **      peekDocid()
   359  **      readDocid()
   360  **      readPosition()
   361  **      skipPositionList()
   362  **      and so forth...
   363  **
   364  ** to read information out of the doclist.  When we reach the end
   365  ** of the doclist, atEnd() returns TRUE.
   366  */
   367  typedef struct DocListReader {
   368    DocList *pDoclist;  /* The document list we are stepping through */
   369    char *p;            /* Pointer to next unread byte in the doclist */
   370    int iLastColumn;
   371    int iLastPos;  /* the last position read, or -1 when not in a position list */
   372  } DocListReader;
   373  
   374  /*
   375  ** Initialize the DocListReader r to point to the beginning of pDoclist.
   376  */
   377  static void readerInit(DocListReader *r, DocList *pDoclist){
   378    r->pDoclist = pDoclist;
   379    if( pDoclist!=NULL ){
   380      r->p = pDoclist->pData;
   381    }
   382    r->iLastColumn = -1;
   383    r->iLastPos = -1;
   384  }
   385  
   386  /*
   387  ** Return TRUE if we have reached then end of pReader and there is
   388  ** nothing else left to read.
   389  */
   390  static int atEnd(DocListReader *pReader){
   391    return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist));
   392  }
   393  
   394  /* Peek at the next docid without advancing the read pointer. 
   395  */
   396  static sqlite_int64 peekDocid(DocListReader *pReader){
   397    sqlite_int64 ret;
   398    assert( !atEnd(pReader) );
   399    assert( pReader->iLastPos==-1 );
   400    getVarint(pReader->p, &ret);
   401    return ret;
   402  }
   403  
   404  /* Read the next docid.   See also nextDocid().
   405  */
   406  static sqlite_int64 readDocid(DocListReader *pReader){
   407    sqlite_int64 ret;
   408    assert( !atEnd(pReader) );
   409    assert( pReader->iLastPos==-1 );
   410    pReader->p += getVarint(pReader->p, &ret);
   411    if( pReader->pDoclist->iType>=DL_POSITIONS ){
   412      pReader->iLastColumn = 0;
   413      pReader->iLastPos = 0;
   414    }
   415    return ret;
   416  }
   417  
   418  /* Read the next position and column index from a position list.
   419   * Returns the position, or -1 at the end of the list. */
   420  static int readPosition(DocListReader *pReader, int *iColumn){
   421    int i;
   422    int iType = pReader->pDoclist->iType;
   423  
   424    if( pReader->iLastPos==-1 ){
   425      return -1;
   426    }
   427    assert( !atEnd(pReader) );
   428  
   429    if( iType<DL_POSITIONS ){
   430      return -1;
   431    }
   432    pReader->p += getVarint32(pReader->p, &i);
   433    if( i==POS_END ){
   434      pReader->iLastColumn = pReader->iLastPos = -1;
   435      *iColumn = -1;
   436      return -1;
   437    }
   438    if( i==POS_COLUMN ){
   439      pReader->p += getVarint32(pReader->p, &pReader->iLastColumn);
   440      pReader->iLastPos = 0;
   441      pReader->p += getVarint32(pReader->p, &i);
   442      assert( i>=POS_BASE );
   443    }
   444    pReader->iLastPos += ((int) i)-POS_BASE;
   445    if( iType>=DL_POSITIONS_OFFSETS ){
   446      /* Skip over offsets, ignoring them for now. */
   447      int iStart, iEnd;
   448      pReader->p += getVarint32(pReader->p, &iStart);
   449      pReader->p += getVarint32(pReader->p, &iEnd);
   450    }
   451    *iColumn = pReader->iLastColumn;
   452    return pReader->iLastPos;
   453  }
   454  
   455  /* Skip past the end of a position list. */
   456  static void skipPositionList(DocListReader *pReader){
   457    DocList *p = pReader->pDoclist;
   458    if( p && p->iType>=DL_POSITIONS ){
   459      int iColumn;
   460      while( readPosition(pReader, &iColumn)!=-1 ){}
   461    }
   462  }
   463  
   464  /* Skip over a docid, including its position list if the doclist has
   465   * positions. */
   466  static void skipDocument(DocListReader *pReader){
   467    readDocid(pReader);
   468    skipPositionList(pReader);
   469  }
   470  
   471  /* Skip past all docids which are less than [iDocid].  Returns 1 if a docid
   472   * matching [iDocid] was found.  */
   473  static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){
   474    sqlite_int64 d = 0;
   475    while( !atEnd(pReader) && (d=peekDocid(pReader))<iDocid ){
   476      skipDocument(pReader);
   477    }
   478    return !atEnd(pReader) && d==iDocid;
   479  }
   480  
   481  /* Return the first document in a document list.
   482  */
   483  static sqlite_int64 firstDocid(DocList *d){
   484    DocListReader r;
   485    readerInit(&r, d);
   486    return readDocid(&r);
   487  }
   488  
   489  #ifdef SQLITE_DEBUG
   490  /*
   491  ** This routine is used for debugging purpose only.
   492  **
   493  ** Write the content of a doclist to standard output.
   494  */
   495  static void printDoclist(DocList *p){
   496    DocListReader r;
   497    const char *zSep = "";
   498  
   499    readerInit(&r, p);
   500    while( !atEnd(&r) ){
   501      sqlite_int64 docid = readDocid(&r);
   502      if( docid==0 ){
   503        skipPositionList(&r);
   504        continue;
   505      }
   506      printf("%s%lld", zSep, docid);
   507      zSep =  ",";
   508      if( p->iType>=DL_POSITIONS ){
   509        int iPos, iCol;
   510        const char *zDiv = "";
   511        printf("(");
   512        while( (iPos = readPosition(&r, &iCol))>=0 ){
   513          printf("%s%d:%d", zDiv, iCol, iPos);
   514          zDiv = ":";
   515        }
   516        printf(")");
   517      }
   518    }
   519    printf("\n");
   520    fflush(stdout);
   521  }
   522  #endif /* SQLITE_DEBUG */
   523  
   524  /* Trim the given doclist to contain only positions in column
   525   * [iRestrictColumn]. */
   526  static void docListRestrictColumn(DocList *in, int iRestrictColumn){
   527    DocListReader r;
   528    DocList out;
   529  
   530    assert( in->iType>=DL_POSITIONS );
   531    readerInit(&r, in);
   532    docListInit(&out, DL_POSITIONS, NULL, 0);
   533  
   534    while( !atEnd(&r) ){
   535      sqlite_int64 iDocid = readDocid(&r);
   536      int iPos, iColumn;
   537  
   538      docListAddDocid(&out, iDocid);
   539      while( (iPos = readPosition(&r, &iColumn)) != -1 ){
   540        if( iColumn==iRestrictColumn ){
   541          docListAddPos(&out, iColumn, iPos);
   542        }
   543      }
   544    }
   545  
   546    docListDestroy(in);
   547    *in = out;
   548  }
   549  
   550  /* Trim the given doclist by discarding any docids without any remaining
   551   * positions. */
   552  static void docListDiscardEmpty(DocList *in) {
   553    DocListReader r;
   554    DocList out;
   555  
   556    /* TODO: It would be nice to implement this operation in place; that
   557     * could save a significant amount of memory in queries with long doclists. */
   558    assert( in->iType>=DL_POSITIONS );
   559    readerInit(&r, in);
   560    docListInit(&out, DL_POSITIONS, NULL, 0);
   561  
   562    while( !atEnd(&r) ){
   563      sqlite_int64 iDocid = readDocid(&r);
   564      int match = 0;
   565      int iPos, iColumn;
   566      while( (iPos = readPosition(&r, &iColumn)) != -1 ){
   567        if( !match ){
   568          docListAddDocid(&out, iDocid);
   569          match = 1;
   570        }
   571        docListAddPos(&out, iColumn, iPos);
   572      }
   573    }
   574  
   575    docListDestroy(in);
   576    *in = out;
   577  }
   578  
   579  /* Helper function for docListUpdate() and docListAccumulate().
   580  ** Splices a doclist element into the doclist represented by r,
   581  ** leaving r pointing after the newly spliced element.
   582  */
   583  static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid,
   584                                   const char *pSource, int nSource){
   585    DocList *d = r->pDoclist;
   586    char *pTarget;
   587    int nTarget, found;
   588  
   589    found = skipToDocid(r, iDocid);
   590  
   591    /* Describe slice in d to place pSource/nSource. */
   592    pTarget = r->p;
   593    if( found ){
   594      skipDocument(r);
   595      nTarget = r->p-pTarget;
   596    }else{
   597      nTarget = 0;
   598    }
   599  
   600    /* The sense of the following is that there are three possibilities.
   601    ** If nTarget==nSource, we should not move any memory nor realloc.
   602    ** If nTarget>nSource, trim target and realloc.
   603    ** If nTarget<nSource, realloc then expand target.
   604    */
   605    if( nTarget>nSource ){
   606      memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
   607    }
   608    if( nTarget!=nSource ){
   609      int iDoclist = pTarget-d->pData;
   610      d->pData = realloc(d->pData, d->nData+nSource-nTarget);
   611      pTarget = d->pData+iDoclist;
   612    }
   613    if( nTarget<nSource ){
   614      memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget));
   615    }
   616  
   617    memcpy(pTarget, pSource, nSource);
   618    d->nData += nSource-nTarget;
   619    r->p = pTarget+nSource;
   620  }
   621  
   622  /* Insert/update pUpdate into the doclist. */
   623  static void docListUpdate(DocList *d, DocList *pUpdate){
   624    DocListReader reader;
   625  
   626    assert( d!=NULL && pUpdate!=NULL );
   627    assert( d->iType==pUpdate->iType);
   628  
   629    readerInit(&reader, d);
   630    docListSpliceElement(&reader, firstDocid(pUpdate),
   631                         pUpdate->pData, pUpdate->nData);
   632  }
   633  
   634  /* Propagate elements from pUpdate to pAcc, overwriting elements with
   635  ** matching docids.
   636  */
   637  static void docListAccumulate(DocList *pAcc, DocList *pUpdate){
   638    DocListReader accReader, updateReader;
   639  
   640    /* Handle edge cases where one doclist is empty. */
   641    assert( pAcc!=NULL );
   642    if( pUpdate==NULL || pUpdate->nData==0 ) return;
   643    if( pAcc->nData==0 ){
   644      pAcc->pData = malloc(pUpdate->nData);
   645      memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData);
   646      pAcc->nData = pUpdate->nData;
   647      return;
   648    }
   649  
   650    readerInit(&accReader, pAcc);
   651    readerInit(&updateReader, pUpdate);
   652  
   653    while( !atEnd(&updateReader) ){
   654      char *pSource = updateReader.p;
   655      sqlite_int64 iDocid = readDocid(&updateReader);
   656      skipPositionList(&updateReader);
   657      docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource);
   658    }
   659  }
   660  
   661  /*
   662  ** Read the next docid off of pIn.  Return 0 if we reach the end.
   663  *
   664  * TODO: This assumes that docids are never 0, but they may actually be 0 since
   665  * users can choose docids when inserting into a full-text table.  Fix this.
   666  */
   667  static sqlite_int64 nextDocid(DocListReader *pIn){
   668    skipPositionList(pIn);
   669    return atEnd(pIn) ? 0 : readDocid(pIn);
   670  }
   671  
   672  /*
   673  ** pLeft and pRight are two DocListReaders that are pointing to
   674  ** positions lists of the same document: iDocid. 
   675  **
   676  ** If there are no instances in pLeft or pRight where the position
   677  ** of pLeft is one less than the position of pRight, then this
   678  ** routine adds nothing to pOut.
   679  **
   680  ** If there are one or more instances where positions from pLeft
   681  ** are exactly one less than positions from pRight, then add a new
   682  ** document record to pOut.  If pOut wants to hold positions, then
   683  ** include the positions from pRight that are one more than a
   684  ** position in pLeft.  In other words:  pRight.iPos==pLeft.iPos+1.
   685  **
   686  ** pLeft and pRight are left pointing at the next document record.
   687  */
   688  static void mergePosList(
   689    DocListReader *pLeft,    /* Left position list */
   690    DocListReader *pRight,   /* Right position list */
   691    sqlite_int64 iDocid,     /* The docid from pLeft and pRight */
   692    DocList *pOut            /* Write the merged document record here */
   693  ){
   694    int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol);
   695    int iRightCol, iRightPos = readPosition(pRight, &iRightCol);
   696    int match = 0;
   697  
   698    /* Loop until we've reached the end of both position lists. */
   699    while( iLeftPos!=-1 && iRightPos!=-1 ){
   700      if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){
   701        if( !match ){
   702          docListAddDocid(pOut, iDocid);
   703          match = 1;
   704        }
   705        if( pOut->iType>=DL_POSITIONS ){
   706          docListAddPos(pOut, iRightCol, iRightPos);
   707        }
   708        iLeftPos = readPosition(pLeft, &iLeftCol);
   709        iRightPos = readPosition(pRight, &iRightCol);
   710      }else if( iRightCol<iLeftCol ||
   711                (iRightCol==iLeftCol && iRightPos<iLeftPos+1) ){
   712        iRightPos = readPosition(pRight, &iRightCol);
   713      }else{
   714        iLeftPos = readPosition(pLeft, &iLeftCol);
   715      }
   716    }
   717    if( iLeftPos>=0 ) skipPositionList(pLeft);
   718    if( iRightPos>=0 ) skipPositionList(pRight);
   719  }
   720  
   721  /* We have two doclists:  pLeft and pRight.
   722  ** Write the phrase intersection of these two doclists into pOut.
   723  **
   724  ** A phrase intersection means that two documents only match
   725  ** if pLeft.iPos+1==pRight.iPos.
   726  **
   727  ** The output pOut may or may not contain positions.  If pOut
   728  ** does contain positions, they are the positions of pRight.
   729  */
   730  static void docListPhraseMerge(
   731    DocList *pLeft,    /* Doclist resulting from the words on the left */
   732    DocList *pRight,   /* Doclist for the next word to the right */
   733    DocList *pOut      /* Write the combined doclist here */
   734  ){
   735    DocListReader left, right;
   736    sqlite_int64 docidLeft, docidRight;
   737  
   738    readerInit(&left, pLeft);
   739    readerInit(&right, pRight);
   740    docidLeft = nextDocid(&left);
   741    docidRight = nextDocid(&right);
   742  
   743    while( docidLeft>0 && docidRight>0 ){
   744      if( docidLeft<docidRight ){
   745        docidLeft = nextDocid(&left);
   746      }else if( docidRight<docidLeft ){
   747        docidRight = nextDocid(&right);
   748      }else{
   749        mergePosList(&left, &right, docidLeft, pOut);
   750        docidLeft = nextDocid(&left);
   751        docidRight = nextDocid(&right);
   752      }
   753    }
   754  }
   755  
   756  /* We have two doclists:  pLeft and pRight.
   757  ** Write the intersection of these two doclists into pOut.
   758  ** Only docids are matched.  Position information is ignored.
   759  **
   760  ** The output pOut never holds positions.
   761  */
   762  static void docListAndMerge(
   763    DocList *pLeft,    /* Doclist resulting from the words on the left */
   764    DocList *pRight,   /* Doclist for the next word to the right */
   765    DocList *pOut      /* Write the combined doclist here */
   766  ){
   767    DocListReader left, right;
   768    sqlite_int64 docidLeft, docidRight;
   769  
   770    assert( pOut->iType<DL_POSITIONS );
   771  
   772    readerInit(&left, pLeft);
   773    readerInit(&right, pRight);
   774    docidLeft = nextDocid(&left);
   775    docidRight = nextDocid(&right);
   776  
   777    while( docidLeft>0 && docidRight>0 ){
   778      if( docidLeft<docidRight ){
   779        docidLeft = nextDocid(&left);
   780      }else if( docidRight<docidLeft ){
   781        docidRight = nextDocid(&right);
   782      }else{
   783        docListAddDocid(pOut, docidLeft);
   784        docidLeft = nextDocid(&left);
   785        docidRight = nextDocid(&right);
   786      }
   787    }
   788  }
   789  
   790  /* We have two doclists:  pLeft and pRight.
   791  ** Write the union of these two doclists into pOut.
   792  ** Only docids are matched.  Position information is ignored.
   793  **
   794  ** The output pOut never holds positions.
   795  */
   796  static void docListOrMerge(
   797    DocList *pLeft,    /* Doclist resulting from the words on the left */
   798    DocList *pRight,   /* Doclist for the next word to the right */
   799    DocList *pOut      /* Write the combined doclist here */
   800  ){
   801    DocListReader left, right;
   802    sqlite_int64 docidLeft, docidRight, priorLeft;
   803  
   804    readerInit(&left, pLeft);
   805    readerInit(&right, pRight);
   806    docidLeft = nextDocid(&left);
   807    docidRight = nextDocid(&right);
   808  
   809    while( docidLeft>0 && docidRight>0 ){
   810      if( docidLeft<=docidRight ){
   811        docListAddDocid(pOut, docidLeft);
   812      }else{
   813        docListAddDocid(pOut, docidRight);
   814      }
   815      priorLeft = docidLeft;
   816      if( docidLeft<=docidRight ){
   817        docidLeft = nextDocid(&left);
   818      }
   819      if( docidRight>0 && docidRight<=priorLeft ){
   820        docidRight = nextDocid(&right);
   821      }
   822    }
   823    while( docidLeft>0 ){
   824      docListAddDocid(pOut, docidLeft);
   825      docidLeft = nextDocid(&left);
   826    }
   827    while( docidRight>0 ){
   828      docListAddDocid(pOut, docidRight);
   829      docidRight = nextDocid(&right);
   830    }
   831  }
   832  
   833  /* We have two doclists:  pLeft and pRight.
   834  ** Write into pOut all documents that occur in pLeft but not
   835  ** in pRight.
   836  **
   837  ** Only docids are matched.  Position information is ignored.
   838  **
   839  ** The output pOut never holds positions.
   840  */
   841  static void docListExceptMerge(
   842    DocList *pLeft,    /* Doclist resulting from the words on the left */
   843    DocList *pRight,   /* Doclist for the next word to the right */
   844    DocList *pOut      /* Write the combined doclist here */
   845  ){
   846    DocListReader left, right;
   847    sqlite_int64 docidLeft, docidRight, priorLeft;
   848  
   849    readerInit(&left, pLeft);
   850    readerInit(&right, pRight);
   851    docidLeft = nextDocid(&left);
   852    docidRight = nextDocid(&right);
   853  
   854    while( docidLeft>0 && docidRight>0 ){
   855      priorLeft = docidLeft;
   856      if( docidLeft<docidRight ){
   857        docListAddDocid(pOut, docidLeft);
   858      }
   859      if( docidLeft<=docidRight ){
   860        docidLeft = nextDocid(&left);
   861      }
   862      if( docidRight>0 && docidRight<=priorLeft ){
   863        docidRight = nextDocid(&right);
   864      }
   865    }
   866    while( docidLeft>0 ){
   867      docListAddDocid(pOut, docidLeft);
   868      docidLeft = nextDocid(&left);
   869    }
   870  }
   871  
   872  static char *string_dup_n(const char *s, int n){
   873    char *str = malloc(n + 1);
   874    memcpy(str, s, n);
   875    str[n] = '\0';
   876    return str;
   877  }
   878  
   879  /* Duplicate a string; the caller must free() the returned string.
   880   * (We don't use strdup() since it is not part of the standard C library and
   881   * may not be available everywhere.) */
   882  static char *string_dup(const char *s){
   883    return string_dup_n(s, strlen(s));
   884  }
   885  
   886  /* Format a string, replacing each occurrence of the % character with
   887   * zDb.zName.  This may be more convenient than sqlite_mprintf()
   888   * when one string is used repeatedly in a format string.
   889   * The caller must free() the returned string. */
   890  static char *string_format(const char *zFormat,
   891                             const char *zDb, const char *zName){
   892    const char *p;
   893    size_t len = 0;
   894    size_t nDb = strlen(zDb);
   895    size_t nName = strlen(zName);
   896    size_t nFullTableName = nDb+1+nName;
   897    char *result;
   898    char *r;
   899  
   900    /* first compute length needed */
   901    for(p = zFormat ; *p ; ++p){
   902      len += (*p=='%' ? nFullTableName : 1);
   903    }
   904    len += 1;  /* for null terminator */
   905  
   906    r = result = malloc(len);
   907    for(p = zFormat; *p; ++p){
   908      if( *p=='%' ){
   909        memcpy(r, zDb, nDb);
   910        r += nDb;
   911        *r++ = '.';
   912        memcpy(r, zName, nName);
   913        r += nName;
   914      } else {
   915        *r++ = *p;
   916      }
   917    }
   918    *r++ = '\0';
   919    assert( r == result + len );
   920    return result;
   921  }
   922  
   923  static int sql_exec(sqlite3 *db, const char *zDb, const char *zName,
   924                      const char *zFormat){
   925    char *zCommand = string_format(zFormat, zDb, zName);
   926    int rc;
   927    TRACE(("FTS1 sql: %s\n", zCommand));
   928    rc = sqlite3_exec(db, zCommand, NULL, 0, NULL);
   929    free(zCommand);
   930    return rc;
   931  }
   932  
   933  static int sql_prepare(sqlite3 *db, const char *zDb, const char *zName,
   934                         sqlite3_stmt **ppStmt, const char *zFormat){
   935    char *zCommand = string_format(zFormat, zDb, zName);
   936    int rc;
   937    TRACE(("FTS1 prepare: %s\n", zCommand));
   938    rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL);
   939    free(zCommand);
   940    return rc;
   941  }
   942  
   943  /* end utility functions */
   944  
   945  /* Forward reference */
   946  typedef struct fulltext_vtab fulltext_vtab;
   947  
   948  /* A single term in a query is represented by an instances of
   949  ** the following structure.
   950  */
   951  typedef struct QueryTerm {
   952    short int nPhrase; /* How many following terms are part of the same phrase */
   953    short int iPhrase; /* This is the i-th term of a phrase. */
   954    short int iColumn; /* Column of the index that must match this term */
   955    signed char isOr;  /* this term is preceded by "OR" */
   956    signed char isNot; /* this term is preceded by "-" */
   957    char *pTerm;       /* text of the term.  '\000' terminated.  malloced */
   958    int nTerm;         /* Number of bytes in pTerm[] */
   959  } QueryTerm;
   960  
   961  
   962  /* A query string is parsed into a Query structure.
   963   *
   964   * We could, in theory, allow query strings to be complicated
   965   * nested expressions with precedence determined by parentheses.
   966   * But none of the major search engines do this.  (Perhaps the
   967   * feeling is that an parenthesized expression is two complex of
   968   * an idea for the average user to grasp.)  Taking our lead from
   969   * the major search engines, we will allow queries to be a list
   970   * of terms (with an implied AND operator) or phrases in double-quotes,
   971   * with a single optional "-" before each non-phrase term to designate
   972   * negation and an optional OR connector.
   973   *
   974   * OR binds more tightly than the implied AND, which is what the
   975   * major search engines seem to do.  So, for example:
   976   * 
   977   *    [one two OR three]     ==>    one AND (two OR three)
   978   *    [one OR two three]     ==>    (one OR two) AND three
   979   *
   980   * A "-" before a term matches all entries that lack that term.
   981   * The "-" must occur immediately before the term with in intervening
   982   * space.  This is how the search engines do it.
   983   *
   984   * A NOT term cannot be the right-hand operand of an OR.  If this
   985   * occurs in the query string, the NOT is ignored:
   986   *
   987   *    [one OR -two]          ==>    one OR two
   988   *
   989   */
   990  typedef struct Query {
   991    fulltext_vtab *pFts;  /* The full text index */
   992    int nTerms;           /* Number of terms in the query */
   993    QueryTerm *pTerms;    /* Array of terms.  Space obtained from malloc() */
   994    int nextIsOr;         /* Set the isOr flag on the next inserted term */
   995    int nextColumn;       /* Next word parsed must be in this column */
   996    int dfltColumn;       /* The default column */
   997  } Query;
   998  
   999  
  1000  /*
  1001  ** An instance of the following structure keeps track of generated
  1002  ** matching-word offset information and snippets.
  1003  */
  1004  typedef struct Snippet {
  1005    int nMatch;     /* Total number of matches */
  1006    int nAlloc;     /* Space allocated for aMatch[] */
  1007    struct snippetMatch { /* One entry for each matching term */
  1008      char snStatus;       /* Status flag for use while constructing snippets */
  1009      short int iCol;      /* The column that contains the match */
  1010      short int iTerm;     /* The index in Query.pTerms[] of the matching term */
  1011      short int nByte;     /* Number of bytes in the term */
  1012      int iStart;          /* The offset to the first character of the term */
  1013    } *aMatch;      /* Points to space obtained from malloc */
  1014    char *zOffset;  /* Text rendering of aMatch[] */
  1015    int nOffset;    /* strlen(zOffset) */
  1016    char *zSnippet; /* Snippet text */
  1017    int nSnippet;   /* strlen(zSnippet) */
  1018  } Snippet;
  1019  
  1020  
  1021  typedef enum QueryType {
  1022    QUERY_GENERIC,   /* table scan */
  1023    QUERY_ROWID,     /* lookup by rowid */
  1024    QUERY_FULLTEXT   /* QUERY_FULLTEXT + [i] is a full-text search for column i*/
  1025  } QueryType;
  1026  
  1027  /* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0
  1028  ** before we start aggregating into larger segments.  Lower CHUNK_MAX
  1029  ** means that for a given input we have more individual segments per
  1030  ** term, which means more rows in the table and a bigger index (due to
  1031  ** both more rows and bigger rowids).  But it also reduces the average
  1032  ** cost of adding new elements to the segment 0 doclist, and it seems
  1033  ** to reduce the number of pages read and written during inserts.  256
  1034  ** was chosen by measuring insertion times for a certain input (first
  1035  ** 10k documents of Enron corpus), though including query performance
  1036  ** in the decision may argue for a larger value.
  1037  */
  1038  #define CHUNK_MAX 256
  1039  
  1040  typedef enum fulltext_statement {
  1041    CONTENT_INSERT_STMT,
  1042    CONTENT_SELECT_STMT,
  1043    CONTENT_UPDATE_STMT,
  1044    CONTENT_DELETE_STMT,
  1045  
  1046    TERM_SELECT_STMT,
  1047    TERM_SELECT_ALL_STMT,
  1048    TERM_INSERT_STMT,
  1049    TERM_UPDATE_STMT,
  1050    TERM_DELETE_STMT,
  1051  
  1052    MAX_STMT                     /* Always at end! */
  1053  } fulltext_statement;
  1054  
  1055  /* These must exactly match the enum above. */
  1056  /* TODO(adam): Is there some risk that a statement (in particular,
  1057  ** pTermSelectStmt) will be used in two cursors at once, e.g.  if a
  1058  ** query joins a virtual table to itself?  If so perhaps we should
  1059  ** move some of these to the cursor object.
  1060  */
  1061  static const char *const fulltext_zStatement[MAX_STMT] = {
  1062    /* CONTENT_INSERT */ NULL,  /* generated in contentInsertStatement() */
  1063    /* CONTENT_SELECT */ "select * from %_content where rowid = ?",
  1064    /* CONTENT_UPDATE */ NULL,  /* generated in contentUpdateStatement() */
  1065    /* CONTENT_DELETE */ "delete from %_content where rowid = ?",
  1066  
  1067    /* TERM_SELECT */
  1068    "select rowid, doclist from %_term where term = ? and segment = ?",
  1069    /* TERM_SELECT_ALL */
  1070    "select doclist from %_term where term = ? order by segment",
  1071    /* TERM_INSERT */
  1072    "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)",
  1073    /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?",
  1074    /* TERM_DELETE */ "delete from %_term where rowid = ?",
  1075  };
  1076  
  1077  /*
  1078  ** A connection to a fulltext index is an instance of the following
  1079  ** structure.  The xCreate and xConnect methods create an instance
  1080  ** of this structure and xDestroy and xDisconnect free that instance.
  1081  ** All other methods receive a pointer to the structure as one of their
  1082  ** arguments.
  1083  */
  1084  struct fulltext_vtab {
  1085    sqlite3_vtab base;               /* Base class used by SQLite core */
  1086    sqlite3 *db;                     /* The database connection */
  1087    const char *zDb;                 /* logical database name */
  1088    const char *zName;               /* virtual table name */
  1089    int nColumn;                     /* number of columns in virtual table */
  1090    char **azColumn;                 /* column names.  malloced */
  1091    char **azContentColumn;          /* column names in content table; malloced */
  1092    sqlite3_tokenizer *pTokenizer;   /* tokenizer for inserts and queries */
  1093  
  1094    /* Precompiled statements which we keep as long as the table is
  1095    ** open.
  1096    */
  1097    sqlite3_stmt *pFulltextStatements[MAX_STMT];
  1098  };
  1099  
  1100  /*
  1101  ** When the core wants to do a query, it create a cursor using a
  1102  ** call to xOpen.  This structure is an instance of a cursor.  It
  1103  ** is destroyed by xClose.
  1104  */
  1105  typedef struct fulltext_cursor {
  1106    sqlite3_vtab_cursor base;        /* Base class used by SQLite core */
  1107    QueryType iCursorType;           /* Copy of sqlite3_index_info.idxNum */
  1108    sqlite3_stmt *pStmt;             /* Prepared statement in use by the cursor */
  1109    int eof;                         /* True if at End Of Results */
  1110    Query q;                         /* Parsed query string */
  1111    Snippet snippet;                 /* Cached snippet for the current row */
  1112    int iColumn;                     /* Column being searched */
  1113    DocListReader result;  /* used when iCursorType == QUERY_FULLTEXT */ 
  1114  } fulltext_cursor;
  1115  
  1116  static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
  1117    return (fulltext_vtab *) c->base.pVtab;
  1118  }
  1119  
  1120  static const sqlite3_module fulltextModule;   /* forward declaration */
  1121  
  1122  /* Append a list of strings separated by commas to a StringBuffer. */
  1123  static void appendList(StringBuffer *sb, int nString, char **azString){
  1124    int i;
  1125    for(i=0; i<nString; ++i){
  1126      if( i>0 ) append(sb, ", ");
  1127      append(sb, azString[i]);
  1128    }
  1129  }
  1130  
  1131  /* Return a dynamically generated statement of the form
  1132   *   insert into %_content (rowid, ...) values (?, ...)
  1133   */
  1134  static const char *contentInsertStatement(fulltext_vtab *v){
  1135    StringBuffer sb;
  1136    int i;
  1137  
  1138    initStringBuffer(&sb);
  1139    append(&sb, "insert into %_content (rowid, ");
  1140    appendList(&sb, v->nColumn, v->azContentColumn);
  1141    append(&sb, ") values (?");
  1142    for(i=0; i<v->nColumn; ++i)
  1143      append(&sb, ", ?");
  1144    append(&sb, ")");
  1145    return sb.s;
  1146  }
  1147  
  1148  /* Return a dynamically generated statement of the form
  1149   *   update %_content set [col_0] = ?, [col_1] = ?, ...
  1150   *                    where rowid = ?
  1151   */
  1152  static const char *contentUpdateStatement(fulltext_vtab *v){
  1153    StringBuffer sb;
  1154    int i;
  1155  
  1156    initStringBuffer(&sb);
  1157    append(&sb, "update %_content set ");
  1158    for(i=0; i<v->nColumn; ++i) {
  1159      if( i>0 ){
  1160        append(&sb, ", ");
  1161      }
  1162      append(&sb, v->azContentColumn[i]);
  1163      append(&sb, " = ?");
  1164    }
  1165    append(&sb, " where rowid = ?");
  1166    return sb.s;
  1167  }
  1168  
  1169  /* Puts a freshly-prepared statement determined by iStmt in *ppStmt.
  1170  ** If the indicated statement has never been prepared, it is prepared
  1171  ** and cached, otherwise the cached version is reset.
  1172  */
  1173  static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt,
  1174                               sqlite3_stmt **ppStmt){
  1175    assert( iStmt<MAX_STMT );
  1176    if( v->pFulltextStatements[iStmt]==NULL ){
  1177      const char *zStmt;
  1178      int rc;
  1179      switch( iStmt ){
  1180        case CONTENT_INSERT_STMT:
  1181          zStmt = contentInsertStatement(v); break;
  1182        case CONTENT_UPDATE_STMT:
  1183          zStmt = contentUpdateStatement(v); break;
  1184        default:
  1185          zStmt = fulltext_zStatement[iStmt];
  1186      }
  1187      rc = sql_prepare(v->db, v->zDb, v->zName, &v->pFulltextStatements[iStmt],
  1188                           zStmt);
  1189      if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt);
  1190      if( rc!=SQLITE_OK ) return rc;
  1191    } else {
  1192      int rc = sqlite3_reset(v->pFulltextStatements[iStmt]);
  1193      if( rc!=SQLITE_OK ) return rc;
  1194    }
  1195  
  1196    *ppStmt = v->pFulltextStatements[iStmt];
  1197    return SQLITE_OK;
  1198  }
  1199  
  1200  /* Step the indicated statement, handling errors SQLITE_BUSY (by
  1201  ** retrying) and SQLITE_SCHEMA (by re-preparing and transferring
  1202  ** bindings to the new statement).
  1203  ** TODO(adam): We should extend this function so that it can work with
  1204  ** statements declared locally, not only globally cached statements.
  1205  */
  1206  static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt,
  1207                                sqlite3_stmt **ppStmt){
  1208    int rc;
  1209    sqlite3_stmt *s = *ppStmt;
  1210    assert( iStmt<MAX_STMT );
  1211    assert( s==v->pFulltextStatements[iStmt] );
  1212  
  1213    while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){
  1214      if( rc==SQLITE_BUSY ) continue;
  1215      if( rc!=SQLITE_ERROR ) return rc;
  1216  
  1217      /* If an SQLITE_SCHEMA error has occurred, then finalizing this
  1218       * statement is going to delete the fulltext_vtab structure. If
  1219       * the statement just executed is in the pFulltextStatements[]
  1220       * array, it will be finalized twice. So remove it before
  1221       * calling sqlite3_finalize().
  1222       */
  1223      v->pFulltextStatements[iStmt] = NULL;
  1224      rc = sqlite3_finalize(s);
  1225      break;
  1226    }
  1227    return rc;
  1228  
  1229   err:
  1230    sqlite3_finalize(s);
  1231    return rc;
  1232  }
  1233  
  1234  /* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK.
  1235  ** Useful for statements like UPDATE, where we expect no results.
  1236  */
  1237  static int sql_single_step_statement(fulltext_vtab *v,
  1238                                       fulltext_statement iStmt,
  1239                                       sqlite3_stmt **ppStmt){
  1240    int rc = sql_step_statement(v, iStmt, ppStmt);
  1241    return (rc==SQLITE_DONE) ? SQLITE_OK : rc;
  1242  }
  1243  
  1244  /* insert into %_content (rowid, ...) values ([rowid], [pValues]) */
  1245  static int content_insert(fulltext_vtab *v, sqlite3_value *rowid,
  1246                            sqlite3_value **pValues){
  1247    sqlite3_stmt *s;
  1248    int i;
  1249    int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s);
  1250    if( rc!=SQLITE_OK ) return rc;
  1251  
  1252    rc = sqlite3_bind_value(s, 1, rowid);
  1253    if( rc!=SQLITE_OK ) return rc;
  1254  
  1255    for(i=0; i<v->nColumn; ++i){
  1256      rc = sqlite3_bind_value(s, 2+i, pValues[i]);
  1257      if( rc!=SQLITE_OK ) return rc;
  1258    }
  1259  
  1260    return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s);
  1261  }
  1262  
  1263  /* update %_content set col0 = pValues[0], col1 = pValues[1], ...
  1264   *                  where rowid = [iRowid] */
  1265  static int content_update(fulltext_vtab *v, sqlite3_value **pValues,
  1266                            sqlite_int64 iRowid){
  1267    sqlite3_stmt *s;
  1268    int i;
  1269    int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s);
  1270    if( rc!=SQLITE_OK ) return rc;
  1271  
  1272    for(i=0; i<v->nColumn; ++i){
  1273      rc = sqlite3_bind_value(s, 1+i, pValues[i]);
  1274      if( rc!=SQLITE_OK ) return rc;
  1275    }
  1276  
  1277    rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid);
  1278    if( rc!=SQLITE_OK ) return rc;
  1279  
  1280    return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s);
  1281  }
  1282  
  1283  static void freeStringArray(int nString, const char **pString){
  1284    int i;
  1285  
  1286    for (i=0 ; i < nString ; ++i) {
  1287      if( pString[i]!=NULL ) free((void *) pString[i]);
  1288    }
  1289    free((void *) pString);
  1290  }
  1291  
  1292  /* select * from %_content where rowid = [iRow]
  1293   * The caller must delete the returned array and all strings in it.
  1294   * null fields will be NULL in the returned array.
  1295   *
  1296   * TODO: Perhaps we should return pointer/length strings here for consistency
  1297   * with other code which uses pointer/length. */
  1298  static int content_select(fulltext_vtab *v, sqlite_int64 iRow,
  1299                            const char ***pValues){
  1300    sqlite3_stmt *s;
  1301    const char **values;
  1302    int i;
  1303    int rc;
  1304  
  1305    *pValues = NULL;
  1306  
  1307    rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s);
  1308    if( rc!=SQLITE_OK ) return rc;
  1309  
  1310    rc = sqlite3_bind_int64(s, 1, iRow);
  1311    if( rc!=SQLITE_OK ) return rc;
  1312  
  1313    rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s);
  1314    if( rc!=SQLITE_ROW ) return rc;
  1315  
  1316    values = (const char **) malloc(v->nColumn * sizeof(const char *));
  1317    for(i=0; i<v->nColumn; ++i){
  1318      if( sqlite3_column_type(s, i)==SQLITE_NULL ){
  1319        values[i] = NULL;
  1320      }else{
  1321        values[i] = string_dup((char*)sqlite3_column_text(s, i));
  1322      }
  1323    }
  1324  
  1325    /* We expect only one row.  We must execute another sqlite3_step()
  1326     * to complete the iteration; otherwise the table will remain locked. */
  1327    rc = sqlite3_step(s);
  1328    if( rc==SQLITE_DONE ){
  1329      *pValues = values;
  1330      return SQLITE_OK;
  1331    }
  1332  
  1333    freeStringArray(v->nColumn, values);
  1334    return rc;
  1335  }
  1336  
  1337  /* delete from %_content where rowid = [iRow ] */
  1338  static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){
  1339    sqlite3_stmt *s;
  1340    int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s);
  1341    if( rc!=SQLITE_OK ) return rc;
  1342  
  1343    rc = sqlite3_bind_int64(s, 1, iRow);
  1344    if( rc!=SQLITE_OK ) return rc;
  1345  
  1346    return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s);
  1347  }
  1348  
  1349  /* select rowid, doclist from %_term
  1350   *  where term = [pTerm] and segment = [iSegment]
  1351   * If found, returns SQLITE_ROW; the caller must free the
  1352   * returned doclist.  If no rows found, returns SQLITE_DONE. */
  1353  static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm,
  1354                         int iSegment,
  1355                         sqlite_int64 *rowid, DocList *out){
  1356    sqlite3_stmt *s;
  1357    int rc = sql_get_statement(v, TERM_SELECT_STMT, &s);
  1358    if( rc!=SQLITE_OK ) return rc;
  1359  
  1360    rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  1361    if( rc!=SQLITE_OK ) return rc;
  1362  
  1363    rc = sqlite3_bind_int(s, 2, iSegment);
  1364    if( rc!=SQLITE_OK ) return rc;
  1365  
  1366    rc = sql_step_statement(v, TERM_SELECT_STMT, &s);
  1367    if( rc!=SQLITE_ROW ) return rc;
  1368  
  1369    *rowid = sqlite3_column_int64(s, 0);
  1370    docListInit(out, DL_DEFAULT,
  1371                sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1));
  1372  
  1373    /* We expect only one row.  We must execute another sqlite3_step()
  1374     * to complete the iteration; otherwise the table will remain locked. */
  1375    rc = sqlite3_step(s);
  1376    return rc==SQLITE_DONE ? SQLITE_ROW : rc;
  1377  }
  1378  
  1379  /* Load the segment doclists for term pTerm and merge them in
  1380  ** appropriate order into out.  Returns SQLITE_OK if successful.  If
  1381  ** there are no segments for pTerm, successfully returns an empty
  1382  ** doclist in out.
  1383  **
  1384  ** Each document consists of 1 or more "columns".  The number of
  1385  ** columns is v->nColumn.  If iColumn==v->nColumn, then return
  1386  ** position information about all columns.  If iColumn<v->nColumn,
  1387  ** then only return position information about the iColumn-th column
  1388  ** (where the first column is 0).
  1389  */
  1390  static int term_select_all(
  1391    fulltext_vtab *v,     /* The fulltext index we are querying against */
  1392    int iColumn,          /* If <nColumn, only look at the iColumn-th column */
  1393    const char *pTerm,    /* The term whose posting lists we want */
  1394    int nTerm,            /* Number of bytes in pTerm */
  1395    DocList *out          /* Write the resulting doclist here */
  1396  ){
  1397    DocList doclist;
  1398    sqlite3_stmt *s;
  1399    int rc = sql_get_statement(v, TERM_SELECT_ALL_STMT, &s);
  1400    if( rc!=SQLITE_OK ) return rc;
  1401  
  1402    rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC);
  1403    if( rc!=SQLITE_OK ) return rc;
  1404  
  1405    docListInit(&doclist, DL_DEFAULT, 0, 0);
  1406  
  1407    /* TODO(shess) Handle schema and busy errors. */
  1408    while( (rc=sql_step_statement(v, TERM_SELECT_ALL_STMT, &s))==SQLITE_ROW ){
  1409      DocList old;
  1410  
  1411      /* TODO(shess) If we processed doclists from oldest to newest, we
  1412      ** could skip the malloc() involved with the following call.  For
  1413      ** now, I'd rather keep this logic similar to index_insert_term().
  1414      ** We could additionally drop elements when we see deletes, but
  1415      ** that would require a distinct version of docListAccumulate().
  1416      */
  1417      docListInit(&old, DL_DEFAULT,
  1418                  sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0));
  1419  
  1420      if( iColumn<v->nColumn ){   /* querying a single column */
  1421        docListRestrictColumn(&old, iColumn);
  1422      }
  1423  
  1424      /* doclist contains the newer data, so write it over old.  Then
  1425      ** steal accumulated result for doclist.
  1426      */
  1427      docListAccumulate(&old, &doclist);
  1428      docListDestroy(&doclist);
  1429      doclist = old;
  1430    }
  1431    if( rc!=SQLITE_DONE ){
  1432      docListDestroy(&doclist);
  1433      return rc;
  1434    }
  1435  
  1436    docListDiscardEmpty(&doclist);
  1437    *out = doclist;
  1438    return SQLITE_OK;
  1439  }
  1440  
  1441  /* insert into %_term (rowid, term, segment, doclist)
  1442                 values ([piRowid], [pTerm], [iSegment], [doclist])
  1443  ** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid.
  1444  **
  1445  ** NOTE(shess) piRowid is IN, with values of "space of int64" plus
  1446  ** null, it is not used to pass data back to the caller.
  1447  */
  1448  static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid,
  1449                         const char *pTerm, int nTerm,
  1450                         int iSegment, DocList *doclist){
  1451    sqlite3_stmt *s;
  1452    int rc = sql_get_statement(v, TERM_INSERT_STMT, &s);
  1453    if( rc!=SQLITE_OK ) return rc;
  1454  
  1455    if( piRowid==NULL ){
  1456      rc = sqlite3_bind_null(s, 1);
  1457    }else{
  1458      rc = sqlite3_bind_int64(s, 1, *piRowid);
  1459    }
  1460    if( rc!=SQLITE_OK ) return rc;
  1461  
  1462    rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC);
  1463    if( rc!=SQLITE_OK ) return rc;
  1464  
  1465    rc = sqlite3_bind_int(s, 3, iSegment);
  1466    if( rc!=SQLITE_OK ) return rc;
  1467  
  1468    rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC);
  1469    if( rc!=SQLITE_OK ) return rc;
  1470  
  1471    return sql_single_step_statement(v, TERM_INSERT_STMT, &s);
  1472  }
  1473  
  1474  /* update %_term set doclist = [doclist] where rowid = [rowid] */
  1475  static int term_update(fulltext_vtab *v, sqlite_int64 rowid,
  1476                         DocList *doclist){
  1477    sqlite3_stmt *s;
  1478    int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s);
  1479    if( rc!=SQLITE_OK ) return rc;
  1480  
  1481    rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC);
  1482    if( rc!=SQLITE_OK ) return rc;
  1483  
  1484    rc = sqlite3_bind_int64(s, 2, rowid);
  1485    if( rc!=SQLITE_OK ) return rc;
  1486  
  1487    return sql_single_step_statement(v, TERM_UPDATE_STMT, &s);
  1488  }
  1489  
  1490  static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){
  1491    sqlite3_stmt *s;
  1492    int rc = sql_get_statement(v, TERM_DELETE_STMT, &s);
  1493    if( rc!=SQLITE_OK ) return rc;
  1494  
  1495    rc = sqlite3_bind_int64(s, 1, rowid);
  1496    if( rc!=SQLITE_OK ) return rc;
  1497  
  1498    return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
  1499  }
  1500  
  1501  /*
  1502  ** Free the memory used to contain a fulltext_vtab structure.
  1503  */
  1504  static void fulltext_vtab_destroy(fulltext_vtab *v){
  1505    int iStmt, i;
  1506  
  1507    TRACE(("FTS1 Destroy %p\n", v));
  1508    for( iStmt=0; iStmt<MAX_STMT; iStmt++ ){
  1509      if( v->pFulltextStatements[iStmt]!=NULL ){
  1510        sqlite3_finalize(v->pFulltextStatements[iStmt]);
  1511        v->pFulltextStatements[iStmt] = NULL;
  1512      }
  1513    }
  1514  
  1515    if( v->pTokenizer!=NULL ){
  1516      v->pTokenizer->pModule->xDestroy(v->pTokenizer);
  1517      v->pTokenizer = NULL;
  1518    }
  1519    
  1520    free(v->azColumn);
  1521    for(i = 0; i < v->nColumn; ++i) {
  1522      sqlite3_free(v->azContentColumn[i]);
  1523    }
  1524    free(v->azContentColumn);
  1525    free(v);
  1526  }
  1527  
  1528  /*
  1529  ** Token types for parsing the arguments to xConnect or xCreate.
  1530  */
  1531  #define TOKEN_EOF         0    /* End of file */
  1532  #define TOKEN_SPACE       1    /* Any kind of whitespace */
  1533  #define TOKEN_ID          2    /* An identifier */
  1534  #define TOKEN_STRING      3    /* A string literal */
  1535  #define TOKEN_PUNCT       4    /* A single punctuation character */
  1536  
  1537  /*
  1538  ** If X is a character that can be used in an identifier then
  1539  ** IdChar(X) will be true.  Otherwise it is false.
  1540  **
  1541  ** For ASCII, any character with the high-order bit set is
  1542  ** allowed in an identifier.  For 7-bit characters, 
  1543  ** sqlite3IsIdChar[X] must be 1.
  1544  **
  1545  ** Ticket #1066.  the SQL standard does not allow '$' in the
  1546  ** middle of identfiers.  But many SQL implementations do. 
  1547  ** SQLite will allow '$' in identifiers for compatibility.
  1548  ** But the feature is undocumented.
  1549  */
  1550  static const char isIdChar[] = {
  1551  /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
  1552      0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
  1553      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
  1554      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
  1555      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
  1556      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
  1557      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
  1558  };
  1559  #define IdChar(C)  (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
  1560  
  1561  
  1562  /*
  1563  ** Return the length of the token that begins at z[0]. 
  1564  ** Store the token type in *tokenType before returning.
  1565  */
  1566  static int getToken(const char *z, int *tokenType){
  1567    int i, c;
  1568    switch( *z ){
  1569      case 0: {
  1570        *tokenType = TOKEN_EOF;
  1571        return 0;
  1572      }
  1573      case ' ': case '\t': case '\n': case '\f': case '\r': {
  1574        for(i=1; safe_isspace(z[i]); i++){}
  1575        *tokenType = TOKEN_SPACE;
  1576        return i;
  1577      }
  1578      case '`':
  1579      case '\'':
  1580      case '"': {
  1581        int delim = z[0];
  1582        for(i=1; (c=z[i])!=0; i++){
  1583          if( c==delim ){
  1584            if( z[i+1]==delim ){
  1585              i++;
  1586            }else{
  1587              break;
  1588            }
  1589          }
  1590        }
  1591        *tokenType = TOKEN_STRING;
  1592        return i + (c!=0);
  1593      }
  1594      case '[': {
  1595        for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
  1596        *tokenType = TOKEN_ID;
  1597        return i;
  1598      }
  1599      default: {
  1600        if( !IdChar(*z) ){
  1601          break;
  1602        }
  1603        for(i=1; IdChar(z[i]); i++){}
  1604        *tokenType = TOKEN_ID;
  1605        return i;
  1606      }
  1607    }
  1608    *tokenType = TOKEN_PUNCT;
  1609    return 1;
  1610  }
  1611  
  1612  /*
  1613  ** A token extracted from a string is an instance of the following
  1614  ** structure.
  1615  */
  1616  typedef struct Token {
  1617    const char *z;       /* Pointer to token text.  Not '\000' terminated */
  1618    short int n;         /* Length of the token text in bytes. */
  1619  } Token;
  1620  
  1621  /*
  1622  ** Given a input string (which is really one of the argv[] parameters
  1623  ** passed into xConnect or xCreate) split the string up into tokens.
  1624  ** Return an array of pointers to '\000' terminated strings, one string
  1625  ** for each non-whitespace token.
  1626  **
  1627  ** The returned array is terminated by a single NULL pointer.
  1628  **
  1629  ** Space to hold the returned array is obtained from a single
  1630  ** malloc and should be freed by passing the return value to free().
  1631  ** The individual strings within the token list are all a part of
  1632  ** the single memory allocation and will all be freed at once.
  1633  */
  1634  static char **tokenizeString(const char *z, int *pnToken){
  1635    int nToken = 0;
  1636    Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) );
  1637    int n = 1;
  1638    int e, i;
  1639    int totalSize = 0;
  1640    char **azToken;
  1641    char *zCopy;
  1642    while( n>0 ){
  1643      n = getToken(z, &e);
  1644      if( e!=TOKEN_SPACE ){
  1645        aToken[nToken].z = z;
  1646        aToken[nToken].n = n;
  1647        nToken++;
  1648        totalSize += n+1;
  1649      }
  1650      z += n;
  1651    }
  1652    azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
  1653    zCopy = (char*)&azToken[nToken];
  1654    nToken--;
  1655    for(i=0; i<nToken; i++){
  1656      azToken[i] = zCopy;
  1657      n = aToken[i].n;
  1658      memcpy(zCopy, aToken[i].z, n);
  1659      zCopy[n] = 0;
  1660      zCopy += n+1;
  1661    }
  1662    azToken[nToken] = 0;
  1663    free(aToken);
  1664    *pnToken = nToken;
  1665    return azToken;
  1666  }
  1667  
  1668  /*
  1669  ** Convert an SQL-style quoted string into a normal string by removing
  1670  ** the quote characters.  The conversion is done in-place.  If the
  1671  ** input does not begin with a quote character, then this routine
  1672  ** is a no-op.
  1673  **
  1674  ** Examples:
  1675  **
  1676  **     "abc"   becomes   abc
  1677  **     'xyz'   becomes   xyz
  1678  **     [pqr]   becomes   pqr
  1679  **     `mno`   becomes   mno
  1680  */
  1681  static void dequoteString(char *z){
  1682    int quote;
  1683    int i, j;
  1684    if( z==0 ) return;
  1685    quote = z[0];
  1686    switch( quote ){
  1687      case '\'':  break;
  1688      case '"':   break;
  1689      case '`':   break;                /* For MySQL compatibility */
  1690      case '[':   quote = ']';  break;  /* For MS SqlServer compatibility */
  1691      default:    return;
  1692    }
  1693    for(i=1, j=0; z[i]; i++){
  1694      if( z[i]==quote ){
  1695        if( z[i+1]==quote ){
  1696          z[j++] = quote;
  1697          i++;
  1698        }else{
  1699          z[j++] = 0;
  1700          break;
  1701        }
  1702      }else{
  1703        z[j++] = z[i];
  1704      }
  1705    }
  1706  }
  1707  
  1708  /*
  1709  ** The input azIn is a NULL-terminated list of tokens.  Remove the first
  1710  ** token and all punctuation tokens.  Remove the quotes from
  1711  ** around string literal tokens.
  1712  **
  1713  ** Example:
  1714  **
  1715  **     input:      tokenize chinese ( 'simplifed' , 'mixed' )
  1716  **     output:     chinese simplifed mixed
  1717  **
  1718  ** Another example:
  1719  **
  1720  **     input:      delimiters ( '[' , ']' , '...' )
  1721  **     output:     [ ] ...
  1722  */
  1723  static void tokenListToIdList(char **azIn){
  1724    int i, j;
  1725    if( azIn ){
  1726      for(i=0, j=-1; azIn[i]; i++){
  1727        if( safe_isalnum(azIn[i][0]) || azIn[i][1] ){
  1728          dequoteString(azIn[i]);
  1729          if( j>=0 ){
  1730            azIn[j] = azIn[i];
  1731          }
  1732          j++;
  1733        }
  1734      }
  1735      azIn[j] = 0;
  1736    }
  1737  }
  1738  
  1739  
  1740  /*
  1741  ** Find the first alphanumeric token in the string zIn.  Null-terminate
  1742  ** this token.  Remove any quotation marks.  And return a pointer to
  1743  ** the result.
  1744  */
  1745  static char *firstToken(char *zIn, char **pzTail){
  1746    int n, ttype;
  1747    while(1){
  1748      n = getToken(zIn, &ttype);
  1749      if( ttype==TOKEN_SPACE ){
  1750        zIn += n;
  1751      }else if( ttype==TOKEN_EOF ){
  1752        *pzTail = zIn;
  1753        return 0;
  1754      }else{
  1755        zIn[n] = 0;
  1756        *pzTail = &zIn[1];
  1757        dequoteString(zIn);
  1758        return zIn;
  1759      }
  1760    }
  1761    /*NOTREACHED*/
  1762  }
  1763  
  1764  /* Return true if...
  1765  **
  1766  **   *  s begins with the string t, ignoring case
  1767  **   *  s is longer than t
  1768  **   *  The first character of s beyond t is not a alphanumeric
  1769  ** 
  1770  ** Ignore leading space in *s.
  1771  **
  1772  ** To put it another way, return true if the first token of
  1773  ** s[] is t[].
  1774  */
  1775  static int startsWith(const char *s, const char *t){
  1776    while( safe_isspace(*s) ){ s++; }
  1777    while( *t ){
  1778      if( safe_tolower(*s++)!=safe_tolower(*t++) ) return 0;
  1779    }
  1780    return *s!='_' && !safe_isalnum(*s);
  1781  }
  1782  
  1783  /*
  1784  ** An instance of this structure defines the "spec" of a
  1785  ** full text index.  This structure is populated by parseSpec
  1786  ** and use by fulltextConnect and fulltextCreate.
  1787  */
  1788  typedef struct TableSpec {
  1789    const char *zDb;         /* Logical database name */
  1790    const char *zName;       /* Name of the full-text index */
  1791    int nColumn;             /* Number of columns to be indexed */
  1792    char **azColumn;         /* Original names of columns to be indexed */
  1793    char **azContentColumn;  /* Column names for %_content */
  1794    char **azTokenizer;      /* Name of tokenizer and its arguments */
  1795  } TableSpec;
  1796  
  1797  /*
  1798  ** Reclaim all of the memory used by a TableSpec
  1799  */
  1800  static void clearTableSpec(TableSpec *p) {
  1801    free(p->azColumn);
  1802    free(p->azContentColumn);
  1803    free(p->azTokenizer);
  1804  }
  1805  
  1806  /* Parse a CREATE VIRTUAL TABLE statement, which looks like this:
  1807   *
  1808   * CREATE VIRTUAL TABLE email
  1809   *        USING fts1(subject, body, tokenize mytokenizer(myarg))
  1810   *
  1811   * We return parsed information in a TableSpec structure.
  1812   * 
  1813   */
  1814  static int parseSpec(TableSpec *pSpec, int argc, const char *const*argv,
  1815                       char**pzErr){
  1816    int i, n;
  1817    char *z, *zDummy;
  1818    char **azArg;
  1819    const char *zTokenizer = 0;    /* argv[] entry describing the tokenizer */
  1820  
  1821    assert( argc>=3 );
  1822    /* Current interface:
  1823    ** argv[0] - module name
  1824    ** argv[1] - database name
  1825    ** argv[2] - table name
  1826    ** argv[3..] - columns, optionally followed by tokenizer specification
  1827    **             and snippet delimiters specification.
  1828    */
  1829  
  1830    /* Make a copy of the complete argv[][] array in a single allocation.
  1831    ** The argv[][] array is read-only and transient.  We can write to the
  1832    ** copy in order to modify things and the copy is persistent.
  1833    */
  1834    memset(pSpec, 0, sizeof(*pSpec));
  1835    for(i=n=0; i<argc; i++){
  1836      n += strlen(argv[i]) + 1;
  1837    }
  1838    azArg = malloc( sizeof(char*)*argc + n );
  1839    if( azArg==0 ){
  1840      return SQLITE_NOMEM;
  1841    }
  1842    z = (char*)&azArg[argc];
  1843    for(i=0; i<argc; i++){
  1844      azArg[i] = z;
  1845      strcpy(z, argv[i]);
  1846      z += strlen(z)+1;
  1847    }
  1848  
  1849    /* Identify the column names and the tokenizer and delimiter arguments
  1850    ** in the argv[][] array.
  1851    */
  1852    pSpec->zDb = azArg[1];
  1853    pSpec->zName = azArg[2];
  1854    pSpec->nColumn = 0;
  1855    pSpec->azColumn = azArg;
  1856    zTokenizer = "tokenize simple";
  1857    for(i=3; i<argc; ++i){
  1858      if( startsWith(azArg[i],"tokenize") ){
  1859        zTokenizer = azArg[i];
  1860      }else{
  1861        z = azArg[pSpec->nColumn] = firstToken(azArg[i], &zDummy);
  1862        pSpec->nColumn++;
  1863      }
  1864    }
  1865    if( pSpec->nColumn==0 ){
  1866      azArg[0] = "content";
  1867      pSpec->nColumn = 1;
  1868    }
  1869  
  1870    /*
  1871    ** Construct the list of content column names.
  1872    **
  1873    ** Each content column name will be of the form cNNAAAA
  1874    ** where NN is the column number and AAAA is the sanitized
  1875    ** column name.  "sanitized" means that special characters are
  1876    ** converted to "_".  The cNN prefix guarantees that all column
  1877    ** names are unique.
  1878    **
  1879    ** The AAAA suffix is not strictly necessary.  It is included
  1880    ** for the convenience of people who might examine the generated
  1881    ** %_content table and wonder what the columns are used for.
  1882    */
  1883    pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) );
  1884    if( pSpec->azContentColumn==0 ){
  1885      clearTableSpec(pSpec);
  1886      return SQLITE_NOMEM;
  1887    }
  1888    for(i=0; i<pSpec->nColumn; i++){
  1889      char *p;
  1890      pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]);
  1891      for (p = pSpec->azContentColumn[i]; *p ; ++p) {
  1892        if( !safe_isalnum(*p) ) *p = '_';
  1893      }
  1894    }
  1895  
  1896    /*
  1897    ** Parse the tokenizer specification string.
  1898    */
  1899    pSpec->azTokenizer = tokenizeString(zTokenizer, &n);
  1900    tokenListToIdList(pSpec->azTokenizer);
  1901  
  1902    return SQLITE_OK;
  1903  }
  1904  
  1905  /*
  1906  ** Generate a CREATE TABLE statement that describes the schema of
  1907  ** the virtual table.  Return a pointer to this schema string.
  1908  **
  1909  ** Space is obtained from sqlite3_mprintf() and should be freed
  1910  ** using sqlite3_free().
  1911  */
  1912  static char *fulltextSchema(
  1913    int nColumn,                  /* Number of columns */
  1914    const char *const* azColumn,  /* List of columns */
  1915    const char *zTableName        /* Name of the table */
  1916  ){
  1917    int i;
  1918    char *zSchema, *zNext;
  1919    const char *zSep = "(";
  1920    zSchema = sqlite3_mprintf("CREATE TABLE x");
  1921    for(i=0; i<nColumn; i++){
  1922      zNext = sqlite3_mprintf("%s%s%Q", zSchema, zSep, azColumn[i]);
  1923      sqlite3_free(zSchema);
  1924      zSchema = zNext;
  1925      zSep = ",";
  1926    }
  1927    zNext = sqlite3_mprintf("%s,%Q)", zSchema, zTableName);
  1928    sqlite3_free(zSchema);
  1929    return zNext;
  1930  }
  1931  
  1932  /*
  1933  ** Build a new sqlite3_vtab structure that will describe the
  1934  ** fulltext index defined by spec.
  1935  */
  1936  static int constructVtab(
  1937    sqlite3 *db,              /* The SQLite database connection */
  1938    TableSpec *spec,          /* Parsed spec information from parseSpec() */
  1939    sqlite3_vtab **ppVTab,    /* Write the resulting vtab structure here */
  1940    char **pzErr              /* Write any error message here */
  1941  ){
  1942    int rc;
  1943    int n;
  1944    fulltext_vtab *v = 0;
  1945    const sqlite3_tokenizer_module *m = NULL;
  1946    char *schema;
  1947  
  1948    v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
  1949    if( v==0 ) return SQLITE_NOMEM;
  1950    memset(v, 0, sizeof(*v));
  1951    /* sqlite will initialize v->base */
  1952    v->db = db;
  1953    v->zDb = spec->zDb;       /* Freed when azColumn is freed */
  1954    v->zName = spec->zName;   /* Freed when azColumn is freed */
  1955    v->nColumn = spec->nColumn;
  1956    v->azContentColumn = spec->azContentColumn;
  1957    spec->azContentColumn = 0;
  1958    v->azColumn = spec->azColumn;
  1959    spec->azColumn = 0;
  1960  
  1961    if( spec->azTokenizer==0 ){
  1962      return SQLITE_NOMEM;
  1963    }
  1964    /* TODO(shess) For now, add new tokenizers as else if clauses. */
  1965    if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
  1966      sqlite3Fts1SimpleTokenizerModule(&m);
  1967    }else if( startsWith(spec->azTokenizer[0], "porter") ){
  1968      sqlite3Fts1PorterTokenizerModule(&m);
  1969    }else{
  1970      *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
  1971      rc = SQLITE_ERROR;
  1972      goto err;
  1973    }
  1974    for(n=0; spec->azTokenizer[n]; n++){}
  1975    if( n ){
  1976      rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
  1977                      &v->pTokenizer);
  1978    }else{
  1979      rc = m->xCreate(0, 0, &v->pTokenizer);
  1980    }
  1981    if( rc!=SQLITE_OK ) goto err;
  1982    v->pTokenizer->pModule = m;
  1983  
  1984    /* TODO: verify the existence of backing tables foo_content, foo_term */
  1985  
  1986    schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn,
  1987                            spec->zName);
  1988    rc = sqlite3_declare_vtab(db, schema);
  1989    sqlite3_free(schema);
  1990    if( rc!=SQLITE_OK ) goto err;
  1991  
  1992    memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements));
  1993  
  1994    *ppVTab = &v->base;
  1995    TRACE(("FTS1 Connect %p\n", v));
  1996  
  1997    return rc;
  1998  
  1999  err:
  2000    fulltext_vtab_destroy(v);
  2001    return rc;
  2002  }
  2003  
  2004  static int fulltextConnect(
  2005    sqlite3 *db,
  2006    void *pAux,
  2007    int argc, const char *const*argv,
  2008    sqlite3_vtab **ppVTab,
  2009    char **pzErr
  2010  ){
  2011    TableSpec spec;
  2012    int rc = parseSpec(&spec, argc, argv, pzErr);
  2013    if( rc!=SQLITE_OK ) return rc;
  2014  
  2015    rc = constructVtab(db, &spec, ppVTab, pzErr);
  2016    clearTableSpec(&spec);
  2017    return rc;
  2018  }
  2019  
  2020    /* The %_content table holds the text of each document, with
  2021    ** the rowid used as the docid.
  2022    **
  2023    ** The %_term table maps each term to a document list blob
  2024    ** containing elements sorted by ascending docid, each element
  2025    ** encoded as:
  2026    **
  2027    **   docid varint-encoded
  2028    **   token elements:
  2029    **     position+1 varint-encoded as delta from previous position
  2030    **     start offset varint-encoded as delta from previous start offset
  2031    **     end offset varint-encoded as delta from start offset
  2032    **
  2033    ** The sentinel position of 0 indicates the end of the token list.
  2034    **
  2035    ** Additionally, doclist blobs are chunked into multiple segments,
  2036    ** using segment to order the segments.  New elements are added to
  2037    ** the segment at segment 0, until it exceeds CHUNK_MAX.  Then
  2038    ** segment 0 is deleted, and the doclist is inserted at segment 1.
  2039    ** If there is already a doclist at segment 1, the segment 0 doclist
  2040    ** is merged with it, the segment 1 doclist is deleted, and the
  2041    ** merged doclist is inserted at segment 2, repeating those
  2042    ** operations until an insert succeeds.
  2043    **
  2044    ** Since this structure doesn't allow us to update elements in place
  2045    ** in case of deletion or update, these are simply written to
  2046    ** segment 0 (with an empty token list in case of deletion), with
  2047    ** docListAccumulate() taking care to retain lower-segment
  2048    ** information in preference to higher-segment information.
  2049    */
  2050    /* TODO(shess) Provide a VACUUM type operation which both removes
  2051    ** deleted elements which are no longer necessary, and duplicated
  2052    ** elements.  I suspect this will probably not be necessary in
  2053    ** practice, though.
  2054    */
  2055  static int fulltextCreate(sqlite3 *db, void *pAux,
  2056                            int argc, const char * const *argv,
  2057                            sqlite3_vtab **ppVTab, char **pzErr){
  2058    int rc;
  2059    TableSpec spec;
  2060    StringBuffer schema;
  2061    TRACE(("FTS1 Create\n"));
  2062  
  2063    rc = parseSpec(&spec, argc, argv, pzErr);
  2064    if( rc!=SQLITE_OK ) return rc;
  2065  
  2066    initStringBuffer(&schema);
  2067    append(&schema, "CREATE TABLE %_content(");
  2068    appendList(&schema, spec.nColumn, spec.azContentColumn);
  2069    append(&schema, ")");
  2070    rc = sql_exec(db, spec.zDb, spec.zName, schema.s);
  2071    free(schema.s);
  2072    if( rc!=SQLITE_OK ) goto out;
  2073  
  2074    rc = sql_exec(db, spec.zDb, spec.zName,
  2075      "create table %_term(term text, segment integer, doclist blob, "
  2076                          "primary key(term, segment));");
  2077    if( rc!=SQLITE_OK ) goto out;
  2078  
  2079    rc = constructVtab(db, &spec, ppVTab, pzErr);
  2080  
  2081  out:
  2082    clearTableSpec(&spec);
  2083    return rc;
  2084  }
  2085  
  2086  /* Decide how to handle an SQL query. */
  2087  static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  2088    int i;
  2089    TRACE(("FTS1 BestIndex\n"));
  2090  
  2091    for(i=0; i<pInfo->nConstraint; ++i){
  2092      const struct sqlite3_index_constraint *pConstraint;
  2093      pConstraint = &pInfo->aConstraint[i];
  2094      if( pConstraint->usable ) {
  2095        if( pConstraint->iColumn==-1 &&
  2096            pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){
  2097          pInfo->idxNum = QUERY_ROWID;      /* lookup by rowid */
  2098          TRACE(("FTS1 QUERY_ROWID\n"));
  2099        } else if( pConstraint->iColumn>=0 &&
  2100                   pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){
  2101          /* full-text search */
  2102          pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn;
  2103          TRACE(("FTS1 QUERY_FULLTEXT %d\n", pConstraint->iColumn));
  2104        } else continue;
  2105  
  2106        pInfo->aConstraintUsage[i].argvIndex = 1;
  2107        pInfo->aConstraintUsage[i].omit = 1;
  2108  
  2109        /* An arbitrary value for now.
  2110         * TODO: Perhaps rowid matches should be considered cheaper than
  2111         * full-text searches. */
  2112        pInfo->estimatedCost = 1.0;   
  2113  
  2114        return SQLITE_OK;
  2115      }
  2116    }
  2117    pInfo->idxNum = QUERY_GENERIC;
  2118    return SQLITE_OK;
  2119  }
  2120  
  2121  static int fulltextDisconnect(sqlite3_vtab *pVTab){
  2122    TRACE(("FTS1 Disconnect %p\n", pVTab));
  2123    fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  2124    return SQLITE_OK;
  2125  }
  2126  
  2127  static int fulltextDestroy(sqlite3_vtab *pVTab){
  2128    fulltext_vtab *v = (fulltext_vtab *)pVTab;
  2129    int rc;
  2130  
  2131    TRACE(("FTS1 Destroy %p\n", pVTab));
  2132    rc = sql_exec(v->db, v->zDb, v->zName,
  2133                  "drop table if exists %_content;"
  2134                  "drop table if exists %_term;"
  2135                  );
  2136    if( rc!=SQLITE_OK ) return rc;
  2137  
  2138    fulltext_vtab_destroy((fulltext_vtab *)pVTab);
  2139    return SQLITE_OK;
  2140  }
  2141  
  2142  static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
  2143    fulltext_cursor *c;
  2144  
  2145    c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1);
  2146    /* sqlite will initialize c->base */
  2147    *ppCursor = &c->base;
  2148    TRACE(("FTS1 Open %p: %p\n", pVTab, c));
  2149  
  2150    return SQLITE_OK;
  2151  }
  2152  
  2153  
  2154  /* Free all of the dynamically allocated memory held by *q
  2155  */
  2156  static void queryClear(Query *q){
  2157    int i;
  2158    for(i = 0; i < q->nTerms; ++i){
  2159      free(q->pTerms[i].pTerm);
  2160    }
  2161    free(q->pTerms);
  2162    memset(q, 0, sizeof(*q));
  2163  }
  2164  
  2165  /* Free all of the dynamically allocated memory held by the
  2166  ** Snippet
  2167  */
  2168  static void snippetClear(Snippet *p){
  2169    free(p->aMatch);
  2170    free(p->zOffset);
  2171    free(p->zSnippet);
  2172    memset(p, 0, sizeof(*p));
  2173  }
  2174  /*
  2175  ** Append a single entry to the p->aMatch[] log.
  2176  */
  2177  static void snippetAppendMatch(
  2178    Snippet *p,               /* Append the entry to this snippet */
  2179    int iCol, int iTerm,      /* The column and query term */
  2180    int iStart, int nByte     /* Offset and size of the match */
  2181  ){
  2182    int i;
  2183    struct snippetMatch *pMatch;
  2184    if( p->nMatch+1>=p->nAlloc ){
  2185      p->nAlloc = p->nAlloc*2 + 10;
  2186      p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) );
  2187      if( p->aMatch==0 ){
  2188        p->nMatch = 0;
  2189        p->nAlloc = 0;
  2190        return;
  2191      }
  2192    }
  2193    i = p->nMatch++;
  2194    pMatch = &p->aMatch[i];
  2195    pMatch->iCol = iCol;
  2196    pMatch->iTerm = iTerm;
  2197    pMatch->iStart = iStart;
  2198    pMatch->nByte = nByte;
  2199  }
  2200  
  2201  /*
  2202  ** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
  2203  */
  2204  #define FTS1_ROTOR_SZ   (32)
  2205  #define FTS1_ROTOR_MASK (FTS1_ROTOR_SZ-1)
  2206  
  2207  /*
  2208  ** Add entries to pSnippet->aMatch[] for every match that occurs against
  2209  ** document zDoc[0..nDoc-1] which is stored in column iColumn.
  2210  */
  2211  static void snippetOffsetsOfColumn(
  2212    Query *pQuery,
  2213    Snippet *pSnippet,
  2214    int iColumn,
  2215    const char *zDoc,
  2216    int nDoc
  2217  ){
  2218    const sqlite3_tokenizer_module *pTModule;  /* The tokenizer module */
  2219    sqlite3_tokenizer *pTokenizer;             /* The specific tokenizer */
  2220    sqlite3_tokenizer_cursor *pTCursor;        /* Tokenizer cursor */
  2221    fulltext_vtab *pVtab;                /* The full text index */
  2222    int nColumn;                         /* Number of columns in the index */
  2223    const QueryTerm *aTerm;              /* Query string terms */
  2224    int nTerm;                           /* Number of query string terms */  
  2225    int i, j;                            /* Loop counters */
  2226    int rc;                              /* Return code */
  2227    unsigned int match, prevMatch;       /* Phrase search bitmasks */
  2228    const char *zToken;                  /* Next token from the tokenizer */
  2229    int nToken;                          /* Size of zToken */
  2230    int iBegin, iEnd, iPos;              /* Offsets of beginning and end */
  2231  
  2232    /* The following variables keep a circular buffer of the last
  2233    ** few tokens */
  2234    unsigned int iRotor = 0;             /* Index of current token */
  2235    int iRotorBegin[FTS1_ROTOR_SZ];      /* Beginning offset of token */
  2236    int iRotorLen[FTS1_ROTOR_SZ];        /* Length of token */
  2237  
  2238    pVtab = pQuery->pFts;
  2239    nColumn = pVtab->nColumn;
  2240    pTokenizer = pVtab->pTokenizer;
  2241    pTModule = pTokenizer->pModule;
  2242    rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor);
  2243    if( rc ) return;
  2244    pTCursor->pTokenizer = pTokenizer;
  2245    aTerm = pQuery->pTerms;
  2246    nTerm = pQuery->nTerms;
  2247    if( nTerm>=FTS1_ROTOR_SZ ){
  2248      nTerm = FTS1_ROTOR_SZ - 1;
  2249    }
  2250    prevMatch = 0;
  2251    while(1){
  2252      rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
  2253      if( rc ) break;
  2254      iRotorBegin[iRotor&FTS1_ROTOR_MASK] = iBegin;
  2255      iRotorLen[iRotor&FTS1_ROTOR_MASK] = iEnd-iBegin;
  2256      match = 0;
  2257      for(i=0; i<nTerm; i++){
  2258        int iCol;
  2259        iCol = aTerm[i].iColumn;
  2260        if( iCol>=0 && iCol<nColumn && iCol!=iColumn ) continue;
  2261        if( aTerm[i].nTerm!=nToken ) continue;
  2262        if( memcmp(aTerm[i].pTerm, zToken, nToken) ) continue;
  2263        if( aTerm[i].iPhrase>1 && (prevMatch & (1<<i))==0 ) continue;
  2264        match |= 1<<i;
  2265        if( i==nTerm-1 || aTerm[i+1].iPhrase==1 ){
  2266          for(j=aTerm[i].iPhrase-1; j>=0; j--){
  2267            int k = (iRotor-j) & FTS1_ROTOR_MASK;
  2268            snippetAppendMatch(pSnippet, iColumn, i-j,
  2269                  iRotorBegin[k], iRotorLen[k]);
  2270          }
  2271        }
  2272      }
  2273      prevMatch = match<<1;
  2274      iRotor++;
  2275    }
  2276    pTModule->xClose(pTCursor);  
  2277  }
  2278  
  2279  
  2280  /*
  2281  ** Compute all offsets for the current row of the query.  
  2282  ** If the offsets have already been computed, this routine is a no-op.
  2283  */
  2284  static void snippetAllOffsets(fulltext_cursor *p){
  2285    int nColumn;
  2286    int iColumn, i;
  2287    int iFirst, iLast;
  2288    fulltext_vtab *pFts;
  2289  
  2290    if( p->snippet.nMatch ) return;
  2291    if( p->q.nTerms==0 ) return;
  2292    pFts = p->q.pFts;
  2293    nColumn = pFts->nColumn;
  2294    iColumn = p->iCursorType - QUERY_FULLTEXT;
  2295    if( iColumn<0 || iColumn>=nColumn ){
  2296      iFirst = 0;
  2297      iLast = nColumn-1;
  2298    }else{
  2299      iFirst = iColumn;
  2300      iLast = iColumn;
  2301    }
  2302    for(i=iFirst; i<=iLast; i++){
  2303      const char *zDoc;
  2304      int nDoc;
  2305      zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1);
  2306      nDoc = sqlite3_column_bytes(p->pStmt, i+1);
  2307      snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
  2308    }
  2309  }
  2310  
  2311  /*
  2312  ** Convert the information in the aMatch[] array of the snippet
  2313  ** into the string zOffset[0..nOffset-1].
  2314  */
  2315  static void snippetOffsetText(Snippet *p){
  2316    int i;
  2317    int cnt = 0;
  2318    StringBuffer sb;
  2319    char zBuf[200];
  2320    if( p->zOffset ) return;
  2321    initStringBuffer(&sb);
  2322    for(i=0; i<p->nMatch; i++){
  2323      struct snippetMatch *pMatch = &p->aMatch[i];
  2324      zBuf[0] = ' ';
  2325      sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d",
  2326          pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte);
  2327      append(&sb, zBuf);
  2328      cnt++;
  2329    }
  2330    p->zOffset = sb.s;
  2331    p->nOffset = sb.len;
  2332  }
  2333  
  2334  /*
  2335  ** zDoc[0..nDoc-1] is phrase of text.  aMatch[0..nMatch-1] are a set
  2336  ** of matching words some of which might be in zDoc.  zDoc is column
  2337  ** number iCol.
  2338  **
  2339  ** iBreak is suggested spot in zDoc where we could begin or end an
  2340  ** excerpt.  Return a value similar to iBreak but possibly adjusted
  2341  ** to be a little left or right so that the break point is better.
  2342  */
  2343  static int wordBoundary(
  2344    int iBreak,                   /* The suggested break point */
  2345    const char *zDoc,             /* Document text */
  2346    int nDoc,                     /* Number of bytes in zDoc[] */
  2347    struct snippetMatch *aMatch,  /* Matching words */
  2348    int nMatch,                   /* Number of entries in aMatch[] */
  2349    int iCol                      /* The column number for zDoc[] */
  2350  ){
  2351    int i;
  2352    if( iBreak<=10 ){
  2353      return 0;
  2354    }
  2355    if( iBreak>=nDoc-10 ){
  2356      return nDoc;
  2357    }
  2358    for(i=0; i<nMatch && aMatch[i].iCol<iCol; i++){}
  2359    while( i<nMatch && aMatch[i].iStart+aMatch[i].nByte<iBreak ){ i++; }
  2360    if( i<nMatch ){
  2361      if( aMatch[i].iStart<iBreak+10 ){
  2362        return aMatch[i].iStart;
  2363      }
  2364      if( i>0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){
  2365        return aMatch[i-1].iStart;
  2366      }
  2367    }
  2368    for(i=1; i<=10; i++){
  2369      if( safe_isspace(zDoc[iBreak-i]) ){
  2370        return iBreak - i + 1;
  2371      }
  2372      if( safe_isspace(zDoc[iBreak+i]) ){
  2373        return iBreak + i + 1;
  2374      }
  2375    }
  2376    return iBreak;
  2377  }
  2378  
  2379  /*
  2380  ** If the StringBuffer does not end in white space, add a single
  2381  ** space character to the end.
  2382  */
  2383  static void appendWhiteSpace(StringBuffer *p){
  2384    if( p->len==0 ) return;
  2385    if( safe_isspace(p->s[p->len-1]) ) return;
  2386    append(p, " ");
  2387  }
  2388  
  2389  /*
  2390  ** Remove white space from teh end of the StringBuffer
  2391  */
  2392  static void trimWhiteSpace(StringBuffer *p){
  2393    while( p->len>0 && safe_isspace(p->s[p->len-1]) ){
  2394      p->len--;
  2395    }
  2396  }
  2397  
  2398  
  2399  
  2400  /*
  2401  ** Allowed values for Snippet.aMatch[].snStatus
  2402  */
  2403  #define SNIPPET_IGNORE  0   /* It is ok to omit this match from the snippet */
  2404  #define SNIPPET_DESIRED 1   /* We want to include this match in the snippet */
  2405  
  2406  /*
  2407  ** Generate the text of a snippet.
  2408  */
  2409  static void snippetText(
  2410    fulltext_cursor *pCursor,   /* The cursor we need the snippet for */
  2411    const char *zStartMark,     /* Markup to appear before each match */
  2412    const char *zEndMark,       /* Markup to appear after each match */
  2413    const char *zEllipsis       /* Ellipsis mark */
  2414  ){
  2415    int i, j;
  2416    struct snippetMatch *aMatch;
  2417    int nMatch;
  2418    int nDesired;
  2419    StringBuffer sb;
  2420    int tailCol;
  2421    int tailOffset;
  2422    int iCol;
  2423    int nDoc;
  2424    const char *zDoc;
  2425    int iStart, iEnd;
  2426    int tailEllipsis = 0;
  2427    int iMatch;
  2428    
  2429  
  2430    free(pCursor->snippet.zSnippet);
  2431    pCursor->snippet.zSnippet = 0;
  2432    aMatch = pCursor->snippet.aMatch;
  2433    nMatch = pCursor->snippet.nMatch;
  2434    initStringBuffer(&sb);
  2435  
  2436    for(i=0; i<nMatch; i++){
  2437      aMatch[i].snStatus = SNIPPET_IGNORE;
  2438    }
  2439    nDesired = 0;
  2440    for(i=0; i<pCursor->q.nTerms; i++){
  2441      for(j=0; j<nMatch; j++){
  2442        if( aMatch[j].iTerm==i ){
  2443          aMatch[j].snStatus = SNIPPET_DESIRED;
  2444          nDesired++;
  2445          break;
  2446        }
  2447      }
  2448    }
  2449  
  2450    iMatch = 0;
  2451    tailCol = -1;
  2452    tailOffset = 0;
  2453    for(i=0; i<nMatch && nDesired>0; i++){
  2454      if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue;
  2455      nDesired--;
  2456      iCol = aMatch[i].iCol;
  2457      zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1);
  2458      nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1);
  2459      iStart = aMatch[i].iStart - 40;
  2460      iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol);
  2461      if( iStart<=10 ){
  2462        iStart = 0;
  2463      }
  2464      if( iCol==tailCol && iStart<=tailOffset+20 ){
  2465        iStart = tailOffset;
  2466      }
  2467      if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){
  2468        trimWhiteSpace(&sb);
  2469        appendWhiteSpace(&sb);
  2470        append(&sb, zEllipsis);
  2471        appendWhiteSpace(&sb);
  2472      }
  2473      iEnd = aMatch[i].iStart + aMatch[i].nByte + 40;
  2474      iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol);
  2475      if( iEnd>=nDoc-10 ){
  2476        iEnd = nDoc;
  2477        tailEllipsis = 0;
  2478      }else{
  2479        tailEllipsis = 1;
  2480      }
  2481      while( iMatch<nMatch && aMatch[iMatch].iCol<iCol ){ iMatch++; }
  2482      while( iStart<iEnd ){
  2483        while( iMatch<nMatch && aMatch[iMatch].iStart<iStart
  2484               && aMatch[iMatch].iCol<=iCol ){
  2485          iMatch++;
  2486        }
  2487        if( iMatch<nMatch && aMatch[iMatch].iStart<iEnd
  2488               && aMatch[iMatch].iCol==iCol ){
  2489          nappend(&sb, &zDoc[iStart], aMatch[iMatch].iStart - iStart);
  2490          iStart = aMatch[iMatch].iStart;
  2491          append(&sb, zStartMark);
  2492          nappend(&sb, &zDoc[iStart], aMatch[iMatch].nByte);
  2493          append(&sb, zEndMark);
  2494          iStart += aMatch[iMatch].nByte;
  2495          for(j=iMatch+1; j<nMatch; j++){
  2496            if( aMatch[j].iTerm==aMatch[iMatch].iTerm
  2497                && aMatch[j].snStatus==SNIPPET_DESIRED ){
  2498              nDesired--;
  2499              aMatch[j].snStatus = SNIPPET_IGNORE;
  2500            }
  2501          }
  2502        }else{
  2503          nappend(&sb, &zDoc[iStart], iEnd - iStart);
  2504          iStart = iEnd;
  2505        }
  2506      }
  2507      tailCol = iCol;
  2508      tailOffset = iEnd;
  2509    }
  2510    trimWhiteSpace(&sb);
  2511    if( tailEllipsis ){
  2512      appendWhiteSpace(&sb);
  2513      append(&sb, zEllipsis);
  2514    }
  2515    pCursor->snippet.zSnippet = sb.s;
  2516    pCursor->snippet.nSnippet = sb.len;  
  2517  }
  2518  
  2519  
  2520  /*
  2521  ** Close the cursor.  For additional information see the documentation
  2522  ** on the xClose method of the virtual table interface.
  2523  */
  2524  static int fulltextClose(sqlite3_vtab_cursor *pCursor){
  2525    fulltext_cursor *c = (fulltext_cursor *) pCursor;
  2526    TRACE(("FTS1 Close %p\n", c));
  2527    sqlite3_finalize(c->pStmt);
  2528    queryClear(&c->q);
  2529    snippetClear(&c->snippet);
  2530    if( c->result.pDoclist!=NULL ){
  2531      docListDelete(c->result.pDoclist);
  2532    }
  2533    free(c);
  2534    return SQLITE_OK;
  2535  }
  2536  
  2537  static int fulltextNext(sqlite3_vtab_cursor *pCursor){
  2538    fulltext_cursor *c = (fulltext_cursor *) pCursor;
  2539    sqlite_int64 iDocid;
  2540    int rc;
  2541  
  2542    TRACE(("FTS1 Next %p\n", pCursor));
  2543    snippetClear(&c->snippet);
  2544    if( c->iCursorType < QUERY_FULLTEXT ){
  2545      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
  2546      rc = sqlite3_step(c->pStmt);
  2547      switch( rc ){
  2548        case SQLITE_ROW:
  2549          c->eof = 0;
  2550          return SQLITE_OK;
  2551        case SQLITE_DONE:
  2552          c->eof = 1;
  2553          return SQLITE_OK;
  2554        default:
  2555          c->eof = 1;
  2556          return rc;
  2557      }
  2558    } else {  /* full-text query */
  2559      rc = sqlite3_reset(c->pStmt);
  2560      if( rc!=SQLITE_OK ) return rc;
  2561  
  2562      iDocid = nextDocid(&c->result);
  2563      if( iDocid==0 ){
  2564        c->eof = 1;
  2565        return SQLITE_OK;
  2566      }
  2567      rc = sqlite3_bind_int64(c->pStmt, 1, iDocid);
  2568      if( rc!=SQLITE_OK ) return rc;
  2569      /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
  2570      rc = sqlite3_step(c->pStmt);
  2571      if( rc==SQLITE_ROW ){   /* the case we expect */
  2572        c->eof = 0;
  2573        return SQLITE_OK;
  2574      }
  2575      /* an error occurred; abort */
  2576      return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
  2577    }
  2578  }
  2579  
  2580  
  2581  /* Return a DocList corresponding to the query term *pTerm.  If *pTerm
  2582  ** is the first term of a phrase query, go ahead and evaluate the phrase
  2583  ** query and return the doclist for the entire phrase query.
  2584  **
  2585  ** The result is stored in pTerm->doclist.
  2586  */
  2587  static int docListOfTerm(
  2588    fulltext_vtab *v,     /* The full text index */
  2589    int iColumn,          /* column to restrict to.  No restrition if >=nColumn */
  2590    QueryTerm *pQTerm,    /* Term we are looking for, or 1st term of a phrase */
  2591    DocList **ppResult    /* Write the result here */
  2592  ){
  2593    DocList *pLeft, *pRight, *pNew;
  2594    int i, rc;
  2595  
  2596    pLeft = docListNew(DL_POSITIONS);
  2597    rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft);
  2598    if( rc ){
  2599      docListDelete(pLeft);
  2600      return rc;
  2601    }
  2602    for(i=1; i<=pQTerm->nPhrase; i++){
  2603      pRight = docListNew(DL_POSITIONS);
  2604      rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight);
  2605      if( rc ){
  2606        docListDelete(pLeft);
  2607        return rc;
  2608      }
  2609      pNew = docListNew(i<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS);
  2610      docListPhraseMerge(pLeft, pRight, pNew);
  2611      docListDelete(pLeft);
  2612      docListDelete(pRight);
  2613      pLeft = pNew;
  2614    }
  2615    *ppResult = pLeft;
  2616    return SQLITE_OK;
  2617  }
  2618  
  2619  /* Add a new term pTerm[0..nTerm-1] to the query *q.
  2620  */
  2621  static void queryAdd(Query *q, const char *pTerm, int nTerm){
  2622    QueryTerm *t;
  2623    ++q->nTerms;
  2624    q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0]));
  2625    if( q->pTerms==0 ){
  2626      q->nTerms = 0;
  2627      return;
  2628    }
  2629    t = &q->pTerms[q->nTerms - 1];
  2630    memset(t, 0, sizeof(*t));
  2631    t->pTerm = malloc(nTerm+1);
  2632    memcpy(t->pTerm, pTerm, nTerm);
  2633    t->pTerm[nTerm] = 0;
  2634    t->nTerm = nTerm;
  2635    t->isOr = q->nextIsOr;
  2636    q->nextIsOr = 0;
  2637    t->iColumn = q->nextColumn;
  2638    q->nextColumn = q->dfltColumn;
  2639  }
  2640  
  2641  /*
  2642  ** Check to see if the string zToken[0...nToken-1] matches any
  2643  ** column name in the virtual table.   If it does,
  2644  ** return the zero-indexed column number.  If not, return -1.
  2645  */
  2646  static int checkColumnSpecifier(
  2647    fulltext_vtab *pVtab,    /* The virtual table */
  2648    const char *zToken,      /* Text of the token */
  2649    int nToken               /* Number of characters in the token */
  2650  ){
  2651    int i;
  2652    for(i=0; i<pVtab->nColumn; i++){
  2653      if( memcmp(pVtab->azColumn[i], zToken, nToken)==0
  2654          && pVtab->azColumn[i][nToken]==0 ){
  2655        return i;
  2656      }
  2657    }
  2658    return -1;
  2659  }
  2660  
  2661  /*
  2662  ** Parse the text at pSegment[0..nSegment-1].  Add additional terms
  2663  ** to the query being assemblied in pQuery.
  2664  **
  2665  ** inPhrase is true if pSegment[0..nSegement-1] is contained within
  2666  ** double-quotes.  If inPhrase is true, then the first term
  2667  ** is marked with the number of terms in the phrase less one and
  2668  ** OR and "-" syntax is ignored.  If inPhrase is false, then every
  2669  ** term found is marked with nPhrase=0 and OR and "-" syntax is significant.
  2670  */
  2671  static int tokenizeSegment(
  2672    sqlite3_tokenizer *pTokenizer,          /* The tokenizer to use */
  2673    const char *pSegment, int nSegment,     /* Query expression being parsed */
  2674    int inPhrase,                           /* True if within "..." */
  2675    Query *pQuery                           /* Append results here */
  2676  ){
  2677    const sqlite3_tokenizer_module *pModule = pTokenizer->pModule;
  2678    sqlite3_tokenizer_cursor *pCursor;
  2679    int firstIndex = pQuery->nTerms;
  2680    int iCol;
  2681    int nTerm = 1;
  2682    
  2683    int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor);
  2684    if( rc!=SQLITE_OK ) return rc;
  2685    pCursor->pTokenizer = pTokenizer;
  2686  
  2687    while( 1 ){
  2688      const char *pToken;
  2689      int nToken, iBegin, iEnd, iPos;
  2690  
  2691      rc = pModule->xNext(pCursor,
  2692                          &pToken, &nToken,
  2693                          &iBegin, &iEnd, &iPos);
  2694      if( rc!=SQLITE_OK ) break;
  2695      if( !inPhrase &&
  2696          pSegment[iEnd]==':' &&
  2697           (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){
  2698        pQuery->nextColumn = iCol;
  2699        continue;
  2700      }
  2701      if( !inPhrase && pQuery->nTerms>0 && nToken==2
  2702           && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){
  2703        pQuery->nextIsOr = 1;
  2704        continue;
  2705      }
  2706      queryAdd(pQuery, pToken, nToken);
  2707      if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){
  2708        pQuery->pTerms[pQuery->nTerms-1].isNot = 1;
  2709      }
  2710      pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm;
  2711      if( inPhrase ){
  2712        nTerm++;
  2713      }
  2714    }
  2715  
  2716    if( inPhrase && pQuery->nTerms>firstIndex ){
  2717      pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1;
  2718    }
  2719  
  2720    return pModule->xClose(pCursor);
  2721  }
  2722  
  2723  /* Parse a query string, yielding a Query object pQuery.
  2724  **
  2725  ** The calling function will need to queryClear() to clean up
  2726  ** the dynamically allocated memory held by pQuery.
  2727  */
  2728  static int parseQuery(
  2729    fulltext_vtab *v,        /* The fulltext index */
  2730    const char *zInput,      /* Input text of the query string */
  2731    int nInput,              /* Size of the input text */
  2732    int dfltColumn,          /* Default column of the index to match against */
  2733    Query *pQuery            /* Write the parse results here. */
  2734  ){
  2735    int iInput, inPhrase = 0;
  2736  
  2737    if( zInput==0 ) nInput = 0;
  2738    if( nInput<0 ) nInput = strlen(zInput);
  2739    pQuery->nTerms = 0;
  2740    pQuery->pTerms = NULL;
  2741    pQuery->nextIsOr = 0;
  2742    pQuery->nextColumn = dfltColumn;
  2743    pQuery->dfltColumn = dfltColumn;
  2744    pQuery->pFts = v;
  2745  
  2746    for(iInput=0; iInput<nInput; ++iInput){
  2747      int i;
  2748      for(i=iInput; i<nInput && zInput[i]!='"'; ++i){}
  2749      if( i>iInput ){
  2750        tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase,
  2751                         pQuery);
  2752      }
  2753      iInput = i;
  2754      if( i<nInput ){
  2755        assert( zInput[i]=='"' );
  2756        inPhrase = !inPhrase;
  2757      }
  2758    }
  2759  
  2760    if( inPhrase ){
  2761      /* unmatched quote */
  2762      queryClear(pQuery);
  2763      return SQLITE_ERROR;
  2764    }
  2765    return SQLITE_OK;
  2766  }
  2767  
  2768  /* Perform a full-text query using the search expression in
  2769  ** zInput[0..nInput-1].  Return a list of matching documents
  2770  ** in pResult.
  2771  **
  2772  ** Queries must match column iColumn.  Or if iColumn>=nColumn
  2773  ** they are allowed to match against any column.
  2774  */
  2775  static int fulltextQuery(
  2776    fulltext_vtab *v,      /* The full text index */
  2777    int iColumn,           /* Match against this column by default */
  2778    const char *zInput,    /* The query string */
  2779    int nInput,            /* Number of bytes in zInput[] */
  2780    DocList **pResult,     /* Write the result doclist here */
  2781    Query *pQuery          /* Put parsed query string here */
  2782  ){
  2783    int i, iNext, rc;
  2784    DocList *pLeft = NULL;
  2785    DocList *pRight, *pNew, *pOr;
  2786    int nNot = 0;
  2787    QueryTerm *aTerm;
  2788  
  2789    rc = parseQuery(v, zInput, nInput, iColumn, pQuery);
  2790    if( rc!=SQLITE_OK ) return rc;
  2791  
  2792    /* Merge AND terms. */
  2793    aTerm = pQuery->pTerms;
  2794    for(i = 0; i<pQuery->nTerms; i=iNext){
  2795      if( aTerm[i].isNot ){
  2796        /* Handle all NOT terms in a separate pass */
  2797        nNot++;
  2798        iNext = i + aTerm[i].nPhrase+1;
  2799        continue;
  2800      }
  2801      iNext = i + aTerm[i].nPhrase + 1;
  2802      rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
  2803      if( rc ){
  2804        queryClear(pQuery);
  2805        return rc;
  2806      }
  2807      while( iNext<pQuery->nTerms && aTerm[iNext].isOr ){
  2808        rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr);
  2809        iNext += aTerm[iNext].nPhrase + 1;
  2810        if( rc ){
  2811          queryClear(pQuery);
  2812          return rc;
  2813        }
  2814        pNew = docListNew(DL_DOCIDS);
  2815        docListOrMerge(pRight, pOr, pNew);
  2816        docListDelete(pRight);
  2817        docListDelete(pOr);
  2818        pRight = pNew;
  2819      }
  2820      if( pLeft==0 ){
  2821        pLeft = pRight;
  2822      }else{
  2823        pNew = docListNew(DL_DOCIDS);
  2824        docListAndMerge(pLeft, pRight, pNew);
  2825        docListDelete(pRight);
  2826        docListDelete(pLeft);
  2827        pLeft = pNew;
  2828      }
  2829    }
  2830  
  2831    if( nNot && pLeft==0 ){
  2832      /* We do not yet know how to handle a query of only NOT terms */
  2833      return SQLITE_ERROR;
  2834    }
  2835  
  2836    /* Do the EXCEPT terms */
  2837    for(i=0; i<pQuery->nTerms;  i += aTerm[i].nPhrase + 1){
  2838      if( !aTerm[i].isNot ) continue;
  2839      rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight);
  2840      if( rc ){
  2841        queryClear(pQuery);
  2842        docListDelete(pLeft);
  2843        return rc;
  2844      }
  2845      pNew = docListNew(DL_DOCIDS);
  2846      docListExceptMerge(pLeft, pRight, pNew);
  2847      docListDelete(pRight);
  2848      docListDelete(pLeft);
  2849      pLeft = pNew;
  2850    }
  2851  
  2852    *pResult = pLeft;
  2853    return rc;
  2854  }
  2855  
  2856  /*
  2857  ** This is the xFilter interface for the virtual table.  See
  2858  ** the virtual table xFilter method documentation for additional
  2859  ** information.
  2860  **
  2861  ** If idxNum==QUERY_GENERIC then do a full table scan against
  2862  ** the %_content table.
  2863  **
  2864  ** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry
  2865  ** in the %_content table.
  2866  **
  2867  ** If idxNum>=QUERY_FULLTEXT then use the full text index.  The
  2868  ** column on the left-hand side of the MATCH operator is column
  2869  ** number idxNum-QUERY_FULLTEXT, 0 indexed.  argv[0] is the right-hand
  2870  ** side of the MATCH operator.
  2871  */
  2872  /* TODO(shess) Upgrade the cursor initialization and destruction to
  2873  ** account for fulltextFilter() being called multiple times on the
  2874  ** same cursor.  The current solution is very fragile.  Apply fix to
  2875  ** fts2 as appropriate.
  2876  */
  2877  static int fulltextFilter(
  2878    sqlite3_vtab_cursor *pCursor,     /* The cursor used for this query */
  2879    int idxNum, const char *idxStr,   /* Which indexing scheme to use */
  2880    int argc, sqlite3_value **argv    /* Arguments for the indexing scheme */
  2881  ){
  2882    fulltext_cursor *c = (fulltext_cursor *) pCursor;
  2883    fulltext_vtab *v = cursor_vtab(c);
  2884    int rc;
  2885    char *zSql;
  2886  
  2887    TRACE(("FTS1 Filter %p\n",pCursor));
  2888  
  2889    zSql = sqlite3_mprintf("select rowid, * from %%_content %s",
  2890                            idxNum==QUERY_GENERIC ? "" : "where rowid=?");
  2891    sqlite3_finalize(c->pStmt);
  2892    rc = sql_prepare(v->db, v->zDb, v->zName, &c->pStmt, zSql);
  2893    sqlite3_free(zSql);
  2894    if( rc!=SQLITE_OK ) return rc;
  2895  
  2896    c->iCursorType = idxNum;
  2897    switch( idxNum ){
  2898      case QUERY_GENERIC:
  2899        break;
  2900  
  2901      case QUERY_ROWID:
  2902        rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0]));
  2903        if( rc!=SQLITE_OK ) return rc;
  2904        break;
  2905  
  2906      default:   /* full-text search */
  2907      {
  2908        const char *zQuery = (const char *)sqlite3_value_text(argv[0]);
  2909        DocList *pResult;
  2910        assert( idxNum<=QUERY_FULLTEXT+v->nColumn);
  2911        assert( argc==1 );
  2912        queryClear(&c->q);
  2913        rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q);
  2914        if( rc!=SQLITE_OK ) return rc;
  2915        if( c->result.pDoclist!=NULL ) docListDelete(c->result.pDoclist);
  2916        readerInit(&c->result, pResult);
  2917        break;
  2918      }
  2919    }
  2920  
  2921    return fulltextNext(pCursor);
  2922  }
  2923  
  2924  /* This is the xEof method of the virtual table.  The SQLite core
  2925  ** calls this routine to find out if it has reached the end of
  2926  ** a query's results set.
  2927  */
  2928  static int fulltextEof(sqlite3_vtab_cursor *pCursor){
  2929    fulltext_cursor *c = (fulltext_cursor *) pCursor;
  2930    return c->eof;
  2931  }
  2932  
  2933  /* This is the xColumn method of the virtual table.  The SQLite
  2934  ** core calls this method during a query when it needs the value
  2935  ** of a column from the virtual table.  This method needs to use
  2936  ** one of the sqlite3_result_*() routines to store the requested
  2937  ** value back in the pContext.
  2938  */
  2939  static int fulltextColumn(sqlite3_vtab_cursor *pCursor,
  2940                            sqlite3_context *pContext, int idxCol){
  2941    fulltext_cursor *c = (fulltext_cursor *) pCursor;
  2942    fulltext_vtab *v = cursor_vtab(c);
  2943  
  2944    if( idxCol<v->nColumn ){
  2945      sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
  2946      sqlite3_result_value(pContext, pVal);
  2947    }else if( idxCol==v->nColumn ){
  2948      /* The extra column whose name is the same as the table.
  2949      ** Return a blob which is a pointer to the cursor
  2950      */
  2951      sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT);
  2952    }
  2953    return SQLITE_OK;
  2954  }
  2955  
  2956  /* This is the xRowid method.  The SQLite core calls this routine to
  2957  ** retrive the rowid for the current row of the result set.  The
  2958  ** rowid should be written to *pRowid.
  2959  */
  2960  static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
  2961    fulltext_cursor *c = (fulltext_cursor *) pCursor;
  2962  
  2963    *pRowid = sqlite3_column_int64(c->pStmt, 0);
  2964    return SQLITE_OK;
  2965  }
  2966  
  2967  /* Add all terms in [zText] to the given hash table.  If [iColumn] > 0,
  2968   * we also store positions and offsets in the hash table using the given
  2969   * column number. */
  2970  static int buildTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iDocid,
  2971                        const char *zText, int iColumn){
  2972    sqlite3_tokenizer *pTokenizer = v->pTokenizer;
  2973    sqlite3_tokenizer_cursor *pCursor;
  2974    const char *pToken;
  2975    int nTokenBytes;
  2976    int iStartOffset, iEndOffset, iPosition;
  2977    int rc;
  2978  
  2979    rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor);
  2980    if( rc!=SQLITE_OK ) return rc;
  2981  
  2982    pCursor->pTokenizer = pTokenizer;
  2983    while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor,
  2984                                                 &pToken, &nTokenBytes,
  2985                                                 &iStartOffset, &iEndOffset,
  2986                                                 &iPosition) ){
  2987      DocList *p;
  2988  
  2989      /* Positions can't be negative; we use -1 as a terminator internally. */
  2990      if( iPosition<0 ){
  2991        pTokenizer->pModule->xClose(pCursor);
  2992        return SQLITE_ERROR;
  2993      }
  2994  
  2995      p = fts1HashFind(terms, pToken, nTokenBytes);
  2996      if( p==NULL ){
  2997        p = docListNew(DL_DEFAULT);
  2998        docListAddDocid(p, iDocid);
  2999        fts1HashInsert(terms, pToken, nTokenBytes, p);
  3000      }
  3001      if( iColumn>=0 ){
  3002        docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset);
  3003      }
  3004    }
  3005  
  3006    /* TODO(shess) Check return?  Should this be able to cause errors at
  3007    ** this point?  Actually, same question about sqlite3_finalize(),
  3008    ** though one could argue that failure there means that the data is
  3009    ** not durable.  *ponder*
  3010    */
  3011    pTokenizer->pModule->xClose(pCursor);
  3012    return rc;
  3013  }
  3014  
  3015  /* Update the %_terms table to map the term [pTerm] to the given rowid. */
  3016  static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm,
  3017                               DocList *d){
  3018    sqlite_int64 iIndexRow;
  3019    DocList doclist;
  3020    int iSegment = 0, rc;
  3021  
  3022    rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist);
  3023    if( rc==SQLITE_DONE ){
  3024      docListInit(&doclist, DL_DEFAULT, 0, 0);
  3025      docListUpdate(&doclist, d);
  3026      /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */
  3027      rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist);
  3028      goto err;
  3029    }
  3030    if( rc!=SQLITE_ROW ) return SQLITE_ERROR;
  3031  
  3032    docListUpdate(&doclist, d);
  3033    if( doclist.nData<=CHUNK_MAX ){
  3034      rc = term_update(v, iIndexRow, &doclist);
  3035      goto err;
  3036    }
  3037  
  3038    /* Doclist doesn't fit, delete what's there, and accumulate
  3039    ** forward.
  3040    */
  3041    rc = term_delete(v, iIndexRow);
  3042    if( rc!=SQLITE_OK ) goto err;
  3043  
  3044    /* Try to insert the doclist into a higher segment bucket.  On
  3045    ** failure, accumulate existing doclist with the doclist from that
  3046    ** bucket, and put results in the next bucket.
  3047    */
  3048    iSegment++;
  3049    while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment,
  3050                           &doclist))!=SQLITE_OK ){
  3051      sqlite_int64 iSegmentRow;
  3052      DocList old;
  3053      int rc2;
  3054  
  3055      /* Retain old error in case the term_insert() error was really an
  3056      ** error rather than a bounced insert.
  3057      */
  3058      rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old);
  3059      if( rc2!=SQLITE_ROW ) goto err;
  3060  
  3061      rc = term_delete(v, iSegmentRow);
  3062      if( rc!=SQLITE_OK ) goto err;
  3063  
  3064      /* Reusing lowest-number deleted row keeps the index smaller. */
  3065      if( iSegmentRow<iIndexRow ) iIndexRow = iSegmentRow;
  3066  
  3067      /* doclist contains the newer data, so accumulate it over old.
  3068      ** Then steal accumulated data for doclist.
  3069      */
  3070      docListAccumulate(&old, &doclist);
  3071      docListDestroy(&doclist);
  3072      doclist = old;
  3073  
  3074      iSegment++;
  3075    }
  3076  
  3077   err:
  3078    docListDestroy(&doclist);
  3079    return rc;
  3080  }
  3081  
  3082  /* Add doclists for all terms in [pValues] to the hash table [terms]. */
  3083  static int insertTerms(fulltext_vtab *v, fts1Hash *terms, sqlite_int64 iRowid,
  3084                  sqlite3_value **pValues){
  3085    int i;
  3086    for(i = 0; i < v->nColumn ; ++i){
  3087      char *zText = (char*)sqlite3_value_text(pValues[i]);
  3088      int rc = buildTerms(v, terms, iRowid, zText, i);
  3089      if( rc!=SQLITE_OK ) return rc;
  3090    }
  3091    return SQLITE_OK;
  3092  }
  3093  
  3094  /* Add empty doclists for all terms in the given row's content to the hash
  3095   * table [pTerms]. */
  3096  static int deleteTerms(fulltext_vtab *v, fts1Hash *pTerms, sqlite_int64 iRowid){
  3097    const char **pValues;
  3098    int i;
  3099  
  3100    int rc = content_select(v, iRowid, &pValues);
  3101    if( rc!=SQLITE_OK ) return rc;
  3102  
  3103    for(i = 0 ; i < v->nColumn; ++i) {
  3104      rc = buildTerms(v, pTerms, iRowid, pValues[i], -1);
  3105      if( rc!=SQLITE_OK ) break;
  3106    }
  3107  
  3108    freeStringArray(v->nColumn, pValues);
  3109    return SQLITE_OK;
  3110  }
  3111  
  3112  /* Insert a row into the %_content table; set *piRowid to be the ID of the
  3113   * new row.  Fill [pTerms] with new doclists for the %_term table. */
  3114  static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid,
  3115                          sqlite3_value **pValues,
  3116                          sqlite_int64 *piRowid, fts1Hash *pTerms){
  3117    int rc;
  3118  
  3119    rc = content_insert(v, pRequestRowid, pValues);  /* execute an SQL INSERT */
  3120    if( rc!=SQLITE_OK ) return rc;
  3121    *piRowid = sqlite3_last_insert_rowid(v->db);
  3122    return insertTerms(v, pTerms, *piRowid, pValues);
  3123  }
  3124  
  3125  /* Delete a row from the %_content table; fill [pTerms] with empty doclists
  3126   * to be written to the %_term table. */
  3127  static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts1Hash *pTerms){
  3128    int rc = deleteTerms(v, pTerms, iRow);
  3129    if( rc!=SQLITE_OK ) return rc;
  3130    return content_delete(v, iRow);  /* execute an SQL DELETE */
  3131  }
  3132  
  3133  /* Update a row in the %_content table; fill [pTerms] with new doclists for the
  3134   * %_term table. */
  3135  static int index_update(fulltext_vtab *v, sqlite_int64 iRow,
  3136                          sqlite3_value **pValues, fts1Hash *pTerms){
  3137    /* Generate an empty doclist for each term that previously appeared in this
  3138     * row. */
  3139    int rc = deleteTerms(v, pTerms, iRow);
  3140    if( rc!=SQLITE_OK ) return rc;
  3141  
  3142    rc = content_update(v, pValues, iRow);  /* execute an SQL UPDATE */
  3143    if( rc!=SQLITE_OK ) return rc;
  3144  
  3145    /* Now add positions for terms which appear in the updated row. */
  3146    return insertTerms(v, pTerms, iRow, pValues);
  3147  }
  3148  
  3149  /* This function implements the xUpdate callback; it is the top-level entry
  3150   * point for inserting, deleting or updating a row in a full-text table. */
  3151  static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg,
  3152                     sqlite_int64 *pRowid){
  3153    fulltext_vtab *v = (fulltext_vtab *) pVtab;
  3154    fts1Hash terms;   /* maps term string -> PosList */
  3155    int rc;
  3156    fts1HashElem *e;
  3157  
  3158    TRACE(("FTS1 Update %p\n", pVtab));
  3159    
  3160    fts1HashInit(&terms, FTS1_HASH_STRING, 1);
  3161  
  3162    if( nArg<2 ){
  3163      rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms);
  3164    } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){
  3165      /* An update:
  3166       * ppArg[0] = old rowid
  3167       * ppArg[1] = new rowid
  3168       * ppArg[2..2+v->nColumn-1] = values
  3169       * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  3170       */
  3171      sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]);
  3172      if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER ||
  3173        sqlite3_value_int64(ppArg[1]) != rowid ){
  3174        rc = SQLITE_ERROR;  /* we don't allow changing the rowid */
  3175      } else {
  3176        assert( nArg==2+v->nColumn+1);
  3177        rc = index_update(v, rowid, &ppArg[2], &terms);
  3178      }
  3179    } else {
  3180      /* An insert:
  3181       * ppArg[1] = requested rowid
  3182       * ppArg[2..2+v->nColumn-1] = values
  3183       * ppArg[2+v->nColumn] = value for magic column (we ignore this)
  3184       */
  3185      assert( nArg==2+v->nColumn+1);
  3186      rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms);
  3187    }
  3188  
  3189    if( rc==SQLITE_OK ){
  3190      /* Write updated doclists to disk. */
  3191      for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
  3192        DocList *p = fts1HashData(e);
  3193        rc = index_insert_term(v, fts1HashKey(e), fts1HashKeysize(e), p);
  3194        if( rc!=SQLITE_OK ) break;
  3195      }
  3196    }
  3197  
  3198    /* clean up */
  3199    for(e=fts1HashFirst(&terms); e; e=fts1HashNext(e)){
  3200      DocList *p = fts1HashData(e);
  3201      docListDelete(p);
  3202    }
  3203    fts1HashClear(&terms);
  3204  
  3205    return rc;
  3206  }
  3207  
  3208  /*
  3209  ** Implementation of the snippet() function for FTS1
  3210  */
  3211  static void snippetFunc(
  3212    sqlite3_context *pContext,
  3213    int argc,
  3214    sqlite3_value **argv
  3215  ){
  3216    fulltext_cursor *pCursor;
  3217    if( argc<1 ) return;
  3218    if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  3219        sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  3220      sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1);
  3221    }else{
  3222      const char *zStart = "<b>";
  3223      const char *zEnd = "</b>";
  3224      const char *zEllipsis = "<b>...</b>";
  3225      memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  3226      if( argc>=2 ){
  3227        zStart = (const char*)sqlite3_value_text(argv[1]);
  3228        if( argc>=3 ){
  3229          zEnd = (const char*)sqlite3_value_text(argv[2]);
  3230          if( argc>=4 ){
  3231            zEllipsis = (const char*)sqlite3_value_text(argv[3]);
  3232          }
  3233        }
  3234      }
  3235      snippetAllOffsets(pCursor);
  3236      snippetText(pCursor, zStart, zEnd, zEllipsis);
  3237      sqlite3_result_text(pContext, pCursor->snippet.zSnippet,
  3238                          pCursor->snippet.nSnippet, SQLITE_STATIC);
  3239    }
  3240  }
  3241  
  3242  /*
  3243  ** Implementation of the offsets() function for FTS1
  3244  */
  3245  static void snippetOffsetsFunc(
  3246    sqlite3_context *pContext,
  3247    int argc,
  3248    sqlite3_value **argv
  3249  ){
  3250    fulltext_cursor *pCursor;
  3251    if( argc<1 ) return;
  3252    if( sqlite3_value_type(argv[0])!=SQLITE_BLOB ||
  3253        sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){
  3254      sqlite3_result_error(pContext, "illegal first argument to offsets",-1);
  3255    }else{
  3256      memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor));
  3257      snippetAllOffsets(pCursor);
  3258      snippetOffsetText(&pCursor->snippet);
  3259      sqlite3_result_text(pContext,
  3260                          pCursor->snippet.zOffset, pCursor->snippet.nOffset,
  3261                          SQLITE_STATIC);
  3262    }
  3263  }
  3264  
  3265  /*
  3266  ** This routine implements the xFindFunction method for the FTS1
  3267  ** virtual table.
  3268  */
  3269  static int fulltextFindFunction(
  3270    sqlite3_vtab *pVtab,
  3271    int nArg,
  3272    const char *zName,
  3273    void (**pxFunc)(sqlite3_context*,int,sqlite3_value**),
  3274    void **ppArg
  3275  ){
  3276    if( strcmp(zName,"snippet")==0 ){
  3277      *pxFunc = snippetFunc;
  3278      return 1;
  3279    }else if( strcmp(zName,"offsets")==0 ){
  3280      *pxFunc = snippetOffsetsFunc;
  3281      return 1;
  3282    }
  3283    return 0;
  3284  }
  3285  
  3286  /*
  3287  ** Rename an fts1 table.
  3288  */
  3289  static int fulltextRename(
  3290    sqlite3_vtab *pVtab,
  3291    const char *zName
  3292  ){
  3293    fulltext_vtab *p = (fulltext_vtab *)pVtab;
  3294    int rc = SQLITE_NOMEM;
  3295    char *zSql = sqlite3_mprintf(
  3296      "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';"
  3297      "ALTER TABLE %Q.'%q_term' RENAME TO '%q_term';"
  3298      , p->zDb, p->zName, zName
  3299      , p->zDb, p->zName, zName
  3300    );
  3301    if( zSql ){
  3302      rc = sqlite3_exec(p->db, zSql, 0, 0, 0);
  3303      sqlite3_free(zSql);
  3304    }
  3305    return rc;
  3306  }
  3307  
  3308  static const sqlite3_module fulltextModule = {
  3309    /* iVersion      */ 0,
  3310    /* xCreate       */ fulltextCreate,
  3311    /* xConnect      */ fulltextConnect,
  3312    /* xBestIndex    */ fulltextBestIndex,
  3313    /* xDisconnect   */ fulltextDisconnect,
  3314    /* xDestroy      */ fulltextDestroy,
  3315    /* xOpen         */ fulltextOpen,
  3316    /* xClose        */ fulltextClose,
  3317    /* xFilter       */ fulltextFilter,
  3318    /* xNext         */ fulltextNext,
  3319    /* xEof          */ fulltextEof,
  3320    /* xColumn       */ fulltextColumn,
  3321    /* xRowid        */ fulltextRowid,
  3322    /* xUpdate       */ fulltextUpdate,
  3323    /* xBegin        */ 0, 
  3324    /* xSync         */ 0,
  3325    /* xCommit       */ 0,
  3326    /* xRollback     */ 0,
  3327    /* xFindFunction */ fulltextFindFunction,
  3328    /* xRename       */ fulltextRename,
  3329  };
  3330  
  3331  int sqlite3Fts1Init(sqlite3 *db){
  3332    sqlite3_overload_function(db, "snippet", -1);
  3333    sqlite3_overload_function(db, "offsets", -1);
  3334    return sqlite3_create_module(db, "fts1", &fulltextModule, 0);
  3335  }
  3336  
  3337  #if !SQLITE_CORE
  3338  #ifdef _WIN32
  3339  __declspec(dllexport)
  3340  #endif
  3341  int sqlite3_fts1_init(sqlite3 *db, char **pzErrMsg,
  3342                        const sqlite3_api_routines *pApi){
  3343    SQLITE_EXTENSION_INIT2(pApi)
  3344    return sqlite3Fts1Init(db);
  3345  }
  3346  #endif
  3347  
  3348  #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS1) */