modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/misc/spellfix.c (about)

     1  /*
     2  ** 2012 April 10
     3  **
     4  ** The author disclaims copyright to this source code.  In place of
     5  ** a legal notice, here is a blessing:
     6  **
     7  **    May you do good and not evil.
     8  **    May you find forgiveness for yourself and forgive others.
     9  **    May you share freely, never taking more than you give.
    10  **
    11  *************************************************************************
    12  **
    13  ** This module implements the spellfix1 VIRTUAL TABLE that can be used
    14  ** to search a large vocabulary for close matches.  See separate
    15  ** documentation (http://www.sqlite.org/spellfix1.html) for details.
    16  */
    17  #include "sqlite3ext.h"
    18  SQLITE_EXTENSION_INIT1
    19  
    20  #ifndef SQLITE_AMALGAMATION
    21  # include <string.h>
    22  # include <stdio.h>
    23  # include <stdlib.h>
    24  # include <assert.h>
    25  # define ALWAYS(X)  1
    26  # define NEVER(X)   0
    27    typedef unsigned char u8;
    28    typedef unsigned short u16;
    29  #endif
    30  #include <ctype.h>
    31  
    32  #ifndef SQLITE_OMIT_VIRTUALTABLE
    33  
    34  /*
    35  ** Character classes for ASCII characters:
    36  **
    37  **   0   ''        Silent letters:   H W
    38  **   1   'A'       Any vowel:   A E I O U (Y)
    39  **   2   'B'       A bilabeal stop or fricative:  B F P V W
    40  **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
    41  **   4   'D'       Alveolar stops:  D T
    42  **   5   'H'       Letter H at the beginning of a word
    43  **   6   'L'       Glide:  L
    44  **   7   'R'       Semivowel:  R
    45  **   8   'M'       Nasals:  M N
    46  **   9   'Y'       Letter Y at the beginning of a word.
    47  **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
    48  **   11  ' '       White space
    49  **   12  '?'       Other.
    50  */
    51  #define CCLASS_SILENT         0
    52  #define CCLASS_VOWEL          1
    53  #define CCLASS_B              2
    54  #define CCLASS_C              3
    55  #define CCLASS_D              4
    56  #define CCLASS_H              5
    57  #define CCLASS_L              6
    58  #define CCLASS_R              7
    59  #define CCLASS_M              8
    60  #define CCLASS_Y              9
    61  #define CCLASS_DIGIT         10
    62  #define CCLASS_SPACE         11
    63  #define CCLASS_OTHER         12
    64  
    65  /*
    66  ** The following table gives the character class for non-initial ASCII
    67  ** characters.
    68  */
    69  static const unsigned char midClass[] = {
    70   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    71   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    72   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    73   /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    74   /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
    75   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    76   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    77   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    78   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    79   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
    80   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
    81   /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
    82   /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
    83   /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
    84   /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
    85   /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
    86   /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
    87   /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
    88   /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
    89   /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
    90   /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
    91   /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
    92   /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
    93   /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
    94   /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
    95   /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
    96   /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
    97   /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
    98   /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
    99   /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
   100   /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
   101   /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
   102   /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
   103   /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
   104   /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
   105   /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
   106   /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
   107   /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
   108   /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
   109   /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
   110   /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
   111   /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
   112   /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   
   113  };
   114  /* 
   115  ** This tables gives the character class for ASCII characters that form the
   116  ** initial character of a word.  The only difference from midClass is with
   117  ** the letters H, W, and Y.
   118  */
   119  static const unsigned char initClass[] = {
   120   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   121   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   122   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   123   /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   124   /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
   125   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   126   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   127   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   128   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   129   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
   130   /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
   131   /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
   132   /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
   133   /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
   134   /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
   135   /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
   136   /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
   137   /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
   138   /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
   139   /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
   140   /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
   141   /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
   142   /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
   143   /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
   144   /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
   145   /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
   146   /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
   147   /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
   148   /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
   149   /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
   150   /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
   151   /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
   152   /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
   153   /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
   154   /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
   155   /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
   156   /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
   157   /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
   158   /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
   159   /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
   160   /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
   161   /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
   162   /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   
   163  };
   164  
   165  /*
   166  ** Mapping from the character class number (0-13) to a symbol for each
   167  ** character class.  Note that initClass[] can be used to map the class
   168  ** symbol back into the class number.
   169  */
   170  static const unsigned char className[] = ".ABCDHLRMY9 ?";
   171  
   172  /*
   173  ** Generate a "phonetic hash" from a string of ASCII characters
   174  ** in zIn[0..nIn-1].
   175  **
   176  **   * Map characters by character class as defined above.
   177  **   * Omit double-letters
   178  **   * Omit vowels beside R and L
   179  **   * Omit T when followed by CH
   180  **   * Omit W when followed by R
   181  **   * Omit D when followed by J or G
   182  **   * Omit K in KN or G in GN at the beginning of a word
   183  **
   184  ** Space to hold the result is obtained from sqlite3_malloc()
   185  **
   186  ** Return NULL if memory allocation fails.  
   187  */
   188  static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
   189    unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
   190    int i;
   191    int nOut = 0;
   192    char cPrev = 0x77;
   193    char cPrevX = 0x77;
   194    const unsigned char *aClass = initClass;
   195  
   196    if( zOut==0 ) return 0;
   197    if( nIn>2 ){
   198      switch( zIn[0] ){
   199        case 'g': 
   200        case 'k': {
   201          if( zIn[1]=='n' ){ zIn++; nIn--; }
   202          break;
   203        }
   204      }
   205    }
   206    for(i=0; i<nIn; i++){
   207      unsigned char c = zIn[i];
   208      if( i+1<nIn ){
   209        if( c=='w' && zIn[i+1]=='r' ) continue;
   210        if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
   211        if( i+2<nIn ){
   212          if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
   213        }
   214      }
   215      c = aClass[c&0x7f];
   216      if( c==CCLASS_SPACE ) continue;
   217      if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
   218      aClass = midClass;
   219      if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
   220         continue; /* No vowels beside L or R */ 
   221      }
   222      if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
   223         nOut--;   /* No vowels beside L or R */
   224      }
   225      cPrev = c;
   226      if( c==CCLASS_SILENT ) continue;
   227      cPrevX = c;
   228      c = className[c];
   229      assert( nOut>=0 );
   230      if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
   231    }
   232    zOut[nOut] = 0;
   233    return zOut;
   234  }
   235  
   236  /*
   237  ** This is an SQL function wrapper around phoneticHash().  See
   238  ** the description of phoneticHash() for additional information.
   239  */
   240  static void phoneticHashSqlFunc(
   241    sqlite3_context *context,
   242    int argc,
   243    sqlite3_value **argv
   244  ){
   245    const unsigned char *zIn;
   246    unsigned char *zOut;
   247  
   248    zIn = sqlite3_value_text(argv[0]);
   249    if( zIn==0 ) return;
   250    zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
   251    if( zOut==0 ){
   252      sqlite3_result_error_nomem(context);
   253    }else{
   254      sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
   255    }
   256  }
   257  
   258  /*
   259  ** Return the character class number for a character given its
   260  ** context.
   261  */
   262  static char characterClass(char cPrev, char c){
   263    return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
   264  }
   265  
   266  /*
   267  ** Return the cost of inserting or deleting character c immediately
   268  ** following character cPrev.  If cPrev==0, that means c is the first
   269  ** character of the word.
   270  */
   271  static int insertOrDeleteCost(char cPrev, char c, char cNext){
   272    char classC = characterClass(cPrev, c);
   273    char classCprev;
   274  
   275    if( classC==CCLASS_SILENT ){
   276      /* Insert or delete "silent" characters such as H or W */
   277      return 1;
   278    }
   279    if( cPrev==c ){
   280      /* Repeated characters, or miss a repeat */
   281      return 10;
   282    }
   283    if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
   284      return 20;  /* Insert a vowel before or after 'r' */
   285    }
   286    classCprev = characterClass(cPrev, cPrev);
   287    if( classC==classCprev ){
   288      if( classC==CCLASS_VOWEL ){
   289        /* Remove or add a new vowel to a vowel cluster */
   290        return 15;
   291      }else{
   292        /* Remove or add a consonant not in the same class */
   293        return 50;
   294      }
   295    }
   296  
   297    /* any other character insertion or deletion */
   298    return 100;
   299  }
   300  
   301  /*
   302  ** Divide the insertion cost by this factor when appending to the
   303  ** end of the word.
   304  */
   305  #define FINAL_INS_COST_DIV  4
   306  
   307  /*
   308  ** Return the cost of substituting cTo in place of cFrom assuming
   309  ** the previous character is cPrev.  If cPrev==0 then cTo is the first
   310  ** character of the word.
   311  */
   312  static int substituteCost(char cPrev, char cFrom, char cTo){
   313    char classFrom, classTo;
   314    if( cFrom==cTo ){
   315      /* Exact match */
   316      return 0;
   317    }
   318    if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
   319      /* differ only in case */
   320      return 0;
   321    }
   322    classFrom = characterClass(cPrev, cFrom);
   323    classTo = characterClass(cPrev, cTo);
   324    if( classFrom==classTo ){
   325      /* Same character class */
   326      return 40;
   327    }
   328    if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
   329        && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
   330      /* Convert from one consonant to another, but in a different class */
   331      return 75;
   332    }
   333    /* Any other subsitution */
   334    return 100;
   335  }
   336  
   337  /*
   338  ** Given two strings zA and zB which are pure ASCII, return the cost
   339  ** of transforming zA into zB.  If zA ends with '*' assume that it is
   340  ** a prefix of zB and give only minimal penalty for extra characters
   341  ** on the end of zB.
   342  **
   343  ** Smaller numbers mean a closer match.
   344  **
   345  ** Negative values indicate an error:
   346  **    -1  One of the inputs is NULL
   347  **    -2  Non-ASCII characters on input
   348  **    -3  Unable to allocate memory 
   349  **
   350  ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
   351  ** of zB that matched the pattern in zA. If zA does not end with a '*',
   352  ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
   353  ** If zA does end in a '*', then it is the number of bytes in the prefix
   354  ** of zB that was deemed to match zA.
   355  */
   356  static int editdist1(const char *zA, const char *zB, int *pnMatch){
   357    int nA, nB;            /* Number of characters in zA[] and zB[] */
   358    int xA, xB;            /* Loop counters for zA[] and zB[] */
   359    char cA = 0, cB;       /* Current character of zA and zB */
   360    char cAprev, cBprev;   /* Previous character of zA and zB */
   361    char cAnext, cBnext;   /* Next character in zA and zB */
   362    int d;                 /* North-west cost value */
   363    int dc = 0;            /* North-west character value */
   364    int res;               /* Final result */
   365    int *m;                /* The cost matrix */
   366    char *cx;              /* Corresponding character values */
   367    int *toFree = 0;       /* Malloced space */
   368    int nMatch = 0;
   369    int mStack[60+15];     /* Stack space to use if not too much is needed */
   370  
   371    /* Early out if either input is NULL */
   372    if( zA==0 || zB==0 ) return -1;
   373  
   374    /* Skip any common prefix */
   375    while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
   376    if( pnMatch ) *pnMatch = nMatch;
   377    if( zA[0]==0 && zB[0]==0 ) return 0;
   378  
   379  #if 0
   380    printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
   381  #endif
   382  
   383    /* Verify input strings and measure their lengths */
   384    for(nA=0; zA[nA]; nA++){
   385      if( zA[nA]&0x80 ) return -2;
   386    }
   387    for(nB=0; zB[nB]; nB++){
   388      if( zB[nB]&0x80 ) return -2;
   389    }
   390  
   391    /* Special processing if either string is empty */
   392    if( nA==0 ){
   393      cBprev = (char)dc;
   394      for(xB=res=0; (cB = zB[xB])!=0; xB++){
   395        res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
   396        cBprev = cB;
   397      }
   398      return res;
   399    }
   400    if( nB==0 ){
   401      cAprev = (char)dc;
   402      for(xA=res=0; (cA = zA[xA])!=0; xA++){
   403        res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
   404        cAprev = cA;
   405      }
   406      return res;
   407    }
   408  
   409    /* A is a prefix of B */
   410    if( zA[0]=='*' && zA[1]==0 ) return 0;
   411  
   412    /* Allocate and initialize the Wagner matrix */
   413    if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
   414      m = mStack;
   415    }else{
   416      m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
   417      if( m==0 ) return -3;
   418    }
   419    cx = (char*)&m[nB+1];
   420  
   421    /* Compute the Wagner edit distance */
   422    m[0] = 0;
   423    cx[0] = (char)dc;
   424    cBprev = (char)dc;
   425    for(xB=1; xB<=nB; xB++){
   426      cBnext = zB[xB];
   427      cB = zB[xB-1];
   428      cx[xB] = cB;
   429      m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
   430      cBprev = cB;
   431    }
   432    cAprev = (char)dc;
   433    for(xA=1; xA<=nA; xA++){
   434      int lastA = (xA==nA);
   435      cA = zA[xA-1];
   436      cAnext = zA[xA];
   437      if( cA=='*' && lastA ) break;
   438      d = m[0];
   439      dc = cx[0];
   440      m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
   441      cBprev = 0;
   442      for(xB=1; xB<=nB; xB++){
   443        int totalCost, insCost, delCost, subCost, ncx;
   444        cB = zB[xB-1];
   445        cBnext = zB[xB];
   446  
   447        /* Cost to insert cB */
   448        insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
   449        if( lastA ) insCost /= FINAL_INS_COST_DIV;
   450  
   451        /* Cost to delete cA */
   452        delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
   453  
   454        /* Cost to substitute cA->cB */
   455        subCost = substituteCost(cx[xB-1], cA, cB);
   456  
   457        /* Best cost */
   458        totalCost = insCost + m[xB-1];
   459        ncx = cB;
   460        if( (delCost + m[xB])<totalCost ){
   461          totalCost = delCost + m[xB];
   462          ncx = cA;
   463        }
   464        if( (subCost + d)<totalCost ){
   465          totalCost = subCost + d;
   466        }
   467  
   468  #if 0
   469        printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
   470               " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
   471               xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
   472               insCost, delCost, subCost, totalCost, ncx?ncx:' ');
   473  #endif
   474  
   475        /* Update the matrix */
   476        d = m[xB];
   477        dc = cx[xB];
   478        m[xB] = totalCost;
   479        cx[xB] = (char)ncx;
   480        cBprev = cB;
   481      }
   482      cAprev = cA;
   483    }
   484  
   485    /* Free the wagner matrix and return the result */
   486    if( cA=='*' ){
   487      res = m[1];
   488      for(xB=1; xB<=nB; xB++){
   489        if( m[xB]<res ){
   490          res = m[xB];
   491          if( pnMatch ) *pnMatch = xB+nMatch;
   492        }
   493      }
   494    }else{
   495      res = m[nB];
   496      /* In the current implementation, pnMatch is always NULL if zA does
   497      ** not end in "*" */
   498      assert( pnMatch==0 );
   499    }
   500    sqlite3_free(toFree);
   501    return res;
   502  }
   503  
   504  /*
   505  ** Function:    editdist(A,B)
   506  **
   507  ** Return the cost of transforming string A into string B.  Both strings
   508  ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
   509  ** a prefix of B and extra characters on the end of B have minimal additional
   510  ** cost.
   511  */
   512  static void editdistSqlFunc(
   513    sqlite3_context *context,
   514    int argc,
   515    sqlite3_value **argv
   516  ){
   517    int res = editdist1(
   518                      (const char*)sqlite3_value_text(argv[0]),
   519                      (const char*)sqlite3_value_text(argv[1]),
   520                      0);
   521    if( res<0 ){
   522      if( res==(-3) ){
   523        sqlite3_result_error_nomem(context);
   524      }else if( res==(-2) ){
   525        sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
   526      }else{
   527        sqlite3_result_error(context, "NULL input to editdist()", -1);
   528      }
   529    }else{ 
   530      sqlite3_result_int(context, res);
   531    }
   532  }
   533  
   534  /* End of the fixed-cost edit distance implementation
   535  ******************************************************************************
   536  *****************************************************************************
   537  ** Begin: Configurable cost unicode edit distance routines
   538  */
   539  /* Forward declaration of structures */
   540  typedef struct EditDist3Cost EditDist3Cost;
   541  typedef struct EditDist3Config EditDist3Config;
   542  typedef struct EditDist3Point EditDist3Point;
   543  typedef struct EditDist3From EditDist3From;
   544  typedef struct EditDist3FromString EditDist3FromString;
   545  typedef struct EditDist3To EditDist3To;
   546  typedef struct EditDist3ToString EditDist3ToString;
   547  typedef struct EditDist3Lang EditDist3Lang;
   548  
   549  
   550  /*
   551  ** An entry in the edit cost table
   552  */
   553  struct EditDist3Cost {
   554    EditDist3Cost *pNext;     /* Next cost element */
   555    u8 nFrom;                 /* Number of bytes in aFrom */
   556    u8 nTo;                   /* Number of bytes in aTo */
   557    u16 iCost;                /* Cost of this transformation */
   558    char a[4]    ;            /* FROM string followed by TO string */
   559    /* Additional TO and FROM string bytes appended as necessary */
   560  };
   561  
   562  /*
   563  ** Edit costs for a particular language ID 
   564  */
   565  struct EditDist3Lang {
   566    int iLang;             /* Language ID */
   567    int iInsCost;          /* Default insertion cost */
   568    int iDelCost;          /* Default deletion cost */
   569    int iSubCost;          /* Default substitution cost */
   570    EditDist3Cost *pCost;  /* Costs */
   571  };
   572  
   573  
   574  /*
   575  ** The default EditDist3Lang object, with default costs.
   576  */
   577  static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
   578  
   579  /*
   580  ** Complete configuration
   581  */
   582  struct EditDist3Config {
   583    int nLang;             /* Number of language IDs.  Size of a[] */
   584    EditDist3Lang *a;      /* One for each distinct language ID */
   585  };
   586  
   587  /*
   588  ** Extra information about each character in the FROM string.
   589  */
   590  struct EditDist3From {
   591    int nSubst;              /* Number of substitution cost entries */
   592    int nDel;                /* Number of deletion cost entries */
   593    int nByte;               /* Number of bytes in this character */
   594    EditDist3Cost **apSubst; /* Array of substitution costs for this element */
   595    EditDist3Cost **apDel;   /* Array of deletion cost entries */
   596  };
   597  
   598  /*
   599  ** A precompiled FROM string.
   600  *
   601  ** In the common case we expect the FROM string to be reused multiple times.
   602  ** In other words, the common case will be to measure the edit distance
   603  ** from a single origin string to multiple target strings.
   604  */
   605  struct EditDist3FromString {
   606    char *z;                 /* The complete text of the FROM string */
   607    int n;                   /* Number of characters in the FROM string */
   608    int isPrefix;            /* True if ends with '*' character */
   609    EditDist3From *a;        /* Extra info about each char of the FROM string */
   610  };
   611  
   612  /*
   613  ** Extra information about each character in the TO string.
   614  */
   615  struct EditDist3To {
   616    int nIns;                /* Number of insertion cost entries */
   617    int nByte;               /* Number of bytes in this character */
   618    EditDist3Cost **apIns;   /* Array of deletion cost entries */
   619  };
   620  
   621  /*
   622  ** A precompiled FROM string
   623  */
   624  struct EditDist3ToString {
   625    char *z;                 /* The complete text of the TO string */
   626    int n;                   /* Number of characters in the TO string */
   627    EditDist3To *a;          /* Extra info about each char of the TO string */
   628  };
   629  
   630  /*
   631  ** Clear or delete an instance of the object that records all edit-distance
   632  ** weights.
   633  */
   634  static void editDist3ConfigClear(EditDist3Config *p){
   635    int i;
   636    if( p==0 ) return;
   637    for(i=0; i<p->nLang; i++){
   638      EditDist3Cost *pCost, *pNext;
   639      pCost = p->a[i].pCost;
   640      while( pCost ){
   641        pNext = pCost->pNext;
   642        sqlite3_free(pCost);
   643        pCost = pNext;
   644      }
   645    }
   646    sqlite3_free(p->a);
   647    memset(p, 0, sizeof(*p));
   648  }
   649  static void editDist3ConfigDelete(void *pIn){
   650    EditDist3Config *p = (EditDist3Config*)pIn;
   651    editDist3ConfigClear(p);
   652    sqlite3_free(p);
   653  }
   654  
   655  /*
   656  ** Load all edit-distance weights from a table.
   657  */
   658  static int editDist3ConfigLoad(
   659    EditDist3Config *p,      /* The edit distance configuration to load */
   660    sqlite3 *db,            /* Load from this database */
   661    const char *zTable      /* Name of the table from which to load */
   662  ){
   663    sqlite3_stmt *pStmt;
   664    int rc, rc2;
   665    char *zSql;
   666    int iLangPrev = -9999;
   667    EditDist3Lang *pLang = 0;
   668  
   669    zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
   670                           " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
   671    if( zSql==0 ) return SQLITE_NOMEM;
   672    rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
   673    sqlite3_free(zSql);
   674    if( rc ) return rc;
   675    editDist3ConfigClear(p);
   676    while( sqlite3_step(pStmt)==SQLITE_ROW ){
   677      int iLang = sqlite3_column_int(pStmt, 0);
   678      const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
   679      int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
   680      const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
   681      int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
   682      int iCost = sqlite3_column_int(pStmt, 3);
   683  
   684      assert( zFrom!=0 || nFrom==0 );
   685      assert( zTo!=0 || nTo==0 );
   686      if( nFrom>100 || nTo>100 ) continue;
   687      if( iCost<0 ) continue;
   688      if( pLang==0 || iLang!=iLangPrev ){
   689        EditDist3Lang *pNew;
   690        pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
   691        if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
   692        p->a = pNew;
   693        pLang = &p->a[p->nLang];
   694        p->nLang++;
   695        pLang->iLang = iLang;
   696        pLang->iInsCost = 100;
   697        pLang->iDelCost = 100;
   698        pLang->iSubCost = 150;
   699        pLang->pCost = 0;
   700        iLangPrev = iLang;
   701      }
   702      if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
   703        pLang->iDelCost = iCost;
   704      }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
   705        pLang->iInsCost = iCost;
   706      }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
   707        pLang->iSubCost = iCost;
   708      }else{
   709        EditDist3Cost *pCost;
   710        int nExtra = nFrom + nTo - 4;
   711        if( nExtra<0 ) nExtra = 0;
   712        pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
   713        if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
   714        pCost->nFrom = (u8)nFrom;
   715        pCost->nTo = (u8)nTo;
   716        pCost->iCost = (u16)iCost;
   717        memcpy(pCost->a, zFrom, nFrom);
   718        memcpy(pCost->a + nFrom, zTo, nTo);
   719        pCost->pNext = pLang->pCost;
   720        pLang->pCost = pCost; 
   721      }
   722    }
   723    rc2 = sqlite3_finalize(pStmt);
   724    if( rc==SQLITE_OK ) rc = rc2;
   725    return rc;
   726  }
   727  
   728  /*
   729  ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
   730  ** of N.
   731  */
   732  static int utf8Len(unsigned char c, int N){
   733    int len = 1;
   734    if( c>0x7f ){
   735      if( (c&0xe0)==0xc0 ){
   736        len = 2;
   737      }else if( (c&0xf0)==0xe0 ){
   738        len = 3;
   739      }else{
   740        len = 4;
   741      }
   742    }
   743    if( len>N ) len = N;
   744    return len;
   745  }
   746  
   747  /*
   748  ** Return TRUE (non-zero) if the To side of the given cost matches
   749  ** the given string.
   750  */
   751  static int matchTo(EditDist3Cost *p, const char *z, int n){
   752    if( p->nTo>n ) return 0;
   753    if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
   754    return 1;
   755  }
   756  
   757  /*
   758  ** Return TRUE (non-zero) if the From side of the given cost matches
   759  ** the given string.
   760  */
   761  static int matchFrom(EditDist3Cost *p, const char *z, int n){
   762    assert( p->nFrom<=n );
   763    if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
   764    return 1;
   765  }
   766  
   767  /*
   768  ** Return TRUE (non-zero) of the next FROM character and the next TO
   769  ** character are the same.
   770  */
   771  static int matchFromTo(
   772    EditDist3FromString *pStr,  /* Left hand string */
   773    int n1,                     /* Index of comparison character on the left */
   774    const char *z2,             /* Right-handl comparison character */
   775    int n2                      /* Bytes remaining in z2[] */
   776  ){
   777    int b1 = pStr->a[n1].nByte;
   778    if( b1>n2 ) return 0;
   779    if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0;
   780    return 1;
   781  }
   782  
   783  /*
   784  ** Delete an EditDist3FromString objecct
   785  */
   786  static void editDist3FromStringDelete(EditDist3FromString *p){
   787    int i;
   788    if( p ){
   789      for(i=0; i<p->n; i++){
   790        sqlite3_free(p->a[i].apDel);
   791        sqlite3_free(p->a[i].apSubst);
   792      }
   793      sqlite3_free(p);
   794    }
   795  }
   796  
   797  /*
   798  ** Create a EditDist3FromString object.
   799  */
   800  static EditDist3FromString *editDist3FromStringNew(
   801    const EditDist3Lang *pLang,
   802    const char *z,
   803    int n
   804  ){
   805    EditDist3FromString *pStr;
   806    EditDist3Cost *p;
   807    int i;
   808  
   809    if( z==0 ) return 0;
   810    if( n<0 ) n = (int)strlen(z);
   811    pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
   812    if( pStr==0 ) return 0;
   813    pStr->a = (EditDist3From*)&pStr[1];
   814    memset(pStr->a, 0, sizeof(pStr->a[0])*n);
   815    pStr->n = n;
   816    pStr->z = (char*)&pStr->a[n];
   817    memcpy(pStr->z, z, n+1);
   818    if( n && z[n-1]=='*' ){
   819      pStr->isPrefix = 1;
   820      n--;
   821      pStr->n--;
   822      pStr->z[n] = 0;
   823    }else{
   824      pStr->isPrefix = 0;
   825    }
   826  
   827    for(i=0; i<n; i++){
   828      EditDist3From *pFrom = &pStr->a[i];
   829      memset(pFrom, 0, sizeof(*pFrom));
   830      pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
   831      for(p=pLang->pCost; p; p=p->pNext){
   832        EditDist3Cost **apNew;
   833        if( i+p->nFrom>n ) continue;
   834        if( matchFrom(p, z+i, n-i)==0 ) continue;
   835        if( p->nTo==0 ){
   836          apNew = sqlite3_realloc64(pFrom->apDel,
   837                                  sizeof(*apNew)*(pFrom->nDel+1));
   838          if( apNew==0 ) break;
   839          pFrom->apDel = apNew;
   840          apNew[pFrom->nDel++] = p;
   841        }else{
   842          apNew = sqlite3_realloc64(pFrom->apSubst,
   843                                  sizeof(*apNew)*(pFrom->nSubst+1));
   844          if( apNew==0 ) break;
   845          pFrom->apSubst = apNew;
   846          apNew[pFrom->nSubst++] = p;
   847        }
   848      }
   849      if( p ){
   850        editDist3FromStringDelete(pStr);
   851        pStr = 0;
   852        break;
   853      }
   854    }
   855    return pStr;
   856  }
   857  
   858  /*
   859  ** Update entry m[i] such that it is the minimum of its current value
   860  ** and m[j]+iCost.
   861  **
   862  ** If the iCost is 1,000,000 or greater, then consider the cost to be
   863  ** infinite and skip the update.
   864  */
   865  static void updateCost(
   866    unsigned int *m,
   867    int i,
   868    int j,
   869    int iCost
   870  ){
   871    assert( iCost>=0 );
   872    if( iCost<10000 ){
   873      unsigned int b = m[j] + iCost;
   874      if( b<m[i] ) m[i] = b;
   875    }
   876  }
   877  
   878  /*
   879  ** How much stack space (int bytes) to use for Wagner matrix in 
   880  ** editDist3Core().  If more space than this is required, the entire
   881  ** matrix is taken from the heap.  To reduce the load on the memory
   882  ** allocator, make this value as large as practical for the
   883  ** architecture in use.
   884  */
   885  #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
   886  # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
   887  #endif
   888  
   889  /* Compute the edit distance between two strings.
   890  **
   891  ** If an error occurs, return a negative number which is the error code.
   892  **
   893  ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
   894  ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
   895  ** not contain the pattern for a prefix-search, then this is always the number
   896  ** of characters in z2. If pFrom does contain a prefix search pattern, then
   897  ** it is the number of characters in the prefix of z2 that was deemed to 
   898  ** match pFrom.
   899  */
   900  static int editDist3Core(
   901    EditDist3FromString *pFrom,  /* The FROM string */
   902    const char *z2,              /* The TO string */
   903    int n2,                      /* Length of the TO string */
   904    const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
   905    int *pnMatch                 /* OUT: Characters in matched prefix */
   906  ){
   907    int k, n;
   908    int i1, b1;
   909    int i2, b2;
   910    EditDist3FromString f = *pFrom;
   911    EditDist3To *a2;
   912    unsigned int *m;
   913    unsigned int *pToFree;
   914    int szRow;
   915    EditDist3Cost *p;
   916    int res;
   917    sqlite3_uint64 nByte;
   918    unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
   919  
   920    /* allocate the Wagner matrix and the aTo[] array for the TO string */
   921    n = (f.n+1)*(n2+1);
   922    n = (n+1)&~1;
   923    nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
   924    if( nByte<=sizeof(stackSpace) ){
   925      m = stackSpace;
   926      pToFree = 0;
   927    }else{
   928      m = pToFree = sqlite3_malloc64( nByte );
   929      if( m==0 ) return -1;            /* Out of memory */
   930    }
   931    a2 = (EditDist3To*)&m[n];
   932    memset(a2, 0, sizeof(a2[0])*n2);
   933  
   934    /* Fill in the a1[] matrix for all characters of the TO string */
   935    for(i2=0; i2<n2; i2++){
   936      a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
   937      for(p=pLang->pCost; p; p=p->pNext){
   938        EditDist3Cost **apNew;
   939        if( p->nFrom>0 ) continue;
   940        if( i2+p->nTo>n2 ) continue;
   941        if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
   942        a2[i2].nIns++;
   943        apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
   944        if( apNew==0 ){
   945          res = -1;  /* Out of memory */
   946          goto editDist3Abort;
   947        }
   948        a2[i2].apIns = apNew;
   949        a2[i2].apIns[a2[i2].nIns-1] = p;
   950      }
   951    }
   952  
   953    /* Prepare to compute the minimum edit distance */
   954    szRow = f.n+1;
   955    memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
   956    m[0] = 0;
   957  
   958    /* First fill in the top-row of the matrix with FROM deletion costs */
   959    for(i1=0; i1<f.n; i1 += b1){
   960      b1 = f.a[i1].nByte;
   961      updateCost(m, i1+b1, i1, pLang->iDelCost);
   962      for(k=0; k<f.a[i1].nDel; k++){
   963        p = f.a[i1].apDel[k];
   964        updateCost(m, i1+p->nFrom, i1, p->iCost);
   965      }
   966    }
   967  
   968    /* Fill in all subsequent rows, top-to-bottom, left-to-right */
   969    for(i2=0; i2<n2; i2 += b2){
   970      int rx;      /* Starting index for current row */
   971      int rxp;     /* Starting index for previous row */
   972      b2 = a2[i2].nByte;
   973      rx = szRow*(i2+b2);
   974      rxp = szRow*i2;
   975      updateCost(m, rx, rxp, pLang->iInsCost);
   976      for(k=0; k<a2[i2].nIns; k++){
   977        p = a2[i2].apIns[k];
   978        updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
   979      }
   980      for(i1=0; i1<f.n; i1+=b1){
   981        int cx;    /* Index of current cell */
   982        int cxp;   /* Index of cell immediately to the left */
   983        int cxd;   /* Index of cell to the left and one row above */
   984        int cxu;   /* Index of cell immediately above */
   985        b1 = f.a[i1].nByte;
   986        cxp = rx + i1;
   987        cx = cxp + b1;
   988        cxd = rxp + i1;
   989        cxu = cxd + b1;
   990        updateCost(m, cx, cxp, pLang->iDelCost);
   991        for(k=0; k<f.a[i1].nDel; k++){
   992          p = f.a[i1].apDel[k];
   993          updateCost(m, cxp+p->nFrom, cxp, p->iCost);
   994        }
   995        updateCost(m, cx, cxu, pLang->iInsCost);
   996        if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
   997          updateCost(m, cx, cxd, 0);
   998        }
   999        updateCost(m, cx, cxd, pLang->iSubCost);
  1000        for(k=0; k<f.a[i1].nSubst; k++){
  1001          p = f.a[i1].apSubst[k];
  1002          if( matchTo(p, z2+i2, n2-i2) ){
  1003            updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
  1004          }
  1005        }
  1006      }
  1007    }
  1008  
  1009  #if 0  /* Enable for debugging */
  1010    printf("         ^");
  1011    for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
  1012    printf("\n   ^:");
  1013    for(i1=0; i1<szRow; i1++){
  1014      int v = m[i1];
  1015      if( v>9999 ) printf(" ****");
  1016      else         printf(" %4d", v);
  1017    }
  1018    printf("\n");
  1019    for(i2=0; i2<n2; i2++){
  1020      printf("%c-%02x:", z2[i2], z2[i2]&0xff);
  1021      for(i1=0; i1<szRow; i1++){
  1022        int v = m[(i2+1)*szRow+i1];
  1023        if( v>9999 ) printf(" ****");
  1024        else         printf(" %4d", v);
  1025      }
  1026      printf("\n");
  1027    }
  1028  #endif
  1029  
  1030    /* Free memory allocations and return the result */
  1031    res = (int)m[szRow*(n2+1)-1];
  1032    n = n2;
  1033    if( f.isPrefix ){
  1034      for(i2=1; i2<=n2; i2++){
  1035        int b = m[szRow*i2-1];
  1036        if( b<=res ){ 
  1037          res = b;
  1038          n = i2 - 1;
  1039        }
  1040      }
  1041    }
  1042    if( pnMatch ){
  1043      int nExtra = 0;
  1044      for(k=0; k<n; k++){
  1045        if( (z2[k] & 0xc0)==0x80 ) nExtra++;
  1046      }
  1047      *pnMatch = n - nExtra;
  1048    }
  1049  
  1050  editDist3Abort:
  1051    for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
  1052    sqlite3_free(pToFree);
  1053    return res;
  1054  }
  1055  
  1056  /*
  1057  ** Get an appropriate EditDist3Lang object.
  1058  */
  1059  static const EditDist3Lang *editDist3FindLang(
  1060    EditDist3Config *pConfig,
  1061    int iLang
  1062  ){
  1063    int i;
  1064    for(i=0; i<pConfig->nLang; i++){
  1065      if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
  1066    }
  1067    return &editDist3Lang;
  1068  }
  1069  
  1070  /*
  1071  ** Function:    editdist3(A,B,iLang)
  1072  **              editdist3(tablename)
  1073  **
  1074  ** Return the cost of transforming string A into string B using edit
  1075  ** weights for iLang.
  1076  **
  1077  ** The second form loads edit weights into memory from a table.
  1078  */
  1079  static void editDist3SqlFunc(
  1080    sqlite3_context *context,
  1081    int argc,
  1082    sqlite3_value **argv
  1083  ){
  1084    EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
  1085    sqlite3 *db = sqlite3_context_db_handle(context);
  1086    int rc;
  1087    if( argc==1 ){
  1088      const char *zTable = (const char*)sqlite3_value_text(argv[0]);
  1089      rc = editDist3ConfigLoad(pConfig, db, zTable);
  1090      if( rc ) sqlite3_result_error_code(context, rc);
  1091    }else{
  1092      const char *zA = (const char*)sqlite3_value_text(argv[0]);
  1093      const char *zB = (const char*)sqlite3_value_text(argv[1]);
  1094      int nA = sqlite3_value_bytes(argv[0]);
  1095      int nB = sqlite3_value_bytes(argv[1]);
  1096      int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
  1097      const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
  1098      EditDist3FromString *pFrom;
  1099      int dist;
  1100  
  1101      pFrom = editDist3FromStringNew(pLang, zA, nA);
  1102      if( pFrom==0 ){
  1103        sqlite3_result_error_nomem(context);
  1104        return;
  1105      }
  1106      dist = editDist3Core(pFrom, zB, nB, pLang, 0);
  1107      editDist3FromStringDelete(pFrom);
  1108      if( dist==(-1) ){
  1109        sqlite3_result_error_nomem(context);
  1110      }else{
  1111        sqlite3_result_int(context, dist);
  1112      }
  1113    } 
  1114  }
  1115  
  1116  /*
  1117  ** Register the editDist3 function with SQLite
  1118  */
  1119  static int editDist3Install(sqlite3 *db){
  1120    int rc;
  1121    EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
  1122    if( pConfig==0 ) return SQLITE_NOMEM;
  1123    memset(pConfig, 0, sizeof(*pConfig));
  1124    rc = sqlite3_create_function_v2(db, "editdist3",
  1125                2, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
  1126    if( rc==SQLITE_OK ){
  1127      rc = sqlite3_create_function_v2(db, "editdist3",
  1128                  3, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0, 0);
  1129    }
  1130    if( rc==SQLITE_OK ){
  1131      rc = sqlite3_create_function_v2(db, "editdist3",
  1132                  1, SQLITE_UTF8, pConfig, editDist3SqlFunc, 0, 0,
  1133                  editDist3ConfigDelete);
  1134    }else{
  1135      sqlite3_free(pConfig);
  1136    }
  1137    return rc;
  1138  }
  1139  /* End configurable cost unicode edit distance routines
  1140  ******************************************************************************
  1141  ******************************************************************************
  1142  ** Begin transliterate unicode-to-ascii implementation
  1143  */
  1144  
  1145  #if !SQLITE_AMALGAMATION
  1146  /*
  1147  ** This lookup table is used to help decode the first byte of
  1148  ** a multi-byte UTF8 character.
  1149  */
  1150  static const unsigned char sqlite3Utf8Trans1[] = {
  1151    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  1152    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  1153    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  1154    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
  1155    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  1156    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
  1157    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  1158    0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
  1159  };
  1160  #endif
  1161  
  1162  /*
  1163  ** Return the value of the first UTF-8 character in the string.
  1164  */
  1165  static int utf8Read(const unsigned char *z, int n, int *pSize){
  1166    int c, i;
  1167  
  1168    /* All callers to this routine (in the current implementation)
  1169    ** always have n>0. */
  1170    if( NEVER(n==0) ){
  1171      c = i = 0;
  1172    }else{
  1173      c = z[0];
  1174      i = 1;
  1175      if( c>=0xc0 ){
  1176        c = sqlite3Utf8Trans1[c-0xc0];
  1177        while( i<n && (z[i] & 0xc0)==0x80 ){
  1178          c = (c<<6) + (0x3f & z[i++]);
  1179        }
  1180      }
  1181    }
  1182    *pSize = i;
  1183    return c;
  1184  }
  1185  
  1186  /*
  1187  ** Return the number of characters in the utf-8 string in the nIn byte
  1188  ** buffer pointed to by zIn.
  1189  */
  1190  static int utf8Charlen(const char *zIn, int nIn){
  1191    int i;
  1192    int nChar = 0;
  1193    for(i=0; i<nIn; nChar++){
  1194      int sz;
  1195      utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
  1196      i += sz;
  1197    }
  1198    return nChar;
  1199  }
  1200  
  1201  /*
  1202  ** Table of translations from unicode characters into ASCII.
  1203  */
  1204  static const struct {
  1205   unsigned short int cFrom;
  1206   unsigned char cTo0, cTo1;
  1207  } translit[] = {
  1208    { 0x00A0,  0x20, 0x00 },  /*   to   */
  1209    { 0x00B5,  0x75, 0x00 },  /* µ to u */
  1210    { 0x00C0,  0x41, 0x00 },  /* À to A */
  1211    { 0x00C1,  0x41, 0x00 },  /* Á to A */
  1212    { 0x00C2,  0x41, 0x00 },  /* Â to A */
  1213    { 0x00C3,  0x41, 0x00 },  /* Ã to A */
  1214    { 0x00C4,  0x41, 0x65 },  /* Ä to Ae */
  1215    { 0x00C5,  0x41, 0x61 },  /* Å to Aa */
  1216    { 0x00C6,  0x41, 0x45 },  /* Æ to AE */
  1217    { 0x00C7,  0x43, 0x00 },  /* Ç to C */
  1218    { 0x00C8,  0x45, 0x00 },  /* È to E */
  1219    { 0x00C9,  0x45, 0x00 },  /* É to E */
  1220    { 0x00CA,  0x45, 0x00 },  /* Ê to E */
  1221    { 0x00CB,  0x45, 0x00 },  /* Ë to E */
  1222    { 0x00CC,  0x49, 0x00 },  /* Ì to I */
  1223    { 0x00CD,  0x49, 0x00 },  /* Í to I */
  1224    { 0x00CE,  0x49, 0x00 },  /* Î to I */
  1225    { 0x00CF,  0x49, 0x00 },  /* Ï to I */
  1226    { 0x00D0,  0x44, 0x00 },  /* Ð to D */
  1227    { 0x00D1,  0x4E, 0x00 },  /* Ñ to N */
  1228    { 0x00D2,  0x4F, 0x00 },  /* Ò to O */
  1229    { 0x00D3,  0x4F, 0x00 },  /* Ó to O */
  1230    { 0x00D4,  0x4F, 0x00 },  /* Ô to O */
  1231    { 0x00D5,  0x4F, 0x00 },  /* Õ to O */
  1232    { 0x00D6,  0x4F, 0x65 },  /* Ö to Oe */
  1233    { 0x00D7,  0x78, 0x00 },  /* × to x */
  1234    { 0x00D8,  0x4F, 0x00 },  /* Ø to O */
  1235    { 0x00D9,  0x55, 0x00 },  /* Ù to U */
  1236    { 0x00DA,  0x55, 0x00 },  /* Ú to U */
  1237    { 0x00DB,  0x55, 0x00 },  /* Û to U */
  1238    { 0x00DC,  0x55, 0x65 },  /* Ü to Ue */
  1239    { 0x00DD,  0x59, 0x00 },  /* Ý to Y */
  1240    { 0x00DE,  0x54, 0x68 },  /* Þ to Th */
  1241    { 0x00DF,  0x73, 0x73 },  /* ß to ss */
  1242    { 0x00E0,  0x61, 0x00 },  /* à to a */
  1243    { 0x00E1,  0x61, 0x00 },  /* á to a */
  1244    { 0x00E2,  0x61, 0x00 },  /* â to a */
  1245    { 0x00E3,  0x61, 0x00 },  /* ã to a */
  1246    { 0x00E4,  0x61, 0x65 },  /* ä to ae */
  1247    { 0x00E5,  0x61, 0x61 },  /* å to aa */
  1248    { 0x00E6,  0x61, 0x65 },  /* æ to ae */
  1249    { 0x00E7,  0x63, 0x00 },  /* ç to c */
  1250    { 0x00E8,  0x65, 0x00 },  /* è to e */
  1251    { 0x00E9,  0x65, 0x00 },  /* é to e */
  1252    { 0x00EA,  0x65, 0x00 },  /* ê to e */
  1253    { 0x00EB,  0x65, 0x00 },  /* ë to e */
  1254    { 0x00EC,  0x69, 0x00 },  /* ì to i */
  1255    { 0x00ED,  0x69, 0x00 },  /* í to i */
  1256    { 0x00EE,  0x69, 0x00 },  /* î to i */
  1257    { 0x00EF,  0x69, 0x00 },  /* ï to i */
  1258    { 0x00F0,  0x64, 0x00 },  /* ð to d */
  1259    { 0x00F1,  0x6E, 0x00 },  /* ñ to n */
  1260    { 0x00F2,  0x6F, 0x00 },  /* ò to o */
  1261    { 0x00F3,  0x6F, 0x00 },  /* ó to o */
  1262    { 0x00F4,  0x6F, 0x00 },  /* ô to o */
  1263    { 0x00F5,  0x6F, 0x00 },  /* õ to o */
  1264    { 0x00F6,  0x6F, 0x65 },  /* ö to oe */
  1265    { 0x00F7,  0x3A, 0x00 },  /* ÷ to : */
  1266    { 0x00F8,  0x6F, 0x00 },  /* ø to o */
  1267    { 0x00F9,  0x75, 0x00 },  /* ù to u */
  1268    { 0x00FA,  0x75, 0x00 },  /* ú to u */
  1269    { 0x00FB,  0x75, 0x00 },  /* û to u */
  1270    { 0x00FC,  0x75, 0x65 },  /* ü to ue */
  1271    { 0x00FD,  0x79, 0x00 },  /* ý to y */
  1272    { 0x00FE,  0x74, 0x68 },  /* þ to th */
  1273    { 0x00FF,  0x79, 0x00 },  /* ÿ to y */
  1274    { 0x0100,  0x41, 0x00 },  /* Ā to A */
  1275    { 0x0101,  0x61, 0x00 },  /* ā to a */
  1276    { 0x0102,  0x41, 0x00 },  /* Ă to A */
  1277    { 0x0103,  0x61, 0x00 },  /* ă to a */
  1278    { 0x0104,  0x41, 0x00 },  /* Ą to A */
  1279    { 0x0105,  0x61, 0x00 },  /* ą to a */
  1280    { 0x0106,  0x43, 0x00 },  /* Ć to C */
  1281    { 0x0107,  0x63, 0x00 },  /* ć to c */
  1282    { 0x0108,  0x43, 0x68 },  /* Ĉ to Ch */
  1283    { 0x0109,  0x63, 0x68 },  /* ĉ to ch */
  1284    { 0x010A,  0x43, 0x00 },  /* Ċ to C */
  1285    { 0x010B,  0x63, 0x00 },  /* ċ to c */
  1286    { 0x010C,  0x43, 0x00 },  /* Č to C */
  1287    { 0x010D,  0x63, 0x00 },  /* č to c */
  1288    { 0x010E,  0x44, 0x00 },  /* Ď to D */
  1289    { 0x010F,  0x64, 0x00 },  /* ď to d */
  1290    { 0x0110,  0x44, 0x00 },  /* Đ to D */
  1291    { 0x0111,  0x64, 0x00 },  /* đ to d */
  1292    { 0x0112,  0x45, 0x00 },  /* Ē to E */
  1293    { 0x0113,  0x65, 0x00 },  /* ē to e */
  1294    { 0x0114,  0x45, 0x00 },  /* Ĕ to E */
  1295    { 0x0115,  0x65, 0x00 },  /* ĕ to e */
  1296    { 0x0116,  0x45, 0x00 },  /* Ė to E */
  1297    { 0x0117,  0x65, 0x00 },  /* ė to e */
  1298    { 0x0118,  0x45, 0x00 },  /* Ę to E */
  1299    { 0x0119,  0x65, 0x00 },  /* ę to e */
  1300    { 0x011A,  0x45, 0x00 },  /* Ě to E */
  1301    { 0x011B,  0x65, 0x00 },  /* ě to e */
  1302    { 0x011C,  0x47, 0x68 },  /* Ĝ to Gh */
  1303    { 0x011D,  0x67, 0x68 },  /* ĝ to gh */
  1304    { 0x011E,  0x47, 0x00 },  /* Ğ to G */
  1305    { 0x011F,  0x67, 0x00 },  /* ğ to g */
  1306    { 0x0120,  0x47, 0x00 },  /* Ġ to G */
  1307    { 0x0121,  0x67, 0x00 },  /* ġ to g */
  1308    { 0x0122,  0x47, 0x00 },  /* Ģ to G */
  1309    { 0x0123,  0x67, 0x00 },  /* ģ to g */
  1310    { 0x0124,  0x48, 0x68 },  /* Ĥ to Hh */
  1311    { 0x0125,  0x68, 0x68 },  /* ĥ to hh */
  1312    { 0x0126,  0x48, 0x00 },  /* Ħ to H */
  1313    { 0x0127,  0x68, 0x00 },  /* ħ to h */
  1314    { 0x0128,  0x49, 0x00 },  /* Ĩ to I */
  1315    { 0x0129,  0x69, 0x00 },  /* ĩ to i */
  1316    { 0x012A,  0x49, 0x00 },  /* Ī to I */
  1317    { 0x012B,  0x69, 0x00 },  /* ī to i */
  1318    { 0x012C,  0x49, 0x00 },  /* Ĭ to I */
  1319    { 0x012D,  0x69, 0x00 },  /* ĭ to i */
  1320    { 0x012E,  0x49, 0x00 },  /* Į to I */
  1321    { 0x012F,  0x69, 0x00 },  /* į to i */
  1322    { 0x0130,  0x49, 0x00 },  /* İ to I */
  1323    { 0x0131,  0x69, 0x00 },  /* ı to i */
  1324    { 0x0132,  0x49, 0x4A },  /* IJ to IJ */
  1325    { 0x0133,  0x69, 0x6A },  /* ij to ij */
  1326    { 0x0134,  0x4A, 0x68 },  /* Ĵ to Jh */
  1327    { 0x0135,  0x6A, 0x68 },  /* ĵ to jh */
  1328    { 0x0136,  0x4B, 0x00 },  /* Ķ to K */
  1329    { 0x0137,  0x6B, 0x00 },  /* ķ to k */
  1330    { 0x0138,  0x6B, 0x00 },  /* ĸ to k */
  1331    { 0x0139,  0x4C, 0x00 },  /* Ĺ to L */
  1332    { 0x013A,  0x6C, 0x00 },  /* ĺ to l */
  1333    { 0x013B,  0x4C, 0x00 },  /* Ļ to L */
  1334    { 0x013C,  0x6C, 0x00 },  /* ļ to l */
  1335    { 0x013D,  0x4C, 0x00 },  /* Ľ to L */
  1336    { 0x013E,  0x6C, 0x00 },  /* ľ to l */
  1337    { 0x013F,  0x4C, 0x2E },  /* Ŀ to L. */
  1338    { 0x0140,  0x6C, 0x2E },  /* ŀ to l. */
  1339    { 0x0141,  0x4C, 0x00 },  /* Ł to L */
  1340    { 0x0142,  0x6C, 0x00 },  /* ł to l */
  1341    { 0x0143,  0x4E, 0x00 },  /* Ń to N */
  1342    { 0x0144,  0x6E, 0x00 },  /* ń to n */
  1343    { 0x0145,  0x4E, 0x00 },  /* Ņ to N */
  1344    { 0x0146,  0x6E, 0x00 },  /* ņ to n */
  1345    { 0x0147,  0x4E, 0x00 },  /* Ň to N */
  1346    { 0x0148,  0x6E, 0x00 },  /* ň to n */
  1347    { 0x0149,  0x27, 0x6E },  /* ʼn to 'n */
  1348    { 0x014A,  0x4E, 0x47 },  /* Ŋ to NG */
  1349    { 0x014B,  0x6E, 0x67 },  /* ŋ to ng */
  1350    { 0x014C,  0x4F, 0x00 },  /* Ō to O */
  1351    { 0x014D,  0x6F, 0x00 },  /* ō to o */
  1352    { 0x014E,  0x4F, 0x00 },  /* Ŏ to O */
  1353    { 0x014F,  0x6F, 0x00 },  /* ŏ to o */
  1354    { 0x0150,  0x4F, 0x00 },  /* Ő to O */
  1355    { 0x0151,  0x6F, 0x00 },  /* ő to o */
  1356    { 0x0152,  0x4F, 0x45 },  /* Œ to OE */
  1357    { 0x0153,  0x6F, 0x65 },  /* œ to oe */
  1358    { 0x0154,  0x52, 0x00 },  /* Ŕ to R */
  1359    { 0x0155,  0x72, 0x00 },  /* ŕ to r */
  1360    { 0x0156,  0x52, 0x00 },  /* Ŗ to R */
  1361    { 0x0157,  0x72, 0x00 },  /* ŗ to r */
  1362    { 0x0158,  0x52, 0x00 },  /* Ř to R */
  1363    { 0x0159,  0x72, 0x00 },  /* ř to r */
  1364    { 0x015A,  0x53, 0x00 },  /* Ś to S */
  1365    { 0x015B,  0x73, 0x00 },  /* ś to s */
  1366    { 0x015C,  0x53, 0x68 },  /* Ŝ to Sh */
  1367    { 0x015D,  0x73, 0x68 },  /* ŝ to sh */
  1368    { 0x015E,  0x53, 0x00 },  /* Ş to S */
  1369    { 0x015F,  0x73, 0x00 },  /* ş to s */
  1370    { 0x0160,  0x53, 0x00 },  /* Š to S */
  1371    { 0x0161,  0x73, 0x00 },  /* š to s */
  1372    { 0x0162,  0x54, 0x00 },  /* Ţ to T */
  1373    { 0x0163,  0x74, 0x00 },  /* ţ to t */
  1374    { 0x0164,  0x54, 0x00 },  /* Ť to T */
  1375    { 0x0165,  0x74, 0x00 },  /* ť to t */
  1376    { 0x0166,  0x54, 0x00 },  /* Ŧ to T */
  1377    { 0x0167,  0x74, 0x00 },  /* ŧ to t */
  1378    { 0x0168,  0x55, 0x00 },  /* Ũ to U */
  1379    { 0x0169,  0x75, 0x00 },  /* ũ to u */
  1380    { 0x016A,  0x55, 0x00 },  /* Ū to U */
  1381    { 0x016B,  0x75, 0x00 },  /* ū to u */
  1382    { 0x016C,  0x55, 0x00 },  /* Ŭ to U */
  1383    { 0x016D,  0x75, 0x00 },  /* ŭ to u */
  1384    { 0x016E,  0x55, 0x00 },  /* Ů to U */
  1385    { 0x016F,  0x75, 0x00 },  /* ů to u */
  1386    { 0x0170,  0x55, 0x00 },  /* Ű to U */
  1387    { 0x0171,  0x75, 0x00 },  /* ű to u */
  1388    { 0x0172,  0x55, 0x00 },  /* Ų to U */
  1389    { 0x0173,  0x75, 0x00 },  /* ų to u */
  1390    { 0x0174,  0x57, 0x00 },  /* Ŵ to W */
  1391    { 0x0175,  0x77, 0x00 },  /* ŵ to w */
  1392    { 0x0176,  0x59, 0x00 },  /* Ŷ to Y */
  1393    { 0x0177,  0x79, 0x00 },  /* ŷ to y */
  1394    { 0x0178,  0x59, 0x00 },  /* Ÿ to Y */
  1395    { 0x0179,  0x5A, 0x00 },  /* Ź to Z */
  1396    { 0x017A,  0x7A, 0x00 },  /* ź to z */
  1397    { 0x017B,  0x5A, 0x00 },  /* Ż to Z */
  1398    { 0x017C,  0x7A, 0x00 },  /* ż to z */
  1399    { 0x017D,  0x5A, 0x00 },  /* Ž to Z */
  1400    { 0x017E,  0x7A, 0x00 },  /* ž to z */
  1401    { 0x017F,  0x73, 0x00 },  /* ſ to s */
  1402    { 0x0192,  0x66, 0x00 },  /* ƒ to f */
  1403    { 0x0218,  0x53, 0x00 },  /* Ș to S */
  1404    { 0x0219,  0x73, 0x00 },  /* ș to s */
  1405    { 0x021A,  0x54, 0x00 },  /* Ț to T */
  1406    { 0x021B,  0x74, 0x00 },  /* ț to t */
  1407    { 0x0386,  0x41, 0x00 },  /* Ά to A */
  1408    { 0x0388,  0x45, 0x00 },  /* Έ to E */
  1409    { 0x0389,  0x49, 0x00 },  /* Ή to I */
  1410    { 0x038A,  0x49, 0x00 },  /* Ί to I */
  1411    { 0x038C,  0x4f, 0x00 },  /* Ό to O */
  1412    { 0x038E,  0x59, 0x00 },  /* Ύ to Y */
  1413    { 0x038F,  0x4f, 0x00 },  /* Ώ to O */
  1414    { 0x0390,  0x69, 0x00 },  /* ΐ to i */
  1415    { 0x0391,  0x41, 0x00 },  /* Α to A */
  1416    { 0x0392,  0x42, 0x00 },  /* Β to B */
  1417    { 0x0393,  0x47, 0x00 },  /* Γ to G */
  1418    { 0x0394,  0x44, 0x00 },  /* Δ to D */
  1419    { 0x0395,  0x45, 0x00 },  /* Ε to E */
  1420    { 0x0396,  0x5a, 0x00 },  /* Ζ to Z */
  1421    { 0x0397,  0x49, 0x00 },  /* Η to I */
  1422    { 0x0398,  0x54, 0x68 },  /* Θ to Th */
  1423    { 0x0399,  0x49, 0x00 },  /* Ι to I */
  1424    { 0x039A,  0x4b, 0x00 },  /* Κ to K */
  1425    { 0x039B,  0x4c, 0x00 },  /* Λ to L */
  1426    { 0x039C,  0x4d, 0x00 },  /* Μ to M */
  1427    { 0x039D,  0x4e, 0x00 },  /* Ν to N */
  1428    { 0x039E,  0x58, 0x00 },  /* Ξ to X */
  1429    { 0x039F,  0x4f, 0x00 },  /* Ο to O */
  1430    { 0x03A0,  0x50, 0x00 },  /* Π to P */
  1431    { 0x03A1,  0x52, 0x00 },  /* Ρ to R */
  1432    { 0x03A3,  0x53, 0x00 },  /* Σ to S */
  1433    { 0x03A4,  0x54, 0x00 },  /* Τ to T */
  1434    { 0x03A5,  0x59, 0x00 },  /* Υ to Y */
  1435    { 0x03A6,  0x46, 0x00 },  /* Φ to F */
  1436    { 0x03A7,  0x43, 0x68 },  /* Χ to Ch */
  1437    { 0x03A8,  0x50, 0x73 },  /* Ψ to Ps */
  1438    { 0x03A9,  0x4f, 0x00 },  /* Ω to O */
  1439    { 0x03AA,  0x49, 0x00 },  /* Ϊ to I */
  1440    { 0x03AB,  0x59, 0x00 },  /* Ϋ to Y */
  1441    { 0x03AC,  0x61, 0x00 },  /* ά to a */
  1442    { 0x03AD,  0x65, 0x00 },  /* έ to e */
  1443    { 0x03AE,  0x69, 0x00 },  /* ή to i */
  1444    { 0x03AF,  0x69, 0x00 },  /* ί to i */
  1445    { 0x03B1,  0x61, 0x00 },  /* α to a */
  1446    { 0x03B2,  0x62, 0x00 },  /* β to b */
  1447    { 0x03B3,  0x67, 0x00 },  /* γ to g */
  1448    { 0x03B4,  0x64, 0x00 },  /* δ to d */
  1449    { 0x03B5,  0x65, 0x00 },  /* ε to e */
  1450    { 0x03B6,  0x7a, 0x00 },  /* ζ to z */
  1451    { 0x03B7,  0x69, 0x00 },  /* η to i */
  1452    { 0x03B8,  0x74, 0x68 },  /* θ to th */
  1453    { 0x03B9,  0x69, 0x00 },  /* ι to i */
  1454    { 0x03BA,  0x6b, 0x00 },  /* κ to k */
  1455    { 0x03BB,  0x6c, 0x00 },  /* λ to l */
  1456    { 0x03BC,  0x6d, 0x00 },  /* μ to m */
  1457    { 0x03BD,  0x6e, 0x00 },  /* ν to n */
  1458    { 0x03BE,  0x78, 0x00 },  /* ξ to x */
  1459    { 0x03BF,  0x6f, 0x00 },  /* ο to o */
  1460    { 0x03C0,  0x70, 0x00 },  /* π to p */
  1461    { 0x03C1,  0x72, 0x00 },  /* ρ to r */
  1462    { 0x03C3,  0x73, 0x00 },  /* σ to s */
  1463    { 0x03C4,  0x74, 0x00 },  /* τ to t */
  1464    { 0x03C5,  0x79, 0x00 },  /* υ to y */
  1465    { 0x03C6,  0x66, 0x00 },  /* φ to f */
  1466    { 0x03C7,  0x63, 0x68 },  /* χ to ch */
  1467    { 0x03C8,  0x70, 0x73 },  /* ψ to ps */
  1468    { 0x03C9,  0x6f, 0x00 },  /* ω to o */
  1469    { 0x03CA,  0x69, 0x00 },  /* ϊ to i */
  1470    { 0x03CB,  0x79, 0x00 },  /* ϋ to y */
  1471    { 0x03CC,  0x6f, 0x00 },  /* ό to o */
  1472    { 0x03CD,  0x79, 0x00 },  /* ύ to y */
  1473    { 0x03CE,  0x69, 0x00 },  /* ώ to i */
  1474    { 0x0400,  0x45, 0x00 },  /* Ѐ to E */
  1475    { 0x0401,  0x45, 0x00 },  /* Ё to E */
  1476    { 0x0402,  0x44, 0x00 },  /* Ђ to D */
  1477    { 0x0403,  0x47, 0x00 },  /* Ѓ to G */
  1478    { 0x0404,  0x45, 0x00 },  /* Є to E */
  1479    { 0x0405,  0x5a, 0x00 },  /* Ѕ to Z */
  1480    { 0x0406,  0x49, 0x00 },  /* І to I */
  1481    { 0x0407,  0x49, 0x00 },  /* Ї to I */
  1482    { 0x0408,  0x4a, 0x00 },  /* Ј to J */
  1483    { 0x0409,  0x49, 0x00 },  /* Љ to I */
  1484    { 0x040A,  0x4e, 0x00 },  /* Њ to N */
  1485    { 0x040B,  0x44, 0x00 },  /* Ћ to D */
  1486    { 0x040C,  0x4b, 0x00 },  /* Ќ to K */
  1487    { 0x040D,  0x49, 0x00 },  /* Ѝ to I */
  1488    { 0x040E,  0x55, 0x00 },  /* Ў to U */
  1489    { 0x040F,  0x44, 0x00 },  /* Џ to D */
  1490    { 0x0410,  0x41, 0x00 },  /* А to A */
  1491    { 0x0411,  0x42, 0x00 },  /* Б to B */
  1492    { 0x0412,  0x56, 0x00 },  /* В to V */
  1493    { 0x0413,  0x47, 0x00 },  /* Г to G */
  1494    { 0x0414,  0x44, 0x00 },  /* Д to D */
  1495    { 0x0415,  0x45, 0x00 },  /* Е to E */
  1496    { 0x0416,  0x5a, 0x68 },  /* Ж to Zh */
  1497    { 0x0417,  0x5a, 0x00 },  /* З to Z */
  1498    { 0x0418,  0x49, 0x00 },  /* И to I */
  1499    { 0x0419,  0x49, 0x00 },  /* Й to I */
  1500    { 0x041A,  0x4b, 0x00 },  /* К to K */
  1501    { 0x041B,  0x4c, 0x00 },  /* Л to L */
  1502    { 0x041C,  0x4d, 0x00 },  /* М to M */
  1503    { 0x041D,  0x4e, 0x00 },  /* Н to N */
  1504    { 0x041E,  0x4f, 0x00 },  /* О to O */
  1505    { 0x041F,  0x50, 0x00 },  /* П to P */
  1506    { 0x0420,  0x52, 0x00 },  /* Р to R */
  1507    { 0x0421,  0x53, 0x00 },  /* С to S */
  1508    { 0x0422,  0x54, 0x00 },  /* Т to T */
  1509    { 0x0423,  0x55, 0x00 },  /* У to U */
  1510    { 0x0424,  0x46, 0x00 },  /* Ф to F */
  1511    { 0x0425,  0x4b, 0x68 },  /* Х to Kh */
  1512    { 0x0426,  0x54, 0x63 },  /* Ц to Tc */
  1513    { 0x0427,  0x43, 0x68 },  /* Ч to Ch */
  1514    { 0x0428,  0x53, 0x68 },  /* Ш to Sh */
  1515    { 0x0429,  0x53, 0x68 },  /* Щ to Shch */
  1516    { 0x042A,  0x61, 0x00 },  /*  to A */
  1517    { 0x042B,  0x59, 0x00 },  /* Ы to Y */
  1518    { 0x042C,  0x59, 0x00 },  /*  to Y */
  1519    { 0x042D,  0x45, 0x00 },  /* Э to E */
  1520    { 0x042E,  0x49, 0x75 },  /* Ю to Iu */
  1521    { 0x042F,  0x49, 0x61 },  /* Я to Ia */
  1522    { 0x0430,  0x61, 0x00 },  /* а to a */
  1523    { 0x0431,  0x62, 0x00 },  /* б to b */
  1524    { 0x0432,  0x76, 0x00 },  /* в to v */
  1525    { 0x0433,  0x67, 0x00 },  /* г to g */
  1526    { 0x0434,  0x64, 0x00 },  /* д to d */
  1527    { 0x0435,  0x65, 0x00 },  /* е to e */
  1528    { 0x0436,  0x7a, 0x68 },  /* ж to zh */
  1529    { 0x0437,  0x7a, 0x00 },  /* з to z */
  1530    { 0x0438,  0x69, 0x00 },  /* и to i */
  1531    { 0x0439,  0x69, 0x00 },  /* й to i */
  1532    { 0x043A,  0x6b, 0x00 },  /* к to k */
  1533    { 0x043B,  0x6c, 0x00 },  /* л to l */
  1534    { 0x043C,  0x6d, 0x00 },  /* м to m */
  1535    { 0x043D,  0x6e, 0x00 },  /* н to n */
  1536    { 0x043E,  0x6f, 0x00 },  /* о to o */
  1537    { 0x043F,  0x70, 0x00 },  /* п to p */
  1538    { 0x0440,  0x72, 0x00 },  /* р to r */
  1539    { 0x0441,  0x73, 0x00 },  /* с to s */
  1540    { 0x0442,  0x74, 0x00 },  /* т to t */
  1541    { 0x0443,  0x75, 0x00 },  /* у to u */
  1542    { 0x0444,  0x66, 0x00 },  /* ф to f */
  1543    { 0x0445,  0x6b, 0x68 },  /* х to kh */
  1544    { 0x0446,  0x74, 0x63 },  /* ц to tc */
  1545    { 0x0447,  0x63, 0x68 },  /* ч to ch */
  1546    { 0x0448,  0x73, 0x68 },  /* ш to sh */
  1547    { 0x0449,  0x73, 0x68 },  /* щ to shch */
  1548    { 0x044A,  0x61, 0x00 },  /*  to a */
  1549    { 0x044B,  0x79, 0x00 },  /* ы to y */
  1550    { 0x044C,  0x79, 0x00 },  /*  to y */
  1551    { 0x044D,  0x65, 0x00 },  /* э to e */
  1552    { 0x044E,  0x69, 0x75 },  /* ю to iu */
  1553    { 0x044F,  0x69, 0x61 },  /* я to ia */
  1554    { 0x0450,  0x65, 0x00 },  /* ѐ to e */
  1555    { 0x0451,  0x65, 0x00 },  /* ё to e */
  1556    { 0x0452,  0x64, 0x00 },  /* ђ to d */
  1557    { 0x0453,  0x67, 0x00 },  /* ѓ to g */
  1558    { 0x0454,  0x65, 0x00 },  /* є to e */
  1559    { 0x0455,  0x7a, 0x00 },  /* ѕ to z */
  1560    { 0x0456,  0x69, 0x00 },  /* і to i */
  1561    { 0x0457,  0x69, 0x00 },  /* ї to i */
  1562    { 0x0458,  0x6a, 0x00 },  /* ј to j */
  1563    { 0x0459,  0x69, 0x00 },  /* љ to i */
  1564    { 0x045A,  0x6e, 0x00 },  /* њ to n */
  1565    { 0x045B,  0x64, 0x00 },  /* ћ to d */
  1566    { 0x045C,  0x6b, 0x00 },  /* ќ to k */
  1567    { 0x045D,  0x69, 0x00 },  /* ѝ to i */
  1568    { 0x045E,  0x75, 0x00 },  /* ў to u */
  1569    { 0x045F,  0x64, 0x00 },  /* џ to d */
  1570    { 0x1E02,  0x42, 0x00 },  /* Ḃ to B */
  1571    { 0x1E03,  0x62, 0x00 },  /* ḃ to b */
  1572    { 0x1E0A,  0x44, 0x00 },  /* Ḋ to D */
  1573    { 0x1E0B,  0x64, 0x00 },  /* ḋ to d */
  1574    { 0x1E1E,  0x46, 0x00 },  /* Ḟ to F */
  1575    { 0x1E1F,  0x66, 0x00 },  /* ḟ to f */
  1576    { 0x1E40,  0x4D, 0x00 },  /* Ṁ to M */
  1577    { 0x1E41,  0x6D, 0x00 },  /* ṁ to m */
  1578    { 0x1E56,  0x50, 0x00 },  /* Ṗ to P */
  1579    { 0x1E57,  0x70, 0x00 },  /* ṗ to p */
  1580    { 0x1E60,  0x53, 0x00 },  /* Ṡ to S */
  1581    { 0x1E61,  0x73, 0x00 },  /* ṡ to s */
  1582    { 0x1E6A,  0x54, 0x00 },  /* Ṫ to T */
  1583    { 0x1E6B,  0x74, 0x00 },  /* ṫ to t */
  1584    { 0x1E80,  0x57, 0x00 },  /* Ẁ to W */
  1585    { 0x1E81,  0x77, 0x00 },  /* ẁ to w */
  1586    { 0x1E82,  0x57, 0x00 },  /* Ẃ to W */
  1587    { 0x1E83,  0x77, 0x00 },  /* ẃ to w */
  1588    { 0x1E84,  0x57, 0x00 },  /* Ẅ to W */
  1589    { 0x1E85,  0x77, 0x00 },  /* ẅ to w */
  1590    { 0x1EF2,  0x59, 0x00 },  /* Ỳ to Y */
  1591    { 0x1EF3,  0x79, 0x00 },  /* ỳ to y */
  1592    { 0xFB00,  0x66, 0x66 },  /* ff to ff */
  1593    { 0xFB01,  0x66, 0x69 },  /* fi to fi */
  1594    { 0xFB02,  0x66, 0x6C },  /* fl to fl */
  1595    { 0xFB05,  0x73, 0x74 },  /* ſt to st */
  1596    { 0xFB06,  0x73, 0x74 },  /* st to st */
  1597  };
  1598  
  1599  /*
  1600  ** Convert the input string from UTF-8 into pure ASCII by converting
  1601  ** all non-ASCII characters to some combination of characters in the
  1602  ** ASCII subset.
  1603  **
  1604  ** The returned string might contain more characters than the input.
  1605  **
  1606  ** Space to hold the returned string comes from sqlite3_malloc() and
  1607  ** should be freed by the caller.
  1608  */
  1609  static unsigned char *transliterate(const unsigned char *zIn, int nIn){
  1610    unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
  1611    int c, sz, nOut;
  1612    if( zOut==0 ) return 0;
  1613    nOut = 0;
  1614    while( nIn>0 ){
  1615      c = utf8Read(zIn, nIn, &sz);
  1616      zIn += sz;
  1617      nIn -= sz;
  1618      if( c<=127 ){
  1619        zOut[nOut++] = (unsigned char)c;
  1620      }else{
  1621        int xTop, xBtm, x;
  1622        xTop = sizeof(translit)/sizeof(translit[0]) - 1;
  1623        xBtm = 0;
  1624        while( xTop>=xBtm ){
  1625          x = (xTop + xBtm)/2;
  1626          if( translit[x].cFrom==c ){
  1627            zOut[nOut++] = translit[x].cTo0;
  1628            if( translit[x].cTo1 ){
  1629              zOut[nOut++] = translit[x].cTo1;
  1630              /* Add an extra "ch" after the "sh" for Щ and щ */
  1631              if( c==0x0429 || c== 0x0449 ){
  1632                zOut[nOut++] = 'c';
  1633                zOut[nOut++] = 'h';
  1634              }
  1635            }
  1636            c = 0;
  1637            break;
  1638          }else if( translit[x].cFrom>c ){
  1639            xTop = x-1;
  1640          }else{
  1641            xBtm = x+1;
  1642          }
  1643        }
  1644        if( c ) zOut[nOut++] = '?';
  1645      }
  1646    }
  1647    zOut[nOut] = 0;
  1648    return zOut;
  1649  }
  1650  
  1651  /*
  1652  ** Return the number of characters in the shortest prefix of the input
  1653  ** string that transliterates to an ASCII string nTrans bytes or longer.
  1654  ** Or, if the transliteration of the input string is less than nTrans
  1655  ** bytes in size, return the number of characters in the input string.
  1656  */
  1657  static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
  1658    int i, c, sz, nOut;
  1659    int nChar;
  1660  
  1661    i = nOut = 0;
  1662    for(nChar=0; i<nIn && nOut<nTrans; nChar++){
  1663      c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
  1664      i += sz;
  1665  
  1666      nOut++;
  1667      if( c>=128 ){
  1668        int xTop, xBtm, x;
  1669        xTop = sizeof(translit)/sizeof(translit[0]) - 1;
  1670        xBtm = 0;
  1671        while( xTop>=xBtm ){
  1672          x = (xTop + xBtm)/2;
  1673          if( translit[x].cFrom==c ){
  1674            if( translit[x].cTo1 ) nOut++;
  1675            if( c==0x0429 || c== 0x0449 ) nOut += 2;
  1676            break;
  1677          }else if( translit[x].cFrom>c ){
  1678            xTop = x-1;
  1679          }else{
  1680            xBtm = x+1;
  1681          }
  1682        }
  1683      }
  1684    }
  1685  
  1686    return nChar;
  1687  }
  1688  
  1689  
  1690  /*
  1691  **    spellfix1_translit(X)
  1692  **
  1693  ** Convert a string that contains non-ASCII Roman characters into 
  1694  ** pure ASCII.
  1695  */
  1696  static void transliterateSqlFunc(
  1697    sqlite3_context *context,
  1698    int argc,
  1699    sqlite3_value **argv
  1700  ){
  1701    const unsigned char *zIn = sqlite3_value_text(argv[0]);
  1702    int nIn = sqlite3_value_bytes(argv[0]);
  1703    unsigned char *zOut = transliterate(zIn, nIn);
  1704    if( zOut==0 ){
  1705      sqlite3_result_error_nomem(context);
  1706    }else{
  1707      sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
  1708    }
  1709  }
  1710  
  1711  /*
  1712  **    spellfix1_scriptcode(X)
  1713  **
  1714  ** Try to determine the dominant script used by the word X and return
  1715  ** its ISO 15924 numeric code.
  1716  **
  1717  ** The current implementation only understands the following scripts:
  1718  **
  1719  **    215  (Latin)
  1720  **    220  (Cyrillic)
  1721  **    200  (Greek)
  1722  **
  1723  ** This routine will return 998 if the input X contains characters from
  1724  ** two or more of the above scripts or 999 if X contains no characters
  1725  ** from any of the above scripts.
  1726  */
  1727  static void scriptCodeSqlFunc(
  1728    sqlite3_context *context,
  1729    int argc,
  1730    sqlite3_value **argv
  1731  ){
  1732    const unsigned char *zIn = sqlite3_value_text(argv[0]);
  1733    int nIn = sqlite3_value_bytes(argv[0]);
  1734    int c, sz;
  1735    int scriptMask = 0;
  1736    int res;
  1737    int seenDigit = 0;
  1738  # define SCRIPT_LATIN       0x0001
  1739  # define SCRIPT_CYRILLIC    0x0002
  1740  # define SCRIPT_GREEK       0x0004
  1741  # define SCRIPT_HEBREW      0x0008
  1742  # define SCRIPT_ARABIC      0x0010
  1743  
  1744    while( nIn>0 ){
  1745      c = utf8Read(zIn, nIn, &sz);
  1746      zIn += sz;
  1747      nIn -= sz;
  1748      if( c<0x02af ){
  1749        if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
  1750          scriptMask |= SCRIPT_LATIN;
  1751        }else if( c>='0' && c<='9' ){
  1752          seenDigit = 1;
  1753        }
  1754      }else if( c>=0x0400 && c<=0x04ff ){
  1755        scriptMask |= SCRIPT_CYRILLIC;
  1756      }else if( c>=0x0386 && c<=0x03ce ){
  1757        scriptMask |= SCRIPT_GREEK;
  1758      }else if( c>=0x0590 && c<=0x05ff ){
  1759        scriptMask |= SCRIPT_HEBREW;
  1760      }else if( c>=0x0600 && c<=0x06ff ){
  1761        scriptMask |= SCRIPT_ARABIC;
  1762      }
  1763    }
  1764    if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
  1765    switch( scriptMask ){
  1766      case 0:                res = 999; break;
  1767      case SCRIPT_LATIN:     res = 215; break;
  1768      case SCRIPT_CYRILLIC:  res = 220; break;
  1769      case SCRIPT_GREEK:     res = 200; break;
  1770      case SCRIPT_HEBREW:    res = 125; break;
  1771      case SCRIPT_ARABIC:    res = 160; break;
  1772      default:               res = 998; break;
  1773    }
  1774    sqlite3_result_int(context, res);
  1775  }
  1776  
  1777  /* End transliterate
  1778  ******************************************************************************
  1779  ******************************************************************************
  1780  ** Begin spellfix1 virtual table.
  1781  */
  1782  
  1783  /* Maximum length of a phonehash used for querying the shadow table */
  1784  #define SPELLFIX_MX_HASH  32
  1785  
  1786  /* Maximum number of hash strings to examine per query */
  1787  #define SPELLFIX_MX_RUN   1
  1788  
  1789  typedef struct spellfix1_vtab spellfix1_vtab;
  1790  typedef struct spellfix1_cursor spellfix1_cursor;
  1791  
  1792  /* Fuzzy-search virtual table object */
  1793  struct spellfix1_vtab {
  1794    sqlite3_vtab base;         /* Base class - must be first */
  1795    sqlite3 *db;               /* Database connection */
  1796    char *zDbName;             /* Name of database holding this table */
  1797    char *zTableName;          /* Name of the virtual table */
  1798    char *zCostTable;          /* Table holding edit-distance cost numbers */
  1799    EditDist3Config *pConfig3; /* Parsed edit distance costs */
  1800  };
  1801  
  1802  /* Fuzzy-search cursor object */
  1803  struct spellfix1_cursor {
  1804    sqlite3_vtab_cursor base;    /* Base class - must be first */
  1805    spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
  1806    char *zPattern;              /* rhs of MATCH clause */
  1807    int idxNum;                  /* idxNum value passed to xFilter() */
  1808    int nRow;                    /* Number of rows of content */
  1809    int nAlloc;                  /* Number of allocated rows */
  1810    int iRow;                    /* Current row of content */
  1811    int iLang;                   /* Value of the langid= constraint */
  1812    int iTop;                    /* Value of the top= constraint */
  1813    int iScope;                  /* Value of the scope= constraint */
  1814    int nSearch;                 /* Number of vocabulary items checked */
  1815    sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
  1816    struct spellfix1_row {       /* For each row of content */
  1817      sqlite3_int64 iRowid;         /* Rowid for this row */
  1818      char *zWord;                  /* Text for this row */
  1819      int iRank;                    /* Rank for this row */
  1820      int iDistance;                /* Distance from pattern for this row */
  1821      int iScore;                   /* Score for sorting */
  1822      int iMatchlen;                /* Value of matchlen column (or -1) */
  1823      char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
  1824    } *a; 
  1825  };
  1826  
  1827  /*
  1828  ** Construct one or more SQL statements from the format string given
  1829  ** and then evaluate those statements. The success code is written
  1830  ** into *pRc.
  1831  **
  1832  ** If *pRc is initially non-zero then this routine is a no-op.
  1833  */
  1834  static void spellfix1DbExec(
  1835    int *pRc,              /* Success code */
  1836    sqlite3 *db,           /* Database in which to run SQL */
  1837    const char *zFormat,   /* Format string for SQL */
  1838    ...                    /* Arguments to the format string */
  1839  ){
  1840    va_list ap;
  1841    char *zSql;
  1842    if( *pRc ) return;
  1843    va_start(ap, zFormat);
  1844    zSql = sqlite3_vmprintf(zFormat, ap);
  1845    va_end(ap);
  1846    if( zSql==0 ){
  1847      *pRc = SQLITE_NOMEM;
  1848    }else{
  1849      *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
  1850      sqlite3_free(zSql);
  1851    }
  1852  }
  1853  
  1854  /*
  1855  ** xDisconnect/xDestroy method for the fuzzy-search module.
  1856  */
  1857  static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
  1858    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
  1859    int rc = SQLITE_OK;
  1860    if( isDestroy ){
  1861      sqlite3 *db = p->db;
  1862      spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
  1863                    p->zDbName, p->zTableName);
  1864    }
  1865    if( rc==SQLITE_OK ){
  1866      sqlite3_free(p->zTableName);
  1867      editDist3ConfigDelete(p->pConfig3);
  1868      sqlite3_free(p->zCostTable);
  1869      sqlite3_free(p);
  1870    }
  1871    return rc;
  1872  }
  1873  static int spellfix1Disconnect(sqlite3_vtab *pVTab){
  1874    return spellfix1Uninit(0, pVTab);
  1875  }
  1876  static int spellfix1Destroy(sqlite3_vtab *pVTab){
  1877    return spellfix1Uninit(1, pVTab);
  1878  }
  1879  
  1880  /*
  1881  ** Make a copy of a string.  Remove leading and trailing whitespace
  1882  ** and dequote it.
  1883  */
  1884  static char *spellfix1Dequote(const char *zIn){
  1885    char *zOut;
  1886    int i, j;
  1887    char c;
  1888    while( isspace((unsigned char)zIn[0]) ) zIn++;
  1889    zOut = sqlite3_mprintf("%s", zIn);
  1890    if( zOut==0 ) return 0;
  1891    i = (int)strlen(zOut);
  1892  #if 0  /* The parser will never leave spaces at the end */
  1893    while( i>0 && isspace(zOut[i-1]) ){ i--; }
  1894  #endif
  1895    zOut[i] = 0;
  1896    c = zOut[0];
  1897    if( c=='\'' || c=='"' ){
  1898      for(i=1, j=0; ALWAYS(zOut[i]); i++){
  1899        zOut[j++] = zOut[i];
  1900        if( zOut[i]==c ){
  1901          if( zOut[i+1]==c ){
  1902            i++;
  1903          }else{
  1904            zOut[j-1] = 0;
  1905            break;
  1906          }
  1907        }
  1908      }
  1909    }
  1910    return zOut;
  1911  }
  1912  
  1913  
  1914  /*
  1915  ** xConnect/xCreate method for the spellfix1 module. Arguments are:
  1916  **
  1917  **   argv[0]   -> module name  ("spellfix1")
  1918  **   argv[1]   -> database name
  1919  **   argv[2]   -> table name
  1920  **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
  1921  */
  1922  static int spellfix1Init(
  1923    int isCreate,
  1924    sqlite3 *db,
  1925    void *pAux,
  1926    int argc, const char *const*argv,
  1927    sqlite3_vtab **ppVTab,
  1928    char **pzErr
  1929  ){
  1930    spellfix1_vtab *pNew = 0;
  1931    /* const char *zModule = argv[0]; // not used */
  1932    const char *zDbName = argv[1];
  1933    const char *zTableName = argv[2];
  1934    int nDbName;
  1935    int rc = SQLITE_OK;
  1936    int i;
  1937  
  1938    nDbName = (int)strlen(zDbName);
  1939    pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
  1940    if( pNew==0 ){
  1941      rc = SQLITE_NOMEM;
  1942    }else{
  1943      memset(pNew, 0, sizeof(*pNew));
  1944      pNew->zDbName = (char*)&pNew[1];
  1945      memcpy(pNew->zDbName, zDbName, nDbName+1);
  1946      pNew->zTableName = sqlite3_mprintf("%s", zTableName);
  1947      pNew->db = db;
  1948      if( pNew->zTableName==0 ){
  1949        rc = SQLITE_NOMEM;
  1950      }else{
  1951        rc = sqlite3_declare_vtab(db, 
  1952             "CREATE TABLE x(word,rank,distance,langid, "
  1953             "score, matchlen, phonehash HIDDEN, "
  1954             "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
  1955             "soundslike HIDDEN, command HIDDEN)"
  1956        );
  1957  #define SPELLFIX_COL_WORD            0
  1958  #define SPELLFIX_COL_RANK            1
  1959  #define SPELLFIX_COL_DISTANCE        2
  1960  #define SPELLFIX_COL_LANGID          3
  1961  #define SPELLFIX_COL_SCORE           4
  1962  #define SPELLFIX_COL_MATCHLEN        5
  1963  #define SPELLFIX_COL_PHONEHASH       6
  1964  #define SPELLFIX_COL_TOP             7
  1965  #define SPELLFIX_COL_SCOPE           8
  1966  #define SPELLFIX_COL_SRCHCNT         9
  1967  #define SPELLFIX_COL_SOUNDSLIKE     10
  1968  #define SPELLFIX_COL_COMMAND        11
  1969      }
  1970      if( rc==SQLITE_OK && isCreate ){
  1971        spellfix1DbExec(&rc, db,
  1972           "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
  1973           "  id INTEGER PRIMARY KEY,\n"
  1974           "  rank INT,\n"
  1975           "  langid INT,\n"
  1976           "  word TEXT,\n"
  1977           "  k1 TEXT,\n"
  1978           "  k2 TEXT\n"
  1979           ");\n",
  1980           zDbName, zTableName
  1981        );
  1982        spellfix1DbExec(&rc, db,
  1983           "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
  1984              "ON \"%w_vocab\"(langid,k2);",
  1985           zDbName, zTableName, zTableName
  1986        );
  1987      }
  1988      for(i=3; rc==SQLITE_OK && i<argc; i++){
  1989        if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
  1990          pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
  1991          if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
  1992          continue;
  1993        }
  1994        *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
  1995        rc = SQLITE_ERROR; 
  1996      }
  1997    }
  1998  
  1999    if( rc && pNew ){
  2000      *ppVTab = 0;
  2001      spellfix1Uninit(0, &pNew->base);
  2002    }else{
  2003      *ppVTab = (sqlite3_vtab *)pNew;
  2004    }
  2005    return rc;
  2006  }
  2007  
  2008  /*
  2009  ** The xConnect and xCreate methods
  2010  */
  2011  static int spellfix1Connect(
  2012    sqlite3 *db,
  2013    void *pAux,
  2014    int argc, const char *const*argv,
  2015    sqlite3_vtab **ppVTab,
  2016    char **pzErr
  2017  ){
  2018    return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
  2019  }
  2020  static int spellfix1Create(
  2021    sqlite3 *db,
  2022    void *pAux,
  2023    int argc, const char *const*argv,
  2024    sqlite3_vtab **ppVTab,
  2025    char **pzErr
  2026  ){
  2027    return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
  2028  }
  2029  
  2030  /*
  2031  ** Clear all of the content from a cursor.
  2032  */
  2033  static void spellfix1ResetCursor(spellfix1_cursor *pCur){
  2034    int i;
  2035    for(i=0; i<pCur->nRow; i++){
  2036      sqlite3_free(pCur->a[i].zWord);
  2037    }
  2038    pCur->nRow = 0;
  2039    pCur->iRow = 0;
  2040    pCur->nSearch = 0;
  2041    if( pCur->pFullScan ){
  2042      sqlite3_finalize(pCur->pFullScan);
  2043      pCur->pFullScan = 0;
  2044    }
  2045  }
  2046  
  2047  /*
  2048  ** Resize the cursor to hold up to N rows of content
  2049  */
  2050  static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
  2051    struct spellfix1_row *aNew;
  2052    assert( N>=pCur->nRow );
  2053    aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
  2054    if( aNew==0 && N>0 ){
  2055      spellfix1ResetCursor(pCur);
  2056      sqlite3_free(pCur->a);
  2057      pCur->nAlloc = 0;
  2058      pCur->a = 0;
  2059    }else{
  2060      pCur->nAlloc = N;
  2061      pCur->a = aNew;
  2062    }
  2063  }
  2064  
  2065  
  2066  /*
  2067  ** Close a fuzzy-search cursor.
  2068  */
  2069  static int spellfix1Close(sqlite3_vtab_cursor *cur){
  2070    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
  2071    spellfix1ResetCursor(pCur);
  2072    spellfix1ResizeCursor(pCur, 0);
  2073    sqlite3_free(pCur->zPattern);
  2074    sqlite3_free(pCur);
  2075    return SQLITE_OK;
  2076  }
  2077  
  2078  #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
  2079  #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
  2080  #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
  2081  #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
  2082  #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
  2083  #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
  2084  #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
  2085  #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
  2086  
  2087  /*
  2088  **
  2089  ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
  2090  ** above.
  2091  **
  2092  ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
  2093  ** if specified and in that order.
  2094  */
  2095  static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
  2096    int iPlan = 0;
  2097    int iLangTerm = -1;
  2098    int iTopTerm = -1;
  2099    int iScopeTerm = -1;
  2100    int iDistTerm = -1;
  2101    int iRowidTerm = -1;
  2102    int i;
  2103    const struct sqlite3_index_constraint *pConstraint;
  2104    pConstraint = pIdxInfo->aConstraint;
  2105    for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
  2106      if( pConstraint->usable==0 ) continue;
  2107  
  2108      /* Terms of the form:  word MATCH $str */
  2109      if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0 
  2110       && pConstraint->iColumn==SPELLFIX_COL_WORD
  2111       && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
  2112      ){
  2113        iPlan |= SPELLFIX_IDXNUM_MATCH;
  2114        pIdxInfo->aConstraintUsage[i].argvIndex = 1;
  2115        pIdxInfo->aConstraintUsage[i].omit = 1;
  2116      }
  2117  
  2118      /* Terms of the form:  langid = $langid  */
  2119      if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
  2120       && pConstraint->iColumn==SPELLFIX_COL_LANGID
  2121       && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
  2122      ){
  2123        iPlan |= SPELLFIX_IDXNUM_LANGID;
  2124        iLangTerm = i;
  2125      }
  2126  
  2127      /* Terms of the form:  top = $top */
  2128      if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
  2129       && pConstraint->iColumn==SPELLFIX_COL_TOP
  2130       && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
  2131      ){
  2132        iPlan |= SPELLFIX_IDXNUM_TOP;
  2133        iTopTerm = i;
  2134      }
  2135  
  2136      /* Terms of the form:  scope = $scope */
  2137      if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
  2138       && pConstraint->iColumn==SPELLFIX_COL_SCOPE
  2139       && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
  2140      ){
  2141        iPlan |= SPELLFIX_IDXNUM_SCOPE;
  2142        iScopeTerm = i;
  2143      }
  2144  
  2145      /* Terms of the form:  distance < $dist or distance <= $dist */
  2146      if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
  2147       && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
  2148       && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
  2149            || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
  2150      ){
  2151        if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
  2152          iPlan |= SPELLFIX_IDXNUM_DISTLT;
  2153        }else{
  2154          iPlan |= SPELLFIX_IDXNUM_DISTLE;
  2155        }
  2156        iDistTerm = i;
  2157      }
  2158  
  2159      /* Terms of the form:  distance < $dist or distance <= $dist */
  2160      if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
  2161       && pConstraint->iColumn<0
  2162       && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
  2163      ){
  2164        iPlan |= SPELLFIX_IDXNUM_ROWID;
  2165        iRowidTerm = i;
  2166      }
  2167    }
  2168    if( iPlan&SPELLFIX_IDXNUM_MATCH ){
  2169      int idx = 2;
  2170      pIdxInfo->idxNum = iPlan;
  2171      if( pIdxInfo->nOrderBy==1
  2172       && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
  2173       && pIdxInfo->aOrderBy[0].desc==0
  2174      ){
  2175        pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
  2176      }
  2177      if( iPlan&SPELLFIX_IDXNUM_LANGID ){
  2178        pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
  2179        pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
  2180      }
  2181      if( iPlan&SPELLFIX_IDXNUM_TOP ){
  2182        pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
  2183        pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
  2184      }
  2185      if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
  2186        pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
  2187        pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
  2188      }
  2189      if( iPlan&SPELLFIX_IDXNUM_DIST ){
  2190        pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
  2191        pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
  2192      }
  2193      pIdxInfo->estimatedCost = 1e5;
  2194    }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
  2195      pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
  2196      pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
  2197      pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
  2198      pIdxInfo->estimatedCost = 5;
  2199    }else{
  2200      pIdxInfo->idxNum = 0;
  2201      pIdxInfo->estimatedCost = 1e50;
  2202    }
  2203    return SQLITE_OK;
  2204  }
  2205  
  2206  /*
  2207  ** Open a new fuzzy-search cursor.
  2208  */
  2209  static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
  2210    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
  2211    spellfix1_cursor *pCur;
  2212    pCur = sqlite3_malloc64( sizeof(*pCur) );
  2213    if( pCur==0 ) return SQLITE_NOMEM;
  2214    memset(pCur, 0, sizeof(*pCur));
  2215    pCur->pVTab = p;
  2216    *ppCursor = &pCur->base;
  2217    return SQLITE_OK;
  2218  }
  2219  
  2220  /*
  2221  ** Adjust a distance measurement by the words rank in order to show
  2222  ** preference to common words.
  2223  */
  2224  static int spellfix1Score(int iDistance, int iRank){
  2225    int iLog2;
  2226    for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
  2227    return iDistance + 32 - iLog2;
  2228  }
  2229  
  2230  /*
  2231  ** Compare two spellfix1_row objects for sorting purposes in qsort() such
  2232  ** that they sort in order of increasing distance.
  2233  */
  2234  static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
  2235    const struct spellfix1_row *a = (const struct spellfix1_row*)A;
  2236    const struct spellfix1_row *b = (const struct spellfix1_row*)B;
  2237    return a->iScore - b->iScore;
  2238  }
  2239  
  2240  /*
  2241  ** A structure used to pass information from spellfix1FilterForMatch()
  2242  ** into spellfix1RunQuery().
  2243  */
  2244  typedef struct MatchQuery {
  2245    spellfix1_cursor *pCur;          /* The cursor being queried */
  2246    sqlite3_stmt *pStmt;             /* shadow table query statment */
  2247    char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
  2248    const char *zPattern;            /* Transliterated input string */
  2249    int nPattern;                    /* Length of zPattern */
  2250    EditDist3FromString *pMatchStr3; /* Original unicode string */
  2251    EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
  2252    const EditDist3Lang *pLang;      /* The selected language coefficients */
  2253    int iLang;                       /* The language id */
  2254    int iScope;                      /* Default scope */
  2255    int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
  2256    int rc;                          /* Error code */
  2257    int nRun;                  /* Number of prior runs for the same zPattern */
  2258    char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
  2259  } MatchQuery;
  2260  
  2261  /*
  2262  ** Run a query looking for the best matches against zPattern using
  2263  ** zHash as the character class seed hash.
  2264  */
  2265  static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
  2266    const char *zK1;
  2267    const char *zWord;
  2268    int iDist;
  2269    int iRank;
  2270    int iScore;
  2271    int iWorst = 0;
  2272    int idx;
  2273    int idxWorst = -1;
  2274    int i;
  2275    int iScope = p->iScope;
  2276    spellfix1_cursor *pCur = p->pCur;
  2277    sqlite3_stmt *pStmt = p->pStmt;
  2278    char zHash1[SPELLFIX_MX_HASH];
  2279    char zHash2[SPELLFIX_MX_HASH];
  2280    char *zClass;
  2281    int nClass;
  2282    int rc;
  2283  
  2284    if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
  2285    zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
  2286    if( zClass==0 ){
  2287      p->rc = SQLITE_NOMEM;
  2288      return;
  2289    }
  2290    nClass = (int)strlen(zClass);
  2291    if( nClass>SPELLFIX_MX_HASH-2 ){
  2292      nClass = SPELLFIX_MX_HASH-2;
  2293      zClass[nClass] = 0;
  2294    }
  2295    if( nClass<=iScope ){
  2296      if( nClass>2 ){
  2297        iScope = nClass-1;
  2298      }else{
  2299        iScope = nClass;
  2300      }
  2301    }
  2302    memcpy(zHash1, zClass, iScope);
  2303    sqlite3_free(zClass);
  2304    zHash1[iScope] = 0;
  2305    memcpy(zHash2, zHash1, iScope);
  2306    zHash2[iScope] = 'Z';
  2307    zHash2[iScope+1] = 0;
  2308  #if SPELLFIX_MX_RUN>1
  2309    for(i=0; i<p->nRun; i++){
  2310      if( strcmp(p->azPrior[i], zHash1)==0 ) return;
  2311    }
  2312  #endif
  2313    assert( p->nRun<SPELLFIX_MX_RUN );
  2314    memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
  2315    if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
  2316     || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
  2317    ){
  2318      p->rc = SQLITE_NOMEM;
  2319      return;
  2320    }
  2321  #if SPELLFIX_MX_RUN>1
  2322    for(i=0; i<pCur->nRow; i++){
  2323      if( pCur->a[i].iScore>iWorst ){
  2324        iWorst = pCur->a[i].iScore;
  2325        idxWorst = i;
  2326      }
  2327    }
  2328  #endif
  2329    while( sqlite3_step(pStmt)==SQLITE_ROW ){
  2330      int iMatchlen = -1;
  2331      iRank = sqlite3_column_int(pStmt, 2);
  2332      if( p->pMatchStr3 ){
  2333        int nWord = sqlite3_column_bytes(pStmt, 1);
  2334        zWord = (const char*)sqlite3_column_text(pStmt, 1);
  2335        iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
  2336      }else{
  2337        zK1 = (const char*)sqlite3_column_text(pStmt, 3);
  2338        if( zK1==0 ) continue;
  2339        iDist = editdist1(p->zPattern, zK1, 0);
  2340      }
  2341      if( iDist<0 ){
  2342        p->rc = SQLITE_NOMEM;
  2343        break;
  2344      }
  2345      pCur->nSearch++;
  2346      
  2347      /* If there is a "distance < $dist" or "distance <= $dist" constraint,
  2348      ** check if this row meets it. If not, jump back up to the top of the
  2349      ** loop to process the next row. Otherwise, if the row does match the
  2350      ** distance constraint, check if the pCur->a[] array is already full.
  2351      ** If it is and no explicit "top = ?" constraint was present in the
  2352      ** query, grow the array to ensure there is room for the new entry. */
  2353      assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
  2354      if( p->iMaxDist>=0 ){
  2355        if( iDist>p->iMaxDist ) continue;
  2356        if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
  2357          spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
  2358          if( pCur->a==0 ) break;
  2359        }
  2360      }
  2361  
  2362      iScore = spellfix1Score(iDist,iRank);
  2363      if( pCur->nRow<pCur->nAlloc ){
  2364        idx = pCur->nRow;
  2365      }else if( iScore<iWorst ){
  2366        idx = idxWorst;
  2367        sqlite3_free(pCur->a[idx].zWord);
  2368      }else{
  2369        continue;
  2370      }
  2371  
  2372      pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
  2373      if( pCur->a[idx].zWord==0 ){
  2374        p->rc = SQLITE_NOMEM;
  2375        break;
  2376      }
  2377      pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
  2378      pCur->a[idx].iRank = iRank;
  2379      pCur->a[idx].iDistance = iDist;
  2380      pCur->a[idx].iScore = iScore;
  2381      pCur->a[idx].iMatchlen = iMatchlen;
  2382      memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
  2383      if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
  2384      if( pCur->nRow==pCur->nAlloc ){
  2385        iWorst = pCur->a[0].iScore;
  2386        idxWorst = 0;
  2387        for(i=1; i<pCur->nRow; i++){
  2388          iScore = pCur->a[i].iScore;
  2389          if( iWorst<iScore ){
  2390            iWorst = iScore;
  2391            idxWorst = i;
  2392          }
  2393        }
  2394      }
  2395    }
  2396    rc = sqlite3_reset(pStmt);
  2397    if( rc ) p->rc = rc;
  2398  }
  2399  
  2400  /*
  2401  ** This version of the xFilter method work if the MATCH term is present
  2402  ** and we are doing a scan.
  2403  */
  2404  static int spellfix1FilterForMatch(
  2405    spellfix1_cursor *pCur,
  2406    int argc,
  2407    sqlite3_value **argv
  2408  ){
  2409    int idxNum = pCur->idxNum;
  2410    const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
  2411    EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
  2412    char *zPattern;                    /* Transliteration of zMatchThis */
  2413    int nPattern;                      /* Length of zPattern */
  2414    int iLimit = 20;                   /* Max number of rows of output */
  2415    int iScope = 3;                    /* Use this many characters of zClass */
  2416    int iLang = 0;                     /* Language code */
  2417    char *zSql;                        /* SQL of shadow table query */
  2418    sqlite3_stmt *pStmt = 0;           /* Shadow table query */
  2419    int rc;                            /* Result code */
  2420    int idx = 1;                       /* Next available filter parameter */
  2421    spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
  2422    MatchQuery x;                      /* For passing info to RunQuery() */
  2423  
  2424    /* Load the cost table if we have not already done so */
  2425    if( p->zCostTable!=0 && p->pConfig3==0 ){
  2426      p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
  2427      if( p->pConfig3==0 ) return SQLITE_NOMEM;
  2428      memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
  2429      rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
  2430      if( rc ) return rc;
  2431    }
  2432    memset(&x, 0, sizeof(x));
  2433    x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
  2434    x.iMaxDist = -1;   /* Maximum allowed edit distance */
  2435  
  2436    if( idxNum&2 ){
  2437      iLang = sqlite3_value_int(argv[idx++]);
  2438    }
  2439    if( idxNum&4 ){
  2440      iLimit = sqlite3_value_int(argv[idx++]);
  2441      if( iLimit<1 ) iLimit = 1;
  2442    }
  2443    if( idxNum&8 ){
  2444      x.iScope = sqlite3_value_int(argv[idx++]);
  2445      if( x.iScope<1 ) x.iScope = 1;
  2446      if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
  2447    }
  2448    if( idxNum&(16|32) ){
  2449      x.iMaxDist = sqlite3_value_int(argv[idx++]);
  2450      if( idxNum&16 ) x.iMaxDist--;
  2451      if( x.iMaxDist<0 ) x.iMaxDist = 0;
  2452    }
  2453    spellfix1ResetCursor(pCur);
  2454    spellfix1ResizeCursor(pCur, iLimit);
  2455    zMatchThis = sqlite3_value_text(argv[0]);
  2456    if( zMatchThis==0 ) return SQLITE_OK;
  2457    if( p->pConfig3 ){
  2458      x.pLang = editDist3FindLang(p->pConfig3, iLang);
  2459      pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
  2460      if( pMatchStr3==0 ){
  2461        x.rc = SQLITE_NOMEM;
  2462        goto filter_exit;
  2463      }
  2464    }else{
  2465      x.pLang = 0;
  2466    }
  2467    zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
  2468    sqlite3_free(pCur->zPattern);
  2469    pCur->zPattern = zPattern;
  2470    if( zPattern==0 ){
  2471      x.rc = SQLITE_NOMEM;
  2472      goto filter_exit;
  2473    }
  2474    nPattern = (int)strlen(zPattern);
  2475    if( zPattern[nPattern-1]=='*' ) nPattern--;
  2476    zSql = sqlite3_mprintf(
  2477       "SELECT id, word, rank, k1"
  2478       "  FROM \"%w\".\"%w_vocab\""
  2479       " WHERE langid=%d AND k2>=?1 AND k2<?2",
  2480       p->zDbName, p->zTableName, iLang
  2481    );
  2482    if( zSql==0 ){
  2483      x.rc = SQLITE_NOMEM;
  2484      pStmt = 0;
  2485      goto filter_exit;
  2486    }
  2487    rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
  2488    sqlite3_free(zSql);
  2489    pCur->iLang = iLang;
  2490    x.pCur = pCur;
  2491    x.pStmt = pStmt;
  2492    x.zPattern = zPattern;
  2493    x.nPattern = nPattern;
  2494    x.pMatchStr3 = pMatchStr3;
  2495    x.iLang = iLang;
  2496    x.rc = rc;
  2497    x.pConfig3 = p->pConfig3;
  2498    if( x.rc==SQLITE_OK ){
  2499      spellfix1RunQuery(&x, zPattern, nPattern);
  2500    }
  2501  
  2502    if( pCur->a ){
  2503      qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
  2504      pCur->iTop = iLimit;
  2505      pCur->iScope = iScope;
  2506    }else{
  2507      x.rc = SQLITE_NOMEM;
  2508    }
  2509  
  2510  filter_exit:
  2511    sqlite3_finalize(pStmt);
  2512    editDist3FromStringDelete(pMatchStr3);
  2513    return x.rc;
  2514  }
  2515  
  2516  /*
  2517  ** This version of xFilter handles a full-table scan case
  2518  */
  2519  static int spellfix1FilterForFullScan(
  2520    spellfix1_cursor *pCur,
  2521    int argc,
  2522    sqlite3_value **argv
  2523  ){
  2524    int rc = SQLITE_OK;
  2525    int idxNum = pCur->idxNum;
  2526    char *zSql;
  2527    spellfix1_vtab *pVTab = pCur->pVTab;
  2528    spellfix1ResetCursor(pCur);
  2529    assert( idxNum==0 || idxNum==64 );
  2530    zSql = sqlite3_mprintf(
  2531       "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
  2532       pVTab->zDbName, pVTab->zTableName,
  2533       ((idxNum & 64) ? " WHERE rowid=?" : "")
  2534    );
  2535    if( zSql==0 ) return SQLITE_NOMEM;
  2536    rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
  2537    sqlite3_free(zSql);
  2538    if( rc==SQLITE_OK && (idxNum & 64) ){
  2539      assert( argc==1 );
  2540      rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
  2541    }
  2542    pCur->nRow = pCur->iRow = 0;
  2543    if( rc==SQLITE_OK ){
  2544      rc = sqlite3_step(pCur->pFullScan);
  2545      if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
  2546      if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
  2547    }else{
  2548      pCur->iRow = 0;
  2549    }
  2550    return rc;
  2551  }
  2552  
  2553  
  2554  /*
  2555  ** Called to "rewind" a cursor back to the beginning so that
  2556  ** it starts its output over again.  Always called at least once
  2557  ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
  2558  */
  2559  static int spellfix1Filter(
  2560    sqlite3_vtab_cursor *cur, 
  2561    int idxNum, const char *idxStr,
  2562    int argc, sqlite3_value **argv
  2563  ){
  2564    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
  2565    int rc;
  2566    pCur->idxNum = idxNum;
  2567    if( idxNum & 1 ){
  2568      rc = spellfix1FilterForMatch(pCur, argc, argv);
  2569    }else{
  2570      rc = spellfix1FilterForFullScan(pCur, argc, argv);
  2571    }
  2572    return rc;
  2573  }
  2574  
  2575  
  2576  /*
  2577  ** Advance a cursor to its next row of output
  2578  */
  2579  static int spellfix1Next(sqlite3_vtab_cursor *cur){
  2580    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
  2581    int rc = SQLITE_OK;
  2582    if( pCur->iRow < pCur->nRow ){
  2583      if( pCur->pFullScan ){
  2584        rc = sqlite3_step(pCur->pFullScan);
  2585        if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
  2586        if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
  2587      }else{
  2588        pCur->iRow++;
  2589      }
  2590    }
  2591    return rc;
  2592  }
  2593  
  2594  /*
  2595  ** Return TRUE if we are at the end-of-file
  2596  */
  2597  static int spellfix1Eof(sqlite3_vtab_cursor *cur){
  2598    spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
  2599    return pCur->iRow>=pCur->nRow;
  2600  }
  2601  
  2602  /*
  2603  ** Return columns from the current row.
  2604  */
  2605  static int spellfix1Column(
  2606    sqlite3_vtab_cursor *cur,
  2607    sqlite3_context *ctx,
  2608    int i
  2609  ){
  2610    spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
  2611    if( pCur->pFullScan ){
  2612      if( i<=SPELLFIX_COL_LANGID ){
  2613        sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
  2614      }else{
  2615        sqlite3_result_null(ctx);
  2616      }
  2617      return SQLITE_OK;
  2618    }
  2619    switch( i ){
  2620      case SPELLFIX_COL_WORD: {
  2621        sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
  2622        break;
  2623      }
  2624      case SPELLFIX_COL_RANK: {
  2625        sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
  2626        break;
  2627      }
  2628      case SPELLFIX_COL_DISTANCE: {
  2629        sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
  2630        break;
  2631      }
  2632      case SPELLFIX_COL_LANGID: {
  2633        sqlite3_result_int(ctx, pCur->iLang);
  2634        break;
  2635      }
  2636      case SPELLFIX_COL_SCORE: {
  2637        sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
  2638        break;
  2639      }
  2640      case SPELLFIX_COL_MATCHLEN: {
  2641        int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
  2642        if( iMatchlen<0 ){
  2643          int nPattern = (int)strlen(pCur->zPattern);
  2644          char *zWord = pCur->a[pCur->iRow].zWord;
  2645          int nWord = (int)strlen(zWord);
  2646  
  2647          if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
  2648            char *zTranslit;
  2649            int res;
  2650            zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
  2651            if( !zTranslit ) return SQLITE_NOMEM;
  2652            res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
  2653            sqlite3_free(zTranslit);
  2654            if( res<0 ) return SQLITE_NOMEM;
  2655            iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
  2656          }else{
  2657            iMatchlen = utf8Charlen(zWord, nWord);
  2658          }
  2659        }
  2660  
  2661        sqlite3_result_int(ctx, iMatchlen);
  2662        break;
  2663      }
  2664      case SPELLFIX_COL_PHONEHASH: {
  2665        sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
  2666        break;
  2667      }
  2668      case SPELLFIX_COL_TOP: {
  2669        sqlite3_result_int(ctx, pCur->iTop);
  2670        break;
  2671      }
  2672      case SPELLFIX_COL_SCOPE: {
  2673        sqlite3_result_int(ctx, pCur->iScope);
  2674        break;
  2675      }
  2676      case SPELLFIX_COL_SRCHCNT: {
  2677        sqlite3_result_int(ctx, pCur->nSearch);
  2678        break;
  2679      }
  2680      default: {
  2681        sqlite3_result_null(ctx);
  2682        break;
  2683      }
  2684    }
  2685    return SQLITE_OK;
  2686  }
  2687  
  2688  /*
  2689  ** The rowid.
  2690  */
  2691  static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
  2692    spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
  2693    if( pCur->pFullScan ){
  2694      *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
  2695    }else{
  2696      *pRowid = pCur->a[pCur->iRow].iRowid;
  2697    }
  2698    return SQLITE_OK;
  2699  }
  2700  
  2701  /*
  2702  ** This function is called by the xUpdate() method. It returns a string
  2703  ** containing the conflict mode that xUpdate() should use for the current
  2704  ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
  2705  */
  2706  static const char *spellfix1GetConflict(sqlite3 *db){
  2707    static const char *azConflict[] = {
  2708      /* Note: Instead of "FAIL" - "ABORT". */
  2709      "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
  2710    };
  2711    int eConflict = sqlite3_vtab_on_conflict(db);
  2712  
  2713    assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
  2714         || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
  2715         || eConflict==SQLITE_REPLACE
  2716    );
  2717    assert( SQLITE_ROLLBACK==1 );
  2718    assert( SQLITE_IGNORE==2 );
  2719    assert( SQLITE_FAIL==3 );
  2720    assert( SQLITE_ABORT==4 );
  2721    assert( SQLITE_REPLACE==5 );
  2722  
  2723    return azConflict[eConflict-1];
  2724  }
  2725  
  2726  /*
  2727  ** The xUpdate() method.
  2728  */
  2729  static int spellfix1Update(
  2730    sqlite3_vtab *pVTab,
  2731    int argc,
  2732    sqlite3_value **argv,
  2733    sqlite_int64 *pRowid
  2734  ){
  2735    int rc = SQLITE_OK;
  2736    sqlite3_int64 rowid, newRowid;
  2737    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
  2738    sqlite3 *db = p->db;
  2739  
  2740    if( argc==1 ){
  2741      /* A delete operation on the rowid given by argv[0] */
  2742      rowid = *pRowid = sqlite3_value_int64(argv[0]);
  2743      spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
  2744                             " WHERE id=%lld",
  2745                    p->zDbName, p->zTableName, rowid);
  2746    }else{
  2747      const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
  2748      int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
  2749      int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
  2750      int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
  2751      const unsigned char *zSoundslike =
  2752             sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
  2753      int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
  2754      char *zK1, *zK2;
  2755      int i;
  2756      char c;
  2757      const char *zConflict = spellfix1GetConflict(db);
  2758  
  2759      if( zWord==0 ){
  2760        /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
  2761        ** cause zWord to be NULL, so we look at the "command" column to see
  2762        ** what special actions to take */
  2763        const char *zCmd = 
  2764           (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
  2765        if( zCmd==0 ){
  2766          pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
  2767                                           p->zTableName);
  2768          return SQLITE_CONSTRAINT_NOTNULL;
  2769        }
  2770        if( strcmp(zCmd,"reset")==0 ){
  2771          /* Reset the  edit cost table (if there is one). */
  2772          editDist3ConfigDelete(p->pConfig3);
  2773          p->pConfig3 = 0;
  2774          return SQLITE_OK;
  2775        }
  2776        if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
  2777          editDist3ConfigDelete(p->pConfig3);
  2778          p->pConfig3 = 0;
  2779          sqlite3_free(p->zCostTable);
  2780          p->zCostTable = spellfix1Dequote(zCmd+16);
  2781          if( p->zCostTable==0 ) return SQLITE_NOMEM;
  2782          if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
  2783            sqlite3_free(p->zCostTable);
  2784            p->zCostTable = 0;
  2785          }
  2786          return SQLITE_OK;
  2787        }
  2788        pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
  2789                                         p->zTableName, zCmd);
  2790        return SQLITE_ERROR;
  2791      }
  2792      if( iRank<1 ) iRank = 1;
  2793      if( zSoundslike ){
  2794        zK1 = (char*)transliterate(zSoundslike, nSoundslike);
  2795      }else{
  2796        zK1 = (char*)transliterate(zWord, nWord);
  2797      }
  2798      if( zK1==0 ) return SQLITE_NOMEM;
  2799      for(i=0; (c = zK1[i])!=0; i++){
  2800         if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
  2801      }
  2802      zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
  2803      if( zK2==0 ){
  2804        sqlite3_free(zK1);
  2805        return SQLITE_NOMEM;
  2806      }
  2807      if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
  2808        if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
  2809          spellfix1DbExec(&rc, db,
  2810                 "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
  2811                 "VALUES(%d,%d,%Q,%Q,%Q)",
  2812                 p->zDbName, p->zTableName,
  2813                 iRank, iLang, zWord, zK1, zK2
  2814          );
  2815        }else{
  2816          newRowid = sqlite3_value_int64(argv[1]);
  2817          spellfix1DbExec(&rc, db,
  2818              "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
  2819              "VALUES(%lld,%d,%d,%Q,%Q,%Q)",
  2820              zConflict, p->zDbName, p->zTableName,
  2821              newRowid, iRank, iLang, zWord, zK1, zK2
  2822          );
  2823        }
  2824        *pRowid = sqlite3_last_insert_rowid(db);
  2825      }else{
  2826        rowid = sqlite3_value_int64(argv[0]);
  2827        newRowid = *pRowid = sqlite3_value_int64(argv[1]);
  2828        spellfix1DbExec(&rc, db,
  2829               "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
  2830               " word=%Q, k1=%Q, k2=%Q WHERE id=%lld",
  2831               zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
  2832               zWord, zK1, zK2, rowid
  2833        );
  2834      }
  2835      sqlite3_free(zK1);
  2836      sqlite3_free(zK2);
  2837    }
  2838    return rc;
  2839  }
  2840  
  2841  /*
  2842  ** Rename the spellfix1 table.
  2843  */
  2844  static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
  2845    spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
  2846    sqlite3 *db = p->db;
  2847    int rc = SQLITE_OK;
  2848    char *zNewName = sqlite3_mprintf("%s", zNew);
  2849    if( zNewName==0 ){
  2850      return SQLITE_NOMEM;
  2851    }
  2852    spellfix1DbExec(&rc, db, 
  2853       "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
  2854       p->zDbName, p->zTableName, zNewName
  2855    );
  2856    if( rc==SQLITE_OK ){
  2857      sqlite3_free(p->zTableName);
  2858      p->zTableName = zNewName;
  2859    }else{
  2860      sqlite3_free(zNewName);
  2861    }
  2862    return rc;
  2863  }
  2864  
  2865  
  2866  /*
  2867  ** A virtual table module that provides fuzzy search.
  2868  */
  2869  static sqlite3_module spellfix1Module = {
  2870    0,                       /* iVersion */
  2871    spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
  2872    spellfix1Connect,        /* xConnect - reconnected to an existing table */
  2873    spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
  2874    spellfix1Disconnect,     /* xDisconnect - close a connection */
  2875    spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
  2876    spellfix1Open,           /* xOpen - open a cursor */
  2877    spellfix1Close,          /* xClose - close a cursor */
  2878    spellfix1Filter,         /* xFilter - configure scan constraints */
  2879    spellfix1Next,           /* xNext - advance a cursor */
  2880    spellfix1Eof,            /* xEof - check for end of scan */
  2881    spellfix1Column,         /* xColumn - read data */
  2882    spellfix1Rowid,          /* xRowid - read data */
  2883    spellfix1Update,         /* xUpdate */
  2884    0,                       /* xBegin */
  2885    0,                       /* xSync */
  2886    0,                       /* xCommit */
  2887    0,                       /* xRollback */
  2888    0,                       /* xFindMethod */
  2889    spellfix1Rename,         /* xRename */
  2890  };
  2891  
  2892  /*
  2893  ** Register the various functions and the virtual table.
  2894  */
  2895  static int spellfix1Register(sqlite3 *db){
  2896    int rc = SQLITE_OK;
  2897    int i;
  2898    rc = sqlite3_create_function(db, "spellfix1_translit", 1, SQLITE_UTF8, 0,
  2899                                    transliterateSqlFunc, 0, 0);
  2900    if( rc==SQLITE_OK ){
  2901      rc = sqlite3_create_function(db, "spellfix1_editdist", 2, SQLITE_UTF8, 0,
  2902                                    editdistSqlFunc, 0, 0);
  2903    }
  2904    if( rc==SQLITE_OK ){
  2905      rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, SQLITE_UTF8, 0,
  2906                                    phoneticHashSqlFunc, 0, 0);
  2907    }
  2908    if( rc==SQLITE_OK ){
  2909      rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, SQLITE_UTF8, 0,
  2910                                    scriptCodeSqlFunc, 0, 0);
  2911    }
  2912    if( rc==SQLITE_OK ){
  2913      rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
  2914    }
  2915    if( rc==SQLITE_OK ){
  2916      rc = editDist3Install(db);
  2917    }
  2918  
  2919    /* Verify sanity of the translit[] table */
  2920    for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
  2921      assert( translit[i].cFrom<translit[i+1].cFrom );
  2922    }
  2923  
  2924    return rc;
  2925  }
  2926  
  2927  #endif /* SQLITE_OMIT_VIRTUALTABLE */
  2928  
  2929  /*
  2930  ** Extension load function.
  2931  */
  2932  #ifdef _WIN32
  2933  __declspec(dllexport)
  2934  #endif
  2935  int sqlite3_spellfix_init(
  2936    sqlite3 *db, 
  2937    char **pzErrMsg, 
  2938    const sqlite3_api_routines *pApi
  2939  ){
  2940    SQLITE_EXTENSION_INIT2(pApi);
  2941  #ifndef SQLITE_OMIT_VIRTUALTABLE
  2942    return spellfix1Register(db);
  2943  #endif
  2944    return SQLITE_OK;
  2945  }