modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/test/fts5unicode2.test (about)

     1  # 2012 May 25
     2  #
     3  # The author disclaims copyright to this source code.  In place of
     4  # a legal notice, here is a blessing:
     5  #
     6  #    May you do good and not evil.
     7  #    May you find forgiveness for yourself and forgive others.
     8  #    May you share freely, never taking more than you give.
     9  #
    10  #*************************************************************************
    11  #
    12  # The tests in this file focus on testing the "unicode" FTS tokenizer.
    13  #
    14  # This is a modified copy of FTS4 test file "fts4_unicode.test".
    15  #
    16  
    17  source [file join [file dirname [info script]] fts5_common.tcl]
    18  set testprefix fts5unicode2
    19  
    20  # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
    21  ifcapable !fts5 {
    22    finish_test
    23    return
    24  }
    25  
    26  proc do_unicode_token_test {tn input res} {
    27    uplevel [list do_test $tn [list \
    28      sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
    29    ] [list {*}$res]]
    30  }
    31  
    32  proc do_unicode_token_test2 {tn input res} {
    33    uplevel [list do_test $tn [list \
    34      sqlite3_fts5_tokenize -subst db "unicode61" $input
    35    ] [list {*}$res]]
    36  }
    37  
    38  proc do_unicode_token_test3 {tn args} {
    39    set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
    40    set input [lindex $args end-1]
    41    set res [lindex $args end]
    42    uplevel [list do_test $tn [list \
    43      sqlite3_fts5_tokenize -subst db $tokenizer $input
    44    ] [list {*}$res]]
    45  }
    46  
        # Tests 1.* check basic case folding and diacritic handling. With
        # "remove_diacritics 0" the folded token keeps its diacritic (1.1);
        # with default options (do_unicode_token_test2) it is stripped (1.8).
    47  do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}
    48  
    49  do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
    50      "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"
    51  
    52  do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
    53      "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"
    54  
    55  # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
    56  do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
    57  do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"
    58  
    59  do_unicode_token_test 1.5 "The quick brown fox" {
    60    the The quick quick brown brown fox fox
    61  }
        # Non-alphanumeric codepoints (\u00bf, \u224e, \u2263) act as separators.
    62  do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
    63    the The quick quick brown brown fox fox
    64  }
    65  
    66  do_unicode_token_test2 1.7  {a B c D} {a a b B c c d D}
    67  do_unicode_token_test2 1.8  "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"
    68  
    69  do_unicode_token_test2 1.9  "x\uC4x x\uD6x x\uDCx" \
    70      "xax x\uC4x xox x\uD6x xux x\uDCx"
    71  
    72  # Check that diacritics are removed if remove_diacritics=1 is specified.
    73  # And that they do not break tokens.
    74  do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"
    75  
    76  # Title-case mappings work
    77  do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"
    78  
    79  do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
    80      "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"
    81  
    82  do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
    83      "abc abc def def"
    84  
    85  #-------------------------------------------------------------------------
    86  #
        # A set of English documents, plus a substitution table used to replace
        # selected ASCII letters with diaeresis-carrying equivalents before the
        # documents are inserted into an FTS5 table (see tests 2.*).
    87  set docs [list {
    88    Enhance the INSERT syntax to allow multiple rows to be inserted via the
    89    VALUES clause.
    90  } {
    91    Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
    92  } {
    93    Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
    94  } {
    95    Added the sqlite3_db_readonly() interface.
    96  } {
    97    Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
    98    ability to add new PRAGMA statements or to override built-in PRAGMAs.  
    99  } {
   100    Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
   101    the same row that contains the maximum x value.
   102  } {
   103    Added support for the FTS4 languageid option.
   104  } {
   105    Documented support for the FTS4 content option. This feature has actually
   106    been in the code since version 3.7.9 but is only now considered to be
   107    officially supported.  
   108  } {
   109    Pending statements no longer block ROLLBACK. Instead, the pending statement
   110    will return SQLITE_ABORT upon next access after the ROLLBACK.  
   111  } {
   112    Improvements to the handling of CSV inputs in the command-line shell
   113  } {
   114    Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
   115    incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
   116    connected by OR.  
   117  }]
   118  
        # Each map(k) entry is {capital-accented small-accented} for letter k.
   119  set map(a) [list "\u00C4" "\u00E4"]  ; # LATIN LETTER A WITH DIAERESIS
   120  set map(e) [list "\u00CB" "\u00EB"]  ; # LATIN LETTER E WITH DIAERESIS
   121  set map(i) [list "\u00CF" "\u00EF"]  ; # LATIN LETTER I WITH DIAERESIS
   122  set map(o) [list "\u00D6" "\u00F6"]  ; # LATIN LETTER O WITH DIAERESIS
   123  set map(u) [list "\u00DC" "\u00FC"]  ; # LATIN LETTER U WITH DIAERESIS
   124  set map(y) [list "\u0178" "\u00FF"]  ; # LATIN LETTER Y WITH DIAERESIS
   125  set map(h) [list "\u1E26" "\u1E27"]  ; # LATIN LETTER H WITH DIAERESIS
   126  set map(w) [list "\u1E84" "\u1E85"]  ; # LATIN LETTER W WITH DIAERESIS
   127  set map(x) [list "\u1E8C" "\u1E8D"]  ; # LATIN LETTER X WITH DIAERESIS
        # Flatten into a single [string map] list: upper-case source letters map
        # to the capital accented form, lower-case to the small form.
   128  foreach k [array names map] {
   129    lappend mappings [string toupper $k] [lindex $map($k) 0] 
   130    lappend mappings $k [lindex $map($k) 1]
   131  }
   132  proc mapdoc {doc} { 
   133    set doc [regsub -all {[[:space:]]+} $doc " "]
   134    string map $::mappings [string trim $doc] 
   135  }
   136  
        # Insert each document with letters mapped to accented forms, then
        # check that a MATCH for the mapped form of "row" finds the expected
        # (mapped) document.
   137  do_test 2.0 {
   138    execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
   139    foreach doc $docs {
   140      set d [mapdoc $doc]
   141      execsql { INSERT INTO t2 VALUES($d) }
   142    }
   143  } {}
   144  
   145  do_test 2.1 {
   146    set q [mapdoc "row"]
   147    execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
   148  } [list [mapdoc {
   149    Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
   150    the same row that contains the maximum x value.
   151  }]]
   152  
        # Snippet tests. Both the query term and the expected snippet text are
        # passed through mapdoc, so matching and snippet extraction are being
        # exercised against the accented document text.
   153  foreach {tn query snippet} {
   154    2 "row" {
   155       ...returns the value of y on the same [row] that contains 
   156       the maximum x value.
   157    }
   158    3 "ROW" {
   159       ...returns the value of y on the same [row] that contains 
   160       the maximum x value.
   161    }
   162    4 "rollback" {
   163       Pending statements no longer block [ROLLBACK]. Instead, the pending
   164       statement will return SQLITE_ABORT upon...
   165    }
   166    5 "rOllback" {
   167       Pending statements no longer block [ROLLBACK]. Instead, the pending
   168       statement will return SQLITE_ABORT upon...
   169    }
   170    6 "lang*" {
   171       Added support for the FTS4 [languageid] option.
   172    }
   173  } {
   174    do_test 2.$tn {
   175      set q [mapdoc $query]
   176      execsql { 
   177        SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q 
   178      }
   179    } [list [mapdoc $snippet]]
   180  }
   181  
   182  #-------------------------------------------------------------------------
   183  # Make sure the unicode61 tokenizer does not crash if it is passed a 
   184  # NULL pointer.
   185  reset_db
   186  do_execsql_test 3.1 {
   187    CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
   188    INSERT INTO t1 VALUES(NULL, 'a b c');
   189  }
   190  
   191  do_execsql_test 3.2 {
   192    SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
   193  } {{a [b] c}}
   194  
        # Double the table contents 16 times (one seed row -> 65536 rows of
        # 'b' tokens) before adding the two rows the query in 3.4 should find.
   195  do_execsql_test 3.3 {
   196    BEGIN;
   197    DELETE FROM t1;
   198    INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
   199    INSERT INTO t1 SELECT * FROM t1;
   200    INSERT INTO t1 SELECT * FROM t1;
   201    INSERT INTO t1 SELECT * FROM t1;
   202    INSERT INTO t1 SELECT * FROM t1;
   203    INSERT INTO t1 SELECT * FROM t1;
   204    INSERT INTO t1 SELECT * FROM t1;
   205    INSERT INTO t1 SELECT * FROM t1;
   206    INSERT INTO t1 SELECT * FROM t1;
   207    INSERT INTO t1 SELECT * FROM t1;
   208    INSERT INTO t1 SELECT * FROM t1;
   209    INSERT INTO t1 SELECT * FROM t1;
   210    INSERT INTO t1 SELECT * FROM t1;
   211    INSERT INTO t1 SELECT * FROM t1;
   212    INSERT INTO t1 SELECT * FROM t1;
   213    INSERT INTO t1 SELECT * FROM t1;
   214    INSERT INTO t1 SELECT * FROM t1;
   215    INSERT INTO t1 VALUES('a b c', NULL);
   216    INSERT INTO t1 VALUES('a x c', NULL);
   217    COMMIT;
   218  }
   219  
   220  do_execsql_test 3.4 {
   221    SELECT * FROM t1 WHERE t1 MATCH 'a b';
   222  } {{a b c} {}}
   223  
   224  #-------------------------------------------------------------------------
   225  #
   226  reset_db
   227  
        # NOTE(review): \uFFFE is a noncharacter and \uD800 a lone surrogate;
        # these tests presumably check that the tokenizer tolerates ill-formed
        # input without crashing — TODO confirm against upstream fts4 version.
   228  do_test 4.1 {
   229    set a "abc\uFFFEdef"
   230    set b "abc\uD800def"
   231    set c "\uFFFEdef"
   232    set d "\uD800def"
   233    execsql {
   234      CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
   235      INSERT INTO t1 VALUES($a);
   236      INSERT INTO t1 VALUES($b);
   237      INSERT INTO t1 VALUES($c);
   238      INSERT INTO t1 VALUES($d);
   239    }
   240  
   241    execsql "CREATE VIRTUAL TABLE t8 USING fts5(
   242        a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
   243    )"
   244  } {}
   245  
        # NOTE(review): the byte strings below start with an 0xF7 lead byte
        # followed by varying numbers of 0xBF continuation bytes — not valid
        # UTF-8. Only the inserts are checked, i.e. "does not error/crash".
   246  do_test 4.2 {
   247    set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
   248    set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
   249    set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
   250    set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
   251    execsql {
   252      INSERT INTO t1 VALUES($a);
   253      INSERT INTO t1 VALUES($b);
   254      INSERT INTO t1 VALUES($c);
   255      INSERT INTO t1 VALUES($d);
   256    }
   257  } {}
   258  
   259  do_test 4.3 {
   260    set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
   261    set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
   262    set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
   263    set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
   264    execsql {
   265      INSERT INTO t1 VALUES($a);
   266      INSERT INTO t1 VALUES($b);
   267      INSERT INTO t1 VALUES($c);
   268      INSERT INTO t1 VALUES($d);
   269    }
   270  } {}
   271  
        # Separator codepoints supplied as hex-escaped bytes via
        # sqlite3_exec_hex (%C09004 etc. are expanded by that command).
   272  do_test 4.4 {
   273    sqlite3_exec_hex db {
   274      CREATE VIRTUAL TABLE t9 USING fts5(a, b, 
   275        tokenize="unicode61 separators '%C09004'"
   276      );
   277      INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
   278    }
   279  } {0 {}}
   280  
   281  
   282  #-------------------------------------------------------------------------
   283  
        # Tests 5.* exercise the "tokenchars" and "separators" tokenizer
        # options: characters listed in tokenchars join tokens, characters
        # listed in separators split them.
   284  do_unicode_token_test3 5.1 {tokenchars {}} {
   285    sqlite3_reset sqlite3_column_int
   286  } {
   287    sqlite3 sqlite3 
   288    reset reset 
   289    sqlite3 sqlite3 
   290    column column 
   291    int int
   292  }
   293  
   294  do_unicode_token_test3 5.2 {tokenchars _} {
   295    sqlite3_reset sqlite3_column_int
   296  } {
   297    sqlite3_reset sqlite3_reset 
   298    sqlite3_column_int sqlite3_column_int
   299  }
   300  
   301  do_unicode_token_test3 5.3 {separators xyz} {
   302    Laotianxhorseyrunszfast
   303  } {
   304    laotian Laotian
   305    horse horse
   306    runs runs
   307    fast fast
   308  }
   309  
   310  do_unicode_token_test3 5.4 {tokenchars xyz} {
   311    Laotianxhorseyrunszfast
   312  } {
   313    laotianxhorseyrunszfast Laotianxhorseyrunszfast
   314  }
   315  
   316  do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
   317    sqlite3_resetxsqlite3_column_intyhonda_phantom
   318  } {
   319    sqlite3_reset sqlite3_reset 
   320    sqlite3_column_int sqlite3_column_int
   321    honda_phantom honda_phantom
   322  }
   323  
   324  do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
   325    abc abc def def
   326  }
   327  
   328  do_unicode_token_test3 5.7                             \
   329    "tokenchars \u2444\u2445"                            \
   330    "separators \u05D0\u05D1\u05D2"                      \
   331    "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
   332    [list                                                \
   333      \u2444fre\u2445sh \u2444fre\u2445sh              \
   334      water water                                      \
   335      fish fish                                        \
   336      \u2445timer \u2445timer                          \
   337    ]
   338  
   339  # Check that it is not possible to add a standalone diacritic codepoint 
   340  # to either separators or tokenchars.
   341  do_unicode_token_test3 5.8 "separators \u0301" \
   342    "hello\u0301world \u0301helloworld"          \
   343    "helloworld hello\u0301world helloworld helloworld"
   344  
   345  do_unicode_token_test3 5.9 "tokenchars \u0301" \
   346    "hello\u0301world \u0301helloworld"          \
   347    "helloworld hello\u0301world helloworld helloworld"
   348  
   349  do_unicode_token_test3 5.10 "separators \u0301" \
   350    "remove_diacritics 0"                        \
   351    "hello\u0301world \u0301helloworld"          \
   352    "hello\u0301world hello\u0301world helloworld helloworld"
   353  
   354  do_unicode_token_test3 5.11 "tokenchars \u0301" \
   355    "remove_diacritics 0"                         \
   356    "hello\u0301world \u0301helloworld"           \
   357    "hello\u0301world hello\u0301world helloworld helloworld"
   358  
   359  #-------------------------------------------------------------------------
   360  
   361  proc do_tokenize {tokenizer txt} {
   362    set res [list]
   363    foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
   364      lappend res $b
   365    }
   366    set res
   367  }
   368  
   369  # Argument $lCodepoint must be a list of codepoints (integers) that 
   370  # correspond to whitespace characters. This command creates a string
   371  # $W from the codepoints, then tokenizes "${W}hello{$W}world${W}" 
   372  # using tokenizer $tokenizer. The test passes if the tokenizer successfully
   373  # extracts the two 5 character tokens.
   374  #
   375  proc do_isspace_test {tn tokenizer lCp} {
   376    set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp] 
   377    set txt "${whitespace}hello${whitespace}world${whitespace}"
   378    uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
   379  }
   380  
   381  set tokenizers [list unicode61]
   382  #ifcapable icu { lappend tokenizers icu }
   383  
   384  # Some tests to check that the tokenizers can both identify white-space 
   385  # codepoints. All codepoints tested below are of type "Zs" in the
   386  # UnicodeData.txt file.
        # Each call passes one codepoint (or a list of codepoints) to use as
        # the whitespace separating "hello" and "world".
   387  foreach T $tokenizers {
   388    do_isspace_test 6.$T.1 $T    32
   389    do_isspace_test 6.$T.2 $T    160
   390    do_isspace_test 6.$T.3 $T    5760
   391    do_isspace_test 6.$T.4 $T    6158
   392    do_isspace_test 6.$T.5 $T    8192
   393    do_isspace_test 6.$T.6 $T    8193
   394    do_isspace_test 6.$T.7 $T    8194
   395    do_isspace_test 6.$T.8 $T    8195
   396    do_isspace_test 6.$T.9 $T    8196
   397    do_isspace_test 6.$T.10 $T    8197
   398    do_isspace_test 6.$T.11 $T    8198
   399    do_isspace_test 6.$T.12 $T    8199
   400    do_isspace_test 6.$T.13 $T    8200
   401    do_isspace_test 6.$T.14 $T    8201
   402    do_isspace_test 6.$T.15 $T    8202
   403    do_isspace_test 6.$T.16 $T    8239
   404    do_isspace_test 6.$T.17 $T    8287
   405    do_isspace_test 6.$T.18 $T   12288
   406  
   407    do_isspace_test 6.$T.19 $T   {32 160 5760 6158}
   408    do_isspace_test 6.$T.20 $T   {8192 8193 8194 8195}
   409    do_isspace_test 6.$T.21 $T   {8196 8197 8198 8199}
   410    do_isspace_test 6.$T.22 $T   {8200 8201 8202 8239}
   411    do_isspace_test 6.$T.23 $T   {8287 12288}
   412  }
   413  
   414  
   415  #-------------------------------------------------------------------------
   416  # Test that the private use ranges are treated as alphanumeric.
   417  #
        # The "*" placeholder in the config, input and expected output is
        # replaced by $c, a codepoint drawn from the private use area
        # (\ue000 .. \uf8ff).
   418  foreach {tn1 c} {
   419    1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
   420  } {
   421    foreach {tn2 config res} {
   422      1 ""             "hello*world hello*world"
   423      2 "separators *" "hello hello world world"
   424    } {
   425      set config [string map [list * $c] $config]
   426      set input  [string map [list * $c] "hello*world"]
   427      set output [string map [list * $c] $res]
   428      do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
   429    }
   430  }
   431  
   432  #-------------------------------------------------------------------------
   433  # Cursory test of remove_diacritics=0.
   434  #
   435  # 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
   436  # 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
   437  # 00E4;LATIN SMALL LETTER A WITH DIAERESIS
   438  # 00F6;LATIN SMALL LETTER O WITH DIAERESIS
   439  #
   440  do_execsql_test 8.1.1 "
   441    CREATE VIRTUAL TABLE t3 USING fts5(
   442      content, tokenize='unicode61 remove_diacritics 1'
   443    );
   444    INSERT INTO t3 VALUES('o');
   445    INSERT INTO t3 VALUES('a');
   446    INSERT INTO t3 VALUES('O');
   447    INSERT INTO t3 VALUES('A');
   448    INSERT INTO t3 VALUES('\xD6');
   449    INSERT INTO t3 VALUES('\xC4');
   450    INSERT INTO t3 VALUES('\xF6');
   451    INSERT INTO t3 VALUES('\xE4');
   452  "
        # With remove_diacritics=1 the accented rows (5-8) match the plain
        # letter queries as well.
   453  do_execsql_test 8.1.2 {
   454    SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
   455  } {1 3 5 7}
   456  do_execsql_test 8.1.3 {
   457    SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
   458  } {2 4 6 8}
        # Same content indexed with remove_diacritics=0: the accented rows no
        # longer match the plain-letter queries.
   459  do_execsql_test 8.2.1 {
   460    CREATE VIRTUAL TABLE t4 USING fts5(
   461      content, tokenize='unicode61 remove_diacritics 0'
   462    );
   463    INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
   464  }
   465  do_execsql_test 8.2.2 {
   466    SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
   467  } {1 3}
   468  do_execsql_test 8.2.3 {
   469    SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
   470  } {2 4}
   471  
   472  #-------------------------------------------------------------------------
   473  #
        # NOTE(review): the section below (tests 9.*, 10.* and 11.*) is dead
        # code — it is disabled by the enclosing "if 0" and still targets
        # fts4/fts4aux/fts3tokenize rather than fts5.
   474  if 0 {
   475  foreach {tn sql} {
   476    1 {
   477      CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
   478      CREATE VIRTUAL TABLE t6 USING fts4(
   479          tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
   480      CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
   481    }
   482    2 {
   483      CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
   484      CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
   485      CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
   486    }
   487    3 {
   488      CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
   489      CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
   490      CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
   491    }
   492    4 {
   493      CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
   494      CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
   495      CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
   496    }
   497  } {
   498    do_execsql_test 9.$tn.0 { 
   499      DROP TABLE IF EXISTS t5;
   500      DROP TABLE IF EXISTS t5aux;
   501      DROP TABLE IF EXISTS t6;
   502      DROP TABLE IF EXISTS t6aux;
   503      DROP TABLE IF EXISTS t7;
   504      DROP TABLE IF EXISTS t7aux;
   505    }
   506    do_execsql_test 9.$tn.1 $sql
   507  
   508    do_execsql_test 9.$tn.2 {
   509      CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
   510      INSERT INTO t5 VALUES('one two three/four.five.six');
   511      SELECT * FROM t5aux;
   512    } {
   513      four.five.six   * 1 1 four.five.six   0 1 1 
   514      {one two three} * 1 1 {one two three} 0 1 1
   515    }
   516  
   517    do_execsql_test 9.$tn.3 {
   518      CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
   519      INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
   520      SELECT * FROM t6aux;
   521    } {
   522      {alpha=beta"gamma}   * 1 1 {alpha=beta"gamma} 0 1 1 
   523      {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
   524    }
   525  
   526    do_execsql_test 9.$tn.4 {
   527      CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
   528      INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
   529      SELECT * FROM t7aux;
   530    } {
   531      aleph * 1 1 aleph 0 1 1 
   532      beth  * 1 1 beth  0 1 1 
   533      gimel * 1 1 gimel 0 1 1
   534    }
   535  }
   536  
   537  # Check that multiple options are handled correctly.
   538  #
   539  do_execsql_test 10.1 {
   540    DROP TABLE IF EXISTS t1;
   541    CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
   542      "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
   543      "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
   544    );
   545  
   546    INSERT INTO t1 VALUES('oneatwoxthreeyfour');
   547    INSERT INTO t1 VALUES('a.single=word');
   548    CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
   549    SELECT * FROM t1aux;
   550  } {
   551    .single=word * 1 1 .single=word 0 1 1 
   552    four         * 1 1 four         0 1 1 
   553    one          * 1 1 one          0 1 1 
   554    three        * 1 1 three        0 1 1 
   555    two          * 1 1 two          0 1 1
   556  }
   557  
   558  # Test that case folding happens after tokenization, not before.
   559  #
   560  do_execsql_test 10.2 {
   561    DROP TABLE IF EXISTS t2;
   562    CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
   563    INSERT INTO t2 VALUES('oneatwoBthree');
   564    INSERT INTO t2 VALUES('onebtwoAthree');
   565    CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
   566    SELECT * FROM t2aux;
   567  } {
   568    one           * 1 1 one           0 1 1 
   569    onebtwoathree * 1 1 onebtwoathree 0 1 1 
   570    three         * 1 1 three         0 1 1 
   571    two           * 1 1 two           0 1 1
   572  }
   573  
   574  # Test that the tokenchars and separators options work with the 
   575  # fts3tokenize table.
   576  #
   577  do_execsql_test 11.1 {
   578    CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
   579      "unicode61", "tokenchars=@.", "separators=1234567890"
   580    );
   581    SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
   582  } {
   583    berlin@street sydney.road
   584  }
   585  
   586  }
   587  
   588  finish_test