modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/test/fts5synonym.test (about)

     1  # 2014 Dec 20
     2  #
     3  # The author disclaims copyright to this source code.  In place of
     4  # a legal notice, here is a blessing:
     5  #
     6  #    May you do good and not evil.
     7  #    May you find forgiveness for yourself and forgive others.
     8  #    May you share freely, never taking more than you give.
     9  #
    10  #***********************************************************************
    11  #
    12  # Tests focusing on custom tokenizers that support synonyms.
    13  #
    14  
    15  source [file join [file dirname [info script]] fts5_common.tcl]
    16  set testprefix fts5synonym
    17  
    18  # If SQLITE_ENABLE_FTS5 is not defined, omit this file.
    19  ifcapable !fts5 {
    20    finish_test
    21    return
    22  }
    23  
    24  proc tcl_create {args} {return tcl_tokenize}  ;# ignores its arguments; always yields the proc name "tcl_tokenize"
    25  
    26  foreach_detail_mode $testprefix {
    27  
    28  #-------------------------------------------------------------------------
    29  # Warm-body (basic sanity) test for the code in fts5_tcl.c: create a table
    30  # using the "tclnum" tokenizer, insert two rows and query each by a term.
    31  fts5_tclnum_register db
    32  do_execsql_test 1.0 {
    33    CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = "tclnum document", detail=%DETAIL%);
    34    INSERT INTO ft VALUES('abc def ghi');
    35    INSERT INTO ft VALUES('jkl mno pqr');
    36    SELECT rowid, x FROM ft WHERE ft MATCH 'def';
    37    SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
    38  } {1 {abc def ghi} {jkl mno pqr} 2}
    39  
    40  #-------------------------------------------------------------------------
    41  # Test a tokenizer that supports synonyms by adding extra entries to the
    42  # FTS index.
    43  #
    44  reset_db
    45  fts5_tclnum_register db
    46  
    47  do_execsql_test 2.0 {
    48    CREATE VIRTUAL TABLE ft USING fts5(
    49        x, tokenize = "tclnum document", detail=%DETAIL%
    50    );
    51    INSERT INTO ft VALUES('one two three');
    52    INSERT INTO ft VALUES('four five six');
    53    INSERT INTO ft VALUES('eight nine ten');
    54  } {}
    55  # Each triple below: test number, MATCH expression, expected rowids.
    56  foreach {tn expr res} {
    57    1 "3" 1
    58    2 "eight OR 8 OR 5" {2 3}
    59    3 "10" {}
    60    4 "1*" {1}
    61    5 "1 + 2" {1}
    62  } {
    63    if {![fts5_expr_ok $expr ft]} continue   ;# skipped (not failed) when fts5_expr_ok rejects it for ft
    64    do_execsql_test 2.1.$tn {
    65      SELECT rowid FROM ft WHERE ft MATCH $expr
    66    } $res
    67  }
    68  
    69  #-------------------------------------------------------------------------
    70  # Test some broken tokenizers:
    71  #
    72  #   3.1.*: A tokenizer that declares the very first token to be colocated.
    73  #
    74  #   3.2.*: A tokenizer that reports two identical tokens at the same position.
    75  #          This is allowed.
    76  #
    77  reset_db
    78  sqlite3_fts5_create_tokenizer db tcl tcl_create
    79  proc tcl_tokenize {tflags text} {
    80    set nSeen 0   ;# tokens emitted so far; only the very first one gets -colo
    81    foreach {tok iFirst iLast} [fts5_tokenize_split $text] {
    82      if {$nSeen > 0} {
    83        sqlite3_fts5_token $tok $iFirst $iLast
    84      } else {
    85        sqlite3_fts5_token -colo $tok $iFirst $iLast
    86      }
    87      incr nSeen
    88    }
    89  }
    90  do_execsql_test 3.1.0 {
    91    CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
    92    INSERT INTO ft VALUES('one two three');
    93    CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
    94    SELECT * FROM vv;
    95  } {
    96    one 1 1   three 1 1   two 1 1
    97  }
    98  # The index written via the broken tokenizer must still pass integrity-check.
    99  do_execsql_test 3.1.1 {
   100    INSERT INTO ft(ft) VALUES('integrity-check');
   101  } {}
   102  
   103  proc tcl_tokenize {tflags text} {
   104    # Well-behaved variant: every token is reported once, none flagged -colo.
   105    set parts [fts5_tokenize_split $text]
   106    foreach {tok iFirst iLast} $parts { sqlite3_fts5_token $tok $iFirst $iLast }
   107  }
   108  
   109  do_execsql_test 3.1.2 {
   110    SELECT rowid FROM ft WHERE ft MATCH 'one two three'
   111  } {1}  ;# run after tcl_tokenize was redefined; the row inserted in 3.1.0 is expected to match
   112  
   113  reset_db
   114  sqlite3_fts5_create_tokenizer db tcl tcl_create
   115  proc tcl_tokenize {tflags text} {
   116    foreach {tok iFirst iLast} [fts5_tokenize_split $text] {
   117      sqlite3_fts5_token       $tok $iFirst $iLast   ;# the token itself
   118      sqlite3_fts5_token -colo $tok $iFirst $iLast   ;# identical copy, same position
   119    }
   120  }
   121  do_execsql_test 3.2.0 {
   122    CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
   123    INSERT INTO ft VALUES('one one two three');
   124    CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
   125    SELECT * FROM vv;
   126  } {
   127    one 1 4   three 1 2   two 1 2
   128  }  ;# last value per term is twice its occurrences in the row, consistent with tcl_tokenize emitting each token twice
   129  do_execsql_test 3.2.1 {
   130    SELECT rowid FROM ft WHERE ft MATCH 'one';
   131  } {1}
   132  do_execsql_test 3.2.2 {
   133    SELECT rowid FROM ft WHERE ft MATCH 'one two three';
   134  } {1}
   135  do_execsql_test 3.2.3 {
   136    SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
   137  } {1}
   138  do_execsql_test 3.2.4 {
   139    SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
   140  } {1}
   141  do_execsql_test 3.2.5 {
   142    SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
   143  } {}
   144  
   145  #-------------------------------------------------------------------------
   146  # Check that expressions with synonyms can be parsed and executed.
   147  #
   148  reset_db
   149  fts5_tclnum_register db
   150  # fts5_expr() output per query: synonyms appear as "a"|"b"|"c"; the prefix query 3* gets none.
   151  foreach {tn expr res} {
   152    1  {abc}                           {"abc"}
   153    2  {one}                           {"one"|"i"|"1"}
   154    3  {3}                             {"3"|"iii"|"three"}
   155    4  {3*}                            {"3" *}
   156  } {
   157    do_execsql_test 4.1.$tn {
   158      SELECT fts5_expr($expr, 'tokenize=tclnum')
   159    } [list $res]
   160  }
   161  
   162  do_execsql_test 4.2.1 {
   163    CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tclnum, detail=%DETAIL%);
   164    INSERT INTO xx VALUES('one two');
   165    INSERT INTO xx VALUES('three four');
   166  }
   167  # Rows were inserted as words; the digit queries below must still find them.
   168  do_execsql_test 4.2.2 {
   169    SELECT rowid FROM xx WHERE xx MATCH '2'
   170  } {1}
   171  
   172  do_execsql_test 4.2.3 {
   173    SELECT rowid FROM xx WHERE xx MATCH '3'
   174  } {2}
   175  
   176  do_test 5.0 {
   177    execsql { 
   178      CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tclnum, detail=%DETAIL%)
   179    }
   180    foreach {rowid a b} {
   181      1 {four v 4 i three} {1 3 five five 4 one}
   182      2 {5 1 3 4 i} {2 2 v two 4}
   183      3 {5 i 5 2 four 4 1} {iii ii five two 1}
   184      4 {ii four 4 one 5 three five} {one 5 1 iii 4 3}
   185      5 {three i v i four 4 1} {ii five five five iii}
   186      6 {4 2 ii two 2 iii} {three 1 four 4 iv 1 iv}
   187      7 {ii ii two three 2 5} {iii i ii iii iii one one}
   188      8 {2 ii i two 3 three 2} {two iv v iii 3 five}
   189      9 {i 2 iv 3 five four v} {iii 4 three i three ii 1}
   190    } {
   191      execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
   192    }
   193  } {}
   194  
   195  
   196  foreach {tn q res} {
   197    1 {one} {
   198      1 {four v 4 [i] three} {[1] 3 five five 4 [one]}
   199      2 {5 [1] 3 4 [i]} {2 2 v two 4}
   200      3 {5 [i] 5 2 four 4 [1]} {iii ii five two [1]}
   201      4 {ii four 4 [one] 5 three five} {[one] 5 [1] iii 4 3}
   202      5 {three [i] v [i] four 4 [1]} {ii five five five iii}
   203      6 {4 2 ii two 2 iii} {three [1] four 4 iv [1] iv}
   204      7 {ii ii two three 2 5} {iii [i] ii iii iii [one] [one]}
   205      8 {2 ii [i] two 3 three 2} {two iv v iii 3 five}
   206      9 {[i] 2 iv 3 five four v} {iii 4 three [i] three ii [1]}
   207    }
   208    2 {five four} {
   209      1 {[four] [v] [4] i three} {1 3 [five] [five] [4] one}
   210      2 {[5] 1 3 [4] i} {2 2 [v] two [4]}
   211      3 {[5] i [5] 2 [four] [4] 1} {iii ii [five] two 1}
   212      4 {ii [four] [4] one [5] three [five]} {one [5] 1 iii [4] 3}
   213      5 {three i [v] i [four] [4] 1} {ii [five] [five] [five] iii}
   214      8 {2 ii i two 3 three 2} {two [iv] [v] iii 3 [five]}
   215      9 {i 2 [iv] 3 [five] [four] [v]} {iii [4] three i three ii 1}
   216    }
   217    3 {one OR two OR iii OR 4 OR v} {
   218      1 {[four] [v] [4] [i] [three]} {[1] [3] [five] [five] [4] [one]}
   219      2 {[5] [1] [3] [4] [i]} {[2] [2] [v] [two] [4]}
   220      3 {[5] [i] [5] [2] [four] [4] [1]} {[iii] [ii] [five] [two] [1]}
   221      4 {[ii] [four] [4] [one] [5] [three] [five]} {[one] [5] [1] [iii] [4] [3]}
   222      5 {[three] [i] [v] [i] [four] [4] [1]} {[ii] [five] [five] [five] [iii]}
   223      6 {[4] [2] [ii] [two] [2] [iii]} {[three] [1] [four] [4] [iv] [1] [iv]}
   224      7 {[ii] [ii] [two] [three] [2] [5]} {[iii] [i] [ii] [iii] [iii] [one] [one]}
   225      8 {[2] [ii] [i] [two] [3] [three] [2]} {[two] [iv] [v] [iii] [3] [five]}
   226      9 {[i] [2] [iv] [3] [five] [four] [v]} {[iii] [4] [three] [i] [three] [ii] [1]}
   227    }
   228  
   229    4 {5 + 1} {
   230      2 {[5 1] 3 4 i} {2 2 v two 4} 
   231      3 {[5 i] 5 2 four 4 1} {iii ii five two 1} 
   232      4 {ii four 4 one 5 three five} {one [5 1] iii 4 3} 
   233      5 {three i [v i] four 4 1} {ii five five five iii}
   234    }
   235  
   236    5 {one + two + three} {
   237      7 {ii ii two three 2 5} {iii [i ii iii] iii one one}
   238      8 {2 ii [i two 3] three 2} {two iv v iii 3 five}
   239    }
   240  
   241    6 {"v v"} {
   242      1 {four v 4 i three} {1 3 [five five] 4 one}
   243      5 {three i v i four 4 1} {ii [five five five] iii}
   244    }
   245  } {
   246    if {![fts5_expr_ok $q t1]} continue   ;# skip queries fts5_expr_ok rejects for t1
   247    do_execsql_test 5.1.$tn {
   248      SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
   249      FROM t1 WHERE t1 MATCH $q
   250    } $res   ;# per row: rowid, then columns a and b with matched spans wrapped in [ ]
   251  }
   252  
   253  # Test that the xQueryPhrase() API works with synonyms.
   254  #
   255  proc mit {blob} {
   256    # Unpack a blob into a list of 32-bit integers in host byte order.
   257    array set fmt {littleEndian i* bigEndian I*}
   258    binary scan $blob $fmt($::tcl_platform(byteOrder)) ints
   259    return $ints
   260  }
   261  db func mit mit
   262  sqlite3_fts5_register_matchinfo db
   263  # Expected value per row: rowid, then the matchinfo(t1, 'x') blob decoded by mit().
   264  foreach {tn q res} {
   265    1 {one} {
   266        1 {1 11 7 2 12 6}     2 {2 11 7 0 12 6} 
   267        3 {2 11 7 1 12 6}     4 {1 11 7 2 12 6} 
   268        5 {3 11 7 0 12 6}     6 {0 11 7 2 12 6} 
   269        7 {0 11 7 3 12 6}     8 {1 11 7 0 12 6} 
   270        9 {1 11 7 2 12 6}
   271    }
   272  } {
   273    do_execsql_test 5.2.$tn {
   274      SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
   275    } $res
   276  }
   277  
   278  #-------------------------------------------------------------------------
   279  # Test terms with more than 4 synonyms.
   280  #
   281  reset_db
   282  sqlite3_fts5_create_tokenizer db tcl tcl_create
   283  proc tcl_tokenize {tflags text} {
   284    foreach {tok iFirst iLast} [fts5_tokenize_split $text] {
   285      sqlite3_fts5_token $tok $iFirst $iLast
   286      # Query text only: a 1-char token also gets colocated 2..10-fold repeats.
   287      if {$tflags ne "query" || [string length $tok] != 1} continue
   288      foreach nRep {2 3 4 5 6 7 8 9 10} {
   289        sqlite3_fts5_token -colo [string repeat $tok $nRep] $iFirst $iLast
   290      }
   291    }
   292  }
   293  
   294  do_execsql_test 6.0.1 {
   295    CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl, detail=%DETAIL%);
   296    INSERT INTO t1 VALUES('yy xx qq');
   297    INSERT INTO t1 VALUES('yy xx xx');
   298  }
   299  if {[fts5_expr_ok "NEAR(y q)" t1]} {
   300    do_execsql_test 6.0.2 {
   301      SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
   302    } {{yy xx qq}}
   303  }
   304  
   305  do_test 6.0.3 {
   306    execsql { 
   307      CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl, detail=%DETAIL%)
   308    }
   309    foreach {rowid a b} {
   310      1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq aaaa}
   311      2 {ww oooooo bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
   312      3 {zzzz llll gggggg cccc uu} {hhhhhh aaaa ppppp rr ee jjjj}
   313      4 {r f i rrrrrr ww hhh} {aa yyy t x aaaaa ii}
   314      5 {fffff mm vvvv ooo ffffff kkkk tttt} {cccccc bb e zzz d n}
   315      6 {iii dddd hh qqqq ddd ooo} {ttt d c b aaaaaa qqqq}
   316      7 {jjjj rrrr v zzzzz u tt t} {ppppp pp dddd mm hhh uuu}
   317      8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b} {dddddd jj r w cccc wwwwww ss}
   318      9 {kkkkk qqq oooo e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
   319    } {
   320      execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
   321    }
   322  } {}
   323  
   324  foreach {tn q res} {
   325    1 {a} {
   326      1 {yyyy vvvvv qq oo yyyyyy vvvv eee} {ffff uu r qq [aaaa]}
   327      3 {zzzz llll gggggg cccc uu} {hhhhhh [aaaa] ppppp rr ee jjjj}
   328      4 {r f i rrrrrr ww hhh} {[aa] yyy t x [aaaaa] ii}
   329      6 {iii dddd hh qqqq ddd ooo} {ttt d c b [aaaaaa] qqqq}
   330    }
   331  
   332    2 {a AND q} {
   333      1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
   334      6 {iii dddd hh [qqqq] ddd ooo} {ttt d c b [aaaaaa] [qqqq]}
   335    }
   336  
   337    3 {o OR (q AND a)} {
   338      1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee} {ffff uu r [qq] [aaaa]}
   339      2 {ww [oooooo] bbbbb ssssss mm} {ffffff yy iiii rr s ccc qqqqq}
   340      5 {fffff mm vvvv [ooo] ffffff kkkk tttt} {cccccc bb e zzz d n}
   341      6 {iii dddd hh [qqqq] ddd [ooo]} {ttt d c b [aaaaaa] [qqqq]}
   342      9 {kkkkk qqq [oooo] e tttttt mmm} {e ss qqqqqq hhhh llllll gg}
   343    }
   344  
   345    4 {NEAR(q y, 20)} {
   346      1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee} {ffff uu r qq aaaa}
   347      2 {ww oooooo bbbbb ssssss mm} {ffffff [yy] iiii rr s ccc [qqqqq]}
   348    }
   349  } {
   350    if {![fts5_expr_ok $q t2]} continue
   351    # First without ORDER BY; $res above is listed by ascending rowid.
   352    do_execsql_test 6.1.$tn.asc {
   353      SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
   354      FROM t2 WHERE t2 MATCH $q
   355    } $res
   356    # Reverse $res one (rowid, a, b) triple at a time to get the DESC expectation.
   357    set res2 [list]
   358    foreach {rowid a b} $res {
   359      set res2 [concat [list $rowid $a $b] $res2]
   360    }
   361  
   362    do_execsql_test 6.1.$tn.desc {
   363      SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
   364      FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
   365    } $res2
   366  }
   367  
   368  do_execsql_test 6.2.1 {
   369    INSERT INTO t2(rowid, a, b) VALUES(13,
   370        'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
   371    );
   372    SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
   373    FROM t2 WHERE t2 MATCH 'x OR y'
   374  } {
   375    1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee} {ffff uu r qq aaaa}
   376    2 {ww oooooo bbbbb ssssss mm} {ffffff (yy) iiii rr s ccc qqqqq}
   377    4 {r f i rrrrrr ww hhh} {aa (yyy) t (x) aaaaa ii}
   378    13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
   379       {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
   380  }
   381  
   382  #-------------------------------------------------------------------------
   383  # Test that the xColumnSize() API is not confused by colocated tokens.
   384  #
   385  reset_db
   386  sqlite3_fts5_create_tokenizer db tcl tcl_create
   387  fts5_aux_test_functions db
   388  proc tcl_tokenize {tflags text} {
   389    foreach {tok iFirst iLast} [fts5_tokenize_split $text] {
   390      sqlite3_fts5_token $tok $iFirst $iLast
   391      # Whatever $tflags is: a 1-char token also gets colocated 2..10-fold repeats.
   392      if {[string length $tok] != 1} continue
   393      foreach nRep {2 3 4 5 6 7 8 9 10} {
   394        sqlite3_fts5_token -colo [string repeat $tok $nRep] $iFirst $iLast
   395      }
   396    }
   397  }
   398  
   399  do_execsql_test 7.0.1 {
   400    CREATE VIRTUAL TABLE t1 USING fts5(a, b, columnsize=1, tokenize=tcl, detail=%DETAIL%);
   401    INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
   402    INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
   403    SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
   404  } {{3 4} {2 10}}  ;# sizes equal the word counts per column; the extra colocated tokens are not counted
   405  
   406  do_execsql_test 7.0.2 {
   407    INSERT INTO t1(t1) VALUES('integrity-check');
   408  }
   409  # Same data with columnsize=0; the reported sizes are expected to be identical.
   410  do_execsql_test 7.1.1 {
   411    CREATE VIRTUAL TABLE t2 USING fts5(a, b, columnsize=0, tokenize=tcl, detail=%DETAIL%);
   412    INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
   413    INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
   414    SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
   415  } {{3 4} {2 10}}
   416  
   417  do_execsql_test 7.1.2 {
   418    INSERT INTO t2(t2) VALUES('integrity-check');
   419  }
   420  
   421  } ;# foreach_detail_mode
   422  
   423  finish_test