gitlab.com/CoiaPrant/sqlite3@v1.19.1/testdata/tcl/fts4langid.test (about)

     1  # 2012 March 01
     2  #
     3  # The author disclaims copyright to this source code.  In place of
     4  # a legal notice, here is a blessing:
     5  #
     6  #    May you do good and not evil.
     7  #    May you find forgiveness for yourself and forgive others.
     8  #    May you share freely, never taking more than you give.
     9  #
    10  #*************************************************************************
    11  # This file implements regression tests for SQLite library.  The
    12  # focus of this script is testing the languageid=xxx FTS4 option.
    13  #
    14  
    15  set testdir [file dirname $argv0]
    16  source $testdir/tester.tcl
    17  
    18  # If SQLITE_ENABLE_FTS3 is defined, omit this file.
    19  ifcapable !fts3 {
    20    finish_test
    21    return
    22  }
    23  
    24  set ::testprefix fts4langid
    25  
    26  #---------------------------------------------------------------------------
    27  # Test plan:
    28  #
    29  #   1.* - Warm-body tests created for specific purposes during development.
    30  #         Passing these doesn't really prove much.
    31  #
    32  #   2.1.* - Test that FTS queries only ever return rows associated with
    33  #           the requested language.
    34  #
    35  #   2.2.* - Same as 2.1.*, after an 'optimize' command.
    36  #
    37  #   2.3.* - Same as 2.1.*, after a 'rebuild' command.
    38  #
    39  #   3.* - Tests with content= tables. Both where there is a real 
    40  #         underlying content table and where there is not.
    41  #
    42  #   4.* - Test that if one is provided, the tokenizer xLanguage method
    43  #         is called to configure the tokenizer before tokenizing query
    44  #         or document text.
    45  #
    46  #   5.* - Test the fts4aux table when the associated FTS4 table contains
    47  #         multiple languages.
    48  #
    49  
    50  do_execsql_test 1.1 {
    51    CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id);
    52  }
    53  
    54  do_execsql_test 1.2 {
    55    SELECT sql FROM sqlite_master WHERE name = 't1_content';
    56  } {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}}
    57  
    58  do_execsql_test 1.3 {SELECT docid FROM t1} {}
    59  do_execsql_test 1.4 {SELECT lang_id FROM t1} {}
    60  
    61  do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')}
    62  do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0}
    63  
    64  do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)}
    65  do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4}
    66  
    67  do_execsql_test 1.9  {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')}
    68  do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0}
    69  
    70  do_execsql_test 1.11 {
    71    CREATE VIRTUAL TABLE t2 USING fts4;
    72    INSERT INTO t2 VALUES('abc');
    73  } 
    74  do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1
    75  
    76  do_execsql_test 1.13 {
    77    DROP TABLE t1;
    78    CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
    79    INSERT INTO t1(content)          VALUES('a b c');
    80    INSERT INTO t1(content, lang_id) VALUES('a b c', 1);
    81  }
    82  
    83  do_execsql_test 1.14 {
    84    SELECT rowid FROM t1 WHERE t1 MATCH 'b';
    85  } {1}
    86  do_execsql_test 1.15 {
    87    SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0;
    88  } {1}
    89  
    90  do_execsql_test 1.16 {
    91    SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1;
    92  } {2}
    93  
    94  do_catchsql_test 1.17 {
    95    INSERT INTO t1(content, lang_id) VALUES('123', -1);
    96  } {1 {constraint failed}}
    97  
    98  do_execsql_test 1.18 {
    99    DROP TABLE t1;
   100    CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id);
   101    INSERT INTO t1(content, lang_id) VALUES('A', 13);
   102    INSERT INTO t1(content, lang_id) VALUES('B', 13);
   103    INSERT INTO t1(content, lang_id) VALUES('C', 13);
   104    INSERT INTO t1(content, lang_id) VALUES('D', 13);
   105    INSERT INTO t1(content, lang_id) VALUES('E', 13);
   106    INSERT INTO t1(content, lang_id) VALUES('F', 13);
   107    INSERT INTO t1(content, lang_id) VALUES('G', 13);
   108    INSERT INTO t1(content, lang_id) VALUES('H', 13);
   109    INSERT INTO t1(content, lang_id) VALUES('I', 13);
   110    INSERT INTO t1(content, lang_id) VALUES('J', 13);
   111    INSERT INTO t1(content, lang_id) VALUES('K', 13);
   112    INSERT INTO t1(content, lang_id) VALUES('L', 13);
   113    INSERT INTO t1(content, lang_id) VALUES('M', 13);
   114    INSERT INTO t1(content, lang_id) VALUES('N', 13);
   115    INSERT INTO t1(content, lang_id) VALUES('O', 13);
   116    INSERT INTO t1(content, lang_id) VALUES('P', 13);
   117    INSERT INTO t1(content, lang_id) VALUES('Q', 13);
   118    INSERT INTO t1(content, lang_id) VALUES('R', 13);
   119    INSERT INTO t1(content, lang_id) VALUES('S', 13);
   120    SELECT rowid FROM t1 WHERE t1 MATCH 'A';
   121  } {}
   122  
   123  
   124  #-------------------------------------------------------------------------
   125  # Test cases 2.*
   126  #
   127  proc build_multilingual_db_1 {db} {
   128    $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) }
   129  
   130    set xwords [list zero one two three four five six seven eight nine ten]
   131    set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa]
   132  
   133    for {set i 0} {$i < 1000} {incr i} {
   134      set iLangid [expr $i%9]
   135      set x ""
   136      set y ""
   137  
   138      set x [list]
   139      lappend x [lindex $xwords [expr ($i / 1000) % 10]]
   140      lappend x [lindex $xwords [expr ($i / 100)  % 10]]
   141      lappend x [lindex $xwords [expr ($i / 10)   % 10]]
   142      lappend x [lindex $xwords [expr ($i / 1)   % 10]]
   143  
   144      set y [list]
   145      lappend y [lindex $ywords [expr ($i / 1000) % 10]]
   146      lappend y [lindex $ywords [expr ($i / 100)  % 10]]
   147      lappend y [lindex $ywords [expr ($i / 10)   % 10]]
   148      lappend y [lindex $ywords [expr ($i / 1)   % 10]]
   149  
   150      $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) }
   151    }
   152  
   153    $db eval {
   154      CREATE TABLE data(x, y, l);
   155      INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2;
   156    }
   157  }
   158  
   159  proc rowid_list_set_langid {langid} {
   160    set ::rowid_list_langid $langid
   161  }
   162  proc rowid_list {pattern} {
   163    set langid $::rowid_list_langid
   164    set res [list]
   165    db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} {
   166      if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} {
   167        lappend res $rowid
   168      }
   169    }
   170    return $res
   171  }
   172  
   173  proc or_merge_list {list1 list2} {
   174    set res [list]
   175  
   176    set i1 0
   177    set i2 0
   178  
   179    set n1 [llength $list1]
   180    set n2 [llength $list2]
   181  
   182    while {$i1 < $n1 && $i2 < $n2} {
   183      set e1 [lindex $list1 $i1]
   184      set e2 [lindex $list2 $i2]
   185  
   186      if {$e1==$e2} {
   187        lappend res $e1
   188        incr i1
   189        incr i2
   190      } elseif {$e1 < $e2} {
   191        lappend res $e1
   192        incr i1
   193      } else {
   194        lappend res $e2
   195        incr i2
   196      }
   197    }
   198  
   199    concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end]
   200  }
   201  
   202  proc or_merge_lists {args} {
   203    set res [lindex $args 0]
   204    for {set i 1} {$i < [llength $args]} {incr i} {
   205      set res [or_merge_list $res [lindex $args $i]]
   206    }
   207    set res
   208  }
   209  
   210  proc and_merge_list {list1 list2} {
   211    foreach i $list2 { set a($i) 1 }
   212    set res [list]
   213    foreach i $list1 {
   214      if {[info exists a($i)]} {lappend res $i}
   215    }
   216    set res
   217  }
   218  
   219  
   220  proc and_merge_lists {args} {
   221    set res [lindex $args 0]
   222    for {set i 1} {$i < [llength $args]} {incr i} {
   223      set res [and_merge_list $res [lindex $args $i]]
   224    }
   225    set res
   226  }
   227  
   228  proc filter_list {list langid} {
   229    set res [list]
   230    foreach i $list {
   231      if {($i % 9) == $langid} {lappend res $i}
   232    }
   233    set res
   234  }
   235  
   236  do_test 2.0 { 
   237    reset_db
   238    build_multilingual_db_1 db
   239  } {}
   240  
   241  proc do_test_query1 {tn query res_script} {
   242    for {set langid 0} {$langid < 10} {incr langid} {
   243      rowid_list_set_langid $langid
   244      set res [eval $res_script]
   245  
   246      set actual [
   247        execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid}
   248      ]
   249      do_test $tn.$langid [list set {} $actual] $res
   250    }
   251  }
   252  
   253  # Run some queries. 
   254  do_test_query1 2.1.1  {delta}          { rowid_list delta }
   255  do_test_query1 2.1.2  {"zero one two"} { rowid_list "zero one two" }
   256  do_test_query1 2.1.3  {zero one two} {
   257    and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
   258  }
   259  do_test_query1 2.1.4  {"zero one" OR "one two"} {
   260    or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
   261  }
   262  
   263  # Now try the same tests as above, but after running the 'optimize'
   264  # command on the FTS table.
   265  #
   266  do_execsql_test 2.2 {
   267    INSERT INTO t2(t2) VALUES('optimize');
   268    SELECT count(*) FROM t2_segdir;
   269  } {9}
   270  do_test_query1 2.2.1 {delta}          { rowid_list delta }
   271  do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" }
   272  do_test_query1 2.2.3 {zero one two} {
   273    and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
   274  }
   275  do_test_query1 2.2.4 {"zero one" OR "one two"} {
   276    or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
   277  }
   278  
   279  # And rebuild.
   280  #
   281  do_test 2.3 { 
   282    reset_db
   283    build_multilingual_db_1 db
   284    execsql { INSERT INTO t2(t2) VALUES('rebuild') }
   285  } {}
   286  do_test_query1 2.3.1 {delta}          { rowid_list delta }
   287  do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" }
   288  do_test_query1 2.3.3 {zero one two} {
   289    and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
   290  }
   291  do_test_query1 2.3.4 {"zero one" OR "one two"} {
   292    or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
   293  }
   294  
   295  #-------------------------------------------------------------------------
   296  # Test cases 3.*
   297  #
   298  do_test 3.0 {
   299    reset_db
   300    build_multilingual_db_1 db
   301    execsql {
   302      CREATE TABLE t3_data(l, x, y);
   303      INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2;
   304      DROP TABLE t2;
   305    }
   306  } {}
   307  do_execsql_test 3.1 {
   308    CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l);
   309    INSERT INTO t2(t2) VALUES('rebuild');
   310  }
   311  
   312  do_test_query1 3.1.1 {delta}          { rowid_list delta }
   313  do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" }
   314  do_test_query1 3.1.3 {zero one two} {
   315    and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
   316  }
   317  do_test_query1 3.1.4 {"zero one" OR "one two"} {
   318    or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
   319  }
   320  
   321  do_execsql_test 3.2.1 {
   322    DROP TABLE t2;
   323    CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable);
   324  }
   325  
   326  do_execsql_test 3.2.2 {
   327    INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data;
   328  }
   329  
   330  do_execsql_test 3.2.3 {
   331    DROP TABLE t3_data;
   332  }
   333  
   334  do_test_query1 3.3.1 {delta}          { rowid_list delta }
   335  do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" }
   336  do_test_query1 3.3.3 {zero one two} {
   337    and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two]
   338  }
   339  do_test_query1 3.3.4 {"zero one" OR "one two"} {
   340    or_merge_lists [rowid_list "zero one"] [rowid_list "one two"]
   341  }
   342  
   343  do_execsql_test 3.4 {
   344    CREATE TABLE t8c(a, b);
   345    CREATE VIRTUAL TABLE t8 USING fts4(content=t8c, languageid=langid);
   346    INSERT INTO t8(docid, a, b) VALUES(-1, 'one two three', 'x y z');
   347    SELECT docid FROM t8 WHERE t8 MATCH 'one x' AND langid=0
   348  } {-1}
   349  
   350  #-------------------------------------------------------------------------
   351  # Test cases 4.*
   352  #
   353  proc build_multilingual_db_2 {db} {
   354    $db eval {
   355      CREATE VIRTUAL TABLE t4 USING fts4(
   356          tokenize=testtokenizer, 
   357          languageid=lid
   358      );
   359    }
   360    for {set i 0} {$i < 50} {incr i} {
   361      execsql { 
   362        INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 
   363      }
   364    }
   365  }
   366  
   367  do_test 4.1.0 {
   368    reset_db
   369    set ptr [fts3_test_tokenizer]
   370    sqlite3_db_config db SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER 1
   371    execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) }
   372    build_multilingual_db_2 db
   373  } {}
   374  do_execsql_test 4.1.1 {
   375    SELECT docid FROM t4 WHERE t4 MATCH 'quick';
   376  } {0}
   377  do_execsql_test 4.1.2 {
   378    SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1;
   379  } {}
   380  do_execsql_test 4.1.3 {
   381    SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1;
   382  } {1}
   383  for {set i 0} {$i < 50} {incr i} {
   384    do_execsql_test 4.1.4.$i {
   385      SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i;
   386    } [expr 0==($i%2)]
   387  }
   388  do_catchsql_test 4.1.5 {
   389    INSERT INTO t4(content, lid) VALUES('hello world', 101)
   390  } {1 {SQL logic error}}
   391  
   392  #-------------------------------------------------------------------------
   393  # Test cases 5.*
   394  #
   395  # The following test cases are designed to detect a 32-bit overflow bug
   396  # that existed at one point.
   397  #
   398  proc build_multilingual_db_3 {db} {
   399    $db eval {
   400      CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid);
   401    }
   402    set languages [list 0 1 2 [expr 1<<30]]
   403  
   404    foreach lid $languages {
   405      execsql {
   406        INSERT INTO t5(docid, content, lid) VALUES(
   407            $lid, 'My language is ' || $lid, $lid
   408        ) 
   409      }
   410    }
   411  }
   412  
   413  do_test 5.1.0 {
   414    reset_db
   415    build_multilingual_db_3 db
   416  } {}
   417  
   418  do_execsql_test 5.1.1 {
   419    SELECT level FROM t5_segdir;
   420  } [list 0 1024 2048 [expr 1<<40]]
   421  
   422  do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0
   423  foreach langid [list 0 1 2 [expr 1<<30]] {
   424    do_execsql_test 5.2.$langid { 
   425      SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid
   426    } $langid
   427  }
   428  
   429  set lid [expr 1<<30]
   430  do_execsql_test 5.3.1 {
   431    CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid);
   432    INSERT INTO t6 VALUES('I belong to language 0!');
   433  }
   434  do_test 5.3.2 {
   435    for {set i 0} {$i < 20} {incr i} {
   436      execsql {
   437        INSERT INTO t6(content, lid) VALUES(
   438          'I (row '||$i||') belong to langauge N!', $lid
   439        );
   440      }
   441    }
   442    execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' }
   443  } {1}
   444  
   445  do_test 5.3.3 {
   446    execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid}
   447  } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21}
   448  
   449  do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {}
   450  do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1}
   451  do_execsql_test 5.3.6 { 
   452    SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid
   453  } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21}
   454  
   455  
   456  set lid [expr 1<<30]
   457  foreach lid [list 4 [expr 1<<30]] {
   458    do_execsql_test 5.4.$lid.1 {
   459      DELETE FROM t6;
   460      SELECT count(*) FROM t6_segdir;
   461      SELECT count(*) FROM t6_segments;
   462    } {0 0}
   463    do_execsql_test 5.4.$lid.2 {
   464      INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid);
   465      INSERT INTO t6(content, lid) VALUES('zero zero one', $lid);
   466      INSERT INTO t6(content, lid) VALUES('zero one zero', $lid);
   467      INSERT INTO t6(content, lid) VALUES('zero one one', $lid);
   468      INSERT INTO t6(content, lid) VALUES('one zero zero', $lid);
   469      INSERT INTO t6(content, lid) VALUES('one zero one', $lid);
   470      INSERT INTO t6(content, lid) VALUES('one one zero', $lid);
   471      INSERT INTO t6(content, lid) VALUES('one one one', $lid);
   472  
   473      SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid;
   474    } {1 2 5}
   475  
   476    do_execsql_test 5.4.$lid.3 {
   477      SELECT count(*) FROM t6_segdir;
   478      SELECT count(*) FROM t6_segments;
   479    } {8 0}
   480  
   481    do_execsql_test 5.4.$lid.4 {
   482      INSERT INTO t6(t6) VALUES('merge=100,3');
   483      INSERT INTO t6(t6) VALUES('merge=100,3');
   484      SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid;
   485    } {1 2 5}
   486  
   487    do_execsql_test 5.4.$lid.5 {
   488      SELECT count(*) FROM t6_segdir;
   489      SELECT count(*) FROM t6_segments;
   490    } {1 2}
   491  }
   492  
   493  reset_db
   494  do_execsql_test 6.0 {
   495    CREATE VIRTUAL TABLE vt0 USING fts4(c0, languageid="lid");
   496    INSERT INTO vt0 VALUES ('a'), ('b');
   497    BEGIN;
   498      UPDATE vt0 SET lid = 1 WHERE lid=0;
   499  }
   500  do_execsql_test 6.1 {
   501    INSERT INTO vt0(vt0) VALUES('integrity-check');
   502  }
   503  do_execsql_test 6.2 {
   504    COMMIT;
   505    INSERT INTO vt0(vt0) VALUES('integrity-check');
   506  }
   507  finish_test