modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/test/fts5prefix.test (about)

     1  # 2015 Jan 13
     2  #
     3  # The author disclaims copyright to this source code.  In place of
     4  # a legal notice, here is a blessing:
     5  #
     6  #    May you do good and not evil.
     7  #    May you find forgiveness for yourself and forgive others.
     8  #    May you share freely, never taking more than you give.
     9  #
    10  #***********************************************************************
    11  #
    12  # This file contains tests focused on prefix indexes.
    13  #
    14  
    15  source [file join [file dirname [info script]] fts5_common.tcl]
    16  set testprefix fts5prefix
    17  
    18  # If SQLITE_ENABLE_FTS5 is defined, omit this file.
    19  ifcapable !fts5 {
    20    finish_test
    21    return
    22  }
    23  
    24  do_execsql_test 1.0 {
    25    CREATE VIRTUAL TABLE xx USING fts5(x, prefix=1);
    26    INSERT INTO xx VALUES('one two three');
    27    INSERT INTO xx VALUES('four five six');
    28    INSERT INTO xx VALUES('seven eight nine ten');
    29  }
    30  
    31  do_execsql_test 1.1 {
    32    SELECT rowid FROM xx WHERE xx MATCH 't*'
    33  } {1 3}
    34  
    35  
    36  #-------------------------------------------------------------------------
    37  # Check that prefix indexes really do index n-character prefixes, not 
    38  # n-byte prefixes. Use the ascii tokenizer so as not to be confused by
    39  # diacritic removal.
    40  #
    41  do_execsql_test 2.0 { 
    42    CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize = ascii, prefix = 2) 
    43  }
    44  
    45  do_test 2.1 {
    46    foreach {rowid string} {
    47      1 "\xCA\xCB\xCC\xCD"
    48      2 "\u1234\u5678\u4321\u8765"
    49    } {
    50      execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $string) }
    51    }
    52  } {}
    53  
    54  do_execsql_test 2.2 {
    55    INSERT INTO t1(t1) VALUES('integrity-check');
    56  }
    57  
    58  foreach {tn q res} {
    59    1 "SELECT rowid FROM t1 WHERE t1 MATCH '\xCA\xCB*'" 1
    60    2 "SELECT rowid FROM t1 WHERE t1 MATCH '\u1234\u5678*'" 2
    61  } {
    62    do_execsql_test 2.3.$tn $q $res
    63  }
    64  
    65  #-------------------------------------------------------------------------
    66  # Check that prefix queries with:
    67  #
    68  #   * a column filter, and
    69  #   * no prefix index.
    70  #
    71  # work Ok.
    72  #
    73  do_execsql_test 3.0 {
    74    CREATE VIRTUAL TABLE t3 USING fts5(a, b, c);
    75    INSERT INTO t3(t3, rank) VALUES('pgsz', 32);
    76    BEGIN;
    77      INSERT INTO t3 VALUES('acb ccc bba', 'cca bba bca', 'bbc ccc bca'); -- 1
    78      INSERT INTO t3 VALUES('cbb cac cab', 'abb aac bba', 'aab ccc cac'); -- 2
    79      INSERT INTO t3 VALUES('aac bcb aac', 'acb bcb caa', 'aca bab bca'); -- 3
    80      INSERT INTO t3 VALUES('aab ccb ccc', 'aca cba cca', 'aca aac cbb'); -- 4
    81      INSERT INTO t3 VALUES('bac aab bab', 'ccb bac cba', 'acb aba abb'); -- 5
    82      INSERT INTO t3 VALUES('bab abc ccb', 'acb cba abb', 'cbb aaa cab'); -- 6
    83      INSERT INTO t3 VALUES('cbb bbc baa', 'aab aca baa', 'bcc cca aca'); -- 7
    84      INSERT INTO t3 VALUES('abc bba abb', 'cac abc cba', 'acc aac cac'); -- 8
    85      INSERT INTO t3 VALUES('bbc bbc cab', 'bcb ccb cba', 'bcc cac acb'); -- 9
    86    COMMIT;
    87  }
    88  
    89  foreach {tn match res} {
    90    1 "a : c*" {1 2 4 6 7 9}
    91    2 "b : c*" {1 3 4 5 6 8 9}
    92    3 "c : c*" {1 2 4 6 7 8 9}
    93    4 "a : b*" {1 3 5 6 7 8 9}
    94    5 "b : b*" {1 2 3 5 7 9}
    95    6 "c : b*" {1 3 7 9}
    96    7 "a : a*" {1 3 4 5 6 8}
    97    8 "b : a*" {2 3 4 6 7 8}
    98    9 "c : a*" {2 3 4 5 6 7 8 9}
    99  } {
   100    do_execsql_test 3.1.$tn {
   101      SELECT rowid FROM t3($match)
   102    } $res
   103  }
   104  
   105  do_test 3.2 {
   106    expr srand(0)
   107    execsql { DELETE FROM t3 }
   108    for {set i 0} {$i < 1000} {incr i} {
   109      set a [fts5_rnddoc 3]
   110      set b [fts5_rnddoc 8]
   111      set c [fts5_rnddoc 20]
   112      execsql { INSERT INTO t3 VALUES($a, $b, $c) }
   113    }
   114    execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
   115  } {}
   116  
   117  proc gmatch {col pattern} {
   118    expr {[lsearch -glob $col $pattern]>=0}
   119  }
   120  db func gmatch gmatch
   121  
   122  proc ghl {col pattern} {
   123    foreach t $col {
   124      if {[string match $pattern $t]} {
   125        lappend res "*$t*"
   126      } else {
   127        lappend res $t
   128      }
   129    }
   130    set res
   131  }
   132  db func ghl ghl
   133  
   134  set COLS(a) 0
   135  set COLS(b) 1
   136  set COLS(c) 2
   137  
   138  for {set x 0} {$x<2} {incr x} {
   139    foreach {tn pattern} {
   140      1  {xa*}
   141      2  {xb*}
   142      3  {xc*}
   143      4  {xd*}
   144      5  {xe*}
   145      6  {xf*}
   146      7  {xg*}
   147      8  {xh*}
   148      9  {xi*}
   149      10 {xj*}
   150    } {
   151      foreach col {a b c} {
   152  
   153        # Check that the list of returned rowids is correct.
   154        #
   155        set res [db eval "SELECT rowid FROM t3 WHERE gmatch($col, '$pattern')"]
   156        set query "$col : $pattern"
   157        do_execsql_test 3.3.$x.$tn.$col.rowid {
   158          SELECT rowid FROM t3($query);
   159        } $res
   160  
   161        # Check that the highlight() function works.
   162        #
   163        set res [db eval \
   164          "SELECT ghl($col, '$pattern') FROM t3 WHERE gmatch($col, '$pattern')"
   165        ]
   166        set idx $COLS($col)
   167        do_execsql_test 3.3.$x.$tn.$col.highlight {
   168          SELECT highlight(t3, $idx, '*', '*') FROM t3($query);
   169        } $res
   170      }
   171  
   172      foreach colset {{a b} {b c} {c a} {a c} {b a}} {
   173        # Check that the list of returned rowids is correct.
   174        #
   175        foreach {col1 col2} $colset {}
   176        set expr "gmatch($col1, '$pattern') OR gmatch($col2, '$pattern')"
   177        set res [db eval "SELECT rowid FROM t3 WHERE $expr"]
   178        set query "{$colset} : $pattern"
   179        do_execsql_test 3.3.$x.$tn.{$colset}.rowid {
   180          SELECT rowid FROM t3($query);
   181        } $res
   182  
   183        set resq    "SELECT ghl($col1, '$pattern'), ghl($col2, '$pattern')"
   184        append resq " FROM t3 WHERE $expr"
   185        set res [db eval $resq]
   186        set idx1 $COLS($col1)
   187        set idx2 $COLS($col2)
   188        do_execsql_test 3.3.$x.$tn.{$colset}.highlight {
   189          SELECT highlight(t3, $idx1, '*', '*'), highlight(t3, $idx2, '*', '*')
   190            FROM t3($query)
   191        } $res
   192      }
   193    }
   194    execsql { INSERT INTO t3(t3) VALUES('optimize') }
   195    execsql { INSERT INTO t3(t3) VALUES('integrity-check') }
   196  }
   197  
   198  #-------------------------------------------------------------------------
   199  #
   200  reset_db
   201  do_execsql_test 4.0 {
   202    CREATE VIRTUAL TABLE t2 USING fts5(c1, c2);
   203    INSERT INTO t2 VALUES('xa xb', 'xb xa');
   204  
   205    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2
   206    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4
   207    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 8
   208    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 16
   209    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 32
   210    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 64
   211    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 128
   212    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 256
   213    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 512
   214    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 1024
   215    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 2048
   216    INSERT INTO t2 SELECT c1||' '||c1, c2||' '||c2 FROM t2; -- 4096
   217  
   218    SELECT count(*) FROM t2('x*');
   219  } {4096}
   220  
   221  do_execsql_test 4.1 {
   222    UPDATE t2 SET c2 = 'ya yb';
   223    SELECT count(*) FROM t2('c1:x*');
   224    SELECT count(*) FROM t2('c2:x*');
   225  } {4096 0}
   226  
   227  do_execsql_test 4.2 {
   228    UPDATE t2 SET c2 = 'xa';
   229    SELECT count(*) FROM t2('c1:x*');
   230    SELECT count(*) FROM t2('c2:x*');
   231  } {4096 4096}
   232  
   233  #-------------------------------------------------------------------------
   234  #
   235  reset_db
   236  proc rnddoc {n} {
   237    set map [list a b c d]
   238    set doc [list]
   239    for {set i 0} {$i < $n} {incr i} {
   240      lappend doc "x[lindex $map [expr int(rand()*4)]]"
   241    }
   242    set doc
   243  }
   244  set cols [list]
   245  for {set i 1} {$i<250} {incr i} { 
   246    lappend cols "c$i" 
   247    lappend vals "'[rnddoc 10]'"
   248  }
   249  
   250  do_test 5.0 {
   251    execsql "CREATE VIRTUAL TABLE t4 USING fts5([join $cols ,])"
   252    execsql {INSERT INTO t4(t4, rank) VALUES('pgsz', 32)}
   253    execsql "INSERT INTO t4 VALUES([join $vals ,])"
   254    execsql "INSERT INTO t4 VALUES([join $vals ,])"
   255    execsql "INSERT INTO t4 VALUES([join $vals ,])"
   256    execsql "INSERT INTO t4 VALUES([join $vals ,])"
   257  } {}
   258  
   259  proc gmatch {col pattern} {
   260    expr {[lsearch -glob $col $pattern]>=0}
   261  }
   262  db func gmatch gmatch
   263  foreach {tn col pattern} {
   264    1 c100 {xa*}
   265    2 c200 {xb*}
   266  } {
   267    set res [db eval "SELECT rowid FROM t4 WHERE gmatch($col, \$pattern)"]
   268    set query "$col : $pattern"
   269    do_execsql_test 5.$tn { SELECT rowid FROM t4($query) } $res
   270  }
   271  
   272  reset_db
   273  db func fts5_rnddoc fts5_rnddoc
   274  do_test 6.0 {
   275    execsql {
   276      CREATE VIRTUAL TABLE t5 USING fts5(x, y);
   277      INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
   278      INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
   279      INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
   280      INSERT INTO t5 VALUES( fts5_rnddoc(10000), fts5_rnddoc(10000) );
   281    }
   282  } {}
   283  
   284  proc gmatch {col pattern} {
   285    expr {[lsearch -glob $col $pattern]>=0}
   286  }
   287  db func gmatch gmatch
   288  foreach {tn col pattern} {
   289    1 y {xa*}
   290    2 y {xb*}
   291    3 y {xc*}
   292    4 x {xa*}
   293    5 x {xb*}
   294    6 x {xc*}
   295  } {
   296    set res [db eval "SELECT rowid FROM t5 WHERE gmatch($col, \$pattern)"]
   297    set query "$col : $pattern"
   298    do_execsql_test 6.$tn { SELECT rowid FROM t5($query) } $res
   299  }
   300  
   301  #-------------------------------------------------------------------------
   302  # Check that the various ways of creating prefix indexes produce the
   303  # same database on disk.
   304  #
   305  save_prng_state
   306  foreach {tn create} {
   307    1 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1,2,3") }
   308    2 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2 3") }
   309    3 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix=1, prefix=2, prefix=3) }
   310    4 { CREATE VIRTUAL TABLE tt USING fts5(x, y, prefix="1 2", prefix=3) }
   311  } {
   312    execsql { DROP TABLE IF EXISTS tt }
   313    restore_prng_state
   314    execsql $create
   315    execsql {
   316      INSERT INTO tt VALUES('cc b ggg ccc aa eee hh', 'aa g b hh a e');
   317      INSERT INTO tt VALUES('cc bb cc gg j g cc', 'ii jjj ggg jjj cc cc');
   318      INSERT INTO tt VALUES('h eee cc h iii', 'aaa iii dd iii dd');
   319      INSERT INTO tt VALUES('jjj hh eee c e b gg', 'j bbb jj ddd jj');
   320      INSERT INTO tt VALUES('ii hhh aaa ff c hhh iii', 'j cc hh bb e');
   321      INSERT INTO tt VALUES('e fff hhh i aaa', 'g b aa gg c aa dd');
   322      INSERT INTO tt VALUES('i aaa ccc gg hhh aa h', 'j bbb bbb d ff');
   323      INSERT INTO tt VALUES('g f gg ff ff jjj d', 'jjj d j fff fff ee j');
   324      INSERT INTO tt VALUES('a cc e ccc jjj c', 'ccc iii d bb a eee g');
   325      INSERT INTO tt VALUES('jj hh hh bb bbb gg', 'j c jjj bb iii f');
   326      INSERT INTO tt VALUES('a ggg g cc ccc aa', 'jjj j j aaa c');
   327      INSERT INTO tt VALUES('ddd j dd b i', 'aaa bbb iii ggg ff ccc ddd');
   328      INSERT INTO tt VALUES('jj ii hh c ii h gg', 'hhh bbb ddd bbb hh g ggg');
   329      INSERT INTO tt VALUES('aa hhh ccc h ggg ccc', 'iii d jj a ff ii');
   330    }
   331  
   332    #db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM tt_data} {puts $r}
   333  
   334    if {$tn==1} {
   335      set ::checksum [execsql {SELECT md5sum(id, block) FROM tt_data}]
   336    } else {
   337      do_execsql_test 7.$tn {
   338        SELECT md5sum(id, block) FROM tt_data
   339      } [list $::checksum]
   340    }
   341  }
   342  
   343  finish_test