github.com/jdgcs/sqlite3@v1.12.1-0.20210908114423-bc5f96e4dd51/testdata/tcl/fts3rnd.test (about) 1 # 2009 December 03 2 # 3 # May you do good and not evil. 4 # May you find forgiveness for yourself and forgive others. 5 # May you share freely, never taking more than you give. 6 # 7 #*********************************************************************** 8 # 9 # Brute force (random data) tests for FTS3. 10 # 11 12 #------------------------------------------------------------------------- 13 # 14 # The FTS3 tests implemented in this file focus on testing that FTS3 15 # returns the correct set of documents for various types of full-text 16 # query. This is done using pseudo-randomly generated data and queries. 17 # The expected result of each query is calculated using Tcl code. 18 # 19 # 1. The database is initialized to contain a single table with three 20 # columns. 100 rows are inserted into the table. Each of the three 21 # values in each row is a document consisting of between 0 and 100 22 # terms. Terms are selected from a vocabulary of $G(nVocab) terms. 23 # 24 # 2. The following is performed 100 times: 25 # 26 # a. A row is inserted into the database. The row contents are 27 # generated as in step 1. The docid is a pseudo-randomly selected 28 # value between 0 and 1000000. 29 # 30 # b. A psuedo-randomly selected row is updated. One of its columns is 31 # set to contain a new document generated in the same way as the 32 # documents in step 1. 33 # 34 # c. A psuedo-randomly selected row is deleted. 35 # 36 # d. For each of several types of fts3 queries, 10 SELECT queries 37 # of the form: 38 # 39 # SELECT docid FROM <tbl> WHERE <tbl> MATCH '<query>' 40 # 41 # are evaluated. The results are compared to those calculated by 42 # Tcl code in this file. The patterns used for the different query 43 # types are: 44 # 45 # 1. query = <term> 46 # 2. query = <prefix> 47 # 3. query = "<term> <term>" 48 # 4. query = "<term> <term> <term>" 49 # 5. query = "<prefix> <prefix> <prefix>" 50 # 6. query = <term> NEAR <term> 51 # 7. query = <term> NEAR/11 <term> NEAR/11 <term> 52 # 8. query = <term> OR <term> 53 # 9. query = <term> NOT <term> 54 # 10. query = <term> AND <term> 55 # 11. query = <term> NEAR <term> OR <term> NEAR <term> 56 # 12. query = <term> NEAR <term> NOT <term> NEAR <term> 57 # 13. query = <term> NEAR <term> AND <term> NEAR <term> 58 # 59 # where <term> is a term psuedo-randomly selected from the vocabulary 60 # and prefix is the first 2 characters of such a term followed by 61 # a "*" character. 62 # 63 # Every second iteration, steps (a) through (d) above are performed 64 # within a single transaction. This forces the queries in (d) to 65 # read data from both the database and the in-memory hash table 66 # that caches the full-text index entries created by steps (a), (b) 67 # and (c) until the transaction is committed. 68 # 69 # The procedure above is run 5 times, using advisory fts3 node sizes of 50, 70 # 500, 1000 and 2000 bytes. 71 # 72 # After the test using an advisory node-size of 50, an OOM test is run using 73 # the database. This test is similar to step (d) above, except that it tests 74 # the effects of transient and persistent OOM conditions encountered while 75 # executing each query. 76 # 77 78 set testdir [file dirname $argv0] 79 source $testdir/tester.tcl 80 81 # If this build does not include FTS3, skip the tests in this file. 82 # 83 ifcapable !fts3 { finish_test ; return } 84 source $testdir/fts3_common.tcl 85 source $testdir/malloc_common.tcl 86 87 set G(nVocab) 100 88 89 set nVocab 100 90 set lVocab [list] 91 92 expr srand(0) 93 94 # Generate a vocabulary of nVocab words. Each word is 3 characters long. 95 # 96 set lChar {a b c d e f g h i j k l m n o p q r s t u v w x y z} 97 for {set i 0} {$i < $nVocab} {incr i} { 98 set len [expr int(rand()*3)+2] 99 set word [lindex $lChar [expr int(rand()*26)]] 100 append word [lindex $lChar [expr int(rand()*26)]] 101 if {$len>2} { append word [lindex $lChar [expr int(rand()*26)]] } 102 if {$len>3} { append word [lindex $lChar [expr int(rand()*26)]] } 103 lappend lVocab $word 104 } 105 106 proc random_term {} { 107 lindex $::lVocab [expr {int(rand()*$::nVocab)}] 108 } 109 110 # Return a document consisting of $nWord arbitrarily selected terms 111 # from the $::lVocab list. 112 # 113 proc generate_doc {nWord} { 114 set doc [list] 115 for {set i 0} {$i < $nWord} {incr i} { 116 lappend doc [random_term] 117 } 118 return $doc 119 } 120 121 122 123 # Primitives to update the table. 124 # 125 unset -nocomplain t1 126 proc insert_row {rowid} { 127 set a [generate_doc [expr int((rand()*100))]] 128 set b [generate_doc [expr int((rand()*100))]] 129 set c [generate_doc [expr int((rand()*100))]] 130 execsql { INSERT INTO t1(docid, a, b, c) VALUES($rowid, $a, $b, $c) } 131 set ::t1($rowid) [list $a $b $c] 132 } 133 proc delete_row {rowid} { 134 execsql { DELETE FROM t1 WHERE rowid = $rowid } 135 catch {unset ::t1($rowid)} 136 } 137 proc update_row {rowid} { 138 set cols {a b c} 139 set iCol [expr int(rand()*3)] 140 set doc [generate_doc [expr int((rand()*100))]] 141 lset ::t1($rowid) $iCol $doc 142 execsql "UPDATE t1 SET [lindex $cols $iCol] = \$doc WHERE rowid = \$rowid" 143 } 144 145 proc simple_phrase {zPrefix} { 146 set ret [list] 147 148 set reg [string map {* {[^ ]*}} $zPrefix] 149 set reg " $reg " 150 151 foreach key [lsort -integer [array names ::t1]] { 152 set value $::t1($key) 153 set cnt [list] 154 foreach col $value { 155 if {[regexp $reg " $col "]} { lappend ret $key ; break } 156 } 157 } 158 159 #lsort -uniq -integer $ret 160 set ret 161 } 162 163 # This [proc] is used to test the FTS3 matchinfo() function. 164 # 165 proc simple_token_matchinfo {zToken bDesc} { 166 167 set nDoc(0) 0 168 set nDoc(1) 0 169 set nDoc(2) 0 170 set nHit(0) 0 171 set nHit(1) 0 172 set nHit(2) 0 173 174 set dir -inc 175 if {$bDesc} { set dir -dec } 176 177 foreach key [array names ::t1] { 178 set value $::t1($key) 179 set a($key) [list] 180 foreach i {0 1 2} col $value { 181 set hit [llength [lsearch -all $col $zToken]] 182 lappend a($key) $hit 183 incr nHit($i) $hit 184 if {$hit>0} { incr nDoc($i) } 185 } 186 } 187 188 set ret [list] 189 foreach docid [lsort -integer $dir [array names a]] { 190 if { [lindex [lsort -integer $a($docid)] end] } { 191 set matchinfo [list 1 3] 192 foreach i {0 1 2} hit $a($docid) { 193 lappend matchinfo $hit $nHit($i) $nDoc($i) 194 } 195 lappend ret $docid $matchinfo 196 } 197 } 198 199 set ret 200 } 201 202 proc simple_near {termlist nNear} { 203 set ret [list] 204 205 foreach {key value} [array get ::t1] { 206 foreach v $value { 207 208 set l [lsearch -exact -all $v [lindex $termlist 0]] 209 foreach T [lrange $termlist 1 end] { 210 set l2 [list] 211 foreach i $l { 212 set iStart [expr $i - $nNear - 1] 213 set iEnd [expr $i + $nNear + 1] 214 if {$iStart < 0} {set iStart 0} 215 foreach i2 [lsearch -exact -all [lrange $v $iStart $iEnd] $T] { 216 incr i2 $iStart 217 if {$i2 != $i} { lappend l2 $i2 } 218 } 219 } 220 set l [lsort -uniq -integer $l2] 221 } 222 223 if {[llength $l]} { 224 #puts "MATCH($key): $v" 225 lappend ret $key 226 } 227 } 228 } 229 230 lsort -unique -integer $ret 231 } 232 233 # The following three procs: 234 # 235 # setup_not A B 236 # setup_or A B 237 # setup_and A B 238 # 239 # each take two arguments. Both arguments must be lists of integer values 240 # sorted by value. The return value is the list produced by evaluating 241 # the equivalent of "A op B", where op is the FTS3 operator NOT, OR or 242 # AND. 243 # 244 proc setop_not {A B} { 245 foreach b $B { set n($b) {} } 246 set ret [list] 247 foreach a $A { if {![info exists n($a)]} {lappend ret $a} } 248 return $ret 249 } 250 proc setop_or {A B} { 251 lsort -integer -uniq [concat $A $B] 252 } 253 proc setop_and {A B} { 254 foreach b $B { set n($b) {} } 255 set ret [list] 256 foreach a $A { if {[info exists n($a)]} {lappend ret $a} } 257 return $ret 258 } 259 260 proc mit {blob} { 261 set scan(littleEndian) i* 262 set scan(bigEndian) I* 263 binary scan $blob $scan($::tcl_platform(byteOrder)) r 264 return $r 265 } 266 db func mit mit 267 set sqlite_fts3_enable_parentheses 1 268 269 proc do_orderbydocid_test {tn sql res} { 270 uplevel [list do_select_test $tn.asc "$sql ORDER BY docid ASC" $res] 271 uplevel [list do_select_test $tn.desc "$sql ORDER BY docid DESC" \ 272 [lsort -int -dec $res] 273 ] 274 } 275 276 set NUM_TRIALS 100 277 278 foreach {nodesize order} { 279 50 DESC 280 50 ASC 281 500 ASC 282 1000 DESC 283 2000 ASC 284 } { 285 catch { array unset ::t1 } 286 set testname "$nodesize/$order" 287 288 # Create the FTS3 table. Populate it (and the Tcl array) with 100 rows. 289 # 290 db transaction { 291 catchsql { DROP TABLE t1 } 292 execsql "CREATE VIRTUAL TABLE t1 USING fts4(a, b, c, order=$order)" 293 execsql "INSERT INTO t1(t1) VALUES('nodesize=$nodesize')" 294 for {set i 0} {$i < 100} {incr i} { insert_row $i } 295 } 296 297 for {set iTest 1} {$iTest <= $NUM_TRIALS} {incr iTest} { 298 catchsql COMMIT 299 300 set DO_MALLOC_TEST 0 301 set nRep 10 302 if {$iTest==100 && $nodesize==50} { 303 set DO_MALLOC_TEST 1 304 set nRep 2 305 } 306 307 set ::testprefix fts3rnd-1.$testname.$iTest 308 309 # Delete one row, update one row and insert one row. 310 # 311 set rows [array names ::t1] 312 set nRow [llength $rows] 313 set iUpdate [lindex $rows [expr {int(rand()*$nRow)}]] 314 set iDelete $iUpdate 315 while {$iDelete == $iUpdate} { 316 set iDelete [lindex $rows [expr {int(rand()*$nRow)}]] 317 } 318 set iInsert $iUpdate 319 while {[info exists ::t1($iInsert)]} { 320 set iInsert [expr {int(rand()*1000000)}] 321 } 322 execsql BEGIN 323 insert_row $iInsert 324 update_row $iUpdate 325 delete_row $iDelete 326 if {0==($iTest%2)} { execsql COMMIT } 327 328 if {0==($iTest%2)} { 329 #do_test 0 { fts3_integrity_check t1 } ok 330 } 331 332 # Pick 10 terms from the vocabulary. Check that the results of querying 333 # the database for the set of documents containing each of these terms 334 # is the same as the result obtained by scanning the contents of the Tcl 335 # array for each term. 336 # 337 for {set i 0} {$i < 10} {incr i} { 338 set term [random_term] 339 do_select_test 1.$i.asc { 340 SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term 341 ORDER BY docid ASC 342 } [simple_token_matchinfo $term 0] 343 do_select_test 1.$i.desc { 344 SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term 345 ORDER BY docid DESC 346 } [simple_token_matchinfo $term 1] 347 } 348 349 # This time, use the first two characters of each term as a term prefix 350 # to query for. Test that querying the Tcl array produces the same results 351 # as querying the FTS3 table for the prefix. 352 # 353 for {set i 0} {$i < $nRep} {incr i} { 354 set prefix [string range [random_term] 0 end-1] 355 set match "${prefix}*" 356 do_orderbydocid_test 2.$i { 357 SELECT docid FROM t1 WHERE t1 MATCH $match 358 } [simple_phrase $match] 359 } 360 361 # Similar to the above, except for phrase queries. 362 # 363 for {set i 0} {$i < $nRep} {incr i} { 364 set term [list [random_term] [random_term]] 365 set match "\"$term\"" 366 do_orderbydocid_test 3.$i { 367 SELECT docid FROM t1 WHERE t1 MATCH $match 368 } [simple_phrase $term] 369 } 370 371 # Three word phrases. 372 # 373 for {set i 0} {$i < $nRep} {incr i} { 374 set term [list [random_term] [random_term] [random_term]] 375 set match "\"$term\"" 376 do_orderbydocid_test 4.$i { 377 SELECT docid FROM t1 WHERE t1 MATCH $match 378 } [simple_phrase $term] 379 } 380 381 # Three word phrases made up of term-prefixes. 382 # 383 for {set i 0} {$i < $nRep} {incr i} { 384 set query "[string range [random_term] 0 end-1]* " 385 append query "[string range [random_term] 0 end-1]* " 386 append query "[string range [random_term] 0 end-1]*" 387 388 set match "\"$query\"" 389 do_orderbydocid_test 5.$i { 390 SELECT docid FROM t1 WHERE t1 MATCH $match 391 } [simple_phrase $query] 392 } 393 394 # A NEAR query with terms as the arguments: 395 # 396 # ... MATCH '$term1 NEAR $term2' ... 397 # 398 for {set i 0} {$i < $nRep} {incr i} { 399 set terms [list [random_term] [random_term]] 400 set match [join $terms " NEAR "] 401 do_orderbydocid_test 6.$i { 402 SELECT docid FROM t1 WHERE t1 MATCH $match 403 } [simple_near $terms 10] 404 } 405 406 # A 3-way NEAR query with terms as the arguments. 407 # 408 for {set i 0} {$i < $nRep} {incr i} { 409 set terms [list [random_term] [random_term] [random_term]] 410 set nNear 11 411 set match [join $terms " NEAR/$nNear "] 412 do_orderbydocid_test 7.$i { 413 SELECT docid FROM t1 WHERE t1 MATCH $match 414 } [simple_near $terms $nNear] 415 } 416 417 # Set operations on simple term queries. 418 # 419 foreach {tn op proc} { 420 8 OR setop_or 421 9 NOT setop_not 422 10 AND setop_and 423 } { 424 for {set i 0} {$i < $nRep} {incr i} { 425 set term1 [random_term] 426 set term2 [random_term] 427 set match "$term1 $op $term2" 428 do_orderbydocid_test $tn.$i { 429 SELECT docid FROM t1 WHERE t1 MATCH $match 430 } [$proc [simple_phrase $term1] [simple_phrase $term2]] 431 } 432 } 433 434 # Set operations on NEAR queries. 435 # 436 foreach {tn op proc} { 437 11 OR setop_or 438 12 NOT setop_not 439 13 AND setop_and 440 } { 441 for {set i 0} {$i < $nRep} {incr i} { 442 set term1 [random_term] 443 set term2 [random_term] 444 set term3 [random_term] 445 set term4 [random_term] 446 set match "$term1 NEAR $term2 $op $term3 NEAR $term4" 447 do_orderbydocid_test $tn.$i { 448 SELECT docid FROM t1 WHERE t1 MATCH $match 449 } [$proc \ 450 [simple_near [list $term1 $term2] 10] \ 451 [simple_near [list $term3 $term4] 10] 452 ] 453 } 454 } 455 456 catchsql COMMIT 457 } 458 } 459 460 finish_test