gitlab.com/CoiaPrant/sqlite3@v1.19.1/testdata/tcl/fts4langid.test (about) 1 # 2012 March 01 2 # 3 # The author disclaims copyright to this source code. In place of 4 # a legal notice, here is a blessing: 5 # 6 # May you do good and not evil. 7 # May you find forgiveness for yourself and forgive others. 8 # May you share freely, never taking more than you give. 9 # 10 #************************************************************************* 11 # This file implements regression tests for SQLite library. The 12 # focus of this script is testing the languageid=xxx FTS4 option. 13 # 14 15 set testdir [file dirname $argv0] 16 source $testdir/tester.tcl 17 18 # If SQLITE_ENABLE_FTS3 is defined, omit this file. 19 ifcapable !fts3 { 20 finish_test 21 return 22 } 23 24 set ::testprefix fts4langid 25 26 #--------------------------------------------------------------------------- 27 # Test plan: 28 # 29 # 1.* - Warm-body tests created for specific purposes during development. 30 # Passing these doesn't really prove much. 31 # 32 # 2.1.* - Test that FTS queries only ever return rows associated with 33 # the requested language. 34 # 35 # 2.2.* - Same as 2.1.*, after an 'optimize' command. 36 # 37 # 2.3.* - Same as 2.1.*, after a 'rebuild' command. 38 # 39 # 3.* - Tests with content= tables. Both where there is a real 40 # underlying content table and where there is not. 41 # 42 # 4.* - Test that if one is provided, the tokenizer xLanguage method 43 # is called to configure the tokenizer before tokenizing query 44 # or document text. 45 # 46 # 5.* - Test the fts4aux table when the associated FTS4 table contains 47 # multiple languages. 48 # 49 50 do_execsql_test 1.1 { 51 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); 52 } 53 54 do_execsql_test 1.2 { 55 SELECT sql FROM sqlite_master WHERE name = 't1_content'; 56 } {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} 57 58 do_execsql_test 1.3 {SELECT docid FROM t1} {} 59 do_execsql_test 1.4 {SELECT lang_id FROM t1} {} 60 61 do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} 62 do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} 63 64 do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} 65 do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} 66 67 do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} 68 do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} 69 70 do_execsql_test 1.11 { 71 CREATE VIRTUAL TABLE t2 USING fts4; 72 INSERT INTO t2 VALUES('abc'); 73 } 74 do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 75 76 do_execsql_test 1.13 { 77 DROP TABLE t1; 78 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); 79 INSERT INTO t1(content) VALUES('a b c'); 80 INSERT INTO t1(content, lang_id) VALUES('a b c', 1); 81 } 82 83 do_execsql_test 1.14 { 84 SELECT rowid FROM t1 WHERE t1 MATCH 'b'; 85 } {1} 86 do_execsql_test 1.15 { 87 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; 88 } {1} 89 90 do_execsql_test 1.16 { 91 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; 92 } {2} 93 94 do_catchsql_test 1.17 { 95 INSERT INTO t1(content, lang_id) VALUES('123', -1); 96 } {1 {constraint failed}} 97 98 do_execsql_test 1.18 { 99 DROP TABLE t1; 100 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); 101 INSERT INTO t1(content, lang_id) VALUES('A', 13); 102 INSERT INTO t1(content, lang_id) VALUES('B', 13); 103 INSERT INTO t1(content, lang_id) VALUES('C', 13); 104 INSERT INTO t1(content, lang_id) VALUES('D', 13); 105 INSERT INTO t1(content, lang_id) VALUES('E', 13); 106 INSERT INTO t1(content, lang_id) VALUES('F', 13); 107 INSERT INTO t1(content, lang_id) VALUES('G', 13); 108 INSERT INTO t1(content, lang_id) VALUES('H', 13); 109 INSERT INTO t1(content, lang_id) VALUES('I', 13); 110 INSERT INTO t1(content, lang_id) VALUES('J', 13); 111 INSERT INTO t1(content, lang_id) VALUES('K', 13); 112 INSERT INTO t1(content, lang_id) VALUES('L', 13); 113 INSERT INTO t1(content, lang_id) VALUES('M', 13); 114 INSERT INTO t1(content, lang_id) VALUES('N', 13); 115 INSERT INTO t1(content, lang_id) VALUES('O', 13); 116 INSERT INTO t1(content, lang_id) VALUES('P', 13); 117 INSERT INTO t1(content, lang_id) VALUES('Q', 13); 118 INSERT INTO t1(content, lang_id) VALUES('R', 13); 119 INSERT INTO t1(content, lang_id) VALUES('S', 13); 120 SELECT rowid FROM t1 WHERE t1 MATCH 'A'; 121 } {} 122 123 124 #------------------------------------------------------------------------- 125 # Test cases 2.* 126 # 127 proc build_multilingual_db_1 {db} { 128 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } 129 130 set xwords [list zero one two three four five six seven eight nine ten] 131 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] 132 133 for {set i 0} {$i < 1000} {incr i} { 134 set iLangid [expr $i%9] 135 set x "" 136 set y "" 137 138 set x [list] 139 lappend x [lindex $xwords [expr ($i / 1000) % 10]] 140 lappend x [lindex $xwords [expr ($i / 100) % 10]] 141 lappend x [lindex $xwords [expr ($i / 10) % 10]] 142 lappend x [lindex $xwords [expr ($i / 1) % 10]] 143 144 set y [list] 145 lappend y [lindex $ywords [expr ($i / 1000) % 10]] 146 lappend y [lindex $ywords [expr ($i / 100) % 10]] 147 lappend y [lindex $ywords [expr ($i / 10) % 10]] 148 lappend y [lindex $ywords [expr ($i / 1) % 10]] 149 150 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } 151 } 152 153 $db eval { 154 CREATE TABLE data(x, y, l); 155 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; 156 } 157 } 158 159 proc rowid_list_set_langid {langid} { 160 set ::rowid_list_langid $langid 161 } 162 proc rowid_list {pattern} { 163 set langid $::rowid_list_langid 164 set res [list] 165 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { 166 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { 167 lappend res $rowid 168 } 169 } 170 return $res 171 } 172 173 proc or_merge_list {list1 list2} { 174 set res [list] 175 176 set i1 0 177 set i2 0 178 179 set n1 [llength $list1] 180 set n2 [llength $list2] 181 182 while {$i1 < $n1 && $i2 < $n2} { 183 set e1 [lindex $list1 $i1] 184 set e2 [lindex $list2 $i2] 185 186 if {$e1==$e2} { 187 lappend res $e1 188 incr i1 189 incr i2 190 } elseif {$e1 < $e2} { 191 lappend res $e1 192 incr i1 193 } else { 194 lappend res $e2 195 incr i2 196 } 197 } 198 199 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] 200 } 201 202 proc or_merge_lists {args} { 203 set res [lindex $args 0] 204 for {set i 1} {$i < [llength $args]} {incr i} { 205 set res [or_merge_list $res [lindex $args $i]] 206 } 207 set res 208 } 209 210 proc and_merge_list {list1 list2} { 211 foreach i $list2 { set a($i) 1 } 212 set res [list] 213 foreach i $list1 { 214 if {[info exists a($i)]} {lappend res $i} 215 } 216 set res 217 } 218 219 220 proc and_merge_lists {args} { 221 set res [lindex $args 0] 222 for {set i 1} {$i < [llength $args]} {incr i} { 223 set res [and_merge_list $res [lindex $args $i]] 224 } 225 set res 226 } 227 228 proc filter_list {list langid} { 229 set res [list] 230 foreach i $list { 231 if {($i % 9) == $langid} {lappend res $i} 232 } 233 set res 234 } 235 236 do_test 2.0 { 237 reset_db 238 build_multilingual_db_1 db 239 } {} 240 241 proc do_test_query1 {tn query res_script} { 242 for {set langid 0} {$langid < 10} {incr langid} { 243 rowid_list_set_langid $langid 244 set res [eval $res_script] 245 246 set actual [ 247 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} 248 ] 249 do_test $tn.$langid [list set {} $actual] $res 250 } 251 } 252 253 # Run some queries. 254 do_test_query1 2.1.1 {delta} { rowid_list delta } 255 do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } 256 do_test_query1 2.1.3 {zero one two} { 257 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 258 } 259 do_test_query1 2.1.4 {"zero one" OR "one two"} { 260 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 261 } 262 263 # Now try the same tests as above, but after running the 'optimize' 264 # command on the FTS table. 265 # 266 do_execsql_test 2.2 { 267 INSERT INTO t2(t2) VALUES('optimize'); 268 SELECT count(*) FROM t2_segdir; 269 } {9} 270 do_test_query1 2.2.1 {delta} { rowid_list delta } 271 do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } 272 do_test_query1 2.2.3 {zero one two} { 273 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 274 } 275 do_test_query1 2.2.4 {"zero one" OR "one two"} { 276 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 277 } 278 279 # And rebuild. 280 # 281 do_test 2.3 { 282 reset_db 283 build_multilingual_db_1 db 284 execsql { INSERT INTO t2(t2) VALUES('rebuild') } 285 } {} 286 do_test_query1 2.3.1 {delta} { rowid_list delta } 287 do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } 288 do_test_query1 2.3.3 {zero one two} { 289 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 290 } 291 do_test_query1 2.3.4 {"zero one" OR "one two"} { 292 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 293 } 294 295 #------------------------------------------------------------------------- 296 # Test cases 3.* 297 # 298 do_test 3.0 { 299 reset_db 300 build_multilingual_db_1 db 301 execsql { 302 CREATE TABLE t3_data(l, x, y); 303 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; 304 DROP TABLE t2; 305 } 306 } {} 307 do_execsql_test 3.1 { 308 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); 309 INSERT INTO t2(t2) VALUES('rebuild'); 310 } 311 312 do_test_query1 3.1.1 {delta} { rowid_list delta } 313 do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } 314 do_test_query1 3.1.3 {zero one two} { 315 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 316 } 317 do_test_query1 3.1.4 {"zero one" OR "one two"} { 318 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 319 } 320 321 do_execsql_test 3.2.1 { 322 DROP TABLE t2; 323 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); 324 } 325 326 do_execsql_test 3.2.2 { 327 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; 328 } 329 330 do_execsql_test 3.2.3 { 331 DROP TABLE t3_data; 332 } 333 334 do_test_query1 3.3.1 {delta} { rowid_list delta } 335 do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } 336 do_test_query1 3.3.3 {zero one two} { 337 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 338 } 339 do_test_query1 3.3.4 {"zero one" OR "one two"} { 340 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 341 } 342 343 do_execsql_test 3.4 { 344 CREATE TABLE t8c(a, b); 345 CREATE VIRTUAL TABLE t8 USING fts4(content=t8c, languageid=langid); 346 INSERT INTO t8(docid, a, b) VALUES(-1, 'one two three', 'x y z'); 347 SELECT docid FROM t8 WHERE t8 MATCH 'one x' AND langid=0 348 } {-1} 349 350 #------------------------------------------------------------------------- 351 # Test cases 4.* 352 # 353 proc build_multilingual_db_2 {db} { 354 $db eval { 355 CREATE VIRTUAL TABLE t4 USING fts4( 356 tokenize=testtokenizer, 357 languageid=lid 358 ); 359 } 360 for {set i 0} {$i < 50} {incr i} { 361 execsql { 362 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 363 } 364 } 365 } 366 367 do_test 4.1.0 { 368 reset_db 369 set ptr [fts3_test_tokenizer] 370 sqlite3_db_config db SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER 1 371 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } 372 build_multilingual_db_2 db 373 } {} 374 do_execsql_test 4.1.1 { 375 SELECT docid FROM t4 WHERE t4 MATCH 'quick'; 376 } {0} 377 do_execsql_test 4.1.2 { 378 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; 379 } {} 380 do_execsql_test 4.1.3 { 381 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; 382 } {1} 383 for {set i 0} {$i < 50} {incr i} { 384 do_execsql_test 4.1.4.$i { 385 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; 386 } [expr 0==($i%2)] 387 } 388 do_catchsql_test 4.1.5 { 389 INSERT INTO t4(content, lid) VALUES('hello world', 101) 390 } {1 {SQL logic error}} 391 392 #------------------------------------------------------------------------- 393 # Test cases 5.* 394 # 395 # The following test cases are designed to detect a 32-bit overflow bug 396 # that existed at one point. 397 # 398 proc build_multilingual_db_3 {db} { 399 $db eval { 400 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid); 401 } 402 set languages [list 0 1 2 [expr 1<<30]] 403 404 foreach lid $languages { 405 execsql { 406 INSERT INTO t5(docid, content, lid) VALUES( 407 $lid, 'My language is ' || $lid, $lid 408 ) 409 } 410 } 411 } 412 413 do_test 5.1.0 { 414 reset_db 415 build_multilingual_db_3 db 416 } {} 417 418 do_execsql_test 5.1.1 { 419 SELECT level FROM t5_segdir; 420 } [list 0 1024 2048 [expr 1<<40]] 421 422 do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0 423 foreach langid [list 0 1 2 [expr 1<<30]] { 424 do_execsql_test 5.2.$langid { 425 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid 426 } $langid 427 } 428 429 set lid [expr 1<<30] 430 do_execsql_test 5.3.1 { 431 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid); 432 INSERT INTO t6 VALUES('I belong to language 0!'); 433 } 434 do_test 5.3.2 { 435 for {set i 0} {$i < 20} {incr i} { 436 execsql { 437 INSERT INTO t6(content, lid) VALUES( 438 'I (row '||$i||') belong to langauge N!', $lid 439 ); 440 } 441 } 442 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } 443 } {1} 444 445 do_test 5.3.3 { 446 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid} 447 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} 448 449 do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {} 450 do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1} 451 do_execsql_test 5.3.6 { 452 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid 453 } {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} 454 455 456 set lid [expr 1<<30] 457 foreach lid [list 4 [expr 1<<30]] { 458 do_execsql_test 5.4.$lid.1 { 459 DELETE FROM t6; 460 SELECT count(*) FROM t6_segdir; 461 SELECT count(*) FROM t6_segments; 462 } {0 0} 463 do_execsql_test 5.4.$lid.2 { 464 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid); 465 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid); 466 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid); 467 INSERT INTO t6(content, lid) VALUES('zero one one', $lid); 468 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid); 469 INSERT INTO t6(content, lid) VALUES('one zero one', $lid); 470 INSERT INTO t6(content, lid) VALUES('one one zero', $lid); 471 INSERT INTO t6(content, lid) VALUES('one one one', $lid); 472 473 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; 474 } {1 2 5} 475 476 do_execsql_test 5.4.$lid.3 { 477 SELECT count(*) FROM t6_segdir; 478 SELECT count(*) FROM t6_segments; 479 } {8 0} 480 481 do_execsql_test 5.4.$lid.4 { 482 INSERT INTO t6(t6) VALUES('merge=100,3'); 483 INSERT INTO t6(t6) VALUES('merge=100,3'); 484 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; 485 } {1 2 5} 486 487 do_execsql_test 5.4.$lid.5 { 488 SELECT count(*) FROM t6_segdir; 489 SELECT count(*) FROM t6_segments; 490 } {1 2} 491 } 492 493 reset_db 494 do_execsql_test 6.0 { 495 CREATE VIRTUAL TABLE vt0 USING fts4(c0, languageid="lid"); 496 INSERT INTO vt0 VALUES ('a'), ('b'); 497 BEGIN; 498 UPDATE vt0 SET lid = 1 WHERE lid=0; 499 } 500 do_execsql_test 6.1 { 501 INSERT INTO vt0(vt0) VALUES('integrity-check'); 502 } 503 do_execsql_test 6.2 { 504 COMMIT; 505 INSERT INTO vt0(vt0) VALUES('integrity-check'); 506 } 507 finish_test