modernc.org/cc@v1.0.1/v2/testdata/_sqlite/test/fts3_common.tcl (about) 1 # 2009 November 04 2 # 3 # The author disclaims copyright to this source code. In place of 4 # a legal notice, here is a blessing: 5 # 6 # May you do good and not evil. 7 # May you find forgiveness for yourself and forgive others. 8 # May you share freely, never taking more than you give. 9 # 10 #*********************************************************************** 11 # 12 # This file contains common code used the fts3 tests. At one point 13 # equivalent functionality was implemented in C code. But it is easier 14 # to use Tcl. 15 # 16 17 #------------------------------------------------------------------------- 18 # INSTRUCTIONS 19 # 20 # The following commands are available: 21 # 22 # fts3_build_db_1 N 23 # Using database handle [db] create an FTS4 table named t1 and populate 24 # it with N rows of data. N must be less than 10,000. Refer to the 25 # header comments above the proc implementation below for details. 26 # 27 # fts3_build_db_2 N 28 # Using database handle [db] create an FTS4 table named t2 and populate 29 # it with N rows of data. N must be less than 100,000. Refer to the 30 # header comments above the proc implementation below for details. 31 # 32 # fts3_integrity_check TBL 33 # TBL must be an FTS table in the database currently opened by handle 34 # [db]. This proc loads and tokenizes all documents within the table, 35 # then checks that the current contents of the FTS index matches the 36 # results. 37 # 38 # fts3_terms TBL WHERE 39 # Todo. 40 # 41 # fts3_doclist TBL TERM WHERE 42 # Todo. 43 # 44 # 45 # 46 47 #------------------------------------------------------------------------- 48 # USAGE: fts3_build_db_1 SWITCHES N 49 # 50 # Build a sample FTS table in the database opened by database connection 51 # [db]. The name of the new table is "t1". 52 # 53 proc fts3_build_db_1 {args} { 54 55 set default(-module) fts4 56 57 set nArg [llength $args] 58 if {($nArg%2)==0} { 59 error "wrong # args: should be \"fts3_build_db_1 ?switches? n\"" 60 } 61 62 set n [lindex $args [expr $nArg-1]] 63 array set opts [array get default] 64 array set opts [lrange $args 0 [expr $nArg-2]] 65 foreach k [array names opts] { 66 if {0==[info exists default($k)]} { error "unknown option: $k" } 67 } 68 69 if {$n > 10000} {error "n must be <= 10000"} 70 db eval "CREATE VIRTUAL TABLE t1 USING $opts(-module) (x, y)" 71 72 set xwords [list zero one two three four five six seven eight nine ten] 73 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] 74 75 for {set i 0} {$i < $n} {incr i} { 76 set x "" 77 set y "" 78 79 set x [list] 80 lappend x [lindex $xwords [expr ($i / 1000) % 10]] 81 lappend x [lindex $xwords [expr ($i / 100) % 10]] 82 lappend x [lindex $xwords [expr ($i / 10) % 10]] 83 lappend x [lindex $xwords [expr ($i / 1) % 10]] 84 85 set y [list] 86 lappend y [lindex $ywords [expr ($i / 1000) % 10]] 87 lappend y [lindex $ywords [expr ($i / 100) % 10]] 88 lappend y [lindex $ywords [expr ($i / 10) % 10]] 89 lappend y [lindex $ywords [expr ($i / 1) % 10]] 90 91 db eval { INSERT INTO t1(docid, x, y) VALUES($i, $x, $y) } 92 } 93 } 94 95 #------------------------------------------------------------------------- 96 # USAGE: fts3_build_db_2 N ARGS 97 # 98 # Build a sample FTS table in the database opened by database connection 99 # [db]. The name of the new table is "t2". 100 # 101 proc fts3_build_db_2 {args} { 102 103 set default(-module) fts4 104 set default(-extra) "" 105 106 set nArg [llength $args] 107 if {($nArg%2)==0} { 108 error "wrong # args: should be \"fts3_build_db_1 ?switches? n\"" 109 } 110 111 set n [lindex $args [expr $nArg-1]] 112 array set opts [array get default] 113 array set opts [lrange $args 0 [expr $nArg-2]] 114 foreach k [array names opts] { 115 if {0==[info exists default($k)]} { error "unknown option: $k" } 116 } 117 118 if {$n > 100000} {error "n must be <= 100000"} 119 120 set sql "CREATE VIRTUAL TABLE t2 USING $opts(-module) (content" 121 if {$opts(-extra) != ""} { 122 append sql ", " $opts(-extra) 123 } 124 append sql ")" 125 db eval $sql 126 127 set chars [list a b c d e f g h i j k l m n o p q r s t u v w x y z ""] 128 129 for {set i 0} {$i < $n} {incr i} { 130 set word "" 131 set nChar [llength $chars] 132 append word [lindex $chars [expr {($i / 1) % $nChar}]] 133 append word [lindex $chars [expr {($i / $nChar) % $nChar}]] 134 append word [lindex $chars [expr {($i / ($nChar*$nChar)) % $nChar}]] 135 136 db eval { INSERT INTO t2(docid, content) VALUES($i, $word) } 137 } 138 } 139 140 #------------------------------------------------------------------------- 141 # USAGE: fts3_integrity_check TBL 142 # 143 # This proc is used to verify that the full-text index is consistent with 144 # the contents of the fts3 table. In other words, it checks that the 145 # data in the %_contents table matches that in the %_segdir and %_segments 146 # tables. 147 # 148 # This is not an efficient procedure. It uses a lot of memory and a lot 149 # of CPU. But it is better than not checking at all. 150 # 151 # The procedure is: 152 # 153 # 1) Read the entire full-text index from the %_segdir and %_segments 154 # tables into memory. For each entry in the index, the following is 155 # done: 156 # 157 # set C($iDocid,$iCol,$iPosition) $zTerm 158 # 159 # 2) Iterate through each column of each row of the %_content table. 160 # Tokenize all documents, and check that for each token there is 161 # a corresponding entry in the $C array. After checking a token, 162 # [unset] the $C array entry. 163 # 164 # 3) Check that array $C is now empty. 165 # 166 # 167 proc fts3_integrity_check {tbl} { 168 169 fts3_read2 $tbl 1 A 170 171 foreach zTerm [array names A] { 172 #puts $zTerm 173 foreach doclist $A($zTerm) { 174 set docid 0 175 while {[string length $doclist]>0} { 176 set iCol 0 177 set iPos 0 178 set lPos [list] 179 set lCol [list] 180 181 # First varint of a doclist-entry is the docid. Delta-compressed 182 # with respect to the docid of the previous entry. 183 # 184 incr docid [gobble_varint doclist] 185 if {[info exists D($zTerm,$docid)]} { 186 while {[set iDelta [gobble_varint doclist]] != 0} {} 187 continue 188 } 189 set D($zTerm,$docid) 1 190 191 # Gobble varints until the 0x00 that terminates the doclist-entry 192 # is found. 193 while {[set iDelta [gobble_varint doclist]] > 0} { 194 if {$iDelta == 1} { 195 set iCol [gobble_varint doclist] 196 set iPos 0 197 } else { 198 incr iPos $iDelta 199 incr iPos -2 200 set C($docid,$iCol,$iPos) $zTerm 201 } 202 } 203 } 204 } 205 } 206 207 foreach key [array names C] { 208 #puts "$key -> $C($key)" 209 } 210 211 212 db eval "SELECT * FROM ${tbl}_content" E { 213 set iCol 0 214 set iDoc $E(docid) 215 foreach col [lrange $E(*) 1 end] { 216 set c $E($col) 217 set sql {SELECT fts3_tokenizer_test('simple', $c)} 218 219 foreach {pos term dummy} [db one $sql] { 220 if {![info exists C($iDoc,$iCol,$pos)]} { 221 set es "Error at docid=$iDoc col=$iCol pos=$pos. Index is missing" 222 lappend errors $es 223 } else { 224 if {[string compare $C($iDoc,$iCol,$pos) $term]} { 225 set es "Error at docid=$iDoc col=$iCol pos=$pos. Index " 226 append es "has \"$C($iDoc,$iCol,$pos)\", document has \"$term\"" 227 lappend errors $es 228 } 229 unset C($iDoc,$iCol,$pos) 230 } 231 } 232 incr iCol 233 } 234 } 235 236 foreach c [array names C] { 237 lappend errors "Bad index entry: $c -> $C($c)" 238 } 239 240 if {[info exists errors]} { return [join $errors "\n"] } 241 return "ok" 242 } 243 244 # USAGE: fts3_terms TBL WHERE 245 # 246 # Argument TBL must be the name of an FTS3 table. Argument WHERE is an 247 # SQL expression that will be used as the WHERE clause when scanning 248 # the %_segdir table. As in the following query: 249 # 250 # "SELECT * FROM ${TBL}_segdir WHERE ${WHERE}" 251 # 252 # This function returns a list of all terms present in the segments 253 # selected by the statement above. 254 # 255 proc fts3_terms {tbl where} { 256 fts3_read $tbl $where a 257 return [lsort [array names a]] 258 } 259 260 261 # USAGE: fts3_doclist TBL TERM WHERE 262 # 263 # Argument TBL must be the name of an FTS3 table. TERM is a term that may 264 # or may not be present in the table. Argument WHERE is used to select a 265 # subset of the b-tree segments in the associated full-text index as 266 # described above for [fts3_terms]. 267 # 268 # This function returns the results of merging the doclists associated 269 # with TERM in the selected segments. Each doclist is an element of the 270 # returned list. Each doclist is formatted as follows: 271 # 272 # [$docid ?$col[$off1 $off2...]?...] 273 # 274 # The formatting is odd for a Tcl command in order to be compatible with 275 # the original C-language implementation. If argument WHERE is "1", then 276 # any empty doclists are omitted from the returned list. 277 # 278 proc fts3_doclist {tbl term where} { 279 fts3_read $tbl $where a 280 281 282 foreach doclist $a($term) { 283 set docid 0 284 285 while {[string length $doclist]>0} { 286 set iCol 0 287 set iPos 0 288 set lPos [list] 289 set lCol [list] 290 incr docid [gobble_varint doclist] 291 292 while {[set iDelta [gobble_varint doclist]] > 0} { 293 if {$iDelta == 1} { 294 lappend lCol [list $iCol $lPos] 295 set iPos 0 296 set lPos [list] 297 set iCol [gobble_varint doclist] 298 } else { 299 incr iPos $iDelta 300 incr iPos -2 301 lappend lPos $iPos 302 } 303 } 304 305 if {[llength $lPos]>0} { 306 lappend lCol [list $iCol $lPos] 307 } 308 309 if {$where != "1" || [llength $lCol]>0} { 310 set ret($docid) $lCol 311 } else { 312 unset -nocomplain ret($docid) 313 } 314 } 315 } 316 317 set lDoc [list] 318 foreach docid [lsort -integer [array names ret]] { 319 set lCol [list] 320 set cols "" 321 foreach col $ret($docid) { 322 foreach {iCol lPos} $col {} 323 append cols " $iCol\[[join $lPos { }]\]" 324 } 325 lappend lDoc "\[${docid}${cols}\]" 326 } 327 328 join $lDoc " " 329 } 330 331 ########################################################################### 332 333 proc gobble_varint {varname} { 334 upvar $varname blob 335 set n [read_fts3varint $blob ret] 336 set blob [string range $blob $n end] 337 return $ret 338 } 339 proc gobble_string {varname nLength} { 340 upvar $varname blob 341 set ret [string range $blob 0 [expr $nLength-1]] 342 set blob [string range $blob $nLength end] 343 return $ret 344 } 345 346 # The argument is a blob of data representing an FTS3 segment leaf. 347 # Return a list consisting of alternating terms (strings) and doclists 348 # (blobs of data). 349 # 350 proc fts3_readleaf {blob} { 351 set zPrev "" 352 set terms [list] 353 354 while {[string length $blob] > 0} { 355 set nPrefix [gobble_varint blob] 356 set nSuffix [gobble_varint blob] 357 358 set zTerm [string range $zPrev 0 [expr $nPrefix-1]] 359 append zTerm [gobble_string blob $nSuffix] 360 set nDoclist [gobble_varint blob] 361 set doclist [gobble_string blob $nDoclist] 362 363 lappend terms $zTerm $doclist 364 set zPrev $zTerm 365 } 366 367 return $terms 368 } 369 370 proc fts3_read2 {tbl where varname} { 371 upvar $varname a 372 array unset a 373 db eval " SELECT start_block, leaves_end_block, root 374 FROM ${tbl}_segdir WHERE $where 375 ORDER BY level ASC, idx DESC 376 " { 377 set c 0 378 binary scan $root c c 379 if {$c==0} { 380 foreach {t d} [fts3_readleaf $root] { lappend a($t) $d } 381 } else { 382 db eval " SELECT block 383 FROM ${tbl}_segments 384 WHERE blockid>=$start_block AND blockid<=$leaves_end_block 385 ORDER BY blockid 386 " { 387 foreach {t d} [fts3_readleaf $block] { lappend a($t) $d } 388 } 389 } 390 } 391 } 392 393 proc fts3_read {tbl where varname} { 394 upvar $varname a 395 array unset a 396 db eval " SELECT start_block, leaves_end_block, root 397 FROM ${tbl}_segdir WHERE $where 398 ORDER BY level DESC, idx ASC 399 " { 400 if {$start_block == 0} { 401 foreach {t d} [fts3_readleaf $root] { lappend a($t) $d } 402 } else { 403 db eval " SELECT block 404 FROM ${tbl}_segments 405 WHERE blockid>=$start_block AND blockid<$leaves_end_block 406 ORDER BY blockid 407 " { 408 foreach {t d} [fts3_readleaf $block] { lappend a($t) $d } 409 410 } 411 } 412 } 413 } 414 415 ########################################################################## 416