modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/fts5_index.c (about) 1 /* 2 ** 2014 May 31 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** Low level access to the FTS index stored in the database file. The 14 ** routines in this file implement all read and write access to the 15 ** %_data table. Other parts of the system access this functionality via 16 ** the interface defined in fts5Int.h. 17 */ 18 19 20 #include "fts5Int.h" 21 22 /* 23 ** Overview: 24 ** 25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table. 26 ** As well as the main term index, there may be up to 31 prefix indexes. 27 ** The format is similar to FTS3/4, except that: 28 ** 29 ** * all segment b-tree leaf data is stored in fixed size page records 30 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is 31 ** taken to ensure it is possible to iterate in either direction through 32 ** the entries in a doclist, or to seek to a specific entry within a 33 ** doclist, without loading it into memory. 34 ** 35 ** * large doclists that span many pages have associated "doclist index" 36 ** records that contain a copy of the first rowid on each page spanned by 37 ** the doclist. This is used to speed up seek operations, and merges of 38 ** large doclists with very small doclists. 39 ** 40 ** * extra fields in the "structure record" record the state of ongoing 41 ** incremental merge operations. 
42 ** 43 */ 44 45 46 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ 47 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ 48 49 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ 50 51 #define FTS5_MAIN_PREFIX '0' 52 53 #if FTS5_MAX_PREFIX_INDEXES > 31 54 # error "FTS5_MAX_PREFIX_INDEXES is too large" 55 #endif 56 57 /* 58 ** Details: 59 ** 60 ** The %_data table managed by this module, 61 ** 62 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); 63 ** 64 ** , contains the following 5 types of records. See the comments surrounding 65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are 66 ** assigned to each of them. 67 ** 68 ** 1. Structure Records: 69 ** 70 ** The set of segments that make up an index - the index structure - are 71 ** recorded in a single record within the %_data table. The record consists 72 ** of a single 32-bit configuration cookie value followed by a list of 73 ** SQLite varints. If the FTS table features more than one index (because 74 ** there are one or more prefix indexes), it is guaranteed that all share 75 ** the same cookie value. 76 ** 77 ** Immediately following the configuration cookie, the record begins with 78 ** three varints: 79 ** 80 ** + number of levels, 81 ** + total number of segments on all levels, 82 ** + value of write counter. 83 ** 84 ** Then, for each level from 0 to nMax: 85 ** 86 ** + number of input segments in ongoing merge. 87 ** + total number of segments in level. 88 ** + for each segment from oldest to newest: 89 ** + segment id (always > 0) 90 ** + first leaf page number (often 1, always greater than 0) 91 ** + final leaf page number 92 ** 93 ** 2. The Averages Record: 94 ** 95 ** A single record within the %_data table. The data is a list of varints. 96 ** The first value is the number of rows in the index. 
Then, for each column 97 ** from left to right, the total number of tokens in the column for all 98 ** rows of the table. 99 ** 100 ** 3. Segment leaves: 101 ** 102 ** TERM/DOCLIST FORMAT: 103 ** 104 ** Most of each segment leaf is taken up by term/doclist data. The 105 ** general format of term/doclist, starting with the first term 106 ** on the leaf page, is: 107 ** 108 ** varint : size of first term 109 ** blob: first term data 110 ** doclist: first doclist 111 ** zero-or-more { 112 ** varint: number of bytes in common with previous term 113 ** varint: number of bytes of new term data (nNew) 114 ** blob: nNew bytes of new term data 115 ** doclist: next doclist 116 ** } 117 ** 118 ** doclist format: 119 ** 120 ** varint: first rowid 121 ** poslist: first poslist 122 ** zero-or-more { 123 ** varint: rowid delta (always > 0) 124 ** poslist: next poslist 125 ** } 126 ** 127 ** poslist format: 128 ** 129 ** varint: size of poslist in bytes multiplied by 2, not including 130 ** this field. Plus 1 if this entry carries the "delete" flag. 131 ** collist: collist for column 0 132 ** zero-or-more { 133 ** 0x01 byte 134 ** varint: column number (I) 135 ** collist: collist for column I 136 ** } 137 ** 138 ** collist format: 139 ** 140 ** varint: first offset + 2 141 ** zero-or-more { 142 ** varint: offset delta + 2 143 ** } 144 ** 145 ** PAGE FORMAT 146 ** 147 ** Each leaf page begins with a 4-byte header containing 2 16-bit 148 ** unsigned integer fields in big-endian format. They are: 149 ** 150 ** * The byte offset of the first rowid on the page, if it exists 151 ** and occurs before the first term (otherwise 0). 152 ** 153 ** * The byte offset of the start of the page footer. If the page 154 ** footer is 0 bytes in size, then this field is the same as the 155 ** size of the leaf page in bytes. 156 ** 157 ** The page footer consists of a single varint for each term located 158 ** on the page. 
Each varint is the byte offset of the current term 159 ** within the page, delta-compressed against the previous value. In 160 ** other words, the first varint in the footer is the byte offset of 161 ** the first term, the second is the byte offset of the second less that 162 ** of the first, and so on. 163 ** 164 ** The term/doclist format described above is accurate if the entire 165 ** term/doclist data fits on a single leaf page. If this is not the case, 166 ** the format is changed in two ways: 167 ** 168 ** + if the first rowid on a page occurs before the first term, it 169 ** is stored as a literal value: 170 ** 171 ** varint: first rowid 172 ** 173 ** + the first term on each page is stored in the same way as the 174 ** very first term of the segment: 175 ** 176 ** varint : size of first term 177 ** blob: first term data 178 ** 179 ** 5. Segment doclist indexes: 180 ** 181 ** Doclist indexes are themselves b-trees, however they usually consist of 182 ** a single leaf record only. The format of each doclist index leaf page 183 ** is: 184 ** 185 ** * Flags byte. Bits are: 186 ** 0x01: Clear if leaf is also the root page, otherwise set. 187 ** 188 ** * Page number of fts index leaf page. As a varint. 189 ** 190 ** * First rowid on page indicated by previous field. As a varint. 191 ** 192 ** * A list of varints, one for each subsequent termless page. A 193 ** positive delta if the termless page contains at least one rowid, 194 ** or an 0x00 byte otherwise. 195 ** 196 ** Internal doclist index nodes are: 197 ** 198 ** * Flags byte. Bits are: 199 ** 0x01: Clear for root page, otherwise set. 200 ** 201 ** * Page number of first child page. As a varint. 202 ** 203 ** * Copy of first rowid on page indicated by previous field. As a varint. 204 ** 205 ** * A list of delta-encoded varints - the first rowid on each subsequent 206 ** child page. 207 ** 208 */ 209 210 /* 211 ** Rowids for the averages and structure records in the %_data table. 
212 */ 213 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ 214 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ 215 216 /* 217 ** Macros determining the rowids used by segment leaves and dlidx leaves 218 ** and nodes. All nodes and leaves are stored in the %_data table with large 219 ** positive rowids. 220 ** 221 ** Each segment has a unique non-zero 16-bit id. 222 ** 223 ** The rowid for each segment leaf is found by passing the segment id and 224 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered 225 ** sequentially starting from 1. 226 */ 227 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ 228 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ 229 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */ 230 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ 231 232 #define fts5_dri(segid, dlidx, height, pgno) ( \ 233 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ 234 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ 235 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ 236 ((i64)(pgno)) \ 237 ) 238 239 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno) 240 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) 241 242 /* 243 ** Maximum segments permitted in a single index 244 */ 245 #define FTS5_MAX_SEGMENT 2000 246 247 #ifdef SQLITE_DEBUG 248 int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } 249 #endif 250 251 252 /* 253 ** Each time a blob is read from the %_data table, it is padded with this 254 ** many zero bytes. This makes it easier to decode the various record formats 255 ** without overreading if the records are corrupt. 
256 */ 257 #define FTS5_DATA_ZERO_PADDING 8 258 #define FTS5_DATA_PADDING 20 259 260 typedef struct Fts5Data Fts5Data; 261 typedef struct Fts5DlidxIter Fts5DlidxIter; 262 typedef struct Fts5DlidxLvl Fts5DlidxLvl; 263 typedef struct Fts5DlidxWriter Fts5DlidxWriter; 264 typedef struct Fts5Iter Fts5Iter; 265 typedef struct Fts5PageWriter Fts5PageWriter; 266 typedef struct Fts5SegIter Fts5SegIter; 267 typedef struct Fts5DoclistIter Fts5DoclistIter; 268 typedef struct Fts5SegWriter Fts5SegWriter; 269 typedef struct Fts5Structure Fts5Structure; 270 typedef struct Fts5StructureLevel Fts5StructureLevel; 271 typedef struct Fts5StructureSegment Fts5StructureSegment; 272 273 struct Fts5Data { 274 u8 *p; /* Pointer to buffer containing record */ 275 int nn; /* Size of record in bytes */ 276 int szLeaf; /* Size of leaf without page-index */ 277 }; 278 279 /* 280 ** One object per %_data table. 281 */ 282 struct Fts5Index { 283 Fts5Config *pConfig; /* Virtual table configuration */ 284 char *zDataTbl; /* Name of %_data table */ 285 int nWorkUnit; /* Leaf pages in a "unit" of work */ 286 287 /* 288 ** Variables related to the accumulation of tokens and doclists within the 289 ** in-memory hash tables before they are flushed to disk. 290 */ 291 Fts5Hash *pHash; /* Hash table for in-memory data */ 292 int nPendingData; /* Current bytes of pending data */ 293 i64 iWriteRowid; /* Rowid for current doc being written */ 294 int bDelete; /* Current write is a delete */ 295 296 /* Error state. */ 297 int rc; /* Current error code */ 298 299 /* State used by the fts5DataXXX() functions. */ 300 sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ 301 sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ 302 sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ 303 sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ 304 sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? 
*/ 305 sqlite3_stmt *pIdxSelect; 306 int nRead; /* Total number of blocks read */ 307 308 sqlite3_stmt *pDataVersion; 309 i64 iStructVersion; /* data_version when pStruct read */ 310 Fts5Structure *pStruct; /* Current db structure (or NULL) */ 311 }; 312 313 struct Fts5DoclistIter { 314 u8 *aEof; /* Pointer to 1 byte past end of doclist */ 315 316 /* Output variables. aPoslist==0 at EOF */ 317 i64 iRowid; 318 u8 *aPoslist; 319 int nPoslist; 320 int nSize; 321 }; 322 323 /* 324 ** The contents of the "structure" record for each index are represented 325 ** using an Fts5Structure record in memory. Which uses instances of the 326 ** other Fts5StructureXXX types as components. 327 */ 328 struct Fts5StructureSegment { 329 int iSegid; /* Segment id */ 330 int pgnoFirst; /* First leaf page number in segment */ 331 int pgnoLast; /* Last leaf page number in segment */ 332 }; 333 struct Fts5StructureLevel { 334 int nMerge; /* Number of segments in incr-merge */ 335 int nSeg; /* Total number of segments on level */ 336 Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ 337 }; 338 struct Fts5Structure { 339 int nRef; /* Object reference count */ 340 u64 nWriteCounter; /* Total leaves written to level 0 */ 341 int nSegment; /* Total segments in this structure */ 342 int nLevel; /* Number of levels in this index */ 343 Fts5StructureLevel aLevel[1]; /* Array of nLevel level objects */ 344 }; 345 346 /* 347 ** An object of type Fts5SegWriter is used to write to segments. 
348 */ 349 struct Fts5PageWriter { 350 int pgno; /* Page number for this page */ 351 int iPrevPgidx; /* Previous value written into pgidx */ 352 Fts5Buffer buf; /* Buffer containing leaf data */ 353 Fts5Buffer pgidx; /* Buffer containing page-index */ 354 Fts5Buffer term; /* Buffer containing previous term on page */ 355 }; 356 struct Fts5DlidxWriter { 357 int pgno; /* Page number for this page */ 358 int bPrevValid; /* True if iPrev is valid */ 359 i64 iPrev; /* Previous rowid value written to page */ 360 Fts5Buffer buf; /* Buffer containing page data */ 361 }; 362 struct Fts5SegWriter { 363 int iSegid; /* Segid to write to */ 364 Fts5PageWriter writer; /* PageWriter object */ 365 i64 iPrevRowid; /* Previous rowid written to current leaf */ 366 u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ 367 u8 bFirstRowidInPage; /* True if next rowid is first in page */ 368 /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ 369 u8 bFirstTermInPage; /* True if next term will be first in leaf */ 370 int nLeafWritten; /* Number of leaf pages written */ 371 int nEmpty; /* Number of contiguous term-less nodes */ 372 373 int nDlidx; /* Allocated size of aDlidx[] array */ 374 Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ 375 376 /* Values to insert into the %_idx table */ 377 Fts5Buffer btterm; /* Next term to insert into %_idx table */ 378 int iBtPage; /* Page number corresponding to btterm */ 379 }; 380 381 typedef struct Fts5CResult Fts5CResult; 382 struct Fts5CResult { 383 u16 iFirst; /* aSeg[] index of firstest iterator */ 384 u8 bTermEq; /* True if the terms are equal */ 385 }; 386 387 /* 388 ** Object for iterating through a single segment, visiting each term/rowid 389 ** pair in the segment. 390 ** 391 ** pSeg: 392 ** The segment to iterate through. 393 ** 394 ** iLeafPgno: 395 ** Current leaf page number within segment. 
396 ** 397 ** iLeafOffset: 398 ** Byte offset within the current leaf that is the first byte of the 399 ** position list data (one byte past the position-list size field). 400 ** rowid field of the current entry. Usually this is the size field of the 401 ** position list data. The exception is if the rowid for the current entry 402 ** is the last thing on the leaf page. 403 ** 404 ** pLeaf: 405 ** Buffer containing current leaf page data. Set to NULL at EOF. 406 ** 407 ** iTermLeafPgno, iTermLeafOffset: 408 ** Leaf page number containing the last term read from the segment. And 409 ** the offset immediately following the term data. 410 ** 411 ** flags: 412 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: 413 ** 414 ** FTS5_SEGITER_ONETERM: 415 ** If set, set the iterator to point to EOF after the current doclist 416 ** has been exhausted. Do not proceed to the next term in the segment. 417 ** 418 ** FTS5_SEGITER_REVERSE: 419 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If 420 ** it is set, iterate through rowid in descending order instead of the 421 ** default ascending order. 422 ** 423 ** iRowidOffset/nRowidOffset/aRowidOffset: 424 ** These are used if the FTS5_SEGITER_REVERSE flag is set. 425 ** 426 ** For each rowid on the page corresponding to the current term, the 427 ** corresponding aRowidOffset[] entry is set to the byte offset of the 428 ** start of the "position-list-size" field within the page. 429 ** 430 ** iTermIdx: 431 ** Index of current term on iTermLeafPgno. 
432 */ 433 struct Fts5SegIter { 434 Fts5StructureSegment *pSeg; /* Segment to iterate through */ 435 int flags; /* Mask of configuration flags */ 436 int iLeafPgno; /* Current leaf page number */ 437 Fts5Data *pLeaf; /* Current leaf data */ 438 Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ 439 int iLeafOffset; /* Byte offset within current leaf */ 440 441 /* Next method */ 442 void (*xNext)(Fts5Index*, Fts5SegIter*, int*); 443 444 /* The page and offset from which the current term was read. The offset 445 ** is the offset of the first rowid in the current doclist. */ 446 int iTermLeafPgno; 447 int iTermLeafOffset; 448 449 int iPgidxOff; /* Next offset in pgidx */ 450 int iEndofDoclist; 451 452 /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ 453 int iRowidOffset; /* Current entry in aRowidOffset[] */ 454 int nRowidOffset; /* Allocated size of aRowidOffset[] array */ 455 int *aRowidOffset; /* Array of offset to rowid fields */ 456 457 Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ 458 459 /* Variables populated based on current entry. */ 460 Fts5Buffer term; /* Current term */ 461 i64 iRowid; /* Current rowid */ 462 int nPos; /* Number of bytes in current position list */ 463 u8 bDel; /* True if the delete flag is set */ 464 }; 465 466 /* 467 ** Argument is a pointer to an Fts5Data structure that contains a 468 ** leaf page. 469 */ 470 #define ASSERT_SZLEAF_OK(x) assert( \ 471 (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ 472 ) 473 474 #define FTS5_SEGITER_ONETERM 0x01 475 #define FTS5_SEGITER_REVERSE 0x02 476 477 /* 478 ** Argument is a pointer to an Fts5Data structure that contains a leaf 479 ** page. This macro evaluates to true if the leaf contains no terms, or 480 ** false if it contains at least one term. 
481 */ 482 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) 483 484 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) 485 486 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) 487 488 /* 489 ** Object for iterating through the merged results of one or more segments, 490 ** visiting each term/rowid pair in the merged data. 491 ** 492 ** nSeg is always a power of two greater than or equal to the number of 493 ** segments that this object is merging data from. Both the aSeg[] and 494 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded 495 ** with zeroed objects - these are handled as if they were iterators opened 496 ** on empty segments. 497 ** 498 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an 499 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the 500 ** comparison in this context is the index of the iterator that currently 501 ** points to the smaller term/rowid combination. Iterators at EOF are 502 ** considered to be greater than all other iterators. 503 ** 504 ** aFirst[1] contains the index in aSeg[] of the iterator that points to 505 ** the smallest key overall. aFirst[0] is unused. 506 ** 507 ** poslist: 508 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. 509 ** There is no way to tell if this is populated or not. 510 */ 511 struct Fts5Iter { 512 Fts5IndexIter base; /* Base class containing output vars */ 513 514 Fts5Index *pIndex; /* Index that owns this iterator */ 515 Fts5Structure *pStruct; /* Database structure for this iterator */ 516 Fts5Buffer poslist; /* Buffer containing current poslist */ 517 Fts5Colset *pColset; /* Restrict matches to these columns */ 518 519 /* Invoked to set output variables. 
*/ 520 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); 521 522 int nSeg; /* Size of aSeg[] array */ 523 int bRev; /* True to iterate in reverse order */ 524 u8 bSkipEmpty; /* True to skip deleted entries */ 525 526 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ 527 Fts5CResult *aFirst; /* Current merge state (see above) */ 528 Fts5SegIter aSeg[1]; /* Array of segment iterators */ 529 }; 530 531 532 /* 533 ** An instance of the following type is used to iterate through the contents 534 ** of a doclist-index record. 535 ** 536 ** pData: 537 ** Record containing the doclist-index data. 538 ** 539 ** bEof: 540 ** Set to true once iterator has reached EOF. 541 ** 542 ** iOff: 543 ** Set to the current offset within record pData. 544 */ 545 struct Fts5DlidxLvl { 546 Fts5Data *pData; /* Data for current page of this level */ 547 int iOff; /* Current offset into pData */ 548 int bEof; /* At EOF already */ 549 int iFirstOff; /* Used by reverse iterators */ 550 551 /* Output variables */ 552 int iLeafPgno; /* Page number of current leaf page */ 553 i64 iRowid; /* First rowid on leaf iLeafPgno */ 554 }; 555 struct Fts5DlidxIter { 556 int nLvl; 557 int iSegid; 558 Fts5DlidxLvl aLvl[1]; 559 }; 560 561 static void fts5PutU16(u8 *aOut, u16 iVal){ 562 aOut[0] = (iVal>>8); 563 aOut[1] = (iVal&0xFF); 564 } 565 566 static u16 fts5GetU16(const u8 *aIn){ 567 return ((u16)aIn[0] << 8) + aIn[1]; 568 } 569 570 /* 571 ** Allocate and return a buffer at least nByte bytes in size. 572 ** 573 ** If an OOM error is encountered, return NULL and set the error code in 574 ** the Fts5Index handle passed as the first argument. 575 */ 576 static void *fts5IdxMalloc(Fts5Index *p, int nByte){ 577 return sqlite3Fts5MallocZero(&p->rc, nByte); 578 } 579 580 /* 581 ** Compare the contents of the pLeft buffer with the pRight/nRight blob. 582 ** 583 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or 584 ** +ve if pRight is smaller than pLeft. 
In other words: 585 ** 586 ** res = *pLeft - *pRight 587 */ 588 #ifdef SQLITE_DEBUG 589 static int fts5BufferCompareBlob( 590 Fts5Buffer *pLeft, /* Left hand side of comparison */ 591 const u8 *pRight, int nRight /* Right hand side of comparison */ 592 ){ 593 int nCmp = MIN(pLeft->n, nRight); 594 int res = memcmp(pLeft->p, pRight, nCmp); 595 return (res==0 ? (pLeft->n - nRight) : res); 596 } 597 #endif 598 599 /* 600 ** Compare the contents of the two buffers using memcmp(). If one buffer 601 ** is a prefix of the other, it is considered the lesser. 602 ** 603 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or 604 ** +ve if pRight is smaller than pLeft. In other words: 605 ** 606 ** res = *pLeft - *pRight 607 */ 608 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ 609 int nCmp = MIN(pLeft->n, pRight->n); 610 int res = memcmp(pLeft->p, pRight->p, nCmp); 611 return (res==0 ? (pLeft->n - pRight->n) : res); 612 } 613 614 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ 615 int ret; 616 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); 617 return ret; 618 } 619 620 /* 621 ** Close the read-only blob handle, if it is open. 622 */ 623 static void fts5CloseReader(Fts5Index *p){ 624 if( p->pReader ){ 625 sqlite3_blob *pReader = p->pReader; 626 p->pReader = 0; 627 sqlite3_blob_close(pReader); 628 } 629 } 630 631 /* 632 ** Retrieve a record from the %_data table. 633 ** 634 ** If an error occurs, NULL is returned and an error left in the 635 ** Fts5Index object. 636 */ 637 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ 638 Fts5Data *pRet = 0; 639 if( p->rc==SQLITE_OK ){ 640 int rc = SQLITE_OK; 641 642 if( p->pReader ){ 643 /* This call may return SQLITE_ABORT if there has been a savepoint 644 ** rollback since it was last used. In this case a new blob handle 645 ** is required. 
*/ 646 sqlite3_blob *pBlob = p->pReader; 647 p->pReader = 0; 648 rc = sqlite3_blob_reopen(pBlob, iRowid); 649 assert( p->pReader==0 ); 650 p->pReader = pBlob; 651 if( rc!=SQLITE_OK ){ 652 fts5CloseReader(p); 653 } 654 if( rc==SQLITE_ABORT ) rc = SQLITE_OK; 655 } 656 657 /* If the blob handle is not open at this point, open it and seek 658 ** to the requested entry. */ 659 if( p->pReader==0 && rc==SQLITE_OK ){ 660 Fts5Config *pConfig = p->pConfig; 661 rc = sqlite3_blob_open(pConfig->db, 662 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader 663 ); 664 } 665 666 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls 667 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. 668 ** All the reasons those functions might return SQLITE_ERROR - missing 669 ** table, missing row, non-blob/text in block column - indicate 670 ** backing store corruption. */ 671 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; 672 673 if( rc==SQLITE_OK ){ 674 u8 *aOut = 0; /* Read blob data into this buffer */ 675 int nByte = sqlite3_blob_bytes(p->pReader); 676 int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; 677 pRet = (Fts5Data*)sqlite3_malloc(nAlloc); 678 if( pRet ){ 679 pRet->nn = nByte; 680 aOut = pRet->p = (u8*)&pRet[1]; 681 }else{ 682 rc = SQLITE_NOMEM; 683 } 684 685 if( rc==SQLITE_OK ){ 686 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); 687 } 688 if( rc!=SQLITE_OK ){ 689 sqlite3_free(pRet); 690 pRet = 0; 691 }else{ 692 /* TODO1: Fix this */ 693 pRet->szLeaf = fts5GetU16(&pRet->p[2]); 694 } 695 } 696 p->rc = rc; 697 p->nRead++; 698 } 699 700 assert( (pRet==0)==(p->rc!=SQLITE_OK) ); 701 return pRet; 702 } 703 704 /* 705 ** Release a reference to data record returned by an earlier call to 706 ** fts5DataRead(). 
707 */ 708 static void fts5DataRelease(Fts5Data *pData){ 709 sqlite3_free(pData); 710 } 711 712 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ 713 Fts5Data *pRet = fts5DataRead(p, iRowid); 714 if( pRet ){ 715 if( pRet->szLeaf>pRet->nn ){ 716 p->rc = FTS5_CORRUPT; 717 fts5DataRelease(pRet); 718 pRet = 0; 719 } 720 } 721 return pRet; 722 } 723 724 static int fts5IndexPrepareStmt( 725 Fts5Index *p, 726 sqlite3_stmt **ppStmt, 727 char *zSql 728 ){ 729 if( p->rc==SQLITE_OK ){ 730 if( zSql ){ 731 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1, 732 SQLITE_PREPARE_PERSISTENT, ppStmt, 0); 733 }else{ 734 p->rc = SQLITE_NOMEM; 735 } 736 } 737 sqlite3_free(zSql); 738 return p->rc; 739 } 740 741 742 /* 743 ** INSERT OR REPLACE a record into the %_data table. 744 */ 745 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ 746 if( p->rc!=SQLITE_OK ) return; 747 748 if( p->pWriter==0 ){ 749 Fts5Config *pConfig = p->pConfig; 750 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( 751 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", 752 pConfig->zDb, pConfig->zName 753 )); 754 if( p->rc ) return; 755 } 756 757 sqlite3_bind_int64(p->pWriter, 1, iRowid); 758 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); 759 sqlite3_step(p->pWriter); 760 p->rc = sqlite3_reset(p->pWriter); 761 } 762 763 /* 764 ** Execute the following SQL: 765 ** 766 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast 767 */ 768 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ 769 if( p->rc!=SQLITE_OK ) return; 770 771 if( p->pDeleter==0 ){ 772 int rc; 773 Fts5Config *pConfig = p->pConfig; 774 char *zSql = sqlite3_mprintf( 775 "DELETE FROM '%q'.'%q_data' WHERE id>=? 
AND id<=?", 776 pConfig->zDb, pConfig->zName 777 ); 778 if( zSql==0 ){ 779 rc = SQLITE_NOMEM; 780 }else{ 781 rc = sqlite3_prepare_v3(pConfig->db, zSql, -1, 782 SQLITE_PREPARE_PERSISTENT, &p->pDeleter, 0); 783 sqlite3_free(zSql); 784 } 785 if( rc!=SQLITE_OK ){ 786 p->rc = rc; 787 return; 788 } 789 } 790 791 sqlite3_bind_int64(p->pDeleter, 1, iFirst); 792 sqlite3_bind_int64(p->pDeleter, 2, iLast); 793 sqlite3_step(p->pDeleter); 794 p->rc = sqlite3_reset(p->pDeleter); 795 } 796 797 /* 798 ** Remove all records associated with segment iSegid. 799 */ 800 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ 801 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); 802 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; 803 fts5DataDelete(p, iFirst, iLast); 804 if( p->pIdxDeleter==0 ){ 805 Fts5Config *pConfig = p->pConfig; 806 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( 807 "DELETE FROM '%q'.'%q_idx' WHERE segid=?", 808 pConfig->zDb, pConfig->zName 809 )); 810 } 811 if( p->rc==SQLITE_OK ){ 812 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid); 813 sqlite3_step(p->pIdxDeleter); 814 p->rc = sqlite3_reset(p->pIdxDeleter); 815 } 816 } 817 818 /* 819 ** Release a reference to an Fts5Structure object returned by an earlier 820 ** call to fts5StructureRead() or fts5StructureDecode(). 821 */ 822 static void fts5StructureRelease(Fts5Structure *pStruct){ 823 if( pStruct && 0>=(--pStruct->nRef) ){ 824 int i; 825 assert( pStruct->nRef==0 ); 826 for(i=0; i<pStruct->nLevel; i++){ 827 sqlite3_free(pStruct->aLevel[i].aSeg); 828 } 829 sqlite3_free(pStruct); 830 } 831 } 832 833 static void fts5StructureRef(Fts5Structure *pStruct){ 834 pStruct->nRef++; 835 } 836 837 /* 838 ** Deserialize and return the structure record currently stored in serialized 839 ** form within buffer pData/nData. 840 ** 841 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array 842 ** are over-allocated by one slot. This allows the structure contents 843 ** to be more easily edited. 
844 ** 845 ** If an error occurs, *ppOut is set to NULL and an SQLite error code 846 ** returned. Otherwise, *ppOut is set to point to the new object and 847 ** SQLITE_OK returned. 848 */ 849 static int fts5StructureDecode( 850 const u8 *pData, /* Buffer containing serialized structure */ 851 int nData, /* Size of buffer pData in bytes */ 852 int *piCookie, /* Configuration cookie value */ 853 Fts5Structure **ppOut /* OUT: Deserialized object */ 854 ){ 855 int rc = SQLITE_OK; 856 int i = 0; 857 int iLvl; 858 int nLevel = 0; 859 int nSegment = 0; 860 int nByte; /* Bytes of space to allocate at pRet */ 861 Fts5Structure *pRet = 0; /* Structure object to return */ 862 863 /* Grab the cookie value */ 864 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); 865 i = 4; 866 867 /* Read the total number of levels and segments from the start of the 868 ** structure record. */ 869 i += fts5GetVarint32(&pData[i], nLevel); 870 i += fts5GetVarint32(&pData[i], nSegment); 871 nByte = ( 872 sizeof(Fts5Structure) + /* Main structure */ 873 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */ 874 ); 875 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); 876 877 if( pRet ){ 878 pRet->nRef = 1; 879 pRet->nLevel = nLevel; 880 pRet->nSegment = nSegment; 881 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); 882 883 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ 884 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; 885 int nTotal = 0; 886 int iSeg; 887 888 if( i>=nData ){ 889 rc = FTS5_CORRUPT; 890 }else{ 891 i += fts5GetVarint32(&pData[i], pLvl->nMerge); 892 i += fts5GetVarint32(&pData[i], nTotal); 893 assert( nTotal>=pLvl->nMerge ); 894 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, 895 nTotal * sizeof(Fts5StructureSegment) 896 ); 897 } 898 899 if( rc==SQLITE_OK ){ 900 pLvl->nSeg = nTotal; 901 for(iSeg=0; iSeg<nTotal; iSeg++){ 902 if( i>=nData ){ 903 rc = FTS5_CORRUPT; 904 break; 905 } 906 i += fts5GetVarint32(&pData[i], 
pLvl->aSeg[iSeg].iSegid); 907 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); 908 i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); 909 } 910 } 911 } 912 if( rc!=SQLITE_OK ){ 913 fts5StructureRelease(pRet); 914 pRet = 0; 915 } 916 } 917 918 *ppOut = pRet; 919 return rc; 920 } 921 922 /* 923 ** 924 */ 925 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ 926 if( *pRc==SQLITE_OK ){ 927 Fts5Structure *pStruct = *ppStruct; 928 int nLevel = pStruct->nLevel; 929 int nByte = ( 930 sizeof(Fts5Structure) + /* Main structure */ 931 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ 932 ); 933 934 pStruct = sqlite3_realloc(pStruct, nByte); 935 if( pStruct ){ 936 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); 937 pStruct->nLevel++; 938 *ppStruct = pStruct; 939 }else{ 940 *pRc = SQLITE_NOMEM; 941 } 942 } 943 } 944 945 /* 946 ** Extend level iLvl so that there is room for at least nExtra more 947 ** segments. 948 */ 949 static void fts5StructureExtendLevel( 950 int *pRc, 951 Fts5Structure *pStruct, 952 int iLvl, 953 int nExtra, 954 int bInsert 955 ){ 956 if( *pRc==SQLITE_OK ){ 957 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 958 Fts5StructureSegment *aNew; 959 int nByte; 960 961 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); 962 aNew = sqlite3_realloc(pLvl->aSeg, nByte); 963 if( aNew ){ 964 if( bInsert==0 ){ 965 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); 966 }else{ 967 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); 968 memmove(&aNew[nExtra], aNew, nMove); 969 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); 970 } 971 pLvl->aSeg = aNew; 972 }else{ 973 *pRc = SQLITE_NOMEM; 974 } 975 } 976 } 977 978 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ 979 Fts5Structure *pRet = 0; 980 Fts5Config *pConfig = p->pConfig; 981 int iCookie; /* Configuration cookie */ 982 Fts5Data *pData; 983 984 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); 985 
if( p->rc==SQLITE_OK ){ 986 /* TODO: Do we need this if the leaf-index is appended? Probably... */ 987 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); 988 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); 989 if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ 990 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); 991 } 992 fts5DataRelease(pData); 993 if( p->rc!=SQLITE_OK ){ 994 fts5StructureRelease(pRet); 995 pRet = 0; 996 } 997 } 998 999 return pRet; 1000 } 1001 1002 static i64 fts5IndexDataVersion(Fts5Index *p){ 1003 i64 iVersion = 0; 1004 1005 if( p->rc==SQLITE_OK ){ 1006 if( p->pDataVersion==0 ){ 1007 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, 1008 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) 1009 ); 1010 if( p->rc ) return 0; 1011 } 1012 1013 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){ 1014 iVersion = sqlite3_column_int64(p->pDataVersion, 0); 1015 } 1016 p->rc = sqlite3_reset(p->pDataVersion); 1017 } 1018 1019 return iVersion; 1020 } 1021 1022 /* 1023 ** Read, deserialize and return the structure record. 1024 ** 1025 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array 1026 ** are over-allocated as described for function fts5StructureDecode() 1027 ** above. 1028 ** 1029 ** If an error occurs, NULL is returned and an error code left in the 1030 ** Fts5Index handle. If an error has already occurred when this function 1031 ** is called, it is a no-op. 
1032 */ 1033 static Fts5Structure *fts5StructureRead(Fts5Index *p){ 1034 1035 if( p->pStruct==0 ){ 1036 p->iStructVersion = fts5IndexDataVersion(p); 1037 if( p->rc==SQLITE_OK ){ 1038 p->pStruct = fts5StructureReadUncached(p); 1039 } 1040 } 1041 1042 #if 0 1043 else{ 1044 Fts5Structure *pTest = fts5StructureReadUncached(p); 1045 if( pTest ){ 1046 int i, j; 1047 assert_nc( p->pStruct->nSegment==pTest->nSegment ); 1048 assert_nc( p->pStruct->nLevel==pTest->nLevel ); 1049 for(i=0; i<pTest->nLevel; i++){ 1050 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge ); 1051 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg ); 1052 for(j=0; j<pTest->aLevel[i].nSeg; j++){ 1053 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; 1054 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; 1055 assert_nc( p1->iSegid==p2->iSegid ); 1056 assert_nc( p1->pgnoFirst==p2->pgnoFirst ); 1057 assert_nc( p1->pgnoLast==p2->pgnoLast ); 1058 } 1059 } 1060 fts5StructureRelease(pTest); 1061 } 1062 } 1063 #endif 1064 1065 if( p->rc!=SQLITE_OK ) return 0; 1066 assert( p->iStructVersion!=0 ); 1067 assert( p->pStruct!=0 ); 1068 fts5StructureRef(p->pStruct); 1069 return p->pStruct; 1070 } 1071 1072 static void fts5StructureInvalidate(Fts5Index *p){ 1073 if( p->pStruct ){ 1074 fts5StructureRelease(p->pStruct); 1075 p->pStruct = 0; 1076 } 1077 } 1078 1079 /* 1080 ** Return the total number of segments in index structure pStruct. This 1081 ** function is only ever used as part of assert() conditions. 
1082 */ 1083 #ifdef SQLITE_DEBUG 1084 static int fts5StructureCountSegments(Fts5Structure *pStruct){ 1085 int nSegment = 0; /* Total number of segments */ 1086 if( pStruct ){ 1087 int iLvl; /* Used to iterate through levels */ 1088 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 1089 nSegment += pStruct->aLevel[iLvl].nSeg; 1090 } 1091 } 1092 1093 return nSegment; 1094 } 1095 #endif 1096 1097 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ 1098 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \ 1099 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \ 1100 (pBuf)->n += nBlob; \ 1101 } 1102 1103 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \ 1104 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \ 1105 assert( (pBuf)->nSpace>=(pBuf)->n ); \ 1106 } 1107 1108 1109 /* 1110 ** Serialize and store the "structure" record. 1111 ** 1112 ** If an error occurs, leave an error code in the Fts5Index object. If an 1113 ** error has already occurred, this function is a no-op. 1114 */ 1115 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ 1116 if( p->rc==SQLITE_OK ){ 1117 Fts5Buffer buf; /* Buffer to serialize record into */ 1118 int iLvl; /* Used to iterate through levels */ 1119 int iCookie; /* Cookie value to store */ 1120 1121 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); 1122 memset(&buf, 0, sizeof(Fts5Buffer)); 1123 1124 /* Append the current configuration cookie */ 1125 iCookie = p->pConfig->iCookie; 1126 if( iCookie<0 ) iCookie = 0; 1127 1128 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){ 1129 sqlite3Fts5Put32(buf.p, iCookie); 1130 buf.n = 4; 1131 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel); 1132 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment); 1133 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter); 1134 } 1135 1136 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 1137 int iSeg; /* Used to iterate through segments */ 1138 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 1139 
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); 1140 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); 1141 assert( pLvl->nMerge<=pLvl->nSeg ); 1142 1143 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ 1144 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); 1145 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); 1146 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); 1147 } 1148 } 1149 1150 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); 1151 fts5BufferFree(&buf); 1152 } 1153 } 1154 1155 #if 0 1156 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); 1157 static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ 1158 int rc = SQLITE_OK; 1159 Fts5Buffer buf; 1160 memset(&buf, 0, sizeof(buf)); 1161 fts5DebugStructure(&rc, &buf, pStruct); 1162 fprintf(stdout, "%s: %s\n", zCaption, buf.p); 1163 fflush(stdout); 1164 fts5BufferFree(&buf); 1165 } 1166 #else 1167 # define fts5PrintStructure(x,y) 1168 #endif 1169 1170 static int fts5SegmentSize(Fts5StructureSegment *pSeg){ 1171 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; 1172 } 1173 1174 /* 1175 ** Return a copy of index structure pStruct. Except, promote as many 1176 ** segments as possible to level iPromote. If an OOM occurs, NULL is 1177 ** returned. 
1178 */ 1179 static void fts5StructurePromoteTo( 1180 Fts5Index *p, 1181 int iPromote, 1182 int szPromote, 1183 Fts5Structure *pStruct 1184 ){ 1185 int il, is; 1186 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; 1187 1188 if( pOut->nMerge==0 ){ 1189 for(il=iPromote+1; il<pStruct->nLevel; il++){ 1190 Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; 1191 if( pLvl->nMerge ) return; 1192 for(is=pLvl->nSeg-1; is>=0; is--){ 1193 int sz = fts5SegmentSize(&pLvl->aSeg[is]); 1194 if( sz>szPromote ) return; 1195 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); 1196 if( p->rc ) return; 1197 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); 1198 pOut->nSeg++; 1199 pLvl->nSeg--; 1200 } 1201 } 1202 } 1203 } 1204 1205 /* 1206 ** A new segment has just been written to level iLvl of index structure 1207 ** pStruct. This function determines if any segments should be promoted 1208 ** as a result. Segments are promoted in two scenarios: 1209 ** 1210 ** a) If the segment just written is smaller than one or more segments 1211 ** within the previous populated level, it is promoted to the previous 1212 ** populated level. 1213 ** 1214 ** b) If the segment just written is larger than the newest segment on 1215 ** the next populated level, then that segment, and any other adjacent 1216 ** segments that are also smaller than the one just written, are 1217 ** promoted. 1218 ** 1219 ** If one or more segments are promoted, the structure object is updated 1220 ** to reflect this. 
1221 */ 1222 static void fts5StructurePromote( 1223 Fts5Index *p, /* FTS5 backend object */ 1224 int iLvl, /* Index level just updated */ 1225 Fts5Structure *pStruct /* Index structure */ 1226 ){ 1227 if( p->rc==SQLITE_OK ){ 1228 int iTst; 1229 int iPromote = -1; 1230 int szPromote = 0; /* Promote anything this size or smaller */ 1231 Fts5StructureSegment *pSeg; /* Segment just written */ 1232 int szSeg; /* Size of segment just written */ 1233 int nSeg = pStruct->aLevel[iLvl].nSeg; 1234 1235 if( nSeg==0 ) return; 1236 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; 1237 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); 1238 1239 /* Check for condition (a) */ 1240 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); 1241 if( iTst>=0 ){ 1242 int i; 1243 int szMax = 0; 1244 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; 1245 assert( pTst->nMerge==0 ); 1246 for(i=0; i<pTst->nSeg; i++){ 1247 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; 1248 if( sz>szMax ) szMax = sz; 1249 } 1250 if( szMax>=szSeg ){ 1251 /* Condition (a) is true. Promote the newest segment on level 1252 ** iLvl to level iTst. */ 1253 iPromote = iTst; 1254 szPromote = szMax; 1255 } 1256 } 1257 1258 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() 1259 ** is a no-op if it is not. */ 1260 if( iPromote<0 ){ 1261 iPromote = iLvl; 1262 szPromote = szSeg; 1263 } 1264 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); 1265 } 1266 } 1267 1268 1269 /* 1270 ** Advance the iterator passed as the only argument. If the end of the 1271 ** doclist-index page is reached, return non-zero. 
1272 */ 1273 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ 1274 Fts5Data *pData = pLvl->pData; 1275 1276 if( pLvl->iOff==0 ){ 1277 assert( pLvl->bEof==0 ); 1278 pLvl->iOff = 1; 1279 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); 1280 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); 1281 pLvl->iFirstOff = pLvl->iOff; 1282 }else{ 1283 int iOff; 1284 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ 1285 if( pData->p[iOff] ) break; 1286 } 1287 1288 if( iOff<pData->nn ){ 1289 i64 iVal; 1290 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; 1291 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); 1292 pLvl->iRowid += iVal; 1293 pLvl->iOff = iOff; 1294 }else{ 1295 pLvl->bEof = 1; 1296 } 1297 } 1298 1299 return pLvl->bEof; 1300 } 1301 1302 /* 1303 ** Advance the iterator passed as the only argument. 1304 */ 1305 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ 1306 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; 1307 1308 assert( iLvl<pIter->nLvl ); 1309 if( fts5DlidxLvlNext(pLvl) ){ 1310 if( (iLvl+1) < pIter->nLvl ){ 1311 fts5DlidxIterNextR(p, pIter, iLvl+1); 1312 if( pLvl[1].bEof==0 ){ 1313 fts5DataRelease(pLvl->pData); 1314 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 1315 pLvl->pData = fts5DataRead(p, 1316 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) 1317 ); 1318 if( pLvl->pData ) fts5DlidxLvlNext(pLvl); 1319 } 1320 } 1321 } 1322 1323 return pIter->aLvl[0].bEof; 1324 } 1325 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ 1326 return fts5DlidxIterNextR(p, pIter, 0); 1327 } 1328 1329 /* 1330 ** The iterator passed as the first argument has the following fields set 1331 ** as follows. This function sets up the rest of the iterator so that it 1332 ** points to the first rowid in the doclist-index. 
1333 ** 1334 ** pData: 1335 ** pointer to doclist-index record, 1336 ** 1337 ** When this function is called pIter->iLeafPgno is the page number the 1338 ** doclist is associated with (the one featuring the term). 1339 */ 1340 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ 1341 int i; 1342 for(i=0; i<pIter->nLvl; i++){ 1343 fts5DlidxLvlNext(&pIter->aLvl[i]); 1344 } 1345 return pIter->aLvl[0].bEof; 1346 } 1347 1348 1349 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ 1350 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; 1351 } 1352 1353 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ 1354 int i; 1355 1356 /* Advance each level to the last entry on the last page */ 1357 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ 1358 Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; 1359 while( fts5DlidxLvlNext(pLvl)==0 ); 1360 pLvl->bEof = 0; 1361 1362 if( i>0 ){ 1363 Fts5DlidxLvl *pChild = &pLvl[-1]; 1364 fts5DataRelease(pChild->pData); 1365 memset(pChild, 0, sizeof(Fts5DlidxLvl)); 1366 pChild->pData = fts5DataRead(p, 1367 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) 1368 ); 1369 } 1370 } 1371 } 1372 1373 /* 1374 ** Move the iterator passed as the only argument to the previous entry. 1375 */ 1376 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ 1377 int iOff = pLvl->iOff; 1378 1379 assert( pLvl->bEof==0 ); 1380 if( iOff<=pLvl->iFirstOff ){ 1381 pLvl->bEof = 1; 1382 }else{ 1383 u8 *a = pLvl->pData->p; 1384 i64 iVal; 1385 int iLimit; 1386 int ii; 1387 int nZero = 0; 1388 1389 /* Currently iOff points to the first byte of a varint. This block 1390 ** decrements iOff until it points to the first byte of the previous 1391 ** varint. Taking care not to read any memory locations that occur 1392 ** before the buffer in memory. */ 1393 iLimit = (iOff>9 ? 
iOff-9 : 0); 1394 for(iOff--; iOff>iLimit; iOff--){ 1395 if( (a[iOff-1] & 0x80)==0 ) break; 1396 } 1397 1398 fts5GetVarint(&a[iOff], (u64*)&iVal); 1399 pLvl->iRowid -= iVal; 1400 pLvl->iLeafPgno--; 1401 1402 /* Skip backwards past any 0x00 varints. */ 1403 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ 1404 nZero++; 1405 } 1406 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ 1407 /* The byte immediately before the last 0x00 byte has the 0x80 bit 1408 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 1409 ** bytes before a[ii]. */ 1410 int bZero = 0; /* True if last 0x00 counts */ 1411 if( (ii-8)>=pLvl->iFirstOff ){ 1412 int j; 1413 for(j=1; j<=8 && (a[ii-j] & 0x80); j++); 1414 bZero = (j>8); 1415 } 1416 if( bZero==0 ) nZero--; 1417 } 1418 pLvl->iLeafPgno -= nZero; 1419 pLvl->iOff = iOff - nZero; 1420 } 1421 1422 return pLvl->bEof; 1423 } 1424 1425 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ 1426 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; 1427 1428 assert( iLvl<pIter->nLvl ); 1429 if( fts5DlidxLvlPrev(pLvl) ){ 1430 if( (iLvl+1) < pIter->nLvl ){ 1431 fts5DlidxIterPrevR(p, pIter, iLvl+1); 1432 if( pLvl[1].bEof==0 ){ 1433 fts5DataRelease(pLvl->pData); 1434 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 1435 pLvl->pData = fts5DataRead(p, 1436 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) 1437 ); 1438 if( pLvl->pData ){ 1439 while( fts5DlidxLvlNext(pLvl)==0 ); 1440 pLvl->bEof = 0; 1441 } 1442 } 1443 } 1444 } 1445 1446 return pIter->aLvl[0].bEof; 1447 } 1448 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ 1449 return fts5DlidxIterPrevR(p, pIter, 0); 1450 } 1451 1452 /* 1453 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 
1454 */ 1455 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ 1456 if( pIter ){ 1457 int i; 1458 for(i=0; i<pIter->nLvl; i++){ 1459 fts5DataRelease(pIter->aLvl[i].pData); 1460 } 1461 sqlite3_free(pIter); 1462 } 1463 } 1464 1465 static Fts5DlidxIter *fts5DlidxIterInit( 1466 Fts5Index *p, /* Fts5 Backend to iterate within */ 1467 int bRev, /* True for ORDER BY ASC */ 1468 int iSegid, /* Segment id */ 1469 int iLeafPg /* Leaf page number to load dlidx for */ 1470 ){ 1471 Fts5DlidxIter *pIter = 0; 1472 int i; 1473 int bDone = 0; 1474 1475 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ 1476 int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); 1477 Fts5DlidxIter *pNew; 1478 1479 pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); 1480 if( pNew==0 ){ 1481 p->rc = SQLITE_NOMEM; 1482 }else{ 1483 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); 1484 Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; 1485 pIter = pNew; 1486 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 1487 pLvl->pData = fts5DataRead(p, iRowid); 1488 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ 1489 bDone = 1; 1490 } 1491 pIter->nLvl = i+1; 1492 } 1493 } 1494 1495 if( p->rc==SQLITE_OK ){ 1496 pIter->iSegid = iSegid; 1497 if( bRev==0 ){ 1498 fts5DlidxIterFirst(pIter); 1499 }else{ 1500 fts5DlidxIterLast(p, pIter); 1501 } 1502 } 1503 1504 if( p->rc!=SQLITE_OK ){ 1505 fts5DlidxIterFree(pIter); 1506 pIter = 0; 1507 } 1508 1509 return pIter; 1510 } 1511 1512 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ 1513 return pIter->aLvl[0].iRowid; 1514 } 1515 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ 1516 return pIter->aLvl[0].iLeafPgno; 1517 } 1518 1519 /* 1520 ** Load the next leaf page into the segment iterator. 
1521 */ 1522 static void fts5SegIterNextPage( 1523 Fts5Index *p, /* FTS5 backend object */ 1524 Fts5SegIter *pIter /* Iterator to advance to next page */ 1525 ){ 1526 Fts5Data *pLeaf; 1527 Fts5StructureSegment *pSeg = pIter->pSeg; 1528 fts5DataRelease(pIter->pLeaf); 1529 pIter->iLeafPgno++; 1530 if( pIter->pNextLeaf ){ 1531 pIter->pLeaf = pIter->pNextLeaf; 1532 pIter->pNextLeaf = 0; 1533 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ 1534 pIter->pLeaf = fts5LeafRead(p, 1535 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) 1536 ); 1537 }else{ 1538 pIter->pLeaf = 0; 1539 } 1540 pLeaf = pIter->pLeaf; 1541 1542 if( pLeaf ){ 1543 pIter->iPgidxOff = pLeaf->szLeaf; 1544 if( fts5LeafIsTermless(pLeaf) ){ 1545 pIter->iEndofDoclist = pLeaf->nn+1; 1546 }else{ 1547 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], 1548 pIter->iEndofDoclist 1549 ); 1550 } 1551 } 1552 } 1553 1554 /* 1555 ** Argument p points to a buffer containing a varint to be interpreted as a 1556 ** position list size field. Read the varint and return the number of bytes 1557 ** read. Before returning, set *pnSz to the number of bytes in the position 1558 ** list, and *pbDel to true if the delete flag is set, or false otherwise. 1559 */ 1560 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ 1561 int nSz; 1562 int n = 0; 1563 fts5FastGetVarint32(p, n, nSz); 1564 assert_nc( nSz>=0 ); 1565 *pnSz = nSz/2; 1566 *pbDel = nSz & 0x0001; 1567 return n; 1568 } 1569 1570 /* 1571 ** Fts5SegIter.iLeafOffset currently points to the first byte of a 1572 ** position-list size field. Read the value of the field and store it 1573 ** in the following variables: 1574 ** 1575 ** Fts5SegIter.nPos 1576 ** Fts5SegIter.bDel 1577 ** 1578 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 1579 ** position list content (if any). 
1580 */ 1581 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ 1582 if( p->rc==SQLITE_OK ){ 1583 int iOff = pIter->iLeafOffset; /* Offset to read at */ 1584 ASSERT_SZLEAF_OK(pIter->pLeaf); 1585 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ 1586 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf); 1587 pIter->bDel = 0; 1588 pIter->nPos = 1; 1589 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ 1590 pIter->bDel = 1; 1591 iOff++; 1592 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ 1593 pIter->nPos = 1; 1594 iOff++; 1595 }else{ 1596 pIter->nPos = 0; 1597 } 1598 } 1599 }else{ 1600 int nSz; 1601 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); 1602 pIter->bDel = (nSz & 0x0001); 1603 pIter->nPos = nSz>>1; 1604 assert_nc( pIter->nPos>=0 ); 1605 } 1606 pIter->iLeafOffset = iOff; 1607 } 1608 } 1609 1610 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ 1611 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ 1612 int iOff = pIter->iLeafOffset; 1613 1614 ASSERT_SZLEAF_OK(pIter->pLeaf); 1615 if( iOff>=pIter->pLeaf->szLeaf ){ 1616 fts5SegIterNextPage(p, pIter); 1617 if( pIter->pLeaf==0 ){ 1618 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; 1619 return; 1620 } 1621 iOff = 4; 1622 a = pIter->pLeaf->p; 1623 } 1624 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); 1625 pIter->iLeafOffset = iOff; 1626 } 1627 1628 /* 1629 ** Fts5SegIter.iLeafOffset currently points to the first byte of the 1630 ** "nSuffix" field of a term. Function parameter nKeep contains the value 1631 ** of the "nPrefix" field (if there was one - it is passed 0 if this is 1632 ** the first term in the segment). 1633 ** 1634 ** This function populates: 1635 ** 1636 ** Fts5SegIter.term 1637 ** Fts5SegIter.rowid 1638 ** 1639 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of 1640 ** the first position list. The position list belonging to document 1641 ** (Fts5SegIter.iRowid). 
1642 */ 1643 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ 1644 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ 1645 int iOff = pIter->iLeafOffset; /* Offset to read at */ 1646 int nNew; /* Bytes of new data */ 1647 1648 iOff += fts5GetVarint32(&a[iOff], nNew); 1649 if( iOff+nNew>pIter->pLeaf->nn ){ 1650 p->rc = FTS5_CORRUPT; 1651 return; 1652 } 1653 pIter->term.n = nKeep; 1654 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); 1655 iOff += nNew; 1656 pIter->iTermLeafOffset = iOff; 1657 pIter->iTermLeafPgno = pIter->iLeafPgno; 1658 pIter->iLeafOffset = iOff; 1659 1660 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ 1661 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 1662 }else{ 1663 int nExtra; 1664 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); 1665 pIter->iEndofDoclist += nExtra; 1666 } 1667 1668 fts5SegIterLoadRowid(p, pIter); 1669 } 1670 1671 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); 1672 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); 1673 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); 1674 1675 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ 1676 if( pIter->flags & FTS5_SEGITER_REVERSE ){ 1677 pIter->xNext = fts5SegIterNext_Reverse; 1678 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ 1679 pIter->xNext = fts5SegIterNext_None; 1680 }else{ 1681 pIter->xNext = fts5SegIterNext; 1682 } 1683 } 1684 1685 /* 1686 ** Initialize the iterator object pIter to iterate through the entries in 1687 ** segment pSeg. The iterator is left pointing to the first entry when 1688 ** this function returns. 1689 ** 1690 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 1691 ** an error has already occurred when this function is called, it is a no-op. 
1692 */ 1693 static void fts5SegIterInit( 1694 Fts5Index *p, /* FTS index object */ 1695 Fts5StructureSegment *pSeg, /* Description of segment */ 1696 Fts5SegIter *pIter /* Object to populate */ 1697 ){ 1698 if( pSeg->pgnoFirst==0 ){ 1699 /* This happens if the segment is being used as an input to an incremental 1700 ** merge and all data has already been "trimmed". See function 1701 ** fts5TrimSegments() for details. In this case leave the iterator empty. 1702 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is 1703 ** at EOF already. */ 1704 assert( pIter->pLeaf==0 ); 1705 return; 1706 } 1707 1708 if( p->rc==SQLITE_OK ){ 1709 memset(pIter, 0, sizeof(*pIter)); 1710 fts5SegIterSetNext(p, pIter); 1711 pIter->pSeg = pSeg; 1712 pIter->iLeafPgno = pSeg->pgnoFirst-1; 1713 fts5SegIterNextPage(p, pIter); 1714 } 1715 1716 if( p->rc==SQLITE_OK ){ 1717 pIter->iLeafOffset = 4; 1718 assert_nc( pIter->pLeaf->nn>4 ); 1719 assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); 1720 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; 1721 fts5SegIterLoadTerm(p, pIter, 0); 1722 fts5SegIterLoadNPos(p, pIter); 1723 } 1724 } 1725 1726 /* 1727 ** This function is only ever called on iterators created by calls to 1728 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. 1729 ** 1730 ** The iterator is in an unusual state when this function is called: the 1731 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of 1732 ** the position-list size field for the first relevant rowid on the page. 1733 ** Fts5SegIter.rowid is set, but nPos and bDel are not. 1734 ** 1735 ** This function advances the iterator so that it points to the last 1736 ** relevant rowid on the page and, if necessary, initializes the 1737 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator 1738 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first 1739 ** byte of the position list content associated with said rowid. 
1740 */ 1741 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ 1742 int eDetail = p->pConfig->eDetail; 1743 int n = pIter->pLeaf->szLeaf; 1744 int i = pIter->iLeafOffset; 1745 u8 *a = pIter->pLeaf->p; 1746 int iRowidOffset = 0; 1747 1748 if( n>pIter->iEndofDoclist ){ 1749 n = pIter->iEndofDoclist; 1750 } 1751 1752 ASSERT_SZLEAF_OK(pIter->pLeaf); 1753 while( 1 ){ 1754 i64 iDelta = 0; 1755 1756 if( eDetail==FTS5_DETAIL_NONE ){ 1757 /* todo */ 1758 if( i<n && a[i]==0 ){ 1759 i++; 1760 if( i<n && a[i]==0 ) i++; 1761 } 1762 }else{ 1763 int nPos; 1764 int bDummy; 1765 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); 1766 i += nPos; 1767 } 1768 if( i>=n ) break; 1769 i += fts5GetVarint(&a[i], (u64*)&iDelta); 1770 pIter->iRowid += iDelta; 1771 1772 /* If necessary, grow the pIter->aRowidOffset[] array. */ 1773 if( iRowidOffset>=pIter->nRowidOffset ){ 1774 int nNew = pIter->nRowidOffset + 8; 1775 int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); 1776 if( aNew==0 ){ 1777 p->rc = SQLITE_NOMEM; 1778 break; 1779 } 1780 pIter->aRowidOffset = aNew; 1781 pIter->nRowidOffset = nNew; 1782 } 1783 1784 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; 1785 pIter->iLeafOffset = i; 1786 } 1787 pIter->iRowidOffset = iRowidOffset; 1788 fts5SegIterLoadNPos(p, pIter); 1789 } 1790 1791 /* 1792 ** 1793 */ 1794 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ 1795 assert( pIter->flags & FTS5_SEGITER_REVERSE ); 1796 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 1797 1798 fts5DataRelease(pIter->pLeaf); 1799 pIter->pLeaf = 0; 1800 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ 1801 Fts5Data *pNew; 1802 pIter->iLeafPgno--; 1803 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( 1804 pIter->pSeg->iSegid, pIter->iLeafPgno 1805 )); 1806 if( pNew ){ 1807 /* iTermLeafOffset may be equal to szLeaf if the term is the last 1808 ** thing on the page - i.e. the first rowid is on the following page. 
1809 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */ 1810 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ 1811 assert( pIter->pLeaf==0 ); 1812 if( pIter->iTermLeafOffset<pNew->szLeaf ){ 1813 pIter->pLeaf = pNew; 1814 pIter->iLeafOffset = pIter->iTermLeafOffset; 1815 } 1816 }else{ 1817 int iRowidOff; 1818 iRowidOff = fts5LeafFirstRowidOff(pNew); 1819 if( iRowidOff ){ 1820 pIter->pLeaf = pNew; 1821 pIter->iLeafOffset = iRowidOff; 1822 } 1823 } 1824 1825 if( pIter->pLeaf ){ 1826 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; 1827 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); 1828 break; 1829 }else{ 1830 fts5DataRelease(pNew); 1831 } 1832 } 1833 } 1834 1835 if( pIter->pLeaf ){ 1836 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 1837 fts5SegIterReverseInitPage(p, pIter); 1838 } 1839 } 1840 1841 /* 1842 ** Return true if the iterator passed as the second argument currently 1843 ** points to a delete marker. A delete marker is an entry with a 0 byte 1844 ** position-list. 1845 */ 1846 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ 1847 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; 1848 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); 1849 } 1850 1851 /* 1852 ** Advance iterator pIter to the next entry. 1853 ** 1854 ** This version of fts5SegIterNext() is only used by reverse iterators. 
1855 */ 1856 static void fts5SegIterNext_Reverse( 1857 Fts5Index *p, /* FTS5 backend object */ 1858 Fts5SegIter *pIter, /* Iterator to advance */ 1859 int *pbUnused /* Unused */ 1860 ){ 1861 assert( pIter->flags & FTS5_SEGITER_REVERSE ); 1862 assert( pIter->pNextLeaf==0 ); 1863 UNUSED_PARAM(pbUnused); 1864 1865 if( pIter->iRowidOffset>0 ){ 1866 u8 *a = pIter->pLeaf->p; 1867 int iOff; 1868 i64 iDelta; 1869 1870 pIter->iRowidOffset--; 1871 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; 1872 fts5SegIterLoadNPos(p, pIter); 1873 iOff = pIter->iLeafOffset; 1874 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ 1875 iOff += pIter->nPos; 1876 } 1877 fts5GetVarint(&a[iOff], (u64*)&iDelta); 1878 pIter->iRowid -= iDelta; 1879 }else{ 1880 fts5SegIterReverseNewPage(p, pIter); 1881 } 1882 } 1883 1884 /* 1885 ** Advance iterator pIter to the next entry. 1886 ** 1887 ** This version of fts5SegIterNext() is only used if detail=none and the 1888 ** iterator is not a reverse direction iterator. 
1889 */ 1890 static void fts5SegIterNext_None( 1891 Fts5Index *p, /* FTS5 backend object */ 1892 Fts5SegIter *pIter, /* Iterator to advance */ 1893 int *pbNewTerm /* OUT: Set for new term */ 1894 ){ 1895 int iOff; 1896 1897 assert( p->rc==SQLITE_OK ); 1898 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); 1899 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); 1900 1901 ASSERT_SZLEAF_OK(pIter->pLeaf); 1902 iOff = pIter->iLeafOffset; 1903 1904 /* Next entry is on the next page */ 1905 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ 1906 fts5SegIterNextPage(p, pIter); 1907 if( p->rc || pIter->pLeaf==0 ) return; 1908 pIter->iRowid = 0; 1909 iOff = 4; 1910 } 1911 1912 if( iOff<pIter->iEndofDoclist ){ 1913 /* Next entry is on the current page */ 1914 i64 iDelta; 1915 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); 1916 pIter->iLeafOffset = iOff; 1917 pIter->iRowid += iDelta; 1918 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ 1919 if( pIter->pSeg ){ 1920 int nKeep = 0; 1921 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ 1922 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); 1923 } 1924 pIter->iLeafOffset = iOff; 1925 fts5SegIterLoadTerm(p, pIter, nKeep); 1926 }else{ 1927 const u8 *pList = 0; 1928 const char *zTerm = 0; 1929 int nList; 1930 sqlite3Fts5HashScanNext(p->pHash); 1931 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); 1932 if( pList==0 ) goto next_none_eof; 1933 pIter->pLeaf->p = (u8*)pList; 1934 pIter->pLeaf->nn = nList; 1935 pIter->pLeaf->szLeaf = nList; 1936 pIter->iEndofDoclist = nList; 1937 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); 1938 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); 1939 } 1940 1941 if( pbNewTerm ) *pbNewTerm = 1; 1942 }else{ 1943 goto next_none_eof; 1944 } 1945 1946 fts5SegIterLoadNPos(p, pIter); 1947 1948 return; 1949 next_none_eof: 1950 fts5DataRelease(pIter->pLeaf); 1951 pIter->pLeaf = 0; 1952 } 1953 1954 1955 /* 1956 ** Advance iterator 
pIter to the next entry. 1957 ** 1958 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 1959 ** is not considered an error if the iterator reaches EOF. If an error has 1960 ** already occurred when this function is called, it is a no-op. 1961 */ 1962 static void fts5SegIterNext( 1963 Fts5Index *p, /* FTS5 backend object */ 1964 Fts5SegIter *pIter, /* Iterator to advance */ 1965 int *pbNewTerm /* OUT: Set for new term */ 1966 ){ 1967 Fts5Data *pLeaf = pIter->pLeaf; 1968 int iOff; 1969 int bNewTerm = 0; 1970 int nKeep = 0; 1971 u8 *a; 1972 int n; 1973 1974 assert( pbNewTerm==0 || *pbNewTerm==0 ); 1975 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); 1976 1977 /* Search for the end of the position list within the current page. */ 1978 a = pLeaf->p; 1979 n = pLeaf->szLeaf; 1980 1981 ASSERT_SZLEAF_OK(pLeaf); 1982 iOff = pIter->iLeafOffset + pIter->nPos; 1983 1984 if( iOff<n ){ 1985 /* The next entry is on the current page. */ 1986 assert_nc( iOff<=pIter->iEndofDoclist ); 1987 if( iOff>=pIter->iEndofDoclist ){ 1988 bNewTerm = 1; 1989 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ 1990 iOff += fts5GetVarint32(&a[iOff], nKeep); 1991 } 1992 }else{ 1993 u64 iDelta; 1994 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); 1995 pIter->iRowid += iDelta; 1996 assert_nc( iDelta>0 ); 1997 } 1998 pIter->iLeafOffset = iOff; 1999 2000 }else if( pIter->pSeg==0 ){ 2001 const u8 *pList = 0; 2002 const char *zTerm = 0; 2003 int nList = 0; 2004 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); 2005 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ 2006 sqlite3Fts5HashScanNext(p->pHash); 2007 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); 2008 } 2009 if( pList==0 ){ 2010 fts5DataRelease(pIter->pLeaf); 2011 pIter->pLeaf = 0; 2012 }else{ 2013 pIter->pLeaf->p = (u8*)pList; 2014 pIter->pLeaf->nn = nList; 2015 pIter->pLeaf->szLeaf = nList; 2016 pIter->iEndofDoclist = nList+1; 2017 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), 2018 
(u8*)zTerm); 2019 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); 2020 *pbNewTerm = 1; 2021 } 2022 }else{ 2023 iOff = 0; 2024 /* Next entry is not on the current page */ 2025 while( iOff==0 ){ 2026 fts5SegIterNextPage(p, pIter); 2027 pLeaf = pIter->pLeaf; 2028 if( pLeaf==0 ) break; 2029 ASSERT_SZLEAF_OK(pLeaf); 2030 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ 2031 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); 2032 pIter->iLeafOffset = iOff; 2033 2034 if( pLeaf->nn>pLeaf->szLeaf ){ 2035 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( 2036 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist 2037 ); 2038 } 2039 } 2040 else if( pLeaf->nn>pLeaf->szLeaf ){ 2041 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( 2042 &pLeaf->p[pLeaf->szLeaf], iOff 2043 ); 2044 pIter->iLeafOffset = iOff; 2045 pIter->iEndofDoclist = iOff; 2046 bNewTerm = 1; 2047 } 2048 assert_nc( iOff<pLeaf->szLeaf ); 2049 if( iOff>pLeaf->szLeaf ){ 2050 p->rc = FTS5_CORRUPT; 2051 return; 2052 } 2053 } 2054 } 2055 2056 /* Check if the iterator is now at EOF. If so, return early. */ 2057 if( pIter->pLeaf ){ 2058 if( bNewTerm ){ 2059 if( pIter->flags & FTS5_SEGITER_ONETERM ){ 2060 fts5DataRelease(pIter->pLeaf); 2061 pIter->pLeaf = 0; 2062 }else{ 2063 fts5SegIterLoadTerm(p, pIter, nKeep); 2064 fts5SegIterLoadNPos(p, pIter); 2065 if( pbNewTerm ) *pbNewTerm = 1; 2066 } 2067 }else{ 2068 /* The following could be done by calling fts5SegIterLoadNPos(). But 2069 ** this block is particularly performance critical, so equivalent 2070 ** code is inlined. 2071 ** 2072 ** Later: Switched back to fts5SegIterLoadNPos() because it supports 2073 ** detail=none mode. Not ideal. 
2074 */ 2075 int nSz; 2076 assert( p->rc==SQLITE_OK ); 2077 assert( pIter->iLeafOffset<=pIter->pLeaf->nn ); 2078 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); 2079 pIter->bDel = (nSz & 0x0001); 2080 pIter->nPos = nSz>>1; 2081 assert_nc( pIter->nPos>=0 ); 2082 } 2083 } 2084 } 2085 2086 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } 2087 2088 #define fts5IndexSkipVarint(a, iOff) { \ 2089 int iEnd = iOff+9; \ 2090 while( (a[iOff++] & 0x80) && iOff<iEnd ); \ 2091 } 2092 2093 /* 2094 ** Iterator pIter currently points to the first rowid in a doclist. This 2095 ** function sets the iterator up so that iterates in reverse order through 2096 ** the doclist. 2097 */ 2098 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ 2099 Fts5DlidxIter *pDlidx = pIter->pDlidx; 2100 Fts5Data *pLast = 0; 2101 int pgnoLast = 0; 2102 2103 if( pDlidx ){ 2104 int iSegid = pIter->pSeg->iSegid; 2105 pgnoLast = fts5DlidxIterPgno(pDlidx); 2106 pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); 2107 }else{ 2108 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ 2109 2110 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of 2111 ** position-list content for the current rowid. Back it up so that it 2112 ** points to the start of the position-list size field. */ 2113 int iPoslist; 2114 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ 2115 iPoslist = pIter->iTermLeafOffset; 2116 }else{ 2117 iPoslist = 4; 2118 } 2119 fts5IndexSkipVarint(pLeaf->p, iPoslist); 2120 pIter->iLeafOffset = iPoslist; 2121 2122 /* If this condition is true then the largest rowid for the current 2123 ** term may not be stored on the current page. So search forward to 2124 ** see where said rowid really is. */ 2125 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ 2126 int pgno; 2127 Fts5StructureSegment *pSeg = pIter->pSeg; 2128 2129 /* The last rowid in the doclist may not be on the current page. Search 2130 ** forward to find the page containing the last rowid. 
*/ 2131 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ 2132 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); 2133 Fts5Data *pNew = fts5DataRead(p, iAbs); 2134 if( pNew ){ 2135 int iRowid, bTermless; 2136 iRowid = fts5LeafFirstRowidOff(pNew); 2137 bTermless = fts5LeafIsTermless(pNew); 2138 if( iRowid ){ 2139 SWAPVAL(Fts5Data*, pNew, pLast); 2140 pgnoLast = pgno; 2141 } 2142 fts5DataRelease(pNew); 2143 if( bTermless==0 ) break; 2144 } 2145 } 2146 } 2147 } 2148 2149 /* If pLast is NULL at this point, then the last rowid for this doclist 2150 ** lies on the page currently indicated by the iterator. In this case 2151 ** pIter->iLeafOffset is already set to point to the position-list size 2152 ** field associated with the first relevant rowid on the page. 2153 ** 2154 ** Or, if pLast is non-NULL, then it is the page that contains the last 2155 ** rowid. In this case configure the iterator so that it points to the 2156 ** first rowid on this page. 2157 */ 2158 if( pLast ){ 2159 int iOff; 2160 fts5DataRelease(pIter->pLeaf); 2161 pIter->pLeaf = pLast; 2162 pIter->iLeafPgno = pgnoLast; 2163 iOff = fts5LeafFirstRowidOff(pLast); 2164 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); 2165 pIter->iLeafOffset = iOff; 2166 2167 if( fts5LeafIsTermless(pLast) ){ 2168 pIter->iEndofDoclist = pLast->nn+1; 2169 }else{ 2170 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); 2171 } 2172 2173 } 2174 2175 fts5SegIterReverseInitPage(p, pIter); 2176 } 2177 2178 /* 2179 ** Iterator pIter currently points to the first rowid of a doclist. 2180 ** There is a doclist-index associated with the final term on the current 2181 ** page. If the current term is the last term on the page, load the 2182 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). 
2183 */ 2184 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ 2185 int iSeg = pIter->pSeg->iSegid; 2186 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); 2187 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ 2188 2189 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 2190 assert( pIter->pDlidx==0 ); 2191 2192 /* Check if the current doclist ends on this page. If it does, return 2193 ** early without loading the doclist-index (as it belongs to a different 2194 ** term. */ 2195 if( pIter->iTermLeafPgno==pIter->iLeafPgno 2196 && pIter->iEndofDoclist<pLeaf->szLeaf 2197 ){ 2198 return; 2199 } 2200 2201 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); 2202 } 2203 2204 /* 2205 ** The iterator object passed as the second argument currently contains 2206 ** no valid values except for the Fts5SegIter.pLeaf member variable. This 2207 ** function searches the leaf page for a term matching (pTerm/nTerm). 2208 ** 2209 ** If the specified term is found on the page, then the iterator is left 2210 ** pointing to it. If argument bGe is zero and the term is not found, 2211 ** the iterator is left pointing at EOF. 2212 ** 2213 ** If bGe is non-zero and the specified term is not found, then the 2214 ** iterator is left pointing to the smallest term in the segment that 2215 ** is larger than the specified term, even if this term is not on the 2216 ** current page. 
2217 */ 2218 static void fts5LeafSeek( 2219 Fts5Index *p, /* Leave any error code here */ 2220 int bGe, /* True for a >= search */ 2221 Fts5SegIter *pIter, /* Iterator to seek */ 2222 const u8 *pTerm, int nTerm /* Term to search for */ 2223 ){ 2224 int iOff; 2225 const u8 *a = pIter->pLeaf->p; 2226 int szLeaf = pIter->pLeaf->szLeaf; 2227 int n = pIter->pLeaf->nn; 2228 2229 int nMatch = 0; 2230 int nKeep = 0; 2231 int nNew = 0; 2232 int iTermOff; 2233 int iPgidx; /* Current offset in pgidx */ 2234 int bEndOfPage = 0; 2235 2236 assert( p->rc==SQLITE_OK ); 2237 2238 iPgidx = szLeaf; 2239 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); 2240 iOff = iTermOff; 2241 if( iOff>n ){ 2242 p->rc = FTS5_CORRUPT; 2243 return; 2244 } 2245 2246 while( 1 ){ 2247 2248 /* Figure out how many new bytes are in this term */ 2249 fts5FastGetVarint32(a, iOff, nNew); 2250 if( nKeep<nMatch ){ 2251 goto search_failed; 2252 } 2253 2254 assert( nKeep>=nMatch ); 2255 if( nKeep==nMatch ){ 2256 int nCmp; 2257 int i; 2258 nCmp = MIN(nNew, nTerm-nMatch); 2259 for(i=0; i<nCmp; i++){ 2260 if( a[iOff+i]!=pTerm[nMatch+i] ) break; 2261 } 2262 nMatch += i; 2263 2264 if( nTerm==nMatch ){ 2265 if( i==nNew ){ 2266 goto search_success; 2267 }else{ 2268 goto search_failed; 2269 } 2270 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ 2271 goto search_failed; 2272 } 2273 } 2274 2275 if( iPgidx>=n ){ 2276 bEndOfPage = 1; 2277 break; 2278 } 2279 2280 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); 2281 iTermOff += nKeep; 2282 iOff = iTermOff; 2283 2284 if( iOff>=n ){ 2285 p->rc = FTS5_CORRUPT; 2286 return; 2287 } 2288 2289 /* Read the nKeep field of the next term. 
*/ 2290 fts5FastGetVarint32(a, iOff, nKeep); 2291 } 2292 2293 search_failed: 2294 if( bGe==0 ){ 2295 fts5DataRelease(pIter->pLeaf); 2296 pIter->pLeaf = 0; 2297 return; 2298 }else if( bEndOfPage ){ 2299 do { 2300 fts5SegIterNextPage(p, pIter); 2301 if( pIter->pLeaf==0 ) return; 2302 a = pIter->pLeaf->p; 2303 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ 2304 iPgidx = pIter->pLeaf->szLeaf; 2305 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff); 2306 if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ 2307 p->rc = FTS5_CORRUPT; 2308 }else{ 2309 nKeep = 0; 2310 iTermOff = iOff; 2311 n = pIter->pLeaf->nn; 2312 iOff += fts5GetVarint32(&a[iOff], nNew); 2313 break; 2314 } 2315 } 2316 }while( 1 ); 2317 } 2318 2319 search_success: 2320 2321 pIter->iLeafOffset = iOff + nNew; 2322 pIter->iTermLeafOffset = pIter->iLeafOffset; 2323 pIter->iTermLeafPgno = pIter->iLeafPgno; 2324 2325 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); 2326 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); 2327 2328 if( iPgidx>=n ){ 2329 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 2330 }else{ 2331 int nExtra; 2332 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); 2333 pIter->iEndofDoclist = iTermOff + nExtra; 2334 } 2335 pIter->iPgidxOff = iPgidx; 2336 2337 fts5SegIterLoadRowid(p, pIter); 2338 fts5SegIterLoadNPos(p, pIter); 2339 } 2340 2341 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ 2342 if( p->pIdxSelect==0 ){ 2343 Fts5Config *pConfig = p->pConfig; 2344 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( 2345 "SELECT pgno FROM '%q'.'%q_idx' WHERE " 2346 "segid=? AND term<=? ORDER BY term DESC LIMIT 1", 2347 pConfig->zDb, pConfig->zName 2348 )); 2349 } 2350 return p->pIdxSelect; 2351 } 2352 2353 /* 2354 ** Initialize the object pIter to point to term pTerm/nTerm within segment 2355 ** pSeg. If there is no such term in the index, the iterator is set to EOF. 2356 ** 2357 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. 
If 2358 ** an error has already occurred when this function is called, it is a no-op. 2359 */ 2360 static void fts5SegIterSeekInit( 2361 Fts5Index *p, /* FTS5 backend */ 2362 const u8 *pTerm, int nTerm, /* Term to seek to */ 2363 int flags, /* Mask of FTS5INDEX_XXX flags */ 2364 Fts5StructureSegment *pSeg, /* Description of segment */ 2365 Fts5SegIter *pIter /* Object to populate */ 2366 ){ 2367 int iPg = 1; 2368 int bGe = (flags & FTS5INDEX_QUERY_SCAN); 2369 int bDlidx = 0; /* True if there is a doclist-index */ 2370 sqlite3_stmt *pIdxSelect = 0; 2371 2372 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); 2373 assert( pTerm && nTerm ); 2374 memset(pIter, 0, sizeof(*pIter)); 2375 pIter->pSeg = pSeg; 2376 2377 /* This block sets stack variable iPg to the leaf page number that may 2378 ** contain term (pTerm/nTerm), if it is present in the segment. */ 2379 pIdxSelect = fts5IdxSelectStmt(p); 2380 if( p->rc ) return; 2381 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); 2382 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); 2383 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ 2384 i64 val = sqlite3_column_int(pIdxSelect, 0); 2385 iPg = (int)(val>>1); 2386 bDlidx = (val & 0x0001); 2387 } 2388 p->rc = sqlite3_reset(pIdxSelect); 2389 2390 if( iPg<pSeg->pgnoFirst ){ 2391 iPg = pSeg->pgnoFirst; 2392 bDlidx = 0; 2393 } 2394 2395 pIter->iLeafPgno = iPg - 1; 2396 fts5SegIterNextPage(p, pIter); 2397 2398 if( pIter->pLeaf ){ 2399 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); 2400 } 2401 2402 if( p->rc==SQLITE_OK && bGe==0 ){ 2403 pIter->flags |= FTS5_SEGITER_ONETERM; 2404 if( pIter->pLeaf ){ 2405 if( flags & FTS5INDEX_QUERY_DESC ){ 2406 pIter->flags |= FTS5_SEGITER_REVERSE; 2407 } 2408 if( bDlidx ){ 2409 fts5SegIterLoadDlidx(p, pIter); 2410 } 2411 if( flags & FTS5INDEX_QUERY_DESC ){ 2412 fts5SegIterReverse(p, pIter); 2413 } 2414 } 2415 } 2416 2417 fts5SegIterSetNext(p, pIter); 2418 2419 /* Either: 2420 ** 2421 ** 1) an error has occurred, or 2422 ** 2) the iterator 
points to EOF, or 2423 ** 3) the iterator points to an entry with term (pTerm/nTerm), or 2424 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points 2425 ** to an entry with a term greater than or equal to (pTerm/nTerm). 2426 */ 2427 assert( p->rc!=SQLITE_OK /* 1 */ 2428 || pIter->pLeaf==0 /* 2 */ 2429 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */ 2430 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */ 2431 ); 2432 } 2433 2434 /* 2435 ** Initialize the object pIter to point to term pTerm/nTerm within the 2436 ** in-memory hash table. If there is no such term in the hash-table, the 2437 ** iterator is set to EOF. 2438 ** 2439 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 2440 ** an error has already occurred when this function is called, it is a no-op. 2441 */ 2442 static void fts5SegIterHashInit( 2443 Fts5Index *p, /* FTS5 backend */ 2444 const u8 *pTerm, int nTerm, /* Term to seek to */ 2445 int flags, /* Mask of FTS5INDEX_XXX flags */ 2446 Fts5SegIter *pIter /* Object to populate */ 2447 ){ 2448 const u8 *pList = 0; 2449 int nList = 0; 2450 const u8 *z = 0; 2451 int n = 0; 2452 2453 assert( p->pHash ); 2454 assert( p->rc==SQLITE_OK ); 2455 2456 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ 2457 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); 2458 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); 2459 n = (z ? 
(int)strlen((const char*)z) : 0); 2460 }else{ 2461 pIter->flags |= FTS5_SEGITER_ONETERM; 2462 sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList); 2463 z = pTerm; 2464 n = nTerm; 2465 } 2466 2467 if( pList ){ 2468 Fts5Data *pLeaf; 2469 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); 2470 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); 2471 if( pLeaf==0 ) return; 2472 pLeaf->p = (u8*)pList; 2473 pLeaf->nn = pLeaf->szLeaf = nList; 2474 pIter->pLeaf = pLeaf; 2475 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); 2476 pIter->iEndofDoclist = pLeaf->nn; 2477 2478 if( flags & FTS5INDEX_QUERY_DESC ){ 2479 pIter->flags |= FTS5_SEGITER_REVERSE; 2480 fts5SegIterReverseInitPage(p, pIter); 2481 }else{ 2482 fts5SegIterLoadNPos(p, pIter); 2483 } 2484 } 2485 2486 fts5SegIterSetNext(p, pIter); 2487 } 2488 2489 /* 2490 ** Zero the iterator passed as the only argument. 2491 */ 2492 static void fts5SegIterClear(Fts5SegIter *pIter){ 2493 fts5BufferFree(&pIter->term); 2494 fts5DataRelease(pIter->pLeaf); 2495 fts5DataRelease(pIter->pNextLeaf); 2496 fts5DlidxIterFree(pIter->pDlidx); 2497 sqlite3_free(pIter->aRowidOffset); 2498 memset(pIter, 0, sizeof(Fts5SegIter)); 2499 } 2500 2501 #ifdef SQLITE_DEBUG 2502 2503 /* 2504 ** This function is used as part of the big assert() procedure implemented by 2505 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored 2506 ** in *pRes is the correct result of comparing the current positions of the 2507 ** two iterators. 
2508 */ 2509 static void fts5AssertComparisonResult( 2510 Fts5Iter *pIter, 2511 Fts5SegIter *p1, 2512 Fts5SegIter *p2, 2513 Fts5CResult *pRes 2514 ){ 2515 int i1 = p1 - pIter->aSeg; 2516 int i2 = p2 - pIter->aSeg; 2517 2518 if( p1->pLeaf || p2->pLeaf ){ 2519 if( p1->pLeaf==0 ){ 2520 assert( pRes->iFirst==i2 ); 2521 }else if( p2->pLeaf==0 ){ 2522 assert( pRes->iFirst==i1 ); 2523 }else{ 2524 int nMin = MIN(p1->term.n, p2->term.n); 2525 int res = memcmp(p1->term.p, p2->term.p, nMin); 2526 if( res==0 ) res = p1->term.n - p2->term.n; 2527 2528 if( res==0 ){ 2529 assert( pRes->bTermEq==1 ); 2530 assert( p1->iRowid!=p2->iRowid ); 2531 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; 2532 }else{ 2533 assert( pRes->bTermEq==0 ); 2534 } 2535 2536 if( res<0 ){ 2537 assert( pRes->iFirst==i1 ); 2538 }else{ 2539 assert( pRes->iFirst==i2 ); 2540 } 2541 } 2542 } 2543 } 2544 2545 /* 2546 ** This function is a no-op unless SQLITE_DEBUG is defined when this module 2547 ** is compiled. In that case, this function is essentially an assert() 2548 ** statement used to verify that the contents of the pIter->aFirst[] array 2549 ** are correct. 2550 */ 2551 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ 2552 if( p->rc==SQLITE_OK ){ 2553 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 2554 int i; 2555 2556 assert( (pFirst->pLeaf==0)==pIter->base.bEof ); 2557 2558 /* Check that pIter->iSwitchRowid is set correctly. 
*/ 2559 for(i=0; i<pIter->nSeg; i++){ 2560 Fts5SegIter *p1 = &pIter->aSeg[i]; 2561 assert( p1==pFirst 2562 || p1->pLeaf==0 2563 || fts5BufferCompare(&pFirst->term, &p1->term) 2564 || p1->iRowid==pIter->iSwitchRowid 2565 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev 2566 ); 2567 } 2568 2569 for(i=0; i<pIter->nSeg; i+=2){ 2570 Fts5SegIter *p1 = &pIter->aSeg[i]; 2571 Fts5SegIter *p2 = &pIter->aSeg[i+1]; 2572 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; 2573 fts5AssertComparisonResult(pIter, p1, p2, pRes); 2574 } 2575 2576 for(i=1; i<(pIter->nSeg / 2); i+=2){ 2577 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; 2578 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; 2579 Fts5CResult *pRes = &pIter->aFirst[i]; 2580 fts5AssertComparisonResult(pIter, p1, p2, pRes); 2581 } 2582 } 2583 } 2584 #else 2585 # define fts5AssertMultiIterSetup(x,y) 2586 #endif 2587 2588 /* 2589 ** Do the comparison necessary to populate pIter->aFirst[iOut]. 2590 ** 2591 ** If the returned value is non-zero, then it is the index of an entry 2592 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing 2593 ** to a key that is a duplicate of another, higher priority, 2594 ** segment-iterator in the pSeg->aSeg[] array. 
2595 */ 2596 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ 2597 int i1; /* Index of left-hand Fts5SegIter */ 2598 int i2; /* Index of right-hand Fts5SegIter */ 2599 int iRes; 2600 Fts5SegIter *p1; /* Left-hand Fts5SegIter */ 2601 Fts5SegIter *p2; /* Right-hand Fts5SegIter */ 2602 Fts5CResult *pRes = &pIter->aFirst[iOut]; 2603 2604 assert( iOut<pIter->nSeg && iOut>0 ); 2605 assert( pIter->bRev==0 || pIter->bRev==1 ); 2606 2607 if( iOut>=(pIter->nSeg/2) ){ 2608 i1 = (iOut - pIter->nSeg/2) * 2; 2609 i2 = i1 + 1; 2610 }else{ 2611 i1 = pIter->aFirst[iOut*2].iFirst; 2612 i2 = pIter->aFirst[iOut*2+1].iFirst; 2613 } 2614 p1 = &pIter->aSeg[i1]; 2615 p2 = &pIter->aSeg[i2]; 2616 2617 pRes->bTermEq = 0; 2618 if( p1->pLeaf==0 ){ /* If p1 is at EOF */ 2619 iRes = i2; 2620 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ 2621 iRes = i1; 2622 }else{ 2623 int res = fts5BufferCompare(&p1->term, &p2->term); 2624 if( res==0 ){ 2625 assert( i2>i1 ); 2626 assert( i2!=0 ); 2627 pRes->bTermEq = 1; 2628 if( p1->iRowid==p2->iRowid ){ 2629 p1->bDel = p2->bDel; 2630 return i2; 2631 } 2632 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; 2633 } 2634 assert( res!=0 ); 2635 if( res<0 ){ 2636 iRes = i1; 2637 }else{ 2638 iRes = i2; 2639 } 2640 } 2641 2642 pRes->iFirst = (u16)iRes; 2643 return 0; 2644 } 2645 2646 /* 2647 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. 2648 ** It is an error if leaf iLeafPgno does not exist or contains no rowids. 
2649 */ 2650 static void fts5SegIterGotoPage( 2651 Fts5Index *p, /* FTS5 backend object */ 2652 Fts5SegIter *pIter, /* Iterator to advance */ 2653 int iLeafPgno 2654 ){ 2655 assert( iLeafPgno>pIter->iLeafPgno ); 2656 2657 if( iLeafPgno>pIter->pSeg->pgnoLast ){ 2658 p->rc = FTS5_CORRUPT; 2659 }else{ 2660 fts5DataRelease(pIter->pNextLeaf); 2661 pIter->pNextLeaf = 0; 2662 pIter->iLeafPgno = iLeafPgno-1; 2663 fts5SegIterNextPage(p, pIter); 2664 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); 2665 2666 if( p->rc==SQLITE_OK ){ 2667 int iOff; 2668 u8 *a = pIter->pLeaf->p; 2669 int n = pIter->pLeaf->szLeaf; 2670 2671 iOff = fts5LeafFirstRowidOff(pIter->pLeaf); 2672 if( iOff<4 || iOff>=n ){ 2673 p->rc = FTS5_CORRUPT; 2674 }else{ 2675 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); 2676 pIter->iLeafOffset = iOff; 2677 fts5SegIterLoadNPos(p, pIter); 2678 } 2679 } 2680 } 2681 } 2682 2683 /* 2684 ** Advance the iterator passed as the second argument until it is at or 2685 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is 2686 ** always advanced at least once. 
2687 */ 2688 static void fts5SegIterNextFrom( 2689 Fts5Index *p, /* FTS5 backend object */ 2690 Fts5SegIter *pIter, /* Iterator to advance */ 2691 i64 iMatch /* Advance iterator at least this far */ 2692 ){ 2693 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); 2694 Fts5DlidxIter *pDlidx = pIter->pDlidx; 2695 int iLeafPgno = pIter->iLeafPgno; 2696 int bMove = 1; 2697 2698 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 2699 assert( pIter->pDlidx ); 2700 assert( pIter->pLeaf ); 2701 2702 if( bRev==0 ){ 2703 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ 2704 iLeafPgno = fts5DlidxIterPgno(pDlidx); 2705 fts5DlidxIterNext(p, pDlidx); 2706 } 2707 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); 2708 if( iLeafPgno>pIter->iLeafPgno ){ 2709 fts5SegIterGotoPage(p, pIter, iLeafPgno); 2710 bMove = 0; 2711 } 2712 }else{ 2713 assert( pIter->pNextLeaf==0 ); 2714 assert( iMatch<pIter->iRowid ); 2715 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ 2716 fts5DlidxIterPrev(p, pDlidx); 2717 } 2718 iLeafPgno = fts5DlidxIterPgno(pDlidx); 2719 2720 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); 2721 2722 if( iLeafPgno<pIter->iLeafPgno ){ 2723 pIter->iLeafPgno = iLeafPgno+1; 2724 fts5SegIterReverseNewPage(p, pIter); 2725 bMove = 0; 2726 } 2727 } 2728 2729 do{ 2730 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); 2731 if( pIter->pLeaf==0 ) break; 2732 if( bRev==0 && pIter->iRowid>=iMatch ) break; 2733 if( bRev!=0 && pIter->iRowid<=iMatch ) break; 2734 bMove = 1; 2735 }while( p->rc==SQLITE_OK ); 2736 } 2737 2738 2739 /* 2740 ** Free the iterator object passed as the second argument. 
2741 */ 2742 static void fts5MultiIterFree(Fts5Iter *pIter){ 2743 if( pIter ){ 2744 int i; 2745 for(i=0; i<pIter->nSeg; i++){ 2746 fts5SegIterClear(&pIter->aSeg[i]); 2747 } 2748 fts5StructureRelease(pIter->pStruct); 2749 fts5BufferFree(&pIter->poslist); 2750 sqlite3_free(pIter); 2751 } 2752 } 2753 2754 static void fts5MultiIterAdvanced( 2755 Fts5Index *p, /* FTS5 backend to iterate within */ 2756 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ 2757 int iChanged, /* Index of sub-iterator just advanced */ 2758 int iMinset /* Minimum entry in aFirst[] to set */ 2759 ){ 2760 int i; 2761 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ 2762 int iEq; 2763 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ 2764 Fts5SegIter *pSeg = &pIter->aSeg[iEq]; 2765 assert( p->rc==SQLITE_OK ); 2766 pSeg->xNext(p, pSeg, 0); 2767 i = pIter->nSeg + iEq; 2768 } 2769 } 2770 } 2771 2772 /* 2773 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still 2774 ** points to the same term though - just a different rowid. This function 2775 ** attempts to update the contents of the pIter->aFirst[] accordingly. 2776 ** If it does so successfully, 0 is returned. Otherwise 1. 2777 ** 2778 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() 2779 ** on the iterator instead. That function does the same as this one, except 2780 ** that it deals with more complicated cases as well. 2781 */ 2782 static int fts5MultiIterAdvanceRowid( 2783 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ 2784 int iChanged, /* Index of sub-iterator just advanced */ 2785 Fts5SegIter **ppFirst 2786 ){ 2787 Fts5SegIter *pNew = &pIter->aSeg[iChanged]; 2788 2789 if( pNew->iRowid==pIter->iSwitchRowid 2790 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev 2791 ){ 2792 int i; 2793 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; 2794 pIter->iSwitchRowid = pIter->bRev ? 
SMALLEST_INT64 : LARGEST_INT64; 2795 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ 2796 Fts5CResult *pRes = &pIter->aFirst[i]; 2797 2798 assert( pNew->pLeaf ); 2799 assert( pRes->bTermEq==0 || pOther->pLeaf ); 2800 2801 if( pRes->bTermEq ){ 2802 if( pNew->iRowid==pOther->iRowid ){ 2803 return 1; 2804 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ 2805 pIter->iSwitchRowid = pOther->iRowid; 2806 pNew = pOther; 2807 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ 2808 pIter->iSwitchRowid = pOther->iRowid; 2809 } 2810 } 2811 pRes->iFirst = (u16)(pNew - pIter->aSeg); 2812 if( i==1 ) break; 2813 2814 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; 2815 } 2816 } 2817 2818 *ppFirst = pNew; 2819 return 0; 2820 } 2821 2822 /* 2823 ** Set the pIter->bEof variable based on the state of the sub-iterators. 2824 */ 2825 static void fts5MultiIterSetEof(Fts5Iter *pIter){ 2826 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 2827 pIter->base.bEof = pSeg->pLeaf==0; 2828 pIter->iSwitchRowid = pSeg->iRowid; 2829 } 2830 2831 /* 2832 ** Move the iterator to the next entry. 2833 ** 2834 ** If an error occurs, an error code is left in Fts5Index.rc. It is not 2835 ** considered an error if the iterator reaches EOF, or if it is already at 2836 ** EOF when this function is called. 
2837 */ 2838 static void fts5MultiIterNext( 2839 Fts5Index *p, 2840 Fts5Iter *pIter, 2841 int bFrom, /* True if argument iFrom is valid */ 2842 i64 iFrom /* Advance at least as far as this */ 2843 ){ 2844 int bUseFrom = bFrom; 2845 assert( pIter->base.bEof==0 ); 2846 while( p->rc==SQLITE_OK ){ 2847 int iFirst = pIter->aFirst[1].iFirst; 2848 int bNewTerm = 0; 2849 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; 2850 assert( p->rc==SQLITE_OK ); 2851 if( bUseFrom && pSeg->pDlidx ){ 2852 fts5SegIterNextFrom(p, pSeg, iFrom); 2853 }else{ 2854 pSeg->xNext(p, pSeg, &bNewTerm); 2855 } 2856 2857 if( pSeg->pLeaf==0 || bNewTerm 2858 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) 2859 ){ 2860 fts5MultiIterAdvanced(p, pIter, iFirst, 1); 2861 fts5MultiIterSetEof(pIter); 2862 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; 2863 if( pSeg->pLeaf==0 ) return; 2864 } 2865 2866 fts5AssertMultiIterSetup(p, pIter); 2867 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); 2868 if( pIter->bSkipEmpty==0 || pSeg->nPos ){ 2869 pIter->xSetOutputs(pIter, pSeg); 2870 return; 2871 } 2872 bUseFrom = 0; 2873 } 2874 } 2875 2876 static void fts5MultiIterNext2( 2877 Fts5Index *p, 2878 Fts5Iter *pIter, 2879 int *pbNewTerm /* OUT: True if *might* be new term */ 2880 ){ 2881 assert( pIter->bSkipEmpty ); 2882 if( p->rc==SQLITE_OK ){ 2883 *pbNewTerm = 0; 2884 do{ 2885 int iFirst = pIter->aFirst[1].iFirst; 2886 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; 2887 int bNewTerm = 0; 2888 2889 assert( p->rc==SQLITE_OK ); 2890 pSeg->xNext(p, pSeg, &bNewTerm); 2891 if( pSeg->pLeaf==0 || bNewTerm 2892 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) 2893 ){ 2894 fts5MultiIterAdvanced(p, pIter, iFirst, 1); 2895 fts5MultiIterSetEof(pIter); 2896 *pbNewTerm = 1; 2897 } 2898 fts5AssertMultiIterSetup(p, pIter); 2899 2900 }while( fts5MultiIterIsEmpty(p, pIter) ); 2901 } 2902 } 2903 2904 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){ 2905 UNUSED_PARAM2(pUnused1, pUnused2); 2906 
} 2907 2908 static Fts5Iter *fts5MultiIterAlloc( 2909 Fts5Index *p, /* FTS5 backend to iterate within */ 2910 int nSeg 2911 ){ 2912 Fts5Iter *pNew; 2913 int nSlot; /* Power of two >= nSeg */ 2914 2915 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); 2916 pNew = fts5IdxMalloc(p, 2917 sizeof(Fts5Iter) + /* pNew */ 2918 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ 2919 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ 2920 ); 2921 if( pNew ){ 2922 pNew->nSeg = nSlot; 2923 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; 2924 pNew->pIndex = p; 2925 pNew->xSetOutputs = fts5IterSetOutputs_Noop; 2926 } 2927 return pNew; 2928 } 2929 2930 static void fts5PoslistCallback( 2931 Fts5Index *pUnused, 2932 void *pContext, 2933 const u8 *pChunk, int nChunk 2934 ){ 2935 UNUSED_PARAM(pUnused); 2936 assert_nc( nChunk>=0 ); 2937 if( nChunk>0 ){ 2938 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); 2939 } 2940 } 2941 2942 typedef struct PoslistCallbackCtx PoslistCallbackCtx; 2943 struct PoslistCallbackCtx { 2944 Fts5Buffer *pBuf; /* Append to this buffer */ 2945 Fts5Colset *pColset; /* Restrict matches to this column */ 2946 int eState; /* See above */ 2947 }; 2948 2949 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; 2950 struct PoslistOffsetsCtx { 2951 Fts5Buffer *pBuf; /* Append to this buffer */ 2952 Fts5Colset *pColset; /* Restrict matches to this column */ 2953 int iRead; 2954 int iWrite; 2955 }; 2956 2957 /* 2958 ** TODO: Make this more efficient! 
2959 */ 2960 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ 2961 int i; 2962 for(i=0; i<pColset->nCol; i++){ 2963 if( pColset->aiCol[i]==iCol ) return 1; 2964 } 2965 return 0; 2966 } 2967 2968 static void fts5PoslistOffsetsCallback( 2969 Fts5Index *pUnused, 2970 void *pContext, 2971 const u8 *pChunk, int nChunk 2972 ){ 2973 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; 2974 UNUSED_PARAM(pUnused); 2975 assert_nc( nChunk>=0 ); 2976 if( nChunk>0 ){ 2977 int i = 0; 2978 while( i<nChunk ){ 2979 int iVal; 2980 i += fts5GetVarint32(&pChunk[i], iVal); 2981 iVal += pCtx->iRead - 2; 2982 pCtx->iRead = iVal; 2983 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ 2984 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); 2985 pCtx->iWrite = iVal; 2986 } 2987 } 2988 } 2989 } 2990 2991 static void fts5PoslistFilterCallback( 2992 Fts5Index *pUnused, 2993 void *pContext, 2994 const u8 *pChunk, int nChunk 2995 ){ 2996 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; 2997 UNUSED_PARAM(pUnused); 2998 assert_nc( nChunk>=0 ); 2999 if( nChunk>0 ){ 3000 /* Search through to find the first varint with value 1. This is the 3001 ** start of the next columns hits. 
*/ 3002 int i = 0; 3003 int iStart = 0; 3004 3005 if( pCtx->eState==2 ){ 3006 int iCol; 3007 fts5FastGetVarint32(pChunk, i, iCol); 3008 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ 3009 pCtx->eState = 1; 3010 fts5BufferSafeAppendVarint(pCtx->pBuf, 1); 3011 }else{ 3012 pCtx->eState = 0; 3013 } 3014 } 3015 3016 do { 3017 while( i<nChunk && pChunk[i]!=0x01 ){ 3018 while( pChunk[i] & 0x80 ) i++; 3019 i++; 3020 } 3021 if( pCtx->eState ){ 3022 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); 3023 } 3024 if( i<nChunk ){ 3025 int iCol; 3026 iStart = i; 3027 i++; 3028 if( i>=nChunk ){ 3029 pCtx->eState = 2; 3030 }else{ 3031 fts5FastGetVarint32(pChunk, i, iCol); 3032 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); 3033 if( pCtx->eState ){ 3034 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); 3035 iStart = i; 3036 } 3037 } 3038 } 3039 }while( i<nChunk ); 3040 } 3041 } 3042 3043 static void fts5ChunkIterate( 3044 Fts5Index *p, /* Index object */ 3045 Fts5SegIter *pSeg, /* Poslist of this iterator */ 3046 void *pCtx, /* Context pointer for xChunk callback */ 3047 void (*xChunk)(Fts5Index*, void*, const u8*, int) 3048 ){ 3049 int nRem = pSeg->nPos; /* Number of bytes still to come */ 3050 Fts5Data *pData = 0; 3051 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; 3052 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); 3053 int pgno = pSeg->iLeafPgno; 3054 int pgnoSave = 0; 3055 3056 /* This function does notmwork with detail=none databases. 
*/ 3057 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); 3058 3059 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ 3060 pgnoSave = pgno+1; 3061 } 3062 3063 while( 1 ){ 3064 xChunk(p, pCtx, pChunk, nChunk); 3065 nRem -= nChunk; 3066 fts5DataRelease(pData); 3067 if( nRem<=0 ){ 3068 break; 3069 }else{ 3070 pgno++; 3071 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); 3072 if( pData==0 ) break; 3073 pChunk = &pData->p[4]; 3074 nChunk = MIN(nRem, pData->szLeaf - 4); 3075 if( pgno==pgnoSave ){ 3076 assert( pSeg->pNextLeaf==0 ); 3077 pSeg->pNextLeaf = pData; 3078 pData = 0; 3079 } 3080 } 3081 } 3082 } 3083 3084 /* 3085 ** Iterator pIter currently points to a valid entry (not EOF). This 3086 ** function appends the position list data for the current entry to 3087 ** buffer pBuf. It does not make a copy of the position-list size 3088 ** field. 3089 */ 3090 static void fts5SegiterPoslist( 3091 Fts5Index *p, 3092 Fts5SegIter *pSeg, 3093 Fts5Colset *pColset, 3094 Fts5Buffer *pBuf 3095 ){ 3096 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos) ){ 3097 if( pColset==0 ){ 3098 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); 3099 }else{ 3100 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ 3101 PoslistCallbackCtx sCtx; 3102 sCtx.pBuf = pBuf; 3103 sCtx.pColset = pColset; 3104 sCtx.eState = fts5IndexColsetTest(pColset, 0); 3105 assert( sCtx.eState==0 || sCtx.eState==1 ); 3106 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); 3107 }else{ 3108 PoslistOffsetsCtx sCtx; 3109 memset(&sCtx, 0, sizeof(sCtx)); 3110 sCtx.pBuf = pBuf; 3111 sCtx.pColset = pColset; 3112 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); 3113 } 3114 } 3115 } 3116 } 3117 3118 /* 3119 ** IN/OUT parameter (*pa) points to a position list n bytes in size. If 3120 ** the position list contains entries for column iCol, then (*pa) is set 3121 ** to point to the sub-position-list for that column and the number of 3122 ** bytes in it returned. 
Or, if the argument position list does not 3123 ** contain any entries for column iCol, return 0. 3124 */ 3125 static int fts5IndexExtractCol( 3126 const u8 **pa, /* IN/OUT: Pointer to poslist */ 3127 int n, /* IN: Size of poslist in bytes */ 3128 int iCol /* Column to extract from poslist */ 3129 ){ 3130 int iCurrent = 0; /* Anything before the first 0x01 is col 0 */ 3131 const u8 *p = *pa; 3132 const u8 *pEnd = &p[n]; /* One byte past end of position list */ 3133 3134 while( iCol>iCurrent ){ 3135 /* Advance pointer p until it points to pEnd or an 0x01 byte that is 3136 ** not part of a varint. Note that it is not possible for a negative 3137 ** or extremely large varint to occur within an uncorrupted position 3138 ** list. So the last byte of each varint may be assumed to have a clear 3139 ** 0x80 bit. */ 3140 while( *p!=0x01 ){ 3141 while( *p++ & 0x80 ); 3142 if( p>=pEnd ) return 0; 3143 } 3144 *pa = p++; 3145 iCurrent = *p++; 3146 if( iCurrent & 0x80 ){ 3147 p--; 3148 p += fts5GetVarint32(p, iCurrent); 3149 } 3150 } 3151 if( iCol!=iCurrent ) return 0; 3152 3153 /* Advance pointer p until it points to pEnd or an 0x01 byte that is 3154 ** not part of a varint */ 3155 while( p<pEnd && *p!=0x01 ){ 3156 while( *p++ & 0x80 ); 3157 } 3158 3159 return p - (*pa); 3160 } 3161 3162 static void fts5IndexExtractColset( 3163 int *pRc, 3164 Fts5Colset *pColset, /* Colset to filter on */ 3165 const u8 *pPos, int nPos, /* Position list */ 3166 Fts5Buffer *pBuf /* Output buffer */ 3167 ){ 3168 if( *pRc==SQLITE_OK ){ 3169 int i; 3170 fts5BufferZero(pBuf); 3171 for(i=0; i<pColset->nCol; i++){ 3172 const u8 *pSub = pPos; 3173 int nSub = fts5IndexExtractCol(&pSub, nPos, pColset->aiCol[i]); 3174 if( nSub ){ 3175 fts5BufferAppendBlob(pRc, pBuf, nSub, pSub); 3176 } 3177 } 3178 } 3179 } 3180 3181 /* 3182 ** xSetOutputs callback used by detail=none tables. 
3183 */ 3184 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ 3185 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); 3186 pIter->base.iRowid = pSeg->iRowid; 3187 pIter->base.nData = pSeg->nPos; 3188 } 3189 3190 /* 3191 ** xSetOutputs callback used by detail=full and detail=col tables when no 3192 ** column filters are specified. 3193 */ 3194 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ 3195 pIter->base.iRowid = pSeg->iRowid; 3196 pIter->base.nData = pSeg->nPos; 3197 3198 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); 3199 assert( pIter->pColset==0 ); 3200 3201 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ 3202 /* All data is stored on the current page. Populate the output 3203 ** variables to point into the body of the page object. */ 3204 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; 3205 }else{ 3206 /* The data is distributed over two or more pages. Copy it into the 3207 ** Fts5Iter.poslist buffer and then set the output pointer to point 3208 ** to this buffer. */ 3209 fts5BufferZero(&pIter->poslist); 3210 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); 3211 pIter->base.pData = pIter->poslist.p; 3212 } 3213 } 3214 3215 /* 3216 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match 3217 ** against no columns at all). 3218 */ 3219 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ 3220 UNUSED_PARAM(pSeg); 3221 pIter->base.nData = 0; 3222 } 3223 3224 /* 3225 ** xSetOutputs callback used by detail=col when there is a column filter 3226 ** and there are 100 or more columns. Also called as a fallback from 3227 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. 
3228 */ 3229 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ 3230 fts5BufferZero(&pIter->poslist); 3231 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); 3232 pIter->base.iRowid = pSeg->iRowid; 3233 pIter->base.pData = pIter->poslist.p; 3234 pIter->base.nData = pIter->poslist.n; 3235 } 3236 3237 /* 3238 ** xSetOutputs callback used when: 3239 ** 3240 ** * detail=col, 3241 ** * there is a column filter, and 3242 ** * the table contains 100 or fewer columns. 3243 ** 3244 ** The last point is to ensure all column numbers are stored as 3245 ** single-byte varints. 3246 */ 3247 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ 3248 3249 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); 3250 assert( pIter->pColset ); 3251 3252 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ 3253 fts5IterSetOutputs_Col(pIter, pSeg); 3254 }else{ 3255 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; 3256 u8 *pEnd = (u8*)&a[pSeg->nPos]; 3257 int iPrev = 0; 3258 int *aiCol = pIter->pColset->aiCol; 3259 int *aiColEnd = &aiCol[pIter->pColset->nCol]; 3260 3261 u8 *aOut = pIter->poslist.p; 3262 int iPrevOut = 0; 3263 3264 pIter->base.iRowid = pSeg->iRowid; 3265 3266 while( a<pEnd ){ 3267 iPrev += (int)a++[0] - 2; 3268 while( *aiCol<iPrev ){ 3269 aiCol++; 3270 if( aiCol==aiColEnd ) goto setoutputs_col_out; 3271 } 3272 if( *aiCol==iPrev ){ 3273 *aOut++ = (u8)((iPrev - iPrevOut) + 2); 3274 iPrevOut = iPrev; 3275 } 3276 } 3277 3278 setoutputs_col_out: 3279 pIter->base.pData = pIter->poslist.p; 3280 pIter->base.nData = aOut - pIter->poslist.p; 3281 } 3282 } 3283 3284 /* 3285 ** xSetOutputs callback used by detail=full when there is a column filter. 
3286 */ 3287 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){ 3288 Fts5Colset *pColset = pIter->pColset; 3289 pIter->base.iRowid = pSeg->iRowid; 3290 3291 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL ); 3292 assert( pColset ); 3293 3294 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ 3295 /* All data is stored on the current page. Populate the output 3296 ** variables to point into the body of the page object. */ 3297 const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset]; 3298 if( pColset->nCol==1 ){ 3299 pIter->base.nData = fts5IndexExtractCol(&a, pSeg->nPos,pColset->aiCol[0]); 3300 pIter->base.pData = a; 3301 }else{ 3302 int *pRc = &pIter->pIndex->rc; 3303 fts5BufferZero(&pIter->poslist); 3304 fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, &pIter->poslist); 3305 pIter->base.pData = pIter->poslist.p; 3306 pIter->base.nData = pIter->poslist.n; 3307 } 3308 }else{ 3309 /* The data is distributed over two or more pages. Copy it into the 3310 ** Fts5Iter.poslist buffer and then set the output pointer to point 3311 ** to this buffer. 
*/ 3312 fts5BufferZero(&pIter->poslist); 3313 fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist); 3314 pIter->base.pData = pIter->poslist.p; 3315 pIter->base.nData = pIter->poslist.n; 3316 } 3317 } 3318 3319 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){ 3320 if( *pRc==SQLITE_OK ){ 3321 Fts5Config *pConfig = pIter->pIndex->pConfig; 3322 if( pConfig->eDetail==FTS5_DETAIL_NONE ){ 3323 pIter->xSetOutputs = fts5IterSetOutputs_None; 3324 } 3325 3326 else if( pIter->pColset==0 ){ 3327 pIter->xSetOutputs = fts5IterSetOutputs_Nocolset; 3328 } 3329 3330 else if( pIter->pColset->nCol==0 ){ 3331 pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset; 3332 } 3333 3334 else if( pConfig->eDetail==FTS5_DETAIL_FULL ){ 3335 pIter->xSetOutputs = fts5IterSetOutputs_Full; 3336 } 3337 3338 else{ 3339 assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS ); 3340 if( pConfig->nCol<=100 ){ 3341 pIter->xSetOutputs = fts5IterSetOutputs_Col100; 3342 sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol); 3343 }else{ 3344 pIter->xSetOutputs = fts5IterSetOutputs_Col; 3345 } 3346 } 3347 } 3348 } 3349 3350 3351 /* 3352 ** Allocate a new Fts5Iter object. 3353 ** 3354 ** The new object will be used to iterate through data in structure pStruct. 3355 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel 3356 ** is zero or greater, data from the first nSegment segments on level iLevel 3357 ** is merged. 3358 ** 3359 ** The iterator initially points to the first term/rowid entry in the 3360 ** iterated data. 
3361 */ 3362 static void fts5MultiIterNew( 3363 Fts5Index *p, /* FTS5 backend to iterate within */ 3364 Fts5Structure *pStruct, /* Structure of specific index */ 3365 int flags, /* FTS5INDEX_QUERY_XXX flags */ 3366 Fts5Colset *pColset, /* Colset to filter on (or NULL) */ 3367 const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ 3368 int iLevel, /* Level to iterate (-1 for all) */ 3369 int nSegment, /* Number of segments to merge (iLevel>=0) */ 3370 Fts5Iter **ppOut /* New object */ 3371 ){ 3372 int nSeg = 0; /* Number of segment-iters in use */ 3373 int iIter = 0; /* */ 3374 int iSeg; /* Used to iterate through segments */ 3375 Fts5StructureLevel *pLvl; 3376 Fts5Iter *pNew; 3377 3378 assert( (pTerm==0 && nTerm==0) || iLevel<0 ); 3379 3380 /* Allocate space for the new multi-seg-iterator. */ 3381 if( p->rc==SQLITE_OK ){ 3382 if( iLevel<0 ){ 3383 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); 3384 nSeg = pStruct->nSegment; 3385 nSeg += (p->pHash ? 1 : 0); 3386 }else{ 3387 nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); 3388 } 3389 } 3390 *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); 3391 if( pNew==0 ) return; 3392 pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); 3393 pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY)); 3394 pNew->pStruct = pStruct; 3395 pNew->pColset = pColset; 3396 fts5StructureRef(pStruct); 3397 if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){ 3398 fts5IterSetOutputCb(&p->rc, pNew); 3399 } 3400 3401 /* Initialize each of the component segment iterators. */ 3402 if( p->rc==SQLITE_OK ){ 3403 if( iLevel<0 ){ 3404 Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; 3405 if( p->pHash ){ 3406 /* Add a segment iterator for the current contents of the hash table. 
*/ 3407 Fts5SegIter *pIter = &pNew->aSeg[iIter++]; 3408 fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); 3409 } 3410 for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){ 3411 for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){ 3412 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; 3413 Fts5SegIter *pIter = &pNew->aSeg[iIter++]; 3414 if( pTerm==0 ){ 3415 fts5SegIterInit(p, pSeg, pIter); 3416 }else{ 3417 fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); 3418 } 3419 } 3420 } 3421 }else{ 3422 pLvl = &pStruct->aLevel[iLevel]; 3423 for(iSeg=nSeg-1; iSeg>=0; iSeg--){ 3424 fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); 3425 } 3426 } 3427 assert( iIter==nSeg ); 3428 } 3429 3430 /* If the above was successful, each component iterators now points 3431 ** to the first entry in its segment. In this case initialize the 3432 ** aFirst[] array. Or, if an error has occurred, free the iterator 3433 ** object and set the output variable to NULL. */ 3434 if( p->rc==SQLITE_OK ){ 3435 for(iIter=pNew->nSeg-1; iIter>0; iIter--){ 3436 int iEq; 3437 if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ 3438 Fts5SegIter *pSeg = &pNew->aSeg[iEq]; 3439 if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0); 3440 fts5MultiIterAdvanced(p, pNew, iEq, iIter); 3441 } 3442 } 3443 fts5MultiIterSetEof(pNew); 3444 fts5AssertMultiIterSetup(p, pNew); 3445 3446 if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ 3447 fts5MultiIterNext(p, pNew, 0, 0); 3448 }else if( pNew->base.bEof==0 ){ 3449 Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst]; 3450 pNew->xSetOutputs(pNew, pSeg); 3451 } 3452 3453 }else{ 3454 fts5MultiIterFree(pNew); 3455 *ppOut = 0; 3456 } 3457 } 3458 3459 /* 3460 ** Create an Fts5Iter that iterates through the doclist provided 3461 ** as the second argument. 
3462 */ 3463 static void fts5MultiIterNew2( 3464 Fts5Index *p, /* FTS5 backend to iterate within */ 3465 Fts5Data *pData, /* Doclist to iterate through */ 3466 int bDesc, /* True for descending rowid order */ 3467 Fts5Iter **ppOut /* New object */ 3468 ){ 3469 Fts5Iter *pNew; 3470 pNew = fts5MultiIterAlloc(p, 2); 3471 if( pNew ){ 3472 Fts5SegIter *pIter = &pNew->aSeg[1]; 3473 3474 pIter->flags = FTS5_SEGITER_ONETERM; 3475 if( pData->szLeaf>0 ){ 3476 pIter->pLeaf = pData; 3477 pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); 3478 pIter->iEndofDoclist = pData->nn; 3479 pNew->aFirst[1].iFirst = 1; 3480 if( bDesc ){ 3481 pNew->bRev = 1; 3482 pIter->flags |= FTS5_SEGITER_REVERSE; 3483 fts5SegIterReverseInitPage(p, pIter); 3484 }else{ 3485 fts5SegIterLoadNPos(p, pIter); 3486 } 3487 pData = 0; 3488 }else{ 3489 pNew->base.bEof = 1; 3490 } 3491 fts5SegIterSetNext(p, pIter); 3492 3493 *ppOut = pNew; 3494 } 3495 3496 fts5DataRelease(pData); 3497 } 3498 3499 /* 3500 ** Return true if the iterator is at EOF or if an error has occurred. 3501 ** False otherwise. 3502 */ 3503 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ 3504 assert( p->rc 3505 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof 3506 ); 3507 return (p->rc || pIter->base.bEof); 3508 } 3509 3510 /* 3511 ** Return the rowid of the entry that the iterator currently points 3512 ** to. If the iterator points to EOF when this function is called the 3513 ** results are undefined. 3514 */ 3515 static i64 fts5MultiIterRowid(Fts5Iter *pIter){ 3516 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); 3517 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; 3518 } 3519 3520 /* 3521 ** Move the iterator to the next entry at or following iMatch. 
3522 */ 3523 static void fts5MultiIterNextFrom( 3524 Fts5Index *p, 3525 Fts5Iter *pIter, 3526 i64 iMatch 3527 ){ 3528 while( 1 ){ 3529 i64 iRowid; 3530 fts5MultiIterNext(p, pIter, 1, iMatch); 3531 if( fts5MultiIterEof(p, pIter) ) break; 3532 iRowid = fts5MultiIterRowid(pIter); 3533 if( pIter->bRev==0 && iRowid>=iMatch ) break; 3534 if( pIter->bRev!=0 && iRowid<=iMatch ) break; 3535 } 3536 } 3537 3538 /* 3539 ** Return a pointer to a buffer containing the term associated with the 3540 ** entry that the iterator currently points to. 3541 */ 3542 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ 3543 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 3544 *pn = p->term.n; 3545 return p->term.p; 3546 } 3547 3548 /* 3549 ** Allocate a new segment-id for the structure pStruct. The new segment 3550 ** id must be between 1 and 65335 inclusive, and must not be used by 3551 ** any currently existing segment. If a free segment id cannot be found, 3552 ** SQLITE_FULL is returned. 3553 ** 3554 ** If an error has already occurred, this function is a no-op. 0 is 3555 ** returned in this case. 3556 */ 3557 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ 3558 int iSegid = 0; 3559 3560 if( p->rc==SQLITE_OK ){ 3561 if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ 3562 p->rc = SQLITE_FULL; 3563 }else{ 3564 /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following 3565 ** array is 63 elements, or 252 bytes, in size. 
*/ 3566 u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32]; 3567 int iLvl, iSeg; 3568 int i; 3569 u32 mask; 3570 memset(aUsed, 0, sizeof(aUsed)); 3571 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 3572 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 3573 int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid; 3574 if( iId<=FTS5_MAX_SEGMENT ){ 3575 aUsed[(iId-1) / 32] |= 1 << ((iId-1) % 32); 3576 } 3577 } 3578 } 3579 3580 for(i=0; aUsed[i]==0xFFFFFFFF; i++); 3581 mask = aUsed[i]; 3582 for(iSegid=0; mask & (1 << iSegid); iSegid++); 3583 iSegid += 1 + i*32; 3584 3585 #ifdef SQLITE_DEBUG 3586 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 3587 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 3588 assert( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ); 3589 } 3590 } 3591 assert( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT ); 3592 3593 { 3594 sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p); 3595 if( p->rc==SQLITE_OK ){ 3596 u8 aBlob[2] = {0xff, 0xff}; 3597 sqlite3_bind_int(pIdxSelect, 1, iSegid); 3598 sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC); 3599 assert( sqlite3_step(pIdxSelect)!=SQLITE_ROW ); 3600 p->rc = sqlite3_reset(pIdxSelect); 3601 } 3602 } 3603 #endif 3604 } 3605 } 3606 3607 return iSegid; 3608 } 3609 3610 /* 3611 ** Discard all data currently cached in the hash-tables. 3612 */ 3613 static void fts5IndexDiscardData(Fts5Index *p){ 3614 assert( p->pHash || p->nPendingData==0 ); 3615 if( p->pHash ){ 3616 sqlite3Fts5HashClear(p->pHash); 3617 p->nPendingData = 0; 3618 } 3619 } 3620 3621 /* 3622 ** Return the size of the prefix, in bytes, that buffer 3623 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). 3624 ** 3625 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater 3626 ** than buffer (pOld/nOld). 
3627 */ 3628 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ 3629 int i; 3630 for(i=0; i<nOld; i++){ 3631 if( pOld[i]!=pNew[i] ) break; 3632 } 3633 return i; 3634 } 3635 3636 static void fts5WriteDlidxClear( 3637 Fts5Index *p, 3638 Fts5SegWriter *pWriter, 3639 int bFlush /* If true, write dlidx to disk */ 3640 ){ 3641 int i; 3642 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); 3643 for(i=0; i<pWriter->nDlidx; i++){ 3644 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; 3645 if( pDlidx->buf.n==0 ) break; 3646 if( bFlush ){ 3647 assert( pDlidx->pgno!=0 ); 3648 fts5DataWrite(p, 3649 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), 3650 pDlidx->buf.p, pDlidx->buf.n 3651 ); 3652 } 3653 sqlite3Fts5BufferZero(&pDlidx->buf); 3654 pDlidx->bPrevValid = 0; 3655 } 3656 } 3657 3658 /* 3659 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. 3660 ** Any new array elements are zeroed before returning. 3661 */ 3662 static int fts5WriteDlidxGrow( 3663 Fts5Index *p, 3664 Fts5SegWriter *pWriter, 3665 int nLvl 3666 ){ 3667 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ 3668 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( 3669 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl 3670 ); 3671 if( aDlidx==0 ){ 3672 p->rc = SQLITE_NOMEM; 3673 }else{ 3674 int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); 3675 memset(&aDlidx[pWriter->nDlidx], 0, nByte); 3676 pWriter->aDlidx = aDlidx; 3677 pWriter->nDlidx = nLvl; 3678 } 3679 } 3680 return p->rc; 3681 } 3682 3683 /* 3684 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large 3685 ** enough, flush it to disk and return 1. Otherwise discard it and return 3686 ** zero. 3687 */ 3688 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ 3689 int bFlag = 0; 3690 3691 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written 3692 ** to the database, also write the doclist-index to disk. 
*/ 3693 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ 3694 bFlag = 1; 3695 } 3696 fts5WriteDlidxClear(p, pWriter, bFlag); 3697 pWriter->nEmpty = 0; 3698 return bFlag; 3699 } 3700 3701 /* 3702 ** This function is called whenever processing of the doclist for the 3703 ** last term on leaf page (pWriter->iBtPage) is completed. 3704 ** 3705 ** The doclist-index for that term is currently stored in-memory within the 3706 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function 3707 ** writes it out to disk. Or, if it is too small to bother with, discards 3708 ** it. 3709 ** 3710 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. 3711 */ 3712 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ 3713 int bFlag; 3714 3715 assert( pWriter->iBtPage || pWriter->nEmpty==0 ); 3716 if( pWriter->iBtPage==0 ) return; 3717 bFlag = fts5WriteFlushDlidx(p, pWriter); 3718 3719 if( p->rc==SQLITE_OK ){ 3720 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); 3721 /* The following was already done in fts5WriteInit(): */ 3722 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ 3723 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC); 3724 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); 3725 sqlite3_step(p->pIdxWriter); 3726 p->rc = sqlite3_reset(p->pIdxWriter); 3727 } 3728 pWriter->iBtPage = 0; 3729 } 3730 3731 /* 3732 ** This is called once for each leaf page except the first that contains 3733 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that 3734 ** is larger than all terms written to earlier leaves, and equal to or 3735 ** smaller than the first term on the new leaf. 3736 ** 3737 ** If an error occurs, an error code is left in Fts5Index.rc. If an error 3738 ** has already occurred when this function is called, it is a no-op. 
3739 */ 3740 static void fts5WriteBtreeTerm( 3741 Fts5Index *p, /* FTS5 backend object */ 3742 Fts5SegWriter *pWriter, /* Writer object */ 3743 int nTerm, const u8 *pTerm /* First term on new page */ 3744 ){ 3745 fts5WriteFlushBtree(p, pWriter); 3746 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm); 3747 pWriter->iBtPage = pWriter->writer.pgno; 3748 } 3749 3750 /* 3751 ** This function is called when flushing a leaf page that contains no 3752 ** terms at all to disk. 3753 */ 3754 static void fts5WriteBtreeNoTerm( 3755 Fts5Index *p, /* FTS5 backend object */ 3756 Fts5SegWriter *pWriter /* Writer object */ 3757 ){ 3758 /* If there were no rowids on the leaf page either and the doclist-index 3759 ** has already been started, append an 0x00 byte to it. */ 3760 if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ 3761 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; 3762 assert( pDlidx->bPrevValid ); 3763 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); 3764 } 3765 3766 /* Increment the "number of sequential leaves without a term" counter. */ 3767 pWriter->nEmpty++; 3768 } 3769 3770 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ 3771 i64 iRowid; 3772 int iOff; 3773 3774 iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); 3775 fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); 3776 return iRowid; 3777 } 3778 3779 /* 3780 ** Rowid iRowid has just been appended to the current leaf page. It is the 3781 ** first on the page. This function appends an appropriate entry to the current 3782 ** doclist-index. 3783 */ 3784 static void fts5WriteDlidxAppend( 3785 Fts5Index *p, 3786 Fts5SegWriter *pWriter, 3787 i64 iRowid 3788 ){ 3789 int i; 3790 int bDone = 0; 3791 3792 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ 3793 i64 iVal; 3794 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; 3795 3796 if( pDlidx->buf.n>=p->pConfig->pgsz ){ 3797 /* The current doclist-index page is full. 
Write it to disk and push 3798 ** a copy of iRowid (which will become the first rowid on the next 3799 ** doclist-index leaf page) up into the next level of the b-tree 3800 ** hierarchy. If the node being flushed is currently the root node, 3801 ** also push its first rowid upwards. */ 3802 pDlidx->buf.p[0] = 0x01; /* Not the root node */ 3803 fts5DataWrite(p, 3804 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), 3805 pDlidx->buf.p, pDlidx->buf.n 3806 ); 3807 fts5WriteDlidxGrow(p, pWriter, i+2); 3808 pDlidx = &pWriter->aDlidx[i]; 3809 if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ 3810 i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); 3811 3812 /* This was the root node. Push its first rowid up to the new root. */ 3813 pDlidx[1].pgno = pDlidx->pgno; 3814 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); 3815 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); 3816 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); 3817 pDlidx[1].bPrevValid = 1; 3818 pDlidx[1].iPrev = iFirst; 3819 } 3820 3821 sqlite3Fts5BufferZero(&pDlidx->buf); 3822 pDlidx->bPrevValid = 0; 3823 pDlidx->pgno++; 3824 }else{ 3825 bDone = 1; 3826 } 3827 3828 if( pDlidx->bPrevValid ){ 3829 iVal = iRowid - pDlidx->iPrev; 3830 }else{ 3831 i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno); 3832 assert( pDlidx->buf.n==0 ); 3833 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); 3834 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); 3835 iVal = iRowid; 3836 } 3837 3838 sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); 3839 pDlidx->bPrevValid = 1; 3840 pDlidx->iPrev = iRowid; 3841 } 3842 } 3843 3844 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ 3845 static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; 3846 Fts5PageWriter *pPage = &pWriter->writer; 3847 i64 iRowid; 3848 3849 assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); 3850 3851 /* Set the szLeaf header field. 
*/ 3852 assert( 0==fts5GetU16(&pPage->buf.p[2]) ); 3853 fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n); 3854 3855 if( pWriter->bFirstTermInPage ){ 3856 /* No term was written to this page. */ 3857 assert( pPage->pgidx.n==0 ); 3858 fts5WriteBtreeNoTerm(p, pWriter); 3859 }else{ 3860 /* Append the pgidx to the page buffer. Set the szLeaf header field. */ 3861 fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); 3862 } 3863 3864 /* Write the page out to disk */ 3865 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); 3866 fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); 3867 3868 /* Initialize the next page. */ 3869 fts5BufferZero(&pPage->buf); 3870 fts5BufferZero(&pPage->pgidx); 3871 fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); 3872 pPage->iPrevPgidx = 0; 3873 pPage->pgno++; 3874 3875 /* Increase the leaves written counter */ 3876 pWriter->nLeafWritten++; 3877 3878 /* The new leaf holds no terms or rowids */ 3879 pWriter->bFirstTermInPage = 1; 3880 pWriter->bFirstRowidInPage = 1; 3881 } 3882 3883 /* 3884 ** Append term pTerm/nTerm to the segment being written by the writer passed 3885 ** as the second argument. 3886 ** 3887 ** If an error occurs, set the Fts5Index.rc error code. If an error has 3888 ** already occurred, this function is a no-op. 3889 */ 3890 static void fts5WriteAppendTerm( 3891 Fts5Index *p, 3892 Fts5SegWriter *pWriter, 3893 int nTerm, const u8 *pTerm 3894 ){ 3895 int nPrefix; /* Bytes of prefix compression for term */ 3896 Fts5PageWriter *pPage = &pWriter->writer; 3897 Fts5Buffer *pPgidx = &pWriter->writer.pgidx; 3898 3899 assert( p->rc==SQLITE_OK ); 3900 assert( pPage->buf.n>=4 ); 3901 assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); 3902 3903 /* If the current leaf page is full, flush it to disk. 
*/ 3904 if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ 3905 if( pPage->buf.n>4 ){ 3906 fts5WriteFlushLeaf(p, pWriter); 3907 } 3908 fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); 3909 } 3910 3911 /* TODO1: Updating pgidx here. */ 3912 pPgidx->n += sqlite3Fts5PutVarint( 3913 &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx 3914 ); 3915 pPage->iPrevPgidx = pPage->buf.n; 3916 #if 0 3917 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); 3918 pPgidx->n += 2; 3919 #endif 3920 3921 if( pWriter->bFirstTermInPage ){ 3922 nPrefix = 0; 3923 if( pPage->pgno!=1 ){ 3924 /* This is the first term on a leaf that is not the leftmost leaf in 3925 ** the segment b-tree. In this case it is necessary to add a term to 3926 ** the b-tree hierarchy that is (a) larger than the largest term 3927 ** already written to the segment and (b) smaller than or equal to 3928 ** this term. In other words, a prefix of (pTerm/nTerm) that is one 3929 ** byte longer than the longest prefix (pTerm/nTerm) shares with the 3930 ** previous term. 3931 ** 3932 ** Usually, the previous term is available in pPage->term. The exception 3933 ** is if this is the first term written in an incremental-merge step. 3934 ** In this case the previous term is not available, so just write a 3935 ** copy of (pTerm/nTerm) into the parent node. This is slightly 3936 ** inefficient, but still correct. */ 3937 int n = nTerm; 3938 if( pPage->term.n ){ 3939 n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); 3940 } 3941 fts5WriteBtreeTerm(p, pWriter, n, pTerm); 3942 pPage = &pWriter->writer; 3943 } 3944 }else{ 3945 nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, pTerm); 3946 fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); 3947 } 3948 3949 /* Append the number of bytes of new data, then the term data itself 3950 ** to the page. 
*/ 3951 fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); 3952 fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); 3953 3954 /* Update the Fts5PageWriter.term field. */ 3955 fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); 3956 pWriter->bFirstTermInPage = 0; 3957 3958 pWriter->bFirstRowidInPage = 0; 3959 pWriter->bFirstRowidInDoclist = 1; 3960 3961 assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); 3962 pWriter->aDlidx[0].pgno = pPage->pgno; 3963 } 3964 3965 /* 3966 ** Append a rowid and position-list size field to the writers output. 3967 */ 3968 static void fts5WriteAppendRowid( 3969 Fts5Index *p, 3970 Fts5SegWriter *pWriter, 3971 i64 iRowid 3972 ){ 3973 if( p->rc==SQLITE_OK ){ 3974 Fts5PageWriter *pPage = &pWriter->writer; 3975 3976 if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ 3977 fts5WriteFlushLeaf(p, pWriter); 3978 } 3979 3980 /* If this is to be the first rowid written to the page, set the 3981 ** rowid-pointer in the page-header. Also append a value to the dlidx 3982 ** buffer, in case a doclist-index is required. */ 3983 if( pWriter->bFirstRowidInPage ){ 3984 fts5PutU16(pPage->buf.p, (u16)pPage->buf.n); 3985 fts5WriteDlidxAppend(p, pWriter, iRowid); 3986 } 3987 3988 /* Write the rowid. 
*/ 3989 if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ 3990 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); 3991 }else{ 3992 assert( p->rc || iRowid>pWriter->iPrevRowid ); 3993 fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); 3994 } 3995 pWriter->iPrevRowid = iRowid; 3996 pWriter->bFirstRowidInDoclist = 0; 3997 pWriter->bFirstRowidInPage = 0; 3998 } 3999 } 4000 4001 static void fts5WriteAppendPoslistData( 4002 Fts5Index *p, 4003 Fts5SegWriter *pWriter, 4004 const u8 *aData, 4005 int nData 4006 ){ 4007 Fts5PageWriter *pPage = &pWriter->writer; 4008 const u8 *a = aData; 4009 int n = nData; 4010 4011 assert( p->pConfig->pgsz>0 ); 4012 while( p->rc==SQLITE_OK 4013 && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz 4014 ){ 4015 int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; 4016 int nCopy = 0; 4017 while( nCopy<nReq ){ 4018 i64 dummy; 4019 nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy); 4020 } 4021 fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a); 4022 a += nCopy; 4023 n -= nCopy; 4024 fts5WriteFlushLeaf(p, pWriter); 4025 } 4026 if( n>0 ){ 4027 fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); 4028 } 4029 } 4030 4031 /* 4032 ** Flush any data cached by the writer object to the database. Free any 4033 ** allocations associated with the writer. 
4034 */ 4035 static void fts5WriteFinish( 4036 Fts5Index *p, 4037 Fts5SegWriter *pWriter, /* Writer object */ 4038 int *pnLeaf /* OUT: Number of leaf pages in b-tree */ 4039 ){ 4040 int i; 4041 Fts5PageWriter *pLeaf = &pWriter->writer; 4042 if( p->rc==SQLITE_OK ){ 4043 assert( pLeaf->pgno>=1 ); 4044 if( pLeaf->buf.n>4 ){ 4045 fts5WriteFlushLeaf(p, pWriter); 4046 } 4047 *pnLeaf = pLeaf->pgno-1; 4048 if( pLeaf->pgno>1 ){ 4049 fts5WriteFlushBtree(p, pWriter); 4050 } 4051 } 4052 fts5BufferFree(&pLeaf->term); 4053 fts5BufferFree(&pLeaf->buf); 4054 fts5BufferFree(&pLeaf->pgidx); 4055 fts5BufferFree(&pWriter->btterm); 4056 4057 for(i=0; i<pWriter->nDlidx; i++){ 4058 sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); 4059 } 4060 sqlite3_free(pWriter->aDlidx); 4061 } 4062 4063 static void fts5WriteInit( 4064 Fts5Index *p, 4065 Fts5SegWriter *pWriter, 4066 int iSegid 4067 ){ 4068 const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; 4069 4070 memset(pWriter, 0, sizeof(Fts5SegWriter)); 4071 pWriter->iSegid = iSegid; 4072 4073 fts5WriteDlidxGrow(p, pWriter, 1); 4074 pWriter->writer.pgno = 1; 4075 pWriter->bFirstTermInPage = 1; 4076 pWriter->iBtPage = 1; 4077 4078 assert( pWriter->writer.buf.n==0 ); 4079 assert( pWriter->writer.pgidx.n==0 ); 4080 4081 /* Grow the two buffers to pgsz + padding bytes in size. */ 4082 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer); 4083 sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer); 4084 4085 if( p->pIdxWriter==0 ){ 4086 Fts5Config *pConfig = p->pConfig; 4087 fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( 4088 "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 4089 pConfig->zDb, pConfig->zName 4090 )); 4091 } 4092 4093 if( p->rc==SQLITE_OK ){ 4094 /* Initialize the 4-byte leaf-page header to 0x00. */ 4095 memset(pWriter->writer.buf.p, 0, 4); 4096 pWriter->writer.buf.n = 4; 4097 4098 /* Bind the current output segment id to the index-writer. 
This is an 4099 ** optimization over binding the same value over and over as rows are 4100 ** inserted into %_idx by the current writer. */ 4101 sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); 4102 } 4103 } 4104 4105 /* 4106 ** Iterator pIter was used to iterate through the input segments of on an 4107 ** incremental merge operation. This function is called if the incremental 4108 ** merge step has finished but the input has not been completely exhausted. 4109 */ 4110 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){ 4111 int i; 4112 Fts5Buffer buf; 4113 memset(&buf, 0, sizeof(Fts5Buffer)); 4114 for(i=0; i<pIter->nSeg; i++){ 4115 Fts5SegIter *pSeg = &pIter->aSeg[i]; 4116 if( pSeg->pSeg==0 ){ 4117 /* no-op */ 4118 }else if( pSeg->pLeaf==0 ){ 4119 /* All keys from this input segment have been transfered to the output. 4120 ** Set both the first and last page-numbers to 0 to indicate that the 4121 ** segment is now empty. */ 4122 pSeg->pSeg->pgnoLast = 0; 4123 pSeg->pSeg->pgnoFirst = 0; 4124 }else{ 4125 int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ 4126 i64 iLeafRowid; 4127 Fts5Data *pData; 4128 int iId = pSeg->pSeg->iSegid; 4129 u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; 4130 4131 iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno); 4132 pData = fts5DataRead(p, iLeafRowid); 4133 if( pData ){ 4134 fts5BufferZero(&buf); 4135 fts5BufferGrow(&p->rc, &buf, pData->nn); 4136 fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); 4137 fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); 4138 fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); 4139 fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); 4140 if( p->rc==SQLITE_OK ){ 4141 /* Set the szLeaf field */ 4142 fts5PutU16(&buf.p[2], (u16)buf.n); 4143 } 4144 4145 /* Set up the new page-index array */ 4146 fts5BufferAppendVarint(&p->rc, &buf, 4); 4147 if( pSeg->iLeafPgno==pSeg->iTermLeafPgno 4148 && pSeg->iEndofDoclist<pData->szLeaf 4149 ){ 4150 int 
nDiff = pData->szLeaf - pSeg->iEndofDoclist; 4151 fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); 4152 fts5BufferAppendBlob(&p->rc, &buf, 4153 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] 4154 ); 4155 } 4156 4157 fts5DataRelease(pData); 4158 pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; 4159 fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid); 4160 fts5DataWrite(p, iLeafRowid, buf.p, buf.n); 4161 } 4162 } 4163 } 4164 fts5BufferFree(&buf); 4165 } 4166 4167 static void fts5MergeChunkCallback( 4168 Fts5Index *p, 4169 void *pCtx, 4170 const u8 *pChunk, int nChunk 4171 ){ 4172 Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; 4173 fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); 4174 } 4175 4176 /* 4177 ** 4178 */ 4179 static void fts5IndexMergeLevel( 4180 Fts5Index *p, /* FTS5 backend object */ 4181 Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ 4182 int iLvl, /* Level to read input from */ 4183 int *pnRem /* Write up to this many output leaves */ 4184 ){ 4185 Fts5Structure *pStruct = *ppStruct; 4186 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 4187 Fts5StructureLevel *pLvlOut; 4188 Fts5Iter *pIter = 0; /* Iterator to read input data */ 4189 int nRem = pnRem ? 
*pnRem : 0; /* Output leaf pages left to write */ 4190 int nInput; /* Number of input segments */ 4191 Fts5SegWriter writer; /* Writer object */ 4192 Fts5StructureSegment *pSeg; /* Output segment */ 4193 Fts5Buffer term; 4194 int bOldest; /* True if the output segment is the oldest */ 4195 int eDetail = p->pConfig->eDetail; 4196 const int flags = FTS5INDEX_QUERY_NOOUTPUT; 4197 int bTermWritten = 0; /* True if current term already output */ 4198 4199 assert( iLvl<pStruct->nLevel ); 4200 assert( pLvl->nMerge<=pLvl->nSeg ); 4201 4202 memset(&writer, 0, sizeof(Fts5SegWriter)); 4203 memset(&term, 0, sizeof(Fts5Buffer)); 4204 if( pLvl->nMerge ){ 4205 pLvlOut = &pStruct->aLevel[iLvl+1]; 4206 assert( pLvlOut->nSeg>0 ); 4207 nInput = pLvl->nMerge; 4208 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; 4209 4210 fts5WriteInit(p, &writer, pSeg->iSegid); 4211 writer.writer.pgno = pSeg->pgnoLast+1; 4212 writer.iBtPage = 0; 4213 }else{ 4214 int iSegid = fts5AllocateSegid(p, pStruct); 4215 4216 /* Extend the Fts5Structure object as required to ensure the output 4217 ** segment exists. 
*/ 4218 if( iLvl==pStruct->nLevel-1 ){ 4219 fts5StructureAddLevel(&p->rc, ppStruct); 4220 pStruct = *ppStruct; 4221 } 4222 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); 4223 if( p->rc ) return; 4224 pLvl = &pStruct->aLevel[iLvl]; 4225 pLvlOut = &pStruct->aLevel[iLvl+1]; 4226 4227 fts5WriteInit(p, &writer, iSegid); 4228 4229 /* Add the new segment to the output level */ 4230 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; 4231 pLvlOut->nSeg++; 4232 pSeg->pgnoFirst = 1; 4233 pSeg->iSegid = iSegid; 4234 pStruct->nSegment++; 4235 4236 /* Read input from all segments in the input level */ 4237 nInput = pLvl->nSeg; 4238 } 4239 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); 4240 4241 assert( iLvl>=0 ); 4242 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); 4243 fts5MultiIterEof(p, pIter)==0; 4244 fts5MultiIterNext(p, pIter, 0, 0) 4245 ){ 4246 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 4247 int nPos; /* position-list size field value */ 4248 int nTerm; 4249 const u8 *pTerm; 4250 4251 pTerm = fts5MultiIterTerm(pIter, &nTerm); 4252 if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ 4253 if( pnRem && writer.nLeafWritten>nRem ){ 4254 break; 4255 } 4256 fts5BufferSet(&p->rc, &term, nTerm, pTerm); 4257 bTermWritten =0; 4258 } 4259 4260 /* Check for key annihilation. */ 4261 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; 4262 4263 if( p->rc==SQLITE_OK && bTermWritten==0 ){ 4264 /* This is a new term. Append a term to the output segment. 
*/ 4265 fts5WriteAppendTerm(p, &writer, nTerm, pTerm); 4266 bTermWritten = 1; 4267 } 4268 4269 /* Append the rowid to the output */ 4270 /* WRITEPOSLISTSIZE */ 4271 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); 4272 4273 if( eDetail==FTS5_DETAIL_NONE ){ 4274 if( pSegIter->bDel ){ 4275 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); 4276 if( pSegIter->nPos>0 ){ 4277 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); 4278 } 4279 } 4280 }else{ 4281 /* Append the position-list data to the output */ 4282 nPos = pSegIter->nPos*2 + pSegIter->bDel; 4283 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); 4284 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); 4285 } 4286 } 4287 4288 /* Flush the last leaf page to disk. Set the output segment b-tree height 4289 ** and last leaf page number at the same time. */ 4290 fts5WriteFinish(p, &writer, &pSeg->pgnoLast); 4291 4292 if( fts5MultiIterEof(p, pIter) ){ 4293 int i; 4294 4295 /* Remove the redundant segments from the %_data table */ 4296 for(i=0; i<nInput; i++){ 4297 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid); 4298 } 4299 4300 /* Remove the redundant segments from the input level */ 4301 if( pLvl->nSeg!=nInput ){ 4302 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); 4303 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); 4304 } 4305 pStruct->nSegment -= nInput; 4306 pLvl->nSeg -= nInput; 4307 pLvl->nMerge = 0; 4308 if( pSeg->pgnoLast==0 ){ 4309 pLvlOut->nSeg--; 4310 pStruct->nSegment--; 4311 } 4312 }else{ 4313 assert( pSeg->pgnoLast>0 ); 4314 fts5TrimSegments(p, pIter); 4315 pLvl->nMerge = nInput; 4316 } 4317 4318 fts5MultiIterFree(pIter); 4319 fts5BufferFree(&term); 4320 if( pnRem ) *pnRem -= writer.nLeafWritten; 4321 } 4322 4323 /* 4324 ** Do up to nPg pages of automerge work on the index. 4325 ** 4326 ** Return true if any changes were actually made, or false otherwise. 
4327 */ 4328 static int fts5IndexMerge( 4329 Fts5Index *p, /* FTS5 backend object */ 4330 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ 4331 int nPg, /* Pages of work to do */ 4332 int nMin /* Minimum number of segments to merge */ 4333 ){ 4334 int nRem = nPg; 4335 int bRet = 0; 4336 Fts5Structure *pStruct = *ppStruct; 4337 while( nRem>0 && p->rc==SQLITE_OK ){ 4338 int iLvl; /* To iterate through levels */ 4339 int iBestLvl = 0; /* Level offering the most input segments */ 4340 int nBest = 0; /* Number of input segments on best level */ 4341 4342 /* Set iBestLvl to the level to read input segments from. */ 4343 assert( pStruct->nLevel>0 ); 4344 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 4345 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 4346 if( pLvl->nMerge ){ 4347 if( pLvl->nMerge>nBest ){ 4348 iBestLvl = iLvl; 4349 nBest = pLvl->nMerge; 4350 } 4351 break; 4352 } 4353 if( pLvl->nSeg>nBest ){ 4354 nBest = pLvl->nSeg; 4355 iBestLvl = iLvl; 4356 } 4357 } 4358 4359 /* If nBest is still 0, then the index must be empty. */ 4360 #ifdef SQLITE_DEBUG 4361 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ 4362 assert( pStruct->aLevel[iLvl].nSeg==0 ); 4363 } 4364 #endif 4365 4366 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){ 4367 break; 4368 } 4369 bRet = 1; 4370 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); 4371 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ 4372 fts5StructurePromote(p, iBestLvl+1, pStruct); 4373 } 4374 } 4375 *ppStruct = pStruct; 4376 return bRet; 4377 } 4378 4379 /* 4380 ** A total of nLeaf leaf pages of data has just been flushed to a level-0 4381 ** segment. This function updates the write-counter accordingly and, if 4382 ** necessary, performs incremental merge work. 4383 ** 4384 ** If an error occurs, set the Fts5Index.rc error code. If an error has 4385 ** already occurred, this function is a no-op. 
4386 */ 4387 static void fts5IndexAutomerge( 4388 Fts5Index *p, /* FTS5 backend object */ 4389 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ 4390 int nLeaf /* Number of output leaves just written */ 4391 ){ 4392 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ 4393 Fts5Structure *pStruct = *ppStruct; 4394 u64 nWrite; /* Initial value of write-counter */ 4395 int nWork; /* Number of work-quanta to perform */ 4396 int nRem; /* Number of leaf pages left to write */ 4397 4398 /* Update the write-counter. While doing so, set nWork. */ 4399 nWrite = pStruct->nWriteCounter; 4400 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); 4401 pStruct->nWriteCounter += nLeaf; 4402 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); 4403 4404 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); 4405 } 4406 } 4407 4408 static void fts5IndexCrisismerge( 4409 Fts5Index *p, /* FTS5 backend object */ 4410 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ 4411 ){ 4412 const int nCrisis = p->pConfig->nCrisisMerge; 4413 Fts5Structure *pStruct = *ppStruct; 4414 int iLvl = 0; 4415 4416 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); 4417 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ 4418 fts5IndexMergeLevel(p, &pStruct, iLvl, 0); 4419 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); 4420 fts5StructurePromote(p, iLvl+1, pStruct); 4421 iLvl++; 4422 } 4423 *ppStruct = pStruct; 4424 } 4425 4426 static int fts5IndexReturn(Fts5Index *p){ 4427 int rc = p->rc; 4428 p->rc = SQLITE_OK; 4429 return rc; 4430 } 4431 4432 typedef struct Fts5FlushCtx Fts5FlushCtx; 4433 struct Fts5FlushCtx { 4434 Fts5Index *pIdx; 4435 Fts5SegWriter writer; 4436 }; 4437 4438 /* 4439 ** Buffer aBuf[] contains a list of varints, all small enough to fit 4440 ** in a 32-bit integer. Return the size of the largest prefix of this 4441 ** list nMax bytes or less in size. 
4442 */ 4443 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ 4444 int ret; 4445 u32 dummy; 4446 ret = fts5GetVarint32(aBuf, dummy); 4447 if( ret<nMax ){ 4448 while( 1 ){ 4449 int i = fts5GetVarint32(&aBuf[ret], dummy); 4450 if( (ret + i) > nMax ) break; 4451 ret += i; 4452 } 4453 } 4454 return ret; 4455 } 4456 4457 /* 4458 ** Flush the contents of in-memory hash table iHash to a new level-0 4459 ** segment on disk. Also update the corresponding structure record. 4460 ** 4461 ** If an error occurs, set the Fts5Index.rc error code. If an error has 4462 ** already occurred, this function is a no-op. 4463 */ 4464 static void fts5FlushOneHash(Fts5Index *p){ 4465 Fts5Hash *pHash = p->pHash; 4466 Fts5Structure *pStruct; 4467 int iSegid; 4468 int pgnoLast = 0; /* Last leaf page number in segment */ 4469 4470 /* Obtain a reference to the index structure and allocate a new segment-id 4471 ** for the new level-0 segment. */ 4472 pStruct = fts5StructureRead(p); 4473 iSegid = fts5AllocateSegid(p, pStruct); 4474 fts5StructureInvalidate(p); 4475 4476 if( iSegid ){ 4477 const int pgsz = p->pConfig->pgsz; 4478 int eDetail = p->pConfig->eDetail; 4479 Fts5StructureSegment *pSeg; /* New segment within pStruct */ 4480 Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ 4481 Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ 4482 4483 Fts5SegWriter writer; 4484 fts5WriteInit(p, &writer, iSegid); 4485 4486 pBuf = &writer.writer.buf; 4487 pPgidx = &writer.writer.pgidx; 4488 4489 /* fts5WriteInit() should have initialized the buffers to (most likely) 4490 ** the maximum space required. */ 4491 assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); 4492 assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); 4493 4494 /* Begin scanning through hash table entries. This loop runs once for each 4495 ** term/doclist currently stored within the hash table. 
*/ 4496 if( p->rc==SQLITE_OK ){ 4497 p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); 4498 } 4499 while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ 4500 const char *zTerm; /* Buffer containing term */ 4501 const u8 *pDoclist; /* Pointer to doclist for this term */ 4502 int nDoclist; /* Size of doclist in bytes */ 4503 4504 /* Write the term for this entry to disk. */ 4505 sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); 4506 fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm); 4507 4508 assert( writer.bFirstRowidInPage==0 ); 4509 if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ 4510 /* The entire doclist will fit on the current leaf. */ 4511 fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); 4512 }else{ 4513 i64 iRowid = 0; 4514 i64 iDelta = 0; 4515 int iOff = 0; 4516 4517 /* The entire doclist will not fit on this leaf. The following 4518 ** loop iterates through the poslists that make up the current 4519 ** doclist. */ 4520 while( p->rc==SQLITE_OK && iOff<nDoclist ){ 4521 iOff += fts5GetVarint(&pDoclist[iOff], (u64*)&iDelta); 4522 iRowid += iDelta; 4523 4524 if( writer.bFirstRowidInPage ){ 4525 fts5PutU16(&pBuf->p[0], (u16)pBuf->n); /* first rowid on page */ 4526 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); 4527 writer.bFirstRowidInPage = 0; 4528 fts5WriteDlidxAppend(p, &writer, iRowid); 4529 }else{ 4530 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); 4531 } 4532 assert( pBuf->n<=pBuf->nSpace ); 4533 4534 if( eDetail==FTS5_DETAIL_NONE ){ 4535 if( iOff<nDoclist && pDoclist[iOff]==0 ){ 4536 pBuf->p[pBuf->n++] = 0; 4537 iOff++; 4538 if( iOff<nDoclist && pDoclist[iOff]==0 ){ 4539 pBuf->p[pBuf->n++] = 0; 4540 iOff++; 4541 } 4542 } 4543 if( (pBuf->n + pPgidx->n)>=pgsz ){ 4544 fts5WriteFlushLeaf(p, &writer); 4545 } 4546 }else{ 4547 int bDummy; 4548 int nPos; 4549 int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy); 4550 nCopy += nPos; 4551 if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ 4552 
/* The entire poslist will fit on the current leaf. So copy 4553 ** it in one go. */ 4554 fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); 4555 }else{ 4556 /* The entire poslist will not fit on this leaf. So it needs 4557 ** to be broken into sections. The only qualification being 4558 ** that each varint must be stored contiguously. */ 4559 const u8 *pPoslist = &pDoclist[iOff]; 4560 int iPos = 0; 4561 while( p->rc==SQLITE_OK ){ 4562 int nSpace = pgsz - pBuf->n - pPgidx->n; 4563 int n = 0; 4564 if( (nCopy - iPos)<=nSpace ){ 4565 n = nCopy - iPos; 4566 }else{ 4567 n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); 4568 } 4569 assert( n>0 ); 4570 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); 4571 iPos += n; 4572 if( (pBuf->n + pPgidx->n)>=pgsz ){ 4573 fts5WriteFlushLeaf(p, &writer); 4574 } 4575 if( iPos>=nCopy ) break; 4576 } 4577 } 4578 iOff += nCopy; 4579 } 4580 } 4581 } 4582 4583 /* TODO2: Doclist terminator written here. */ 4584 /* pBuf->p[pBuf->n++] = '\0'; */ 4585 assert( pBuf->n<=pBuf->nSpace ); 4586 sqlite3Fts5HashScanNext(pHash); 4587 } 4588 sqlite3Fts5HashClear(pHash); 4589 fts5WriteFinish(p, &writer, &pgnoLast); 4590 4591 /* Update the Fts5Structure. It is written back to the database by the 4592 ** fts5StructureRelease() call below. */ 4593 if( pStruct->nLevel==0 ){ 4594 fts5StructureAddLevel(&p->rc, &pStruct); 4595 } 4596 fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); 4597 if( p->rc==SQLITE_OK ){ 4598 pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; 4599 pSeg->iSegid = iSegid; 4600 pSeg->pgnoFirst = 1; 4601 pSeg->pgnoLast = pgnoLast; 4602 pStruct->nSegment++; 4603 } 4604 fts5StructurePromote(p, 0, pStruct); 4605 } 4606 4607 fts5IndexAutomerge(p, &pStruct, pgnoLast); 4608 fts5IndexCrisismerge(p, &pStruct); 4609 fts5StructureWrite(p, pStruct); 4610 fts5StructureRelease(pStruct); 4611 } 4612 4613 /* 4614 ** Flush any data stored in the in-memory hash tables to the database. 
4615 */ 4616 static void fts5IndexFlush(Fts5Index *p){ 4617 /* Unless it is empty, flush the hash table to disk */ 4618 if( p->nPendingData ){ 4619 assert( p->pHash ); 4620 p->nPendingData = 0; 4621 fts5FlushOneHash(p); 4622 } 4623 } 4624 4625 static Fts5Structure *fts5IndexOptimizeStruct( 4626 Fts5Index *p, 4627 Fts5Structure *pStruct 4628 ){ 4629 Fts5Structure *pNew = 0; 4630 int nByte = sizeof(Fts5Structure); 4631 int nSeg = pStruct->nSegment; 4632 int i; 4633 4634 /* Figure out if this structure requires optimization. A structure does 4635 ** not require optimization if either: 4636 ** 4637 ** + it consists of fewer than two segments, or 4638 ** + all segments are on the same level, or 4639 ** + all segments except one are currently inputs to a merge operation. 4640 ** 4641 ** In the first case, return NULL. In the second, increment the ref-count 4642 ** on *pStruct and return a copy of the pointer to it. 4643 */ 4644 if( nSeg<2 ) return 0; 4645 for(i=0; i<pStruct->nLevel; i++){ 4646 int nThis = pStruct->aLevel[i].nSeg; 4647 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ 4648 fts5StructureRef(pStruct); 4649 return pStruct; 4650 } 4651 assert( pStruct->aLevel[i].nMerge<=nThis ); 4652 } 4653 4654 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); 4655 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); 4656 4657 if( pNew ){ 4658 Fts5StructureLevel *pLvl; 4659 nByte = nSeg * sizeof(Fts5StructureSegment); 4660 pNew->nLevel = pStruct->nLevel+1; 4661 pNew->nRef = 1; 4662 pNew->nWriteCounter = pStruct->nWriteCounter; 4663 pLvl = &pNew->aLevel[pStruct->nLevel]; 4664 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); 4665 if( pLvl->aSeg ){ 4666 int iLvl, iSeg; 4667 int iSegOut = 0; 4668 /* Iterate through all segments, from oldest to newest. Add them to 4669 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest 4670 ** segment in the data structure. 
*/ 4671 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ 4672 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 4673 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; 4674 iSegOut++; 4675 } 4676 } 4677 pNew->nSegment = pLvl->nSeg = nSeg; 4678 }else{ 4679 sqlite3_free(pNew); 4680 pNew = 0; 4681 } 4682 } 4683 4684 return pNew; 4685 } 4686 4687 int sqlite3Fts5IndexOptimize(Fts5Index *p){ 4688 Fts5Structure *pStruct; 4689 Fts5Structure *pNew = 0; 4690 4691 assert( p->rc==SQLITE_OK ); 4692 fts5IndexFlush(p); 4693 pStruct = fts5StructureRead(p); 4694 fts5StructureInvalidate(p); 4695 4696 if( pStruct ){ 4697 pNew = fts5IndexOptimizeStruct(p, pStruct); 4698 } 4699 fts5StructureRelease(pStruct); 4700 4701 assert( pNew==0 || pNew->nSegment>0 ); 4702 if( pNew ){ 4703 int iLvl; 4704 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} 4705 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ 4706 int nRem = FTS5_OPT_WORK_UNIT; 4707 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); 4708 } 4709 4710 fts5StructureWrite(p, pNew); 4711 fts5StructureRelease(pNew); 4712 } 4713 4714 return fts5IndexReturn(p); 4715 } 4716 4717 /* 4718 ** This is called to implement the special "VALUES('merge', $nMerge)" 4719 ** INSERT command. 
4720 */ 4721 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ 4722 Fts5Structure *pStruct = fts5StructureRead(p); 4723 if( pStruct ){ 4724 int nMin = p->pConfig->nUsermerge; 4725 fts5StructureInvalidate(p); 4726 if( nMerge<0 ){ 4727 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); 4728 fts5StructureRelease(pStruct); 4729 pStruct = pNew; 4730 nMin = 2; 4731 nMerge = nMerge*-1; 4732 } 4733 if( pStruct && pStruct->nLevel ){ 4734 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ 4735 fts5StructureWrite(p, pStruct); 4736 } 4737 } 4738 fts5StructureRelease(pStruct); 4739 } 4740 return fts5IndexReturn(p); 4741 } 4742 4743 static void fts5AppendRowid( 4744 Fts5Index *p, 4745 i64 iDelta, 4746 Fts5Iter *pUnused, 4747 Fts5Buffer *pBuf 4748 ){ 4749 UNUSED_PARAM(pUnused); 4750 fts5BufferAppendVarint(&p->rc, pBuf, iDelta); 4751 } 4752 4753 static void fts5AppendPoslist( 4754 Fts5Index *p, 4755 i64 iDelta, 4756 Fts5Iter *pMulti, 4757 Fts5Buffer *pBuf 4758 ){ 4759 int nData = pMulti->base.nData; 4760 assert( nData>0 ); 4761 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nData+9+9) ){ 4762 fts5BufferSafeAppendVarint(pBuf, iDelta); 4763 fts5BufferSafeAppendVarint(pBuf, nData*2); 4764 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); 4765 } 4766 } 4767 4768 4769 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ 4770 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; 4771 4772 assert( pIter->aPoslist ); 4773 if( p>=pIter->aEof ){ 4774 pIter->aPoslist = 0; 4775 }else{ 4776 i64 iDelta; 4777 4778 p += fts5GetVarint(p, (u64*)&iDelta); 4779 pIter->iRowid += iDelta; 4780 4781 /* Read position list size */ 4782 if( p[0] & 0x80 ){ 4783 int nPos; 4784 pIter->nSize = fts5GetVarint32(p, nPos); 4785 pIter->nPoslist = (nPos>>1); 4786 }else{ 4787 pIter->nPoslist = ((int)(p[0])) >> 1; 4788 pIter->nSize = 1; 4789 } 4790 4791 pIter->aPoslist = p; 4792 } 4793 } 4794 4795 static void fts5DoclistIterInit( 4796 Fts5Buffer *pBuf, 4797 Fts5DoclistIter *pIter 4798 
){ 4799 memset(pIter, 0, sizeof(*pIter)); 4800 pIter->aPoslist = pBuf->p; 4801 pIter->aEof = &pBuf->p[pBuf->n]; 4802 fts5DoclistIterNext(pIter); 4803 } 4804 4805 #if 0 4806 /* 4807 ** Append a doclist to buffer pBuf. 4808 ** 4809 ** This function assumes that space within the buffer has already been 4810 ** allocated. 4811 */ 4812 static void fts5MergeAppendDocid( 4813 Fts5Buffer *pBuf, /* Buffer to write to */ 4814 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ 4815 i64 iRowid /* Rowid to append */ 4816 ){ 4817 assert( pBuf->n!=0 || (*piLastRowid)==0 ); 4818 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid); 4819 *piLastRowid = iRowid; 4820 } 4821 #endif 4822 4823 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ 4824 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ 4825 fts5BufferSafeAppendVarint((pBuf), (iRowid) - (iLastRowid)); \ 4826 (iLastRowid) = (iRowid); \ 4827 } 4828 4829 /* 4830 ** Swap the contents of buffer *p1 with that of *p2. 4831 */ 4832 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ 4833 Fts5Buffer tmp = *p1; 4834 *p1 = *p2; 4835 *p2 = tmp; 4836 } 4837 4838 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ 4839 int i = *piOff; 4840 if( i>=pBuf->n ){ 4841 *piOff = -1; 4842 }else{ 4843 u64 iVal; 4844 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); 4845 *piRowid += iVal; 4846 } 4847 } 4848 4849 /* 4850 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. 4851 ** In this case the buffers consist of a delta-encoded list of rowids only. 
4852 */ 4853 static void fts5MergeRowidLists( 4854 Fts5Index *p, /* FTS5 backend object */ 4855 Fts5Buffer *p1, /* First list to merge */ 4856 Fts5Buffer *p2 /* Second list to merge */ 4857 ){ 4858 int i1 = 0; 4859 int i2 = 0; 4860 i64 iRowid1 = 0; 4861 i64 iRowid2 = 0; 4862 i64 iOut = 0; 4863 4864 Fts5Buffer out; 4865 memset(&out, 0, sizeof(out)); 4866 sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n); 4867 if( p->rc ) return; 4868 4869 fts5NextRowid(p1, &i1, &iRowid1); 4870 fts5NextRowid(p2, &i2, &iRowid2); 4871 while( i1>=0 || i2>=0 ){ 4872 if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){ 4873 assert( iOut==0 || iRowid1>iOut ); 4874 fts5BufferSafeAppendVarint(&out, iRowid1 - iOut); 4875 iOut = iRowid1; 4876 fts5NextRowid(p1, &i1, &iRowid1); 4877 }else{ 4878 assert( iOut==0 || iRowid2>iOut ); 4879 fts5BufferSafeAppendVarint(&out, iRowid2 - iOut); 4880 iOut = iRowid2; 4881 if( i1>=0 && iRowid1==iRowid2 ){ 4882 fts5NextRowid(p1, &i1, &iRowid1); 4883 } 4884 fts5NextRowid(p2, &i2, &iRowid2); 4885 } 4886 } 4887 4888 fts5BufferSwap(&out, p1); 4889 fts5BufferFree(&out); 4890 } 4891 4892 /* 4893 ** Buffers p1 and p2 contain doclists. This function merges the content 4894 ** of the two doclists together and sets buffer p1 to the result before 4895 ** returning. 4896 ** 4897 ** If an error occurs, an error code is left in p->rc. If an error has 4898 ** already occurred, this function is a no-op. 
4899 */ 4900 static void fts5MergePrefixLists( 4901 Fts5Index *p, /* FTS5 backend object */ 4902 Fts5Buffer *p1, /* First list to merge */ 4903 Fts5Buffer *p2 /* Second list to merge */ 4904 ){ 4905 if( p2->n ){ 4906 i64 iLastRowid = 0; 4907 Fts5DoclistIter i1; 4908 Fts5DoclistIter i2; 4909 Fts5Buffer out = {0, 0, 0}; 4910 Fts5Buffer tmp = {0, 0, 0}; 4911 4912 if( sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n) ) return; 4913 fts5DoclistIterInit(p1, &i1); 4914 fts5DoclistIterInit(p2, &i2); 4915 4916 while( 1 ){ 4917 if( i1.iRowid<i2.iRowid ){ 4918 /* Copy entry from i1 */ 4919 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid); 4920 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.nPoslist+i1.nSize); 4921 fts5DoclistIterNext(&i1); 4922 if( i1.aPoslist==0 ) break; 4923 } 4924 else if( i2.iRowid!=i1.iRowid ){ 4925 /* Copy entry from i2 */ 4926 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); 4927 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.nPoslist+i2.nSize); 4928 fts5DoclistIterNext(&i2); 4929 if( i2.aPoslist==0 ) break; 4930 } 4931 else{ 4932 /* Merge the two position lists. 
*/ 4933 i64 iPos1 = 0; 4934 i64 iPos2 = 0; 4935 int iOff1 = 0; 4936 int iOff2 = 0; 4937 u8 *a1 = &i1.aPoslist[i1.nSize]; 4938 u8 *a2 = &i2.aPoslist[i2.nSize]; 4939 4940 i64 iPrev = 0; 4941 Fts5PoslistWriter writer; 4942 memset(&writer, 0, sizeof(writer)); 4943 4944 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); 4945 fts5BufferZero(&tmp); 4946 sqlite3Fts5BufferSize(&p->rc, &tmp, i1.nPoslist + i2.nPoslist); 4947 if( p->rc ) break; 4948 4949 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); 4950 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); 4951 assert( iPos1>=0 && iPos2>=0 ); 4952 4953 if( iPos1<iPos2 ){ 4954 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1); 4955 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); 4956 }else{ 4957 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2); 4958 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); 4959 } 4960 4961 if( iPos1>=0 && iPos2>=0 ){ 4962 while( 1 ){ 4963 if( iPos1<iPos2 ){ 4964 if( iPos1!=iPrev ){ 4965 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1); 4966 } 4967 sqlite3Fts5PoslistNext64(a1, i1.nPoslist, &iOff1, &iPos1); 4968 if( iPos1<0 ) break; 4969 }else{ 4970 assert( iPos2!=iPrev ); 4971 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2); 4972 sqlite3Fts5PoslistNext64(a2, i2.nPoslist, &iOff2, &iPos2); 4973 if( iPos2<0 ) break; 4974 } 4975 } 4976 } 4977 4978 if( iPos1>=0 ){ 4979 if( iPos1!=iPrev ){ 4980 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos1); 4981 } 4982 fts5BufferSafeAppendBlob(&tmp, &a1[iOff1], i1.nPoslist-iOff1); 4983 }else{ 4984 assert( iPos2>=0 && iPos2!=iPrev ); 4985 sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, iPos2); 4986 fts5BufferSafeAppendBlob(&tmp, &a2[iOff2], i2.nPoslist-iOff2); 4987 } 4988 4989 /* WRITEPOSLISTSIZE */ 4990 fts5BufferSafeAppendVarint(&out, tmp.n * 2); 4991 fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n); 4992 fts5DoclistIterNext(&i1); 4993 fts5DoclistIterNext(&i2); 4994 if( i1.aPoslist==0 || i2.aPoslist==0 ) break; 4995 } 4996 } 4997 4998 
if( i1.aPoslist ){ 4999 fts5MergeAppendDocid(&out, iLastRowid, i1.iRowid); 5000 fts5BufferSafeAppendBlob(&out, i1.aPoslist, i1.aEof - i1.aPoslist); 5001 } 5002 else if( i2.aPoslist ){ 5003 fts5MergeAppendDocid(&out, iLastRowid, i2.iRowid); 5004 fts5BufferSafeAppendBlob(&out, i2.aPoslist, i2.aEof - i2.aPoslist); 5005 } 5006 5007 fts5BufferSet(&p->rc, p1, out.n, out.p); 5008 fts5BufferFree(&tmp); 5009 fts5BufferFree(&out); 5010 } 5011 } 5012 5013 static void fts5SetupPrefixIter( 5014 Fts5Index *p, /* Index to read from */ 5015 int bDesc, /* True for "ORDER BY rowid DESC" */ 5016 const u8 *pToken, /* Buffer containing prefix to match */ 5017 int nToken, /* Size of buffer pToken in bytes */ 5018 Fts5Colset *pColset, /* Restrict matches to these columns */ 5019 Fts5Iter **ppIter /* OUT: New iterator */ 5020 ){ 5021 Fts5Structure *pStruct; 5022 Fts5Buffer *aBuf; 5023 const int nBuf = 32; 5024 5025 void (*xMerge)(Fts5Index*, Fts5Buffer*, Fts5Buffer*); 5026 void (*xAppend)(Fts5Index*, i64, Fts5Iter*, Fts5Buffer*); 5027 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ 5028 xMerge = fts5MergeRowidLists; 5029 xAppend = fts5AppendRowid; 5030 }else{ 5031 xMerge = fts5MergePrefixLists; 5032 xAppend = fts5AppendPoslist; 5033 } 5034 5035 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); 5036 pStruct = fts5StructureRead(p); 5037 5038 if( aBuf && pStruct ){ 5039 const int flags = FTS5INDEX_QUERY_SCAN 5040 | FTS5INDEX_QUERY_SKIPEMPTY 5041 | FTS5INDEX_QUERY_NOOUTPUT; 5042 int i; 5043 i64 iLastRowid = 0; 5044 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ 5045 Fts5Data *pData; 5046 Fts5Buffer doclist; 5047 int bNewTerm = 1; 5048 5049 memset(&doclist, 0, sizeof(doclist)); 5050 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); 5051 fts5IterSetOutputCb(&p->rc, p1); 5052 for( /* no-op */ ; 5053 fts5MultiIterEof(p, p1)==0; 5054 fts5MultiIterNext2(p, p1, &bNewTerm) 5055 ){ 5056 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; 5057 int 
nTerm = pSeg->term.n; 5058 const u8 *pTerm = pSeg->term.p; 5059 p1->xSetOutputs(p1, pSeg); 5060 5061 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); 5062 if( bNewTerm ){ 5063 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; 5064 } 5065 5066 if( p1->base.nData==0 ) continue; 5067 5068 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ 5069 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ 5070 assert( i<nBuf ); 5071 if( aBuf[i].n==0 ){ 5072 fts5BufferSwap(&doclist, &aBuf[i]); 5073 fts5BufferZero(&doclist); 5074 }else{ 5075 xMerge(p, &doclist, &aBuf[i]); 5076 fts5BufferZero(&aBuf[i]); 5077 } 5078 } 5079 iLastRowid = 0; 5080 } 5081 5082 xAppend(p, p1->base.iRowid-iLastRowid, p1, &doclist); 5083 iLastRowid = p1->base.iRowid; 5084 } 5085 5086 for(i=0; i<nBuf; i++){ 5087 if( p->rc==SQLITE_OK ){ 5088 xMerge(p, &doclist, &aBuf[i]); 5089 } 5090 fts5BufferFree(&aBuf[i]); 5091 } 5092 fts5MultiIterFree(p1); 5093 5094 pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); 5095 if( pData ){ 5096 pData->p = (u8*)&pData[1]; 5097 pData->nn = pData->szLeaf = doclist.n; 5098 if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n); 5099 fts5MultiIterNew2(p, pData, bDesc, ppIter); 5100 } 5101 fts5BufferFree(&doclist); 5102 } 5103 5104 fts5StructureRelease(pStruct); 5105 sqlite3_free(aBuf); 5106 } 5107 5108 5109 /* 5110 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain 5111 ** to the document with rowid iRowid. 
5112 */ 5113 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ 5114 assert( p->rc==SQLITE_OK ); 5115 5116 /* Allocate the hash table if it has not already been allocated */ 5117 if( p->pHash==0 ){ 5118 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); 5119 } 5120 5121 /* Flush the hash table to disk if required */ 5122 if( iRowid<p->iWriteRowid 5123 || (iRowid==p->iWriteRowid && p->bDelete==0) 5124 || (p->nPendingData > p->pConfig->nHashSize) 5125 ){ 5126 fts5IndexFlush(p); 5127 } 5128 5129 p->iWriteRowid = iRowid; 5130 p->bDelete = bDelete; 5131 return fts5IndexReturn(p); 5132 } 5133 5134 /* 5135 ** Commit data to disk. 5136 */ 5137 int sqlite3Fts5IndexSync(Fts5Index *p){ 5138 assert( p->rc==SQLITE_OK ); 5139 fts5IndexFlush(p); 5140 fts5CloseReader(p); 5141 return fts5IndexReturn(p); 5142 } 5143 5144 /* 5145 ** Discard any data stored in the in-memory hash tables. Do not write it 5146 ** to the database. Additionally, assume that the contents of the %_data 5147 ** table may have changed on disk. So any in-memory caches of %_data 5148 ** records must be invalidated. 5149 */ 5150 int sqlite3Fts5IndexRollback(Fts5Index *p){ 5151 fts5CloseReader(p); 5152 fts5IndexDiscardData(p); 5153 fts5StructureInvalidate(p); 5154 /* assert( p->rc==SQLITE_OK ); */ 5155 return SQLITE_OK; 5156 } 5157 5158 /* 5159 ** The %_data table is completely empty when this function is called. This 5160 ** function populates it with the initial structure objects for each index, 5161 ** and the initial version of the "averages" record (a zero-byte blob). 5162 */ 5163 int sqlite3Fts5IndexReinit(Fts5Index *p){ 5164 Fts5Structure s; 5165 fts5StructureInvalidate(p); 5166 memset(&s, 0, sizeof(Fts5Structure)); 5167 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); 5168 fts5StructureWrite(p, &s); 5169 return fts5IndexReturn(p); 5170 } 5171 5172 /* 5173 ** Open a new Fts5Index handle. 
If the bCreate argument is true, create 5174 ** and initialize the underlying %_data table. 5175 ** 5176 ** If successful, set *pp to point to the new object and return SQLITE_OK. 5177 ** Otherwise, set *pp to NULL and return an SQLite error code. 5178 */ 5179 int sqlite3Fts5IndexOpen( 5180 Fts5Config *pConfig, 5181 int bCreate, 5182 Fts5Index **pp, 5183 char **pzErr 5184 ){ 5185 int rc = SQLITE_OK; 5186 Fts5Index *p; /* New object */ 5187 5188 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); 5189 if( rc==SQLITE_OK ){ 5190 p->pConfig = pConfig; 5191 p->nWorkUnit = FTS5_WORK_UNIT; 5192 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); 5193 if( p->zDataTbl && bCreate ){ 5194 rc = sqlite3Fts5CreateTable( 5195 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr 5196 ); 5197 if( rc==SQLITE_OK ){ 5198 rc = sqlite3Fts5CreateTable(pConfig, "idx", 5199 "segid, term, pgno, PRIMARY KEY(segid, term)", 5200 1, pzErr 5201 ); 5202 } 5203 if( rc==SQLITE_OK ){ 5204 rc = sqlite3Fts5IndexReinit(p); 5205 } 5206 } 5207 } 5208 5209 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); 5210 if( rc ){ 5211 sqlite3Fts5IndexClose(p); 5212 *pp = 0; 5213 } 5214 return rc; 5215 } 5216 5217 /* 5218 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). 5219 */ 5220 int sqlite3Fts5IndexClose(Fts5Index *p){ 5221 int rc = SQLITE_OK; 5222 if( p ){ 5223 assert( p->pReader==0 ); 5224 fts5StructureInvalidate(p); 5225 sqlite3_finalize(p->pWriter); 5226 sqlite3_finalize(p->pDeleter); 5227 sqlite3_finalize(p->pIdxWriter); 5228 sqlite3_finalize(p->pIdxDeleter); 5229 sqlite3_finalize(p->pIdxSelect); 5230 sqlite3_finalize(p->pDataVersion); 5231 sqlite3Fts5HashFree(p->pHash); 5232 sqlite3_free(p->zDataTbl); 5233 sqlite3_free(p); 5234 } 5235 return rc; 5236 } 5237 5238 /* 5239 ** Argument p points to a buffer containing utf-8 text that is n bytes in 5240 ** size. 
Return the number of bytes in the nChar character prefix of the 5241 ** buffer, or 0 if there are less than nChar characters in total. 5242 */ 5243 int sqlite3Fts5IndexCharlenToBytelen( 5244 const char *p, 5245 int nByte, 5246 int nChar 5247 ){ 5248 int n = 0; 5249 int i; 5250 for(i=0; i<nChar; i++){ 5251 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */ 5252 if( (unsigned char)p[n++]>=0xc0 ){ 5253 while( (p[n] & 0xc0)==0x80 ) n++; 5254 } 5255 } 5256 return n; 5257 } 5258 5259 /* 5260 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of 5261 ** unicode characters in the string. 5262 */ 5263 static int fts5IndexCharlen(const char *pIn, int nIn){ 5264 int nChar = 0; 5265 int i = 0; 5266 while( i<nIn ){ 5267 if( (unsigned char)pIn[i++]>=0xc0 ){ 5268 while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++; 5269 } 5270 nChar++; 5271 } 5272 return nChar; 5273 } 5274 5275 /* 5276 ** Insert or remove data to or from the index. Each time a document is 5277 ** added to or removed from the index, this function is called one or more 5278 ** times. 5279 ** 5280 ** For an insert, it must be called once for each token in the new document. 5281 ** If the operation is a delete, it must be called (at least) once for each 5282 ** unique token in the document with an iCol value less than zero. The iPos 5283 ** argument is ignored for a delete. 5284 */ 5285 int sqlite3Fts5IndexWrite( 5286 Fts5Index *p, /* Index to write to */ 5287 int iCol, /* Column token appears in (-ve -> delete) */ 5288 int iPos, /* Position of token within column */ 5289 const char *pToken, int nToken /* Token to add or remove to or from index */ 5290 ){ 5291 int i; /* Used to iterate through indexes */ 5292 int rc = SQLITE_OK; /* Return code */ 5293 Fts5Config *pConfig = p->pConfig; 5294 5295 assert( p->rc==SQLITE_OK ); 5296 assert( (iCol<0)==p->bDelete ); 5297 5298 /* Add the entry to the main terms index. 
*/ 5299 rc = sqlite3Fts5HashWrite( 5300 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken 5301 ); 5302 5303 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ 5304 const int nChar = pConfig->aPrefix[i]; 5305 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); 5306 if( nByte ){ 5307 rc = sqlite3Fts5HashWrite(p->pHash, 5308 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, 5309 nByte 5310 ); 5311 } 5312 } 5313 5314 return rc; 5315 } 5316 5317 /* 5318 ** Open a new iterator to iterate though all rowid that match the 5319 ** specified token or token prefix. 5320 */ 5321 int sqlite3Fts5IndexQuery( 5322 Fts5Index *p, /* FTS index to query */ 5323 const char *pToken, int nToken, /* Token (or prefix) to query for */ 5324 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ 5325 Fts5Colset *pColset, /* Match these columns only */ 5326 Fts5IndexIter **ppIter /* OUT: New iterator object */ 5327 ){ 5328 Fts5Config *pConfig = p->pConfig; 5329 Fts5Iter *pRet = 0; 5330 Fts5Buffer buf = {0, 0, 0}; 5331 5332 /* If the QUERY_SCAN flag is set, all other flags must be clear. */ 5333 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); 5334 5335 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ 5336 int iIdx = 0; /* Index to search */ 5337 if( nToken ) memcpy(&buf.p[1], pToken, nToken); 5338 5339 /* Figure out which index to search and set iIdx accordingly. If this 5340 ** is a prefix query for which there is no prefix index, set iIdx to 5341 ** greater than pConfig->nPrefix to indicate that the query will be 5342 ** satisfied by scanning multiple terms in the main index. 5343 ** 5344 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a 5345 ** prefix-query. Instead of using a prefix-index (if one exists), 5346 ** evaluate the prefix query using the main FTS index. This is used 5347 ** for internal sanity checking by the integrity-check in debug 5348 ** mode only. 
*/ 5349 #ifdef SQLITE_DEBUG 5350 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ 5351 assert( flags & FTS5INDEX_QUERY_PREFIX ); 5352 iIdx = 1+pConfig->nPrefix; 5353 }else 5354 #endif 5355 if( flags & FTS5INDEX_QUERY_PREFIX ){ 5356 int nChar = fts5IndexCharlen(pToken, nToken); 5357 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ 5358 if( pConfig->aPrefix[iIdx-1]==nChar ) break; 5359 } 5360 } 5361 5362 if( iIdx<=pConfig->nPrefix ){ 5363 /* Straight index lookup */ 5364 Fts5Structure *pStruct = fts5StructureRead(p); 5365 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); 5366 if( pStruct ){ 5367 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY, 5368 pColset, buf.p, nToken+1, -1, 0, &pRet 5369 ); 5370 fts5StructureRelease(pStruct); 5371 } 5372 }else{ 5373 /* Scan multiple terms in the main index */ 5374 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; 5375 buf.p[0] = FTS5_MAIN_PREFIX; 5376 fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pColset, &pRet); 5377 assert( p->rc!=SQLITE_OK || pRet->pColset==0 ); 5378 fts5IterSetOutputCb(&p->rc, pRet); 5379 if( p->rc==SQLITE_OK ){ 5380 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; 5381 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); 5382 } 5383 } 5384 5385 if( p->rc ){ 5386 sqlite3Fts5IterClose((Fts5IndexIter*)pRet); 5387 pRet = 0; 5388 fts5CloseReader(p); 5389 } 5390 5391 *ppIter = &pRet->base; 5392 sqlite3Fts5BufferFree(&buf); 5393 } 5394 return fts5IndexReturn(p); 5395 } 5396 5397 /* 5398 ** Return true if the iterator passed as the only argument is at EOF. 5399 */ 5400 /* 5401 ** Move to the next matching rowid. 5402 */ 5403 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ 5404 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 5405 assert( pIter->pIndex->rc==SQLITE_OK ); 5406 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); 5407 return fts5IndexReturn(pIter->pIndex); 5408 } 5409 5410 /* 5411 ** Move to the next matching term/rowid. Used by the fts5vocab module. 
5412 */ 5413 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ 5414 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 5415 Fts5Index *p = pIter->pIndex; 5416 5417 assert( pIter->pIndex->rc==SQLITE_OK ); 5418 5419 fts5MultiIterNext(p, pIter, 0, 0); 5420 if( p->rc==SQLITE_OK ){ 5421 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 5422 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ 5423 fts5DataRelease(pSeg->pLeaf); 5424 pSeg->pLeaf = 0; 5425 pIter->base.bEof = 1; 5426 } 5427 } 5428 5429 return fts5IndexReturn(pIter->pIndex); 5430 } 5431 5432 /* 5433 ** Move to the next matching rowid that occurs at or after iMatch. The 5434 ** definition of "at or after" depends on whether this iterator iterates 5435 ** in ascending or descending rowid order. 5436 */ 5437 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ 5438 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 5439 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); 5440 return fts5IndexReturn(pIter->pIndex); 5441 } 5442 5443 /* 5444 ** Return the current term. 5445 */ 5446 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ 5447 int n; 5448 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); 5449 *pn = n-1; 5450 return &z[1]; 5451 } 5452 5453 /* 5454 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). 5455 */ 5456 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ 5457 if( pIndexIter ){ 5458 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 5459 Fts5Index *pIndex = pIter->pIndex; 5460 fts5MultiIterFree(pIter); 5461 fts5CloseReader(pIndex); 5462 } 5463 } 5464 5465 /* 5466 ** Read and decode the "averages" record from the database. 5467 ** 5468 ** Parameter anSize must point to an array of size nCol, where nCol is 5469 ** the number of user defined columns in the FTS table. 
5470 */ 5471 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ 5472 int nCol = p->pConfig->nCol; 5473 Fts5Data *pData; 5474 5475 *pnRow = 0; 5476 memset(anSize, 0, sizeof(i64) * nCol); 5477 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); 5478 if( p->rc==SQLITE_OK && pData->nn ){ 5479 int i = 0; 5480 int iCol; 5481 i += fts5GetVarint(&pData->p[i], (u64*)pnRow); 5482 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ 5483 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); 5484 } 5485 } 5486 5487 fts5DataRelease(pData); 5488 return fts5IndexReturn(p); 5489 } 5490 5491 /* 5492 ** Replace the current "averages" record with the contents of the buffer 5493 ** supplied as the second argument. 5494 */ 5495 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ 5496 assert( p->rc==SQLITE_OK ); 5497 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); 5498 return fts5IndexReturn(p); 5499 } 5500 5501 /* 5502 ** Return the total number of blocks this module has read from the %_data 5503 ** table since it was created. 5504 */ 5505 int sqlite3Fts5IndexReads(Fts5Index *p){ 5506 return p->nRead; 5507 } 5508 5509 /* 5510 ** Set the 32-bit cookie value stored at the start of all structure 5511 ** records to the value passed as the second argument. 5512 ** 5513 ** Return SQLITE_OK if successful, or an SQLite error code if an error 5514 ** occurs. 
5515 */ 5516 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ 5517 int rc; /* Return code */ 5518 Fts5Config *pConfig = p->pConfig; /* Configuration object */ 5519 u8 aCookie[4]; /* Binary representation of iNew */ 5520 sqlite3_blob *pBlob = 0; 5521 5522 assert( p->rc==SQLITE_OK ); 5523 sqlite3Fts5Put32(aCookie, iNew); 5524 5525 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, 5526 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob 5527 ); 5528 if( rc==SQLITE_OK ){ 5529 sqlite3_blob_write(pBlob, aCookie, 4, 0); 5530 rc = sqlite3_blob_close(pBlob); 5531 } 5532 5533 return rc; 5534 } 5535 5536 int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ 5537 Fts5Structure *pStruct; 5538 pStruct = fts5StructureRead(p); 5539 fts5StructureRelease(pStruct); 5540 return fts5IndexReturn(p); 5541 } 5542 5543 5544 /************************************************************************* 5545 ************************************************************************** 5546 ** Below this point is the implementation of the integrity-check 5547 ** functionality. 5548 */ 5549 5550 /* 5551 ** Return a simple checksum value based on the arguments. 5552 */ 5553 u64 sqlite3Fts5IndexEntryCksum( 5554 i64 iRowid, 5555 int iCol, 5556 int iPos, 5557 int iIdx, 5558 const char *pTerm, 5559 int nTerm 5560 ){ 5561 int i; 5562 u64 ret = iRowid; 5563 ret += (ret<<3) + iCol; 5564 ret += (ret<<3) + iPos; 5565 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); 5566 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; 5567 return ret; 5568 } 5569 5570 #ifdef SQLITE_DEBUG 5571 /* 5572 ** This function is purely an internal test. It does not contribute to 5573 ** FTS functionality, or even the integrity-check, in any way. 5574 ** 5575 ** Instead, it tests that the same set of pgno/rowid combinations are 5576 ** visited regardless of whether the doclist-index identified by parameters 5577 ** iSegid/iLeaf is iterated in forwards or reverse order. 
*/
static void fts5TestDlidxReverse(
  Fts5Index *p,
  int iSegid,                     /* Segment id to load from */
  int iLeaf                       /* Load doclist-index for this leaf */
){
  Fts5DlidxIter *pDlidx = 0;
  u64 cksum1 = 13;                /* Checksum of the forwards pass */
  u64 cksum2 = 13;                /* Checksum of the reverse pass */

  /* Forwards pass: the iterator is opened with its second argument set
  ** to 0 and advanced with fts5DlidxIterNext(). */
  for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterNext(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( pgno>iLeaf );
    cksum1 += iRowid + ((i64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  /* Reverse pass: the iterator is opened with its second argument set
  ** to 1 and advanced with fts5DlidxIterPrev(). */
  for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
      fts5DlidxIterEof(p, pDlidx)==0;
      fts5DlidxIterPrev(p, pDlidx)
  ){
    i64 iRowid = fts5DlidxIterRowid(pDlidx);
    int pgno = fts5DlidxIterPgno(pDlidx);
    assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
    cksum2 += iRowid + ((i64)pgno<<32);
  }
  fts5DlidxIterFree(pDlidx);
  pDlidx = 0;

  /* The sums are order independent, so they differ only if the two
  ** passes visited different pgno/rowid combinations. */
  if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
}

/*
** Run the query z/n with the supplied flags and XOR the checksum of every
** entry visited, as computed by sqlite3Fts5IndexEntryCksum(), into *pCksum.
**
** For detail=none tables one checksum is computed per rowid, using column
** and offset 0. Otherwise one checksum is computed for each position read
** from the entry's position list.
**
** Returns SQLITE_OK, or the error code returned by the query or by
** advancing the iterator. *pCksum is updated in either case.
*/
static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,                       /* Passed through to the entry checksum */
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  int eDetail = p->pConfig->eDetail;
  u64 cksum = *pCksum;
  Fts5IndexIter *pIter = 0;
  int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);

  while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){
    i64 rowid = pIter->iRowid;

    if( eDetail==FTS5_DETAIL_NONE ){
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      Fts5PoslistReader sReader;
      for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
          sReader.bEof==0;
          sqlite3Fts5PoslistReaderNext(&sReader)
      ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
      }
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3Fts5IterNext(pIter);
    }
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}


/*
** This function is also purely an internal test. It does not contribute to
** FTS functionality, or even the integrity-check, in any way.
**
** pPrev holds the most recently seen term, including its leading index-key
** byte. If pPrev is empty, z/n is simply stored in it. Otherwise, if z/n
** differs from the term in pPrev, the term in pPrev is queried for and the
** resulting checksum is XORed into *pCksum, which must then equal
** "expected". z/n is then stored in pPrev as the new previous term.
**
** The function starts from p->rc and stores the resulting code back into
** p->rc before returning.
*/
static void fts5TestTerm(
  Fts5Index *p,
  Fts5Buffer *pPrev,              /* Previous term */
  const char *z, int n,           /* Possibly new term to test */
  u64 expected,                   /* Required value of *pCksum afterwards */
  u64 *pCksum                     /* IN/OUT: Checksum of queried entries */
){
  int rc = p->rc;
  if( pPrev->n==0 ){
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);
  }else
  if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
    u64 cksum3 = *pCksum;
    const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
    int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
    int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
    int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
    u64 ck1 = 0;
    u64 ck2 = 0;

    /* Check that the results returned for ASC and DESC queries are
    ** the same. If not, call this corruption.  */
    rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
    if( rc==SQLITE_OK ){
      int f = flags|FTS5INDEX_QUERY_DESC;
      rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
    }
    if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;

    /* If this is a prefix query, check that the results returned when
    ** the index is disabled are the same. In both ASC and DESC order.
    **
    ** This check may only be performed if the hash table is empty. This
    ** is because the hash table only supports a single scan query at
    ** a time, and the multi-iter loop from which this function is called
    ** is already performing such a scan. */
    if( p->nPendingData==0 ){
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
      if( iIdx>0 && rc==SQLITE_OK ){
        int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
        ck2 = 0;
        rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
        if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
      }
    }

    cksum3 ^= ck1;
    fts5BufferSet(&rc, pPrev, n, (const u8*)z);

    if( rc==SQLITE_OK && cksum3!=expected ){
      rc = FTS5_CORRUPT;
    }
    *pCksum = cksum3;
  }
  p->rc = rc;
}

#else
/* Without SQLITE_DEBUG the internal tests expand to nothing. */
# define fts5TestDlidxReverse(x,y,z)
# define fts5TestTerm(u,v,w,x,y,z)
#endif

/*
** Check that:
**
**   1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
**      contain zero terms.
**   2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
**      contain zero rowids.
**
** p->rc is set to FTS5_CORRUPT if a leaf that was read violates either
** condition. The loop stops as soon as p->rc is not SQLITE_OK.
*/
static void fts5IndexIntegrityCheckEmpty(
  Fts5Index *p,
  Fts5StructureSegment *pSeg,     /* Segment to check internal consistency */
  int iFirst,
  int iNoRowid,
  int iLast
){
  int i;

  /* Now check that the iter.nEmpty leaves following the current leaf
  ** (a) exist and (b) contain no terms. */
  for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
    Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
    if( pLeaf ){
      if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
      if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
    }
    fts5DataRelease(pLeaf);
  }
}

/*
** Check the list of varints stored in leaf pLeaf between offsets szLeaf
** and nn. Each varint is added to a running term offset, which must stay
** below szLeaf. The term found at each offset is reconstructed in buf1:
**
**   * the first entry stores the term whole (size varint, then bytes);
**   * every later entry stores the number of leading bytes to keep from
**     the previous term, the number of new bytes, then the new bytes.
**
** Any entry that runs past szLeaf, keeps more bytes than the previous
** term has, or yields a term that does not compare greater than the one
** before it, sets p->rc to FTS5_CORRUPT.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  int iTermOff = 0;               /* Running offset of the current term */
  int ii;                         /* Read offset within the varint list */

  Fts5Buffer buf1 = {0,0,0};      /* Term currently being reconstructed */
  Fts5Buffer buf2 = {0,0,0};      /* Copy of the previous term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;
    int iOff;
    int nIncr;

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* iTermOff was zero before this increment, as it is for the first
      ** entry in the list: the term is stored whole. */
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
    }else{
      int nKeep, nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        /* Truncate to the shared prefix, then append the new suffix. */
        buf1.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }

      if( p->rc==SQLITE_OK ){
        /* NOTE(review): presumably fts5BufferCompare() orders buffers
        ** memcmp-style, so res<=0 means buf1 is not after buf2. */
        res = fts5BufferCompare(&buf1, &buf2);
        if( res<=0 ) p->rc = FTS5_CORRUPT;
      }
    }
    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}

/*
** Check the internal consistency of segment pSeg.
**
** Each row of the %_idx table belonging to the segment supplies a
** split-key (term), a leaf page number (pgno>>1) and a doclist-index flag
** (pgno&1). For each such row at or after pSeg->pgnoFirst this function
** checks that:
**
**   * the leaf holds at least one term, that the first term is not smaller
**     than the split-key, and that the first-rowid offset is smaller than
**     the first-term offset;
**   * the leaf's term offset list is well formed;
**   * the leaves between the previous row's leaf and this one are empty,
**     as checked by fts5IndexIntegrityCheckEmpty();
**   * if the flag is set, the doclist-index agrees with the leaves it
**     refers to.
**
** Problems are reported by setting p->rc. Segments with pgnoFirst==0 are
** not checked at all.
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;
  int rc2;
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;   /* Leaf of the previous %_idx row */
  int iDlidxPrevLeaf = pSeg->pgnoLast;

  if( pSeg->pgnoFirst==0 ) return;

  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff ){
        p->rc = FTS5_CORRUPT;
      }else{
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Now check that the iter.nEmpty leaves following the current leaf
    ** (a) exist and (b) contain no terms. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      /* iPg holds the value left behind by the innermost loop above; it
      ** becomes the iNoRowid base for the next row's empty-leaf check. */
      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  /* Finalizing the statement may report an error; it is recorded only if
  ** no earlier error has been stored. */
  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
#if 0
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }
#endif
}


/*
** Run internal checks to ensure that the FTS index (a) is internally
** consistent and (b) contains entries for which the XOR of the checksums
** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
**
** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
** checksum does not match. Return SQLITE_OK if all checks pass without
** error, or some other SQLite error code if another error (e.g. OOM)
** occurs.
*/
int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
  int eDetail = p->pConfig->eDetail;
  u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
  Fts5Iter *pIter;                /* Used to iterate through entire index */
  Fts5Structure *pStruct;         /* Index structure */

#ifdef SQLITE_DEBUG
  /* Used by extra internal tests that are compiled in only when
  ** SQLITE_DEBUG is defined */
  u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
#endif
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;

  /* Load the FTS index structure */
  pStruct = fts5StructureRead(p);

  /* Check that the internal nodes of each segment match the leaves */
  if( pStruct ){
    int iLvl, iSeg;
    for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
      for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
        Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
        fts5IndexIntegrityCheckSegment(p, pSeg);
      }
    }
  }

  /* The cksum argument passed to this function is a checksum calculated
  ** based on all expected entries in the FTS index (including prefix index
  ** entries). This block checks that a checksum calculated based on the
  ** actual contents of FTS index is identical.
  **
  ** Two versions of the same checksum are calculated. The first (stack
  ** variable cksum2) based on entries extracted from the full-text index
  ** while doing a linear scan of each individual index in turn.
  **
  ** As each term is visited by the linear scans, a separate query for the
  ** same term is performed. cksum3 is calculated based on the entries
  ** extracted by these queries.
  */
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    int n;                      /* Size of term in bytes */
    i64 iPos = 0;               /* Position read from poslist */
    int iOff = 0;               /* Offset within poslist */
    i64 iRowid = fts5MultiIterRowid(pIter);
    char *z = (char*)fts5MultiIterTerm(pIter, &n);

    /* If this is a new term, query for it. Update cksum3 with the results.
    ** fts5TestTerm() expands to nothing unless SQLITE_DEBUG is defined, so
    ** "term" and "cksum3" are only referenced in debug builds. */
    fts5TestTerm(p, &term, z, n, cksum2, &cksum3);

    if( eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: one checksum per non-empty entry, with column and
      ** offset both zero. */
      if( 0==fts5MultiIterIsEmpty(p, pIter) ){
        cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
      }
    }else{
      /* Otherwise: one checksum per position in the entry's poslist. */
      poslist.n = 0;
      fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
      while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
        int iCol = FTS5_POS2COLUMN(iPos);
        int iTokOff = FTS5_POS2OFFSET(iPos);
        cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
      }
    }
  }
  /* Passing an empty term makes the debug-only test process the final
  ** term that was visited by the scan. */
  fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);

  fts5MultiIterFree(pIter);
  if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;

  fts5StructureRelease(pStruct);
#ifdef SQLITE_DEBUG
  fts5BufferFree(&term);
#endif
  fts5BufferFree(&poslist);
  return fts5IndexReturn(p);
}

/*************************************************************************
**************************************************************************
** Below this point is the implementation of the fts5_decode() scalar
** function only.
*/

/*
** Decode a segment-data rowid from the %_data table. This function is
** the opposite of macro FTS5_SEGMENT_ROWID().
6039 */ 6040 static void fts5DecodeRowid( 6041 i64 iRowid, /* Rowid from %_data table */ 6042 int *piSegid, /* OUT: Segment id */ 6043 int *pbDlidx, /* OUT: Dlidx flag */ 6044 int *piHeight, /* OUT: Height */ 6045 int *piPgno /* OUT: Page number */ 6046 ){ 6047 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); 6048 iRowid >>= FTS5_DATA_PAGE_B; 6049 6050 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); 6051 iRowid >>= FTS5_DATA_HEIGHT_B; 6052 6053 *pbDlidx = (int)(iRowid & 0x0001); 6054 iRowid >>= FTS5_DATA_DLI_B; 6055 6056 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); 6057 } 6058 6059 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ 6060 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ 6061 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); 6062 6063 if( iSegid==0 ){ 6064 if( iKey==FTS5_AVERAGES_ROWID ){ 6065 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); 6066 }else{ 6067 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); 6068 } 6069 } 6070 else{ 6071 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}", 6072 bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno 6073 ); 6074 } 6075 } 6076 6077 static void fts5DebugStructure( 6078 int *pRc, /* IN/OUT: error code */ 6079 Fts5Buffer *pBuf, 6080 Fts5Structure *p 6081 ){ 6082 int iLvl, iSeg; /* Iterate through levels, segments */ 6083 6084 for(iLvl=0; iLvl<p->nLevel; iLvl++){ 6085 Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; 6086 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 6087 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg 6088 ); 6089 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ 6090 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; 6091 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", 6092 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast 6093 ); 6094 } 6095 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); 6096 } 6097 } 6098 6099 /* 6100 ** This is part of the fts5_decode() debugging aid. 
6101 ** 6102 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This 6103 ** function appends a human-readable representation of the same object 6104 ** to the buffer passed as the second argument. 6105 */ 6106 static void fts5DecodeStructure( 6107 int *pRc, /* IN/OUT: error code */ 6108 Fts5Buffer *pBuf, 6109 const u8 *pBlob, int nBlob 6110 ){ 6111 int rc; /* Return code */ 6112 Fts5Structure *p = 0; /* Decoded structure object */ 6113 6114 rc = fts5StructureDecode(pBlob, nBlob, 0, &p); 6115 if( rc!=SQLITE_OK ){ 6116 *pRc = rc; 6117 return; 6118 } 6119 6120 fts5DebugStructure(pRc, pBuf, p); 6121 fts5StructureRelease(p); 6122 } 6123 6124 /* 6125 ** This is part of the fts5_decode() debugging aid. 6126 ** 6127 ** Arguments pBlob/nBlob contain an "averages" record. This function 6128 ** appends a human-readable representation of record to the buffer passed 6129 ** as the second argument. 6130 */ 6131 static void fts5DecodeAverages( 6132 int *pRc, /* IN/OUT: error code */ 6133 Fts5Buffer *pBuf, 6134 const u8 *pBlob, int nBlob 6135 ){ 6136 int i = 0; 6137 const char *zSpace = ""; 6138 6139 while( i<nBlob ){ 6140 u64 iVal; 6141 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); 6142 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); 6143 zSpace = " "; 6144 } 6145 } 6146 6147 /* 6148 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read 6149 ** each varint and append its string representation to buffer pBuf. Return 6150 ** after either the input buffer is exhausted or a 0 value is read. 6151 ** 6152 ** The return value is the number of bytes read from the input buffer. 
6153 */ 6154 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ 6155 int iOff = 0; 6156 while( iOff<n ){ 6157 int iVal; 6158 iOff += fts5GetVarint32(&a[iOff], iVal); 6159 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); 6160 } 6161 return iOff; 6162 } 6163 6164 /* 6165 ** The start of buffer (a/n) contains the start of a doclist. The doclist 6166 ** may or may not finish within the buffer. This function appends a text 6167 ** representation of the part of the doclist that is present to buffer 6168 ** pBuf. 6169 ** 6170 ** The return value is the number of bytes read from the input buffer. 6171 */ 6172 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ 6173 i64 iDocid = 0; 6174 int iOff = 0; 6175 6176 if( n>0 ){ 6177 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); 6178 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); 6179 } 6180 while( iOff<n ){ 6181 int nPos; 6182 int bDel; 6183 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); 6184 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); 6185 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); 6186 if( iOff<n ){ 6187 i64 iDelta; 6188 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); 6189 iDocid += iDelta; 6190 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); 6191 } 6192 } 6193 6194 return iOff; 6195 } 6196 6197 /* 6198 ** This function is part of the fts5_decode() debugging function. It is 6199 ** only ever used with detail=none tables. 6200 ** 6201 ** Buffer (pData/nData) contains a doclist in the format used by detail=none 6202 ** tables. This function appends a human-readable version of that list to 6203 ** buffer pBuf. 6204 ** 6205 ** If *pRc is other than SQLITE_OK when this function is called, it is a 6206 ** no-op. If an OOM or other error occurs within this function, *pRc is 6207 ** set to an SQLite error code before returning. 
The final state of buffer 6208 ** pBuf is undefined in this case. 6209 */ 6210 static void fts5DecodeRowidList( 6211 int *pRc, /* IN/OUT: Error code */ 6212 Fts5Buffer *pBuf, /* Buffer to append text to */ 6213 const u8 *pData, int nData /* Data to decode list-of-rowids from */ 6214 ){ 6215 int i = 0; 6216 i64 iRowid = 0; 6217 6218 while( i<nData ){ 6219 const char *zApp = ""; 6220 u64 iVal; 6221 i += sqlite3Fts5GetVarint(&pData[i], &iVal); 6222 iRowid += iVal; 6223 6224 if( i<nData && pData[i]==0x00 ){ 6225 i++; 6226 if( i<nData && pData[i]==0x00 ){ 6227 i++; 6228 zApp = "+"; 6229 }else{ 6230 zApp = "*"; 6231 } 6232 } 6233 6234 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); 6235 } 6236 } 6237 6238 /* 6239 ** The implementation of user-defined scalar function fts5_decode(). 6240 */ 6241 static void fts5DecodeFunction( 6242 sqlite3_context *pCtx, /* Function call context */ 6243 int nArg, /* Number of args (always 2) */ 6244 sqlite3_value **apVal /* Function arguments */ 6245 ){ 6246 i64 iRowid; /* Rowid for record being decoded */ 6247 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ 6248 const u8 *aBlob; int n; /* Record to decode */ 6249 u8 *a = 0; 6250 Fts5Buffer s; /* Build up text to return here */ 6251 int rc = SQLITE_OK; /* Return code */ 6252 int nSpace = 0; 6253 int eDetailNone = (sqlite3_user_data(pCtx)!=0); 6254 6255 assert( nArg==2 ); 6256 UNUSED_PARAM(nArg); 6257 memset(&s, 0, sizeof(Fts5Buffer)); 6258 iRowid = sqlite3_value_int64(apVal[0]); 6259 6260 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] 6261 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents 6262 ** buffer overreads even if the record is corrupt. 
*/ 6263 n = sqlite3_value_bytes(apVal[1]); 6264 aBlob = sqlite3_value_blob(apVal[1]); 6265 nSpace = n + FTS5_DATA_ZERO_PADDING; 6266 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); 6267 if( a==0 ) goto decode_out; 6268 memcpy(a, aBlob, n); 6269 6270 6271 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); 6272 6273 fts5DebugRowid(&rc, &s, iRowid); 6274 if( bDlidx ){ 6275 Fts5Data dlidx; 6276 Fts5DlidxLvl lvl; 6277 6278 dlidx.p = a; 6279 dlidx.nn = n; 6280 6281 memset(&lvl, 0, sizeof(Fts5DlidxLvl)); 6282 lvl.pData = &dlidx; 6283 lvl.iLeafPgno = iPgno; 6284 6285 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ 6286 sqlite3Fts5BufferAppendPrintf(&rc, &s, 6287 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid 6288 ); 6289 } 6290 }else if( iSegid==0 ){ 6291 if( iRowid==FTS5_AVERAGES_ROWID ){ 6292 fts5DecodeAverages(&rc, &s, a, n); 6293 }else{ 6294 fts5DecodeStructure(&rc, &s, a, n); 6295 } 6296 }else if( eDetailNone ){ 6297 Fts5Buffer term; /* Current term read from page */ 6298 int szLeaf; 6299 int iPgidxOff = szLeaf = fts5GetU16(&a[2]); 6300 int iTermOff; 6301 int nKeep = 0; 6302 int iOff; 6303 6304 memset(&term, 0, sizeof(Fts5Buffer)); 6305 6306 /* Decode any entries that occur before the first term. 
*/ 6307 if( szLeaf<n ){ 6308 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff); 6309 }else{ 6310 iTermOff = szLeaf; 6311 } 6312 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); 6313 6314 iOff = iTermOff; 6315 while( iOff<szLeaf ){ 6316 int nAppend; 6317 6318 /* Read the term data for the next term*/ 6319 iOff += fts5GetVarint32(&a[iOff], nAppend); 6320 term.n = nKeep; 6321 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]); 6322 sqlite3Fts5BufferAppendPrintf( 6323 &rc, &s, " term=%.*s", term.n, (const char*)term.p 6324 ); 6325 iOff += nAppend; 6326 6327 /* Figure out where the doclist for this term ends */ 6328 if( iPgidxOff<n ){ 6329 int nIncr; 6330 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr); 6331 iTermOff += nIncr; 6332 }else{ 6333 iTermOff = szLeaf; 6334 } 6335 6336 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); 6337 iOff = iTermOff; 6338 if( iOff<szLeaf ){ 6339 iOff += fts5GetVarint32(&a[iOff], nKeep); 6340 } 6341 } 6342 6343 fts5BufferFree(&term); 6344 }else{ 6345 Fts5Buffer term; /* Current term read from page */ 6346 int szLeaf; /* Offset of pgidx in a[] */ 6347 int iPgidxOff; 6348 int iPgidxPrev = 0; /* Previous value read from pgidx */ 6349 int iTermOff = 0; 6350 int iRowidOff = 0; 6351 int iOff; 6352 int nDoclist; 6353 6354 memset(&term, 0, sizeof(Fts5Buffer)); 6355 6356 if( n<4 ){ 6357 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); 6358 goto decode_out; 6359 }else{ 6360 iRowidOff = fts5GetU16(&a[0]); 6361 iPgidxOff = szLeaf = fts5GetU16(&a[2]); 6362 if( iPgidxOff<n ){ 6363 fts5GetVarint32(&a[iPgidxOff], iTermOff); 6364 } 6365 } 6366 6367 /* Decode the position list tail at the start of the page */ 6368 if( iRowidOff!=0 ){ 6369 iOff = iRowidOff; 6370 }else if( iTermOff!=0 ){ 6371 iOff = iTermOff; 6372 }else{ 6373 iOff = szLeaf; 6374 } 6375 fts5DecodePoslist(&rc, &s, &a[4], iOff-4); 6376 6377 /* Decode any more doclist data that appears on the page before the 6378 ** first term. */ 6379 nDoclist = (iTermOff ? 
iTermOff : szLeaf) - iOff; 6380 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); 6381 6382 while( iPgidxOff<n ){ 6383 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ 6384 int nByte; /* Bytes of data */ 6385 int iEnd; 6386 6387 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte); 6388 iPgidxPrev += nByte; 6389 iOff = iPgidxPrev; 6390 6391 if( iPgidxOff<n ){ 6392 fts5GetVarint32(&a[iPgidxOff], nByte); 6393 iEnd = iPgidxPrev + nByte; 6394 }else{ 6395 iEnd = szLeaf; 6396 } 6397 6398 if( bFirst==0 ){ 6399 iOff += fts5GetVarint32(&a[iOff], nByte); 6400 term.n = nByte; 6401 } 6402 iOff += fts5GetVarint32(&a[iOff], nByte); 6403 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); 6404 iOff += nByte; 6405 6406 sqlite3Fts5BufferAppendPrintf( 6407 &rc, &s, " term=%.*s", term.n, (const char*)term.p 6408 ); 6409 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); 6410 } 6411 6412 fts5BufferFree(&term); 6413 } 6414 6415 decode_out: 6416 sqlite3_free(a); 6417 if( rc==SQLITE_OK ){ 6418 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); 6419 }else{ 6420 sqlite3_result_error_code(pCtx, rc); 6421 } 6422 fts5BufferFree(&s); 6423 } 6424 6425 /* 6426 ** The implementation of user-defined scalar function fts5_rowid(). 
6427 */ 6428 static void fts5RowidFunction( 6429 sqlite3_context *pCtx, /* Function call context */ 6430 int nArg, /* Number of args (always 2) */ 6431 sqlite3_value **apVal /* Function arguments */ 6432 ){ 6433 const char *zArg; 6434 if( nArg==0 ){ 6435 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); 6436 }else{ 6437 zArg = (const char*)sqlite3_value_text(apVal[0]); 6438 if( 0==sqlite3_stricmp(zArg, "segment") ){ 6439 i64 iRowid; 6440 int segid, pgno; 6441 if( nArg!=3 ){ 6442 sqlite3_result_error(pCtx, 6443 "should be: fts5_rowid('segment', segid, pgno))", -1 6444 ); 6445 }else{ 6446 segid = sqlite3_value_int(apVal[1]); 6447 pgno = sqlite3_value_int(apVal[2]); 6448 iRowid = FTS5_SEGMENT_ROWID(segid, pgno); 6449 sqlite3_result_int64(pCtx, iRowid); 6450 } 6451 }else{ 6452 sqlite3_result_error(pCtx, 6453 "first arg to fts5_rowid() must be 'segment'" , -1 6454 ); 6455 } 6456 } 6457 } 6458 6459 /* 6460 ** This is called as part of registering the FTS5 module with database 6461 ** connection db. It registers several user-defined scalar functions useful 6462 ** with FTS5. 6463 ** 6464 ** If successful, SQLITE_OK is returned. If an error occurs, some other 6465 ** SQLite error code is returned instead. 6466 */ 6467 int sqlite3Fts5IndexInit(sqlite3 *db){ 6468 int rc = sqlite3_create_function( 6469 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 6470 ); 6471 6472 if( rc==SQLITE_OK ){ 6473 rc = sqlite3_create_function( 6474 db, "fts5_decode_none", 2, 6475 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0 6476 ); 6477 } 6478 6479 if( rc==SQLITE_OK ){ 6480 rc = sqlite3_create_function( 6481 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 6482 ); 6483 } 6484 return rc; 6485 } 6486 6487 6488 int sqlite3Fts5IndexReset(Fts5Index *p){ 6489 assert( p->pStruct==0 || p->iStructVersion!=0 ); 6490 if( fts5IndexDataVersion(p)!=p->iStructVersion ){ 6491 fts5StructureInvalidate(p); 6492 } 6493 return fts5IndexReturn(p); 6494 }