modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/fts5/fts5_hash.c (about) 1 /* 2 ** 2014 August 11 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 */ 14 15 16 17 #include "fts5Int.h" 18 19 typedef struct Fts5HashEntry Fts5HashEntry; 20 21 /* 22 ** This file contains the implementation of an in-memory hash table used 23 ** to accumuluate "term -> doclist" content before it is flused to a level-0 24 ** segment. 25 */ 26 27 28 struct Fts5Hash { 29 int eDetail; /* Copy of Fts5Config.eDetail */ 30 int *pnByte; /* Pointer to bytes counter */ 31 int nEntry; /* Number of entries currently in hash */ 32 int nSlot; /* Size of aSlot[] array */ 33 Fts5HashEntry *pScan; /* Current ordered scan item */ 34 Fts5HashEntry **aSlot; /* Array of hash slots */ 35 }; 36 37 /* 38 ** Each entry in the hash table is represented by an object of the 39 ** following type. Each object, its key (a nul-terminated string) and 40 ** its current data are stored in a single memory allocation. The 41 ** key immediately follows the object in memory. The position list 42 ** data immediately follows the key data in memory. 43 ** 44 ** The data that follows the key is in a similar, but not identical format 45 ** to the doclist data stored in the database. It is: 46 ** 47 ** * Rowid, as a varint 48 ** * Position list, without 0x00 terminator. 49 ** * Size of previous position list and rowid, as a 4 byte 50 ** big-endian integer. 51 ** 52 ** iRowidOff: 53 ** Offset of last rowid written to data area. Relative to first byte of 54 ** structure. 55 ** 56 ** nData: 57 ** Bytes of data written since iRowidOff. 58 */ 59 struct Fts5HashEntry { 60 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ 61 Fts5HashEntry *pScanNext; /* Next entry in sorted order */ 62 63 int nAlloc; /* Total size of allocation */ 64 int iSzPoslist; /* Offset of space for 4-byte poslist size */ 65 int nData; /* Total bytes of data (incl. structure) */ 66 int nKey; /* Length of key in bytes */ 67 u8 bDel; /* Set delete-flag @ iSzPoslist */ 68 u8 bContent; /* Set content-flag (detail=none mode) */ 69 i16 iCol; /* Column of last value written */ 70 int iPos; /* Position of last value written */ 71 i64 iRowid; /* Rowid of last value written */ 72 }; 73 74 /* 75 ** Eqivalent to: 76 ** 77 ** char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; } 78 */ 79 #define fts5EntryKey(p) ( ((char *)(&(p)[1])) ) 80 81 82 /* 83 ** Allocate a new hash table. 84 */ 85 int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ 86 int rc = SQLITE_OK; 87 Fts5Hash *pNew; 88 89 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); 90 if( pNew==0 ){ 91 rc = SQLITE_NOMEM; 92 }else{ 93 int nByte; 94 memset(pNew, 0, sizeof(Fts5Hash)); 95 pNew->pnByte = pnByte; 96 pNew->eDetail = pConfig->eDetail; 97 98 pNew->nSlot = 1024; 99 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; 100 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc(nByte); 101 if( pNew->aSlot==0 ){ 102 sqlite3_free(pNew); 103 *ppNew = 0; 104 rc = SQLITE_NOMEM; 105 }else{ 106 memset(pNew->aSlot, 0, nByte); 107 } 108 } 109 return rc; 110 } 111 112 /* 113 ** Free a hash table object. 114 */ 115 void sqlite3Fts5HashFree(Fts5Hash *pHash){ 116 if( pHash ){ 117 sqlite3Fts5HashClear(pHash); 118 sqlite3_free(pHash->aSlot); 119 sqlite3_free(pHash); 120 } 121 } 122 123 /* 124 ** Empty (but do not delete) a hash table. 125 */ 126 void sqlite3Fts5HashClear(Fts5Hash *pHash){ 127 int i; 128 for(i=0; i<pHash->nSlot; i++){ 129 Fts5HashEntry *pNext; 130 Fts5HashEntry *pSlot; 131 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ 132 pNext = pSlot->pHashNext; 133 sqlite3_free(pSlot); 134 } 135 } 136 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); 137 pHash->nEntry = 0; 138 } 139 140 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ 141 int i; 142 unsigned int h = 13; 143 for(i=n-1; i>=0; i--){ 144 h = (h << 3) ^ h ^ p[i]; 145 } 146 return (h % nSlot); 147 } 148 149 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ 150 int i; 151 unsigned int h = 13; 152 for(i=n-1; i>=0; i--){ 153 h = (h << 3) ^ h ^ p[i]; 154 } 155 h = (h << 3) ^ h ^ b; 156 return (h % nSlot); 157 } 158 159 /* 160 ** Resize the hash table by doubling the number of slots. 161 */ 162 static int fts5HashResize(Fts5Hash *pHash){ 163 int nNew = pHash->nSlot*2; 164 int i; 165 Fts5HashEntry **apNew; 166 Fts5HashEntry **apOld = pHash->aSlot; 167 168 apNew = (Fts5HashEntry**)sqlite3_malloc(nNew*sizeof(Fts5HashEntry*)); 169 if( !apNew ) return SQLITE_NOMEM; 170 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); 171 172 for(i=0; i<pHash->nSlot; i++){ 173 while( apOld[i] ){ 174 unsigned int iHash; 175 Fts5HashEntry *p = apOld[i]; 176 apOld[i] = p->pHashNext; 177 iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), 178 (int)strlen(fts5EntryKey(p))); 179 p->pHashNext = apNew[iHash]; 180 apNew[iHash] = p; 181 } 182 } 183 184 sqlite3_free(apOld); 185 pHash->nSlot = nNew; 186 pHash->aSlot = apNew; 187 return SQLITE_OK; 188 } 189 190 static void fts5HashAddPoslistSize(Fts5Hash *pHash, Fts5HashEntry *p){ 191 if( p->iSzPoslist ){ 192 u8 *pPtr = (u8*)p; 193 if( pHash->eDetail==FTS5_DETAIL_NONE ){ 194 assert( p->nData==p->iSzPoslist ); 195 if( p->bDel ){ 196 pPtr[p->nData++] = 0x00; 197 if( p->bContent ){ 198 pPtr[p->nData++] = 0x00; 199 } 200 } 201 }else{ 202 int nSz = (p->nData - p->iSzPoslist - 1); /* Size in bytes */ 203 int nPos = nSz*2 + p->bDel; /* Value of nPos field */ 204 205 assert( p->bDel==0 || p->bDel==1 ); 206 if( nPos<=127 ){ 207 pPtr[p->iSzPoslist] = (u8)nPos; 208 }else{ 209 int nByte = sqlite3Fts5GetVarintLen((u32)nPos); 210 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); 211 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); 212 p->nData += (nByte-1); 213 } 214 } 215 216 p->iSzPoslist = 0; 217 p->bDel = 0; 218 p->bContent = 0; 219 } 220 } 221 222 /* 223 ** Add an entry to the in-memory hash table. The key is the concatenation 224 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). 225 ** 226 ** (bByte || pToken) -> (iRowid,iCol,iPos) 227 ** 228 ** Or, if iCol is negative, then the value is a delete marker. 229 */ 230 int sqlite3Fts5HashWrite( 231 Fts5Hash *pHash, 232 i64 iRowid, /* Rowid for this entry */ 233 int iCol, /* Column token appears in (-ve -> delete) */ 234 int iPos, /* Position of token within column */ 235 char bByte, /* First byte of token */ 236 const char *pToken, int nToken /* Token to add or remove to or from index */ 237 ){ 238 unsigned int iHash; 239 Fts5HashEntry *p; 240 u8 *pPtr; 241 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ 242 int bNew; /* If non-delete entry should be written */ 243 244 bNew = (pHash->eDetail==FTS5_DETAIL_FULL); 245 246 /* Attempt to locate an existing hash entry */ 247 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); 248 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ 249 char *zKey = fts5EntryKey(p); 250 if( zKey[0]==bByte 251 && p->nKey==nToken 252 && memcmp(&zKey[1], pToken, nToken)==0 253 ){ 254 break; 255 } 256 } 257 258 /* If an existing hash entry cannot be found, create a new one. */ 259 if( p==0 ){ 260 /* Figure out how much space to allocate */ 261 char *zKey; 262 int nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; 263 if( nByte<128 ) nByte = 128; 264 265 /* Grow the Fts5Hash.aSlot[] array if necessary. */ 266 if( (pHash->nEntry*2)>=pHash->nSlot ){ 267 int rc = fts5HashResize(pHash); 268 if( rc!=SQLITE_OK ) return rc; 269 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); 270 } 271 272 /* Allocate new Fts5HashEntry and add it to the hash table. */ 273 p = (Fts5HashEntry*)sqlite3_malloc(nByte); 274 if( !p ) return SQLITE_NOMEM; 275 memset(p, 0, sizeof(Fts5HashEntry)); 276 p->nAlloc = nByte; 277 zKey = fts5EntryKey(p); 278 zKey[0] = bByte; 279 memcpy(&zKey[1], pToken, nToken); 280 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) ); 281 p->nKey = nToken; 282 zKey[nToken+1] = '\0'; 283 p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry); 284 p->pHashNext = pHash->aSlot[iHash]; 285 pHash->aSlot[iHash] = p; 286 pHash->nEntry++; 287 288 /* Add the first rowid field to the hash-entry */ 289 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); 290 p->iRowid = iRowid; 291 292 p->iSzPoslist = p->nData; 293 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ 294 p->nData += 1; 295 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); 296 } 297 298 nIncr += p->nData; 299 }else{ 300 301 /* Appending to an existing hash-entry. Check that there is enough 302 ** space to append the largest possible new entry. Worst case scenario 303 ** is: 304 ** 305 ** + 9 bytes for a new rowid, 306 ** + 4 byte reserved for the "poslist size" varint. 307 ** + 1 byte for a "new column" byte, 308 ** + 3 bytes for a new column number (16-bit max) as a varint, 309 ** + 5 bytes for the new position offset (32-bit max). 310 */ 311 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ 312 int nNew = p->nAlloc * 2; 313 Fts5HashEntry *pNew; 314 Fts5HashEntry **pp; 315 pNew = (Fts5HashEntry*)sqlite3_realloc(p, nNew); 316 if( pNew==0 ) return SQLITE_NOMEM; 317 pNew->nAlloc = nNew; 318 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); 319 *pp = pNew; 320 p = pNew; 321 } 322 nIncr -= p->nData; 323 } 324 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) ); 325 326 pPtr = (u8*)p; 327 328 /* If this is a new rowid, append the 4-byte size field for the previous 329 ** entry, and the new rowid for this entry. */ 330 if( iRowid!=p->iRowid ){ 331 fts5HashAddPoslistSize(pHash, p); 332 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iRowid - p->iRowid); 333 p->iRowid = iRowid; 334 bNew = 1; 335 p->iSzPoslist = p->nData; 336 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ 337 p->nData += 1; 338 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); 339 p->iPos = 0; 340 } 341 } 342 343 if( iCol>=0 ){ 344 if( pHash->eDetail==FTS5_DETAIL_NONE ){ 345 p->bContent = 1; 346 }else{ 347 /* Append a new column value, if necessary */ 348 assert( iCol>=p->iCol ); 349 if( iCol!=p->iCol ){ 350 if( pHash->eDetail==FTS5_DETAIL_FULL ){ 351 pPtr[p->nData++] = 0x01; 352 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); 353 p->iCol = (i16)iCol; 354 p->iPos = 0; 355 }else{ 356 bNew = 1; 357 p->iCol = (i16)(iPos = iCol); 358 } 359 } 360 361 /* Append the new position offset, if necessary */ 362 if( bNew ){ 363 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); 364 p->iPos = iPos; 365 } 366 } 367 }else{ 368 /* This is a delete. Set the delete flag. */ 369 p->bDel = 1; 370 } 371 372 nIncr += p->nData; 373 *pHash->pnByte += nIncr; 374 return SQLITE_OK; 375 } 376 377 378 /* 379 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, 380 ** each sorted in key order. This function merges the two lists into a 381 ** single list and returns a pointer to its first element. 382 */ 383 static Fts5HashEntry *fts5HashEntryMerge( 384 Fts5HashEntry *pLeft, 385 Fts5HashEntry *pRight 386 ){ 387 Fts5HashEntry *p1 = pLeft; 388 Fts5HashEntry *p2 = pRight; 389 Fts5HashEntry *pRet = 0; 390 Fts5HashEntry **ppOut = &pRet; 391 392 while( p1 || p2 ){ 393 if( p1==0 ){ 394 *ppOut = p2; 395 p2 = 0; 396 }else if( p2==0 ){ 397 *ppOut = p1; 398 p1 = 0; 399 }else{ 400 int i = 0; 401 char *zKey1 = fts5EntryKey(p1); 402 char *zKey2 = fts5EntryKey(p2); 403 while( zKey1[i]==zKey2[i] ) i++; 404 405 if( ((u8)zKey1[i])>((u8)zKey2[i]) ){ 406 /* p2 is smaller */ 407 *ppOut = p2; 408 ppOut = &p2->pScanNext; 409 p2 = p2->pScanNext; 410 }else{ 411 /* p1 is smaller */ 412 *ppOut = p1; 413 ppOut = &p1->pScanNext; 414 p1 = p1->pScanNext; 415 } 416 *ppOut = 0; 417 } 418 } 419 420 return pRet; 421 } 422 423 /* 424 ** Extract all tokens from hash table iHash and link them into a list 425 ** in sorted order. The hash table is cleared before returning. It is 426 ** the responsibility of the caller to free the elements of the returned 427 ** list. 428 */ 429 static int fts5HashEntrySort( 430 Fts5Hash *pHash, 431 const char *pTerm, int nTerm, /* Query prefix, if any */ 432 Fts5HashEntry **ppSorted 433 ){ 434 const int nMergeSlot = 32; 435 Fts5HashEntry **ap; 436 Fts5HashEntry *pList; 437 int iSlot; 438 int i; 439 440 *ppSorted = 0; 441 ap = sqlite3_malloc(sizeof(Fts5HashEntry*) * nMergeSlot); 442 if( !ap ) return SQLITE_NOMEM; 443 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); 444 445 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ 446 Fts5HashEntry *pIter; 447 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ 448 if( pTerm==0 || 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm) ){ 449 Fts5HashEntry *pEntry = pIter; 450 pEntry->pScanNext = 0; 451 for(i=0; ap[i]; i++){ 452 pEntry = fts5HashEntryMerge(pEntry, ap[i]); 453 ap[i] = 0; 454 } 455 ap[i] = pEntry; 456 } 457 } 458 } 459 460 pList = 0; 461 for(i=0; i<nMergeSlot; i++){ 462 pList = fts5HashEntryMerge(pList, ap[i]); 463 } 464 465 pHash->nEntry = 0; 466 sqlite3_free(ap); 467 *ppSorted = pList; 468 return SQLITE_OK; 469 } 470 471 /* 472 ** Query the hash table for a doclist associated with term pTerm/nTerm. 473 */ 474 int sqlite3Fts5HashQuery( 475 Fts5Hash *pHash, /* Hash table to query */ 476 const char *pTerm, int nTerm, /* Query term */ 477 const u8 **ppDoclist, /* OUT: Pointer to doclist for pTerm */ 478 int *pnDoclist /* OUT: Size of doclist in bytes */ 479 ){ 480 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); 481 char *zKey = 0; 482 Fts5HashEntry *p; 483 484 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ 485 zKey = fts5EntryKey(p); 486 if( memcmp(zKey, pTerm, nTerm)==0 && zKey[nTerm]==0 ) break; 487 } 488 489 if( p ){ 490 fts5HashAddPoslistSize(pHash, p); 491 *ppDoclist = (const u8*)&zKey[nTerm+1]; 492 *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1); 493 }else{ 494 *ppDoclist = 0; 495 *pnDoclist = 0; 496 } 497 498 return SQLITE_OK; 499 } 500 501 int sqlite3Fts5HashScanInit( 502 Fts5Hash *p, /* Hash table to query */ 503 const char *pTerm, int nTerm /* Query prefix */ 504 ){ 505 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); 506 } 507 508 void sqlite3Fts5HashScanNext(Fts5Hash *p){ 509 assert( !sqlite3Fts5HashScanEof(p) ); 510 p->pScan = p->pScan->pScanNext; 511 } 512 513 int sqlite3Fts5HashScanEof(Fts5Hash *p){ 514 return (p->pScan==0); 515 } 516 517 void sqlite3Fts5HashScanEntry( 518 Fts5Hash *pHash, 519 const char **pzTerm, /* OUT: term (nul-terminated) */ 520 const u8 **ppDoclist, /* OUT: pointer to doclist */ 521 int *pnDoclist /* OUT: size of doclist in bytes */ 522 ){ 523 Fts5HashEntry *p; 524 if( (p = pHash->pScan) ){ 525 char *zKey = fts5EntryKey(p); 526 int nTerm = (int)strlen(zKey); 527 fts5HashAddPoslistSize(pHash, p); 528 *pzTerm = zKey; 529 *ppDoclist = (const u8*)&zKey[nTerm+1]; 530 *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1); 531 }else{ 532 *pzTerm = 0; 533 *ppDoclist = 0; 534 *pnDoclist = 0; 535 } 536 } 537