/*
** 2012-01-23
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
**
** Utilities used to help multiple LSM clients to coexist within the
** same process space.
*/
#include "lsmInt.h"

/*
** Global data. All global variables used by code in this file are grouped
** into the following structure instance.
**
** pDatabase:
**   Linked list of all Database objects allocated within this process.
**   This list may not be traversed without holding the global mutex (see
**   functions enterGlobalMutex() and leaveGlobalMutex()).
*/
static struct SharedData {
  Database *pDatabase;            /* Linked list of all Database objects */
} gShared;

/*
** Database structure. There is one such structure for each distinct
** database accessed by this process. They are stored in the singly linked
** list starting at global variable gShared.pDatabase. Database objects are
** reference counted. Once the number of connections to the associated
** database drops to zero, they are removed from the linked list and deleted.
**
** pFile:
**   In multi-process mode, this file descriptor is used to obtain locks
**   and to access shared-memory. In single process mode, its only job is
**   to hold the exclusive lock on the file.
**
*/
struct Database {
  /* Protected by the global mutex (enterGlobalMutex/leaveGlobalMutex): */
  char *zName;                    /* Canonical path to database file */
  int nName;                      /* strlen(zName) */
  int nDbRef;                     /* Number of associated lsm_db handles */
  Database *pDbNext;              /* Next Database structure in global list */

  /* Protected by the local mutex (pClientMutex) */
  int bReadonly;                  /* True if Database.pFile is read-only */
  int bMultiProc;                 /* True if running in multi-process mode */
  lsm_file *pFile;                /* Used for locks/shm in multi-proc mode */
  LsmFile *pLsmFile;              /* List of deferred closes */
  lsm_mutex *pClientMutex;        /* Protects the apShmChunk[] and pConn */
  int nShmChunk;                  /* Number of entries in apShmChunk[] array */
  void **apShmChunk;              /* Array of "shared" memory regions */
  lsm_db *pConn;                  /* List of connections to this db. */
};

/*
** Functions to enter and leave the global mutex. This mutex is used
** to protect the global linked-list headed at gShared.pDatabase.
**
** enterGlobalMutex() returns an LSM error code if the static mutex
** cannot be obtained from the environment; the mutex is entered only
** if LSM_OK is returned.
*/
static int enterGlobalMutex(lsm_env *pEnv){
  lsm_mutex *p;
  int rc = lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p);
  if( rc==LSM_OK ) lsmMutexEnter(pEnv, p);
  return rc;
}
static void leaveGlobalMutex(lsm_env *pEnv){
  lsm_mutex *p;
  lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p);
  lsmMutexLeave(pEnv, p);
}

#ifdef LSM_DEBUG
/*
** Return true if the caller is currently holding the global mutex.
** For use within assert() statements in debug builds only.
*/
static int holdingGlobalMutex(lsm_env *pEnv){
  lsm_mutex *p;
  lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p);
  return lsmMutexHeld(pEnv, p);
}
#endif

#if 0
static void assertNotInFreelist(Freelist *p, int iBlk){
  int i;
  for(i=0; i<p->nEntry; i++){
    assert( p->aEntry[i].iBlk!=iBlk );
  }
}
#else
# define assertNotInFreelist(x,y)
#endif

/*
** Append an entry to the free-list. If (iId==-1), this is a delete.
99 */ 100 int freelistAppend(lsm_db *db, u32 iBlk, i64 iId){ 101 lsm_env *pEnv = db->pEnv; 102 Freelist *p; 103 int i; 104 105 assert( iId==-1 || iId>=0 ); 106 p = db->bUseFreelist ? db->pFreelist : &db->pWorker->freelist; 107 108 /* Extend the space allocated for the freelist, if required */ 109 assert( p->nAlloc>=p->nEntry ); 110 if( p->nAlloc==p->nEntry ){ 111 int nNew; 112 int nByte; 113 FreelistEntry *aNew; 114 115 nNew = (p->nAlloc==0 ? 4 : p->nAlloc*2); 116 nByte = sizeof(FreelistEntry) * nNew; 117 aNew = (FreelistEntry *)lsmRealloc(pEnv, p->aEntry, nByte); 118 if( !aNew ) return LSM_NOMEM_BKPT; 119 p->nAlloc = nNew; 120 p->aEntry = aNew; 121 } 122 123 for(i=0; i<p->nEntry; i++){ 124 assert( i==0 || p->aEntry[i].iBlk > p->aEntry[i-1].iBlk ); 125 if( p->aEntry[i].iBlk>=iBlk ) break; 126 } 127 128 if( i<p->nEntry && p->aEntry[i].iBlk==iBlk ){ 129 /* Clobber an existing entry */ 130 p->aEntry[i].iId = iId; 131 }else{ 132 /* Insert a new entry into the list */ 133 int nByte = sizeof(FreelistEntry)*(p->nEntry-i); 134 memmove(&p->aEntry[i+1], &p->aEntry[i], nByte); 135 p->aEntry[i].iBlk = iBlk; 136 p->aEntry[i].iId = iId; 137 p->nEntry++; 138 } 139 140 return LSM_OK; 141 } 142 143 /* 144 ** This function frees all resources held by the Database structure passed 145 ** as the only argument. 
*/
static void freeDatabase(lsm_env *pEnv, Database *p){
  assert( holdingGlobalMutex(pEnv) );
  if( p ){
    /* Free the mutexes */
    lsmMutexDel(pEnv, p->pClientMutex);

    /* Close the shared file descriptor, if one is open */
    if( p->pFile ){
      lsmEnvClose(pEnv, p->pFile);
    }

    /* Free the array of shm pointers */
    lsmFree(pEnv, p->apShmChunk);

    /* Free the memory allocated for the Database struct itself */
    lsmFree(pEnv, p);
  }
}

/*
** Context object used by dbTruncateCb() while calculating the last block
** of the database file that actually contains data.
*/
typedef struct DbTruncateCtx DbTruncateCtx;
struct DbTruncateCtx {
  int nBlock;                     /* Candidate new size of db, in blocks */
  i64 iInUse;                     /* Snapshot id still in use, or -1 */
};

/*
** lsmWalkFreelist() callback (invoked in reverse block order). For each
** free block that is currently the last block (iBlk==nBlock) and whose
** freeing snapshot is no longer in use, decrement nBlock. Return 1 (stop
** the walk) as soon as a block fails either test.
*/
static int dbTruncateCb(void *pCtx, int iBlk, i64 iSnapshot){
  DbTruncateCtx *p = (DbTruncateCtx *)pCtx;
  if( iBlk!=p->nBlock || (p->iInUse>=0 && iSnapshot>=p->iInUse) ) return 1;
  p->nBlock--;
  return 0;
}

/*
** NOTE(review): the entire body of this function is disabled via "#if 0"
** below - as compiled it is a no-op that always returns LSM_OK.
*/
static int dbTruncate(lsm_db *pDb, i64 iInUse){
  int rc = LSM_OK;
#if 0
  int i;
  DbTruncateCtx ctx;

  assert( pDb->pWorker );
  ctx.nBlock = pDb->pWorker->nBlock;
  ctx.iInUse = iInUse;

  rc = lsmWalkFreelist(pDb, 1, dbTruncateCb, (void *)&ctx);
  for(i=ctx.nBlock+1; rc==LSM_OK && i<=pDb->pWorker->nBlock; i++){
    rc = freelistAppend(pDb, i, -1);
  }

  if( rc==LSM_OK ){
#ifdef LSM_LOG_FREELIST
    if( ctx.nBlock!=pDb->pWorker->nBlock ){
      lsmLogMessage(pDb, 0,
          "dbTruncate(): truncated db to %d blocks",ctx.nBlock
      );
    }
#endif
    pDb->pWorker->nBlock = ctx.nBlock;
  }
#endif
  return rc;
}


/*
** This function is called during database shutdown (when the number of
** connections drops from one to zero). It truncates the database file
** to as small a size as possible without truncating away any blocks that
** contain data.
*/
static int dbTruncateFile(lsm_db *pDb){
  int rc;

  assert( pDb->pWorker==0 );
  assert( lsmShmAssertLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL) );

  /* Load a worker snapshot so that the current free-list and block count
  ** can be inspected. It is released again before returning. */
  rc = lsmCheckpointLoadWorker(pDb);

  if( rc==LSM_OK ){
    DbTruncateCtx ctx;

    /* Walk the database free-block-list in reverse order. Set ctx.nBlock
    ** to the block number of the last block in the database that actually
    ** contains data. */
    ctx.nBlock = pDb->pWorker->nBlock;
    ctx.iInUse = -1;
    rc = lsmWalkFreelist(pDb, 1, dbTruncateCb, (void *)&ctx);

    /* If the last block that contains data is not already the last block in
    ** the database file, truncate the database file so that it is. */
    if( rc==LSM_OK ){
      rc = lsmFsTruncateDb(
          pDb->pFS, (i64)ctx.nBlock*lsmFsBlockSize(pDb->pFS)
      );
    }
  }

  /* Release the worker snapshot loaded above. */
  lsmFreeSnapshot(pDb->pEnv, pDb->pWorker);
  pDb->pWorker = 0;
  return rc;
}

/*
** Disconnect database handle pDb from the shared system. If pDb turns out
** to be the last read-write connection, the in-memory tree is flushed to
** disk, a checkpoint is written and, if no read-only clients exist, the
** log file is deleted and the database file truncated.
*/
static void doDbDisconnect(lsm_db *pDb){
  int rc;

  if( pDb->bReadonly ){
    /* Read-only connections hold only a shared lock on DMS3. */
    lsmShmLock(pDb, LSM_LOCK_DMS3, LSM_LOCK_UNLOCK, 0);
  }else{
    /* Block for an exclusive lock on DMS1. This lock serializes all calls
    ** to doDbConnect() and doDbDisconnect() across all processes. */
    rc = lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL, 1);
    if( rc==LSM_OK ){

      /* Drop this connection's shared lock on DMS2. */
      lsmShmLock(pDb, LSM_LOCK_DMS2, LSM_LOCK_UNLOCK, 0);

      /* Try an exclusive lock on DMS2. If successful, this is the last
      ** connection to the database. In this case flush the contents of the
      ** in-memory tree to disk and write a checkpoint. */
      rc = lsmShmTestLock(pDb, LSM_LOCK_DMS2, 1, LSM_LOCK_EXCL);
      if( rc==LSM_OK ){
        rc = lsmShmTestLock(pDb, LSM_LOCK_CHECKPOINTER, 1, LSM_LOCK_EXCL);
      }
      if( rc==LSM_OK ){
        int bReadonly = 0;        /* True if there exist read-only conns. */

        /* Flush the in-memory tree, if required. If there is data to flush,
        ** this will create a new client snapshot in Database.pClient. The
        ** checkpoint (serialization) of this snapshot may be written to disk
        ** by the following block.
        **
        ** There is no need to take a WRITER lock here. That there are no
        ** other locks on DMS2 guarantees that there are no other read-write
        ** connections at this time (and the lock on DMS1 guarantees that
        ** no new ones may appear).
        */
        rc = lsmTreeLoadHeader(pDb, 0);
        if( rc==LSM_OK && (lsmTreeHasOld(pDb) || lsmTreeSize(pDb)>0) ){
          rc = lsmFlushTreeToDisk(pDb);
        }

        /* Now check if there are any read-only connections. If there are,
        ** then do not truncate the db file or unlink the shared-memory
        ** region. */
        if( rc==LSM_OK ){
          rc = lsmShmTestLock(pDb, LSM_LOCK_DMS3, 1, LSM_LOCK_EXCL);
          if( rc==LSM_BUSY ){
            bReadonly = 1;
            rc = LSM_OK;
          }
        }

        /* Write a checkpoint to disk. */
        if( rc==LSM_OK ){
          rc = lsmCheckpointWrite(pDb, 0);
        }

        /* If the checkpoint was written successfully, delete the log file
        ** and, if possible, truncate the database file. */
        if( rc==LSM_OK ){
          int bRotrans = 0;
          Database *p = pDb->pDatabase;

          /* The log file may only be deleted if there are no read-only
          ** clients running rotrans transactions. */
          rc = lsmDetectRoTrans(pDb, &bRotrans);
          if( rc==LSM_OK && bRotrans==0 ){
            lsmFsCloseAndDeleteLog(pDb->pFS);
          }

          /* The database may only be truncated if there exist no read-only
          ** clients - either connected or running rotrans transactions. */
          if( bReadonly==0 && bRotrans==0 ){
            lsmFsUnmap(pDb->pFS);
            dbTruncateFile(pDb);
            if( p->pFile && p->bMultiProc ){
              lsmEnvShmUnmap(pDb->pEnv, p->pFile, 1);
            }
          }
        }
      }
    }

    /* Release this connection's RWCLIENT lock, if it holds one. */
    if( pDb->iRwclient>=0 ){
      lsmShmLock(pDb, LSM_LOCK_RWCLIENT(pDb->iRwclient), LSM_LOCK_UNLOCK, 0);
      pDb->iRwclient = -1;
    }

    lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);
  }
  pDb->pShmhdr = 0;
}

/*
** Connect read-write database handle pDb to the shared system: map the
** shared-memory header, run recovery if this is the very first connection,
** then take the locks (shared DMS2, exclusive RWCLIENT slot) that mark the
** handle as a live read-write client.
*/
static int doDbConnect(lsm_db *pDb){
  const int nUsMax = 100000;      /* Max value for nUs */
  int nUs = 1000;                 /* us to wait between DMS1 attempts */
  int rc;

  /* Obtain a pointer to the shared-memory header */
  assert( pDb->pShmhdr==0 );
  assert( pDb->bReadonly==0 );

  /* Block for an exclusive lock on DMS1. This lock serializes all calls
  ** to doDbConnect() and doDbDisconnect() across all processes. Retry
  ** with exponential backoff (capped at nUsMax microseconds) on LSM_BUSY. */
  while( 1 ){
    rc = lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL, 1);
    if( rc!=LSM_BUSY ) break;
    lsmEnvSleep(pDb->pEnv, nUs);
    nUs = nUs * 2;
    if( nUs>nUsMax ) nUs = nUsMax;
  }
  if( rc==LSM_OK ){
    rc = lsmShmCacheChunks(pDb, 1);
  }
  if( rc!=LSM_OK ) return rc;
  pDb->pShmhdr = (ShmHeader *)pDb->apShm[0];

  /* Try an exclusive lock on DMS2/DMS3. If successful, this is the first
  ** and only connection to the database. In this case initialize the
  ** shared-memory and run log file recovery. */
  assert( LSM_LOCK_DMS3==1+LSM_LOCK_DMS2 );
  rc = lsmShmTestLock(pDb, LSM_LOCK_DMS2, 2, LSM_LOCK_EXCL);
  if( rc==LSM_OK ){
    memset(pDb->pShmhdr, 0, sizeof(ShmHeader));
    rc = lsmCheckpointRecover(pDb);
    if( rc==LSM_OK ){
      rc = lsmLogRecover(pDb);
    }
    if( rc==LSM_OK ){
      /* Seed read-lock slot 0 with the recovered snapshot/tree ids. */
      ShmHeader *pShm = pDb->pShmhdr;
      pShm->aReader[0].iLsmId = lsmCheckpointId(pShm->aSnap1, 0);
      pShm->aReader[0].iTreeId = pDb->treehdr.iUsedShmid;
    }
  }else if( rc==LSM_BUSY ){
    /* Other connections exist - no recovery required. */
    rc = LSM_OK;
  }

  /* Take a shared lock on DMS2. In multi-process mode this lock "cannot"
  ** fail, as connections may only hold an exclusive lock on DMS2 if they
  ** first hold an exclusive lock on DMS1. And this connection is currently
  ** holding the exclusive lock on DMS1.
  **
  ** However, if some other connection has the database open in single-process
  ** mode, this operation will fail. In this case, return the error to the
  ** caller - the attempt to connect to the db has failed.
  */
  if( rc==LSM_OK ){
    rc = lsmShmLock(pDb, LSM_LOCK_DMS2, LSM_LOCK_SHARED, 0);
  }

  /* If anything went wrong, unlock DMS2. Otherwise, try to take an exclusive
  ** lock on one of the LSM_LOCK_RWCLIENT() locks. Unlock DMS1 in any case. */
  if( rc!=LSM_OK ){
    pDb->pShmhdr = 0;
  }else{
    int i;
    for(i=0; i<LSM_LOCK_NRWCLIENT; i++){
      int rc2 = lsmShmLock(pDb, LSM_LOCK_RWCLIENT(i), LSM_LOCK_EXCL, 0);
      if( rc2==LSM_OK ) pDb->iRwclient = i;
      if( rc2!=LSM_BUSY ){
        rc = rc2;
        break;
      }
    }
  }
  lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);

  return rc;
}

/*
** Open the shared file descriptor for Database object p. If bRoOk is true
** and the read/write open fails with LSM_IOERR, fall back to a read-only
** descriptor and set Database.bReadonly.
*/
static int dbOpenSharedFd(lsm_env *pEnv, Database *p, int bRoOk){
  int rc;

  rc = lsmEnvOpen(pEnv, p->zName, 0, &p->pFile);
  if( rc==LSM_IOERR && bRoOk ){
    rc = lsmEnvOpen(pEnv, p->zName, LSM_OPEN_READONLY, &p->pFile);
    p->bReadonly = 1;
  }

  return rc;
}

/*
** Return a reference to the shared Database handle for the database
** identified by canonical path zName. If this is the first connection to
** the named database, a new Database object is allocated. Otherwise, a
** pointer to an existing object is returned.
**
** If successful, the shared Database structure is attached to the handle
** and LSM_OK returned. Otherwise, an LSM error code is returned. (NOTE:
** the current implementation stores the pointer in pDb->pDatabase rather
** than via an output parameter.)
**
** Each successful call to this function should be (eventually) matched
** by a call to lsmDbDatabaseRelease().
*/
int lsmDbDatabaseConnect(
  lsm_db *pDb,                    /* Database handle */
  const char *zName               /* Full-path to db file */
){
  lsm_env *pEnv = pDb->pEnv;
  int rc;                         /* Return code */
  Database *p = 0;                /* Pointer returned via *ppDatabase */
  int nName = lsmStrlen(zName);

  assert( pDb->pDatabase==0 );
  rc = enterGlobalMutex(pEnv);
  if( rc==LSM_OK ){

    /* Search the global list for an existing object. TODO: Need something
    ** better than the memcmp() below to figure out if a given Database
    ** object represents the requested file. */
    for(p=gShared.pDatabase; p; p=p->pDbNext){
      if( nName==p->nName && 0==memcmp(zName, p->zName, nName) ) break;
    }

    /* If no suitable Database object was found, allocate a new one. */
    if( p==0 ){
      /* The path is stored in the same allocation, immediately after the
      ** Database struct itself (p->zName points at &p[1]). */
      p = (Database *)lsmMallocZeroRc(pEnv, sizeof(Database)+nName+1, &rc);

      /* If the allocation was successful, fill in other fields and
      ** allocate the client mutex. */
      if( rc==LSM_OK ){
        p->bMultiProc = pDb->bMultiProc;
        p->zName = (char *)&p[1];
        p->nName = nName;
        memcpy((void *)p->zName, zName, nName+1);
        rc = lsmMutexNew(pEnv, &p->pClientMutex);
      }

      /* If nothing has gone wrong so far, open the shared fd. And if that
      ** succeeds and this connection requested single-process mode,
      ** attempt to take the exclusive lock on DMS2. */
      if( rc==LSM_OK ){
        int bReadonly = (pDb->bReadonly && pDb->bMultiProc);
        rc = dbOpenSharedFd(pDb->pEnv, p, bReadonly);
      }

      if( rc==LSM_OK && p->bMultiProc==0 ){
        /* Hold an exclusive lock DMS1 while grabbing DMS2. This ensures
        ** that any ongoing call to doDbDisconnect() (even one in another
        ** process) is finished before proceeding. */
        assert( p->bReadonly==0 );
        rc = lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS1, LSM_LOCK_EXCL);
        if( rc==LSM_OK ){
          rc = lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS2, LSM_LOCK_EXCL);
          lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK);
        }
      }

      if( rc==LSM_OK ){
        /* Link the new object into the global list. */
        p->pDbNext = gShared.pDatabase;
        gShared.pDatabase = p;
      }else{
        freeDatabase(pEnv, p);
        p = 0;
      }
    }

    if( p ){
      p->nDbRef++;
    }
    leaveGlobalMutex(pEnv);

    /* Add the handle to the Database.pConn list (protected by the client
    ** mutex, not the global mutex). */
    if( p ){
      lsmMutexEnter(pDb->pEnv, p->pClientMutex);
      pDb->pNext = p->pConn;
      p->pConn = pDb;
      lsmMutexLeave(pDb->pEnv, p->pClientMutex);
    }
  }

  pDb->pDatabase = p;
  if( rc==LSM_OK ){
    assert( p );
    rc = lsmFsOpen(pDb, zName, p->bReadonly);
  }

  /* If the db handle is read-write, then connect to the system now. Run
  ** recovery as necessary. Or, if this is a read-only database handle,
  ** defer attempting to connect to the system until a read-transaction
  ** is opened. */
  if( pDb->bReadonly==0 ){
    if( rc==LSM_OK ){
      rc = lsmFsConfigure(pDb);
    }
    if( rc==LSM_OK ){
      rc = doDbConnect(pDb);
    }
  }

  return rc;
}

/*
** Move the file descriptor currently held by pDb->pFS onto the
** Database.pLsmFile deferred-close list. The caller must hold the client
** mutex (see lsmDbDatabaseRelease()).
*/
static void dbDeferClose(lsm_db *pDb){
  if( pDb->pFS ){
    LsmFile *pLsmFile;
    Database *p = pDb->pDatabase;
    pLsmFile = lsmFsDeferClose(pDb->pFS);
    pLsmFile->pNext = p->pLsmFile;
    p->pLsmFile = pLsmFile;
  }
}

/*
** Pop and return a file descriptor from the deferred-close list, or NULL
** if the list is empty.
*/
LsmFile *lsmDbRecycleFd(lsm_db *db){
  LsmFile *pRet;
  Database *p = db->pDatabase;
  lsmMutexEnter(db->pEnv, p->pClientMutex);
  if( (pRet = p->pLsmFile)!=0 ){
    p->pLsmFile = pRet->pNext;
  }
  lsmMutexLeave(db->pEnv, p->pClientMutex);
  return pRet;
}

/*
** Release a reference to a Database object obtained from
** lsmDbDatabaseConnect(). There should be exactly one call to this function
** for each successful call to Find().
*/
void lsmDbDatabaseRelease(lsm_db *pDb){
  Database *p = pDb->pDatabase;
  if( p ){
    lsm_db **ppDb;

    /* If still connected to the shared system, disconnect first. This may
    ** flush the in-memory tree and write a checkpoint (doDbDisconnect()). */
    if( pDb->pShmhdr ){
      doDbDisconnect(pDb);
    }

    lsmFsUnmap(pDb->pFS);

    /* Unlink pDb from the Database.pConn list and defer the close of its
    ** file descriptor. Both operations require the client mutex. */
    lsmMutexEnter(pDb->pEnv, p->pClientMutex);
    for(ppDb=&p->pConn; *ppDb!=pDb; ppDb=&((*ppDb)->pNext));
    *ppDb = pDb->pNext;
    dbDeferClose(pDb);
    lsmMutexLeave(pDb->pEnv, p->pClientMutex);

    enterGlobalMutex(pDb->pEnv);
    p->nDbRef--;
    if( p->nDbRef==0 ){
      /* This was the last reference - tear the Database object down. */
      LsmFile *pIter;
      LsmFile *pNext;
      Database **pp;

      /* Remove the Database structure from the linked list. */
      for(pp=&gShared.pDatabase; *pp!=p; pp=&((*pp)->pDbNext));
      *pp = p->pDbNext;

      /* If they were allocated from the heap, free the shared memory chunks */
      if( p->bMultiProc==0 ){
        int i;
        for(i=0; i<p->nShmChunk; i++){
          lsmFree(pDb->pEnv, p->apShmChunk[i]);
        }
      }

      /* Close any outstanding file descriptors */
      for(pIter=p->pLsmFile; pIter; pIter=pNext){
        pNext = pIter->pNext;
        lsmEnvClose(pDb->pEnv, pIter->pFile);
        lsmFree(pDb->pEnv, pIter);
      }
      freeDatabase(pDb->pEnv, p);
    }
    leaveGlobalMutex(pDb->pEnv);
  }
}

/* Return the Level list stored in snapshot pSnapshot. */
Level *lsmDbSnapshotLevel(Snapshot *pSnapshot){
  return pSnapshot->pLevel;
}

/* Set the Level list of snapshot pSnap. */
void lsmDbSnapshotSetLevel(Snapshot *pSnap, Level *pLevel){
  pSnap->pLevel = pLevel;
}

/* TODO: Shuffle things around to get rid of this */
static int firstSnapshotInUse(lsm_db *, i64 *);

/*
** Context object used by the lsmWalkFreelist() utility.
*/
typedef struct WalkFreelistCtx WalkFreelistCtx;
struct WalkFreelistCtx {
  lsm_db *pDb;
  int bReverse;                   /* True to iterate from largest block down */
  Freelist *pFreelist;            /* In-memory freelist merged into the walk */
  int iFree;                      /* Current index within pFreelist->aEntry[] */
  int (*xUsr)(void *, int, i64);  /* User callback function */
  void *pUsrctx;                  /* User callback context */
  int bDone;                      /* Set to true after xUsr() returns true */
};

/*
** Callback used by lsmWalkFreelist().
*/
static int walkFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
  WalkFreelistCtx *p = (WalkFreelistCtx *)pCtx;
  const int iDir = (p->bReverse ? -1 : 1);
  Freelist *pFree = p->pFreelist;

  assert( p->bDone==0 );
  assert( iBlk>=0 );
  if( pFree ){
    /* First emit any in-memory entries that sort at or before iBlk in the
    ** current iteration order. An in-memory entry for block iBlk itself
    ** overrides the incoming (on-disk) entry. */
    while( (p->iFree < pFree->nEntry) && p->iFree>=0 ){
      FreelistEntry *pEntry = &pFree->aEntry[p->iFree];
      if( (p->bReverse==0 && pEntry->iBlk>(u32)iBlk)
       || (p->bReverse!=0 && pEntry->iBlk<(u32)iBlk)
      ){
        break;
      }else{
        p->iFree += iDir;
        /* Entries with iId<0 are deletes - suppress them. */
        if( pEntry->iId>=0
         && p->xUsr(p->pUsrctx, pEntry->iBlk, pEntry->iId)
        ){
          p->bDone = 1;
          return 1;
        }
        /* The in-memory entry shadowed block iBlk - do not also pass the
        ** on-disk version to the callback. */
        if( pEntry->iBlk==(u32)iBlk ) return 0;
      }
    }
  }

  if( p->xUsr(p->pUsrctx, iBlk, iSnapshot) ){
    p->bDone = 1;
    return 1;
  }
  return 0;
}

/*
** The database handle passed as the first argument must be the worker
** connection. This function iterates through the contents of the current
** free block list, invoking the supplied callback once for each list
** element.
**
** The difference between this function and lsmSortedWalkFreelist() is
** that lsmSortedWalkFreelist() only considers those free-list elements
** stored within the LSM. This function also merges in any in-memory
** elements.
*/
int lsmWalkFreelist(
  lsm_db *pDb,                    /* Database handle (must be worker) */
  int bReverse,                   /* True to iterate from largest to smallest */
  int (*x)(void *, int, i64),     /* Callback function */
  void *pCtx                      /* First argument to pass to callback */
){
  const int iDir = (bReverse ? -1 : 1);
  int rc;
  int iCtx;

  /* Two chained merge contexts: ctx[0] merges the worker snapshot's
  ** freelist into the on-disk walk and forwards each element to ctx[1],
  ** which in turn merges pDb->pFreelist and forwards to the user
  ** callback x. */
  WalkFreelistCtx ctx[2];

  ctx[0].pDb = pDb;
  ctx[0].bReverse = bReverse;
  ctx[0].pFreelist = &pDb->pWorker->freelist;
  if( ctx[0].pFreelist && bReverse ){
    ctx[0].iFree = ctx[0].pFreelist->nEntry-1;
  }else{
    ctx[0].iFree = 0;
  }
  ctx[0].xUsr = walkFreelistCb;
  ctx[0].pUsrctx = (void *)&ctx[1];
  ctx[0].bDone = 0;

  ctx[1].pDb = pDb;
  ctx[1].bReverse = bReverse;
  ctx[1].pFreelist = pDb->pFreelist;
  if( ctx[1].pFreelist && bReverse ){
    ctx[1].iFree = ctx[1].pFreelist->nEntry-1;
  }else{
    ctx[1].iFree = 0;
  }
  ctx[1].xUsr = x;
  ctx[1].pUsrctx = pCtx;
  ctx[1].bDone = 0;

  rc = lsmSortedWalkFreelist(pDb, bReverse, walkFreelistCb, (void *)&ctx[0]);

  /* Unless the walk was stopped early by the callback, flush any in-memory
  ** entries that sort after the last on-disk element. */
  if( ctx[0].bDone==0 ){
    for(iCtx=0; iCtx<2; iCtx++){
      int i;
      WalkFreelistCtx *p = &ctx[iCtx];
      for(i=p->iFree;
          p->pFreelist && rc==LSM_OK && i<p->pFreelist->nEntry && i>=0;
          i += iDir
      ){
        FreelistEntry *pEntry = &p->pFreelist->aEntry[i];
        if( pEntry->iId>=0 && p->xUsr(p->pUsrctx, pEntry->iBlk, pEntry->iId) ){
          return LSM_OK;
        }
      }
    }
  }

  return rc;
}


/*
** Context object used by findFreeblock() and its callback.
*/
typedef struct FindFreeblockCtx FindFreeblockCtx;
struct FindFreeblockCtx {
  i64 iInUse;                     /* Snapshot id that must not be disturbed */
  int iRet;                       /* Result block number (0 if none found) */
  int bNotOne;                    /* If true, never return block 1 */
};

/*
** lsmWalkFreelist() callback used by findFreeblock(). Accept the first
** block whose freeing snapshot id is older than iInUse (and which is not
** block 1 if bNotOne is set).
*/
static int findFreeblockCb(void *pCtx, int iBlk, i64 iSnapshot){
  FindFreeblockCtx *p = (FindFreeblockCtx *)pCtx;
  if( iSnapshot<p->iInUse && (iBlk!=1 || p->bNotOne==0) ){
    p->iRet = iBlk;
    return 1;
  }
  return 0;
}

/*
** Search the free block list for a block that may safely be reused. Set
** *piRet to the block number found, or to 0 if there is no candidate.
*/
static int findFreeblock(lsm_db *pDb, i64 iInUse, int bNotOne, int *piRet){
  int rc;                         /* Return code */
  FindFreeblockCtx ctx;           /* Context object */

  ctx.iInUse = iInUse;
  ctx.iRet = 0;
  ctx.bNotOne = bNotOne;
  rc = lsmWalkFreelist(pDb, 0, findFreeblockCb, (void *)&ctx);
  *piRet = ctx.iRet;

  return rc;
}

/*
** Allocate a new database file block to write data to,
** either by extending
** the database file or by recycling a free-list entry. The worker snapshot
** must be held in order to call this function.
**
** If successful, *piBlk is set to the block number allocated and LSM_OK is
** returned. Otherwise, *piBlk is zeroed and an lsm error code returned.
*/
int lsmBlockAllocate(lsm_db *pDb, int iBefore, int *piBlk){
  Snapshot *p = pDb->pWorker;
  int iRet = 0;                   /* Block number of allocated block */
  int rc = LSM_OK;
  i64 iInUse = 0;                 /* Snapshot id still in use */
  i64 iSynced = 0;                /* Snapshot id synced to disk */

  assert( p );

#ifdef LSM_LOG_FREELIST
  {
    static int nCall = 0;
    char *zFree = 0;
    nCall++;
    rc = lsmInfoFreelist(pDb, &zFree);
    if( rc!=LSM_OK ) return rc;
    lsmLogMessage(pDb, 0, "lsmBlockAllocate(): %d freelist: %s", nCall, zFree);
    lsmFree(pDb->pEnv, zFree);
  }
#endif

  /* Set iInUse to the smallest snapshot id that is either:
  **
  **   * Currently in use by a database client,
  **   * May be used by a database client in the future, or
  **   * Is the most recently checkpointed snapshot (i.e. the one that will
  **     be used following recovery if a failure occurs at this point).
  */
  rc = lsmCheckpointSynced(pDb, &iSynced, 0, 0);
  if( rc==LSM_OK && iSynced==0 ) iSynced = p->iId;
  iInUse = iSynced;
  if( rc==LSM_OK && pDb->iReader>=0 ){
    assert( pDb->pClient );
    iInUse = LSM_MIN(iInUse, pDb->pClient->iId);
  }
  if( rc==LSM_OK ) rc = firstSnapshotInUse(pDb, &iInUse);

#ifdef LSM_LOG_FREELIST
  {
    lsmLogMessage(pDb, 0, "lsmBlockAllocate(): "
        "snapshot-in-use: %lld (iSynced=%lld) (client-id=%lld)",
        iInUse, iSynced, (pDb->iReader>=0 ? pDb->pClient->iId : 0)
    );
  }
#endif


  /* Unless there exists a read-only transaction (which prevents us from
  ** recycling any blocks regardless), query the free block list for a
  ** suitable block to reuse.
  **
  ** It might seem more natural to check for a read-only transaction at
  ** the start of this function. However, it is better to wait until after
  ** the call to lsmCheckpointSynced() to do so.
  */
  if( rc==LSM_OK ){
    int bRotrans;
    rc = lsmDetectRoTrans(pDb, &bRotrans);

    if( rc==LSM_OK && bRotrans==0 ){
      rc = findFreeblock(pDb, iInUse, (iBefore>0), &iRet);
    }
  }

  /* If the caller asked for a block numbered below iBefore and no such
  ** block was found, report failure by zeroing *piBlk. */
  if( iBefore>0 && (iRet<=0 || iRet>=iBefore) ){
    iRet = 0;

  }else if( rc==LSM_OK ){
    /* If a block was found in the free block list, use it and remove it from
    ** the list. Otherwise, if no suitable block was found, allocate one from
    ** the end of the file. */
    if( iRet>0 ){
#ifdef LSM_LOG_FREELIST
      lsmLogMessage(pDb, 0,
          "reusing block %d (snapshot-in-use=%lld)", iRet, iInUse);
#endif
      /* iId==-1 deletes the entry from the in-memory freelist. */
      rc = freelistAppend(pDb, iRet, -1);
      if( rc==LSM_OK ){
        rc = dbTruncate(pDb, iInUse);
      }
    }else{
      iRet = ++(p->nBlock);
#ifdef LSM_LOG_FREELIST
      lsmLogMessage(pDb, 0, "extending file to %d blocks", iRet);
#endif
    }
  }

  assert( iBefore>0 || iRet>0 || rc!=LSM_OK );
  *piBlk = iRet;
  return rc;
}

/*
** Free a database block. The worker snapshot must be held in order to call
** this function.
**
** If successful, LSM_OK is returned. Otherwise, an lsm error code (e.g.
** LSM_NOMEM).
*/
int lsmBlockFree(lsm_db *pDb, int iBlk){
  Snapshot *p = pDb->pWorker;
  assert( lsmShmAssertWorker(pDb) );

#ifdef LSM_LOG_FREELIST
  lsmLogMessage(pDb, LSM_OK, "lsmBlockFree(): Free block %d", iBlk);
#endif

  /* Tag the entry with the current worker snapshot id. */
  return freelistAppend(pDb, iBlk, p->iId);
}

/*
** Refree a database block. The worker snapshot must be held in order to call
** this function.
**
** Refreeing is required when a block is allocated using lsmBlockAllocate()
** but then not used. This function is used to push the block back onto
** the freelist.
** Refreeing a block is different from freeing it, as a refreed
** block may be reused immediately. Whereas a freed block can not be reused
** until (at least) after the next checkpoint.
*/
int lsmBlockRefree(lsm_db *pDb, int iBlk){
  int rc = LSM_OK;                /* Return code */

#ifdef LSM_LOG_FREELIST
  lsmLogMessage(pDb, LSM_OK, "lsmBlockRefree(): Refree block %d", iBlk);
#endif

  /* Append with snapshot id 0 (compare lsmBlockFree(), which stamps the
  ** entry with the current worker snapshot id p->iId). */
  rc = freelistAppend(pDb, iBlk, 0);
  return rc;
}

/*
** If required, copy a database checkpoint from shared memory into the
** database itself.
**
** The WORKER lock must not be held when this is called. This is because
** this function may indirectly call fsync(). And the WORKER lock should
** not be held that long (in case it is required by a client flushing an
** in-memory tree to disk).
*/
int lsmCheckpointWrite(lsm_db *pDb, u32 *pnWrite){
  int rc;                         /* Return Code */
  u32 nWrite = 0;

  assert( pDb->pWorker==0 );
  assert( 1 || pDb->pClient==0 );
  assert( lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK) );

  /* Serialize checkpointers. A failure to obtain the lock (e.g. LSM_BUSY)
  ** means some other connection is checkpointing - return immediately. */
  rc = lsmShmLock(pDb, LSM_LOCK_CHECKPOINTER, LSM_LOCK_EXCL, 0);
  if( rc!=LSM_OK ) return rc;

  rc = lsmCheckpointLoad(pDb, 0);
  if( rc==LSM_OK ){
    int nBlock = lsmCheckpointNBlock(pDb->aSnapshot);
    ShmHeader *pShm = pDb->pShmhdr;
    int bDone = 0;                /* True if checkpoint is already stored */

    /* Check if this checkpoint has already been written to the database
    ** file. If so, set variable bDone to true. */
    if( pShm->iMetaPage ){
      MetaPage *pPg;              /* Meta page */
      u8 *aData;                  /* Meta-page data buffer */
      int nData;                  /* Size of aData[] in bytes */
      i64 iCkpt;                  /* Id of checkpoint just loaded */
      i64 iDisk = 0;              /* Id of checkpoint already stored in db */
      iCkpt = lsmCheckpointId(pDb->aSnapshot, 0);
      rc = lsmFsMetaPageGet(pDb->pFS, 0, pShm->iMetaPage, &pPg);
      if( rc==LSM_OK ){
        aData = lsmFsMetaPageData(pPg, &nData);
        iDisk = lsmCheckpointId((u32 *)aData, 1);
        nWrite = lsmCheckpointNWrite((u32 *)aData, 1);
        lsmFsMetaPageRelease(pPg);
      }
      bDone = (iDisk>=iCkpt);
    }

    if( rc==LSM_OK && bDone==0 ){
      /* Alternate between meta pages 1 and 2. Sync the db before and after
      ** storing the checkpoint, unless safety is set to OFF. */
      int iMeta = (pShm->iMetaPage % 2) + 1;
      if( pDb->eSafety!=LSM_SAFETY_OFF ){
        rc = lsmFsSyncDb(pDb->pFS, nBlock);
      }
      if( rc==LSM_OK ) rc = lsmCheckpointStore(pDb, iMeta);
      if( rc==LSM_OK && pDb->eSafety!=LSM_SAFETY_OFF){
        rc = lsmFsSyncDb(pDb->pFS, 0);
      }
      if( rc==LSM_OK ){
        pShm->iMetaPage = iMeta;
        nWrite = lsmCheckpointNWrite(pDb->aSnapshot, 0) - nWrite;
      }
#ifdef LSM_LOG_WORK
      lsmLogMessage(pDb, 0, "finish checkpoint %d",
          (int)lsmCheckpointId(pDb->aSnapshot, 0)
      );
#endif
    }
  }

  lsmShmLock(pDb, LSM_LOCK_CHECKPOINTER, LSM_LOCK_UNLOCK, 0);
  if( pnWrite && rc==LSM_OK ) *pnWrite = nWrite;
  return rc;
}

/*
** Take the exclusive WORKER lock and deserialize the current worker
** snapshot into pDb->pWorker. Balanced by lsmFinishWork().
*/
int lsmBeginWork(lsm_db *pDb){
  int rc;

  /* Attempt to take the WORKER lock */
  rc = lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL, 0);

  /* Deserialize the current worker snapshot */
  if( rc==LSM_OK ){
    rc = lsmCheckpointLoadWorker(pDb);
  }
  return rc;
}

/*
** Free all memory associated with Snapshot object p, including the
** Level list, freelist entries, redirect array and p itself.
*/
void lsmFreeSnapshot(lsm_env *pEnv, Snapshot *p){
  if( p ){
    lsmSortedFreeLevel(pEnv, p->pLevel);
    lsmFree(pEnv, p->freelist.aEntry);
    lsmFree(pEnv, p->redirect.a);
    lsmFree(pEnv, p);
  }
}

/*
** Attempt to populate one of the read-lock slots to contain lock values
** iLsm/iShm.
** Or, if such a slot exists already, this function is a no-op.
**
** It is not an error if no slot can be populated because the write-lock
** cannot be obtained. If any other error occurs, return an LSM error code.
** Otherwise, LSM_OK.
**
** This function is called at various points to try to ensure that there
** always exists at least one read-lock slot that can be used by a read-only
** client. And so that, in the usual case, there is an "exact match" available
** whenever a read transaction is opened by any client. At present this
** function is called when:
**
**   * A write transaction that called lsmTreeDiscardOld() is committed, and
**   * Whenever the working snapshot is updated (i.e. lsmFinishWork()).
*/
static int dbSetReadLock(lsm_db *db, i64 iLsm, u32 iShm){
  ShmHeader *pShm = db->pShmhdr;
  int rc = LSM_OK;
  int iSlot;

  /* Nothing to do if some slot already holds exactly iLsm/iShm. */
  for(iSlot=0; iSlot<LSM_LOCK_NREADER; iSlot++){
    ShmReader *pSlot = &pShm->aReader[iSlot];
    if( pSlot->iLsmId==iLsm && pSlot->iTreeId==iShm ) return LSM_OK;
  }

  /* Probe each slot in turn with a non-blocking write-lock. The first
  ** slot successfully locked is overwritten with iLsm/iShm and then
  ** unlocked. A busy slot is simply skipped - that is not an error. */
  for(iSlot=0; iSlot<LSM_LOCK_NREADER; iSlot++){
    rc = lsmShmLock(db, LSM_LOCK_READER(iSlot), LSM_LOCK_EXCL, 0);
    if( rc!=LSM_BUSY ){
      ShmReader *pSlot = &pShm->aReader[iSlot];
      pSlot->iLsmId = iLsm;
      pSlot->iTreeId = iShm;
      lsmShmLock(db, LSM_LOCK_READER(iSlot), LSM_LOCK_UNLOCK, 0);
      break;
    }
    rc = LSM_OK;
  }

  return rc;
}

/*
** Release the read-lock currently held by connection db.
*/
int dbReleaseReadlock(lsm_db *db){
  int rc = LSM_OK;
  if( db->iReader>=0 ){
    rc = lsmShmLock(db, LSM_LOCK_READER(db->iReader), LSM_LOCK_UNLOCK, 0);
    db->iReader = -1;
  }
  /* Also clear the read-only transaction flag, regardless of whether a
  ** read-lock slot was actually held. */
  db->bRoTrans = 0;
  return rc;
}


/*
** Argument bFlush is true if the contents of the in-memory tree has just
** been flushed to disk. The significance of this is that once the snapshot
** created to hold the updated state of the database is synced to disk, log
** file space can be recycled.
*/
void lsmFinishWork(lsm_db *pDb, int bFlush, int *pRc){
  int rc = *pRc;
  assert( rc!=0 || pDb->pWorker );
  if( pDb->pWorker ){
    /* If no error has occurred, serialize the worker snapshot and write
    ** it to shared memory.  */
    if( rc==LSM_OK ){
      rc = lsmSaveWorker(pDb, bFlush);
    }

    /* Assuming no error has occurred, update a read lock slot with the
    ** new snapshot id (see comments above function dbSetReadLock()).  */
    if( rc==LSM_OK ){
      if( pDb->iReader<0 ){
        /* No read transaction open - load the tree header so that
        ** treehdr.iUsedShmid below is current. */
        rc = lsmTreeLoadHeader(pDb, 0);
      }
      if( rc==LSM_OK ){
        rc = dbSetReadLock(pDb, pDb->pWorker->iId, pDb->treehdr.iUsedShmid);
      }
    }

    /* Free the snapshot object. */
    lsmFreeSnapshot(pDb->pEnv, pDb->pWorker);
    pDb->pWorker = 0;
  }

  /* Always drop the WORKER lock, even on error. */
  lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK, 0);
  *pRc = rc;
}

/*
** Called when recovery is finished.
*/
int lsmFinishRecovery(lsm_db *pDb){
  lsmTreeEndTransaction(pDb, 1);
  return LSM_OK;
}

/*
** Check if the currently configured compression functions
** (LSM_CONFIG_SET_COMPRESSION) are compatible with a database that has its
** compression id set to iReq. Compression routines are compatible if iReq
** is zero (indicating the database is empty), or if it is equal to the
** compression id of the configured compression routines.
**
** If the check shows that the current compression are incompatible and there
** is a compression factory registered, give it a chance to install new
** compression routines.
**
** If, after any registered factory is invoked, the compression functions
** are still incompatible, return LSM_MISMATCH. Otherwise, LSM_OK.
*/
int lsmCheckCompressionId(lsm_db *pDb, u32 iReq){
  if( iReq!=LSM_COMPRESSION_EMPTY && pDb->compress.iId!=iReq ){
    if( pDb->factory.xFactory ){
      /* bInFactory guards against reentrant factory invocations. */
      pDb->bInFactory = 1;
      pDb->factory.xFactory(pDb->factory.pCtx, pDb, iReq);
      pDb->bInFactory = 0;
    }
    if( pDb->compress.iId!=iReq ){
      /* Incompatible */
      return LSM_MISMATCH;
    }
  }
  /* Compatible */
  return LSM_OK;
}

/*
** Begin a read transaction. This function is a no-op if the connection
** passed as the only argument already has an open read transaction.
*/
int lsmBeginReadTrans(lsm_db *pDb){
  const int MAX_READLOCK_ATTEMPTS = 10;
  /* A read-only (bRoTrans) connection gets a single attempt only. */
  const int nMaxAttempt = (pDb->bRoTrans ? 1 : MAX_READLOCK_ATTEMPTS);

  int rc = LSM_OK;                /* Return code */
  int iAttempt = 0;

  assert( pDb->pWorker==0 );

  while( rc==LSM_OK && pDb->iReader<0 && (iAttempt++)<nMaxAttempt ){
    int iTreehdr = 0;
    int iSnap = 0;
    assert( pDb->pCsr==0 && pDb->nTransOpen==0 );

    /* Load the in-memory tree header. */
    rc = lsmTreeLoadHeader(pDb, &iTreehdr);

    /* Load the database snapshot */
    if( rc==LSM_OK ){
      if( lsmCheckpointClientCacheOk(pDb)==0 ){
        /* Cached client snapshot is stale - discard it and reload. */
        lsmFreeSnapshot(pDb->pEnv, pDb->pClient);
        pDb->pClient = 0;
        lsmMCursorFreeCache(pDb);
        lsmFsPurgeCache(pDb->pFS);
        rc = lsmCheckpointLoad(pDb, &iSnap);
      }else{
        iSnap = 1;
      }
    }

    /* Take a read-lock on the tree and snapshot just loaded. Then check
    ** that the shared-memory still contains the same values. If so, proceed.
    ** Otherwise, relinquish the read-lock and retry the whole procedure
    ** (starting with loading the in-memory tree header).  */
    if( rc==LSM_OK ){
      u32 iShmMax = pDb->treehdr.iUsedShmid;
      u32 iShmMin = pDb->treehdr.iNextShmid+1-LSM_MAX_SHMCHUNKS;
      rc = lsmReadlock(
          pDb, lsmCheckpointId(pDb->aSnapshot, 0), iShmMin, iShmMax
      );
      if( rc==LSM_OK ){
        if( lsmTreeLoadHeaderOk(pDb, iTreehdr)
         && lsmCheckpointLoadOk(pDb, iSnap)
        ){
          /* Read lock has been successfully obtained. Deserialize the
          ** checkpoint just loaded. TODO: This will be removed after
          ** lsm_sorted.c is changed to work directly from the serialized
          ** version of the snapshot.  */
          if( pDb->pClient==0 ){
            rc = lsmCheckpointDeserialize(pDb, 0, pDb->aSnapshot,&pDb->pClient);
          }
          assert( (rc==LSM_OK)==(pDb->pClient!=0) );
          assert( pDb->iReader>=0 );

          /* Check that the client has the right compression hooks loaded.
          ** If not, set rc to LSM_MISMATCH.  */
          if( rc==LSM_OK ){
            rc = lsmCheckCompressionId(pDb, pDb->pClient->iCmpId);
          }
        }else{
          /* Shared-memory changed under us - drop the lock and retry. */
          rc = dbReleaseReadlock(pDb);
        }
      }

      if( rc==LSM_BUSY ){
        rc = LSM_OK;
      }
    }
#if 0
    if( rc==LSM_OK && pDb->pClient ){
      fprintf(stderr,
          "reading %p: snapshot:%d used-shmid:%d trans-id:%d iOldShmid=%d\n",
          (void *)pDb,
          (int)pDb->pClient->iId, (int)pDb->treehdr.iUsedShmid,
          (int)pDb->treehdr.root.iTransId,
          (int)pDb->treehdr.iOldShmid
      );
    }
#endif
  }

  if( rc==LSM_OK ){
    rc = lsmShmCacheChunks(pDb, pDb->treehdr.nChunk);
  }
  if( rc!=LSM_OK ){
    dbReleaseReadlock(pDb);
  }
  /* All attempts exhausted without obtaining a lock: report LSM_BUSY. */
  if( pDb->pClient==0 && rc==LSM_OK ) rc = LSM_BUSY;
  return rc;
}

/*
** This function is used by a read-write connection to determine if there
** are currently one or more read-only transactions open on the database
** (in this context a read-only transaction is one opened by a read-only
** connection on a non-live database).
**
** If no error occurs, LSM_OK is returned and *pbExists is set to true if
** some other connection has a read-only transaction open, or false
** otherwise. If an error occurs an LSM error code is returned and the final
** value of *pbExist is undefined.
*/
int lsmDetectRoTrans(lsm_db *db, int *pbExist){
  int rc;

  /* Only a read-write connection may use this function. */
  assert( db->bReadonly==0 );

  /* A read-only transaction holder has a SHARED lock on ROTRANS, which
  ** makes this EXCL test-lock fail with LSM_BUSY. */
  rc = lsmShmTestLock(db, LSM_LOCK_ROTRANS, 1, LSM_LOCK_EXCL);
  if( rc==LSM_BUSY ){
    *pbExist = 1;
    rc = LSM_OK;
  }else{
    *pbExist = 0;
  }

  return rc;
}

/*
** db is a read-only database handle in the disconnected state. This function
** attempts to open a read-transaction on the database. This may involve
** connecting to the database system (opening shared memory etc.).
*/
int lsmBeginRoTrans(lsm_db *db){
  int rc = LSM_OK;

  assert( db->bReadonly && db->pShmhdr==0 );
  assert( db->iReader<0 );

  if( db->bRoTrans==0 ){

    /* Attempt a shared-lock on DMS1. */
    rc = lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_SHARED, 0);
    if( rc!=LSM_OK ) return rc;

    /* Test for any read-write clients: if none holds an RWCLIENT lock,
    ** the system is not live. */
    rc = lsmShmTestLock(
        db, LSM_LOCK_RWCLIENT(0), LSM_LOCK_NREADER, LSM_LOCK_SHARED
    );
    if( rc==LSM_OK ){
      /* System is not live. Take a SHARED lock on the ROTRANS byte and
      ** release DMS1. Locking ROTRANS tells all read-write clients that they
      ** may not recycle any disk space from within the database or log files,
      ** as a read-only client may be using it. */
      rc = lsmShmLock(db, LSM_LOCK_ROTRANS, LSM_LOCK_SHARED, 0);
      lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);

      if( rc==LSM_OK ){
        db->bRoTrans = 1;
        rc = lsmShmCacheChunks(db, 1);
        if( rc==LSM_OK ){
          /* Private (heap) shared-memory - zero it and run recovery to
          ** rebuild the in-memory state from the db and log files. */
          db->pShmhdr = (ShmHeader *)db->apShm[0];
          memset(db->pShmhdr, 0, sizeof(ShmHeader));
          rc = lsmCheckpointRecover(db);
          if( rc==LSM_OK ){
            rc = lsmLogRecover(db);
          }
        }
      }
    }else if( rc==LSM_BUSY ){
      /* System is live! */
      rc = lsmShmLock(db, LSM_LOCK_DMS3, LSM_LOCK_SHARED, 0);
      lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);
      if( rc==LSM_OK ){
        rc = lsmShmCacheChunks(db, 1);
        if( rc==LSM_OK ){
          db->pShmhdr = (ShmHeader *)db->apShm[0];
        }
      }
    }

    if( rc==LSM_OK ){
      rc = lsmBeginReadTrans(db);
    }
  }

  return rc;
}

/*
** Close the currently open read transaction.
*/
void lsmFinishReadTrans(lsm_db *pDb){

  /* Worker connections should not be closing read transactions. And
  ** read transactions should only be closed after all cursors and write
  ** transactions have been closed. Finally pClient should be non-NULL
  ** only iff pDb->iReader>=0.  */
  assert( pDb->pWorker==0 );
  assert( pDb->pCsr==0 && pDb->nTransOpen==0 );

  if( pDb->bRoTrans ){
    /* Read-only transaction: free the private shared-memory chunks and
    ** drop the ROTRANS lock taken by lsmBeginRoTrans(). */
    int i;
    for(i=0; i<pDb->nShm; i++){
      lsmFree(pDb->pEnv, pDb->apShm[i]);
    }
    lsmFree(pDb->pEnv, pDb->apShm);
    pDb->apShm = 0;
    pDb->nShm = 0;
    pDb->pShmhdr = 0;

    lsmShmLock(pDb, LSM_LOCK_ROTRANS, LSM_LOCK_UNLOCK, 0);
  }
  dbReleaseReadlock(pDb);
}

/*
** Open a write transaction.
*/
int lsmBeginWriteTrans(lsm_db *pDb){
  int rc = LSM_OK;                /* Return code */
  ShmHeader *pShm = pDb->pShmhdr; /* Shared memory header */

  assert( pDb->nTransOpen==0 );
  assert( pDb->bDiscardOld==0 );
  assert( pDb->bReadonly==0 );

  /* If there is no read-transaction open, open one now. */
  if( pDb->iReader<0 ){
    rc = lsmBeginReadTrans(pDb);
  }

  /* Attempt to take the WRITER lock */
  if( rc==LSM_OK ){
    rc = lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_EXCL, 0);
  }

  /* If the previous writer failed mid-transaction, run emergency rollback. */
  if( rc==LSM_OK && pShm->bWriter ){
    rc = lsmTreeRepair(pDb);
    if( rc==LSM_OK ) pShm->bWriter = 0;
  }

  /* Check that this connection is currently reading from the most recent
  ** version of the database. If not, return LSM_BUSY.  */
  if( rc==LSM_OK && memcmp(&pShm->hdr1, &pDb->treehdr, sizeof(TreeHeader)) ){
    rc = LSM_BUSY;
  }

  if( rc==LSM_OK ){
    rc = lsmLogBegin(pDb);
  }

  /* If everything was successful, set the "transaction-in-progress" flag
  ** and return LSM_OK. Otherwise, if some error occurred, relinquish the
  ** WRITER lock and return an error code.  */
  if( rc==LSM_OK ){
    TreeHeader *p = &pDb->treehdr;
    pShm->bWriter = 1;
    p->root.iTransId++;
    if( lsmTreeHasOld(pDb) && p->iOldLog==pDb->pClient->iLogOff ){
      lsmTreeDiscardOld(pDb);
      pDb->bDiscardOld = 1;
    }
  }else{
    lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_UNLOCK, 0);
    if( pDb->pCsr==0 ) lsmFinishReadTrans(pDb);
  }
  return rc;
}

/*
** End the current write transaction. The connection is left with an open
** read transaction. It is an error to call this if there is no open write
** transaction.
**
** If the transaction was committed, then a commit record has already been
** written into the log file when this function is called. Or, if the
** transaction was rolled back, both the log file and in-memory tree
** structure have already been restored. In either case, this function
** merely releases locks and other resources held by the write-transaction.
**
** LSM_OK is returned if successful, or an LSM error code otherwise.
*/
int lsmFinishWriteTrans(lsm_db *pDb, int bCommit){
  int rc = LSM_OK;
  int bFlush = 0;

  lsmLogEnd(pDb, bCommit);
  /* If the in-memory tree has grown past the limit, mark it "old" so that
  ** it will be flushed to disk. */
  if( rc==LSM_OK && bCommit && lsmTreeSize(pDb)>pDb->nTreeLimit ){
    bFlush = 1;
    lsmTreeMakeOld(pDb);
  }
  lsmTreeEndTransaction(pDb, bCommit);

  if( rc==LSM_OK ){
    if( bFlush && pDb->bAutowork ){
      rc = lsmSortedAutoWork(pDb, 1);
    }else if( bCommit && pDb->bDiscardOld ){
      rc = dbSetReadLock(pDb, pDb->pClient->iId, pDb->treehdr.iUsedShmid);
    }
  }
  pDb->bDiscardOld = 0;
  lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_UNLOCK, 0);

  /* If auto-work is disabled, notify the registered work-hook instead. */
  if( bFlush && pDb->bAutowork==0 && pDb->xWork ){
    pDb->xWork(pDb, pDb->pWorkCtx);
  }
  return rc;
}


/*
** Return non-zero if the caller is holding the client mutex.
*/
#ifdef LSM_DEBUG
int lsmHoldingClientMutex(lsm_db *pDb){
  return lsmMutexHeld(pDb->pEnv, pDb->pDatabase->pClientMutex);
}
#endif

/*
** Return true if read-lock slot p may be used by a reader that requires
** snapshot iLsm and whose shared-memory window is [iShmMin..iShmMax].
** The slot is usable if it pins a snapshot at least as old as iLsm and
** a tree sequence id within the required window.
*/
static int slotIsUsable(ShmReader *p, i64 iLsm, u32 iShmMin, u32 iShmMax){
  return(
      p->iLsmId && p->iLsmId<=iLsm
      && shm_sequence_ge(iShmMax, p->iTreeId)
      && shm_sequence_ge(p->iTreeId, iShmMin)
  );
}

/*
** Obtain a read-lock on database version identified by the combination
** of snapshot iLsm and tree iTree. Return LSM_OK if successful, or
** an LSM error code otherwise.
*/
int lsmReadlock(lsm_db *db, i64 iLsm, u32 iShmMin, u32 iShmMax){
  int rc = LSM_OK;
  ShmHeader *pShm = db->pShmhdr;
  int i;

  assert( db->iReader<0 );
  assert( shm_sequence_ge(iShmMax, iShmMin) );

  /* This is a no-op if the read-only transaction flag is set. */
  if( db->bRoTrans ){
    db->iReader = 0;
    return LSM_OK;
  }

  /* Search for an exact match. */
  for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
    ShmReader *p = &pShm->aReader[i];
    if( p->iLsmId==iLsm && p->iTreeId==iShmMax ){
      rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0);
      /* Re-check the slot values after locking - another connection may
      ** have repopulated the slot before the lock was obtained. */
      if( rc==LSM_OK && p->iLsmId==iLsm && p->iTreeId==iShmMax ){
        db->iReader = i;
      }else if( rc==LSM_BUSY ){
        rc = LSM_OK;
      }
    }
  }

  /* Try to obtain a write-lock on each slot, in order. If successful, set
  ** the slot values to iLsm/iTree.  */
  for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
    rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
    if( rc==LSM_BUSY ){
      rc = LSM_OK;
    }else{
      ShmReader *p = &pShm->aReader[i];
      p->iLsmId = iLsm;
      p->iTreeId = iShmMax;
      /* Downgrade EXCL to SHARED. Cannot fail with LSM_BUSY, as this
      ** connection already holds the exclusive lock. */
      rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0);
      assert( rc!=LSM_BUSY );
      if( rc==LSM_OK ) db->iReader = i;
    }
  }

  /* Search for any usable slot */
  for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
    ShmReader *p = &pShm->aReader[i];
    if( slotIsUsable(p, iLsm, iShmMin, iShmMax) ){
      rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0);
      if( rc==LSM_OK && slotIsUsable(p, iLsm, iShmMin, iShmMax) ){
        db->iReader = i;
      }else if( rc==LSM_BUSY ){
        rc = LSM_OK;
      }
    }
  }

  if( rc==LSM_OK && db->iReader<0 ){
    rc = LSM_BUSY;
  }
  return rc;
}

/*
** This is used to check if there exists a read-lock locking a particular
** version of either the in-memory tree or database file.
**
** If iLsmId is non-zero, then it is a snapshot id. If there exists a
** read-lock using this snapshot or newer, set *pbInUse to true. Or,
** if there is no such read-lock, set it to false.
**
** Or, if iLsmId is zero, then iShmid is a shared-memory sequence id.
** Search for a read-lock using this sequence id or newer. etc.
*/
static int isInUse(lsm_db *db, i64 iLsmId, u32 iShmid, int *pbInUse){
  ShmHeader *pShm = db->pShmhdr;
  int i;
  int rc = LSM_OK;

  for(i=0; rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
    ShmReader *p = &pShm->aReader[i];
    if( p->iLsmId ){
      if( (iLsmId!=0 && p->iLsmId!=0 && iLsmId>=p->iLsmId)
       || (iLsmId==0 && shm_sequence_ge(p->iTreeId, iShmid))
      ){
        /* Candidate slot. If the EXCL lock can be taken, no reader is
        ** actually using it - clear the stale slot. LSM_BUSY here means
        ** the version really is in use. */
        rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
        if( rc==LSM_OK ){
          p->iLsmId = 0;
          lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0);
        }
      }
    }
  }

  if( rc==LSM_BUSY ){
    *pbInUse = 1;
    return LSM_OK;
  }
  *pbInUse = 0;
  return rc;
}

/*
** This function is called by worker connections to determine the smallest
** snapshot id that is currently in use by a database client. The worker
** connection uses this result to determine whether or not it is safe to
** recycle a database block.
*/
static int firstSnapshotInUse(
  lsm_db *db,                     /* Database handle */
  i64 *piInUse                    /* IN/OUT: Smallest snapshot id in use */
){
  ShmHeader *pShm = db->pShmhdr;
  i64 iInUse = *piInUse;
  int i;

  assert( iInUse>0 );
  for(i=0; i<LSM_LOCK_NREADER; i++){
    ShmReader *p = &pShm->aReader[i];
    if( p->iLsmId ){
      i64 iThis = p->iLsmId;
      if( iThis!=0 && iInUse>iThis ){
        /* If the EXCL lock succeeds the slot is stale - clear it. If it
        ** is LSM_BUSY, a reader really holds snapshot iThis. */
        int rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
        if( rc==LSM_OK ){
          p->iLsmId = 0;
          lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0);
        }else if( rc==LSM_BUSY ){
          iInUse = iThis;
        }else{
          /* Some error other than LSM_BUSY. Return the error code to
          ** the caller in this case.  */
          return rc;
        }
      }
    }
  }

  *piInUse = iInUse;
  return LSM_OK;
}

int lsmTreeInUse(lsm_db *db, u32 iShmid, int *pbInUse){
  if( db->treehdr.iUsedShmid==iShmid ){
    *pbInUse = 1;
    return LSM_OK;
  }
  return isInUse(db, 0, iShmid, pbInUse);
}

int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse){
  if( db->pClient && db->pClient->iId<=iLsmId ){
    *pbInUse = 1;
    return LSM_OK;
  }
  return isInUse(db, iLsmId, 0, pbInUse);
}

/*
** This function may only be called after a successful call to
** lsmDbDatabaseConnect(). It returns true if the connection is in
** multi-process mode, or false otherwise.
*/
int lsmDbMultiProc(lsm_db *pDb){
  return pDb->pDatabase && pDb->pDatabase->bMultiProc;
}


/*************************************************************************
**************************************************************************
**************************************************************************
**************************************************************************
**************************************************************************
*************************************************************************/

/*
** Ensure that database connection db has cached pointers to at least the
** first nChunk chunks of shared memory.
*/
int lsmShmCacheChunks(lsm_db *db, int nChunk){
  int rc = LSM_OK;
  if( nChunk>db->nShm ){
    static const int NINCR = 16;
    Database *p = db->pDatabase;
    lsm_env *pEnv = db->pEnv;
    int nAlloc;
    int i;

    /* Ensure that the db->apShm[] array is large enough. If an attempt to
    ** allocate memory fails, return LSM_NOMEM immediately. The apShm[] array
    ** is always extended in multiples of 16 entries - so the actual allocated
    ** size can be inferred from nShm.  */
    nAlloc = ((db->nShm + NINCR - 1) / NINCR) * NINCR;
    while( nChunk>=nAlloc ){
      void **apShm;
      nAlloc += NINCR;
      apShm = lsmRealloc(pEnv, db->apShm, sizeof(void*)*nAlloc);
      if( !apShm ) return LSM_NOMEM_BKPT;
      db->apShm = apShm;
    }

    if( db->bRoTrans ){
      /* Read-only transaction: chunks are private heap allocations, not
      ** shared with any other connection. */
      for(i=db->nShm; rc==LSM_OK && i<nChunk; i++){
        db->apShm[i] = lsmMallocZeroRc(pEnv, LSM_SHM_CHUNK_SIZE, &rc);
        db->nShm++;
      }

    }else{

      /* Enter the client mutex */
      lsmMutexEnter(pEnv, p->pClientMutex);

      /* Extend the Database objects apShmChunk[] array if necessary. Using the
      ** same pattern as for the lsm_db.apShm[] array above.  */
      nAlloc = ((p->nShmChunk + NINCR - 1) / NINCR) * NINCR;
      while( nChunk>=nAlloc ){
        void **apShm;
        nAlloc += NINCR;
        apShm = lsmRealloc(pEnv, p->apShmChunk, sizeof(void*)*nAlloc);
        if( !apShm ){
          rc = LSM_NOMEM_BKPT;
          break;
        }
        p->apShmChunk = apShm;
      }

      for(i=db->nShm; rc==LSM_OK && i<nChunk; i++){
        if( i>=p->nShmChunk ){
          void *pChunk = 0;
          if( p->bMultiProc==0 ){
            /* Single process mode */
            pChunk = lsmMallocZeroRc(pEnv, LSM_SHM_CHUNK_SIZE, &rc);
          }else{
            /* Multi-process mode */
            rc = lsmEnvShmMap(pEnv, p->pFile, i, LSM_SHM_CHUNK_SIZE, &pChunk);
          }
          if( rc==LSM_OK ){
            p->apShmChunk[i] = pChunk;
            p->nShmChunk++;
          }
        }
        if( rc==LSM_OK ){
          db->apShm[i] = p->apShmChunk[i];
          db->nShm++;
        }
      }

      /* Release the client mutex */
      lsmMutexLeave(pEnv, p->pClientMutex);
    }
  }

  return rc;
}

/*
** Take, release or test a file lock. A no-op in single-process mode, where
** in-process bookkeeping (lsm_db.mLock) is sufficient.
*/
static int lockSharedFile(lsm_env *pEnv, Database *p, int iLock, int eOp){
  int rc = LSM_OK;
  if( p->bMultiProc ){
    rc = lsmEnvLock(pEnv, p->pFile, iLock, eOp);
  }
  return rc;
}

/*
** Test if it would be possible for connection db to obtain a lock of type
** eType on the nLock locks starting at iLock.
If so, return LSM_OK. If it 1728 ** would not be possible to obtain the lock due to a lock held by another 1729 ** connection, return LSM_BUSY. If an IO or other error occurs (i.e. in the 1730 ** lsm_env.xTestLock function), return some other LSM error code. 1731 ** 1732 ** Note that this function never actually locks the database - it merely 1733 ** queries the system to see if there exists a lock that would prevent 1734 ** it from doing so. 1735 */ 1736 int lsmShmTestLock( 1737 lsm_db *db, 1738 int iLock, 1739 int nLock, 1740 int eOp 1741 ){ 1742 int rc = LSM_OK; 1743 lsm_db *pIter; 1744 Database *p = db->pDatabase; 1745 int i; 1746 u64 mask = 0; 1747 1748 for(i=iLock; i<(iLock+nLock); i++){ 1749 mask |= ((u64)1 << (iLock-1)); 1750 if( eOp==LSM_LOCK_EXCL ) mask |= ((u64)1 << (iLock+32-1)); 1751 } 1752 1753 lsmMutexEnter(db->pEnv, p->pClientMutex); 1754 for(pIter=p->pConn; pIter; pIter=pIter->pNext){ 1755 if( pIter!=db && (pIter->mLock & mask) ){ 1756 assert( pIter!=db ); 1757 break; 1758 } 1759 } 1760 1761 if( pIter ){ 1762 rc = LSM_BUSY; 1763 }else if( p->bMultiProc ){ 1764 rc = lsmEnvTestLock(db->pEnv, p->pFile, iLock, nLock, eOp); 1765 } 1766 1767 lsmMutexLeave(db->pEnv, p->pClientMutex); 1768 return rc; 1769 } 1770 1771 /* 1772 ** Attempt to obtain the lock identified by the iLock and bExcl parameters. 1773 ** If successful, return LSM_OK. If the lock cannot be obtained because 1774 ** there exists some other conflicting lock, return LSM_BUSY. If some other 1775 ** error occurs, return an LSM error code. 1776 ** 1777 ** Parameter iLock must be one of LSM_LOCK_WRITER, WORKER or CHECKPOINTER, 1778 ** or else a value returned by the LSM_LOCK_READER macro. 
*/
int lsmShmLock(
  lsm_db *db,
  int iLock,
  int eOp,                        /* One of LSM_LOCK_UNLOCK, SHARED or EXCL */
  int bBlock                      /* True for a blocking lock */
){
  lsm_db *pIter;
  /* Bit (iLock-1) of lsm_db.mLock records an EXCLUSIVE hold on lock iLock;
  ** bit (iLock+31) records a SHARED hold. */
  const u64 me = ((u64)1 << (iLock-1));
  const u64 ms = ((u64)1 << (iLock+32-1));
  int rc = LSM_OK;
  Database *p = db->pDatabase;

  /* NOTE(review): bBlock is not consulted anywhere in this function - all
  ** lock attempts below are non-blocking. Confirm whether blocking mode is
  ** implemented elsewhere or simply unsupported. */
  assert( eOp!=LSM_LOCK_EXCL || p->bReadonly==0 );
  assert( iLock>=1 && iLock<=LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1) );
  assert( LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1)<=32 );
  assert( eOp==LSM_LOCK_UNLOCK || eOp==LSM_LOCK_SHARED || eOp==LSM_LOCK_EXCL );

  /* Check for a no-op. Proceed only if this is not one of those. */
  if( (eOp==LSM_LOCK_UNLOCK && (db->mLock & (me|ms))!=0)
   || (eOp==LSM_LOCK_SHARED && (db->mLock & (me|ms))!=ms)
   || (eOp==LSM_LOCK_EXCL && (db->mLock & me)==0)
  ){
    int nExcl = 0;                /* Number of connections holding EXCLUSIVE */
    int nShared = 0;              /* Number of connections holding SHARED */
    lsmMutexEnter(db->pEnv, p->pClientMutex);

    /* Figure out the locks currently held by this process on iLock, not
    ** including any held by connection db.  */
    for(pIter=p->pConn; pIter; pIter=pIter->pNext){
      assert( (pIter->mLock & me)==0 || (pIter->mLock & ms)!=0 );
      if( pIter!=db ){
        if( pIter->mLock & me ){
          nExcl++;
        }else if( pIter->mLock & ms ){
          nShared++;
        }
      }
    }
    assert( nExcl==0 || nExcl==1 );
    assert( nExcl==0 || nShared==0 );
    assert( nExcl==0 || (db->mLock & (me|ms))==0 );

    switch( eOp ){
      case LSM_LOCK_UNLOCK:
        /* Only release the file lock when no other in-process connection
        ** still holds a SHARED lock on this slot. */
        if( nShared==0 ){
          lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_UNLOCK);
        }
        db->mLock &= ~(me|ms);
        break;

      case LSM_LOCK_SHARED:
        if( nExcl ){
          rc = LSM_BUSY;
        }else{
          /* The first in-process SHARED holder takes the file lock. */
          if( nShared==0 ){
            rc = lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_SHARED);
          }
          if( rc==LSM_OK ){
            db->mLock |= ms;
            db->mLock &= ~me;
          }
        }
        break;

      default:
        assert( eOp==LSM_LOCK_EXCL );
        if( nExcl || nShared ){
          rc = LSM_BUSY;
        }else{
          rc = lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_EXCL);
          if( rc==LSM_OK ){
            db->mLock |= (me|ms);
          }
        }
        break;
    }

    lsmMutexLeave(db->pEnv, p->pClientMutex);
  }

  return rc;
}

#ifdef LSM_DEBUG

/*
** Return the type of lock (UNLOCK, SHARED or EXCL) that connection db
** currently holds on lock slot iLock, according to db->mLock.
*/
int shmLockType(lsm_db *db, int iLock){
  const u64 me = ((u64)1 << (iLock-1));
  const u64 ms = ((u64)1 << (iLock+32-1));

  if( db->mLock & me ) return LSM_LOCK_EXCL;
  if( db->mLock & ms ) return LSM_LOCK_SHARED;
  return LSM_LOCK_UNLOCK;
}

/*
** The arguments passed to this function are similar to those passed to
** the lsmShmLock() function. However, instead of obtaining a new lock
** this function returns true if the specified connection already holds
** (or does not hold) such a lock, depending on the value of eOp. As
** follows:
**
**   (eOp==LSM_LOCK_UNLOCK) -> true if db has no lock on iLock
**   (eOp==LSM_LOCK_SHARED) -> true if db has at least a SHARED lock on iLock.
1883 ** (eOp==LSM_LOCK_EXCL) -> true if db has an EXCLUSIVE lock on iLock. 1884 */ 1885 int lsmShmAssertLock(lsm_db *db, int iLock, int eOp){ 1886 int ret = 0; 1887 int eHave; 1888 1889 assert( iLock>=1 && iLock<=LSM_LOCK_READER(LSM_LOCK_NREADER-1) ); 1890 assert( iLock<=16 ); 1891 assert( eOp==LSM_LOCK_UNLOCK || eOp==LSM_LOCK_SHARED || eOp==LSM_LOCK_EXCL ); 1892 1893 eHave = shmLockType(db, iLock); 1894 1895 switch( eOp ){ 1896 case LSM_LOCK_UNLOCK: 1897 ret = (eHave==LSM_LOCK_UNLOCK); 1898 break; 1899 case LSM_LOCK_SHARED: 1900 ret = (eHave!=LSM_LOCK_UNLOCK); 1901 break; 1902 case LSM_LOCK_EXCL: 1903 ret = (eHave==LSM_LOCK_EXCL); 1904 break; 1905 default: 1906 assert( !"bad eOp value passed to lsmShmAssertLock()" ); 1907 break; 1908 } 1909 1910 return ret; 1911 } 1912 1913 int lsmShmAssertWorker(lsm_db *db){ 1914 return lsmShmAssertLock(db, LSM_LOCK_WORKER, LSM_LOCK_EXCL) && db->pWorker; 1915 } 1916 1917 /* 1918 ** This function does not contribute to library functionality, and is not 1919 ** included in release builds. It is intended to be called from within 1920 ** an interactive debugger. 1921 ** 1922 ** When called, this function prints a single line of human readable output 1923 ** to stdout describing the locks currently held by the connection. For 1924 ** example: 1925 ** 1926 ** (gdb) call print_db_locks(pDb) 1927 ** (shared on dms2) (exclusive on writer) 1928 */ 1929 void print_db_locks(lsm_db *db){ 1930 int iLock; 1931 for(iLock=0; iLock<16; iLock++){ 1932 int bOne = 0; 1933 const char *azLock[] = {0, "shared", "exclusive"}; 1934 const char *azName[] = { 1935 0, "dms1", "dms2", "writer", "worker", "checkpointer", 1936 "reader0", "reader1", "reader2", "reader3", "reader4", "reader5" 1937 }; 1938 int eHave = shmLockType(db, iLock); 1939 if( azLock[eHave] ){ 1940 printf("%s(%s on %s)", (bOne?" 
":""), azLock[eHave], azName[iLock]); 1941 bOne = 1; 1942 } 1943 } 1944 printf("\n"); 1945 } 1946 void print_all_db_locks(lsm_db *db){ 1947 lsm_db *p; 1948 for(p=db->pDatabase->pConn; p; p=p->pNext){ 1949 printf("%s connection %p ", ((p==db)?"*":""), p); 1950 print_db_locks(p); 1951 } 1952 } 1953 #endif 1954 1955 void lsmShmBarrier(lsm_db *db){ 1956 lsmEnvShmBarrier(db->pEnv); 1957 } 1958 1959 int lsm_checkpoint(lsm_db *pDb, int *pnKB){ 1960 int rc; /* Return code */ 1961 u32 nWrite = 0; /* Number of pages checkpointed */ 1962 1963 /* Attempt the checkpoint. If successful, nWrite is set to the number of 1964 ** pages written between this and the previous checkpoint. */ 1965 rc = lsmCheckpointWrite(pDb, &nWrite); 1966 1967 /* If required, calculate the output variable (KB of data checkpointed). 1968 ** Set it to zero if an error occured. */ 1969 if( pnKB ){ 1970 int nKB = 0; 1971 if( rc==LSM_OK && nWrite ){ 1972 nKB = (((i64)nWrite * lsmFsPageSize(pDb->pFS)) + 1023) / 1024; 1973 } 1974 *pnKB = nKB; 1975 } 1976 1977 return rc; 1978 }