modernc.org/cc@v1.0.1/v2/testdata/_sqlite/ext/lsm1/lsm_log.c (about) 1 /* 2 ** 2011-08-13 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** This file contains the implementation of LSM database logging. Logging 14 ** has one purpose in LSM - to make transactions durable. 15 ** 16 ** When data is written to an LSM database, it is initially stored in an 17 ** in-memory tree structure. Since this structure is in volatile memory, 18 ** if a power failure or application crash occurs it may be lost. To 19 ** prevent loss of data in this case, each time a record is written to the 20 ** in-memory tree an equivalent record is appended to the log on disk. 21 ** If a power failure or application crash does occur, data can be recovered 22 ** by reading the log. 23 ** 24 ** A log file consists of the following types of records representing data 25 ** written into the database: 26 ** 27 ** LOG_WRITE: A key-value pair written to the database. 28 ** LOG_DELETE: A delete key issued to the database. 29 ** LOG_COMMIT: A transaction commit. 30 ** 31 ** And the following types of records for ancillary purposes.. 32 ** 33 ** LOG_EOF: A record indicating the end of a log file. 34 ** LOG_PAD1: A single byte padding record. 35 ** LOG_PAD2: An N byte padding record (N>1). 36 ** LOG_JUMP: A pointer to another offset within the log file. 37 ** 38 ** Each transaction written to the log contains one or more LOG_WRITE and/or 39 ** LOG_DELETE records, followed by a LOG_COMMIT record. The LOG_COMMIT record 40 ** contains an 8-byte checksum based on all previous data written to the 41 ** log file. 
**
** LOG CHECKSUMS & RECOVERY
**
**   Checksums are found in two types of log records: LOG_COMMIT and
**   LOG_CKSUM records. In order to recover content from a log, a client
**   reads each record from the start of the log, calculating a checksum as
**   it does. Each time a LOG_COMMIT or LOG_CKSUM is encountered, the
**   recovery process verifies that the checksum stored in the log
**   matches the calculated checksum. If it does not, the recovery process
**   can stop reading the log.
**
**   If a recovery process reads records (other than COMMIT or CKSUM)
**   consisting of at least LSM_CKSUM_MAXDATA bytes, then the next record in
**   the log must be either a LOG_CKSUM or LOG_COMMIT record. If it is
**   not, the recovery process also stops reading the log.
**
**   To recover the log file, it must be read twice. The first time to
**   determine the location of the last valid commit record. And the second
**   time to load data into the in-memory tree.
**
**   Todo: Surely there is a better way...
**
** LOG WRAPPING
**
**   If the log file were never deleted or wrapped, it would be possible to
**   read it from start to end each time recovery is required (i.e. each time
**   the number of database clients changes from 0 to 1). Effectively reading
**   the entire history of the database each time. This would quickly become
**   inefficient. Additionally, since the log file would grow without bound,
**   it wastes storage space.
**
**   Instead, part of each checkpoint written into the database file contains
**   a log offset (and other information required to read the log starting
**   at this offset) at which to begin recovery. Offset $O.
**
**   Once a checkpoint has been written and synced into the database file, it
**   is guaranteed that no recovery process will need to read any data before
**   offset $O of the log file.
**   It is therefore safe to begin overwriting
**   any data that occurs before offset $O.
**
**   This implementation separates the log into three regions mapped into
**   the log file - regions 0, 1 and 2. During recovery, regions are read
**   in ascending order (i.e. 0, then 1, then 2). Each region is zero or
**   more bytes in size.
**
**     |---1---|..|--0--|.|--2--|....
**
**   New records are always appended to the end of region 2.
**
**   Initially (when it is empty), all three regions are zero bytes in size.
**   Each of them is located at the beginning of the file. As records are
**   added to the log, region 2 grows, so that the log consists of a zero
**   byte region 1, followed by a zero byte region 0, followed by an N byte
**   region 2. After one or more checkpoints have been written to disk,
**   the start point of region 2 is moved to $O. For example:
**
**     A) ||.........|--2--|....
**
**   (both regions 0 and 1 are 0 bytes in size at offset 0).
**
**   Eventually, the log wraps around to write new records into the start.
**   At this point, region 2 is renamed to region 0. Region 0 is renamed
**   to region 2. After appending a few records to the new region 2, the
**   log file looks like this:
**
**     B) ||--2--|...|--0--|....
**
**   (region 1 is still 0 bytes in size, located at offset 0).
**
**   Any checkpoints made at this point may reduce the size of region 0.
**   However, if they do not, and region 2 expands so that it is about to
**   overwrite the start of region 0, then region 2 is renamed to region 1,
**   and a new region 2 created at the end of the file following the existing
**   region 0.
**
**     C) |---1---|..|--0--|.|-2-|
**
**   In this state records are appended to region 2 until checkpoints have
**   contracted regions 0 and 1 until they are both zero bytes in size.
**   They
**   are then shifted to the start of the log file, leaving the system in
**   the equivalent of state A above.
**
**   Alternatively, state B may transition directly to state A if the size
**   of region 0 is reduced to zero bytes before region 2 threatens to
**   encroach upon it.
**
** LOG_PAD1 & LOG_PAD2 RECORDS
**
**   PAD1 and PAD2 records may appear in a log file at any point. They allow
**   a process writing the log file to align the beginning of transactions
**   with the beginning of disk sectors, which increases robustness.
**
** RECORD FORMATS:
**
**   LOG_EOF:    * A single 0x00 byte.
**
**   LOG_PAD1:   * A single 0x01 byte.
**
**   LOG_PAD2:   * A single 0x02 byte, followed by
**               * The number of unused bytes (N) as a varint,
**               * An N byte block of unused space.
**
**   LOG_COMMIT: * A single 0x03 byte.
**               * An 8-byte checksum.
**
**   LOG_JUMP:   * A single 0x04 byte.
**               * Absolute file offset to jump to, encoded as a varint.
**
**   LOG_WRITE:  * A single 0x06 or 0x07 byte,
**               * The number of bytes in the key, encoded as a varint,
**               * The number of bytes in the value, encoded as a varint,
**               * If the first byte was 0x07, an 8 byte checksum.
**               * The key data,
**               * The value data.
**
**   LOG_DELETE: * A single 0x08 or 0x09 byte,
**               * The number of bytes in the key, encoded as a varint,
**               * If the first byte was 0x09, an 8 byte checksum.
**               * The key data.
**
**   Varints are as described in lsm_varint.c (SQLite 4 format).
**
** CHECKSUMS:
**
**   The checksum is calculated using two 32-bit unsigned integers, s0 and
**   s1. The initial value for both is 42. It is updated each time a record
**   is written into the log file by treating the encoded (binary) record as
**   an array of 32-bit little-endian integers.
Then, if x[] is the integer 170 ** array, updating the checksum accumulators as follows: 171 ** 172 ** for i from 0 to n-1 step 2: 173 ** s0 += x[i] + s1; 174 ** s1 += x[i+1] + s0; 175 ** endfor 176 ** 177 ** If the record is not an even multiple of 8-bytes in size it is padded 178 ** with zeroes to make it so before the checksum is updated. 179 ** 180 ** The checksum stored in a COMMIT, WRITE or DELETE is based on all bytes 181 ** up to the start of the 8-byte checksum itself, including the COMMIT, 182 ** WRITE or DELETE fields that appear before the checksum in the record. 183 ** 184 ** VARINT FORMAT 185 ** 186 ** See lsm_varint.c. 187 */ 188 189 #ifndef _LSM_INT_H 190 # include "lsmInt.h" 191 #endif 192 193 /* Log record types */ 194 #define LSM_LOG_EOF 0x00 195 #define LSM_LOG_PAD1 0x01 196 #define LSM_LOG_PAD2 0x02 197 #define LSM_LOG_COMMIT 0x03 198 #define LSM_LOG_JUMP 0x04 199 200 #define LSM_LOG_WRITE 0x06 201 #define LSM_LOG_WRITE_CKSUM 0x07 202 203 #define LSM_LOG_DELETE 0x08 204 #define LSM_LOG_DELETE_CKSUM 0x09 205 206 #define LSM_LOG_DRANGE 0x0A 207 #define LSM_LOG_DRANGE_CKSUM 0x0B 208 209 /* Require a checksum every 32KB. */ 210 #define LSM_CKSUM_MAXDATA (32*1024) 211 212 /* Do not wrap a log file smaller than this in bytes. */ 213 #define LSM_MIN_LOGWRAP (128*1024) 214 215 /* 216 ** szSector: 217 ** Commit records must be aligned to end on szSector boundaries. If 218 ** the safety-mode is set to NORMAL or OFF, this value is 1. Otherwise, 219 ** if the safety-mode is set to FULL, it is the size of the file-system 220 ** sectors as reported by lsmFsSectorSize(). 
221 */ 222 struct LogWriter { 223 u32 cksum0; /* Checksum 0 at offset iOff */ 224 u32 cksum1; /* Checksum 1 at offset iOff */ 225 int iCksumBuf; /* Bytes of buf that have been checksummed */ 226 i64 iOff; /* Offset at start of buffer buf */ 227 int szSector; /* Sector size for this transaction */ 228 LogRegion jump; /* Avoid writing to this region */ 229 i64 iRegion1End; /* End of first region written by trans */ 230 i64 iRegion2Start; /* Start of second regions written by trans */ 231 LsmString buf; /* Buffer containing data not yet written */ 232 }; 233 234 /* 235 ** Return the result of interpreting the first 4 bytes in buffer aIn as 236 ** a 32-bit unsigned little-endian integer. 237 */ 238 static u32 getU32le(u8 *aIn){ 239 return ((u32)aIn[3] << 24) 240 + ((u32)aIn[2] << 16) 241 + ((u32)aIn[1] << 8) 242 + ((u32)aIn[0]); 243 } 244 245 246 /* 247 ** This function is the same as logCksum(), except that pointer "a" need 248 ** not be aligned to an 8-byte boundary or padded with zero bytes. This 249 ** version is slower, but sometimes more convenient to use. 250 */ 251 static void logCksumUnaligned( 252 char *z, /* Input buffer */ 253 int n, /* Size of input buffer in bytes */ 254 u32 *pCksum0, /* IN/OUT: Checksum value 1 */ 255 u32 *pCksum1 /* IN/OUT: Checksum value 2 */ 256 ){ 257 u8 *a = (u8 *)z; 258 u32 cksum0 = *pCksum0; 259 u32 cksum1 = *pCksum1; 260 int nIn = (n/8) * 8; 261 int i; 262 263 assert( n>0 ); 264 for(i=0; i<nIn; i+=8){ 265 cksum0 += getU32le(&a[i]) + cksum1; 266 cksum1 += getU32le(&a[i+4]) + cksum0; 267 } 268 269 if( nIn!=n ){ 270 u8 aBuf[8] = {0, 0, 0, 0, 0, 0, 0, 0}; 271 assert( (n-nIn)<8 && n>nIn ); 272 memcpy(aBuf, &a[nIn], n-nIn); 273 cksum0 += getU32le(aBuf) + cksum1; 274 cksum1 += getU32le(&aBuf[4]) + cksum0; 275 } 276 277 *pCksum0 = cksum0; 278 *pCksum1 = cksum1; 279 } 280 281 /* 282 ** Update pLog->cksum0 and pLog->cksum1 so that the first nBuf bytes in the 283 ** write buffer (pLog->buf) are included in the checksum. 
284 */ 285 static void logUpdateCksum(LogWriter *pLog, int nBuf){ 286 assert( (pLog->iCksumBuf % 8)==0 ); 287 assert( pLog->iCksumBuf<=nBuf ); 288 assert( (nBuf % 8)==0 || nBuf==pLog->buf.n ); 289 if( nBuf>pLog->iCksumBuf ){ 290 logCksumUnaligned( 291 &pLog->buf.z[pLog->iCksumBuf], nBuf-pLog->iCksumBuf, 292 &pLog->cksum0, &pLog->cksum1 293 ); 294 } 295 pLog->iCksumBuf = nBuf; 296 } 297 298 static i64 firstByteOnSector(LogWriter *pLog, i64 iOff){ 299 return (iOff / pLog->szSector) * pLog->szSector; 300 } 301 static i64 lastByteOnSector(LogWriter *pLog, i64 iOff){ 302 return firstByteOnSector(pLog, iOff) + pLog->szSector - 1; 303 } 304 305 /* 306 ** If possible, reclaim log file space. Log file space is reclaimed after 307 ** a snapshot that points to the same data in the database file is synced 308 ** into the db header. 309 */ 310 static int logReclaimSpace(lsm_db *pDb){ 311 int rc; 312 int iMeta; 313 int bRotrans; /* True if there exists some ro-trans */ 314 315 /* Test if there exists some other connection with a read-only transaction 316 ** open. If there does, then log file space may not be reclaimed. */ 317 rc = lsmDetectRoTrans(pDb, &bRotrans); 318 if( rc!=LSM_OK || bRotrans ) return rc; 319 320 iMeta = (int)pDb->pShmhdr->iMetaPage; 321 if( iMeta==1 || iMeta==2 ){ 322 DbLog *pLog = &pDb->treehdr.log; 323 i64 iSyncedId; 324 325 /* Read the snapshot-id of the snapshot stored on meta-page iMeta. Note 326 ** that in theory, the value read is untrustworthy (due to a race 327 ** condition - see comments above lsmFsReadSyncedId()). So it is only 328 ** ever used to conclude that no log space can be reclaimed. If it seems 329 ** to indicate that it may be possible to reclaim log space, a 330 ** second call to lsmCheckpointSynced() (which does return trustworthy 331 ** values) is made below to confirm. 
*/ 332 rc = lsmFsReadSyncedId(pDb, iMeta, &iSyncedId); 333 334 if( rc==LSM_OK && pLog->iSnapshotId!=iSyncedId ){ 335 i64 iSnapshotId = 0; 336 i64 iOff = 0; 337 rc = lsmCheckpointSynced(pDb, &iSnapshotId, &iOff, 0); 338 if( rc==LSM_OK && pLog->iSnapshotId<iSnapshotId ){ 339 int iRegion; 340 for(iRegion=0; iRegion<3; iRegion++){ 341 LogRegion *p = &pLog->aRegion[iRegion]; 342 if( iOff>=p->iStart && iOff<=p->iEnd ) break; 343 p->iStart = 0; 344 p->iEnd = 0; 345 } 346 assert( iRegion<3 ); 347 pLog->aRegion[iRegion].iStart = iOff; 348 pLog->iSnapshotId = iSnapshotId; 349 } 350 } 351 } 352 return rc; 353 } 354 355 /* 356 ** This function is called when a write-transaction is first opened. It 357 ** is assumed that the caller is holding the client-mutex when it is 358 ** called. 359 ** 360 ** Before returning, this function allocates the LogWriter object that 361 ** will be used to write to the log file during the write transaction. 362 ** LSM_OK is returned if no error occurs, otherwise an LSM error code. 363 */ 364 int lsmLogBegin(lsm_db *pDb){ 365 int rc = LSM_OK; 366 LogWriter *pNew; 367 LogRegion *aReg; 368 369 if( pDb->bUseLog==0 ) return LSM_OK; 370 371 /* If the log file has not yet been opened, open it now. Also allocate 372 ** the LogWriter structure, if it has not already been allocated. */ 373 rc = lsmFsOpenLog(pDb, 0); 374 if( pDb->pLogWriter==0 ){ 375 pNew = lsmMallocZeroRc(pDb->pEnv, sizeof(LogWriter), &rc); 376 if( pNew ){ 377 lsmStringInit(&pNew->buf, pDb->pEnv); 378 rc = lsmStringExtend(&pNew->buf, 2); 379 } 380 pDb->pLogWriter = pNew; 381 }else{ 382 pNew = pDb->pLogWriter; 383 assert( (u8 *)(&pNew[1])==(u8 *)(&((&pNew->buf)[1])) ); 384 memset(pNew, 0, ((u8 *)&pNew->buf) - (u8 *)pNew); 385 pNew->buf.n = 0; 386 } 387 388 if( rc==LSM_OK ){ 389 /* The following call detects whether or not a new snapshot has been 390 ** synced into the database file. 
If so, it updates the contents of 391 ** the pDb->treehdr.log structure to reclaim any space in the log 392 ** file that is no longer required. 393 ** 394 ** TODO: Calling this every transaction is overkill. And since the 395 ** call has to read and checksum a snapshot from the database file, 396 ** it is expensive. It would be better to figure out a way so that 397 ** this is only called occasionally - say for every 32KB written to 398 ** the log file. 399 */ 400 rc = logReclaimSpace(pDb); 401 } 402 if( rc!=LSM_OK ){ 403 lsmLogClose(pDb); 404 return rc; 405 } 406 407 /* Set the effective sector-size for this transaction. Sectors are assumed 408 ** to be one byte in size if the safety-mode is OFF or NORMAL, or as 409 ** reported by lsmFsSectorSize if it is FULL. */ 410 if( pDb->eSafety==LSM_SAFETY_FULL ){ 411 pNew->szSector = lsmFsSectorSize(pDb->pFS); 412 assert( pNew->szSector>0 ); 413 }else{ 414 pNew->szSector = 1; 415 } 416 417 /* There are now three scenarios: 418 ** 419 ** 1) Regions 0 and 1 are both zero bytes in size and region 2 begins 420 ** at a file offset greater than LSM_MIN_LOGWRAP. In this case, wrap 421 ** around to the start and write data into the start of the log file. 422 ** 423 ** 2) Region 1 is zero bytes in size and region 2 occurs earlier in the 424 ** file than region 0. In this case, append data to region 2, but 425 ** remember to jump over region 1 if required. 426 ** 427 ** 3) Region 2 is the last in the file. Append to it. 428 */ 429 aReg = &pDb->treehdr.log.aRegion[0]; 430 431 assert( aReg[0].iEnd==0 || aReg[0].iEnd>aReg[0].iStart ); 432 assert( aReg[1].iEnd==0 || aReg[1].iEnd>aReg[1].iStart ); 433 434 pNew->cksum0 = pDb->treehdr.log.cksum0; 435 pNew->cksum1 = pDb->treehdr.log.cksum1; 436 437 if( aReg[0].iEnd==0 && aReg[1].iEnd==0 && aReg[2].iStart>=LSM_MIN_LOGWRAP ){ 438 /* Case 1. Wrap around to the start of the file. Write an LSM_LOG_JUMP 439 ** into the log file in this case. 
Pad it out to 8 bytes using a PAD2 440 ** record so that the checksums can be updated immediately. */ 441 u8 aJump[] = { 442 LSM_LOG_PAD2, 0x04, 0x00, 0x00, 0x00, 0x00, LSM_LOG_JUMP, 0x00 443 }; 444 445 lsmStringBinAppend(&pNew->buf, aJump, sizeof(aJump)); 446 logUpdateCksum(pNew, pNew->buf.n); 447 rc = lsmFsWriteLog(pDb->pFS, aReg[2].iEnd, &pNew->buf); 448 pNew->iCksumBuf = pNew->buf.n = 0; 449 450 aReg[2].iEnd += 8; 451 pNew->jump = aReg[0] = aReg[2]; 452 aReg[2].iStart = aReg[2].iEnd = 0; 453 }else if( aReg[1].iEnd==0 && aReg[2].iEnd<aReg[0].iEnd ){ 454 /* Case 2. */ 455 pNew->iOff = aReg[2].iEnd; 456 pNew->jump = aReg[0]; 457 }else{ 458 /* Case 3. */ 459 assert( aReg[2].iStart>=aReg[0].iEnd && aReg[2].iStart>=aReg[1].iEnd ); 460 pNew->iOff = aReg[2].iEnd; 461 } 462 463 if( pNew->jump.iStart ){ 464 i64 iRound; 465 assert( pNew->jump.iStart>pNew->iOff ); 466 467 iRound = firstByteOnSector(pNew, pNew->jump.iStart); 468 if( iRound>pNew->iOff ) pNew->jump.iStart = iRound; 469 pNew->jump.iEnd = lastByteOnSector(pNew, pNew->jump.iEnd); 470 } 471 472 assert( pDb->pLogWriter==pNew ); 473 return rc; 474 } 475 476 /* 477 ** This function is called when a write-transaction is being closed. 478 ** Parameter bCommit is true if the transaction is being committed, 479 ** or false otherwise. The caller must hold the client-mutex to call 480 ** this function. 481 ** 482 ** A call to this function deletes the LogWriter object allocated by 483 ** lsmLogBegin(). If the transaction is being committed, the shared state 484 ** in *pLog is updated before returning. 485 */ 486 void lsmLogEnd(lsm_db *pDb, int bCommit){ 487 DbLog *pLog; 488 LogWriter *p; 489 p = pDb->pLogWriter; 490 491 if( p==0 ) return; 492 pLog = &pDb->treehdr.log; 493 494 if( bCommit ){ 495 pLog->aRegion[2].iEnd = p->iOff; 496 pLog->cksum0 = p->cksum0; 497 pLog->cksum1 = p->cksum1; 498 if( p->iRegion1End ){ 499 /* This happens when the transaction had to jump over some other 500 ** part of the log. 
*/ 501 assert( pLog->aRegion[1].iEnd==0 ); 502 assert( pLog->aRegion[2].iStart<p->iRegion1End ); 503 pLog->aRegion[1].iStart = pLog->aRegion[2].iStart; 504 pLog->aRegion[1].iEnd = p->iRegion1End; 505 pLog->aRegion[2].iStart = p->iRegion2Start; 506 } 507 } 508 } 509 510 static int jumpIfRequired( 511 lsm_db *pDb, 512 LogWriter *pLog, 513 int nReq, 514 int *pbJump 515 ){ 516 /* Determine if it is necessary to add an LSM_LOG_JUMP to jump over the 517 ** jump region before writing the LSM_LOG_WRITE or DELETE record. This 518 ** is necessary if there is insufficient room between the current offset 519 ** and the jump region to fit the new WRITE/DELETE record and the largest 520 ** possible JUMP record with up to 7 bytes of padding (a total of 17 521 ** bytes). */ 522 if( (pLog->jump.iStart > (pLog->iOff + pLog->buf.n)) 523 && (pLog->jump.iStart < (pLog->iOff + pLog->buf.n + (nReq + 17))) 524 ){ 525 int rc; /* Return code */ 526 i64 iJump; /* Offset to jump to */ 527 u8 aJump[10]; /* Encoded jump record */ 528 int nJump; /* Valid bytes in aJump[] */ 529 int nPad; /* Bytes of padding required */ 530 531 /* Serialize the JUMP record */ 532 iJump = pLog->jump.iEnd+1; 533 aJump[0] = LSM_LOG_JUMP; 534 nJump = 1 + lsmVarintPut64(&aJump[1], iJump); 535 536 /* Adding padding to the contents of the buffer so that it will be a 537 ** multiple of 8 bytes in size after the JUMP record is appended. This 538 ** is not strictly required, it just makes the keeping the running 539 ** checksum up to date in this file a little simpler. */ 540 nPad = (pLog->buf.n + nJump) % 8; 541 if( nPad ){ 542 u8 aPad[7] = {0,0,0,0,0,0,0}; 543 nPad = 8-nPad; 544 if( nPad==1 ){ 545 aPad[0] = LSM_LOG_PAD1; 546 }else{ 547 aPad[0] = LSM_LOG_PAD2; 548 aPad[1] = (u8)(nPad-2); 549 } 550 rc = lsmStringBinAppend(&pLog->buf, aPad, nPad); 551 if( rc!=LSM_OK ) return rc; 552 } 553 554 /* Append the JUMP record to the buffer. Then flush the buffer to disk 555 ** and update the checksums. 
The next write to the log file (assuming 556 ** there is no transaction rollback) will be to offset iJump (just past 557 ** the jump region). */ 558 rc = lsmStringBinAppend(&pLog->buf, aJump, nJump); 559 if( rc!=LSM_OK ) return rc; 560 assert( (pLog->buf.n % 8)==0 ); 561 rc = lsmFsWriteLog(pDb->pFS, pLog->iOff, &pLog->buf); 562 if( rc!=LSM_OK ) return rc; 563 logUpdateCksum(pLog, pLog->buf.n); 564 pLog->iRegion1End = (pLog->iOff + pLog->buf.n); 565 pLog->iRegion2Start = iJump; 566 pLog->iOff = iJump; 567 pLog->iCksumBuf = pLog->buf.n = 0; 568 if( pbJump ) *pbJump = 1; 569 } 570 571 return LSM_OK; 572 } 573 574 static int logCksumAndFlush(lsm_db *pDb){ 575 int rc; /* Return code */ 576 LogWriter *pLog = pDb->pLogWriter; 577 578 /* Calculate the checksum value. Append it to the buffer. */ 579 logUpdateCksum(pLog, pLog->buf.n); 580 lsmPutU32((u8 *)&pLog->buf.z[pLog->buf.n], pLog->cksum0); 581 pLog->buf.n += 4; 582 lsmPutU32((u8 *)&pLog->buf.z[pLog->buf.n], pLog->cksum1); 583 pLog->buf.n += 4; 584 585 /* Write the contents of the buffer to disk. */ 586 rc = lsmFsWriteLog(pDb->pFS, pLog->iOff, &pLog->buf); 587 pLog->iOff += pLog->buf.n; 588 pLog->iCksumBuf = pLog->buf.n = 0; 589 590 return rc; 591 } 592 593 /* 594 ** Write the contents of the log-buffer to disk. Then write either a CKSUM 595 ** or COMMIT record, depending on the value of parameter eType. 596 */ 597 static int logFlush(lsm_db *pDb, int eType){ 598 int rc; 599 int nReq; 600 LogWriter *pLog = pDb->pLogWriter; 601 602 assert( eType==LSM_LOG_COMMIT ); 603 assert( pLog ); 604 605 /* Commit record is always 9 bytes in size. */ 606 nReq = 9; 607 if( eType==LSM_LOG_COMMIT && pLog->szSector>1 ) nReq += pLog->szSector + 17; 608 rc = jumpIfRequired(pDb, pLog, nReq, 0); 609 610 /* If this is a COMMIT, add padding to the log so that the COMMIT record 611 ** is aligned against the end of a disk sector. 
In other words, add padding 612 ** so that the first byte following the COMMIT record lies on a different 613 ** sector. */ 614 if( eType==LSM_LOG_COMMIT && pLog->szSector>1 ){ 615 int nPad; /* Bytes of padding to add */ 616 617 /* Determine the value of nPad. */ 618 nPad = ((pLog->iOff + pLog->buf.n + 9) % pLog->szSector); 619 if( nPad ) nPad = pLog->szSector - nPad; 620 rc = lsmStringExtend(&pLog->buf, nPad); 621 if( rc!=LSM_OK ) return rc; 622 623 while( nPad ){ 624 if( nPad==1 ){ 625 pLog->buf.z[pLog->buf.n++] = LSM_LOG_PAD1; 626 nPad = 0; 627 }else{ 628 int n = LSM_MIN(200, nPad-2); 629 pLog->buf.z[pLog->buf.n++] = LSM_LOG_PAD2; 630 pLog->buf.z[pLog->buf.n++] = (char)n; 631 nPad -= 2; 632 memset(&pLog->buf.z[pLog->buf.n], 0x2B, n); 633 pLog->buf.n += n; 634 nPad -= n; 635 } 636 } 637 } 638 639 /* Make sure there is room in the log-buffer to add the CKSUM or COMMIT 640 ** record. Then add the first byte of it. */ 641 rc = lsmStringExtend(&pLog->buf, 9); 642 if( rc!=LSM_OK ) return rc; 643 pLog->buf.z[pLog->buf.n++] = (char)eType; 644 memset(&pLog->buf.z[pLog->buf.n], 0, 8); 645 646 rc = logCksumAndFlush(pDb); 647 648 /* If this is a commit and synchronous=full, sync the log to disk. */ 649 if( rc==LSM_OK && eType==LSM_LOG_COMMIT && pDb->eSafety==LSM_SAFETY_FULL ){ 650 rc = lsmFsSyncLog(pDb->pFS); 651 } 652 return rc; 653 } 654 655 /* 656 ** Append an LSM_LOG_WRITE (if nVal>=0) or LSM_LOG_DELETE (if nVal<0) 657 ** record to the database log. 
658 */ 659 int lsmLogWrite( 660 lsm_db *pDb, /* Database handle */ 661 int eType, 662 void *pKey, int nKey, /* Database key to write to log */ 663 void *pVal, int nVal /* Database value (or nVal<0) to write */ 664 ){ 665 int rc = LSM_OK; 666 LogWriter *pLog; /* Log object to write to */ 667 int nReq; /* Bytes of space required in log */ 668 int bCksum = 0; /* True to embed a checksum in this record */ 669 670 assert( eType==LSM_WRITE || eType==LSM_DELETE || eType==LSM_DRANGE ); 671 assert( LSM_LOG_WRITE==LSM_WRITE ); 672 assert( LSM_LOG_DELETE==LSM_DELETE ); 673 assert( LSM_LOG_DRANGE==LSM_DRANGE ); 674 assert( (eType==LSM_LOG_DELETE)==(nVal<0) ); 675 676 if( pDb->bUseLog==0 ) return LSM_OK; 677 pLog = pDb->pLogWriter; 678 679 /* Determine how many bytes of space are required, assuming that a checksum 680 ** will be embedded in this record (even though it may not be). */ 681 nReq = 1 + lsmVarintLen32(nKey) + 8 + nKey; 682 if( eType!=LSM_LOG_DELETE ) nReq += lsmVarintLen32(nVal) + nVal; 683 684 /* Jump over the jump region if required. Set bCksum to true to tell the 685 ** code below to include a checksum in the record if either (a) writing 686 ** this record would mean that more than LSM_CKSUM_MAXDATA bytes of data 687 ** have been written to the log since the last checksum, or (b) the jump 688 ** is taken. */ 689 rc = jumpIfRequired(pDb, pLog, nReq, &bCksum); 690 if( (pLog->buf.n+nReq) > LSM_CKSUM_MAXDATA ) bCksum = 1; 691 692 if( rc==LSM_OK ){ 693 rc = lsmStringExtend(&pLog->buf, nReq); 694 } 695 if( rc==LSM_OK ){ 696 u8 *a = (u8 *)&pLog->buf.z[pLog->buf.n]; 697 698 /* Write the record header - the type byte followed by either 1 (for 699 ** DELETE) or 2 (for WRITE) varints. 
*/ 700 assert( LSM_LOG_WRITE_CKSUM == (LSM_LOG_WRITE | 0x0001) ); 701 assert( LSM_LOG_DELETE_CKSUM == (LSM_LOG_DELETE | 0x0001) ); 702 assert( LSM_LOG_DRANGE_CKSUM == (LSM_LOG_DRANGE | 0x0001) ); 703 *(a++) = (u8)eType | (u8)bCksum; 704 a += lsmVarintPut32(a, nKey); 705 if( eType!=LSM_LOG_DELETE ) a += lsmVarintPut32(a, nVal); 706 707 if( bCksum ){ 708 pLog->buf.n = (a - (u8 *)pLog->buf.z); 709 rc = logCksumAndFlush(pDb); 710 a = (u8 *)&pLog->buf.z[pLog->buf.n]; 711 } 712 713 memcpy(a, pKey, nKey); 714 a += nKey; 715 if( eType!=LSM_LOG_DELETE ){ 716 memcpy(a, pVal, nVal); 717 a += nVal; 718 } 719 pLog->buf.n = a - (u8 *)pLog->buf.z; 720 assert( pLog->buf.n<=pLog->buf.nAlloc ); 721 } 722 723 return rc; 724 } 725 726 /* 727 ** Append an LSM_LOG_COMMIT record to the database log. 728 */ 729 int lsmLogCommit(lsm_db *pDb){ 730 if( pDb->bUseLog==0 ) return LSM_OK; 731 return logFlush(pDb, LSM_LOG_COMMIT); 732 } 733 734 /* 735 ** Store the current offset and other checksum related information in the 736 ** structure *pMark. Later, *pMark can be passed to lsmLogSeek() to "rewind" 737 ** the LogWriter object to the current log file offset. This is used when 738 ** rolling back savepoint transactions. 739 */ 740 void lsmLogTell( 741 lsm_db *pDb, /* Database handle */ 742 LogMark *pMark /* Populate this object with current offset */ 743 ){ 744 LogWriter *pLog; 745 int nCksum; 746 747 if( pDb->bUseLog==0 ) return; 748 pLog = pDb->pLogWriter; 749 nCksum = pLog->buf.n & 0xFFFFFFF8; 750 logUpdateCksum(pLog, nCksum); 751 assert( pLog->iCksumBuf==nCksum ); 752 pMark->nBuf = pLog->buf.n - nCksum; 753 memcpy(pMark->aBuf, &pLog->buf.z[nCksum], pMark->nBuf); 754 755 pMark->iOff = pLog->iOff + pLog->buf.n; 756 pMark->cksum0 = pLog->cksum0; 757 pMark->cksum1 = pLog->cksum1; 758 } 759 760 /* 761 ** Seek (rewind) back to the log file offset stored by an ealier call to 762 ** lsmLogTell() in *pMark. 
763 */ 764 void lsmLogSeek( 765 lsm_db *pDb, /* Database handle */ 766 LogMark *pMark /* Object containing log offset to seek to */ 767 ){ 768 LogWriter *pLog; 769 770 if( pDb->bUseLog==0 ) return; 771 pLog = pDb->pLogWriter; 772 773 assert( pMark->iOff<=pLog->iOff+pLog->buf.n ); 774 if( (pMark->iOff & 0xFFFFFFF8)>=pLog->iOff ){ 775 pLog->buf.n = (int)(pMark->iOff - pLog->iOff); 776 pLog->iCksumBuf = (pLog->buf.n & 0xFFFFFFF8); 777 }else{ 778 pLog->buf.n = pMark->nBuf; 779 memcpy(pLog->buf.z, pMark->aBuf, pMark->nBuf); 780 pLog->iCksumBuf = 0; 781 pLog->iOff = pMark->iOff - pMark->nBuf; 782 } 783 pLog->cksum0 = pMark->cksum0; 784 pLog->cksum1 = pMark->cksum1; 785 786 if( pMark->iOff > pLog->iRegion1End ) pLog->iRegion1End = 0; 787 if( pMark->iOff > pLog->iRegion2Start ) pLog->iRegion2Start = 0; 788 } 789 790 /* 791 ** This function does the work for an lsm_info(LOG_STRUCTURE) request. 792 */ 793 int lsmInfoLogStructure(lsm_db *pDb, char **pzVal){ 794 int rc = LSM_OK; 795 char *zVal = 0; 796 797 /* If there is no read or write transaction open, read the latest 798 ** tree-header from shared-memory to report on. If necessary, update 799 ** it based on the contents of the database header. 800 ** 801 ** No locks are taken here - these are passive read operations only. 802 */ 803 if( pDb->pCsr==0 && pDb->nTransOpen==0 ){ 804 rc = lsmTreeLoadHeader(pDb, 0); 805 if( rc==LSM_OK ) rc = logReclaimSpace(pDb); 806 } 807 808 if( rc==LSM_OK ){ 809 DbLog *pLog = &pDb->treehdr.log; 810 zVal = lsmMallocPrintf(pDb->pEnv, 811 "%d %d %d %d %d %d", 812 (int)pLog->aRegion[0].iStart, (int)pLog->aRegion[0].iEnd, 813 (int)pLog->aRegion[1].iStart, (int)pLog->aRegion[1].iEnd, 814 (int)pLog->aRegion[2].iStart, (int)pLog->aRegion[2].iEnd 815 ); 816 if( !zVal ) rc = LSM_NOMEM_BKPT; 817 } 818 819 *pzVal = zVal; 820 return rc; 821 } 822 823 /************************************************************************* 824 ** Begin code for log recovery. 
825 */ 826 827 typedef struct LogReader LogReader; 828 struct LogReader { 829 FileSystem *pFS; /* File system to read from */ 830 i64 iOff; /* File offset at end of buf content */ 831 int iBuf; /* Current read offset in buf */ 832 LsmString buf; /* Buffer containing file content */ 833 834 int iCksumBuf; /* Offset in buf corresponding to cksum[01] */ 835 u32 cksum0; /* Checksum 0 at offset iCksumBuf */ 836 u32 cksum1; /* Checksum 1 at offset iCksumBuf */ 837 }; 838 839 static void logReaderBlob( 840 LogReader *p, /* Log reader object */ 841 LsmString *pBuf, /* Dynamic storage, if required */ 842 int nBlob, /* Number of bytes to read */ 843 u8 **ppBlob, /* OUT: Pointer to blob read */ 844 int *pRc /* IN/OUT: Error code */ 845 ){ 846 static const int LOG_READ_SIZE = 512; 847 int rc = *pRc; /* Return code */ 848 int nReq = nBlob; /* Bytes required */ 849 850 while( rc==LSM_OK && nReq>0 ){ 851 int nAvail; /* Bytes of data available in p->buf */ 852 if( p->buf.n==p->iBuf ){ 853 int nCksum; /* Total bytes requiring checksum */ 854 int nCarry = 0; /* Total bytes requiring checksum */ 855 856 nCksum = p->iBuf - p->iCksumBuf; 857 if( nCksum>0 ){ 858 nCarry = nCksum % 8; 859 nCksum = ((nCksum / 8) * 8); 860 if( nCksum>0 ){ 861 logCksumUnaligned( 862 &p->buf.z[p->iCksumBuf], nCksum, &p->cksum0, &p->cksum1 863 ); 864 } 865 } 866 if( nCarry>0 ) memcpy(p->buf.z, &p->buf.z[p->iBuf-nCarry], nCarry); 867 p->buf.n = nCarry; 868 p->iBuf = nCarry; 869 870 rc = lsmFsReadLog(p->pFS, p->iOff, LOG_READ_SIZE, &p->buf); 871 if( rc!=LSM_OK ) break; 872 p->iCksumBuf = 0; 873 p->iOff += LOG_READ_SIZE; 874 } 875 876 nAvail = p->buf.n - p->iBuf; 877 if( ppBlob && nReq==nBlob && nBlob<=nAvail ){ 878 *ppBlob = (u8 *)&p->buf.z[p->iBuf]; 879 p->iBuf += nBlob; 880 nReq = 0; 881 }else{ 882 int nCopy = LSM_MIN(nAvail, nReq); 883 if( nBlob==nReq ){ 884 pBuf->n = 0; 885 } 886 rc = lsmStringBinAppend(pBuf, (u8 *)&p->buf.z[p->iBuf], nCopy); 887 nReq -= nCopy; 888 p->iBuf += nCopy; 889 if( nReq==0 && ppBlob 
){ 890 *ppBlob = (u8*)pBuf->z; 891 } 892 } 893 } 894 895 *pRc = rc; 896 } 897 898 static void logReaderVarint( 899 LogReader *p, 900 LsmString *pBuf, 901 int *piVal, /* OUT: Value read from log */ 902 int *pRc /* IN/OUT: Error code */ 903 ){ 904 if( *pRc==LSM_OK ){ 905 u8 *aVarint; 906 if( p->buf.n==p->iBuf ){ 907 logReaderBlob(p, 0, 10, &aVarint, pRc); 908 if( LSM_OK==*pRc ) p->iBuf -= (10 - lsmVarintGet32(aVarint, piVal)); 909 }else{ 910 logReaderBlob(p, pBuf, lsmVarintSize(p->buf.z[p->iBuf]), &aVarint, pRc); 911 if( LSM_OK==*pRc ) lsmVarintGet32(aVarint, piVal); 912 } 913 } 914 } 915 916 static void logReaderByte(LogReader *p, u8 *pByte, int *pRc){ 917 u8 *pPtr = 0; 918 logReaderBlob(p, 0, 1, &pPtr, pRc); 919 if( pPtr ) *pByte = *pPtr; 920 } 921 922 static void logReaderCksum(LogReader *p, LsmString *pBuf, int *pbEof, int *pRc){ 923 if( *pRc==LSM_OK ){ 924 u8 *pPtr = 0; 925 u32 cksum0, cksum1; 926 int nCksum = p->iBuf - p->iCksumBuf; 927 928 /* Update in-memory (expected) checksums */ 929 assert( nCksum>=0 ); 930 logCksumUnaligned(&p->buf.z[p->iCksumBuf], nCksum, &p->cksum0, &p->cksum1); 931 p->iCksumBuf = p->iBuf + 8; 932 logReaderBlob(p, pBuf, 8, &pPtr, pRc); 933 assert( pPtr || *pRc ); 934 935 /* Read the checksums from the log file. Set *pbEof if they do not match. 
*/ 936 if( pPtr ){ 937 cksum0 = lsmGetU32(pPtr); 938 cksum1 = lsmGetU32(&pPtr[4]); 939 *pbEof = (cksum0!=p->cksum0 || cksum1!=p->cksum1); 940 p->iCksumBuf = p->iBuf; 941 } 942 } 943 } 944 945 static void logReaderInit( 946 lsm_db *pDb, /* Database handle */ 947 DbLog *pLog, /* Log object associated with pDb */ 948 int bInitBuf, /* True if p->buf is uninitialized */ 949 LogReader *p /* Initialize this LogReader object */ 950 ){ 951 p->pFS = pDb->pFS; 952 p->iOff = pLog->aRegion[2].iStart; 953 p->cksum0 = pLog->cksum0; 954 p->cksum1 = pLog->cksum1; 955 if( bInitBuf ){ lsmStringInit(&p->buf, pDb->pEnv); } 956 p->buf.n = 0; 957 p->iCksumBuf = 0; 958 p->iBuf = 0; 959 } 960 961 /* 962 ** This function is called after reading the header of a LOG_DELETE or 963 ** LOG_WRITE record. Parameter nByte is the total size of the key and 964 ** value that follow the header just read. Return true if the size and 965 ** position of the record indicate that it should contain a checksum. 966 */ 967 static int logRequireCksum(LogReader *p, int nByte){ 968 return ((p->iBuf + nByte - p->iCksumBuf) > LSM_CKSUM_MAXDATA); 969 } 970 971 /* 972 ** Recover the contents of the log file. 973 */ 974 int lsmLogRecover(lsm_db *pDb){ 975 LsmString buf1; /* Key buffer */ 976 LsmString buf2; /* Value buffer */ 977 LogReader reader; /* Log reader object */ 978 int rc = LSM_OK; /* Return code */ 979 int nCommit = 0; /* Number of transactions to recover */ 980 int iPass; 981 int nJump = 0; /* Number of LSM_LOG_JUMP records in pass 0 */ 982 DbLog *pLog; 983 int bOpen; 984 985 rc = lsmFsOpenLog(pDb, &bOpen); 986 if( rc!=LSM_OK ) return rc; 987 988 rc = lsmTreeInit(pDb); 989 if( rc!=LSM_OK ) return rc; 990 991 pLog = &pDb->treehdr.log; 992 lsmCheckpointLogoffset(pDb->pShmhdr->aSnap2, pLog); 993 994 logReaderInit(pDb, pLog, 1, &reader); 995 lsmStringInit(&buf1, pDb->pEnv); 996 lsmStringInit(&buf2, pDb->pEnv); 997 998 /* The outer for() loop runs at most twice. 
The first iteration is to 999 ** count the number of committed transactions in the log. The second 1000 ** iterates through those transactions and updates the in-memory tree 1001 ** structure with their contents. */ 1002 if( bOpen ){ 1003 for(iPass=0; iPass<2 && rc==LSM_OK; iPass++){ 1004 int bEof = 0; 1005 1006 while( rc==LSM_OK && !bEof ){ 1007 u8 eType = 0; 1008 logReaderByte(&reader, &eType, &rc); 1009 1010 switch( eType ){ 1011 case LSM_LOG_PAD1: 1012 break; 1013 1014 case LSM_LOG_PAD2: { 1015 int nPad; 1016 logReaderVarint(&reader, &buf1, &nPad, &rc); 1017 logReaderBlob(&reader, &buf1, nPad, 0, &rc); 1018 break; 1019 } 1020 1021 case LSM_LOG_DRANGE: 1022 case LSM_LOG_DRANGE_CKSUM: 1023 case LSM_LOG_WRITE: 1024 case LSM_LOG_WRITE_CKSUM: { 1025 int nKey; 1026 int nVal; 1027 u8 *aVal; 1028 logReaderVarint(&reader, &buf1, &nKey, &rc); 1029 logReaderVarint(&reader, &buf2, &nVal, &rc); 1030 1031 if( eType==LSM_LOG_WRITE_CKSUM || eType==LSM_LOG_DRANGE_CKSUM ){ 1032 logReaderCksum(&reader, &buf1, &bEof, &rc); 1033 }else{ 1034 bEof = logRequireCksum(&reader, nKey+nVal); 1035 } 1036 if( bEof ) break; 1037 1038 logReaderBlob(&reader, &buf1, nKey, 0, &rc); 1039 logReaderBlob(&reader, &buf2, nVal, &aVal, &rc); 1040 if( iPass==1 && rc==LSM_OK ){ 1041 if( eType==LSM_LOG_WRITE || eType==LSM_LOG_WRITE_CKSUM ){ 1042 rc = lsmTreeInsert(pDb, (u8 *)buf1.z, nKey, aVal, nVal); 1043 }else{ 1044 rc = lsmTreeDelete(pDb, (u8 *)buf1.z, nKey, aVal, nVal); 1045 } 1046 } 1047 break; 1048 } 1049 1050 case LSM_LOG_DELETE: 1051 case LSM_LOG_DELETE_CKSUM: { 1052 int nKey; u8 *aKey; 1053 logReaderVarint(&reader, &buf1, &nKey, &rc); 1054 1055 if( eType==LSM_LOG_DELETE_CKSUM ){ 1056 logReaderCksum(&reader, &buf1, &bEof, &rc); 1057 }else{ 1058 bEof = logRequireCksum(&reader, nKey); 1059 } 1060 if( bEof ) break; 1061 1062 logReaderBlob(&reader, &buf1, nKey, &aKey, &rc); 1063 if( iPass==1 && rc==LSM_OK ){ 1064 rc = lsmTreeInsert(pDb, aKey, nKey, NULL, -1); 1065 } 1066 break; 1067 } 1068 1069 case 
LSM_LOG_COMMIT: 1070 logReaderCksum(&reader, &buf1, &bEof, &rc); 1071 if( bEof==0 ){ 1072 nCommit++; 1073 assert( nCommit>0 || iPass==1 ); 1074 if( nCommit==0 ) bEof = 1; 1075 } 1076 break; 1077 1078 case LSM_LOG_JUMP: { 1079 int iOff = 0; 1080 logReaderVarint(&reader, &buf1, &iOff, &rc); 1081 if( rc==LSM_OK ){ 1082 if( iPass==1 ){ 1083 if( pLog->aRegion[2].iStart==0 ){ 1084 assert( pLog->aRegion[1].iStart==0 ); 1085 pLog->aRegion[1].iEnd = reader.iOff; 1086 }else{ 1087 assert( pLog->aRegion[0].iStart==0 ); 1088 pLog->aRegion[0].iStart = pLog->aRegion[2].iStart; 1089 pLog->aRegion[0].iEnd = reader.iOff-reader.buf.n+reader.iBuf; 1090 } 1091 pLog->aRegion[2].iStart = iOff; 1092 }else{ 1093 if( (nJump++)==2 ){ 1094 bEof = 1; 1095 } 1096 } 1097 1098 reader.iOff = iOff; 1099 reader.buf.n = reader.iBuf; 1100 } 1101 break; 1102 } 1103 1104 default: 1105 /* Including LSM_LOG_EOF */ 1106 bEof = 1; 1107 break; 1108 } 1109 } 1110 1111 if( rc==LSM_OK && iPass==0 ){ 1112 if( nCommit==0 ){ 1113 if( pLog->aRegion[2].iStart==0 ){ 1114 iPass = 1; 1115 }else{ 1116 pLog->aRegion[2].iStart = 0; 1117 iPass = -1; 1118 lsmCheckpointZeroLogoffset(pDb); 1119 } 1120 } 1121 logReaderInit(pDb, pLog, 0, &reader); 1122 nCommit = nCommit * -1; 1123 } 1124 } 1125 } 1126 1127 /* Initialize DbLog object */ 1128 if( rc==LSM_OK ){ 1129 pLog->aRegion[2].iEnd = reader.iOff - reader.buf.n + reader.iBuf; 1130 pLog->cksum0 = reader.cksum0; 1131 pLog->cksum1 = reader.cksum1; 1132 } 1133 1134 if( rc==LSM_OK ){ 1135 rc = lsmFinishRecovery(pDb); 1136 }else{ 1137 lsmFinishRecovery(pDb); 1138 } 1139 1140 if( pDb->bRoTrans ){ 1141 lsmFsCloseLog(pDb); 1142 } 1143 1144 lsmStringClear(&buf1); 1145 lsmStringClear(&buf2); 1146 lsmStringClear(&reader.buf); 1147 return rc; 1148 } 1149 1150 void lsmLogClose(lsm_db *db){ 1151 if( db->pLogWriter ){ 1152 lsmFree(db->pEnv, db->pLogWriter->buf.z); 1153 lsmFree(db->pEnv, db->pLogWriter); 1154 db->pLogWriter = 0; 1155 } 1156 }