github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/logio/logio.go (about) 1 // Copyright 2019 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 // Package logio implements a failure-tolerant log, typically used as 6 // a write-ahead log. Logs are "history oblivious": new log entries 7 // do not depend on previous entries; and logs may be concatenated on 8 // block boundaries while preserving integrity. Likewise, logs may be 9 // read from a stream without seeking. 10 // 11 // Data layout 12 // 13 // Logio follows the leveldb log format [1] with some modifications 14 // to permit efficient re-syncing from the end of a log, as well as 15 // to use a modern checksum algorithm (xxhash). 16 // 17 // A log file is a sequence of 32kB blocks, each containing a sequence 18 // of records and possibly followed by padding. Records may not span 19 // blocks; log entries that would straddle block boundaries are broken 20 // up into multiple records, to be reassembled at read time. 21 // 22 // block := record* padding? 23 // 24 // record := 25 // checksum uint32 // xxhash[2] checksum of the remainder of the record 26 // type uint8 // the record type, detailed below 27 // length uint16 // the length of the record data, below 28 // offset uint64 // the offset (in bytes) of this record from the record that begins the entry 29 // data [length]uint8 // the record data 30 // 31 // The record types are as follows: 32 // 33 // FULL=1 // the record contains the full entry 34 // FIRST=2 // the record is the first in an assembly 35 // MIDDLE=3 // the record is in the middle of an assembly 36 // LAST=4 // the record concludes an assembly 37 // 38 // Thus, entries are assembled by reading a sequence of records: 39 // 40 // entry := 41 // FULL 42 // | FIRST MIDDLE* LAST 43 // 44 // Failure tolerance 45 // 46 // Logio recovers from record corruption (e.g., checksum errors) and truncated 47 // writes by re-syncing at read time. If a corrupt record is encountered, the 48 // reader skips to the next block boundary (which always begins a record) and 49 // finds the first FULL or FIRST record to re-commence reading. 50 // 51 // [1] https://github.com/google/leveldb/blob/master/doc/log_format.md 52 // [2] http://cyan4973.github.io/xxHash/ 53 package logio 54 55 import ( 56 "encoding/binary" 57 ) 58 59 // Blocksz is the size of the blocks written to the log files 60 // produced by this package. See package docs for a detailed 61 // description. 62 const Blocksz = 32 << 10 63 64 const headersz = 4 + 1 + 2 + 8 65 66 var byteOrder = binary.LittleEndian 67 68 var zeros = make([]byte, Blocksz) 69 70 const ( 71 recordFull uint8 = 1 + iota 72 recordFirst 73 recordMiddle 74 recordLast 75 )