github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/logio/logio.go (about)

     1  // Copyright 2019 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package logio implements a failure-tolerant log, typically used as
     6  // a write-ahead log. Logs are "history oblivious": new log entries
     7  // do not depend on previous entries; and logs may be concatenated on
     8  // block boundaries while preserving integrity. Likewise, logs may be
     9  // read from a stream without seeking.
    10  //
    11  // Data layout
    12  //
    13  // Logio follows the leveldb log format [1] with some modifications
    14  // to permit efficient re-syncing from the end of a log, as well as
    15  // to use a modern checksum algorithm (xxhash).
    16  //
    17  // A log file is a sequence of 32kB blocks, each containing a sequence
    18  // of records and possibly followed by padding. Records may not span
    19  // blocks; log entries that would straddle block boundaries are broken
    20  // up into multiple records, to be reassembled at read time.
    21  //
    22  //	block := record* padding?
    23  //
    24  //	record :=
    25  //		checksum uint32     // xxhash[2] checksum of the remainder of the record
    26  //		type uint8          // the record type, detailed below
    27  //		length uint16       // the length of the record data, below
    28  //		offset uint64       // the offset (in bytes) of this record from the record that begins the entry
    29  //		data [length]uint8  // the record data
    30  //
    31  // The record types are as follows:
    32  //
    33  //	FULL=1     // the record contains the full entry
    34  //	FIRST=2    // the record is the first in an assembly
    35  //	MIDDLE=3   // the record is in the middle of an assembly
    36  //	LAST=4     // the record concludes an assembly
    37  //
    38  // Thus, entries are assembled by reading a sequence of records:
    39  //
    40  //	entry :=
    41  //		  FULL
    42  //		| FIRST MIDDLE* LAST
    43  //
    44  // Failure tolerance
    45  //
    46  // Logio recovers from record corruption (e.g., checksum errors) and truncated
    47  // writes by re-syncing at read time. If a corrupt record is encountered, the
    48  // reader skips to the next block boundary (which always begins a record) and
    49  // finds the first FULL or FIRST record to re-commence reading.
    50  //
    51  // [1] https://github.com/google/leveldb/blob/master/doc/log_format.md
    52  // [2] http://cyan4973.github.io/xxHash/
    53  package logio
    54  
    55  import (
    56  	"encoding/binary"
    57  )
    58  
    59  // Blocksz is the size of the blocks written to the log files
    60  // produced by this package. See package docs for a detailed
    61  // description.
    62  const Blocksz = 32 << 10
    63  
    64  const headersz = 4 + 1 + 2 + 8
    65  
    66  var byteOrder = binary.LittleEndian
    67  
    68  var zeros = make([]byte, Blocksz)
    69  
    70  const (
    71  	recordFull uint8 = 1 + iota
    72  	recordFirst
    73  	recordMiddle
    74  	recordLast
    75  )