github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/mapio/doc.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  	Package mapio implements a sorted, on-disk map, similar to the
     7  	SSTable data structure used in Bigtable [1], Cassandra [2], and
     8  	others. Maps are read-only, and are produced by a Writer. Each
     9  	Writer expects keys to be appended in lexicographic order. Buf
    10  	provides a means of buffering writes to be sorted before being appended to
    11  	a Writer.
    12  
    13  	Mapio's on-disk layout loosely follows that of LevelDB [3]. Each Map
    14  	is a sequence of blocks; each block comprises a sequence of entries,
    15  	followed by a trailer:
    16  
    17  		block := blockEntry* blockTrailer
    18  		blockEntry :=
    19  			nshared:   uvarint           // number of bytes shared with previous key
    20  			nunshared: uvarint           // number of new bytes in this entry's key
    21  			nvalue:    uvarint           // number of bytes in value
    22  			key:       uint8[nunshared]  // the (prefix compressed) key
    23  			value:     uint8[nvalue]     // the entry's value
    24  		blockTrailer :=
    25  			restarts:  uint32[nrestart]  // array of key restarts
    26  			nrestart:  uint32            // size of restart array
    27  			type:      uint8             // block type (should be 0; reserved for future use)
    28  			crc32:     uint32            // IEEE crc32 of contents and trailer
    29  
    30  	Maps prefix compress each key by storing the number of bytes shared
    31  	with the previous key. Maps contain a number of restart points:
    32  	points at which the full key is specified (and nshared = 0). The
    33  	restart points are stored in an array in the block trailer. This
    34  	array can be used to perform binary search for keys.
    35  
    36  	A Map is a sequence of data blocks, followed by zero or more meta
    37  	blocks, an index block, and a trailer.
    38  
    39  		map := block(data)* block(meta)* block(index) mapTrailer
    40  		mapTrailer :=
    41  			meta:	blockAddr[20]  // zero-padded address of the meta block index (tbd)
    42  			index:  blockAddr[20]  // zero-padded address of index
    43  			magic:	uint64         // magic (0xa8b2374e8558bc76)
    44  		blockAddr :=
    45  			offset: uvarint        // offset of block in map
    46  			len:    uvarint        // length of block
    47  
    48  	The index block contains one entry for each block in the map: each
    49  	entry's key is the last key in that block; the entry's value is a
    50  	blockAddr containing the position of that block. This arrangement
    51  	allows the reader to binary search the index block then search the
    52  	found block.
    53  
    54  	[1] https://static.googleusercontent.com/media/research.google.com/en//archive/bigtable-osdi06.pdf
    55  	[2] https://www.cs.cornell.edu/projects/ladis2009/papers/lakshman-ladis2009.pdf
    56  	[3] https://github.com/google/leveldb
    57  */
    58  package mapio