github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/table.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"crypto/sha512"
    27  	"hash/crc32"
    28  	"io"
    29  
    30  	"golang.org/x/sync/errgroup"
    31  
    32  	"github.com/dolthub/dolt/go/store/chunks"
    33  	"github.com/dolthub/dolt/go/store/hash"
    34  )
    35  
    36  /*
    37     An NBS Table stores N byte slices ("chunks") which are addressed by a 20-byte hash of their
    38     contents. The footer encodes N as well as the total bytes consumed by all contained chunks.
    39     An Index maps each address to the position of its corresponding chunk. Addresses are logically sorted within the Index, but the corresponding chunks need not be.
    40  
    41     Table:
    42     +----------------+----------------+-----+----------------+-------+--------+
    43     | Chunk Record 0 | Chunk Record 1 | ... | Chunk Record N | Index | Footer |
    44     +----------------+----------------+-----+----------------+-------+--------+
    45  
    46     Chunk Record:
    47     +---------------------------+----------------+
    48     | (Chunk Length) Chunk Data | (Uint32) CRC32 |
    49     +---------------------------+----------------+
    50  
    51     Index:
    52     +------------+---------+----------+
    53     | Prefix Map | Lengths | Suffixes |
    54     +------------+---------+----------+
    55  
    56     Prefix Map:
    57     +--------------+--------------+-----+----------------+
    58     | Prefix Tuple | Prefix Tuple | ... | Prefix Tuple N |
    59     +--------------+--------------+-----+----------------+
    60  
    61       -The Prefix Map contains N Prefix Tuples.
    62       -Each Prefix Tuple corresponds to a unique Chunk Record in the Table.
    63       -The Prefix Tuples are sorted in increasing lexicographic order within the Prefix Map.
    64       -NB: THE SAME PREFIX MAY APPEAR MULTIPLE TIMES, as distinct Hashes (referring to distinct Chunks) may share the same Prefix.
    65  
    66     Prefix Tuple:
    67     +-----------------+------------------+
    68     | (8) Hash Prefix | (Uint32) Ordinal |
    69     +-----------------+------------------+
    70  
    71       -First 8 bytes of a Chunk's Hash
    72       -Ordinal is the 0-based ordinal position of the associated record within the sequence of chunk records, the associated Length within Lengths, and the associated Hash Suffix within Suffixes.
    73  
    74     Lengths:
    75     +-----------------+-----------------+-----+-------------------+
    76     | (Uint32) Length | (Uint32) Length | ... | (Uint32) Length N |
    77     +-----------------+-----------------+-----+-------------------+
    78  
    79       - Each Length is the length of a Chunk Record in this Table.
    80       - Length M must correspond to Chunk Record M for 0 <= M < N
    81  
    82     Suffixes:
    83     +------------------+------------------+-----+--------------------+
    84     | (12) Hash Suffix | (12) Hash Suffix | ... | (12) Hash Suffix N |
    85     +------------------+------------------+-----+--------------------+
    86  
    87       - Each Hash Suffix is the last 12 bytes of the Hash of a Chunk in this Table.
    88       - Hash Suffix M must correspond to Chunk Record M for 0 <= M < N
    89  
    90     Footer:
    91     +----------------------+----------------------------------------+------------------+
    92     | (Uint32) Chunk Count | (Uint64) Total Uncompressed Chunk Data | (8) Magic Number |
    93     +----------------------+----------------------------------------+------------------+
    94  
    95       -Total Uncompressed Chunk Data is the sum of the uncompressed byte lengths of all contained chunk byte slices.
    96       -Magic Number is the first 8 bytes of the SHA256 hash of "https://github.com/attic-labs/nbs".
    97  
    98      NOTE: Unsigned integer quantities, hashes and hash suffixes are all encoded big-endian
    99  
   100  
   101    Looking up Chunks in an NBS Table
   102    There are two phases to loading chunk data for a given Hash from an NBS Table: Checking for the chunk's presence, and fetching the chunk's bytes. When performing a has-check, only the first phase is necessary.
   103  
   104    Phase one: Chunk presence
   105    - Slice off the first 8 bytes of your Hash to create a Prefix
   106    - Since the Prefix Tuples in the Prefix Map are in lexicographic order, binary search the Prefix Map for the desired Prefix.
   107    - For all Prefix Tuples with a matching Prefix:
   108      - Load the Ordinal
   109      - Use the Ordinal to index into Suffixes
   110      - Check the Suffix of your Hash against the loaded Suffix
   111      - If they match, your chunk is in this Table in the Chunk Record indicated by Ordinal
   112      - If they don't match, continue to the next matching Prefix Tuple
   113    - If not found, your chunk is not in this Table.
   114  
   115    Phase two: Loading Chunk data
   116    - Take the Ordinal discovered in Phase one
   117    - Calculate the Offset of your desired Chunk Record: Sum(Lengths[0]...Lengths[Ordinal-1])
   118    - Load Lengths[Ordinal] bytes from Table[Offset]
   119    - Verify that the CRC of the loaded bytes matches the CRC stored in the Chunk Record.
   120  
   121  */
   122  
// Byte sizes of the fixed-width fields in the NBS table layout, together with
// the magic numbers used to identify a table footer.
const (
	// Primitive widths first, then the field sizes built from them. Per the
	// table format description: Ordinal, Length and the per-record CRC32 are
	// Uint32 values; a Prefix Tuple is a hash prefix followed by an Ordinal;
	// the Footer is a Uint32 chunk count, a Uint64 total of uncompressed chunk
	// data, and the magic number. The sizes are kept as untyped constants so
	// they can be mixed with any integer type.
	uint64Size      = 8
	uint32Size      = 4
	ordinalSize     = uint32Size
	lengthSize      = uint32Size
	offsetSize      = uint64Size
	magicNumber     = "\xff\xb5\xd8\xc2\x24\x63\xee\x50"
	magicNumberSize = 8 //len(magicNumber)
	footerSize      = uint32Size + uint64Size + magicNumberSize
	prefixTupleSize = hash.PrefixLen + ordinalSize
	checksumSize    = uint32Size
	maxChunkSize    = 0xffffffff // Snappy won't compress slices bigger than this

	// Magic number of a format this package does not read; it is recognized only
	// so that a meaningful error can be reported.
	doltMagicNumber = "DOLTARC" // NBS doesn't support this, but we want to give a reasonable error message if one is encountered.
	doltMagicSize   = 7         // len(doltMagicNumber)
)
   139  
   140  var crcTable = crc32.MakeTable(crc32.Castagnoli)
   141  
   142  func crc(b []byte) uint32 {
   143  	return crc32.Update(0, crcTable, b)
   144  }
   145  
   146  func computeHashDefault(data []byte) hash.Hash {
   147  	r := sha512.Sum512(data)
   148  	return hash.New(r[:hash.ByteLen])
   149  }
   150  
   151  var computeAddr = computeHashDefault
   152  
// hasRecord is a single presence query passed to chunkReader.hasMany, which
// sets |has| to true for each query whose chunk is present.
type hasRecord struct {
	a      *hash.Hash // address being queried
	prefix uint64     // sort key for hasRecordByPrefix; presumably the hash prefix of |a| — confirm
	order  int        // sort key for hasRecordByOrder; presumably the query's original position — confirm
	has    bool       // set to true by hasMany when the chunk is present
}
   159  
   160  type hasRecordByPrefix []hasRecord
   161  
   162  func (hs hasRecordByPrefix) Len() int           { return len(hs) }
   163  func (hs hasRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix }
   164  func (hs hasRecordByPrefix) Swap(i, j int)      { hs[i], hs[j] = hs[j], hs[i] }
   165  
   166  type hasRecordByOrder []hasRecord
   167  
   168  func (hs hasRecordByOrder) Len() int           { return len(hs) }
   169  func (hs hasRecordByOrder) Less(i, j int) bool { return hs[i].order < hs[j].order }
   170  func (hs hasRecordByOrder) Swap(i, j int)      { hs[i], hs[j] = hs[j], hs[i] }
   171  
// getRecord is a single retrieval query. As described on the chunkReader and
// chunkSource methods, getMany, getManyCompressed and getRecordRanges set
// |found| to true for each query whose chunk is present.
type getRecord struct {
	a      *hash.Hash // address being requested
	prefix uint64     // sort key for getRecordByPrefix; presumably the hash prefix of |a| — confirm
	found  bool       // set to true once the chunk has been located
}
   177  
   178  type getRecordByPrefix []getRecord
   179  
   180  func (hs getRecordByPrefix) Len() int           { return len(hs) }
   181  func (hs getRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix }
   182  func (hs getRecordByPrefix) Swap(i, j int)      { hs[i], hs[j] = hs[j], hs[i] }
   183  
// extractRecord bundles a chunk address with a byte slice and an error.
// NOTE(review): no producer or consumer is visible in this section; presumably
// |data| is the chunk's bytes and |err| reports a failure while extracting that
// chunk — confirm against callers.
type extractRecord struct {
	a    hash.Hash
	data []byte
	err  error
}
   189  
// chunkReader is the query interface over a collection of chunks: presence
// checks, chunk retrieval (single and batched), summary counts, and release of
// held resources.
type chunkReader interface {
	// has returns true if a chunk with addr |h| is present.
	has(h hash.Hash) (bool, error)

	// hasMany sets hasRecord.has to true for each present hasRecord query. It returns
	// true if any hasRecord query was not found in this chunkReader.
	hasMany(addrs []hasRecord) (bool, error)

	// get returns the chunk data for a chunk with addr |h| if present, and nil otherwise.
	get(ctx context.Context, h hash.Hash, stats *Stats) ([]byte, error)

	// getMany sets getRecord.found to true, and calls |found| for each present getRecord query.
	// It returns true if any getRecord query was not found in this chunkReader.
	getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, *chunks.Chunk), stats *Stats) (bool, error)

	// getManyCompressed sets getRecord.found to true, and calls |found| for each present getRecord query.
	// It returns true if any getRecord query was not found in this chunkReader.
	getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error)

	// count returns the chunk count for this chunkReader.
	count() (uint32, error)

	// uncompressedLen returns the total uncompressed length of this chunkReader.
	uncompressedLen() (uint64, error)

	// close releases resources retained by the |chunkReader|.
	close() error
}
   218  
// chunkSource is a chunkReader that additionally exposes its own hash address,
// raw access to its chunk data, its table index, and cloning.
type chunkSource interface {
	chunkReader

	// hash returns the hash address of this chunkSource.
	hash() hash.Hash

	// reader opens a Reader to the first byte of the chunkData segment of this table.
	// NOTE(review): the uint64 result is presumably the number of bytes available
	// from the Reader — confirm against implementations.
	reader(context.Context) (io.ReadCloser, uint64, error)

	// getRecordRanges sets getRecord.found to true, and returns a Range for each present getRecord query.
	getRecordRanges(requests []getRecord) (map[hash.Hash]Range, error)

	// index returns the tableIndex of this chunkSource.
	index() (tableIndex, error)

	// clone returns a |chunkSource| with the same contents as the
	// original, but with independent |close| behavior. A |chunkSource|
	// cannot be |close|d more than once, so if a |chunkSource| is being
	// retained in two objects with independent life-cycle, it should be
	// |clone|d first.
	clone() (chunkSource, error)

	// currentSize returns the current total physical size of the chunkSource.
	currentSize() uint64
}
   244  
// chunkSources is an ordered list of chunkSource values.
type chunkSources []chunkSource

// chunkSourceSet is a collection of chunkSource values keyed by hash.Hash
// (presumably each source's own hash() address — confirm against callers).
type chunkSourceSet map[hash.Hash]chunkSource
   248  
   249  func copyChunkSourceSet(s chunkSourceSet) (cp chunkSourceSet) {
   250  	cp = make(chunkSourceSet, len(s))
   251  	for k, v := range s {
   252  		cp[k] = v
   253  	}
   254  	return
   255  }