github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/table.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "context" 26 "crypto/sha512" 27 "hash/crc32" 28 "io" 29 30 "golang.org/x/sync/errgroup" 31 32 "github.com/dolthub/dolt/go/store/chunks" 33 "github.com/dolthub/dolt/go/store/hash" 34 ) 35 36 /* 37 An NBS Table stores N byte slices ("chunks") which are addressed by a 20-byte hash of their 38 contents. The footer encodes N as well as the total bytes consumed by all contained chunks. 39 An Index maps each address to the position of its corresponding chunk. Addresses are logically sorted within the Index, but the corresponding chunks need not be. 40 41 Table: 42 +----------------+----------------+-----+----------------+-------+--------+ 43 | Chunk Record 0 | Chunk Record 1 | ... 
| Chunk Record N | Index | Footer | 44 +----------------+----------------+-----+----------------+-------+--------+ 45 46 Chunk Record: 47 +---------------------------+----------------+ 48 | (Chunk Length) Chunk Data | (Uint32) CRC32 | 49 +---------------------------+----------------+ 50 51 Index: 52 +------------+---------+----------+ 53 | Prefix Map | Lengths | Suffixes | 54 +------------+---------+----------+ 55 56 Prefix Map: 57 +--------------+--------------+-----+----------------+ 58 | Prefix Tuple | Prefix Tuple | ... | Prefix Tuple N | 59 +--------------+--------------+-----+----------------+ 60 61 -The Prefix Map contains N Prefix Tuples. 62 -Each Prefix Tuple corresponds to a unique Chunk Record in the Table. 63 -The Prefix Tuples are sorted in increasing lexicographic order within the Prefix Map. 64 -NB: THE SAME PREFIX MAY APPEAR MULTIPLE TIMES, as distinct Hashes (referring to distinct Chunks) may share the same Prefix. 65 66 Prefix Tuple: 67 +-----------------+------------------+ 68 | (8) Hash Prefix | (Uint32) Ordinal | 69 +-----------------+------------------+ 70 71 -First 8 bytes of a Chunk's Hash 72 -Ordinal is the 0-based ordinal position of the associated record within the sequence of chunk records, the associated Length within Lengths, and the associated Hash Suffix within Suffixes. 73 74 Lengths: 75 +-----------------+-----------------+-----+-------------------+ 76 | (Uint32) Length | (Uint32) Length | ... | (Uint32) Length N | 77 +-----------------+-----------------+-----+-------------------+ 78 79 - Each Length is the length of a Chunk Record in this Table. 80 - Length M must correspond to Chunk Record M for 0 <= M < N 81 82 Suffixes: 83 +------------------+------------------+-----+--------------------+ 84 | (12) Hash Suffix | (12) Hash Suffix | ... | (12) Hash Suffix N | 85 +------------------+------------------+-----+--------------------+ 86 87 - Each Hash Suffix is the last 12 bytes of the Hash of a Chunk in this Table. 
88 - Hash Suffix M must correspond to Chunk Record M for 0 <= M < N 89 90 Footer: 91 +----------------------+----------------------------------------+------------------+ 92 | (Uint32) Chunk Count | (Uint64) Total Uncompressed Chunk Data | (8) Magic Number | 93 +----------------------+----------------------------------------+------------------+ 94 95 -Total Uncompressed Chunk Data is the sum of the uncompressed byte lengths of all contained chunk byte slices. 96 -Magic Number is the first 8 bytes of the SHA256 hash of "https://github.com/attic-labs/nbs". 97 98 NOTE: Unsigned integer quantities, hashes and hash suffixes are all encoded big-endian 99 100 101 Looking up Chunks in an NBS Table 102 There are two phases to loading chunk data for a given Hash from an NBS Table: Checking for the chunk's presence, and fetching the chunk's bytes. When performing a has-check, only the first phase is necessary. 103 104 Phase one: Chunk presence 105 - Slice off the first 8 bytes of your Hash to create a Prefix 106 - Since the Prefix Tuples in the Prefix Map are in lexicographic order, binary search the Prefix Map for the desired Prefix. 107 - For all Prefix Tuples with a matching Prefix: 108 - Load the Ordinal 109 - Use the Ordinal to index into Suffixes 110 - Check the Suffix of your Hash against the loaded Suffix 111 - If they match, your chunk is in this Table in the Chunk Record indicated by Ordinal 112 - If they don't match, continue to the next matching Prefix Tuple 113 - If not found, your chunk is not in this Table. 114 115 Phase two: Loading Chunk data 116 - Take the Ordinal discovered in Phase one 117 - Calculate the Offset of your desired Chunk Record: Sum(Lengths[0]...Lengths[Ordinal-1]) 118 - Load Lengths[Ordinal] bytes from Table[Offset] 119 - Verify that the CRC of the loaded bytes matches the CRC stored in the Chunk Record. 
120 121 */ 122 123 const ( 124 uint64Size = 8 125 uint32Size = 4 126 ordinalSize = uint32Size 127 lengthSize = uint32Size 128 offsetSize = uint64Size 129 magicNumber = "\xff\xb5\xd8\xc2\x24\x63\xee\x50" 130 magicNumberSize = 8 //len(magicNumber) 131 footerSize = uint32Size + uint64Size + magicNumberSize 132 prefixTupleSize = hash.PrefixLen + ordinalSize 133 checksumSize = uint32Size 134 maxChunkSize = 0xffffffff // Snappy won't compress slices bigger than this 135 136 doltMagicNumber = "DOLTARC" // NBS doesn't support this, but we want to give a reasonable error message if one is encountered. 137 doltMagicSize = 7 // len(doltMagicNumber) 138 ) 139 140 var crcTable = crc32.MakeTable(crc32.Castagnoli) 141 142 func crc(b []byte) uint32 { 143 return crc32.Update(0, crcTable, b) 144 } 145 146 func computeHashDefault(data []byte) hash.Hash { 147 r := sha512.Sum512(data) 148 return hash.New(r[:hash.ByteLen]) 149 } 150 151 var computeAddr = computeHashDefault 152 153 type hasRecord struct { 154 a *hash.Hash 155 prefix uint64 156 order int 157 has bool 158 } 159 160 type hasRecordByPrefix []hasRecord 161 162 func (hs hasRecordByPrefix) Len() int { return len(hs) } 163 func (hs hasRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } 164 func (hs hasRecordByPrefix) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 165 166 type hasRecordByOrder []hasRecord 167 168 func (hs hasRecordByOrder) Len() int { return len(hs) } 169 func (hs hasRecordByOrder) Less(i, j int) bool { return hs[i].order < hs[j].order } 170 func (hs hasRecordByOrder) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 171 172 type getRecord struct { 173 a *hash.Hash 174 prefix uint64 175 found bool 176 } 177 178 type getRecordByPrefix []getRecord 179 180 func (hs getRecordByPrefix) Len() int { return len(hs) } 181 func (hs getRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } 182 func (hs getRecordByPrefix) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 183 184 type 
extractRecord struct { 185 a hash.Hash 186 data []byte 187 err error 188 } 189 190 type chunkReader interface { 191 // has returns true if a chunk with addr |h| is present. 192 has(h hash.Hash) (bool, error) 193 194 // hasMany sets hasRecord.has to true for each present hasRecord query, it returns 195 // true if any hasRecord query was not found in this chunkReader. 196 hasMany(addrs []hasRecord) (bool, error) 197 198 // get returns the chunk data for a chunk with addr |h| if present, and nil otherwise. 199 get(ctx context.Context, h hash.Hash, stats *Stats) ([]byte, error) 200 201 // getMany sets getRecord.found to true, and calls |found| for each present getRecord query. 202 // It returns true if any getRecord query was not found in this chunkReader. 203 getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, *chunks.Chunk), stats *Stats) (bool, error) 204 205 // getManyCompressed sets getRecord.found to true, and calls |found| for each present getRecord query. 206 // It returns true if any getRecord query was not found in this chunkReader. 207 getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) 208 209 // count returns the chunk count for this chunkReader. 210 count() (uint32, error) 211 212 // uncompressedLen returns the total uncompressed length this chunkReader. 213 uncompressedLen() (uint64, error) 214 215 // close releases resources retained by the |chunkReader|. 216 close() error 217 } 218 219 type chunkSource interface { 220 chunkReader 221 222 // hash returns the hash address of this chunkSource. 223 hash() hash.Hash 224 225 // opens a Reader to the first byte of the chunkData segment of this table. 226 reader(context.Context) (io.ReadCloser, uint64, error) 227 228 // getRecordRanges sets getRecord.found to true, and returns a Range for each present getRecord query. 
229 getRecordRanges(requests []getRecord) (map[hash.Hash]Range, error) 230 231 // index returns the tableIndex of this chunkSource. 232 index() (tableIndex, error) 233 234 // clone returns a |chunkSource| with the same contents as the 235 // original, but with independent |Close| behavior. A |chunkSource| 236 // cannot be |Close|d more than once, so if a |chunkSource| is being 237 // retained in two objects with independent life-cycle, it should be 238 // |Clone|d first. 239 clone() (chunkSource, error) 240 241 // currentSize returns the current total physical size of the chunkSource. 242 currentSize() uint64 243 } 244 245 type chunkSources []chunkSource 246 247 type chunkSourceSet map[hash.Hash]chunkSource 248 249 func copyChunkSourceSet(s chunkSourceSet) (cp chunkSourceSet) { 250 cp = make(chunkSourceSet, len(s)) 251 for k, v := range s { 252 cp[k] = v 253 } 254 return 255 }