github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/table.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package nbs 6 7 import ( 8 "bytes" 9 "crypto/sha512" 10 "encoding/base32" 11 "encoding/binary" 12 "hash/crc32" 13 "io" 14 "sync" 15 16 "github.com/attic-labs/noms/go/chunks" 17 ) 18 19 /* 20 An NBS Table stores N byte slices ("chunks") which are addressed by a 20-byte hash of their 21 contents. The footer encodes N as well as the total bytes consumed by all contained chunks. 22 An Index maps each address to the position of its corresponding chunk. Addresses are logically sorted within the Index, but the corresponding chunks need not be. 23 24 Table: 25 +----------------+----------------+-----+----------------+-------+--------+ 26 | Chunk Record 0 | Chunk Record 1 | ... | Chunk Record N | Index | Footer | 27 +----------------+----------------+-----+----------------+-------+--------+ 28 29 Chunk Record: 30 +---------------------------+----------------+ 31 | (Chunk Length) Chunk Data | (Uint32) CRC32 | 32 +---------------------------+----------------+ 33 34 Index: 35 +------------+---------+----------+ 36 | Prefix Map | Lengths | Suffixes | 37 +------------+---------+----------+ 38 39 Prefix Map: 40 +--------------+--------------+-----+----------------+ 41 | Prefix Tuple | Prefix Tuple | ... | Prefix Tuple N | 42 +--------------+--------------+-----+----------------+ 43 44 -The Prefix Map contains N Prefix Tuples. 45 -Each Prefix Tuple corresponds to a unique Chunk Record in the Table. 46 -The Prefix Tuples are sorted in increasing lexicographic order within the Prefix Map. 47 -NB: THE SAME PREFIX MAY APPEAR MULTIPLE TIMES, as distinct Hashes (referring to distinct Chunks) may share the same Prefix. 
48 49 Prefix Tuple: 50 +-----------------+------------------+ 51 | (8) Hash Prefix | (Uint32) Ordinal | 52 +-----------------+------------------+ 53 54 -First 8 bytes of a Chunk's Hash 55 -Ordinal is the 0-based ordinal position of the associated record within the sequence of chunk records, the associated Length within Lengths, and the associated Hash Suffix within Suffixes. 56 57 Lengths: 58 +-----------------+-----------------+-----+-------------------+ 59 | (Uint32) Length | (Uint32) Length | ... | (Uint32) Length N | 60 +-----------------+-----------------+-----+-------------------+ 61 62 - Each Length is the length of a Chunk Record in this Table. 63 - Length M must correspond to Chunk Record M for 0 <= M <= N 64 65 Suffixes: 66 +------------------+------------------+-----+--------------------+ 67 | (12) Hash Suffix | (12) Hash Suffix | ... | (12) Hash Suffix N | 68 +------------------+------------------+-----+--------------------+ 69 70 - Each Hash Suffix is the last 12 bytes of the Hash of a Chunk in this Table. 71 - Hash Suffix M must correspond to Chunk Record M for 0 <= M <= N 72 73 Footer: 74 +----------------------+----------------------------------------+------------------+ 75 | (Uint32) Chunk Count | (Uint64) Total Uncompressed Chunk Data | (8) Magic Number | 76 +----------------------+----------------------------------------+------------------+ 77 78 -Total Uncompressed Chunk Data is the sum of the uncompressed byte lengths of all contained chunk byte slices. 79 -Magic Number is the first 8 bytes of the SHA256 hash of "https://github.com/attic-labs/nbs". 80 81 NOTE: Unsigned integer quantities, hashes and hash suffixes are all encoded big-endian 82 83 84 Looking up Chunks in an NBS Table 85 There are two phases to loading chunk data for a given Hash from an NBS Table: Checking for the chunk's presence, and fetching the chunk's bytes. When performing a has-check, only the first phase is necessary. 
86 87 Phase one: Chunk presence 88 - Slice off the first 8 bytes of your Hash to create a Prefix 89 - Since the Prefix Tuples in the Prefix Map are in lexicographic order, binary search the Prefix Map for the desired Prefix. 90 - For all Prefix Tuples with a matching Prefix: 91 - Load the Ordinal 92 - Use the Ordinal to index into Suffixes 93 - Check the Suffix of your Hash against the loaded Suffix 94 - If they match, your chunk is in this Table in the Chunk Record indicated by Ordinal 95 - If they don't match, continue to the next matching Prefix Tuple 96 - If not found, your chunk is not in this Table. 97 98 Phase two: Loading Chunk data 99 - Take the Ordinal discovered in Phase one 100 - Calculate the Offset of your desired Chunk Record: Sum(Lengths[0]...Lengths[Ordinal-1]) 101 - Load Lengths[Ordinal] bytes from Table[Offset] 102 - Check the first 4 bytes of the loaded data against the last 4 bytes of your desired Hash. They should match, and the rest of the data is your Chunk data. 
103 */ 104 105 const ( 106 addrSize uint64 = 20 107 addrPrefixSize uint64 = 8 108 addrSuffixSize = addrSize - addrPrefixSize 109 uint64Size uint64 = 8 110 uint32Size uint64 = 4 111 ordinalSize uint64 = uint32Size 112 lengthSize uint64 = uint32Size 113 magicNumber = "\xff\xb5\xd8\xc2\x24\x63\xee\x50" 114 magicNumberSize uint64 = uint64(len(magicNumber)) 115 footerSize = uint32Size + uint64Size + magicNumberSize 116 prefixTupleSize = addrPrefixSize + ordinalSize 117 checksumSize uint64 = uint32Size 118 maxChunkLengthSize uint64 = binary.MaxVarintLen64 119 maxChunkSize uint64 = 0xffffffff // Snappy won't compress slices bigger than this 120 ) 121 122 var crcTable = crc32.MakeTable(crc32.Castagnoli) 123 124 func crc(b []byte) uint32 { 125 return crc32.Update(0, crcTable, b) 126 } 127 128 func computeAddrDefault(data []byte) addr { 129 r := sha512.Sum512(data) 130 h := addr{} 131 copy(h[:], r[:addrSize]) 132 return h 133 } 134 135 var computeAddr = computeAddrDefault 136 137 type addr [addrSize]byte 138 139 var encoding = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv") 140 141 func (a addr) String() string { 142 return encoding.EncodeToString(a[:]) 143 } 144 145 func (a addr) Prefix() uint64 { 146 return binary.BigEndian.Uint64(a[:]) 147 } 148 149 func (a addr) Checksum() uint32 { 150 return binary.BigEndian.Uint32(a[addrSize-checksumSize:]) 151 } 152 153 func ParseAddr(b []byte) (h addr) { 154 encoding.Decode(h[:], b) 155 return 156 } 157 158 func ValidateAddr(s string) bool { 159 _, err := encoding.DecodeString(s) 160 return err == nil 161 } 162 163 type addrSlice []addr 164 165 func (hs addrSlice) Len() int { return len(hs) } 166 func (hs addrSlice) Less(i, j int) bool { return bytes.Compare(hs[i][:], hs[j][:]) < 0 } 167 func (hs addrSlice) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 168 169 type hasRecord struct { 170 a *addr 171 prefix uint64 172 order int 173 has bool 174 } 175 176 type hasRecordByPrefix []hasRecord 177 178 func (hs hasRecordByPrefix) 
Len() int { return len(hs) } 179 func (hs hasRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } 180 func (hs hasRecordByPrefix) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 181 182 type hasRecordByOrder []hasRecord 183 184 func (hs hasRecordByOrder) Len() int { return len(hs) } 185 func (hs hasRecordByOrder) Less(i, j int) bool { return hs[i].order < hs[j].order } 186 func (hs hasRecordByOrder) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 187 188 type getRecord struct { 189 a *addr 190 prefix uint64 191 found bool 192 } 193 194 type getRecordByPrefix []getRecord 195 196 func (hs getRecordByPrefix) Len() int { return len(hs) } 197 func (hs getRecordByPrefix) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix } 198 func (hs getRecordByPrefix) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] } 199 200 type extractRecord struct { 201 a addr 202 data []byte 203 err interface{} // only set when there was a panic during extraction. 204 } 205 206 type chunkReader interface { 207 has(h addr) bool 208 hasMany(addrs []hasRecord) bool 209 get(h addr, stats *Stats) []byte 210 getMany(reqs []getRecord, foundChunks chan *chunks.Chunk, wg *sync.WaitGroup, stats *Stats) bool 211 count() uint32 212 uncompressedLen() uint64 213 extract(chunks chan<- extractRecord) 214 } 215 216 type chunkReadPlanner interface { 217 findOffsets(reqs []getRecord) (ors offsetRecSlice, remaining bool) 218 getManyAtOffsets( 219 reqs []getRecord, 220 offsetRecords offsetRecSlice, 221 foundChunks chan *chunks.Chunk, 222 wg *sync.WaitGroup, 223 stats *Stats, 224 ) (remaining bool) 225 } 226 227 type chunkSource interface { 228 chunkReader 229 hash() addr 230 calcReads(reqs []getRecord, blockSize uint64) (reads int, remaining bool) 231 232 // opens a Reader to the first byte of the chunkData segment of this table. 233 reader() io.Reader 234 index() tableIndex 235 } 236 237 type chunkSources []chunkSource