github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/journal_index_record.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nbs 16 17 import ( 18 "bufio" 19 "encoding/binary" 20 "errors" 21 "fmt" 22 "hash/crc32" 23 "io" 24 25 "github.com/dolthub/dolt/go/store/d" 26 "github.com/dolthub/dolt/go/store/hash" 27 ) 28 29 // indexRec is a record in a chunk journal index file. Index records 30 // serve as out-of-band chunk indexes into the chunk journal that allow 31 // bootstrapping the journal without reading each record in the journal. 32 // 33 // Like journalRec, its serialization format uses uint8 tag prefixes 34 // to identify fields and allow for format evolution. 35 type indexRec struct { 36 // index record length 37 length uint32 38 39 // root hash of commit when this index record was written 40 lastRoot hash.Hash 41 42 // file offsets for the region of the journal file 43 // that |payload| indexes. end points to a root hash 44 // record in the journal containing |lastRoot|. 45 // we expect a sequence of index records to cover 46 // contiguous regions of the journal file. 47 start, end uint64 48 49 // index record kind 50 kind indexRecKind 51 52 // encoded chunk index 53 payload []byte 54 55 // index record crc32 checksum 56 checksum uint32 57 } 58 59 type indexRecKind uint8 60 61 const ( 62 unknownIndexRecKind indexRecKind = 0 63 tableIndexRecKind indexRecKind = 1 64 ) 65 66 type indexRecTag uint8 67 68 const ( 69 unknownIndexRecTag indexRecTag = 0 70 lastRootIndexRecTag indexRecTag = 1 71 startOffsetIndexRecTag indexRecTag = 2 72 endOffsetIndexRecTag indexRecTag = 3 73 kindIndexRecTag indexRecTag = 4 74 payloadIndexRecTag indexRecTag = 5 75 ) 76 77 const ( 78 indexRecTagSz = 1 79 indexRecLenSz = 4 80 indexRecKindSz = 1 81 indexRecLastRootSz = 20 82 indexRecOffsetSz = 8 83 indexRecChecksumSz = 4 84 lookupSz = 16 + uint64Size + uint32Size 85 lookupMetaSz = uint64Size + uint64Size + uint32Size + hash.ByteLen 86 ) 87 88 func journalIndexRecordSize(idx []byte) (recordSz uint32) { 89 recordSz += indexRecLenSz 90 recordSz += indexRecTagSz + indexRecLastRootSz 91 recordSz += indexRecTagSz + indexRecOffsetSz 92 recordSz += indexRecTagSz + indexRecOffsetSz 93 recordSz += indexRecTagSz + indexRecKindSz 94 recordSz += indexRecTagSz // payload tag 95 recordSz += uint32(len(idx)) 96 recordSz += indexRecChecksumSz 97 return 98 } 99 100 func writeJournalIndexRecord(buf []byte, root hash.Hash, start, end uint64, idx []byte) (n uint32) { 101 //defer trace.StartRegion(ctx, "writeJournalIndexRecord").End() 102 103 // length 104 l := journalIndexRecordSize(idx) 105 writeUint32(buf[:indexRecLenSz], l) 106 n += indexRecLenSz 107 // last root 108 buf[n] = byte(lastRootIndexRecTag) 109 n += indexRecTagSz 110 copy(buf[n:], root[:]) 111 n += indexRecLastRootSz 112 // start offset 113 buf[n] = byte(startOffsetIndexRecTag) 114 n += indexRecTagSz 115 writeUint64(buf[n:], start) 116 n += indexRecOffsetSz 117 // end offset 118 buf[n] = byte(endOffsetIndexRecTag) 119 n += indexRecTagSz 120 writeUint64(buf[n:], end) 121 n += indexRecOffsetSz 122 // kind 123 buf[n] = byte(kindIndexRecTag) 124 n += indexRecTagSz 125 buf[n] = byte(tableIndexRecKind) 126 n += indexRecKindSz 127 // payload 128 buf[n] = byte(payloadIndexRecTag) 129 n += indexRecTagSz 130 copy(buf[n:], idx) 131 n += uint32(len(idx)) 132 // checksum 133 writeUint32(buf[n:], crc(buf[:n])) 134 n += indexRecChecksumSz 135 d.PanicIfFalse(l == n) 136 return 137 } 138 139 func readJournalIndexRecord(buf []byte) (rec indexRec, err error) { 140 rec.length = readUint32(buf) 141 buf = buf[indexRecLenSz:] 142 for len(buf) > indexRecChecksumSz { 143 tag := indexRecTag(buf[0]) 144 buf = buf[indexRecTagSz:] 145 switch tag { 146 case lastRootIndexRecTag: 147 copy(rec.lastRoot[:], buf) 148 buf = buf[indexRecLastRootSz:] 149 case startOffsetIndexRecTag: 150 rec.start = readUint64(buf) 151 buf = buf[indexRecOffsetSz:] 152 case endOffsetIndexRecTag: 153 rec.end = readUint64(buf) 154 buf = buf[indexRecOffsetSz:] 155 case kindIndexRecTag: 156 rec.kind = indexRecKind(buf[0]) 157 buf = buf[indexRecKindSz:] 158 case payloadIndexRecTag: 159 sz := len(buf) - indexRecChecksumSz 160 rec.payload = buf[:sz] 161 buf = buf[sz:] 162 case unknownIndexRecTag: 163 fallthrough 164 default: 165 err = fmt.Errorf("unknown record field tag: %d", tag) 166 return 167 } 168 } 169 rec.checksum = readUint32(buf[:indexRecChecksumSz]) 170 return 171 } 172 173 func validateIndexRecord(buf []byte) bool { 174 if len(buf) < (indexRecLenSz + indexRecChecksumSz) { 175 return false 176 } 177 off := readUint32(buf) 178 if int(off) > len(buf) { 179 return false 180 } 181 off -= indexRecChecksumSz 182 return crc(buf[:off]) == readUint32(buf[off:]) 183 } 184 185 type lookupMeta struct { 186 batchStart int64 187 batchEnd int64 188 checkSum uint32 189 latestHash hash.Hash 190 } 191 192 const indexRecTypeSize = 1 193 const ( 194 indexRecChunk byte = iota 195 indexRecMeta 196 ) 197 198 // processIndexRecords reads batches of chunk index lookups into the journal. 199 // An index batch looks like |lookup|lookup|...|meta|. The first byte of a record 200 // indicates whether it is a |lookup| or |meta|. Only callback errors are returned. 201 // The caller is expected to track the latest lookupMeta end offset and truncate 202 // the index to compensate for partially written batches. 203 func processIndexRecords(rd *bufio.Reader, sz int64, cb func(lookupMeta, []lookup, uint32) error) (off int64, err error) { 204 var batchCrc uint32 205 var batch []lookup 206 var batchOff int64 207 for off < sz { 208 recTag, err := rd.ReadByte() 209 if err != nil { 210 return off, nil 211 } 212 batchOff += 1 213 214 switch recTag { 215 case indexRecChunk: 216 l, err := readIndexLookup(rd) 217 if err != nil { 218 return off, nil 219 } 220 batchOff += lookupSz 221 batch = append(batch, l) 222 batchCrc = crc32.Update(batchCrc, crcTable, l.a[:]) 223 224 case indexRecMeta: 225 m, err := readIndexMeta(rd) 226 if err != nil { 227 return off, nil 228 } 229 if err := cb(m, batch, batchCrc); err != nil { 230 return off, err 231 } 232 batch = nil 233 batchCrc = 0 234 off += batchOff + lookupMetaSz 235 batchOff = 0 236 default: 237 return off, ErrMalformedIndex 238 } 239 } 240 return off, nil 241 } 242 243 var ErrMalformedIndex = errors.New("journal index is malformed") 244 245 // readIndexLookup reads a sequence of |chunkAddress|journalOffset|chunkLength| 246 // that is used to speed up |journal.ranges| initialization. 247 func readIndexLookup(r *bufio.Reader) (lookup, error) { 248 addr := addr16{} 249 if _, err := io.ReadFull(r, addr[:]); err != nil { 250 return lookup{}, err 251 } 252 253 var offsetBuf [uint64Size]byte 254 if _, err := io.ReadFull(r, offsetBuf[:]); err != nil { 255 return lookup{}, err 256 } 257 offset := binary.BigEndian.Uint64(offsetBuf[:]) 258 259 var lengthBuf [uint32Size]byte 260 if _, err := io.ReadFull(r, lengthBuf[:]); err != nil { 261 return lookup{}, err 262 } 263 length := binary.BigEndian.Uint32(lengthBuf[:]) 264 265 return lookup{a: addr, r: Range{Offset: offset, Length: length}}, nil 266 } 267 268 // readIndexMeta reads a sequence of |journalStart|journalEnd|lastRootHash|checksum| 269 // that is used to validate a range of lookups on read. A corrupted lookup in the 270 // start-end range will cause the checksum/crc check to fail. The last root hash 271 // is a duplicate sanity check. 272 func readIndexMeta(r *bufio.Reader) (lookupMeta, error) { 273 var startBuf [offsetSize]byte 274 if _, err := io.ReadFull(r, startBuf[:]); err != nil { 275 return lookupMeta{}, err 276 } 277 startOff := binary.BigEndian.Uint64(startBuf[:]) 278 279 var endBuf [offsetSize]byte 280 if _, err := io.ReadFull(r, endBuf[:]); err != nil { 281 return lookupMeta{}, err 282 } 283 endOff := binary.BigEndian.Uint64(endBuf[:]) 284 285 var checksumBuf [checksumSize]byte 286 if _, err := io.ReadFull(r, checksumBuf[:]); err != nil { 287 return lookupMeta{}, err 288 } 289 checksum := binary.BigEndian.Uint32(checksumBuf[:]) 290 291 addr := hash.Hash{} 292 if _, err := io.ReadFull(r, addr[:]); err != nil { 293 return lookupMeta{}, err 294 } 295 296 return lookupMeta{ 297 batchStart: int64(startOff), 298 batchEnd: int64(endOff), 299 checkSum: checksum, 300 latestHash: addr, 301 }, nil 302 } 303 304 func writeIndexLookup(w *bufio.Writer, l lookup) error { 305 w.WriteByte(indexRecChunk) 306 307 if _, err := w.Write(l.a[:]); err != nil { 308 return err 309 } 310 311 var offsetBuf [offsetSize]byte 312 binary.BigEndian.PutUint64(offsetBuf[:], l.r.Offset) 313 if _, err := w.Write(offsetBuf[:]); err != nil { 314 return err 315 } 316 317 var lengthBuf [lengthSize]byte 318 binary.BigEndian.PutUint32(lengthBuf[:], l.r.Length) 319 if _, err := w.Write(lengthBuf[:]); err != nil { 320 return err 321 } 322 323 return nil 324 } 325 326 // writeJournalIndexMeta writes a metadata record for an index range to verify 327 // index bootstrapping integrity. Includes the range of index lookups, a CRC 328 // checksum, and the latest root hash before |end|. 329 func writeJournalIndexMeta(w *bufio.Writer, root hash.Hash, start, end int64, checksum uint32) error { 330 // |journal start|journal end|last root hash|range checkSum| 331 332 if err := w.WriteByte(indexRecMeta); err != nil { 333 return err 334 } 335 336 startBuf := make([]byte, offsetSize) 337 binary.BigEndian.PutUint64(startBuf, uint64(start)) 338 if _, err := w.Write(startBuf); err != nil { 339 return err 340 } 341 342 endBuf := make([]byte, offsetSize) 343 binary.BigEndian.PutUint64(endBuf, uint64(end)) 344 if _, err := w.Write(endBuf); err != nil { 345 return err 346 } 347 348 checksumBuf := make([]byte, checksumSize) 349 binary.BigEndian.PutUint32(checksumBuf, checksum) 350 if _, err := w.Write(checksumBuf); err != nil { 351 return err 352 } 353 354 if _, err := w.Write(root[:]); err != nil { 355 return err 356 } 357 358 return nil 359 } 360 361 type lookup struct { 362 a addr16 363 r Range 364 }