// Source listing: github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/archive_writer.go

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nbs

import (
	"bytes"
	"crypto/sha512"
	"encoding/binary"
	"fmt"
	"io"
	"sort"
	"sync"

	"github.com/dolthub/gozstd"

	"github.com/dolthub/dolt/go/store/hash"
)

// stagedByteSpanSlice holds one byteSpan per non-empty span written by writeByteSpan, in write order. A span's ID
// is its 1-based position in this slice.
type stagedByteSpanSlice []byteSpan

// stagedChunkRef is a chunk that has been staged for the index: the chunk's hash plus the byte span IDs of its
// dictionary and its data. stageChunk requires data to be a valid non-zero ID, while a dictionary ID of 0 is
// accepted (presumably meaning "no dictionary" - confirm against the archive reader).
type stagedChunkRef struct {
	hash             hash.Hash
	dictionary, data uint32
}

// stagedChunkRefSlice is the list of staged chunks. It implements sort.Interface, ordering by hash bytes.
type stagedChunkRefSlice []stagedChunkRef

// stage identifies which step of the archive writing workflow is allowed to run next.
type stage int

const (
	// stageByteSpan is the initial stage: writeByteSpan, stageChunk and finalizeByteSpans are allowed.
	stageByteSpan stage = iota
	// stageIndex is entered by finalizeByteSpans; writeIndex is allowed.
	stageIndex
	// stageMetadata is entered by writeIndex; writeMetadata is allowed.
	stageMetadata
	// stageFooter is entered by writeMetadata; writeFooter is allowed.
	stageFooter
	// stageFlush is entered by writeFooter; flushToFile is allowed.
	stageFlush
)

// archiveWriter builds an archive by writing its sections, in a fixed order, to a hashing byte sink. See the
// workflow description below for the required call sequence.
type archiveWriter struct {
	// output receives every byte of the archive. Its running hash is read and reset at the end of the data, index
	// and metadata sections to produce the per-section checksums.
	output *HashingByteSink

	// bytesWritten is the running count of bytes written to output.
	bytesWritten uint64

	// stagedBytes records the offset and length of every byte span written so far.
	stagedBytes stagedByteSpanSlice

	// stagedChunks records every chunk staged so far. writeIndex sorts it in place.
	stagedChunks stagedChunkRefSlice

	// seenChunks is the set of hashes already staged; it is used to reject duplicates.
	seenChunks hash.HashSet

	// indexLen is the size in bytes of the index section, set by writeIndex.
	indexLen uint32

	// metadataLen is the size in bytes of the metadata section, set by writeMetadata.
	metadataLen uint32

	// dataCheckSum is the checksum of the byte span section, set by finalizeByteSpans.
	dataCheckSum sha512Sum

	// indexCheckSum is the checksum of the index section, set by writeIndex.
	indexCheckSum sha512Sum

	// metadataCheckSum is the checksum of the metadata section, set by writeMetadata.
	metadataCheckSum sha512Sum

	// workflowStage is the workflow step the writer currently expects.
	workflowStage stage
}

/*
There is a workflow to writing an archive:
 1. writeByteSpan: Write a group of bytes to the archive. This will immediately write the bytes to the output, and
    return an ID for the byte span. Caller must keep track of this ID.
 2. stageChunk: Given a hash, dictionary (as byteSpan ID), and data (as byteSpan ID), stage a chunk for writing. This
    does not write anything to disk yet.
 3. Repeat steps 1 and 2 as necessary. You can interleave them, but all chunks must be staged before the next step.
 4. finalizeByteSpans: At this point, all byte spans have been written out, and the checksum for the data block
    is calculated. No more byte spans can be written after this step.
 5. writeIndex: Write the index to the archive. This will do all the work of writing the byte span map, prefix map,
    chunk references, and suffixes. Index checksum is calculated at the end of this step.
 6. writeMetadata: Write the metadataSpan to the archive. Calculate the metadataSpan checksum at the end of this step.
 7. writeFooter: Write the footer to the archive. This will write out the index length, byte span count, chunk count,
    metadataSpan length, the data/index/metadata checksums, the format version, and the file signature.
 8. flushToFile: Write the archive to disk and move into its new home.

When all of these steps have been completed without error, the ByteSink used to create the writer can be flushed and closed
to complete the archive writing process.
*/

// newArchiveWriterWithSink returns an archiveWriter that writes to bs through a SHA-512 hashing sink. The writer
// starts in the byte span stage (the zero value of stage) with an empty set of seen chunks.
func newArchiveWriterWithSink(bs ByteSink) *archiveWriter {
	hbs := NewSHA512HashingByteSink(bs)
	return &archiveWriter{output: hbs, seenChunks: hash.HashSet{}}
}

// writeByteSpan writes a byte span to the archive, returning the ByteSpan ID if the write was successful. Note
// that writing an empty byte span is a no-op and will return 0. Also, the slice passed in is copied, so the caller
// can reuse the slice after this call.
90 func (aw *archiveWriter) writeByteSpan(b []byte) (uint32, error) { 91 if aw.workflowStage != stageByteSpan { 92 return 0, fmt.Errorf("Runtime error: writeByteSpan called out of order") 93 } 94 95 if len(b) == 0 { 96 return 0, nil 97 } 98 99 offset := aw.bytesWritten 100 101 written, err := aw.output.Write(b) 102 if err != nil { 103 return 0, err 104 } 105 if written != len(b) { 106 return 0, io.ErrShortWrite 107 } 108 aw.bytesWritten += uint64(written) 109 110 aw.stagedBytes = append(aw.stagedBytes, byteSpan{offset, uint64(written)}) 111 112 return uint32(len(aw.stagedBytes)), nil 113 } 114 115 func (aw *archiveWriter) chunkSeen(h hash.Hash) bool { 116 return aw.seenChunks.Has(h) 117 } 118 119 func (aw *archiveWriter) stageChunk(hash hash.Hash, dictionary, data uint32) error { 120 if aw.workflowStage != stageByteSpan { 121 return fmt.Errorf("Runtime error: stageChunk called out of order") 122 } 123 124 if data == 0 || data > uint32(len(aw.stagedBytes)) { 125 return ErrInvalidChunkRange 126 } 127 if aw.seenChunks.Has(hash) { 128 return ErrDuplicateChunkWritten 129 } 130 if dictionary > uint32(len(aw.stagedBytes)) { 131 return ErrInvalidDictionaryRange 132 } 133 134 aw.seenChunks.Insert(hash) 135 aw.stagedChunks = append(aw.stagedChunks, stagedChunkRef{hash, dictionary, data}) 136 return nil 137 } 138 139 func (scrs stagedChunkRefSlice) Len() int { 140 return len(scrs) 141 } 142 func (scrs stagedChunkRefSlice) Less(i, j int) bool { 143 return bytes.Compare(scrs[i].hash[:], scrs[j].hash[:]) == -1 144 } 145 func (scrs stagedChunkRefSlice) Swap(i, j int) { 146 scrs[i], scrs[j] = scrs[j], scrs[i] 147 } 148 149 func (aw *archiveWriter) finalizeByteSpans() error { 150 if aw.workflowStage != stageByteSpan { 151 return fmt.Errorf("Runtime error: finalizeByteSpans called out of order") 152 } 153 154 // Get the checksum for the data written so far 155 aw.dataCheckSum = sha512Sum(aw.output.GetSum()) 156 aw.output.ResetHasher() 157 aw.workflowStage = stageIndex 158 159 return 
nil 160 } 161 162 type streamCounter struct { 163 wrapped io.Writer 164 count uint64 165 } 166 167 func (sc *streamCounter) Write(p []byte) (n int, err error) { 168 n, err = sc.wrapped.Write(p) 169 // n may be non-0, even if err is non-nil. 170 sc.count += uint64(n) 171 return 172 } 173 174 var _ io.Writer = &streamCounter{} 175 176 // writeIndex writes the index to the archive. Expects the hasher to be reset before be called, and will reset it. It 177 // sets the indexLen and indexCheckSum fields on the archiveWriter, and updates the bytesWritten field. 178 func (aw *archiveWriter) writeIndex() error { 179 if aw.workflowStage != stageIndex { 180 return fmt.Errorf("Runtime error: writeIndex called out of order") 181 } 182 183 redr, wrtr := io.Pipe() 184 outCount := &streamCounter{wrapped: aw.output} 185 var wg sync.WaitGroup 186 wg.Add(1) 187 go func() { 188 err := gozstd.StreamCompressLevel(outCount, redr, 6) 189 if err != nil { 190 redr.CloseWithError(err) // This will cause the writer to return the error. 191 } else { 192 redr.Close() 193 } 194 wg.Done() 195 }() 196 197 // Write out the stagedByteSpans 198 for _, bs := range aw.stagedBytes { 199 err := writeVarUint64(wrtr, bs.length) 200 if err != nil { 201 return err 202 } 203 } 204 205 // sort stagedChunks by hash.Prefix(). Note this isn't a perfect sort for hashes, we are just grouping them by prefix 206 sort.Sort(aw.stagedChunks) 207 208 // We lay down the sorted chunk list in it's three forms. 
209 // Prefix Map 210 lastPrefix := uint64(0) 211 for _, scr := range aw.stagedChunks { 212 delta := scr.hash.Prefix() - lastPrefix 213 err := binary.Write(wrtr, binary.BigEndian, delta) 214 if err != nil { 215 return err 216 } 217 lastPrefix += delta 218 } 219 // ChunkReferences 220 for _, scr := range aw.stagedChunks { 221 err := writeVarUint64(wrtr, uint64(scr.dictionary)) 222 if err != nil { 223 return err 224 } 225 226 err = writeVarUint64(wrtr, uint64(scr.data)) 227 if err != nil { 228 return err 229 } 230 } 231 232 // Stop compressing data going to the output sink 233 err := wrtr.Close() 234 if err != nil { 235 return err 236 } 237 wg.Wait() 238 indexSize := outCount.count 239 240 // Suffixes (uncompresssed) 241 for _, scr := range aw.stagedChunks { 242 _, err := aw.output.Write(scr.hash.Suffix()) 243 if err != nil { 244 return err 245 } 246 indexSize += hash.SuffixLen 247 } 248 249 aw.indexLen = uint32(indexSize) 250 aw.bytesWritten += indexSize 251 aw.indexCheckSum = sha512Sum(aw.output.GetSum()) 252 aw.output.ResetHasher() 253 aw.workflowStage = stageMetadata 254 255 return nil 256 } 257 258 // writeMetadata writes the metadataSpan to the archive. Expects the hasher to be reset before be called, and will reset it. 259 // It sets the metadataLen and metadataCheckSum fields on the archiveWriter, and updates the bytesWritten field. 260 // 261 // Empty input is allowed. 
262 func (aw *archiveWriter) writeMetadata(data []byte) error { 263 if aw.workflowStage != stageMetadata { 264 return fmt.Errorf("Runtime error: writeMetadata called out of order") 265 } 266 267 if data == nil { 268 data = []byte{} 269 } 270 271 written, err := aw.output.Write(data) 272 if err != nil { 273 return err 274 } 275 aw.bytesWritten += uint64(written) 276 aw.metadataLen = uint32(written) 277 aw.metadataCheckSum = sha512Sum(aw.output.GetSum()) 278 aw.output.ResetHasher() 279 aw.workflowStage = stageFooter 280 281 return nil 282 } 283 284 func (aw *archiveWriter) writeFooter() error { 285 if aw.workflowStage != stageFooter { 286 return fmt.Errorf("Runtime error: writeFooter called out of order") 287 } 288 289 // Write out the index length 290 err := aw.writeUint32(aw.indexLen) 291 if err != nil { 292 return err 293 } 294 295 // Write out the byte span count 296 err = aw.writeUint32(uint32(len(aw.stagedBytes))) 297 if err != nil { 298 return err 299 } 300 301 // Write out the chunk count 302 err = aw.writeUint32(uint32(len(aw.stagedChunks))) 303 if err != nil { 304 return err 305 } 306 307 // Write out the metadataSpan length 308 err = aw.writeUint32(aw.metadataLen) 309 if err != nil { 310 return err 311 } 312 313 err = aw.writeCheckSums() 314 if err != nil { 315 return err 316 } 317 318 // Write out the format version 319 _, err = aw.output.Write([]byte{archiveFormatVersion}) 320 if err != nil { 321 return err 322 } 323 aw.bytesWritten++ 324 325 // Write out the file signature 326 _, err = aw.output.Write([]byte(archiveFileSignature)) 327 if err != nil { 328 return err 329 } 330 aw.bytesWritten += archiveFileSigSize 331 aw.workflowStage = stageFlush 332 333 return nil 334 } 335 336 func (aw *archiveWriter) writeCheckSums() error { 337 err := aw.writeSha512(aw.dataCheckSum) 338 if err != nil { 339 return err 340 } 341 342 err = aw.writeSha512(aw.indexCheckSum) 343 if err != nil { 344 return err 345 } 346 347 return aw.writeSha512(aw.metadataCheckSum) 348 } 
349 350 func (aw *archiveWriter) writeSha512(sha sha512Sum) error { 351 _, err := aw.output.Write(sha[:]) 352 if err != nil { 353 return err 354 } 355 356 aw.bytesWritten += sha512.Size 357 return nil 358 } 359 360 // Write a uint32 to the archive. Increments the bytesWritten field. 361 func (aw *archiveWriter) writeUint32(val uint32) error { 362 err := binary.Write(aw.output, binary.BigEndian, val) 363 if err != nil { 364 return err 365 } 366 367 aw.bytesWritten += uint32Size 368 return nil 369 } 370 371 // Write a uint64 to the archive as a varint. This is used during the index writing process, so we expect the io.Writer 372 // to keep track of the written byte count. 373 func writeVarUint64(w io.Writer, val uint64) error { 374 var buf [binary.MaxVarintLen64]byte 375 n := binary.PutUvarint(buf[:], val) 376 _, err := w.Write(buf[:n]) 377 return err 378 } 379 380 func (aw *archiveWriter) flushToFile(path string) error { 381 if aw.workflowStage != stageFlush { 382 return fmt.Errorf("Runtime error: flushToFile called out of order") 383 } 384 385 if bs, ok := aw.output.backingSink.(*BufferedFileByteSink); ok { 386 err := bs.finish() 387 if err != nil { 388 return err 389 } 390 } 391 392 return aw.output.FlushToFile(path) 393 }