github.com/grailbio/base@v0.0.11/logio/writer.go (about) 1 // Copyright 2019 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package logio 6 7 import ( 8 "io" 9 10 xxhash "github.com/cespare/xxhash/v2" 11 ) 12 13 // Append writes an entry to the io.Writer w. The writer must be 14 // positioned at the provided offset. If non-nil, Append will use the 15 // scratch buffer for working space, avoiding additional allocation. 16 // The scratch buffer must be at least Blocksz. 17 func Append(w io.Writer, off int64, data, scratch []byte) (nwrite int, err error) { 18 if n := off % Blocksz; n > 0 && n < headersz { 19 // Corrupted file: skip to the next block boundary. 20 // 21 // TODO(marius): make sure that in the case of append failure 22 // that occurs at the end of the file, that we can recover without 23 // exposing the error to the user. 24 n, err := w.Write(zeros[:Blocksz-n]) 25 nwrite += n 26 if err != nil { 27 return nwrite, err 28 } 29 } else if left := Blocksz - n; left <= headersz { 30 // Need padding. 31 n, err := w.Write(zeros[:left]) 32 nwrite += n 33 if err != nil { 34 return nwrite, err 35 } 36 } 37 for base := nwrite; len(data) > 0; { 38 n := Blocksz - int(off+int64(nwrite))%Blocksz 39 n -= headersz 40 var typ uint8 41 switch { 42 case len(data) <= n && nwrite == base: 43 typ, n = recordFull, len(data) 44 case len(data) <= n: 45 typ, n = recordLast, len(data) 46 case nwrite == base: 47 typ = recordFirst 48 default: 49 typ = recordMiddle 50 } 51 scratch = appendRecord(scratch[:0], typ, uint64(nwrite-base), data[:n]) 52 data = data[n:] 53 n, err = w.Write(scratch) 54 nwrite += n 55 if err != nil { 56 return nwrite, err 57 } 58 } 59 return nwrite, nil 60 } 61 62 // Aligned aligns the provided offset for the next write: it returns 63 // the offset at which the next record will be written, if a writer 64 // with the provided offset is provided to Append. This can be used 65 // to index into logio files. 66 func Aligned(off int64) int64 { 67 if n := int64(Blocksz - off%Blocksz); n <= headersz { 68 return off + n 69 } 70 return off 71 } 72 73 // A Writer appends to a log file. Writers are thin stateful wrappers 74 // around Append. 75 type Writer struct { 76 wr io.Writer 77 off int64 78 scratch []byte 79 } 80 81 // NewWriter returns a new writer that appends log entries to the 82 // provided io.Writer. The offset given must be the offset into the 83 // underlying IO stream represented by wr. 84 func NewWriter(wr io.Writer, offset int64) *Writer { 85 return &Writer{wr: wr, off: offset, scratch: make([]byte, Blocksz)} 86 } 87 88 // Append appends a new entry to the log file. Appending an empty 89 // record is a no-op. Note that the writer appends only appends to 90 // the underlying stream. It is the responsibility of the caller to 91 // ensure that the writes are committed to stable storage (e.g., by 92 // calling file.Sync). 93 func (w *Writer) Append(data []byte) error { 94 n, err := Append(w.wr, w.off, data, w.scratch) 95 w.off += int64(n) 96 return err 97 } 98 99 // Tell returns the offset of the next record to be appended. 100 // This may be used to index into the log file. 101 func (w *Writer) Tell() int64 { 102 return Aligned(w.off) 103 } 104 105 // appendRecord appends a record, specified by typ, offset, and data, to p. p 106 // must have enough capacity for the record. 107 func appendRecord(p []byte, typ uint8, offset uint64, data []byte) []byte { 108 off := len(p) 109 p = p[:off+headersz+len(data)] 110 p[off+4] = typ 111 byteOrder.PutUint16(p[off+5:], uint16(len(data))) 112 byteOrder.PutUint64(p[off+7:], offset) 113 copy(p[off+15:], data) 114 byteOrder.PutUint32(p[off:], checksum(p[off+4:])) 115 return p 116 } 117 118 func (w *Writer) write(p []byte) error { 119 n, err := w.wr.Write(p) 120 w.off += int64(n) 121 return err 122 } 123 124 func checksum(data []byte) uint32 { 125 h := xxhash.Sum64(data) 126 return uint32(h<<32) ^ uint32(h) 127 }