github.com/grailbio/base@v0.0.11/logio/writer.go (about)

     1  // Copyright 2019 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package logio
     6  
     7  import (
     8  	"io"
     9  
    10  	xxhash "github.com/cespare/xxhash/v2"
    11  )
    12  
    13  // Append writes an entry to the io.Writer w. The writer must be
    14  // positioned at the provided offset. If non-nil, Append will use the
    15  // scratch buffer for working space, avoiding additional allocation.
    16  // The scratch buffer must be at least Blocksz.
    17  func Append(w io.Writer, off int64, data, scratch []byte) (nwrite int, err error) {
    18  	if n := off % Blocksz; n > 0 && n < headersz {
    19  		// Corrupted file: skip to the next block boundary.
    20  		//
    21  		// TODO(marius): make sure that in the case of append failure
    22  		// that occurs at the end of the file, that we can recover without
    23  		// exposing the error to the user.
    24  		n, err := w.Write(zeros[:Blocksz-n])
    25  		nwrite += n
    26  		if err != nil {
    27  			return nwrite, err
    28  		}
    29  	} else if left := Blocksz - n; left <= headersz {
    30  		// Need padding.
    31  		n, err := w.Write(zeros[:left])
    32  		nwrite += n
    33  		if err != nil {
    34  			return nwrite, err
    35  		}
    36  	}
    37  	for base := nwrite; len(data) > 0; {
    38  		n := Blocksz - int(off+int64(nwrite))%Blocksz
    39  		n -= headersz
    40  		var typ uint8
    41  		switch {
    42  		case len(data) <= n && nwrite == base:
    43  			typ, n = recordFull, len(data)
    44  		case len(data) <= n:
    45  			typ, n = recordLast, len(data)
    46  		case nwrite == base:
    47  			typ = recordFirst
    48  		default:
    49  			typ = recordMiddle
    50  		}
    51  		scratch = appendRecord(scratch[:0], typ, uint64(nwrite-base), data[:n])
    52  		data = data[n:]
    53  		n, err = w.Write(scratch)
    54  		nwrite += n
    55  		if err != nil {
    56  			return nwrite, err
    57  		}
    58  	}
    59  	return nwrite, nil
    60  }
    61  
    62  // Aligned aligns the provided offset for the next write: it returns
    63  // the offset at which the next record will be written, if a writer
    64  // with the provided offset is provided to Append. This can be used
    65  // to index into logio files.
    66  func Aligned(off int64) int64 {
    67  	if n := int64(Blocksz - off%Blocksz); n <= headersz {
    68  		return off + n
    69  	}
    70  	return off
    71  }
    72  
// A Writer appends to a log file. Writers are thin stateful wrappers
// around Append: a Writer keeps the destination stream, the current
// offset into that stream, and a scratch buffer that is handed to
// Append on every call.
type Writer struct {
	wr      io.Writer // underlying stream that entries are written to
	off     int64     // current offset into wr; advanced by the bytes written
	scratch []byte    // working buffer passed to Append; Blocksz bytes when built by NewWriter
}
    80  
    81  // NewWriter returns a new writer that appends log entries to the
    82  // provided io.Writer. The offset given must be the offset into the
    83  // underlying IO stream represented by wr.
    84  func NewWriter(wr io.Writer, offset int64) *Writer {
    85  	return &Writer{wr: wr, off: offset, scratch: make([]byte, Blocksz)}
    86  }
    87  
    88  // Append appends a new entry to the log file. Appending an empty
    89  // record is a no-op. Note that the writer appends only appends to
    90  // the underlying stream. It is the responsibility of the caller to
    91  // ensure that the writes are committed to stable storage (e.g., by
    92  // calling file.Sync).
    93  func (w *Writer) Append(data []byte) error {
    94  	n, err := Append(w.wr, w.off, data, w.scratch)
    95  	w.off += int64(n)
    96  	return err
    97  }
    98  
    99  // Tell returns the offset of the next record to be appended.
   100  // This may be used to index into the log file.
   101  func (w *Writer) Tell() int64 {
   102  	return Aligned(w.off)
   103  }
   104  
   105  // appendRecord appends a record, specified by typ, offset, and data, to p. p
   106  // must have enough capacity for the record.
   107  func appendRecord(p []byte, typ uint8, offset uint64, data []byte) []byte {
   108  	off := len(p)
   109  	p = p[:off+headersz+len(data)]
   110  	p[off+4] = typ
   111  	byteOrder.PutUint16(p[off+5:], uint16(len(data)))
   112  	byteOrder.PutUint64(p[off+7:], offset)
   113  	copy(p[off+15:], data)
   114  	byteOrder.PutUint32(p[off:], checksum(p[off+4:]))
   115  	return p
   116  }
   117  
   118  func (w *Writer) write(p []byte) error {
   119  	n, err := w.wr.Write(p)
   120  	w.off += int64(n)
   121  	return err
   122  }
   123  
   124  func checksum(data []byte) uint32 {
   125  	h := xxhash.Sum64(data)
   126  	return uint32(h<<32) ^ uint32(h)
   127  }