github.com/grailbio/base@v0.0.11/mapio/writer.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package mapio 6 7 import ( 8 "encoding/binary" 9 "io" 10 ) 11 12 const ( 13 maxBlockAddrSize = binary.MaxVarintLen64 + // offset 14 binary.MaxVarintLen64 // len 15 16 mapTrailerSize = maxBlockAddrSize + // meta block index (padded) 17 maxBlockAddrSize + // index address (padded) 18 8 // magic 19 20 mapTrailerMagic = 0xa8b2374e8558bc76 21 ) 22 23 type blockAddr struct { 24 off uint64 25 len uint64 26 } 27 28 func putBlockAddr(p []byte, b blockAddr) int { 29 off := binary.PutUvarint(p, b.off) 30 return off + binary.PutUvarint(p[off:], b.len) 31 } 32 33 func getBlockAddr(p []byte) (b blockAddr, n int) { 34 var m int 35 b.off, n = binary.Uvarint(p) 36 b.len, m = binary.Uvarint(p[n:]) 37 n += m 38 return 39 } 40 41 // A Writer appends key-value pairs to a map. Keys must be appended 42 // in lexicographic order. 43 type Writer struct { 44 data, index blockBuffer 45 w io.Writer 46 47 lastKey []byte 48 49 blockSize int 50 off int 51 } 52 53 const ( 54 defaultBlockSize = 1 << 12 55 defaultRestartInterval = 16 56 ) 57 58 // WriteOption represents a tunable writer parameter. 59 type WriteOption func(*Writer) 60 61 // BlockSize sets the writer's target block size to sz (in bytes). 62 // Note that keys and values cannot straddle blocks, so that if large 63 // data are added to a map, block sizes can grow large. The default 64 // target block size is 4KB. 65 func BlockSize(sz int) WriteOption { 66 return func(w *Writer) { 67 w.blockSize = sz 68 } 69 } 70 71 // RestartInterval sets the writer's restart interval to 72 // provided value. The default restart interval is 16. 73 func RestartInterval(iv int) WriteOption { 74 return func(w *Writer) { 75 w.data.restartInterval = iv 76 w.index.restartInterval = iv 77 } 78 } 79 80 // NewWriter returns a new Writer that writes a map to the provided 81 // io.Writer. BlockSize specifies the target block size, while 82 // restartInterval determines the frequency of key restart points, 83 // which trades off lookup performance with size. See package docs 84 // for more details. 85 func NewWriter(w io.Writer, opts ...WriteOption) *Writer { 86 wr := &Writer{ 87 w: w, 88 blockSize: defaultBlockSize, 89 } 90 wr.data.restartInterval = defaultRestartInterval 91 wr.index.restartInterval = defaultRestartInterval 92 for _, opt := range opts { 93 opt(wr) 94 } 95 return wr 96 } 97 98 // Append appends an entry to the maps. Keys must be provided 99 // in lexicographic order. 100 func (w *Writer) Append(key, value []byte) error { 101 w.data.Append(key, value) 102 if w.lastKey == nil || cap(w.lastKey) < len(key) { 103 w.lastKey = make([]byte, len(key)) 104 } else { 105 w.lastKey = w.lastKey[:len(key)] 106 } 107 copy(w.lastKey, key) 108 if w.data.Len() > w.blockSize { 109 return w.Flush() 110 } 111 return nil 112 } 113 114 // Flush creates a new block with the current contents. It forces the 115 // creation of a new block, and overrides the Writer's block size 116 // parameter. 117 func (w *Writer) Flush() error { 118 w.data.Finish() 119 n, err := w.w.Write(w.data.Bytes()) 120 if err != nil { 121 return err 122 } 123 w.data.Reset() 124 off := w.off 125 w.off += n 126 127 // TODO(marius): we can get more clever about key compression here: 128 // We need to guarantee that the lastKey <= indexKey < firstKey, 129 // where firstKey is the first key in the next block. We can thus 130 // construct a more minimal key to store in the index. 131 b := make([]byte, maxBlockAddrSize) 132 n = putBlockAddr(b, blockAddr{uint64(off), uint64(n)}) 133 w.index.Append(w.lastKey, b[:n]) 134 135 return nil 136 } 137 138 // Close flushes the last block of the writer and writes the map's 139 // trailer. After successful close, the map is ready to be opened. 140 func (w *Writer) Close() error { 141 if err := w.Flush(); err != nil { 142 return err 143 } 144 w.index.Finish() 145 n, err := w.w.Write(w.index.Bytes()) 146 if err != nil { 147 return err 148 } 149 w.index.Reset() 150 indexAddr := blockAddr{uint64(w.off), uint64(n)} 151 w.off += n 152 153 trailer := make([]byte, mapTrailerSize) 154 putBlockAddr(trailer, blockAddr{}) // address of meta block index. tbd. 155 putBlockAddr(trailer[maxBlockAddrSize:], indexAddr) 156 order.PutUint64(trailer[len(trailer)-8:], mapTrailerMagic) 157 _, err = w.w.Write(trailer) 158 return err 159 }