// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package ptable

import (
	"bufio"
	"encoding/binary"
	"errors"
	"io"

	"github.com/golang/snappy"
	"github.com/petermattis/pebble/internal/base"
	"github.com/petermattis/pebble/internal/crc"
	"github.com/petermattis/pebble/vfs"
)

const (
	// blockTrailerLen is the length of the trailer appended to every block:
	// 1 block-type (compression) byte followed by a 4-byte CRC-32C checksum.
	blockTrailerLen = 5
	// blockHandleMaxLen bounds an encoded blockHandle: two uvarint64 values
	// (offset and length), each at most 10 bytes.
	blockHandleMaxLen = 10 + 10
	// footerLen is the fixed size of the table footer: 1 checksum-type byte,
	// two zero-padded block handles, a 4-byte version, and the 8-byte magic.
	footerLen     = 1 + 2*blockHandleMaxLen + 4 + 8
	magicOffset   = footerLen - len(magic)
	versionOffset = magicOffset - 4

	magic = "\xf7\xcf\xf4\x85\xb7\x41\xe2\x88"

	// Checksum types recorded in the footer's first byte.
	noChecksum     = 0
	checksumCRC32c = 1

	formatVersion = 3

	// The block type gives the per-block compression format.
	// These constants are part of the file format and should not be changed.
	// They are different from the db.Compression constants because the latter
	// are designed so that the zero value of the db.Compression type means to
	// use the default compression (which is snappy).
	noCompressionBlockType     = 0
	snappyCompressionBlockType = 1
)

// Silence unused warning.
var _ = noChecksum

// blockHandle is the file offset and length of a block.
type blockHandle struct {
	offset, length uint64
}

// decodeBlockHandle returns the block handle encoded at the start of src, as
// well as the number of bytes it occupies. It returns zero if given invalid
// input.
53 func decodeBlockHandle(src []byte) (blockHandle, int) { 54 offset, n := binary.Uvarint(src) 55 length, m := binary.Uvarint(src[n:]) 56 if n == 0 || m == 0 { 57 return blockHandle{}, 0 58 } 59 return blockHandle{offset, length}, n + m 60 } 61 62 func encodeBlockHandle(dst []byte, b blockHandle) int { 63 n := binary.PutUvarint(dst, b.offset) 64 m := binary.PutUvarint(dst[n:], b.length) 65 return n + m 66 } 67 68 // Writer ... 69 type Writer struct { 70 env *Env 71 writer io.Writer 72 bufWriter *bufio.Writer 73 closer io.Closer 74 err error 75 // The next four fields are copied from a db.Options. 76 blockSize int 77 compression base.Compression 78 // The data block and index block writers. 79 block blockWriter 80 indexBlock blockWriter 81 // compressedBuf is the destination buffer for snappy compression. It is 82 // re-used over the lifetime of the writer, avoiding the allocation of a 83 // temporary buffer for each block. 84 compressedBuf []byte 85 // offset is the offset (relative to the table start) of the next block to be 86 // written. 87 offset uint64 88 // tmp is a scratch buffer, large enough to hold either footerLen bytes, 89 // blockTrailerLen bytes, or (5 * binary.MaxVarintLen64) bytes. 90 tmp [footerLen]byte 91 } 92 93 var indexColTypes = []ColumnType{ColumnTypeBytes, ColumnTypeInt64} 94 95 // NewWriter ... 96 func NewWriter(f vfs.File, env *Env, _ *base.Options, lo *base.LevelOptions) *Writer { 97 lo = lo.EnsureDefaults() 98 w := &Writer{ 99 env: env, 100 writer: f, 101 closer: f, 102 blockSize: lo.BlockSize, 103 compression: lo.Compression, 104 } 105 if f == nil { 106 w.err = errors.New("pebble/table: nil file") 107 return w 108 } 109 110 // If f does not have a Flush method, do our own buffering. 
111 type flusher interface { 112 Flush() error 113 } 114 if _, ok := f.(flusher); ok { 115 w.writer = f 116 } else { 117 w.bufWriter = bufio.NewWriter(f) 118 w.writer = w.bufWriter 119 } 120 121 colTypes := make([]ColumnType, len(w.env.Schema)) 122 for i := range w.env.Schema { 123 colTypes[i] = w.env.Schema[i].Type 124 } 125 w.block.init(colTypes) 126 w.indexBlock.init(indexColTypes) 127 return w 128 } 129 130 // AddKV adds a row encoded in a key/value pair to the table. The encoded 131 // column data must match the table schema. Data must be added in sorted order. 132 func (w *Writer) AddKV(key, value []byte) error { 133 if w.err != nil { 134 return w.err 135 } 136 if w.block.cols[0].count == 0 { 137 w.addIndex(key) 138 } 139 w.env.Decode(key, value, nil, &w.block) 140 w.maybeFinishBlock() 141 return w.err 142 } 143 144 // AddRow adds a row to the table. The columns in the row must match the table 145 // schema. Data must be added in sorted order. 146 func (w *Writer) AddRow(row RowReader) error { 147 if w.err != nil { 148 return w.err 149 } 150 if w.block.cols[0].count == 0 { 151 key, _ := w.env.Encode(row, nil) 152 w.addIndex(key) 153 } 154 w.block.PutRow(row) 155 w.maybeFinishBlock() 156 return w.err 157 } 158 159 // EstimatedSize ... 160 func (w *Writer) EstimatedSize() uint64 { 161 return w.offset + uint64(w.block.Size()+w.indexBlock.Size()) 162 } 163 164 // Close ... 165 func (w *Writer) Close() (err error) { 166 defer func() { 167 if w.closer == nil { 168 return 169 } 170 err1 := w.closer.Close() 171 if err == nil { 172 err = err1 173 } 174 w.closer = nil 175 }() 176 177 if w.err != nil { 178 return w.err 179 } 180 181 if w.block.cols[0].count > 0 { 182 _, err := w.finishBlock(&w.block) 183 if err != nil { 184 w.err = err 185 return w.err 186 } 187 } 188 189 // Add the dummy final index entry and write the index block. 
190 w.addIndex(nil) 191 indexBlockHandle, err := w.finishBlock(&w.indexBlock) 192 if err != nil { 193 w.err = err 194 return w.err 195 } 196 197 // Write the table footer. 198 footer := w.tmp[:footerLen] 199 for i := range footer { 200 footer[i] = 0 201 } 202 footer[0] = checksumCRC32c 203 n := 1 204 n += encodeBlockHandle(footer[n:], blockHandle{}) 205 n += encodeBlockHandle(footer[n:], indexBlockHandle) 206 binary.LittleEndian.PutUint32(footer[versionOffset:], formatVersion) 207 copy(footer[magicOffset:], magic) 208 if _, err := w.writer.Write(footer); err != nil { 209 w.err = err 210 return w.err 211 } 212 213 // Flush the buffer. 214 if w.bufWriter != nil { 215 if err := w.bufWriter.Flush(); err != nil { 216 w.err = err 217 return err 218 } 219 } 220 221 // Make any future calls to Set or Close return an error. 222 w.err = errors.New("pebble/table: writer is closed") 223 return nil 224 } 225 226 func (w *Writer) addIndex(key []byte) { 227 w.indexBlock.PutBytes(0, key) 228 w.indexBlock.PutInt64(1, int64(w.offset)) 229 } 230 231 func (w *Writer) maybeFinishBlock() { 232 if int(w.block.Size()) < w.blockSize { 233 return 234 } 235 _, w.err = w.finishBlock(&w.block) 236 } 237 238 func (w *Writer) finishBlock(block *blockWriter) (blockHandle, error) { 239 b := block.Finish() 240 blockType := byte(noCompressionBlockType) 241 if w.compression == base.SnappyCompression { 242 compressed := snappy.Encode(w.compressedBuf, b) 243 w.compressedBuf = compressed[:cap(compressed)] 244 if len(compressed) < len(b)-len(b)/8 { 245 blockType = snappyCompressionBlockType 246 b = compressed 247 } 248 } 249 250 // Reset the per-block state. 251 block.reset() 252 return w.writeRawBlock(b, blockType) 253 } 254 255 func (w *Writer) writeRawBlock(b []byte, blockType byte) (blockHandle, error) { 256 w.tmp[0] = blockType 257 258 // Calculate the checksum. 
259 checksum := crc.New(b).Update(w.tmp[:1]).Value() 260 binary.LittleEndian.PutUint32(w.tmp[1:5], checksum) 261 262 // Write the bytes to the file. 263 if _, err := w.writer.Write(b); err != nil { 264 return blockHandle{}, err 265 } 266 if _, err := w.writer.Write(w.tmp[:5]); err != nil { 267 return blockHandle{}, err 268 } 269 bh := blockHandle{w.offset, uint64(len(b))} 270 w.offset += uint64(len(b)) + blockTrailerLen 271 return bh, nil 272 }