// Source: github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/code.google.com/p/leveldb-go/leveldb/table/writer.go

// Copyright 2011 The LevelDB-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package table

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"errors"
	"fmt"
	"io"

	"camlistore.org/third_party/code.google.com/p/leveldb-go/leveldb/crc"
	"camlistore.org/third_party/code.google.com/p/leveldb-go/leveldb/db"
	"camlistore.org/third_party/code.google.com/p/snappy-go/snappy"
)

// indexEntry is a block handle and the length of the separator key.
// The separator key's bytes are not stored here; they live in
// Writer.indexKeys, and keyLen says how many of those bytes belong to
// this entry.
type indexEntry struct {
	bh     blockHandle
	keyLen int
}

// Writer is a table writer. It implements the DB interface, as documented
// in the leveldb/db package.
type Writer struct {
	// writer is where finished blocks and the footer are written. NewWriter
	// sets it to either the file itself or to bufWriter.
	writer io.Writer
	// bufWriter is non-nil only when NewWriter wrapped the file in its own
	// bufio.Writer; Close flushes it.
	bufWriter *bufio.Writer
	// closer is closed (at most once) by Close.
	closer io.Closer
	// err is a sticky error: once set, Set and Close return it without
	// doing any further work.
	err error
	// The next four fields are copied from a db.Options.
	blockRestartInterval int
	blockSize            int
	cmp                  db.Comparer
	compression          db.Compression
	// A table is a series of blocks and a block's index entry contains a
	// separator key between one block and the next. Thus, a finished block
	// cannot be written until the first key in the next block is seen.
	// pendingBH is the blockHandle of a finished block that is waiting for
	// the next call to Set. If the writer is not in this state, pendingBH
	// is zero.
	pendingBH blockHandle
	// offset is the offset (relative to the table start) of the next block
	// to be written.
	offset uint64
	// prevKey is a copy of the key most recently passed to Set.
	prevKey []byte
	// indexKeys and indexEntries hold the separator keys between each block
	// and the successor key for the final block. indexKeys contains the key's
	// bytes concatenated together. The keyLen field of each indexEntries
	// element is the length of the respective separator key.
	indexKeys    []byte
	indexEntries []indexEntry
	// The next three fields hold data for the current block:
	//   - buf is the accumulated uncompressed bytes,
	//   - nEntries is the number of entries,
	//   - restarts are the offsets (relative to the block start) of each
	//     restart point.
	buf      bytes.Buffer
	nEntries int
	restarts []uint32
	// compressedBuf is the destination buffer for snappy compression. It is
	// re-used over the lifetime of the writer, avoiding the allocation of a
	// temporary buffer for each block.
	compressedBuf []byte
	// tmp is a scratch buffer, large enough to hold either footerLen bytes,
	// blockTrailerLen bytes, or (5 * binary.MaxVarintLen64) bytes.
	tmp [50]byte
}

// Writer implements the db.DB interface. This declaration is a compile-time
// check only; it allocates nothing.
var _ db.DB = (*Writer)(nil)

// Get is provided to implement the DB interface, but returns an error, as a
// Writer cannot read from a table. The key and options are ignored.
func (w *Writer) Get(key []byte, o *db.ReadOptions) ([]byte, error) {
	return nil, errors.New("leveldb/table: cannot Get from a write-only table")
}

// Delete is provided to implement the DB interface, but returns an error, as a
// Writer can only append key/value pairs. The key and options are ignored.
func (w *Writer) Delete(key []byte, o *db.WriteOptions) error {
	return errors.New("leveldb/table: cannot Delete from a table")
}

// Find is provided to implement the DB interface. A Writer cannot read from a
// table, so Find returns an iterator whose err field is already set rather
// than returning an error directly. The key and options are ignored.
func (w *Writer) Find(key []byte, o *db.ReadOptions) db.Iterator {
	return &tableIter{
		err: errors.New("leveldb/table: cannot Find from a write-only table"),
	}
}

// Set implements DB.Set, as documented in the leveldb/db package. For a given
// Writer, the keys passed to Set must be in increasing order.
98 func (w *Writer) Set(key, value []byte, o *db.WriteOptions) error { 99 if w.err != nil { 100 return w.err 101 } 102 if w.cmp.Compare(w.prevKey, key) >= 0 { 103 w.err = fmt.Errorf("leveldb/table: Set called in non-increasing key order: %q, %q", w.prevKey, key) 104 return w.err 105 } 106 w.flushPendingBH(key) 107 w.append(key, value, w.nEntries%w.blockRestartInterval == 0) 108 // If the estimated block size is sufficiently large, finish the current block. 109 if w.buf.Len()+4*(len(w.restarts)+1) >= w.blockSize { 110 bh, err := w.finishBlock() 111 if err != nil { 112 w.err = err 113 return w.err 114 } 115 w.pendingBH = bh 116 } 117 return nil 118 } 119 120 // flushPendingBH adds any pending block handle to the index entries. 121 func (w *Writer) flushPendingBH(key []byte) { 122 if w.pendingBH.length == 0 { 123 // A valid blockHandle must be non-zero. 124 // In particular, it must have a non-zero length. 125 return 126 } 127 n0 := len(w.indexKeys) 128 w.indexKeys = w.cmp.AppendSeparator(w.indexKeys, w.prevKey, key) 129 n1 := len(w.indexKeys) 130 w.indexEntries = append(w.indexEntries, indexEntry{w.pendingBH, n1 - n0}) 131 w.pendingBH = blockHandle{} 132 } 133 134 // append appends a key/value pair, which may also be a restart point. 135 func (w *Writer) append(key, value []byte, restart bool) { 136 nShared := 0 137 if restart { 138 w.restarts = append(w.restarts, uint32(w.buf.Len())) 139 } else { 140 nShared = db.SharedPrefixLen(w.prevKey, key) 141 } 142 w.prevKey = append(w.prevKey[:0], key...) 143 w.nEntries++ 144 n := binary.PutUvarint(w.tmp[0:], uint64(nShared)) 145 n += binary.PutUvarint(w.tmp[n:], uint64(len(key)-nShared)) 146 n += binary.PutUvarint(w.tmp[n:], uint64(len(value))) 147 w.buf.Write(w.tmp[:n]) 148 w.buf.Write(key[nShared:]) 149 w.buf.Write(value) 150 } 151 152 // finishBlock finishes the current block and returns its block handle, which is 153 // its offset and length in the table. 
154 func (w *Writer) finishBlock() (blockHandle, error) { 155 // Write the restart points to the buffer. 156 if w.nEntries == 0 { 157 // Every block must have at least one restart point. 158 w.restarts = w.restarts[:1] 159 w.restarts[0] = 0 160 } 161 tmp4 := w.tmp[:4] 162 for _, x := range w.restarts { 163 binary.LittleEndian.PutUint32(tmp4, x) 164 w.buf.Write(tmp4) 165 } 166 binary.LittleEndian.PutUint32(tmp4, uint32(len(w.restarts))) 167 w.buf.Write(tmp4) 168 169 // Compress the buffer, discarding the result if the improvement 170 // isn't at least 12.5%. 171 b := w.buf.Bytes() 172 w.tmp[0] = noCompressionBlockType 173 if w.compression == db.SnappyCompression { 174 compressed, err := snappy.Encode(w.compressedBuf, b) 175 if err != nil { 176 return blockHandle{}, err 177 } 178 w.compressedBuf = compressed[:cap(compressed)] 179 if len(compressed) < len(b)-len(b)/8 { 180 w.tmp[0] = snappyCompressionBlockType 181 b = compressed 182 } 183 } 184 185 // Calculate the checksum. 186 checksum := crc.New(b).Update(w.tmp[:1]).Value() 187 binary.LittleEndian.PutUint32(w.tmp[1:5], checksum) 188 189 // Write the bytes to the file. 190 if _, err := w.writer.Write(b); err != nil { 191 return blockHandle{}, err 192 } 193 if _, err := w.writer.Write(w.tmp[:5]); err != nil { 194 return blockHandle{}, err 195 } 196 bh := blockHandle{w.offset, uint64(len(b))} 197 w.offset += uint64(len(b)) + blockTrailerLen 198 199 // Reset the per-block state. 200 w.buf.Reset() 201 w.nEntries = 0 202 w.restarts = w.restarts[:0] 203 return bh, nil 204 } 205 206 // Close implements DB.Close, as documented in the leveldb/db package. 207 func (w *Writer) Close() (err error) { 208 defer func() { 209 if w.closer == nil { 210 return 211 } 212 err1 := w.closer.Close() 213 if err == nil { 214 err = err1 215 } 216 w.closer = nil 217 }() 218 if w.err != nil { 219 return w.err 220 } 221 222 // Finish the last data block, or force an empty data block if there 223 // aren't any data blocks at all. 
224 if w.nEntries > 0 || len(w.indexEntries) == 0 { 225 bh, err := w.finishBlock() 226 if err != nil { 227 w.err = err 228 return w.err 229 } 230 w.pendingBH = bh 231 w.flushPendingBH(nil) 232 } 233 234 // Write the (empty) metaindex block. 235 metaindexBlockHandle, err := w.finishBlock() 236 if err != nil { 237 w.err = err 238 return w.err 239 } 240 241 // Write the index block. 242 // writer.append uses w.tmp[:3*binary.MaxVarintLen64]. 243 i0, tmp := 0, w.tmp[3*binary.MaxVarintLen64:5*binary.MaxVarintLen64] 244 for _, ie := range w.indexEntries { 245 n := encodeBlockHandle(tmp, ie.bh) 246 i1 := i0 + ie.keyLen 247 w.append(w.indexKeys[i0:i1], tmp[:n], true) 248 i0 = i1 249 } 250 indexBlockHandle, err := w.finishBlock() 251 if err != nil { 252 w.err = err 253 return w.err 254 } 255 256 // Write the table footer. 257 footer := w.tmp[:footerLen] 258 for i := range footer { 259 footer[i] = 0 260 } 261 n := encodeBlockHandle(footer, metaindexBlockHandle) 262 encodeBlockHandle(footer[n:], indexBlockHandle) 263 copy(footer[footerLen-len(magic):], magic) 264 if _, err := w.writer.Write(footer); err != nil { 265 w.err = err 266 return w.err 267 } 268 269 // Flush the buffer. 270 if w.bufWriter != nil { 271 if err := w.bufWriter.Flush(); err != nil { 272 w.err = err 273 return err 274 } 275 } 276 277 // Make any future calls to Set or Close return an error. 278 w.err = errors.New("leveldb/table: writer is closed") 279 return nil 280 } 281 282 // NewWriter returns a new table writer for the file. Closing the writer will 283 // close the file. 
284 func NewWriter(f File, o *db.Options) *Writer { 285 w := &Writer{ 286 closer: f, 287 blockRestartInterval: o.GetBlockRestartInterval(), 288 blockSize: o.GetBlockSize(), 289 cmp: o.GetComparer(), 290 compression: o.GetCompression(), 291 prevKey: make([]byte, 0, 256), 292 restarts: make([]uint32, 0, 256), 293 } 294 if f == nil { 295 w.err = errors.New("leveldb/table: nil file") 296 return w 297 } 298 // If f does not have a Flush method, do our own buffering. 299 type flusher interface { 300 Flush() error 301 } 302 if _, ok := f.(flusher); ok { 303 w.writer = f 304 } else { 305 w.bufWriter = bufio.NewWriter(f) 306 w.writer = w.bufWriter 307 } 308 return w 309 }