github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/code.google.com/p/leveldb-go/leveldb/table/table.go (about) 1 // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 /* 6 Package table implements readers and writers of leveldb tables. 7 8 Tables are either opened for reading or created for writing but not both. 9 10 A reader can create iterators, which yield all key/value pairs whose keys 11 are 'greater than or equal' to a starting key. There may be multiple key/ 12 value pairs that have the same key. 13 14 A reader can be used concurrently. Multiple goroutines can call Find 15 concurrently, and each iterator can run concurrently with other iterators. 16 However, any particular iterator should not be used concurrently, and 17 iterators should not be used once a reader is closed. 18 19 A writer writes key/value pairs in increasing key order, and cannot be used 20 concurrently. A table cannot be read until the writer has finished. 21 22 Readers and writers can be created with various options. Passing a nil 23 Options pointer is valid and means to use the default values. 24 25 One such option is to define the 'less than' ordering for keys. The default 26 Comparer uses the natural ordering consistent with bytes.Compare. The same 27 ordering should be used for reading and writing a table. 
28 29 To return the value for a key: 30 31 r := table.NewReader(file, options) 32 defer r.Close() 33 return r.Get(key) 34 35 To count the number of entries in a table: 36 37 i, n := r.Find(nil), 0 38 for i.Next() { 39 n++ 40 } 41 if err := i.Close(); err != nil { 42 return 0, err 43 } 44 return n, nil 45 46 To write a table with three entries: 47 48 w := table.NewWriter(file, options) 49 if err := w.Set([]byte("apple"), []byte("red")); err != nil { 50 w.Close() 51 return err 52 } 53 if err := w.Set([]byte("banana"), []byte("yellow")); err != nil { 54 w.Close() 55 return err 56 } 57 if err := w.Set([]byte("cherry"), []byte("red")); err != nil { 58 w.Close() 59 return err 60 } 61 return w.Close() 62 */ 63 package table 64 65 /* 66 The table file format looks like: 67 68 <start_of_file> 69 [data block 0] 70 [data block 1] 71 ... 72 [data block N-1] 73 [meta block 0] 74 [meta block 1] 75 ... 76 [meta block K-1] 77 [metaindex block] 78 [index block] 79 [footer] 80 <end_of_file> 81 82 Each block consists of some data and a 5 byte trailer: a 1 byte block type and 83 a 4 byte checksum of the compressed data. The block type gives the per-block 84 compression used; each block is compressed independently. The checksum 85 algorithm is described in the leveldb/crc package. 86 87 The decompressed block data consists of a sequence of key/value entries 88 followed by a trailer. Each key is encoded as a shared prefix length and a 89 remainder string. For example, if two adjacent keys are "tweedledee" and 90 "tweedledum", then the second key would be encoded as {8, "um"}. The shared 91 prefix length is varint encoded. The remainder string and the value are 92 encoded as a varint-encoded length followed by the literal contents. To 93 continue the example, suppose that the key "tweedledum" mapped to the value 94 "socks". The encoded key/value entry would be: "\x08\x02\x05umsocks". 95 96 Every block has a restart interval I. 
Every I'th key/value entry in that block 97 is called a restart point, and shares no key prefix with the previous entry. 98 Continuing the example above, if the key after "tweedledum" was "two", but was 99 part of a restart point, then that key would be encoded as {0, "two"} instead 100 of {2, "o"}. If a block has P restart points, then the block trailer consists 101 of (P+1)*4 bytes: (P+1) little-endian uint32 values. The first P of these 102 uint32 values are the block offsets of each restart point. The final uint32 103 value is P itself. Thus, when seeking for a particular key, one can use binary 104 search to find the largest restart point whose key is <= the key sought. 105 106 An index block is a block with N key/value entries. The i'th value is the 107 encoded block handle of the i'th data block. The i'th key is a separator for 108 i < N-1, and a successor for i == N-1. The separator between blocks i and i+1 109 is a key that is >= every key in block i and is < every key in block i+1. The 110 successor for the final block is a key that is >= every key in block N-1. The 111 index block restart interval is 1: every entry is a restart point. 112 113 The table footer is exactly 48 bytes long: 114 - the block handle for the metaindex block, 115 - the block handle for the index block, 116 - padding to take the two items above up to 40 bytes, 117 - an 8-byte magic string. 118 119 A block handle is an offset and a length; the length does not include the 5 120 byte trailer. Both numbers are varint-encoded, with no padding between the two 121 values. The maximum size of an encoded block handle is therefore 20 bytes. 122 */ 123 124 const ( 125 blockTrailerLen = 5 126 footerLen = 48 127 128 magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb" 129 130 // The block type gives the per-block compression format. 131 // These constants are part of the file format and should not be changed. 
132 // They are different from the db.Compression constants because the latter 133 // are designed so that the zero value of the db.Compression type means to 134 // use the default compression (which is snappy). 135 noCompressionBlockType = 0 136 snappyCompressionBlockType = 1 137 )