github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/code.google.com/p/leveldb-go/leveldb/table/table.go (about)

     1  // Copyright 2011 The LevelDB-Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package table implements readers and writers of leveldb tables.
     7  
     8  Tables are either opened for reading or created for writing but not both.
     9  
    10  A reader can create iterators, which yield all key/value pairs whose keys
    11  are 'greater than or equal' to a starting key. There may be multiple key/
    12  value pairs that have the same key.
    13  
    14  A reader can be used concurrently. Multiple goroutines can call Find
    15  concurrently, and each iterator can run concurrently with other iterators.
    16  However, any particular iterator should not be used concurrently, and
    17  iterators should not be used once a reader is closed.
    18  
    19  A writer writes key/value pairs in increasing key order, and cannot be used
    20  concurrently. A table cannot be read until the writer has finished.
    21  
    22  Readers and writers can be created with various options. Passing a nil
    23  Options pointer is valid and means to use the default values.
    24  
    25  One such option is to define the 'less than' ordering for keys. The default
    26  Comparer uses the natural ordering consistent with bytes.Compare. The same
    27  ordering should be used for reading and writing a table.
    28  
    29  To return the value for a key:
    30  
    31  	r := table.NewReader(file, options)
    32  	defer r.Close()
    33  	return r.Get(key)
    34  
    35  To count the number of entries in a table:
    36  
    37  	i, n := r.Find(nil), 0
    38  	for i.Next() {
    39  		n++
    40  	}
    41  	if err := i.Close(); err != nil {
    42  		return 0, err
    43  	}
    44  	return n, nil
    45  
    46  To write a table with three entries:
    47  
    48  	w := table.NewWriter(file, options)
    49  	if err := w.Set([]byte("apple"), []byte("red")); err != nil {
    50  		w.Close()
    51  		return err
    52  	}
    53  	if err := w.Set([]byte("banana"), []byte("yellow")); err != nil {
    54  		w.Close()
    55  		return err
    56  	}
    57  	if err := w.Set([]byte("cherry"), []byte("red")); err != nil {
    58  		w.Close()
    59  		return err
    60  	}
    61  	return w.Close()
    62  */
    63  package table
    64  
    65  /*
    66  The table file format looks like:
    67  
    68  <start_of_file>
    69  [data block 0]
    70  [data block 1]
    71  ...
    72  [data block N-1]
    73  [meta block 0]
    74  [meta block 1]
    75  ...
    76  [meta block K-1]
    77  [metaindex block]
    78  [index block]
    79  [footer]
    80  <end_of_file>
    81  
    82  Each block consists of some data and a 5 byte trailer: a 1 byte block type and
    83  a 4 byte checksum of the compressed data. The block type gives the per-block
    84  compression used; each block is compressed independently. The checksum
    85  algorithm is described in the leveldb/crc package.
    86  
    87  The decompressed block data consists of a sequence of key/value entries
    88  followed by a trailer. Each key is encoded as a shared prefix length and a
    89  remainder string. For example, if two adjacent keys are "tweedledee" and
    90  "tweedledum", then the second key would be encoded as {8, "um"}. The shared
    91  prefix length is varint encoded. The remainder string and the value are
    92  encoded as a varint-encoded length followed by the literal contents. To
    93  continue the example, suppose that the key "tweedledum" mapped to the value
    94  "socks". The encoded key/value entry would be: "\x08\x02\x05umsocks".
    95  
    96  Every block has a restart interval I. Every I'th key/value entry in that block
    97  is called a restart point, and shares no key prefix with the previous entry.
    98  Continuing the example above, if the key after "tweedledum" was "two", but was
    99  part of a restart point, then that key would be encoded as {0, "two"} instead
   100  of {2, "o"}. If a block has P restart points, then the block trailer consists
   101  of (P+1)*4 bytes: (P+1) little-endian uint32 values. The first P of these
   102  uint32 values are the block offsets of each restart point. The final uint32
   103  value is P itself. Thus, when seeking for a particular key, one can use binary
   104  search to find the largest restart point whose key is <= the key sought.
   105  
   106  An index block is a block with N key/value entries. The i'th value is the
   107  encoded block handle of the i'th data block. The i'th key is a separator for
   108  i < N-1, and a successor for i == N-1. The separator between blocks i and i+1
   109  is a key that is >= every key in block i and is < every key in block i+1. The
   110  successor for the final block is a key that is >= every key in block N-1. The
   111  index block restart interval is 1: every entry is a restart point.
   112  
   113  The table footer is exactly 48 bytes long:
   114    - the block handle for the metaindex block,
   115    - the block handle for the index block,
   116    - padding to take the two items above up to 40 bytes,
   117    - an 8-byte magic string.
   118  
   119  A block handle is an offset and a length; the length does not include the 5
   120  byte trailer. Both numbers are varint-encoded, with no padding between the two
   121  values. The maximum size of an encoded block handle is therefore 20 bytes.
   122  */
   123  
   124  const (
   125  	blockTrailerLen = 5  // bytes in each block's trailer: a 1 byte block type plus a 4 byte checksum
   126  	footerLen       = 48 // exact length in bytes of the table footer
   127  
   128  	magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb" // the 8-byte magic string stored in the table footer
   129  
   130  	// The block type gives the per-block compression format.
   131  	// These constants are part of the file format and should not be changed.
   132  	// They are different from the db.Compression constants because the latter
   133  	// are designed so that the zero value of the db.Compression type means to
   134  	// use the default compression (which is snappy).
   135  	noCompressionBlockType     = 0 // block type byte for a block whose data is not compressed
   136  	snappyCompressionBlockType = 1 // block type byte for a block compressed with snappy
   137  )