github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/database/leveldb.chai2010/src/block_builder.cc (about)

     1  // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
     4  //
     5  // BlockBuilder generates blocks where keys are prefix-compressed:
     6  //
     7  // When we store a key, we drop the prefix shared with the previous
     8  // string.  This helps reduce the space requirement significantly.
     9  // Furthermore, once every K keys, we do not apply the prefix
    10  // compression and store the entire key.  We call this a "restart
    11  // point".  The tail end of the block stores the offsets of all of the
    12  // restart points, and can be used to do a binary search when looking
    13  // for a particular key.  Values are stored as-is (without compression)
    14  // immediately following the corresponding key.
    15  //
    16  // An entry for a particular key-value pair has the form:
    17  //     shared_bytes: varint32
    18  //     unshared_bytes: varint32
    19  //     value_length: varint32
    20  //     key_delta: char[unshared_bytes]
    21  //     value: char[value_length]
    22  // shared_bytes == 0 for restart points.
    23  //
    24  // The trailer of the block has the form:
    25  //     restarts: uint32[num_restarts]
    26  //     num_restarts: uint32
    27  // restarts[i] contains the offset within the block of the ith restart point.
    28  
    29  #include "table/block_builder.h"
    30  
    31  #include <algorithm>
    32  #include <assert.h>
    33  #include "leveldb/comparator.h"
    34  #include "leveldb/table_builder.h"
    35  #include "util/coding.h"
    36  
    37  namespace leveldb {
    38  
    39  BlockBuilder::BlockBuilder(const Options* options)
    40      : options_(options),
    41        restarts_(),
    42        counter_(0),
    43        finished_(false) {
    44    assert(options->block_restart_interval >= 1);
    45    restarts_.push_back(0);       // First restart point is at offset 0
    46  }
    47  
    48  void BlockBuilder::Reset() {
    49    buffer_.clear();
    50    restarts_.clear();
    51    restarts_.push_back(0);       // First restart point is at offset 0
    52    counter_ = 0;
    53    finished_ = false;
    54    last_key_.clear();
    55  }
    56  
    57  size_t BlockBuilder::CurrentSizeEstimate() const {
    58    return (buffer_.size() +                        // Raw data buffer
    59            restarts_.size() * sizeof(uint32_t) +   // Restart array
    60            sizeof(uint32_t));                      // Restart array length
    61  }
    62  
    63  Slice BlockBuilder::Finish() {
    64    // Append restart array
    65    for (size_t i = 0; i < restarts_.size(); i++) {
    66      PutFixed32(&buffer_, restarts_[i]);
    67    }
    68    PutFixed32(&buffer_, restarts_.size());
    69    finished_ = true;
    70    return Slice(buffer_);
    71  }
    72  
    73  void BlockBuilder::Add(const Slice& key, const Slice& value) {
    74    Slice last_key_piece(last_key_);
    75    assert(!finished_);
    76    assert(counter_ <= options_->block_restart_interval);
    77    assert(buffer_.empty() // No values yet?
    78           || options_->comparator->Compare(key, last_key_piece) > 0);
    79    size_t shared = 0;
    80    if (counter_ < options_->block_restart_interval) {
    81      // See how much sharing to do with previous string
    82      const size_t min_length = std::min(last_key_piece.size(), key.size());
    83      while ((shared < min_length) && (last_key_piece[shared] == key[shared])) {
    84        shared++;
    85      }
    86    } else {
    87      // Restart compression
    88      restarts_.push_back(buffer_.size());
    89      counter_ = 0;
    90    }
    91    const size_t non_shared = key.size() - shared;
    92  
    93    // Add "<shared><non_shared><value_size>" to buffer_
    94    PutVarint32(&buffer_, shared);
    95    PutVarint32(&buffer_, non_shared);
    96    PutVarint32(&buffer_, value.size());
    97  
    98    // Add string delta to buffer_ followed by value
    99    buffer_.append(key.data() + shared, non_shared);
   100    buffer_.append(value.data(), value.size());
   101  
   102    // Update state
   103    last_key_.resize(shared);
   104    last_key_.append(key.data() + shared, non_shared);
   105    assert(Slice(last_key_) == key);
   106    counter_++;
   107  }
   108  
   109  }  // namespace leveldb