github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/database/leveldb.chai2010/src/bloom.cc (about)

     1  // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
     4  
     5  #include "leveldb/filter_policy.h"
     6  
     7  #include "leveldb/slice.h"
     8  #include "util/hash.h"
     9  
    10  namespace leveldb {
    11  
    12  namespace {
    13  static uint32_t BloomHash(const Slice& key) {
    14    return Hash(key.data(), key.size(), 0xbc9f1d34);
    15  }
    16  
    17  class BloomFilterPolicy : public FilterPolicy {
    18   private:
    19    size_t bits_per_key_;
    20    size_t k_;
    21  
    22   public:
    23    explicit BloomFilterPolicy(int bits_per_key)
    24        : bits_per_key_(bits_per_key) {
    25      // We intentionally round down to reduce probing cost a little bit
    26      k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
    27      if (k_ < 1) k_ = 1;
    28      if (k_ > 30) k_ = 30;
    29    }
    30  
    31    virtual const char* Name() const {
    32      return "leveldb.BuiltinBloomFilter";
    33    }
    34  
    35    virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
    36      // Compute bloom filter size (in both bits and bytes)
    37      size_t bits = n * bits_per_key_;
    38  
    39      // For small n, we can see a very high false positive rate.  Fix it
    40      // by enforcing a minimum bloom filter length.
    41      if (bits < 64) bits = 64;
    42  
    43      size_t bytes = (bits + 7) / 8;
    44      bits = bytes * 8;
    45  
    46      const size_t init_size = dst->size();
    47      dst->resize(init_size + bytes, 0);
    48      dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
    49      char* array = &(*dst)[init_size];
    50      for (size_t i = 0; i < n; i++) {
    51        // Use double-hashing to generate a sequence of hash values.
    52        // See analysis in [Kirsch,Mitzenmacher 2006].
    53        uint32_t h = BloomHash(keys[i]);
    54        const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    55        for (size_t j = 0; j < k_; j++) {
    56          const uint32_t bitpos = h % bits;
    57          array[bitpos/8] |= (1 << (bitpos % 8));
    58          h += delta;
    59        }
    60      }
    61    }
    62  
    63    virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
    64      const size_t len = bloom_filter.size();
    65      if (len < 2) return false;
    66  
    67      const char* array = bloom_filter.data();
    68      const size_t bits = (len - 1) * 8;
    69  
    70      // Use the encoded k so that we can read filters generated by
    71      // bloom filters created using different parameters.
    72      const size_t k = array[len-1];
    73      if (k > 30) {
    74        // Reserved for potentially new encodings for short bloom filters.
    75        // Consider it a match.
    76        return true;
    77      }
    78  
    79      uint32_t h = BloomHash(key);
    80      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
    81      for (size_t j = 0; j < k; j++) {
    82        const uint32_t bitpos = h % bits;
    83        if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
    84        h += delta;
    85      }
    86      return true;
    87    }
    88  };
    89  }
    90  
    91  const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
    92    return new BloomFilterPolicy(bits_per_key);
    93  }
    94  
    95  }  // namespace leveldb