github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/database/leveldb.chai2010/src/version_set.h (about)

     1  // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
     4  //
     5  // The representation of a DBImpl consists of a set of Versions.  The
     6  // newest version is called "current".  Older versions may be kept
     7  // around to provide a consistent view to live iterators.
     8  //
     9  // Each Version keeps track of a set of Table files per level.  The
    10  // entire set of versions is maintained in a VersionSet.
    11  //
    12  // Version,VersionSet are thread-compatible, but require external
    13  // synchronization on all accesses.
    14  
    15  #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_
    16  #define STORAGE_LEVELDB_DB_VERSION_SET_H_
    17  
    18  #include <map>
    19  #include <set>
    20  #include <vector>
    21  #include "db/dbformat.h"
    22  #include "db/version_edit.h"
    23  #include "port/port.h"
    24  #include "port/thread_annotations.h"
    25  
    26  namespace leveldb {
    27  
    28  namespace log { class Writer; }
    29  
    30  class Compaction;
    31  class Iterator;
    32  class MemTable;
    33  class TableBuilder;
    34  class TableCache;
    35  class Version;
    36  class VersionSet;
    37  class WritableFile;
    38  
    39  // Return the smallest index i such that files[i]->largest >= key.
    40  // Return files.size() if there is no such file.
    41  // REQUIRES: "files" contains a sorted list of non-overlapping files.
    42  extern int FindFile(const InternalKeyComparator& icmp,
    43                      const std::vector<FileMetaData*>& files,
    44                      const Slice& key);
    45  
    46  // Returns true iff some file in "files" overlaps the user key range
    47  // [*smallest,*largest].
    48  // smallest==NULL represents a key smaller than all keys in the DB.
    49  // largest==NULL represents a key largest than all keys in the DB.
    50  // REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
    51  //           in sorted order.
    52  extern bool SomeFileOverlapsRange(
    53      const InternalKeyComparator& icmp,
    54      bool disjoint_sorted_files,
    55      const std::vector<FileMetaData*>& files,
    56      const Slice* smallest_user_key,
    57      const Slice* largest_user_key);
    58  
    59  class Version {
    60   public:
    61    // Append to *iters a sequence of iterators that will
    62    // yield the contents of this Version when merged together.
    63    // REQUIRES: This version has been saved (see VersionSet::SaveTo)
    64    void AddIterators(const ReadOptions&, std::vector<Iterator*>* iters);
    65  
    66    // Lookup the value for key.  If found, store it in *val and
    67    // return OK.  Else return a non-OK status.  Fills *stats.
    68    // REQUIRES: lock is not held
    69    struct GetStats {
    70      FileMetaData* seek_file;
    71      int seek_file_level;
    72    };
    73    Status Get(const ReadOptions&, const LookupKey& key, std::string* val,
    74               GetStats* stats);
    75  
    76    // Adds "stats" into the current state.  Returns true if a new
    77    // compaction may need to be triggered, false otherwise.
    78    // REQUIRES: lock is held
    79    bool UpdateStats(const GetStats& stats);
    80  
    81    // Record a sample of bytes read at the specified internal key.
    82    // Samples are taken approximately once every config::kReadBytesPeriod
    83    // bytes.  Returns true if a new compaction may need to be triggered.
    84    // REQUIRES: lock is held
    85    bool RecordReadSample(Slice key);
    86  
    87    // Reference count management (so Versions do not disappear out from
    88    // under live iterators)
    89    void Ref();
    90    void Unref();
    91  
    92    void GetOverlappingInputs(
    93        int level,
    94        const InternalKey* begin,         // NULL means before all keys
    95        const InternalKey* end,           // NULL means after all keys
    96        std::vector<FileMetaData*>* inputs);
    97  
    98    // Returns true iff some file in the specified level overlaps
    99    // some part of [*smallest_user_key,*largest_user_key].
   100    // smallest_user_key==NULL represents a key smaller than all keys in the DB.
   101    // largest_user_key==NULL represents a key largest than all keys in the DB.
   102    bool OverlapInLevel(int level,
   103                        const Slice* smallest_user_key,
   104                        const Slice* largest_user_key);
   105  
   106    // Return the level at which we should place a new memtable compaction
   107    // result that covers the range [smallest_user_key,largest_user_key].
   108    int PickLevelForMemTableOutput(const Slice& smallest_user_key,
   109                                   const Slice& largest_user_key);
   110  
   111    int NumFiles(int level) const { return files_[level].size(); }
   112  
   113    // Return a human readable string that describes this version's contents.
   114    std::string DebugString() const;
   115  
   116   private:
   117    friend class Compaction;
   118    friend class VersionSet;
   119  
   120    class LevelFileNumIterator;
   121    Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
   122  
   123    // Call func(arg, level, f) for every file that overlaps user_key in
   124    // order from newest to oldest.  If an invocation of func returns
   125    // false, makes no more calls.
   126    //
   127    // REQUIRES: user portion of internal_key == user_key.
   128    void ForEachOverlapping(Slice user_key, Slice internal_key,
   129                            void* arg,
   130                            bool (*func)(void*, int, FileMetaData*));
   131  
   132    VersionSet* vset_;            // VersionSet to which this Version belongs
   133    Version* next_;               // Next version in linked list
   134    Version* prev_;               // Previous version in linked list
   135    int refs_;                    // Number of live refs to this version
   136  
   137    // List of files per level
   138    std::vector<FileMetaData*> files_[config::kNumLevels];
   139  
   140    // Next file to compact based on seek stats.
   141    FileMetaData* file_to_compact_;
   142    int file_to_compact_level_;
   143  
   144    // Level that should be compacted next and its compaction score.
   145    // Score < 1 means compaction is not strictly needed.  These fields
   146    // are initialized by Finalize().
   147    double compaction_score_;
   148    int compaction_level_;
   149  
   150    explicit Version(VersionSet* vset)
   151        : vset_(vset), next_(this), prev_(this), refs_(0),
   152          file_to_compact_(NULL),
   153          file_to_compact_level_(-1),
   154          compaction_score_(-1),
   155          compaction_level_(-1) {
   156    }
   157  
   158    ~Version();
   159  
   160    // No copying allowed
   161    Version(const Version&);
   162    void operator=(const Version&);
   163  };
   164  
   165  class VersionSet {
   166   public:
   167    VersionSet(const std::string& dbname,
   168               const Options* options,
   169               TableCache* table_cache,
   170               const InternalKeyComparator*);
   171    ~VersionSet();
   172  
   173    // Apply *edit to the current version to form a new descriptor that
   174    // is both saved to persistent state and installed as the new
   175    // current version.  Will release *mu while actually writing to the file.
   176    // REQUIRES: *mu is held on entry.
   177    // REQUIRES: no other thread concurrently calls LogAndApply()
   178    Status LogAndApply(VersionEdit* edit, port::Mutex* mu)
   179        EXCLUSIVE_LOCKS_REQUIRED(mu);
   180  
   181    // Recover the last saved descriptor from persistent storage.
   182    Status Recover();
   183  
   184    // Return the current version.
   185    Version* current() const { return current_; }
   186  
   187    // Return the current manifest file number
   188    uint64_t ManifestFileNumber() const { return manifest_file_number_; }
   189  
   190    // Allocate and return a new file number
   191    uint64_t NewFileNumber() { return next_file_number_++; }
   192  
   193    // Arrange to reuse "file_number" unless a newer file number has
   194    // already been allocated.
   195    // REQUIRES: "file_number" was returned by a call to NewFileNumber().
   196    void ReuseFileNumber(uint64_t file_number) {
   197      if (next_file_number_ == file_number + 1) {
   198        next_file_number_ = file_number;
   199      }
   200    }
   201  
   202    // Return the number of Table files at the specified level.
   203    int NumLevelFiles(int level) const;
   204  
   205    // Return the combined file size of all files at the specified level.
   206    int64_t NumLevelBytes(int level) const;
   207  
   208    // Return the last sequence number.
   209    uint64_t LastSequence() const { return last_sequence_; }
   210  
   211    // Set the last sequence number to s.
   212    void SetLastSequence(uint64_t s) {
   213      assert(s >= last_sequence_);
   214      last_sequence_ = s;
   215    }
   216  
   217    // Mark the specified file number as used.
   218    void MarkFileNumberUsed(uint64_t number);
   219  
   220    // Return the current log file number.
   221    uint64_t LogNumber() const { return log_number_; }
   222  
   223    // Return the log file number for the log file that is currently
   224    // being compacted, or zero if there is no such log file.
   225    uint64_t PrevLogNumber() const { return prev_log_number_; }
   226  
   227    // Pick level and inputs for a new compaction.
   228    // Returns NULL if there is no compaction to be done.
   229    // Otherwise returns a pointer to a heap-allocated object that
   230    // describes the compaction.  Caller should delete the result.
   231    Compaction* PickCompaction();
   232  
   233    // Return a compaction object for compacting the range [begin,end] in
   234    // the specified level.  Returns NULL if there is nothing in that
   235    // level that overlaps the specified range.  Caller should delete
   236    // the result.
   237    Compaction* CompactRange(
   238        int level,
   239        const InternalKey* begin,
   240        const InternalKey* end);
   241  
   242    // Return the maximum overlapping data (in bytes) at next level for any
   243    // file at a level >= 1.
   244    int64_t MaxNextLevelOverlappingBytes();
   245  
   246    // Create an iterator that reads over the compaction inputs for "*c".
   247    // The caller should delete the iterator when no longer needed.
   248    Iterator* MakeInputIterator(Compaction* c);
   249  
   250    // Returns true iff some level needs a compaction.
   251    bool NeedsCompaction() const {
   252      Version* v = current_;
   253      return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL);
   254    }
   255  
   256    // Add all files listed in any live version to *live.
   257    // May also mutate some internal state.
   258    void AddLiveFiles(std::set<uint64_t>* live);
   259  
   260    // Return the approximate offset in the database of the data for
   261    // "key" as of version "v".
   262    uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key);
   263  
   264    // Return a human-readable short (single-line) summary of the number
   265    // of files per level.  Uses *scratch as backing store.
   266    struct LevelSummaryStorage {
   267      char buffer[100];
   268    };
   269    const char* LevelSummary(LevelSummaryStorage* scratch) const;
   270  
   271   private:
   272    class Builder;
   273  
   274    friend class Compaction;
   275    friend class Version;
   276  
   277    void Finalize(Version* v);
   278  
   279    void GetRange(const std::vector<FileMetaData*>& inputs,
   280                  InternalKey* smallest,
   281                  InternalKey* largest);
   282  
   283    void GetRange2(const std::vector<FileMetaData*>& inputs1,
   284                   const std::vector<FileMetaData*>& inputs2,
   285                   InternalKey* smallest,
   286                   InternalKey* largest);
   287  
   288    void SetupOtherInputs(Compaction* c);
   289  
   290    // Save current contents to *log
   291    Status WriteSnapshot(log::Writer* log);
   292  
   293    void AppendVersion(Version* v);
   294  
   295    bool ManifestContains(const std::string& record) const;
   296  
   297    Env* const env_;
   298    const std::string dbname_;
   299    const Options* const options_;
   300    TableCache* const table_cache_;
   301    const InternalKeyComparator icmp_;
   302    uint64_t next_file_number_;
   303    uint64_t manifest_file_number_;
   304    uint64_t last_sequence_;
   305    uint64_t log_number_;
   306    uint64_t prev_log_number_;  // 0 or backing store for memtable being compacted
   307  
   308    // Opened lazily
   309    WritableFile* descriptor_file_;
   310    log::Writer* descriptor_log_;
   311    Version dummy_versions_;  // Head of circular doubly-linked list of versions.
   312    Version* current_;        // == dummy_versions_.prev_
   313  
   314    // Per-level key at which the next compaction at that level should start.
   315    // Either an empty string, or a valid InternalKey.
   316    std::string compact_pointer_[config::kNumLevels];
   317  
   318    // No copying allowed
   319    VersionSet(const VersionSet&);
   320    void operator=(const VersionSet&);
   321  };
   322  
   323  // A Compaction encapsulates information about a compaction.
   324  class Compaction {
   325   public:
   326    ~Compaction();
   327  
   328    // Return the level that is being compacted.  Inputs from "level"
   329    // and "level+1" will be merged to produce a set of "level+1" files.
   330    int level() const { return level_; }
   331  
   332    // Return the object that holds the edits to the descriptor done
   333    // by this compaction.
   334    VersionEdit* edit() { return &edit_; }
   335  
   336    // "which" must be either 0 or 1
   337    int num_input_files(int which) const { return inputs_[which].size(); }
   338  
   339    // Return the ith input file at "level()+which" ("which" must be 0 or 1).
   340    FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
   341  
   342    // Maximum size of files to build during this compaction.
   343    uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
   344  
   345    // Is this a trivial compaction that can be implemented by just
   346    // moving a single input file to the next level (no merging or splitting)
   347    bool IsTrivialMove() const;
   348  
   349    // Add all inputs to this compaction as delete operations to *edit.
   350    void AddInputDeletions(VersionEdit* edit);
   351  
   352    // Returns true if the information we have available guarantees that
   353    // the compaction is producing data in "level+1" for which no data exists
   354    // in levels greater than "level+1".
   355    bool IsBaseLevelForKey(const Slice& user_key);
   356  
   357    // Returns true iff we should stop building the current output
   358    // before processing "internal_key".
   359    bool ShouldStopBefore(const Slice& internal_key);
   360  
   361    // Release the input version for the compaction, once the compaction
   362    // is successful.
   363    void ReleaseInputs();
   364  
   365   private:
   366    friend class Version;
   367    friend class VersionSet;
   368  
   369    explicit Compaction(int level);
   370  
   371    int level_;
   372    uint64_t max_output_file_size_;
   373    Version* input_version_;
   374    VersionEdit edit_;
   375  
   376    // Each compaction reads inputs from "level_" and "level_+1"
   377    std::vector<FileMetaData*> inputs_[2];      // The two sets of inputs
   378  
   379    // State used to check for number of of overlapping grandparent files
   380    // (parent == level_ + 1, grandparent == level_ + 2)
   381    std::vector<FileMetaData*> grandparents_;
   382    size_t grandparent_index_;  // Index in grandparent_starts_
   383    bool seen_key_;             // Some output key has been seen
   384    int64_t overlapped_bytes_;  // Bytes of overlap between current output
   385                                // and grandparent files
   386  
   387    // State for implementing IsBaseLevelForKey
   388  
   389    // level_ptrs_ holds indices into input_version_->levels_: our state
   390    // is that we are positioned at one of the file ranges for each
   391    // higher level than the ones involved in this compaction (i.e. for
   392    // all L >= level_ + 2).
   393    size_t level_ptrs_[config::kNumLevels];
   394  };
   395  
   396  }  // namespace leveldb
   397  
   398  #endif  // STORAGE_LEVELDB_DB_VERSION_SET_H_