
     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     5  // +build rocksdb
     7  package main
     9  import (
    10  	"log"
    11  	"strconv"
    12  	"strings"
    14  	""
    15  	""
    16  	""
    17  	""
    18  )
// Adapters for rocksDB

// rocksDB wraps a *engine.RocksDB; newRocksDB returns it as a DB, and its
// methods forward to the wrapped engine.
type rocksDB struct {
	// d is the underlying RocksDB engine.
	d       *engine.RocksDB
	// ballast is a 1 GiB slice allocated by newRocksDB; none of the methods
	// visible here read it.
	// NOTE(review): presumably held only to occupy heap space — confirm intent.
	ballast []byte
}
    26  func newRocksDB(dir string) DB {
    27  	// TODO: match Pebble / Rocks options
    28  	r, err := engine.NewRocksDB(
    29  		engine.RocksDBConfig{
    30  			Dir: dir,
    31  		},
    32  		engine.NewRocksDBCache(cacheSize),
    33  	)
    34  	if err != nil {
    35  		log.Fatal(err)
    36  	}
    37  	return rocksDB{
    38  		d:       r,
    39  		ballast: make([]byte, 1<<30),
    40  	}
    41  }
// rocksDBIterator wraps an engine.Iterator together with the bounds it was
// created with.
type rocksDBIterator struct {
	// iter is the wrapped engine iterator that performs all positioning.
	iter       engine.Iterator
	// lowerBound is the key First seeks to.
	lowerBound []byte
	// upperBound is the key Last seeks backwards from.
	upperBound []byte
}
// rocksDBBatch wraps an engine.Batch; its methods take Pebble-style arguments
// and forward to the wrapped batch.
type rocksDBBatch struct {
	// batch is the wrapped engine batch that receives every operation.
	batch engine.Batch
}
    53  func (i rocksDBIterator) SeekGE(key []byte) bool {
    54  	// TODO: unnecessary overhead here. Change the interface.
    55  	userKey, _, ok := mvccSplitKey(key)
    56  	if !ok {
    57  		panic("mvccSplitKey failed")
    58  	}
    59  	i.iter.Seek(engine.MVCCKey{
    60  		Key: userKey,
    61  	})
    62  	return i.Valid()
    63  }
    65  func (i rocksDBIterator) Valid() bool {
    66  	valid, _ := i.iter.Valid()
    67  	return valid
    68  }
    70  func (i rocksDBIterator) Key() []byte {
    71  	key := i.iter.Key()
    72  	return []byte(key.Key)
    73  }
// Value returns whatever the wrapped iterator's Value method returns for the
// current position.
func (i rocksDBIterator) Value() []byte {
	return i.iter.Value()
}
// First positions the iterator by calling SeekGE with the stored lower bound
// and returns SeekGE's result.
// NOTE(review): SeekGE panics when mvccSplitKey rejects its input; whether a
// nil lowerBound is accepted is not visible here — confirm.
func (i rocksDBIterator) First() bool {
	return i.SeekGE(i.lowerBound)
}
    83  func (i rocksDBIterator) Next() bool {
    84  	i.iter.Next()
    85  	valid, _ := i.iter.Valid()
    86  	return valid
    87  }
    89  func (i rocksDBIterator) Last() bool {
    90  	// TODO: unnecessary overhead here. Change the interface.
    91  	userKey, _, ok := mvccSplitKey(i.upperBound)
    92  	if !ok {
    93  		panic("mvccSplitKey failed")
    94  	}
    95  	i.iter.SeekReverse(engine.MVCCKey{
    96  		Key: userKey,
    97  	})
    98  	return i.Valid()
    99  }
   101  func (i rocksDBIterator) Prev() bool {
   102  	i.iter.Prev()
   103  	valid, _ := i.iter.Valid()
   104  	return valid
   105  }
// Close closes the wrapped iterator. The wrapped Close call yields no value
// here, so this method always returns nil.
func (i rocksDBIterator) Close() error {
	i.iter.Close()
	return nil
}
   112  func (b rocksDBBatch) Commit(opts *pebble.WriteOptions) error {
   113  	return b.batch.Commit(opts.Sync)
   114  }
   116  func (b rocksDBBatch) Set(key, value []byte, _ *pebble.WriteOptions) error {
   117  	// TODO: unnecessary overhead here. Change the interface.
   118  	userKey, _, ok := mvccSplitKey(key)
   119  	if !ok {
   120  		panic("mvccSplitKey failed")
   121  	}
   122  	ts := hlc.Timestamp{WallTime: 1}
   123  	return b.batch.Put(engine.MVCCKey{Key: userKey, Timestamp: ts}, value)
   124  }
// LogData forwards data to the wrapped batch's LogData method and returns its
// error. The write options argument is ignored.
func (b rocksDBBatch) LogData(data []byte, _ *pebble.WriteOptions) error {
	return b.batch.LogData(data)
}
// Repr returns whatever the wrapped batch's Repr method returns.
func (b rocksDBBatch) Repr() []byte {
	return b.batch.Repr()
}
// Flush forwards to the wrapped engine's Flush method and returns its error.
func (r rocksDB) Flush() error {
	return r.d.Flush()
}
   138  func (r rocksDB) NewIter(opts *pebble.IterOptions) iterator {
   139  	ropts := engine.IterOptions{}
   140  	if opts != nil {
   141  		ropts.LowerBound = opts.LowerBound
   142  		ropts.UpperBound = opts.UpperBound
   143  	} else {
   144  		ropts.UpperBound = roachpb.KeyMax
   145  	}
   146  	iter := r.d.NewIterator(ropts)
   147  	return rocksDBIterator{
   148  		iter:       iter,
   149  		lowerBound: ropts.LowerBound,
   150  		upperBound: ropts.UpperBound,
   151  	}
   152  }
// NewBatch creates a new batch on the wrapped engine and returns it wrapped in
// a rocksDBBatch.
func (r rocksDB) NewBatch() batch {
	return rocksDBBatch{r.d.NewBatch()}
}
   158  func (r rocksDB) Scan(key []byte, count int64, reverse bool) error {
   159  	// TODO: unnecessary overhead here. Change the interface.
   160  	beginKey, _, ok := mvccSplitKey(key)
   161  	if !ok {
   162  		panic("mvccSplitKey failed")
   163  	}
   164  	endKey := roachpb.KeyMax
   165  	ropts := engine.IterOptions{
   166  		LowerBound: key,
   167  	}
   168  	if reverse {
   169  		endKey = beginKey
   170  		beginKey = roachpb.KeyMin
   171  		ropts.UpperBound = key
   172  		ropts.LowerBound = nil
   173  	}
   175  	iter := r.d.NewIterator(ropts)
   176  	defer iter.Close()
   177  	// We hard code a timestamp with walltime=1 in the data, so we just have to
   178  	// use a larger timestamp here (walltime=10).
   179  	ts := hlc.Timestamp{WallTime: 10}
   180  	_, numKVs, _, intents, err := iter.MVCCScan(
   181  		beginKey, endKey, count, ts, engine.MVCCScanOptions{Reverse: reverse},
   182  	)
   183  	if numKVs > count {
   184  		panic("MVCCScan returned too many keys")
   185  	}
   186  	if len(intents) > 0 {
   187  		panic("MVCCScan found intents")
   188  	}
   189  	return err
   190  }
   192  func (r rocksDB) Metrics() *pebble.VersionMetrics {
   193  	stats := r.d.GetCompactionStats()
   194  	var inLevelsSection bool
   195  	var vMetrics pebble.VersionMetrics
   196  	for _, line := range strings.Split(stats, "\n") {
   197  		if strings.HasPrefix(line, "-----") {
   198  			continue
   199  		}
   200  		if !inLevelsSection && strings.HasPrefix(line, "Level") {
   201  			inLevelsSection = true
   202  			continue
   203  		}
   204  		if strings.HasPrefix(line, "Flush(GB):") {
   205  			// line looks like:
   206  			// "Flush(GB): cumulative 0.302, interval 0.302"
   207  			// pretend cumulative flush is WAL size and L0 input since we don't have
   208  			// access to WAL stats in rocks.
   209  			// TODO: this is slightly different than Pebble which uses the real physical
   210  			// WAL size. This way prevents compression ratio from affecting write-amp,
   211  			// but it also prevents apples-to-apples w-amp comparison.
   212  			fields := strings.Fields(line)
   213  			field := fields[2]
   214  			walWrittenGB, _ := strconv.ParseFloat(field[0:len(field)-1], 64)
   215  			vMetrics.Levels[0].BytesIn = uint64(1024.0 * 1024.0 * 1024.0 * walWrittenGB)
   216  			vMetrics.WAL.BytesWritten = vMetrics.Levels[0].BytesIn
   217  		}
   218  		if inLevelsSection && strings.HasPrefix(line, " Sum") {
   219  			inLevelsSection = false
   220  			continue
   221  		}
   222  		if inLevelsSection {
   223  			fields := strings.Fields(line)
   224  			level, _ := strconv.Atoi(fields[0][1:])
   225  			if level < 0 || level > 6 {
   226  				panic("expected at most 7 levels")
   227  			}
   228  			vMetrics.Levels[level].NumFiles, _ = strconv.ParseInt(strings.Split(fields[1], "/")[0], 10, 64)
   229  			size, _ := strconv.ParseFloat(fields[2], 64)
   230  			if fields[3] == "KB" {
   231  				size *= 1024.0
   232  			} else if fields[3] == "MB" {
   233  				size *= 1024.0 * 1024.0
   234  			} else if fields[3] == "GB" {
   235  				size *= 1024.0 * 1024.0 * 1024.0
   236  			} else {
   237  				panic("unknown unit")
   238  			}
   239  			vMetrics.Levels[level].Size = uint64(size)
   240  			vMetrics.Levels[level].Score, _ = strconv.ParseFloat(fields[4], 64)
   241  			if level > 0 {
   242  				bytesInGB, _ := strconv.ParseFloat(fields[6], 64)
   243  				vMetrics.Levels[level].BytesIn = uint64(1024.0 * 1024.0 * 1024.0 * bytesInGB)
   244  			}
   245  			bytesMovedGB, _ := strconv.ParseFloat(fields[10], 64)
   246  			vMetrics.Levels[level].BytesMoved = uint64(1024.0 * 1024.0 * 1024.0 * bytesMovedGB)
   247  			bytesReadGB, _ := strconv.ParseFloat(fields[5], 64)
   248  			vMetrics.Levels[level].BytesRead = uint64(1024.0 * 1024.0 * 1024.0 * bytesReadGB)
   249  			bytesWrittenGB, _ := strconv.ParseFloat(fields[8], 64)
   250  			vMetrics.Levels[level].BytesWritten = uint64(1024.0 * 1024.0 * 1024.0 * bytesWrittenGB)
   251  		}
   252  	}
   253  	return &vMetrics
   254  }