// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

// +build rocksdb

package main

import (
	"log"
	"strconv"
	"strings"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage/engine"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/petermattis/pebble"
)

// Adapters for rocksDB
//
// rocksDB wraps CockroachDB's engine.RocksDB so it can be driven through this
// tool's DB interface alongside Pebble. The ballast is a 1 GiB heap
// allocation held for the life of the wrapper; presumably it reserves memory
// headroom for benchmarking parity — TODO confirm intent.
type rocksDB struct {
	d       *engine.RocksDB
	ballast []byte
}

// newRocksDB opens (or creates) a RocksDB instance in dir and wraps it as a
// DB. On failure it terminates the process via log.Fatal instead of
// returning an error.
func newRocksDB(dir string) DB {
	// TODO: match Pebble / Rocks options
	r, err := engine.NewRocksDB(
		engine.RocksDBConfig{
			Dir: dir,
		},
		// cacheSize is a package-level knob defined elsewhere in this tool.
		engine.NewRocksDBCache(cacheSize),
	)
	if err != nil {
		log.Fatal(err)
	}
	return rocksDB{
		d:       r,
		ballast: make([]byte, 1<<30),
	}
}

// rocksDBIterator adapts engine.Iterator to this tool's iterator interface.
// lowerBound/upperBound record the bounds the iterator was created with so
// First and Last can re-seek to them.
type rocksDBIterator struct {
	iter       engine.Iterator
	lowerBound []byte
	upperBound []byte
}

// rocksDBBatch adapts engine.Batch to this tool's batch interface.
type rocksDBBatch struct {
	batch engine.Batch
}

// SeekGE positions the iterator at the first entry whose user key is >= the
// user-key portion of key and reports whether such an entry exists.
func (i rocksDBIterator) SeekGE(key []byte) bool {
	// TODO: unnecessary overhead here. Change the interface.
	// key is an encoded MVCC key; split off the user key and seek with a
	// zero timestamp in the seek key.
	userKey, _, ok := mvccSplitKey(key)
	if !ok {
		panic("mvccSplitKey failed")
	}
	i.iter.Seek(engine.MVCCKey{
		Key: userKey,
	})
	return i.Valid()
}

// Valid reports whether the iterator is positioned on an entry. Any error
// from the underlying iterator is discarded and treated as "not valid".
func (i rocksDBIterator) Valid() bool {
	valid, _ := i.iter.Valid()
	return valid
}

// Key returns the user-key portion of the current entry; the MVCC timestamp
// component is dropped.
func (i rocksDBIterator) Key() []byte {
	key := i.iter.Key()
	return []byte(key.Key)
}

// Value returns the current entry's value.
func (i rocksDBIterator) Value() []byte {
	return i.iter.Value()
}

// First seeks to the iterator's configured lower bound.
// NOTE(review): when the iterator was created with nil opts (see NewIter),
// lowerBound is nil, and mvccSplitKey on a nil key looks like it would fail
// and panic inside SeekGE — confirm callers always set a LowerBound before
// using First.
func (i rocksDBIterator) First() bool {
	return i.SeekGE(i.lowerBound)
}

// Next advances the iterator one entry and reports validity; errors from the
// underlying iterator are discarded.
func (i rocksDBIterator) Next() bool {
	i.iter.Next()
	valid, _ := i.iter.Valid()
	return valid
}

// Last positions the iterator on the last entry before the configured upper
// bound via a reverse seek on the upper bound's user key.
func (i rocksDBIterator) Last() bool {
	// TODO: unnecessary overhead here. Change the interface.
	userKey, _, ok := mvccSplitKey(i.upperBound)
	if !ok {
		panic("mvccSplitKey failed")
	}
	i.iter.SeekReverse(engine.MVCCKey{
		Key: userKey,
	})
	return i.Valid()
}

// Prev steps the iterator back one entry and reports validity; errors from
// the underlying iterator are discarded.
func (i rocksDBIterator) Prev() bool {
	i.iter.Prev()
	valid, _ := i.iter.Valid()
	return valid
}

// Close releases the underlying iterator. It always returns nil because
// engine.Iterator's Close reports no error.
func (i rocksDBIterator) Close() error {
	i.iter.Close()
	return nil
}

// Commit applies the batch, syncing the WAL iff opts.Sync is set.
func (b rocksDBBatch) Commit(opts *pebble.WriteOptions) error {
	return b.batch.Commit(opts.Sync)
}

// Set writes key/value into the batch. The pebble write options are ignored;
// durability is decided at Commit time.
func (b rocksDBBatch) Set(key, value []byte, _ *pebble.WriteOptions) error {
	// TODO: unnecessary overhead here. Change the interface.
	userKey, _, ok := mvccSplitKey(key)
	if !ok {
		panic("mvccSplitKey failed")
	}
	// All data is written at a fixed walltime=1 timestamp; Scan below relies
	// on this by reading at walltime=10.
	ts := hlc.Timestamp{WallTime: 1}
	return b.batch.Put(engine.MVCCKey{Key: userKey, Timestamp: ts}, value)
}

// LogData appends data to the batch's WAL payload without writing a key.
func (b rocksDBBatch) LogData(data []byte, _ *pebble.WriteOptions) error {
	return b.batch.LogData(data)
}

// Repr returns the batch's serialized representation.
func (b rocksDBBatch) Repr() []byte {
	return b.batch.Repr()
}

// Flush forces a memtable flush on the underlying RocksDB instance.
func (r rocksDB) Flush() error {
	return r.d.Flush()
}

// NewIter creates an iterator honoring the bounds in opts. With nil opts the
// iterator is unbounded below and bounded above by roachpb.KeyMax.
func (r rocksDB) NewIter(opts *pebble.IterOptions) iterator {
	ropts := engine.IterOptions{}
	if opts != nil {
		ropts.LowerBound = opts.LowerBound
		ropts.UpperBound = opts.UpperBound
	} else {
		ropts.UpperBound = roachpb.KeyMax
	}
	iter := r.d.NewIterator(ropts)
	return rocksDBIterator{
		iter:       iter,
		lowerBound: ropts.LowerBound,
		upperBound: ropts.UpperBound,
	}
}

// NewBatch returns a fresh write batch.
func (r rocksDB) NewBatch() batch {
	return rocksDBBatch{r.d.NewBatch()}
}

// Scan performs an MVCC scan of up to count rows starting at key (forward)
// or ending at key (reverse), discarding the results. It panics if the scan
// returns more rows than requested or finds intents — neither should occur
// with the data this tool writes.
func (r rocksDB) Scan(key []byte, count int64, reverse bool) error {
	// TODO: unnecessary overhead here. Change the interface.
	beginKey, _, ok := mvccSplitKey(key)
	if !ok {
		panic("mvccSplitKey failed")
	}
	endKey := roachpb.KeyMax
	ropts := engine.IterOptions{
		LowerBound: key,
	}
	if reverse {
		// A reverse scan runs from KeyMin up to key, so swap the scan range
		// and the iterator bounds.
		endKey = beginKey
		beginKey = roachpb.KeyMin
		ropts.UpperBound = key
		ropts.LowerBound = nil
	}

	iter := r.d.NewIterator(ropts)
	defer iter.Close()
	// We hard code a timestamp with walltime=1 in the data, so we just have to
	// use a larger timestamp here (walltime=10).
	ts := hlc.Timestamp{WallTime: 10}
	_, numKVs, _, intents, err := iter.MVCCScan(
		beginKey, endKey, count, ts, engine.MVCCScanOptions{Reverse: reverse},
	)
	if numKVs > count {
		panic("MVCCScan returned too many keys")
	}
	if len(intents) > 0 {
		panic("MVCCScan found intents")
	}
	return err
}

// Metrics translates RocksDB's human-readable compaction-stats dump into a
// pebble.VersionMetrics so both engines can be reported uniformly. Parsing
// is positional and tied to the stats format of the RocksDB version
// CockroachDB vendors; individual parse errors are deliberately ignored, so
// unparseable fields stay zero.
func (r rocksDB) Metrics() *pebble.VersionMetrics {
	stats := r.d.GetCompactionStats()
	var inLevelsSection bool
	var vMetrics pebble.VersionMetrics
	for _, line := range strings.Split(stats, "\n") {
		if strings.HasPrefix(line, "-----") {
			continue
		}
		// The per-level table starts at its "Level ..." header row.
		if !inLevelsSection && strings.HasPrefix(line, "Level") {
			inLevelsSection = true
			continue
		}
		if strings.HasPrefix(line, "Flush(GB):") {
			// line looks like:
			// "Flush(GB): cumulative 0.302, interval 0.302"
			// pretend cumulative flush is WAL size and L0 input since we don't have
			// access to WAL stats in rocks.
			// TODO: this is slightly different than Pebble which uses the real physical
			// WAL size. This way prevents compression ratio from affecting write-amp,
			// but it also prevents apples-to-apples w-amp comparison.
			fields := strings.Fields(line)
			// fields[2] is the cumulative figure with a trailing comma
			// (e.g. "0.302,"); strip it before parsing.
			field := fields[2]
			walWrittenGB, _ := strconv.ParseFloat(field[0:len(field)-1], 64)
			vMetrics.Levels[0].BytesIn = uint64(1024.0 * 1024.0 * 1024.0 * walWrittenGB)
			vMetrics.WAL.BytesWritten = vMetrics.Levels[0].BytesIn
		}
		// The "  Sum" row terminates the per-level table.
		if inLevelsSection && strings.HasPrefix(line, " Sum") {
			inLevelsSection = false
			continue
		}
		if inLevelsSection {
			// Positional row parse. NOTE(review): the column indices below
			// (score=4, read=5, in=6, written=8, moved=10) encode the
			// vendored RocksDB's stats layout; verify against that version's
			// compaction-stats header before trusting individual metrics.
			fields := strings.Fields(line)
			// fields[0] is e.g. "L3"; strip the leading 'L' for the level number.
			level, _ := strconv.Atoi(fields[0][1:])
			if level < 0 || level > 6 {
				panic("expected at most 7 levels")
			}
			// fields[1] is "files/compacting"; keep only the file count.
			vMetrics.Levels[level].NumFiles, _ = strconv.ParseInt(strings.Split(fields[1], "/")[0], 10, 64)
			// fields[2]/fields[3] are the level size and its unit.
			size, _ := strconv.ParseFloat(fields[2], 64)
			if fields[3] == "KB" {
				size *= 1024.0
			} else if fields[3] == "MB" {
				size *= 1024.0 * 1024.0
			} else if fields[3] == "GB" {
				size *= 1024.0 * 1024.0 * 1024.0
			} else {
				panic("unknown unit")
			}
			vMetrics.Levels[level].Size = uint64(size)
			vMetrics.Levels[level].Score, _ = strconv.ParseFloat(fields[4], 64)
			if level > 0 {
				// L0's BytesIn was already set from the Flush(GB) line above.
				bytesInGB, _ := strconv.ParseFloat(fields[6], 64)
				vMetrics.Levels[level].BytesIn = uint64(1024.0 * 1024.0 * 1024.0 * bytesInGB)
			}
			bytesMovedGB, _ := strconv.ParseFloat(fields[10], 64)
			vMetrics.Levels[level].BytesMoved = uint64(1024.0 * 1024.0 * 1024.0 * bytesMovedGB)
			bytesReadGB, _ := strconv.ParseFloat(fields[5], 64)
			vMetrics.Levels[level].BytesRead = uint64(1024.0 * 1024.0 * 1024.0 * bytesReadGB)
			bytesWrittenGB, _ := strconv.ParseFloat(fields[8], 64)
			vMetrics.Levels[level].BytesWritten = uint64(1024.0 * 1024.0 * 1024.0 * bytesWrittenGB)
		}
	}
	return &vMetrics
}