github.com/df-mc/goleveldb@v1.1.9/leveldb/session_compaction.go (about) 1 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> 2 // All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE file. 6 7 package leveldb 8 9 import ( 10 "sync/atomic" 11 12 "github.com/df-mc/goleveldb/leveldb/iterator" 13 "github.com/df-mc/goleveldb/leveldb/memdb" 14 "github.com/df-mc/goleveldb/leveldb/opt" 15 ) 16 17 const ( 18 undefinedCompaction = iota 19 level0Compaction 20 nonLevel0Compaction 21 seekCompaction 22 ) 23 24 func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int { 25 v := s.version() 26 defer v.release() 27 return v.pickMemdbLevel(umin, umax, maxLevel) 28 } 29 30 func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) { 31 // Create sorted table. 32 iter := mdb.NewIterator(nil) 33 defer iter.Release() 34 t, n, err := s.tops.createFrom(iter) 35 if err != nil { 36 return 0, err 37 } 38 39 // Pick level other than zero can cause compaction issue with large 40 // bulk insert and delete on strictly incrementing key-space. The 41 // problem is that the small deletion markers trapped at lower level, 42 // while key/value entries keep growing at higher level. Since the 43 // key-space is strictly incrementing it will not overlaps with 44 // higher level, thus maximum possible level is always picked, while 45 // overlapping deletion marker pushed into lower level. 46 // See: https://github.com/df-mc/goleveldb/issues/127. 47 flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel) 48 rec.addTableFile(flushLevel, t) 49 50 s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) 51 return flushLevel, nil 52 } 53 54 // Pick a compaction based on current state; need external synchronization. 55 func (s *session) pickCompaction() *compaction { 56 v := s.version() 57 58 var sourceLevel int 59 var t0 tFiles 60 var typ int 61 if v.cScore >= 1 { 62 sourceLevel = v.cLevel 63 cptr := s.getCompPtr(sourceLevel) 64 tables := v.levels[sourceLevel] 65 for _, t := range tables { 66 if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 { 67 t0 = append(t0, t) 68 break 69 } 70 } 71 if len(t0) == 0 { 72 t0 = append(t0, tables[0]) 73 } 74 if sourceLevel == 0 { 75 typ = level0Compaction 76 } else { 77 typ = nonLevel0Compaction 78 } 79 } else { 80 if p := atomic.LoadPointer(&v.cSeek); p != nil { 81 ts := (*tSet)(p) 82 sourceLevel = ts.level 83 t0 = append(t0, ts.table) 84 typ = seekCompaction 85 } else { 86 v.release() 87 return nil 88 } 89 } 90 91 return newCompaction(s, v, sourceLevel, t0, typ) 92 } 93 94 // Create compaction from given level and range; need external synchronization. 95 func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction { 96 v := s.version() 97 98 if sourceLevel >= len(v.levels) { 99 v.release() 100 return nil 101 } 102 103 t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0) 104 if len(t0) == 0 { 105 v.release() 106 return nil 107 } 108 109 // Avoid compacting too much in one shot in case the range is large. 110 // But we cannot do this for level-0 since level-0 files can overlap 111 // and we must not pick one file and drop another older file if the 112 // two files overlap. 113 if !noLimit && sourceLevel > 0 { 114 limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel)) 115 total := int64(0) 116 for i, t := range t0 { 117 total += t.size 118 if total >= limit { 119 s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1) 120 t0 = t0[:i+1] 121 break 122 } 123 } 124 } 125 126 typ := level0Compaction 127 if sourceLevel != 0 { 128 typ = nonLevel0Compaction 129 } 130 return newCompaction(s, v, sourceLevel, t0, typ) 131 } 132 133 func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles, typ int) *compaction { 134 c := &compaction{ 135 s: s, 136 v: v, 137 typ: typ, 138 sourceLevel: sourceLevel, 139 levels: [2]tFiles{t0, nil}, 140 maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)), 141 tPtrs: make([]int, len(v.levels)), 142 } 143 c.expand() 144 c.save() 145 return c 146 } 147 148 // compaction represent a compaction state. 149 type compaction struct { 150 s *session 151 v *version 152 153 typ int 154 sourceLevel int 155 levels [2]tFiles 156 maxGPOverlaps int64 157 158 gp tFiles 159 gpi int 160 seenKey bool 161 gpOverlappedBytes int64 162 imin, imax internalKey 163 tPtrs []int 164 released bool 165 166 snapGPI int 167 snapSeenKey bool 168 snapGPOverlappedBytes int64 169 snapTPtrs []int 170 } 171 172 func (c *compaction) save() { 173 c.snapGPI = c.gpi 174 c.snapSeenKey = c.seenKey 175 c.snapGPOverlappedBytes = c.gpOverlappedBytes 176 c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...) 177 } 178 179 func (c *compaction) restore() { 180 c.gpi = c.snapGPI 181 c.seenKey = c.snapSeenKey 182 c.gpOverlappedBytes = c.snapGPOverlappedBytes 183 c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...) 184 } 185 186 func (c *compaction) release() { 187 if !c.released { 188 c.released = true 189 c.v.release() 190 } 191 } 192 193 // Expand compacted tables; need external synchronization. 194 func (c *compaction) expand() { 195 limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel)) 196 vt0 := c.v.levels[c.sourceLevel] 197 vt1 := tFiles{} 198 if level := c.sourceLevel + 1; level < len(c.v.levels) { 199 vt1 = c.v.levels[level] 200 } 201 202 t0, t1 := c.levels[0], c.levels[1] 203 imin, imax := t0.getRange(c.s.icmp) 204 205 // For non-zero levels, the ukey can't hop across tables at all. 206 if c.sourceLevel == 0 { 207 // We expand t0 here just incase ukey hop across tables. 208 t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0) 209 if len(t0) != len(c.levels[0]) { 210 imin, imax = t0.getRange(c.s.icmp) 211 } 212 } 213 t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false) 214 // Get entire range covered by compaction. 215 amin, amax := append(t0, t1...).getRange(c.s.icmp) 216 217 // See if we can grow the number of inputs in "sourceLevel" without 218 // changing the number of "sourceLevel+1" files we pick up. 219 if len(t1) > 0 { 220 exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0) 221 if len(exp0) > len(t0) && t1.size()+exp0.size() < limit { 222 xmin, xmax := exp0.getRange(c.s.icmp) 223 exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false) 224 if len(exp1) == len(t1) { 225 c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", 226 c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), 227 len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) 228 imin, imax = xmin, xmax 229 t0, t1 = exp0, exp1 230 amin, amax = append(t0, t1...).getRange(c.s.icmp) 231 } 232 } 233 } 234 235 // Compute the set of grandparent files that overlap this compaction 236 // (parent == sourceLevel+1; grandparent == sourceLevel+2) 237 if level := c.sourceLevel + 2; level < len(c.v.levels) { 238 c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) 239 } 240 241 c.levels[0], c.levels[1] = t0, t1 242 c.imin, c.imax = imin, imax 243 } 244 245 // Check whether compaction is trivial. 246 func (c *compaction) trivial() bool { 247 return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps 248 } 249 250 func (c *compaction) baseLevelForKey(ukey []byte) bool { 251 for level := c.sourceLevel + 2; level < len(c.v.levels); level++ { 252 tables := c.v.levels[level] 253 for c.tPtrs[level] < len(tables) { 254 t := tables[c.tPtrs[level]] 255 if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 { 256 // We've advanced far enough. 257 if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { 258 // Key falls in this file's range, so definitely not base level. 259 return false 260 } 261 break 262 } 263 c.tPtrs[level]++ 264 } 265 } 266 return true 267 } 268 269 func (c *compaction) shouldStopBefore(ikey internalKey) bool { 270 for ; c.gpi < len(c.gp); c.gpi++ { 271 gp := c.gp[c.gpi] 272 if c.s.icmp.Compare(ikey, gp.imax) <= 0 { 273 break 274 } 275 if c.seenKey { 276 c.gpOverlappedBytes += gp.size 277 } 278 } 279 c.seenKey = true 280 281 if c.gpOverlappedBytes > c.maxGPOverlaps { 282 // Too much overlap for current output; start new output. 283 c.gpOverlappedBytes = 0 284 return true 285 } 286 return false 287 } 288 289 // Creates an iterator. 290 func (c *compaction) newIterator() iterator.Iterator { 291 // Creates iterator slice. 292 icap := len(c.levels) 293 if c.sourceLevel == 0 { 294 // Special case for level-0. 295 icap = len(c.levels[0]) + 1 296 } 297 its := make([]iterator.Iterator, 0, icap) 298 299 // Options. 300 ro := &opt.ReadOptions{ 301 DontFillCache: true, 302 Strict: opt.StrictOverride, 303 } 304 strict := c.s.o.GetStrict(opt.StrictCompaction) 305 if strict { 306 ro.Strict |= opt.StrictReader 307 } 308 309 for i, tables := range c.levels { 310 if len(tables) == 0 { 311 continue 312 } 313 314 // Level-0 is not sorted and may overlaps each other. 315 if c.sourceLevel+i == 0 { 316 for _, t := range tables { 317 its = append(its, c.s.tops.newIterator(t, nil, ro)) 318 } 319 } else { 320 it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict) 321 its = append(its, it) 322 } 323 } 324 325 return iterator.NewMergedIterator(its, c.s.icmp, strict) 326 }