github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/compaction.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bitalosdb 16 17 import ( 18 "context" 19 "errors" 20 "runtime/debug" 21 "runtime/pprof" 22 "sort" 23 24 "github.com/zuoyebang/bitalosdb/internal/base" 25 "github.com/zuoyebang/bitalosdb/internal/bitask" 26 "github.com/zuoyebang/bitalosdb/internal/utils" 27 ) 28 29 var errFlushInvariant = errors.New("bitalosdb: flush next log number is unset") 30 var flushLabels = pprof.Labels("bitalosdb", "flush") 31 32 type fileInfo struct { 33 fileNum FileNum 34 fileSize uint64 35 } 36 37 type compaction struct { 38 cmp Compare 39 logger Logger 40 flushing flushableList 41 bytesIterated uint64 42 bytesWritten int64 43 keyWritten int64 44 keyPrefixDeleteKind int64 45 prefixDeleteNum int64 46 } 47 48 func newFlush(opts *Options, flushing flushableList) *compaction { 49 return &compaction{ 50 cmp: opts.Comparer.Compare, 51 logger: opts.Logger, 52 flushing: flushing, 53 } 54 } 55 56 func (c *compaction) newInputIter() internalIterator { 57 if len(c.flushing) == 1 { 58 f := c.flushing[0] 59 iter := f.newFlushIter(nil, &c.bytesIterated) 60 return iter 61 } 62 iters := make([]internalIterator, 0, len(c.flushing)) 63 for i := range c.flushing { 64 f := c.flushing[i] 65 iters = append(iters, f.newFlushIter(nil, &c.bytesIterated)) 66 } 67 return newMergingIter(c.logger, c.cmp, iters...) 68 } 69 70 func (c *compaction) String() string { 71 return "memtable flush\n" 72 } 73 74 func (s *Bitower) passedFlushThreshold() bool { 75 var n int 76 var size uint64 77 for ; n < len(s.mu.mem.queue)-1; n++ { 78 if !s.mu.mem.queue[n].readyForFlush() { 79 break 80 } 81 if s.mu.mem.queue[n].flushForced { 82 size += uint64(s.memTableSize) 83 } else { 84 size += s.mu.mem.queue[n].totalBytes() 85 } 86 } 87 if n == 0 { 88 return false 89 } 90 91 minFlushSize := uint64(s.memTableSize) / 2 92 return size >= minFlushSize 93 } 94 95 func (s *Bitower) maybeScheduleFlush(needReport bool) { 96 if s.mu.compact.flushing || s.db.IsClosed() || len(s.mu.mem.queue) <= 1 { 97 return 98 } 99 100 if !s.passedFlushThreshold() { 101 return 102 } 103 104 s.mu.compact.flushing = true 105 106 s.db.memFlushTask.PushTask(&bitask.MemFlushTaskData{ 107 Index: s.index, 108 NeedReport: needReport, 109 }) 110 } 111 112 func (s *Bitower) flush(needReport bool) { 113 pprof.Do(context.Background(), flushLabels, func(context.Context) { 114 defer func() { 115 if r := recover(); r != any(nil) { 116 s.db.opts.Logger.Errorf("[BITOWER %d] flush panic err:%v stack:%s", s.index, r, string(debug.Stack())) 117 } 118 }() 119 120 s.mu.Lock() 121 defer s.mu.Unlock() 122 123 defer func() { 124 s.mu.compact.flushing = false 125 s.maybeScheduleFlush(true) 126 s.mu.compact.cond.Broadcast() 127 }() 128 129 if err := s.flush1(needReport); err != nil { 130 s.db.opts.EventListener.BackgroundError(err) 131 } 132 }) 133 } 134 135 func (s *Bitower) flush1(needReport bool) (err error) { 136 var n int 137 for ; n < len(s.mu.mem.queue)-1; n++ { 138 if !s.mu.mem.queue[n].readyForFlush() { 139 break 140 } 141 } 142 if n == 0 { 143 return nil 144 } 145 146 minUnflushedLogNum := s.mu.mem.queue[n].logNum 147 if !s.db.opts.DisableWAL { 148 for i := 0; i < n; i++ { 149 logNum := s.mu.mem.queue[i].logNum 150 if logNum >= minUnflushedLogNum { 151 return errFlushInvariant 152 } 153 } 154 } 155 156 if needReport && s.db.opts.FlushReporter != nil { 157 s.db.opts.FlushReporter(s.db.opts.Id) 158 } 159 160 c := newFlush(s.db.opts, s.mu.mem.queue[:n]) 161 162 err = s.runCompaction(c, n) 163 if err == nil { 164 sme := &bitowerMetaEditor{MinUnflushedLogNum: minUnflushedLogNum} 165 err = s.metaApply(sme) 166 } 167 168 var flushed flushableList 169 if err == nil { 170 flushed = s.mu.mem.queue[:n] 171 s.mu.mem.queue = s.mu.mem.queue[n:] 172 s.updateReadState() 173 } 174 175 s.doDeleteObsoleteFiles() 176 177 s.mu.Unlock() 178 defer s.mu.Lock() 179 180 for i := range flushed { 181 flushed[i].readerUnref() 182 close(flushed[i].flushed) 183 } 184 185 return err 186 } 187 188 func (s *Bitower) runCompaction(c *compaction, memNum int) (err error) { 189 s.mu.Unlock() 190 defer s.mu.Lock() 191 192 d := s.db 193 194 iter := &compactionIter{ 195 cmp: c.cmp, 196 iter: c.newInputIter(), 197 } 198 199 defer func() { 200 err = utils.FirstError(err, iter.Close()) 201 }() 202 203 d.dbState.SetBitowerHighPriority(s.index, true) 204 d.dbState.LockBitowerWrite(s.index) 205 defer func() { 206 d.dbState.SetBitowerHighPriority(s.index, false) 207 d.dbState.UnlockBitowerWrite(s.index) 208 }() 209 210 d.opts.EventListener.FlushBegin(FlushInfo{ 211 Index: s.index, 212 Input: memNum, 213 }) 214 startTime := d.timeNow() 215 defer func() { 216 info := FlushInfo{ 217 Index: s.index, 218 Input: memNum, 219 Iterated: c.bytesIterated, 220 Written: c.bytesWritten, 221 keyWritten: c.keyWritten, 222 keyPrefixDeleteKind: c.keyPrefixDeleteKind, 223 prefixDeleteNum: c.prefixDeleteNum, 224 Duration: d.timeNow().Sub(startTime), 225 Done: true, 226 Err: err, 227 } 228 d.flushMemTime.Store(info.Duration.Milliseconds()) 229 d.opts.EventListener.FlushEnd(info) 230 }() 231 232 var lastPrefixDelete uint64 233 var writer *flushBitowerWriter 234 235 checkKeyPrefixDelete := func(ik *InternalKey) bool { 236 if lastPrefixDelete == 0 { 237 return false 238 } 239 240 keyPrefixDelete := d.optspool.BaseOptions.KeyPrefixDeleteFunc(ik.UserKey) 241 if lastPrefixDelete == keyPrefixDelete { 242 return true 243 } else { 244 lastPrefixDelete = 0 245 return false 246 } 247 } 248 249 writer, err = s.newFlushWriter() 250 if err != nil { 251 return err 252 } 253 254 defer func() { 255 err = writer.Finish() 256 }() 257 258 for key, val := iter.First(); key != nil; key, val = iter.Next() { 259 switch key.Kind() { 260 case InternalKeyKindSet: 261 if checkKeyPrefixDelete(key) { 262 c.prefixDeleteNum++ 263 continue 264 } 265 266 if d.optspool.BaseOptions.KvCheckExpire(key.UserKey, val) { 267 key.SetKind(InternalKeyKindDelete) 268 val = nil 269 } 270 case InternalKeyKindDelete: 271 if checkKeyPrefixDelete(key) { 272 continue 273 } 274 case InternalKeyKindPrefixDelete: 275 lastPrefixDelete = d.optspool.BaseOptions.KeyPrefixDeleteFunc(key.UserKey) 276 c.keyPrefixDeleteKind++ 277 } 278 279 if err = writer.Set(*key, val); err != nil { 280 return err 281 } 282 283 c.bytesWritten += int64(key.Size() + len(val)) 284 c.keyWritten++ 285 } 286 287 return nil 288 } 289 290 func (s *Bitower) doDeleteObsoleteFiles() { 291 var obsoleteLogs []fileInfo 292 for i := range s.mu.log.queue { 293 if s.mu.log.queue[i].fileNum >= s.getMinUnflushedLogNum() { 294 obsoleteLogs = s.mu.log.queue[:i] 295 s.mu.log.queue = s.mu.log.queue[i:] 296 break 297 } 298 } 299 300 s.mu.Unlock() 301 defer s.mu.Lock() 302 303 for _, f := range obsoleteLogs { 304 if s.logRecycler.add(f) { 305 continue 306 } 307 308 filename := s.makeWalFilename(f.fileNum) 309 s.db.optspool.BaseOptions.DeleteFilePacer.AddFile(filename) 310 s.db.opts.EventListener.WALDeleted(WALDeleteInfo{ 311 Index: s.index, 312 Path: filename, 313 FileNum: f.fileNum, 314 }) 315 } 316 } 317 318 func (s *Bitower) scanObsoleteFiles(list []string) { 319 if s.mu.compact.flushing { 320 return 321 } 322 323 var obsoleteLogs []fileInfo 324 325 minUnflushedLogNum := s.getMinUnflushedLogNum() 326 for _, filename := range list { 327 ft, fn, ok := base.ParseFilename(s.db.opts.FS, filename) 328 if ok && ft == fileTypeLog && fn < minUnflushedLogNum { 329 fi := fileInfo{fileNum: fn} 330 if stat, err := s.db.opts.FS.Stat(filename); err == nil { 331 fi.fileSize = uint64(stat.Size()) 332 } 333 obsoleteLogs = append(obsoleteLogs, fi) 334 } 335 } 336 337 s.mu.log.queue = merge(s.mu.log.queue, obsoleteLogs) 338 } 339 340 func merge(a, b []fileInfo) []fileInfo { 341 if len(b) == 0 { 342 return a 343 } 344 345 a = append(a, b...) 346 sort.Slice(a, func(i, j int) bool { 347 return a[i].fileNum < a[j].fileNum 348 }) 349 350 n := 0 351 for i := 0; i < len(a); i++ { 352 if n == 0 || a[i].fileNum != a[n-1].fileNum { 353 a[n] = a[i] 354 n++ 355 } 356 } 357 return a[:n] 358 }