github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/compaction.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bitpage 16 17 import ( 18 "fmt" 19 "runtime/debug" 20 "strings" 21 "time" 22 23 "github.com/cockroachdb/errors" 24 "github.com/zuoyebang/bitalosdb/internal/bitask" 25 26 "github.com/zuoyebang/bitalosdb/internal/consts" 27 "github.com/zuoyebang/bitalosdb/internal/humanize" 28 ) 29 30 func (p *page) makeNewArrayTable() (*arrayTable, *flushableEntry, error) { 31 fn := p.bp.meta.getNextAtFileNum(p.pn) 32 path := p.bp.makeFilePath(fileTypeArrayTable, p.pn, fn) 33 return p.newArrayTable(path, fn, false) 34 } 35 36 func (p *page) flush(sentinel []byte, logTag string) (err error) { 37 defer func() { 38 if r := recover(); r != nil { 39 err = errors.Errorf("%s flush panic err:%v stack:%s", logTag, r, string(debug.Stack())) 40 } 41 }() 42 43 var n int 44 var flushing flushableList 45 var minUnflushedStFileNum FileNum 46 var oldSize uint64 47 48 prepareFlushing := func() error { 49 p.mu.Lock() 50 defer p.mu.Unlock() 51 52 if err := p.makeMutableForWrite(false); err != nil { 53 return errors.Wrap(err, "bitpage: makeMutableForWrite fail") 54 } 55 56 n = len(p.mu.stQueue) - 1 57 minUnflushedStFileNum = p.mu.stQueue[n].fileNum 58 59 for i := 0; i < n; i++ { 60 logNum := p.mu.stQueue[i].fileNum 61 if logNum >= minUnflushedStFileNum { 62 return errors.New("bitpage: flush next file number is unset") 63 } 64 flushing = append(flushing, p.mu.stQueue[i]) 65 oldSize += p.mu.stQueue[i].inuseBytes() 66 } 67 68 if n > 0 && p.mu.arrtable != nil { 69 flushing = append(flushing, p.mu.arrtable) 70 oldSize += p.mu.arrtable.inuseBytes() 71 } 72 return nil 73 } 74 75 p.bp.opts.Logger.Infof("%s flush start", logTag) 76 p.setFlushState(pageFlushStateStart) 77 78 if err = prepareFlushing(); err != nil { 79 return errors.Wrapf(err, "bitpage: %s flush prepareFlushing fail", logTag) 80 } 81 82 if len(flushing) == 0 { 83 return nil 84 } 85 86 var atEntry *flushableEntry 87 atEntry, err = p.runFlush(flushing, oldSize, logTag) 88 if err != nil { 89 return errors.Wrapf(err, "bitpage: %s flush fail", logTag) 90 } 91 92 p.mu.Lock() 93 p.bp.meta.setMinUnflushedStFileNum(p.pn, minUnflushedStFileNum) 94 p.mu.stQueue = p.mu.stQueue[n:] 95 p.mu.arrtable = atEntry 96 p.updateReadState() 97 for i := range flushing { 98 flushing[i].setObsolete() 99 flushing[i].readerUnref() 100 } 101 p.mu.Unlock() 102 103 p.setFlushState(pageFlushStateFinish) 104 105 if atEntry == nil { 106 return nil 107 } 108 109 if sentinel != nil && p.bp.pageNoneSplit(p.pn) { 110 newSize := atEntry.inuseBytes() 111 if newSize > p.bp.opts.BitpageSplitSize { 112 p.bp.opts.BitpageTaskPushFunc(&bitask.BitpageTaskData{ 113 Index: p.bp.index, 114 Event: bitask.BitpageEventSplit, 115 Pn: uint32(p.pn), 116 Sentinel: sentinel, 117 }) 118 p.bp.meta.setSplitState(p.pn, pageSplitStateSendTask) 119 p.bp.opts.Logger.Infof("%s push split task", logTag) 120 } 121 } 122 123 return nil 124 } 125 126 func (p *page) runFlush(flushing flushableList, oldSize uint64, logTag string) (atEntry *flushableEntry, retErr error) { 127 var iiter internalIterator 128 var at *arrayTable 129 var keyPrefixDeleteKind, prefixDeleteNum int 130 var lastPrefixDelete uint64 131 132 startTime := time.Now() 133 if len(flushing) == 1 { 134 iiter = flushing[0].newIter(&iterCompactOpts) 135 } else { 136 its := make([]internalIterator, 0, len(flushing)) 137 for i := range flushing { 138 its = append(its, flushing[i].newIter(&iterCompactOpts)) 139 } 140 iiter = newMergingIter(p.bp.opts.Logger, p.bp.opts.Cmp, its...) 141 } 142 143 iter := newCompactionIter(p.bp, iiter) 144 defer func() { 145 if iter != nil { 146 _ = iter.Close() 147 } 148 149 if retErr != nil && atEntry != nil { 150 atEntry.setObsolete() 151 atEntry.readerUnref() 152 } 153 }() 154 155 deleteBitableKey := func(ik *internalKey) { 156 if err := p.bp.opts.BitableDeleteCB(ik.UserKey); err != nil { 157 p.bp.opts.Logger.Errorf("%s BitableDeleteCB fail key:%s err:%s", logTag, ik.String(), err) 158 } 159 } 160 161 checkKeyPrefixDelete := func(ik *internalKey) bool { 162 if lastPrefixDelete == 0 { 163 return false 164 } 165 166 keyPrefixDelete := p.bp.opts.KeyPrefixDeleteFunc(ik.UserKey) 167 if lastPrefixDelete == keyPrefixDelete { 168 deleteBitableKey(ik) 169 return true 170 } else { 171 lastPrefixDelete = 0 172 return false 173 } 174 } 175 176 p.bp.opts.Logger.Infof("%s runFlush start flushing(%d)", logTag, len(flushing)) 177 178 for iterKey, iterValue := iter.First(); iterKey != nil; iterKey, iterValue = iter.Next() { 179 switch iterKey.Kind() { 180 case internalKeyKindSet: 181 if checkKeyPrefixDelete(iterKey) { 182 prefixDeleteNum++ 183 continue 184 } 185 186 if p.bp.opts.CheckExpireCB(iterKey.UserKey, iterValue) { 187 deleteBitableKey(iterKey) 188 } else { 189 if at == nil { 190 at, atEntry, retErr = p.makeNewArrayTable() 191 if retErr != nil { 192 return nil, retErr 193 } 194 } 195 if _, err := at.writeItem(iterKey.UserKey, iterValue); err != nil { 196 p.bp.opts.Logger.Errorf("%s writeItem fail err:%s", logTag, err) 197 } 198 } 199 200 case internalKeyKindDelete: 201 deleteBitableKey(iterKey) 202 203 case internalKeyKindPrefixDelete: 204 lastPrefixDelete = p.bp.opts.KeyPrefixDeleteFunc(iterKey.UserKey) 205 keyPrefixDeleteKind++ 206 } 207 } 208 209 if at == nil { 210 p.bp.opts.Logger.Infof("%s runFlush finish atNil oldSize(%s) newSize(0) in %.3fs", 211 logTag, humanize.Uint64(oldSize), time.Since(startTime).Seconds()) 212 return nil, nil 213 } 214 215 if retErr = at.writeFinish(); retErr != nil { 216 return nil, retErr 217 } 218 219 p.bp.meta.setNextArrayTableFileNum(p.pn) 220 221 duration := time.Since(startTime) 222 p.bp.opts.Logger.Infof("%s runFlush finish flushed(%s) at(%s) atVersion(%d) atSize(%s) keys(%d) keysPdKind(%d) pdNum(%d), in %.3fs", 223 logTag, 224 humanize.Uint64(oldSize), 225 at.filename, 226 at.getVersion(), 227 humanize.Uint64(uint64(at.size)), 228 at.itemCount(), 229 keyPrefixDeleteKind, 230 prefixDeleteNum, 231 duration.Seconds(), 232 ) 233 234 return atEntry, nil 235 } 236 237 func (p *page) split(logTag string, pages []*page) (retErr error) { 238 splitNum := len(pages) 239 p.bp.opts.Logger.Infof("%s start splitNum:%d", logTag, splitNum) 240 startTime := time.Now() 241 defer func() { 242 if r := recover(); r != any(nil) { 243 p.bp.opts.Logger.Errorf("%s split panic err:%v stack:%s", logTag, r, string(debug.Stack())) 244 } 245 }() 246 247 var flushing flushableList 248 var oldSize uint64 249 250 p.mu.Lock() 251 flushing = append(flushing, p.mu.stQueue...) 252 flushing = append(flushing, p.mu.arrtable) 253 p.mu.Unlock() 254 255 its := make([]internalIterator, 0, len(flushing)) 256 for i := range flushing { 257 its = append(its, flushing[i].newIter(&iterCompactOpts)) 258 oldSize += flushing[i].dataBytes() 259 } 260 iiter := newMergingIter(p.bp.opts.Logger, p.bp.opts.Cmp, its...) 261 iter := newCompactionIter(p.bp, iiter) 262 defer iter.Close() 263 264 var ats [consts.BitpageSplitNum]*arrayTable 265 var atEntrys [consts.BitpageSplitNum]*flushableEntry 266 267 for i := 0; i < splitNum; i++ { 268 at, atEntry, err := pages[i].makeNewArrayTable() 269 if retErr != nil { 270 retErr = err 271 return 272 } 273 274 ats[i] = at 275 atEntrys[i] = atEntry 276 } 277 278 var current int 279 var wn uint32 280 var writeBytes uint64 281 var atCurrent *arrayTable 282 var lastPage, finished bool 283 284 atCurrent = ats[current] 285 splitSize := oldSize / uint64(splitNum) 286 for key, val := iter.First(); key != nil; key, val = iter.Next() { 287 if key.Kind() != internalKeyKindSet { 288 continue 289 } 290 291 wn, retErr = atCurrent.writeItem(key.UserKey, val) 292 if retErr != nil { 293 return 294 } 295 296 if finished { 297 finished = false 298 } 299 300 writeBytes += uint64(wn) 301 if !lastPage && writeBytes >= splitSize { 302 current++ 303 if current == splitNum { 304 lastPage = true 305 } else { 306 if retErr = atCurrent.writeFinish(); retErr != nil { 307 return 308 } 309 atCurrent = ats[current] 310 writeBytes = 0 311 finished = true 312 } 313 } 314 } 315 if !finished { 316 if retErr = atCurrent.writeFinish(); retErr != nil { 317 return 318 } 319 } 320 321 var newPageInfo strings.Builder 322 for i := 0; i < splitNum; i++ { 323 if !ats[i].empty() { 324 pages[i].maxKey = ats[i].getMaxKey() 325 pages[i].mu.arrtable = atEntrys[i] 326 pages[i].updateReadState() 327 newPageInfo.WriteString(fmt.Sprintf("%s ", humanize.Uint64(ats[i].inuseBytes()))) 328 } else { 329 p.bp.opts.Logger.Infof("%s free empty page pn:%d", logTag, pages[i].pn) 330 _ = p.bp.FreePage(pages[i].pn, false) 331 pages[i] = nil 332 } 333 } 334 335 p.bp.opts.Logger.Infof("%s finish splitSize(%s) oldSize(%s) newSize(%s), cost:%.3fs", 336 logTag, 337 humanize.Uint64(splitSize), 338 humanize.Uint64(oldSize), 339 newPageInfo.String(), 340 time.Since(startTime).Seconds(), 341 ) 342 343 return 344 }