github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/compaction.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitpage
    16  
    17  import (
    18  	"fmt"
    19  	"runtime/debug"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/errors"
    24  	"github.com/zuoyebang/bitalosdb/internal/bitask"
    25  
    26  	"github.com/zuoyebang/bitalosdb/internal/consts"
    27  	"github.com/zuoyebang/bitalosdb/internal/humanize"
    28  )
    29  
    30  func (p *page) makeNewArrayTable() (*arrayTable, *flushableEntry, error) {
    31  	fn := p.bp.meta.getNextAtFileNum(p.pn)
    32  	path := p.bp.makeFilePath(fileTypeArrayTable, p.pn, fn)
    33  	return p.newArrayTable(path, fn, false)
    34  }
    35  
    36  func (p *page) flush(sentinel []byte, logTag string) (err error) {
    37  	defer func() {
    38  		if r := recover(); r != nil {
    39  			err = errors.Errorf("%s flush panic err:%v stack:%s", logTag, r, string(debug.Stack()))
    40  		}
    41  	}()
    42  
    43  	var n int
    44  	var flushing flushableList
    45  	var minUnflushedStFileNum FileNum
    46  	var oldSize uint64
    47  
    48  	prepareFlushing := func() error {
    49  		p.mu.Lock()
    50  		defer p.mu.Unlock()
    51  
    52  		if err := p.makeMutableForWrite(false); err != nil {
    53  			return errors.Wrap(err, "bitpage: makeMutableForWrite fail")
    54  		}
    55  
    56  		n = len(p.mu.stQueue) - 1
    57  		minUnflushedStFileNum = p.mu.stQueue[n].fileNum
    58  
    59  		for i := 0; i < n; i++ {
    60  			logNum := p.mu.stQueue[i].fileNum
    61  			if logNum >= minUnflushedStFileNum {
    62  				return errors.New("bitpage: flush next file number is unset")
    63  			}
    64  			flushing = append(flushing, p.mu.stQueue[i])
    65  			oldSize += p.mu.stQueue[i].inuseBytes()
    66  		}
    67  
    68  		if n > 0 && p.mu.arrtable != nil {
    69  			flushing = append(flushing, p.mu.arrtable)
    70  			oldSize += p.mu.arrtable.inuseBytes()
    71  		}
    72  		return nil
    73  	}
    74  
    75  	p.bp.opts.Logger.Infof("%s flush start", logTag)
    76  	p.setFlushState(pageFlushStateStart)
    77  
    78  	if err = prepareFlushing(); err != nil {
    79  		return errors.Wrapf(err, "bitpage: %s flush prepareFlushing fail", logTag)
    80  	}
    81  
    82  	if len(flushing) == 0 {
    83  		return nil
    84  	}
    85  
    86  	var atEntry *flushableEntry
    87  	atEntry, err = p.runFlush(flushing, oldSize, logTag)
    88  	if err != nil {
    89  		return errors.Wrapf(err, "bitpage: %s flush fail", logTag)
    90  	}
    91  
    92  	p.mu.Lock()
    93  	p.bp.meta.setMinUnflushedStFileNum(p.pn, minUnflushedStFileNum)
    94  	p.mu.stQueue = p.mu.stQueue[n:]
    95  	p.mu.arrtable = atEntry
    96  	p.updateReadState()
    97  	for i := range flushing {
    98  		flushing[i].setObsolete()
    99  		flushing[i].readerUnref()
   100  	}
   101  	p.mu.Unlock()
   102  
   103  	p.setFlushState(pageFlushStateFinish)
   104  
   105  	if atEntry == nil {
   106  		return nil
   107  	}
   108  
   109  	if sentinel != nil && p.bp.pageNoneSplit(p.pn) {
   110  		newSize := atEntry.inuseBytes()
   111  		if newSize > p.bp.opts.BitpageSplitSize {
   112  			p.bp.opts.BitpageTaskPushFunc(&bitask.BitpageTaskData{
   113  				Index:    p.bp.index,
   114  				Event:    bitask.BitpageEventSplit,
   115  				Pn:       uint32(p.pn),
   116  				Sentinel: sentinel,
   117  			})
   118  			p.bp.meta.setSplitState(p.pn, pageSplitStateSendTask)
   119  			p.bp.opts.Logger.Infof("%s push split task", logTag)
   120  		}
   121  	}
   122  
   123  	return nil
   124  }
   125  
   126  func (p *page) runFlush(flushing flushableList, oldSize uint64, logTag string) (atEntry *flushableEntry, retErr error) {
   127  	var iiter internalIterator
   128  	var at *arrayTable
   129  	var keyPrefixDeleteKind, prefixDeleteNum int
   130  	var lastPrefixDelete uint64
   131  
   132  	startTime := time.Now()
   133  	if len(flushing) == 1 {
   134  		iiter = flushing[0].newIter(&iterCompactOpts)
   135  	} else {
   136  		its := make([]internalIterator, 0, len(flushing))
   137  		for i := range flushing {
   138  			its = append(its, flushing[i].newIter(&iterCompactOpts))
   139  		}
   140  		iiter = newMergingIter(p.bp.opts.Logger, p.bp.opts.Cmp, its...)
   141  	}
   142  
   143  	iter := newCompactionIter(p.bp, iiter)
   144  	defer func() {
   145  		if iter != nil {
   146  			_ = iter.Close()
   147  		}
   148  
   149  		if retErr != nil && atEntry != nil {
   150  			atEntry.setObsolete()
   151  			atEntry.readerUnref()
   152  		}
   153  	}()
   154  
   155  	deleteBitableKey := func(ik *internalKey) {
   156  		if err := p.bp.opts.BitableDeleteCB(ik.UserKey); err != nil {
   157  			p.bp.opts.Logger.Errorf("%s BitableDeleteCB fail key:%s err:%s", logTag, ik.String(), err)
   158  		}
   159  	}
   160  
   161  	checkKeyPrefixDelete := func(ik *internalKey) bool {
   162  		if lastPrefixDelete == 0 {
   163  			return false
   164  		}
   165  
   166  		keyPrefixDelete := p.bp.opts.KeyPrefixDeleteFunc(ik.UserKey)
   167  		if lastPrefixDelete == keyPrefixDelete {
   168  			deleteBitableKey(ik)
   169  			return true
   170  		} else {
   171  			lastPrefixDelete = 0
   172  			return false
   173  		}
   174  	}
   175  
   176  	p.bp.opts.Logger.Infof("%s runFlush start flushing(%d)", logTag, len(flushing))
   177  
   178  	for iterKey, iterValue := iter.First(); iterKey != nil; iterKey, iterValue = iter.Next() {
   179  		switch iterKey.Kind() {
   180  		case internalKeyKindSet:
   181  			if checkKeyPrefixDelete(iterKey) {
   182  				prefixDeleteNum++
   183  				continue
   184  			}
   185  
   186  			if p.bp.opts.CheckExpireCB(iterKey.UserKey, iterValue) {
   187  				deleteBitableKey(iterKey)
   188  			} else {
   189  				if at == nil {
   190  					at, atEntry, retErr = p.makeNewArrayTable()
   191  					if retErr != nil {
   192  						return nil, retErr
   193  					}
   194  				}
   195  				if _, err := at.writeItem(iterKey.UserKey, iterValue); err != nil {
   196  					p.bp.opts.Logger.Errorf("%s writeItem fail err:%s", logTag, err)
   197  				}
   198  			}
   199  
   200  		case internalKeyKindDelete:
   201  			deleteBitableKey(iterKey)
   202  
   203  		case internalKeyKindPrefixDelete:
   204  			lastPrefixDelete = p.bp.opts.KeyPrefixDeleteFunc(iterKey.UserKey)
   205  			keyPrefixDeleteKind++
   206  		}
   207  	}
   208  
   209  	if at == nil {
   210  		p.bp.opts.Logger.Infof("%s runFlush finish atNil oldSize(%s) newSize(0) in %.3fs",
   211  			logTag, humanize.Uint64(oldSize), time.Since(startTime).Seconds())
   212  		return nil, nil
   213  	}
   214  
   215  	if retErr = at.writeFinish(); retErr != nil {
   216  		return nil, retErr
   217  	}
   218  
   219  	p.bp.meta.setNextArrayTableFileNum(p.pn)
   220  
   221  	duration := time.Since(startTime)
   222  	p.bp.opts.Logger.Infof("%s runFlush finish flushed(%s) at(%s) atVersion(%d) atSize(%s) keys(%d) keysPdKind(%d) pdNum(%d), in %.3fs",
   223  		logTag,
   224  		humanize.Uint64(oldSize),
   225  		at.filename,
   226  		at.getVersion(),
   227  		humanize.Uint64(uint64(at.size)),
   228  		at.itemCount(),
   229  		keyPrefixDeleteKind,
   230  		prefixDeleteNum,
   231  		duration.Seconds(),
   232  	)
   233  
   234  	return atEntry, nil
   235  }
   236  
   237  func (p *page) split(logTag string, pages []*page) (retErr error) {
   238  	splitNum := len(pages)
   239  	p.bp.opts.Logger.Infof("%s start splitNum:%d", logTag, splitNum)
   240  	startTime := time.Now()
   241  	defer func() {
   242  		if r := recover(); r != any(nil) {
   243  			p.bp.opts.Logger.Errorf("%s split panic err:%v stack:%s", logTag, r, string(debug.Stack()))
   244  		}
   245  	}()
   246  
   247  	var flushing flushableList
   248  	var oldSize uint64
   249  
   250  	p.mu.Lock()
   251  	flushing = append(flushing, p.mu.stQueue...)
   252  	flushing = append(flushing, p.mu.arrtable)
   253  	p.mu.Unlock()
   254  
   255  	its := make([]internalIterator, 0, len(flushing))
   256  	for i := range flushing {
   257  		its = append(its, flushing[i].newIter(&iterCompactOpts))
   258  		oldSize += flushing[i].dataBytes()
   259  	}
   260  	iiter := newMergingIter(p.bp.opts.Logger, p.bp.opts.Cmp, its...)
   261  	iter := newCompactionIter(p.bp, iiter)
   262  	defer iter.Close()
   263  
   264  	var ats [consts.BitpageSplitNum]*arrayTable
   265  	var atEntrys [consts.BitpageSplitNum]*flushableEntry
   266  
   267  	for i := 0; i < splitNum; i++ {
   268  		at, atEntry, err := pages[i].makeNewArrayTable()
   269  		if retErr != nil {
   270  			retErr = err
   271  			return
   272  		}
   273  
   274  		ats[i] = at
   275  		atEntrys[i] = atEntry
   276  	}
   277  
   278  	var current int
   279  	var wn uint32
   280  	var writeBytes uint64
   281  	var atCurrent *arrayTable
   282  	var lastPage, finished bool
   283  
   284  	atCurrent = ats[current]
   285  	splitSize := oldSize / uint64(splitNum)
   286  	for key, val := iter.First(); key != nil; key, val = iter.Next() {
   287  		if key.Kind() != internalKeyKindSet {
   288  			continue
   289  		}
   290  
   291  		wn, retErr = atCurrent.writeItem(key.UserKey, val)
   292  		if retErr != nil {
   293  			return
   294  		}
   295  
   296  		if finished {
   297  			finished = false
   298  		}
   299  
   300  		writeBytes += uint64(wn)
   301  		if !lastPage && writeBytes >= splitSize {
   302  			current++
   303  			if current == splitNum {
   304  				lastPage = true
   305  			} else {
   306  				if retErr = atCurrent.writeFinish(); retErr != nil {
   307  					return
   308  				}
   309  				atCurrent = ats[current]
   310  				writeBytes = 0
   311  				finished = true
   312  			}
   313  		}
   314  	}
   315  	if !finished {
   316  		if retErr = atCurrent.writeFinish(); retErr != nil {
   317  			return
   318  		}
   319  	}
   320  
   321  	var newPageInfo strings.Builder
   322  	for i := 0; i < splitNum; i++ {
   323  		if !ats[i].empty() {
   324  			pages[i].maxKey = ats[i].getMaxKey()
   325  			pages[i].mu.arrtable = atEntrys[i]
   326  			pages[i].updateReadState()
   327  			newPageInfo.WriteString(fmt.Sprintf("%s ", humanize.Uint64(ats[i].inuseBytes())))
   328  		} else {
   329  			p.bp.opts.Logger.Infof("%s free empty page pn:%d", logTag, pages[i].pn)
   330  			_ = p.bp.FreePage(pages[i].pn, false)
   331  			pages[i] = nil
   332  		}
   333  	}
   334  
   335  	p.bp.opts.Logger.Infof("%s finish splitSize(%s) oldSize(%s) newSize(%s), cost:%.3fs",
   336  		logTag,
   337  		humanize.Uint64(splitSize),
   338  		humanize.Uint64(oldSize),
   339  		newPageInfo.String(),
   340  		time.Since(startTime).Seconds(),
   341  	)
   342  
   343  	return
   344  }