github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bithash/compact.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bithash
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"encoding/binary"
    21  	"errors"
    22  	"io"
    23  	"io/fs"
    24  	"os"
    25  
    26  	"github.com/cockroachdb/errors/oserror"
    27  	"github.com/zuoyebang/bitalosdb/internal/base"
    28  	"github.com/zuoyebang/bitalosdb/internal/utils"
    29  )
    30  
// Layout constants for the FILENUMMAP file. As written by
// writeFileNumMapFile, the file is a run of fixed-size records followed by a
// footer made of a 4-byte version and a magic string.
const (
	// fileNumMapMagicLen is the byte length of fileNumMapMagic.
	fileNumMapMagicLen    = 8
	// fileNumMapFooterLen is the footer size: a 4-byte little-endian version
	// followed by the magic string.
	fileNumMapFooterLen   = 4 + fileNumMapMagicLen
	// fileNumMapMagic is the trailing marker used to validate the file.
	fileNumMapMagic       = "\xf7\xcf\xf4\x85\xb7\x41\xe2\x88"
	// fileNumMapMagicOffset is the offset of the magic string inside the footer.
	fileNumMapMagicOffset = fileNumMapFooterLen - fileNumMapMagicLen
	// fileNumMapVersion is the version number stored in the footer.
	fileNumMapVersion     = 1
	// fileNumMapRecordLen is the size of one record: two little-endian uint32
	// values (source file number, destination file number).
	fileNumMapRecordLen   = 8
)
    39  
// Layout constants for the compact log file: a 16-byte header holding the
// write and read offsets, followed by fixed-size records.
const (
	// compactLogRecordLen is the size of one record: a uint16 kind followed by
	// two uint32 file numbers (source, destination), all little-endian.
	compactLogRecordLen   = 10
	// compactLogHeaderLen is the size of the header: two little-endian uint64
	// values (write offset, then read offset).
	compactLogHeaderLen   = 16
	// compactLogWriteOffset is the file offset of the header's write-offset
	// field, which is also the start of the header.
	compactLogWriteOffset = 0
	// compactLogReadOffset is the file offset of the header's read-offset
	// field; it matches where readHeader and setHeader place that value.
	compactLogReadOffset  = 8
	// compactLogDataOffset is the file offset of the first record.
	compactLogDataOffset  = compactLogHeaderLen
)
    47  
// Record kinds stored in the first two bytes of a compact log record.
const (
	// compactLogKindSet maps the record's source file number to its
	// destination file number when replayed.
	compactLogKindSet uint16 = 1 + iota
	// compactLogKindDelete removes the mapping of the record's source file
	// number when replayed.
	compactLogKindDelete
)
    52  
// Limits used when selecting compaction candidates.
const (
	// compactMaxFileNum caps how many files CheckFilesDelPercent returns in
	// one call.
	compactMaxFileNum  = 8
	// compactMaxMiniSize is the size threshold in bytes (50 MiB); files no
	// larger than this are reported by CheckFilesMiniSize.
	compactMaxMiniSize = 50 << 20
)
    57  
// CompactFiles describes one bithash file selected as a compaction candidate.
type CompactFiles struct {
	// FileNum identifies the candidate file.
	FileNum    FileNum
	// DelPercent is the ratio delKeyNum/keyNum of the file. It is populated by
	// CheckFilesDelPercent and left at zero by CheckFilesMiniSize.
	DelPercent float64
	// Size is the file size as reported by Bithash.fileSize. It is populated
	// by CheckFilesMiniSize and left at zero by CheckFilesDelPercent.
	Size       int64
}
    63  
    64  func (b *Bithash) CheckFilesDelPercent(cfgPercent float64) []CompactFiles {
    65  	var compactFiles []CompactFiles
    66  	var findNum int
    67  
    68  	b.meta.mu.RLock()
    69  	defer b.meta.mu.RUnlock()
    70  
    71  	for fn, fileMeta := range b.meta.mu.filesMeta {
    72  		if fileMeta.state != fileMetaStateImmutable || fileMeta.keyNum == 0 || (cfgPercent > 0.0 && fileMeta.delKeyNum == 0) {
    73  			continue
    74  		}
    75  
    76  		delPercent := float64(fileMeta.delKeyNum) / float64(fileMeta.keyNum)
    77  		if delPercent >= cfgPercent {
    78  			b.logger.Infof("[COMPACTBITHASH %d] checkFilesDelPercent %s delPercent:%.4f cfgPercent:%.2f", b.index, fileMeta, delPercent, cfgPercent)
    79  			compactFiles = append(compactFiles, CompactFiles{
    80  				FileNum:    fn,
    81  				DelPercent: delPercent,
    82  			})
    83  			findNum++
    84  			if findNum >= compactMaxFileNum {
    85  				break
    86  			}
    87  		}
    88  	}
    89  
    90  	return compactFiles
    91  }
    92  
    93  func (b *Bithash) CheckFilesMiniSize() []CompactFiles {
    94  	var compactFiles []CompactFiles
    95  
    96  	b.meta.mu.RLock()
    97  	defer b.meta.mu.RUnlock()
    98  
    99  	for fn, fileMeta := range b.meta.mu.filesMeta {
   100  		if fileMeta.state != fileMetaStateImmutable {
   101  			continue
   102  		}
   103  
   104  		fileSize := b.fileSize(fn)
   105  		if fileSize <= compactMaxMiniSize {
   106  			b.logger.Infof("[COMPACTBITHASH %d] checkFilesMiniSize %s fileSize:%s", b.index, fileMeta, utils.FmtSize(uint64(fileSize)))
   107  			compactFiles = append(compactFiles, CompactFiles{
   108  				FileNum: fn,
   109  				Size:    fileSize,
   110  			})
   111  		}
   112  	}
   113  
   114  	return compactFiles
   115  }
   116  
// compactLogWriter appends file-number mapping records to the compact log
// file and replays them into the owning Bithash's file-number map.
type compactLogWriter struct {
	// b is the Bithash whose file-number map the log records apply to.
	b           *Bithash
	// file is the open compact log file.
	file        *os.File
	// filename is the path file was opened from.
	filename    string
	// writeOffset is the file offset at which the next record is written.
	writeOffset uint64
	// readOffset is the file offset of the next record to replay.
	readOffset  uint64
	// recordBuf is scratch space used to encode one record before writing it.
	recordBuf   [compactLogRecordLen]byte
	// headerBuf is scratch space used to encode the header before writing it.
	headerBuf   [compactLogHeaderLen]byte
}
   126  
   127  func initCompactLog(b *Bithash) (err error) {
   128  	var file *os.File
   129  	var isNewFile bool
   130  	filename := MakeFilepath(b.fs, b.dirname, fileTypeCompactLog, 0)
   131  	_, err = b.fs.Stat(filename)
   132  	if errors.Is(err, fs.ErrNotExist) {
   133  		isNewFile = true
   134  	}
   135  	file, err = os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0666)
   136  	if err != nil {
   137  		return err
   138  	}
   139  
   140  	defer func() {
   141  		if err != nil {
   142  			err = file.Close()
   143  		}
   144  	}()
   145  
   146  	w := &compactLogWriter{
   147  		b:           b,
   148  		file:        file,
   149  		filename:    filename,
   150  		writeOffset: compactLogDataOffset,
   151  		readOffset:  compactLogDataOffset,
   152  	}
   153  
   154  	if isNewFile {
   155  		err = w.setHeader()
   156  	} else {
   157  		err = w.readHeader()
   158  	}
   159  	if err != nil {
   160  		return err
   161  	}
   162  
   163  	b.logger.Infof("[BITHASH %d] open compactLog file filename:%s writeOffset:%d readOffset:%d",
   164  		b.index, base.GetFilePathBase(filename), w.writeOffset, w.readOffset)
   165  
   166  	b.cLogWriter = w
   167  
   168  	// 重放未读的log日志
   169  	if err = b.cLogWriter.replayLog(); err != nil {
   170  		return err
   171  	}
   172  
   173  	return nil
   174  }
   175  
   176  func (w *compactLogWriter) readHeader() error {
   177  	var buf [16]byte
   178  	if _, err := w.file.ReadAt(buf[:], compactLogWriteOffset); err != nil {
   179  		return err
   180  	}
   181  	w.writeOffset = binary.LittleEndian.Uint64(buf[0:8])
   182  	w.readOffset = binary.LittleEndian.Uint64(buf[8:16])
   183  	return nil
   184  }
   185  
   186  func (w *compactLogWriter) setHeader() error {
   187  	binary.LittleEndian.PutUint64(w.headerBuf[0:8], w.writeOffset)
   188  	binary.LittleEndian.PutUint64(w.headerBuf[8:16], w.readOffset)
   189  	_, err := w.file.WriteAt(w.headerBuf[:], compactLogWriteOffset)
   190  	return err
   191  }
   192  
   193  func (w *compactLogWriter) reset() error {
   194  	w.writeOffset = compactLogDataOffset
   195  	w.readOffset = compactLogDataOffset
   196  	return w.setHeader()
   197  }
   198  
   199  func (w *compactLogWriter) writeRecord(kind uint16, srcFn, dstFn FileNum) error {
   200  	binary.LittleEndian.PutUint16(w.recordBuf[0:2], kind)
   201  	binary.LittleEndian.PutUint32(w.recordBuf[2:6], uint32(srcFn))
   202  	binary.LittleEndian.PutUint32(w.recordBuf[6:10], uint32(dstFn))
   203  	_, err := w.file.WriteAt(w.recordBuf[:], int64(w.writeOffset))
   204  	if err != nil {
   205  		return err
   206  	}
   207  
   208  	binary.LittleEndian.PutUint64(w.headerBuf[:8], w.writeOffset+compactLogRecordLen)
   209  	if _, err = w.file.WriteAt(w.headerBuf[:8], compactLogWriteOffset); err != nil {
   210  		return err
   211  	}
   212  
   213  	w.writeOffset += compactLogRecordLen
   214  	return nil
   215  }
   216  
   217  func (w *compactLogWriter) replayLog() (err error) {
   218  	if w.readOffset == w.writeOffset {
   219  		return nil
   220  	}
   221  
   222  	w.b.mufn.Lock()
   223  	defer w.b.mufn.Unlock()
   224  
   225  	var buf [compactLogRecordLen]byte
   226  	var srcFn, dstFn FileNum
   227  	var kind uint16
   228  	num := 0
   229  	for w.readOffset < w.writeOffset {
   230  		if _, err = w.file.ReadAt(buf[:], int64(w.readOffset)); err != nil {
   231  			break
   232  		}
   233  
   234  		kind = binary.LittleEndian.Uint16(buf[0:2])
   235  		srcFn = FileNum(binary.LittleEndian.Uint32(buf[2:6]))
   236  		dstFn = FileNum(binary.LittleEndian.Uint32(buf[6:10]))
   237  		if kind == compactLogKindSet {
   238  			w.b.mufn.fnMap[srcFn] = dstFn
   239  		} else if kind == compactLogKindDelete {
   240  			delete(w.b.mufn.fnMap, srcFn)
   241  		}
   242  
   243  		w.readOffset += compactLogRecordLen
   244  		num++
   245  	}
   246  
   247  	if err != nil && err != io.EOF {
   248  		return err
   249  	}
   250  
   251  	if err = w.setHeader(); err != nil {
   252  		return err
   253  	}
   254  
   255  	w.b.cLogUpdate = true
   256  	w.b.logger.Infof("bithash replay end logNum:%d", num)
   257  	return nil
   258  }
   259  
   260  func (w *compactLogWriter) close() (err error) {
   261  	if err = w.file.Sync(); err != nil {
   262  		return
   263  	}
   264  	if err = w.file.Close(); err != nil {
   265  		return
   266  	}
   267  	return
   268  }
   269  
   270  func initFileNumMap(b *Bithash) error {
   271  	filename := MakeFilepath(b.fs, b.dirname, fileTypeFileNumMap, 0)
   272  	_, err := b.fs.Stat(filename)
   273  	if oserror.IsNotExist(err) {
   274  		if err = createFileNumMapFile(b, filename); err != nil {
   275  			return err
   276  		}
   277  	}
   278  
   279  	if err = readFileNumMapFile(b, filename); err != nil {
   280  		return err
   281  	}
   282  
   283  	if err = initCompactLog(b); err != nil {
   284  		return err
   285  	}
   286  
   287  	if err = writeFileNumMapFile(b); err != nil {
   288  		return err
   289  	}
   290  
   291  	if err = b.cLogWriter.reset(); err != nil {
   292  		return err
   293  	}
   294  
   295  	b.logger.Infof("[BITHASH %d] initFileNumMap success compactLog readOffset:%d writeOffset:%d",
   296  		b.index, b.cLogWriter.readOffset, b.cLogWriter.writeOffset)
   297  
   298  	return nil
   299  }
   300  
   301  func encodeFileNumMapFooter() []byte {
   302  	buf := make([]byte, fileNumMapFooterLen)
   303  	binary.LittleEndian.PutUint32(buf[0:4], fileNumMapVersion)
   304  	copy(buf[4:fileNumMapFooterLen], fileNumMapMagic)
   305  	buf = buf[:fileNumMapFooterLen]
   306  	return buf
   307  }
   308  
   309  func createFileNumMapFile(b *Bithash, filename string) (err error) {
   310  	var file File
   311  	file, err = b.fs.Create(filename)
   312  	if err != nil {
   313  		return err
   314  	}
   315  
   316  	defer func() {
   317  		if file != nil {
   318  			err = file.Close()
   319  		}
   320  		if err != nil {
   321  			err = b.fs.Remove(filename)
   322  		}
   323  	}()
   324  
   325  	if _, err = file.Write(encodeFileNumMapFooter()); err != nil {
   326  		return err
   327  	}
   328  	if err = file.Sync(); err != nil {
   329  		return err
   330  	}
   331  	return nil
   332  }
   333  
   334  func readFileNumMapFile(b *Bithash, filename string) (err error) {
   335  	var dataSize int64
   336  	var file File
   337  	file, err = b.fs.Open(filename)
   338  	if err != nil {
   339  		return err
   340  	}
   341  	defer file.Close()
   342  
   343  	checkFooter := func(f ReadableFile) bool {
   344  		stat, err := f.Stat()
   345  		if err != nil {
   346  			return false
   347  		}
   348  		dataSize = stat.Size() - fileNumMapFooterLen
   349  		if dataSize < 0 {
   350  			return false
   351  		}
   352  		buf := [fileNumMapMagicLen]byte{}
   353  		n, err := f.ReadAt(buf[:], dataSize+fileNumMapMagicOffset)
   354  		if err != nil && err != io.EOF {
   355  			return false
   356  		}
   357  		return bytes.Equal(buf[:n], []byte(fileNumMapMagic))
   358  	}
   359  	if !checkFooter(file) {
   360  		return ErrBhFileNumMapCheckFail
   361  	}
   362  
   363  	if dataSize == 0 {
   364  		return nil
   365  	}
   366  
   367  	var readBuf [fileNumMapRecordLen]byte
   368  	var srcFn, dstFn FileNum
   369  	var offset int64
   370  	r := bufio.NewReaderSize(file, int(dataSize))
   371  
   372  	b.mufn.Lock()
   373  	defer b.mufn.Unlock()
   374  
   375  	for offset < dataSize {
   376  		n, err := r.Read(readBuf[:])
   377  		if err != nil {
   378  			if err != io.EOF {
   379  				return err
   380  			}
   381  			break
   382  		}
   383  		if n != fileNumMapRecordLen {
   384  			return errors.New("read FILENUMMAP incomplete data")
   385  		}
   386  
   387  		offset += fileNumMapRecordLen
   388  		srcFn = FileNum(binary.LittleEndian.Uint32(readBuf[0:4]))
   389  		dstFn = FileNum(binary.LittleEndian.Uint32(readBuf[4:8]))
   390  		b.mufn.fnMap[srcFn] = dstFn
   391  	}
   392  
   393  	return nil
   394  }
   395  
   396  func writeFileNumMapFile(b *Bithash) (err error) {
   397  	if b.cLogUpdate == false {
   398  		return nil
   399  	}
   400  
   401  	var file File
   402  	fileNumMapTmp := MakeFilepath(b.fs, b.dirname, fileTypeFileNumMapTmp, 0)
   403  	fileNumMap := MakeFilepath(b.fs, b.dirname, fileTypeFileNumMap, 0)
   404  
   405  	b.logger.Infof("bithash write FileNumMap file start filename:%s", fileNumMap)
   406  
   407  	file, err = b.fs.Create(fileNumMapTmp)
   408  	if err != nil {
   409  		return err
   410  	}
   411  
   412  	defer func() {
   413  		if _, e := b.fs.Stat(fileNumMapTmp); e == nil {
   414  			err = b.fs.Remove(fileNumMapTmp)
   415  		}
   416  	}()
   417  
   418  	b.mufn.RLock()
   419  	defer b.mufn.RUnlock()
   420  
   421  	var buf [fileNumMapRecordLen]byte
   422  	fnNum := len(b.mufn.fnMap)
   423  	size := fnNum*fileNumMapRecordLen + fileNumMapFooterLen
   424  	w := bufio.NewWriterSize(file, size)
   425  
   426  	for srcFn, dstFn := range b.mufn.fnMap {
   427  		binary.LittleEndian.PutUint32(buf[0:4], uint32(srcFn))
   428  		binary.LittleEndian.PutUint32(buf[4:8], uint32(dstFn))
   429  		if _, err = w.Write(buf[:]); err != nil {
   430  			return err
   431  		}
   432  	}
   433  
   434  	if _, err = w.Write(encodeFileNumMapFooter()); err != nil {
   435  		return err
   436  	}
   437  	if err = w.Flush(); err != nil {
   438  		return err
   439  	}
   440  	if err = file.Sync(); err != nil {
   441  		return err
   442  	}
   443  	if err = file.Close(); err != nil {
   444  		return err
   445  	}
   446  
   447  	if err = b.fs.Rename(fileNumMapTmp, fileNumMap); err != nil {
   448  		return err
   449  	}
   450  
   451  	b.logger.Infof("bithash write FileNumMap file end filename:%s writeFnNum:%d", fileNumMap, fnNum)
   452  
   453  	return nil
   454  }