github.com/rosedblabs/rosedb/v2@v2.3.7-0.20240423093736-a89ea823e5b9/merge.go (about)

     1  package rosedb
     2  
     3  import (
     4  	"encoding/binary"
     5  	"fmt"
     6  	"io"
     7  	"math"
     8  	"os"
     9  	"path/filepath"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/rosedblabs/rosedb/v2/index"
    14  	"github.com/rosedblabs/wal"
    15  	"github.com/valyala/bytebufferpool"
    16  )
    17  
    18  const (
    19  	mergeDirSuffixName   = "-merge"
    20  	mergeFinishedBatchID = 0
    21  )
    22  
    23  // Merge merges all the data files in the database.
    24  // It will iterate all the data files, find the valid data,
    25  // and rewrite the data to the new data file.
    26  //
    27  // Merge operation maybe a very time-consuming operation when the database is large.
    28  // So it is recommended to perform this operation when the database is idle.
    29  //
    30  // If reopenAfterDone is true, the original file will be replaced by the merge file,
    31  // and db's index will be rebuilt after the merge completes.
    32  func (db *DB) Merge(reopenAfterDone bool) error {
    33  	if err := db.doMerge(); err != nil {
    34  		return err
    35  	}
    36  	if !reopenAfterDone {
    37  		return nil
    38  	}
    39  
    40  	db.mu.Lock()
    41  	defer db.mu.Unlock()
    42  
    43  	// close current files
    44  	_ = db.closeFiles()
    45  
    46  	// replace original file
    47  	err := loadMergeFiles(db.options.DirPath)
    48  	if err != nil {
    49  		return err
    50  	}
    51  
    52  	// open data files
    53  	if db.dataFiles, err = db.openWalFiles(); err != nil {
    54  		return err
    55  	}
    56  
    57  	// discard the old index first.
    58  	db.index = index.NewIndexer()
    59  	// rebuild index
    60  	if err = db.loadIndex(); err != nil {
    61  		return err
    62  	}
    63  
    64  	return nil
    65  }
    66  
// doMerge performs the merge itself: it rotates the active segment so all
// older segments become read-only, rewrites every still-valid record into a
// separate merge DB, and records each record's new position in a hint file.
// Returns ErrDBClosed if the database is closed, ErrMergeRunning if a merge
// is already in progress, and nil immediately if there is no data to merge.
func (db *DB) doMerge() error {
	db.mu.Lock()
	// check if the database is closed
	if db.closed {
		db.mu.Unlock()
		return ErrDBClosed
	}
	// check if the data files is empty
	if db.dataFiles.IsEmpty() {
		db.mu.Unlock()
		return nil
	}
	// check if the merge operation is running
	if atomic.LoadUint32(&db.mergeRunning) == 1 {
		db.mu.Unlock()
		return ErrMergeRunning
	}
	// set the mergeRunning flag to true
	// (the check-then-set is not atomic by itself, but both steps happen
	// while holding db.mu, so no two merges can pass the check together)
	atomic.StoreUint32(&db.mergeRunning, 1)
	// set the mergeRunning flag to false when the merge operation is completed
	defer atomic.StoreUint32(&db.mergeRunning, 0)

	prevActiveSegId := db.dataFiles.ActiveSegmentID()
	// rotate the write-ahead log, create a new active segment file.
	// so all the older segment files will be merged.
	if err := db.dataFiles.OpenNewActiveSegment(); err != nil {
		db.mu.Unlock()
		return err
	}

	// we can unlock the mutex here, because the write-ahead log files has been rotated,
	// and the new active segment file will be used for the subsequent writes.
	// Our Merge operation will only read from the older segment files.
	db.mu.Unlock()

	// open a merge db to write the data to the new data file.
	// delete the merge directory if it exists and create a new one.
	mergeDB, err := db.openMergeDB()
	if err != nil {
		return err
	}
	defer func() {
		_ = mergeDB.Close()
	}()

	// scratch buffer for encoding records, reused across loop iterations.
	buf := bytebufferpool.Get()
	// expiry cutoff is fixed once, before the scan starts.
	now := time.Now().UnixNano()
	defer bytebufferpool.Put(buf)

	// iterate all the data files, and write the valid data to the new data file.
	// NewReaderWithMax limits the scan to segments at or before the
	// pre-rotation active segment, so concurrent writes are never read.
	reader := db.dataFiles.NewReaderWithMax(prevActiveSegId)
	for {
		buf.Reset()
		chunk, position, err := reader.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		record := decodeLogRecord(chunk)
		// Only handle the normal log record, LogRecordDeleted and LogRecordBatchFinished
		// will be ignored, because they are not valid data.
		// Expired records (Expire in the past) are dropped as well.
		if record.Type == LogRecordNormal && (record.Expire == 0 || record.Expire > now) {
			db.mu.RLock()
			indexPos := db.index.Get(record.Key)
			db.mu.RUnlock()
			// A record is live only if the index still points at this exact
			// position; otherwise it has been overwritten or deleted.
			if indexPos != nil && positionEquals(indexPos, position) {
				// clear the batch id of the record,
				// all data after merge will be valid data, so the batch id should be 0.
				record.BatchId = mergeFinishedBatchID
				// Since the mergeDB will never be used for any read or write operations,
				// it is not necessary to update the index.
				newPosition, err := mergeDB.dataFiles.Write(encodeLogRecord(record, mergeDB.encodeHeader, buf))
				if err != nil {
					return err
				}
				// And now we should write the new position to the write-ahead log,
				// which is so-called HINT FILE in bitcask paper.
				// The HINT FILE will be used to rebuild the index quickly when the database is restarted.
				_, err = mergeDB.hintFile.Write(encodeHintRecord(record.Key, newPosition))
				if err != nil {
					return err
				}
			}
		}
	}

	// After rewrite all the data, we should add a file to indicate that the merge operation is completed.
	// So when we restart the database, we can know that the merge is completed if the file exists,
	// otherwise, we will delete the merge directory and redo the merge operation again.
	mergeFinFile, err := mergeDB.openMergeFinishedFile()
	if err != nil {
		return err
	}
	// record which segment the merge covered, so loadMergeFiles knows how
	// many original segments can be replaced.
	_, err = mergeFinFile.Write(encodeMergeFinRecord(prevActiveSegId))
	if err != nil {
		return err
	}
	// close the merge finished file
	if err := mergeFinFile.Close(); err != nil {
		return err
	}

	// all done successfully, return nil
	return nil
}
   174  
   175  func (db *DB) openMergeDB() (*DB, error) {
   176  	mergePath := mergeDirPath(db.options.DirPath)
   177  	// delete the merge directory if it exists
   178  	if err := os.RemoveAll(mergePath); err != nil {
   179  		return nil, err
   180  	}
   181  	options := db.options
   182  	// we don't need to use the original sync policy,
   183  	// because we can sync the data file manually after the merge operation is completed.
   184  	options.Sync, options.BytesPerSync = false, 0
   185  	options.DirPath = mergePath
   186  	mergeDB, err := Open(options)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  
   191  	// open the hint files to write the new position of the data.
   192  	hintFile, err := wal.Open(wal.Options{
   193  		DirPath: options.DirPath,
   194  		// we don't need to rotate the hint file, just write all data to a single file.
   195  		SegmentSize:    math.MaxInt64,
   196  		SegmentFileExt: hintFileNameSuffix,
   197  		Sync:           false,
   198  		BytesPerSync:   0,
   199  		BlockCache:     0,
   200  	})
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  	mergeDB.hintFile = hintFile
   205  	return mergeDB, nil
   206  }
   207  
   208  func mergeDirPath(dirPath string) string {
   209  	dir := filepath.Dir(filepath.Clean(dirPath))
   210  	base := filepath.Base(dirPath)
   211  	return filepath.Join(dir, base+mergeDirSuffixName)
   212  }
   213  
   214  func (db *DB) openMergeFinishedFile() (*wal.WAL, error) {
   215  	return wal.Open(wal.Options{
   216  		DirPath:        db.options.DirPath,
   217  		SegmentSize:    GB,
   218  		SegmentFileExt: mergeFinNameSuffix,
   219  		Sync:           false,
   220  		BytesPerSync:   0,
   221  		BlockCache:     0,
   222  	})
   223  }
   224  
   225  func positionEquals(a, b *wal.ChunkPosition) bool {
   226  	return a.SegmentId == b.SegmentId &&
   227  		a.BlockNumber == b.BlockNumber &&
   228  		a.ChunkOffset == b.ChunkOffset
   229  }
   230  
// loadMergeFiles loads all the merge files, and copy the data to the original data directory.
// If there is no merge files, or the merge operation is not completed, it will return nil.
func loadMergeFiles(dirPath string) error {
	// check if there is a merge directory
	mergeDirPath := mergeDirPath(dirPath)
	if _, err := os.Stat(mergeDirPath); err != nil {
		// does not exist, just return.
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	// remove the merge directory at last, regardless of success:
	// an incomplete merge is simply redone on the next call.
	defer func() {
		_ = os.RemoveAll(mergeDirPath)
	}()

	// copyFile moves (renames) one segment file from the merge directory into
	// the data directory. Missing source files are silently skipped; empty
	// source files are skipped too unless force is true (the hint and
	// merge-finished files must be moved even when empty).
	copyFile := func(suffix string, fileId uint32, force bool) {
		srcFile := wal.SegmentFileName(mergeDirPath, suffix, fileId)
		stat, err := os.Stat(srcFile)
		if os.IsNotExist(err) {
			return
		}
		if err != nil {
			// an unreadable merge file means the on-disk state is unknown;
			// recovery cannot proceed safely, so fail loudly.
			panic(fmt.Sprintf("loadMergeFiles: failed to get src file stat %v", err))
		}
		if !force && stat.Size() == 0 {
			return
		}
		destFile := wal.SegmentFileName(dirPath, suffix, fileId)
		// best-effort move; rename error is intentionally ignored.
		_ = os.Rename(srcFile, destFile)
	}

	// get the merge finished segment id
	mergeFinSegmentId, err := getMergeFinSegmentId(mergeDirPath)
	if err != nil {
		return err
	}
	// now we get the merge finished segment id, so all the segment id less than the merge finished segment id
	// should be moved to the original data directory, and the original data files should be deleted.
	for fileId := uint32(1); fileId <= mergeFinSegmentId; fileId++ {
		destFile := wal.SegmentFileName(dirPath, dataFileNameSuffix, fileId)
		// TODO: skipping missing destination files with `continue` (the
		// disabled code below) caused a bug; revisit before re-enabling.

		// If we call Merge multiple times, some segment files will be deleted earlier, so just skip them.
		// if _, err = os.Stat(destFile); os.IsNotExist(err) {
		// 	continue
		// } else if err != nil {
		// 	return err
		// }

		// remove the original data file
		if _, err = os.Stat(destFile); err == nil {
			if err = os.Remove(destFile); err != nil {
				return err
			}
		}
		// move the merge data file to the original data directory
		copyFile(dataFileNameSuffix, fileId, false)
	}

	// copy MERGEFINISHED and HINT files to the original data directory
	// there is only one merge finished file, so the file id is always 1,
	// the same as the hint file.
	copyFile(mergeFinNameSuffix, 1, true)
	copyFile(hintFileNameSuffix, 1, true)

	return nil
}
   301  
   302  func getMergeFinSegmentId(mergePath string) (wal.SegmentID, error) {
   303  	// check if the merge operation is completed
   304  	mergeFinFile, err := os.Open(wal.SegmentFileName(mergePath, mergeFinNameSuffix, 1))
   305  	if err != nil {
   306  		// if the merge finished file does not exist, it means that the merge operation is not completed.
   307  		// so we should remove the merge directory and return nil.
   308  		return 0, nil
   309  	}
   310  	defer func() {
   311  		_ = mergeFinFile.Close()
   312  	}()
   313  
   314  	// Only 4 bytes are needed to store the segment id.
   315  	// And the first 7 bytes are chunk header.
   316  	mergeFinBuf := make([]byte, 4)
   317  	if _, err := mergeFinFile.ReadAt(mergeFinBuf, 7); err != nil {
   318  		return 0, err
   319  	}
   320  	mergeFinSegmentId := binary.LittleEndian.Uint32(mergeFinBuf)
   321  	return mergeFinSegmentId, nil
   322  }
   323  
   324  func (db *DB) loadIndexFromHintFile() error {
   325  	hintFile, err := wal.Open(wal.Options{
   326  		DirPath: db.options.DirPath,
   327  		// we don't need to rotate the hint file, just write all data to the same file.
   328  		SegmentSize:    math.MaxInt64,
   329  		SegmentFileExt: hintFileNameSuffix,
   330  		BlockCache:     32 * KB * 10,
   331  	})
   332  	if err != nil {
   333  		return err
   334  	}
   335  	defer func() {
   336  		_ = hintFile.Close()
   337  	}()
   338  
   339  	// read all the hint records from the hint file
   340  	reader := hintFile.NewReader()
   341  	for {
   342  		chunk, _, err := reader.Next()
   343  		if err != nil {
   344  			if err == io.EOF {
   345  				break
   346  			}
   347  			return err
   348  		}
   349  
   350  		key, position := decodeHintRecord(chunk)
   351  		// All the hint records are valid because it is generated by the merge operation.
   352  		// So just put them into the index without checking.
   353  		db.index.Put(key, position)
   354  	}
   355  	return nil
   356  }