github.com/nutsdb/nutsdb@v1.0.4/merge.go

// Copyright 2023 The nutsdb Author. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nutsdb

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"time"

	"github.com/xujiajun/utils/strconv2"
)

// ErrDontNeedMerge is returned when there are fewer than two data files, so there is nothing to merge.
var ErrDontNeedMerge = errors.New("the number of files waiting to be merged is less than 2")

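// Merge triggers a merge pass and blocks until mergeWorker reports the result.
//
// A minimal usage sketch; the error handling shown here is illustrative and
// not part of the library:
//
//	if err := db.Merge(); err != nil && !errors.Is(err, ErrDontNeedMerge) {
//		log.Printf("merge failed: %v", err)
//	}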
func (db *DB) Merge() error {
	db.mergeStartCh <- struct{}{}
	return <-db.mergeEndCh
}

// merge removes dirty data and reduces data redundancy, following these steps:
//
// 1. Skip entries that have been deleted or have expired.
//
// 2. Rewrite an entry to the active file only if it is still the latest
// version of its key; otherwise skip it.
//
// 3. Only entries belonging to committed transactions are rewritten; they are
// validated against the in-memory index.
//
// 4. Finally, remove the merged files.
//
// Caveat: calling merge starts multiple write transactions, which will affect
// other write requests, so execute it at an appropriate time.
func (db *DB) merge() error {
	var (
		off              int64
		pendingMergeFIds []int
	)

	// Prevent multiple merges from being initiated simultaneously.
	db.mu.Lock()

	if db.isMerging {
		db.mu.Unlock()
		return ErrIsMerging
	}

	db.isMerging = true
	defer func() {
		db.isMerging = false
	}()

	_, pendingMergeFIds = db.getMaxFileIDAndFileIDs()
	if len(pendingMergeFIds) < 2 {
		db.mu.Unlock()
		return ErrDontNeedMerge
	}

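	// Rotate to a new active file; entries that survive the merge are
	// rewritten into it, so the old files can be removed afterwards.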
	db.MaxFileID++

	if !db.opt.SyncEnable && db.opt.RWMode == MMap {
		if err := db.ActiveFile.rwManager.Sync(); err != nil {
			db.mu.Unlock()
			return err
		}
	}

	if err := db.ActiveFile.rwManager.Release(); err != nil {
		db.mu.Unlock()
		return err
	}

	var err error
	path := getDataPath(db.MaxFileID, db.opt.Dir)
	db.ActiveFile, err = db.fm.getDataFile(path, db.opt.SegmentSize)
	if err != nil {
		db.mu.Unlock()
		return err
	}

	db.ActiveFile.fileID = db.MaxFileID

	db.mu.Unlock()

	mergingPath := make([]string, len(pendingMergeFIds))

	for i, pendingMergeFId := range pendingMergeFIds {
		off = 0
		path := getDataPath(int64(pendingMergeFId), db.opt.Dir)
		fr, err := newFileRecovery(path, db.opt.BufferSizeOfRecovery)
		if err != nil {
			return err
		}

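		// Scan the pending file entry by entry; live entries are rewritten
		// through a transaction, everything else is skipped.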
		for {
			if entry, err := fr.readEntry(off); err == nil {
				if entry == nil {
					break
				}

				if entry.isFilter() {
					off += entry.Size()
					if off >= db.opt.SegmentSize {
						break
					}
					continue
				}

				// The index is not concurrency-safe: the merge goroutine could
				// race with a committing transaction that modifies the index.
				// To avoid that, perform this check-and-rewrite inside a transaction.
				err := db.Update(func(tx *Tx) error {
					// Rewrite the entry only if it is still the latest version
					// for its key and bucket.
					if ok := db.isPendingMergeEntry(entry); ok {
						bucket, err := db.bm.GetBucketById(entry.Meta.BucketId)
						if err != nil {
							return err
						}
						bucketName := bucket.Name
						if entry.Meta.Flag == DataLPushFlag {
							return tx.LPushRaw(bucketName, entry.Key, entry.Value)
						}

						if entry.Meta.Flag == DataRPushFlag {
							return tx.RPushRaw(bucketName, entry.Key, entry.Value)
						}

						return tx.put(
							bucketName,
							entry.Key,
							entry.Value,
							entry.Meta.TTL,
							entry.Meta.Flag,
							entry.Meta.Timestamp,
							entry.Meta.Ds,
						)
					}

					return nil
				})
				if err != nil {
					_ = fr.release()
					return err
				}

				off += entry.Size()
				if off >= db.opt.SegmentSize {
					break
				}

			} else {
				if errors.Is(err, io.EOF) || errors.Is(err, ErrIndexOutOfBound) || errors.Is(err, io.ErrUnexpectedEOF) || errors.Is(err, ErrHeaderSizeOutOfBounds) {
					break
				}
				return fmt.Errorf("merge: failed to read entry at offset %d: %w", off, err)
			}
		}

		err = fr.release()
		if err != nil {
			return err
		}
		mergingPath[i] = path
	}

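	// All live entries have been rewritten into the new active file, so the
	// merged files can now be deleted.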
	db.mu.Lock()
	defer db.mu.Unlock()

	for i := 0; i < len(mergingPath); i++ {
		if err := os.Remove(mergingPath[i]); err != nil {
			return fmt.Errorf("merge: failed to remove merged file: %w", err)
		}
	}

	return nil
}

func (db *DB) mergeWorker() {
	var ticker *time.Ticker

	if db.opt.MergeInterval != 0 {
		ticker = time.NewTicker(db.opt.MergeInterval)
	} else {
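		// Automatic merging is disabled: create a ticker and stop it
		// immediately so that its channel never fires.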
		ticker = time.NewTicker(math.MaxInt)
		ticker.Stop()
	}

	for {
		select {
		case <-db.mergeStartCh:
			db.mergeEndCh <- db.merge()
			// If automatic merging is enabled, the ticker needs to be reset
			// after a manual merge.
			if db.opt.MergeInterval != 0 {
				ticker.Reset(db.opt.MergeInterval)
			}
		case <-ticker.C:
			_ = db.merge()
		case <-db.mergeWorkCloseCh:
			return
		}
	}
}
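
// A sketch of configuring the automatic merge interval from client code; the
// one-hour interval and the directory are illustrative values, and a zero
// MergeInterval disables the ticker above:
//
//	opts := nutsdb.DefaultOptions
//	opts.MergeInterval = time.Hour
//	db, err := nutsdb.Open(opts, nutsdb.WithDir("/tmp/nutsdb"))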

// isPendingMergeEntry reports whether the entry is still the latest version
// recorded in the in-memory index and therefore must be rewritten by the merge.
func (db *DB) isPendingMergeEntry(entry *Entry) bool {
	bucket, err := db.bm.GetBucketById(entry.Meta.BucketId)
	if err != nil {
		return false
	}
	bucketId := bucket.Id
	switch {
	case entry.IsBelongsToBPlusTree():
		return db.isPendingBtreeEntry(bucketId, entry)
	case entry.IsBelongsToList():
		return db.isPendingListEntry(bucketId, entry)
	case entry.IsBelongsToSet():
		return db.isPendingSetEntry(bucketId, entry)
	case entry.IsBelongsToSortSet():
		return db.isPendingZSetEntry(bucketId, entry)
	}
	return false
}

func (db *DB) isPendingBtreeEntry(bucketId BucketId, entry *Entry) bool {
	idx, exist := db.Index.bTree.exist(bucketId)
	if !exist {
		return false
	}

	r, ok := idx.Find(entry.Key)
	if !ok {
		return false
	}

	if r.IsExpired() {
		db.tm.del(bucketId, string(entry.Key))
		idx.Delete(entry.Key)
		return false
	}

	if r.TxID != entry.Meta.TxID || r.Timestamp != entry.Meta.Timestamp {
		return false
	}

	return true
}

func (db *DB) isPendingSetEntry(bucketId BucketId, entry *Entry) bool {
	setIdx, exist := db.Index.set.exist(bucketId)
	if !exist {
		return false
	}

	isMember, err := setIdx.SIsMember(string(entry.Key), entry.Value)
	if err != nil || !isMember {
		return false
	}

	return true
}

func (db *DB) isPendingZSetEntry(bucketId BucketId, entry *Entry) bool {
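	// Sorted-set entries encode the member's score into the key; split it out
	// before the index lookup.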
	key, score := splitStringFloat64Str(string(entry.Key), SeparatorForZSetKey)
	sortedSetIdx, exist := db.Index.sortedSet.exist(bucketId)
	if !exist {
		return false
	}
	s, err := sortedSetIdx.ZScore(key, entry.Value)
	if err != nil || s != score {
		return false
	}

	return true
}

func (db *DB) isPendingListEntry(bucketId BucketId, entry *Entry) bool {
	var userKeyStr string
	var curSeq uint64
	var userKey []byte

	if entry.Meta.Flag == DataExpireListFlag {
		userKeyStr = string(entry.Key)
		list, exist := db.Index.list.exist(bucketId)
		if !exist {
			return false
		}

		if _, ok := list.Items[userKeyStr]; !ok {
			return false
		}

		t, _ := strconv2.StrToInt64(string(entry.Value))
		ttl := uint32(t)
		if _, ok := list.TTL[userKeyStr]; !ok {
			return false
		}

		if list.TTL[userKeyStr] != ttl || list.TimeStamp[userKeyStr] != entry.Meta.Timestamp {
			return false
		}

		return true
	}

	if entry.Meta.Flag == DataLPushFlag || entry.Meta.Flag == DataRPushFlag {
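		// LPush/RPush entries store the user key together with a sequence
		// number; decode both to locate the record in the in-memory list index.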
		userKey, curSeq = decodeListKey(entry.Key)
		userKeyStr = string(userKey)

		list, exist := db.Index.list.exist(bucketId)
		if !exist {
			return false
		}

		if _, ok := list.Items[userKeyStr]; !ok {
			return false
		}

		r, ok := list.Items[userKeyStr].Find(ConvertUint64ToBigEndianBytes(curSeq))
		if !ok {
			return false
		}

		if !bytes.Equal(r.Key, entry.Key) || r.TxID != entry.Meta.TxID || r.Timestamp != entry.Meta.Timestamp {
			return false
		}

		return true
	}

	return false
}