github.com/nutsdb/nutsdb@v1.0.4/merge.go (about) 1 // Copyright 2023 The nutsdb Author. All rights reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nutsdb 16 17 import ( 18 "bytes" 19 "errors" 20 "fmt" 21 "io" 22 "math" 23 "os" 24 "time" 25 26 "github.com/xujiajun/utils/strconv2" 27 ) 28 29 var ErrDontNeedMerge = errors.New("the number of files waiting to be merged is at least 2") 30 31 func (db *DB) Merge() error { 32 db.mergeStartCh <- struct{}{} 33 return <-db.mergeEndCh 34 } 35 36 // Merge removes dirty data and reduce data redundancy,following these steps: 37 // 38 // 1. Filter delete or expired entry. 39 // 40 // 2. Write entry to activeFile if the key not exist,if exist miss this write operation. 41 // 42 // 3. Filter the entry which is committed. 43 // 44 // 4. At last remove the merged files. 45 // 46 // Caveat: merge is Called means starting multiple write transactions, and it 47 // will affect the other write request. so execute it at the appropriate time. 48 func (db *DB) merge() error { 49 var ( 50 off int64 51 pendingMergeFIds []int 52 ) 53 54 // to prevent the initiation of multiple merges simultaneously. 55 db.mu.Lock() 56 57 if db.isMerging { 58 db.mu.Unlock() 59 return ErrIsMerging 60 } 61 62 db.isMerging = true 63 defer func() { 64 db.isMerging = false 65 }() 66 67 _, pendingMergeFIds = db.getMaxFileIDAndFileIDs() 68 if len(pendingMergeFIds) < 2 { 69 db.mu.Unlock() 70 return ErrDontNeedMerge 71 } 72 73 db.MaxFileID++ 74 75 if !db.opt.SyncEnable && db.opt.RWMode == MMap { 76 if err := db.ActiveFile.rwManager.Sync(); err != nil { 77 db.mu.Unlock() 78 return err 79 } 80 } 81 82 if err := db.ActiveFile.rwManager.Release(); err != nil { 83 db.mu.Unlock() 84 return err 85 } 86 87 var err error 88 path := getDataPath(db.MaxFileID, db.opt.Dir) 89 db.ActiveFile, err = db.fm.getDataFile(path, db.opt.SegmentSize) 90 if err != nil { 91 db.mu.Unlock() 92 return err 93 } 94 95 db.ActiveFile.fileID = db.MaxFileID 96 97 db.mu.Unlock() 98 99 mergingPath := make([]string, len(pendingMergeFIds)) 100 101 for i, pendingMergeFId := range pendingMergeFIds { 102 off = 0 103 path := getDataPath(int64(pendingMergeFId), db.opt.Dir) 104 fr, err := newFileRecovery(path, db.opt.BufferSizeOfRecovery) 105 if err != nil { 106 return err 107 } 108 109 for { 110 if entry, err := fr.readEntry(off); err == nil { 111 if entry == nil { 112 break 113 } 114 115 if entry.isFilter() { 116 off += entry.Size() 117 if off >= db.opt.SegmentSize { 118 break 119 } 120 continue 121 } 122 123 // Due to the lack of concurrency safety in the index, 124 // there is a possibility that a race condition might occur when the merge goroutine reads the index, 125 // while a transaction is being committed, causing modifications to the index. 126 // To address this issue, we need to use a transaction to perform this operation. 127 err := db.Update(func(tx *Tx) error { 128 // check if we have a new entry with same key and bucket 129 if ok := db.isPendingMergeEntry(entry); ok { 130 bucket, err := db.bm.GetBucketById(entry.Meta.BucketId) 131 if err != nil { 132 return err 133 } 134 bucketName := bucket.Name 135 if entry.Meta.Flag == DataLPushFlag { 136 return tx.LPushRaw(bucketName, entry.Key, entry.Value) 137 } 138 139 if entry.Meta.Flag == DataRPushFlag { 140 return tx.RPushRaw(bucketName, entry.Key, entry.Value) 141 } 142 143 return tx.put( 144 bucketName, 145 entry.Key, 146 entry.Value, 147 entry.Meta.TTL, 148 entry.Meta.Flag, 149 entry.Meta.Timestamp, 150 entry.Meta.Ds, 151 ) 152 } 153 154 return nil 155 }) 156 if err != nil { 157 _ = fr.release() 158 return err 159 } 160 161 off += entry.Size() 162 if off >= db.opt.SegmentSize { 163 break 164 } 165 166 } else { 167 if errors.Is(err, io.EOF) || errors.Is(err, ErrIndexOutOfBound) || errors.Is(err, io.ErrUnexpectedEOF) || errors.Is(err, ErrHeaderSizeOutOfBounds) { 168 break 169 } 170 return fmt.Errorf("when merge operation build hintIndex readAt err: %s", err) 171 } 172 } 173 174 err = fr.release() 175 if err != nil { 176 return err 177 } 178 mergingPath[i] = path 179 } 180 181 db.mu.Lock() 182 defer db.mu.Unlock() 183 184 for i := 0; i < len(mergingPath); i++ { 185 if err := os.Remove(mergingPath[i]); err != nil { 186 return fmt.Errorf("when merge err: %s", err) 187 } 188 } 189 190 return nil 191 } 192 193 func (db *DB) mergeWorker() { 194 var ticker *time.Ticker 195 196 if db.opt.MergeInterval != 0 { 197 ticker = time.NewTicker(db.opt.MergeInterval) 198 } else { 199 ticker = time.NewTicker(math.MaxInt) 200 ticker.Stop() 201 } 202 203 for { 204 select { 205 case <-db.mergeStartCh: 206 db.mergeEndCh <- db.merge() 207 // if automatic merging is enabled, then after a manual merge 208 // the t needs to be reset. 209 if db.opt.MergeInterval != 0 { 210 ticker.Reset(db.opt.MergeInterval) 211 } 212 case <-ticker.C: 213 _ = db.merge() 214 case <-db.mergeWorkCloseCh: 215 return 216 } 217 } 218 } 219 220 func (db *DB) isPendingMergeEntry(entry *Entry) bool { 221 bucket, err := db.bm.GetBucketById(entry.Meta.BucketId) 222 if err != nil { 223 return false 224 } 225 bucketId := bucket.Id 226 switch { 227 case entry.IsBelongsToBPlusTree(): 228 return db.isPendingBtreeEntry(bucketId, entry) 229 case entry.IsBelongsToList(): 230 return db.isPendingListEntry(bucketId, entry) 231 case entry.IsBelongsToSet(): 232 return db.isPendingSetEntry(bucketId, entry) 233 case entry.IsBelongsToSortSet(): 234 return db.isPendingZSetEntry(bucketId, entry) 235 } 236 return false 237 } 238 239 func (db *DB) isPendingBtreeEntry(bucketId BucketId, entry *Entry) bool { 240 idx, exist := db.Index.bTree.exist(bucketId) 241 if !exist { 242 return false 243 } 244 245 r, ok := idx.Find(entry.Key) 246 if !ok { 247 return false 248 } 249 250 if r.IsExpired() { 251 db.tm.del(bucketId, string(entry.Key)) 252 idx.Delete(entry.Key) 253 return false 254 } 255 256 if r.TxID != entry.Meta.TxID || r.Timestamp != entry.Meta.Timestamp { 257 return false 258 } 259 260 return true 261 } 262 263 func (db *DB) isPendingSetEntry(bucketId BucketId, entry *Entry) bool { 264 setIdx, exist := db.Index.set.exist(bucketId) 265 if !exist { 266 return false 267 } 268 269 isMember, err := setIdx.SIsMember(string(entry.Key), entry.Value) 270 if err != nil || !isMember { 271 return false 272 } 273 274 return true 275 } 276 277 func (db *DB) isPendingZSetEntry(bucketId BucketId, entry *Entry) bool { 278 key, score := splitStringFloat64Str(string(entry.Key), SeparatorForZSetKey) 279 sortedSetIdx, exist := db.Index.sortedSet.exist(bucketId) 280 if !exist { 281 return false 282 } 283 s, err := sortedSetIdx.ZScore(key, entry.Value) 284 if err != nil || s != score { 285 return false 286 } 287 288 return true 289 } 290 291 func (db *DB) isPendingListEntry(bucketId BucketId, entry *Entry) bool { 292 var userKeyStr string 293 var curSeq uint64 294 var userKey []byte 295 296 if entry.Meta.Flag == DataExpireListFlag { 297 userKeyStr = string(entry.Key) 298 list, exist := db.Index.list.exist(bucketId) 299 if !exist { 300 return false 301 } 302 303 if _, ok := list.Items[userKeyStr]; !ok { 304 return false 305 } 306 307 t, _ := strconv2.StrToInt64(string(entry.Value)) 308 ttl := uint32(t) 309 if _, ok := list.TTL[userKeyStr]; !ok { 310 return false 311 } 312 313 if list.TTL[userKeyStr] != ttl || list.TimeStamp[userKeyStr] != entry.Meta.Timestamp { 314 return false 315 } 316 317 return true 318 } 319 320 if entry.Meta.Flag == DataLPushFlag || entry.Meta.Flag == DataRPushFlag { 321 userKey, curSeq = decodeListKey(entry.Key) 322 userKeyStr = string(userKey) 323 324 list, exist := db.Index.list.exist(bucketId) 325 if !exist { 326 return false 327 } 328 329 if _, ok := list.Items[userKeyStr]; !ok { 330 return false 331 } 332 333 r, ok := list.Items[userKeyStr].Find(ConvertUint64ToBigEndianBytes(curSeq)) 334 if !ok { 335 return false 336 } 337 338 if !bytes.Equal(r.Key, entry.Key) || r.TxID != entry.Meta.TxID || r.Timestamp != entry.Meta.Timestamp { 339 return false 340 } 341 342 return true 343 } 344 345 return false 346 }