github.com/rosedblabs/rosedb/v2@v2.3.7-0.20240423093736-a89ea823e5b9/merge.go

package rosedb

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"sync/atomic"
	"time"

	"github.com/rosedblabs/rosedb/v2/index"
	"github.com/rosedblabs/wal"
	"github.com/valyala/bytebufferpool"
)

const (
	mergeDirSuffixName   = "-merge"
	mergeFinishedBatchID = 0
)

// Merge merges all the data files in the database.
// It will iterate all the data files, find the valid data,
// and rewrite the data to new data files.
//
// Merge may be a very time-consuming operation when the database is large,
// so it is recommended to perform it when the database is idle.
//
// If reopenAfterDone is true, the original files will be replaced by the merged files,
// and the db's index will be rebuilt after the merge completes.
func (db *DB) Merge(reopenAfterDone bool) error {
	if err := db.doMerge(); err != nil {
		return err
	}
	if !reopenAfterDone {
		return nil
	}

	db.mu.Lock()
	defer db.mu.Unlock()

	// close the current files
	_ = db.closeFiles()

	// replace the original files with the merged files
	err := loadMergeFiles(db.options.DirPath)
	if err != nil {
		return err
	}

	// open the data files
	if db.dataFiles, err = db.openWalFiles(); err != nil {
		return err
	}

	// discard the old index first.
	db.index = index.NewIndexer()
	// rebuild the index
	if err = db.loadIndex(); err != nil {
		return err
	}

	return nil
}
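// exampleMergeUsage is an illustrative sketch, not part of rosedb's API: it shows
// how a caller might trigger a merge during an idle period, assuming the exported
// Open/Put/Delete/Close functions and the DefaultOptions variable from this package.
func exampleMergeUsage(dir string) error {
	options := DefaultOptions
	options.DirPath = dir
	db, err := Open(options)
	if err != nil {
		return err
	}
	defer func() {
		_ = db.Close()
	}()

	// Regular writes and deletes gradually accumulate stale data on disk.
	if err := db.Put([]byte("key"), []byte("value")); err != nil {
		return err
	}
	if err := db.Delete([]byte("key")); err != nil {
		return err
	}

	// Reclaim the space held by deleted and overwritten entries.
	// With reopenAfterDone=true, Merge swaps in the merged files and rebuilds
	// the index right away; with false, that is left for the next open.
	return db.Merge(true)
}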
func (db *DB) doMerge() error {
	db.mu.Lock()
	// check if the database is closed
	if db.closed {
		db.mu.Unlock()
		return ErrDBClosed
	}
	// check if the data files are empty
	if db.dataFiles.IsEmpty() {
		db.mu.Unlock()
		return nil
	}
	// check if a merge operation is already running
	if atomic.LoadUint32(&db.mergeRunning) == 1 {
		db.mu.Unlock()
		return ErrMergeRunning
	}
	// set the mergeRunning flag to true
	atomic.StoreUint32(&db.mergeRunning, 1)
	// set the mergeRunning flag to false when the merge operation is completed
	defer atomic.StoreUint32(&db.mergeRunning, 0)

	prevActiveSegId := db.dataFiles.ActiveSegmentID()
	// rotate the write-ahead log and create a new active segment file,
	// so that all the older segment files will be merged.
	if err := db.dataFiles.OpenNewActiveSegment(); err != nil {
		db.mu.Unlock()
		return err
	}

	// we can unlock the mutex here, because the write-ahead log files have been rotated,
	// and the new active segment file will be used for the subsequent writes.
	// Our merge operation will only read from the older segment files.
	db.mu.Unlock()

	// open a merge db to write the valid data to the new data files.
	// delete the merge directory if it exists and create a new one.
	mergeDB, err := db.openMergeDB()
	if err != nil {
		return err
	}
	defer func() {
		_ = mergeDB.Close()
	}()

	buf := bytebufferpool.Get()
	now := time.Now().UnixNano()
	defer bytebufferpool.Put(buf)

	// iterate all the data files, and write the valid data to the new data files.
	reader := db.dataFiles.NewReaderWithMax(prevActiveSegId)
	for {
		buf.Reset()
		chunk, position, err := reader.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}
		record := decodeLogRecord(chunk)
		// Only handle normal log records; LogRecordDeleted and LogRecordBatchFinished
		// are ignored because they are not valid data.
		if record.Type == LogRecordNormal && (record.Expire == 0 || record.Expire > now) {
			db.mu.RLock()
			indexPos := db.index.Get(record.Key)
			db.mu.RUnlock()
			if indexPos != nil && positionEquals(indexPos, position) {
				// clear the batch id of the record,
				// all data after the merge is valid data, so the batch id should be 0.
				record.BatchId = mergeFinishedBatchID
				// Since the mergeDB will never be used for any read or write operations,
				// it is not necessary to update its index.
				newPosition, err := mergeDB.dataFiles.Write(encodeLogRecord(record, mergeDB.encodeHeader, buf))
				if err != nil {
					return err
				}
				// Now write the new position to a separate write-ahead log,
				// the so-called HINT FILE in the bitcask paper.
				// The hint file will be used to rebuild the index quickly when the database is restarted.
				_, err = mergeDB.hintFile.Write(encodeHintRecord(record.Key, newPosition))
				if err != nil {
					return err
				}
			}
		}
	}

	// After rewriting all the data, we add a file to indicate that the merge operation is completed.
	// So when we restart the database, we know the merge is completed if the file exists;
	// otherwise, we will delete the merge directory and redo the merge operation.
	mergeFinFile, err := mergeDB.openMergeFinishedFile()
	if err != nil {
		return err
	}
	_, err = mergeFinFile.Write(encodeMergeFinRecord(prevActiveSegId))
	if err != nil {
		return err
	}
	// close the merge finished file
	if err := mergeFinFile.Close(); err != nil {
		return err
	}

	// all done successfully, return nil
	return nil
}

func (db *DB) openMergeDB() (*DB, error) {
	mergePath := mergeDirPath(db.options.DirPath)
	// delete the merge directory if it exists
	if err := os.RemoveAll(mergePath); err != nil {
		return nil, err
	}
	options := db.options
	// we don't need to use the original sync policy,
	// because we can sync the data files manually after the merge operation is completed.
	options.Sync, options.BytesPerSync = false, 0
	options.DirPath = mergePath
	mergeDB, err := Open(options)
	if err != nil {
		return nil, err
	}

	// open the hint file to record the new positions of the data.
	hintFile, err := wal.Open(wal.Options{
		DirPath: options.DirPath,
		// we don't need to rotate the hint file, just write all data to a single file.
		SegmentSize:    math.MaxInt64,
		SegmentFileExt: hintFileNameSuffix,
		Sync:           false,
		BytesPerSync:   0,
		BlockCache:     0,
	})
	if err != nil {
		return nil, err
	}
	mergeDB.hintFile = hintFile
	return mergeDB, nil
}

func mergeDirPath(dirPath string) string {
	dir := filepath.Dir(filepath.Clean(dirPath))
	base := filepath.Base(dirPath)
	return filepath.Join(dir, base+mergeDirSuffixName)
}

func (db *DB) openMergeFinishedFile() (*wal.WAL, error) {
	return wal.Open(wal.Options{
		DirPath:        db.options.DirPath,
		SegmentSize:    GB,
		SegmentFileExt: mergeFinNameSuffix,
		Sync:           false,
		BytesPerSync:   0,
		BlockCache:     0,
	})
}

func positionEquals(a, b *wal.ChunkPosition) bool {
	return a.SegmentId == b.SegmentId &&
		a.BlockNumber == b.BlockNumber &&
		a.ChunkOffset == b.ChunkOffset
}

// loadMergeFiles loads all the merge files and copies the data to the original data directory.
// If there are no merge files, or the merge operation was not completed, it returns nil.
func loadMergeFiles(dirPath string) error {
	// check if there is a merge directory
	mergeDirPath := mergeDirPath(dirPath)
	if _, err := os.Stat(mergeDirPath); err != nil {
		// does not exist, just return.
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	// remove the merge directory once we are done
	defer func() {
		_ = os.RemoveAll(mergeDirPath)
	}()

	copyFile := func(suffix string, fileId uint32, force bool) {
		srcFile := wal.SegmentFileName(mergeDirPath, suffix, fileId)
		stat, err := os.Stat(srcFile)
		if os.IsNotExist(err) {
			return
		}
		if err != nil {
			panic(fmt.Sprintf("loadMergeFiles: failed to get src file stat %v", err))
		}
		if !force && stat.Size() == 0 {
			return
		}
		destFile := wal.SegmentFileName(dirPath, suffix, fileId)
		_ = os.Rename(srcFile, destFile)
	}

	// get the merge finished segment id
	mergeFinSegmentId, err := getMergeFinSegmentId(mergeDirPath)
	if err != nil {
		return err
	}
	// now that we have the merge finished segment id, every segment with an id no greater than it
	// should be moved to the original data directory, and the original data files should be deleted.
	for fileId := uint32(1); fileId <= mergeFinSegmentId; fileId++ {
		destFile := wal.SegmentFileName(dirPath, dataFileNameSuffix, fileId)
		// TODO: there will be a bug here if we `continue`, check it later.

		// If we call Merge multiple times, some segment files will be deleted earlier, so just skip them.
		// if _, err = os.Stat(destFile); os.IsNotExist(err) {
		//	continue
		// } else if err != nil {
		//	return err
		// }

		// remove the original data file
		if _, err = os.Stat(destFile); err == nil {
			if err = os.Remove(destFile); err != nil {
				return err
			}
		}
		// move the merged data file to the original data directory
		copyFile(dataFileNameSuffix, fileId, false)
	}

	// copy the MERGEFINISHED and HINT files to the original data directory.
	// there is only one merge finished file, so the file id is always 1,
	// the same as the hint file.
	copyFile(mergeFinNameSuffix, 1, true)
	copyFile(hintFileNameSuffix, 1, true)

	return nil
}

func getMergeFinSegmentId(mergePath string) (wal.SegmentID, error) {
	// check if the merge operation is completed
	mergeFinFile, err := os.Open(wal.SegmentFileName(mergePath, mergeFinNameSuffix, 1))
	if err != nil {
		// if the merge finished file does not exist, the merge operation was not completed,
		// so return 0 and let the caller remove the merge directory.
		return 0, nil
	}
	defer func() {
		_ = mergeFinFile.Close()
	}()

	// Only 4 bytes are needed to store the segment id,
	// and the first 7 bytes are the chunk header.
	mergeFinBuf := make([]byte, 4)
	if _, err := mergeFinFile.ReadAt(mergeFinBuf, 7); err != nil {
		return 0, err
	}
	mergeFinSegmentId := binary.LittleEndian.Uint32(mergeFinBuf)
	return mergeFinSegmentId, nil
}
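// exampleMergeFinPayload is a hypothetical sketch for illustration only (the real
// encodeMergeFinRecord lives elsewhere in this package). It shows the payload layout
// that getMergeFinSegmentId assumes: the previous active segment id encoded as a
// 4-byte little-endian integer, preceded on disk by the 7-byte WAL chunk header.
func exampleMergeFinPayload(segId wal.SegmentID) []byte {
	buf := make([]byte, 4)
	binary.LittleEndian.PutUint32(buf, uint32(segId))
	return buf
}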
func (db *DB) loadIndexFromHintFile() error {
	hintFile, err := wal.Open(wal.Options{
		DirPath: db.options.DirPath,
		// we don't need to rotate the hint file, just write all data to the same file.
		SegmentSize:    math.MaxInt64,
		SegmentFileExt: hintFileNameSuffix,
		BlockCache:     32 * KB * 10,
	})
	if err != nil {
		return err
	}
	defer func() {
		_ = hintFile.Close()
	}()

	// read all the hint records from the hint file
	reader := hintFile.NewReader()
	for {
		chunk, _, err := reader.Next()
		if err != nil {
			if err == io.EOF {
				break
			}
			return err
		}

		key, position := decodeHintRecord(chunk)
		// All the hint records are valid because they were generated by the merge operation,
		// so just put them into the index without any checks.
		db.index.Put(key, position)
	}
	return nil
}
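// exampleMergeBeforeShutdown is an illustrative sketch, not part of rosedb's API,
// assuming the exported Open/Merge/Close functions and DefaultOptions: it compacts
// the database without reloading it, leaving the merged files in the "-merge"
// directory. On the next open the merged files are moved into place by
// loadMergeFiles, and the index for the merged portion is rebuilt quickly from the
// hint file via loadIndexFromHintFile, as described in the comments above.
func exampleMergeBeforeShutdown(dir string) error {
	options := DefaultOptions
	options.DirPath = dir
	db, err := Open(options)
	if err != nil {
		return err
	}
	// Rewrite the valid data into the "-merge" directory while the current
	// files keep serving reads and writes; reopenAfterDone=false skips the
	// in-place swap and index rebuild.
	if err := db.Merge(false); err != nil {
		_ = db.Close()
		return err
	}
	return db.Close()
}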