package rosedb

import (
	"encoding/binary"
	"errors"
	"github.com/flower-corp/rosedb/ds/art"
	"github.com/flower-corp/rosedb/ds/zset"
	"github.com/flower-corp/rosedb/flock"
	"github.com/flower-corp/rosedb/logfile"
	"github.com/flower-corp/rosedb/logger"
	"github.com/flower-corp/rosedb/util"
	"io"
	"io/ioutil"
	"math"
	"os"
	"os/signal"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
)

var (
	// ErrKeyNotFound key not found
	ErrKeyNotFound = errors.New("key not found")

	// ErrLogFileNotFound log file not found
	ErrLogFileNotFound = errors.New("log file not found")

	// ErrWrongNumberOfArgs doesn't match key-value pair numbers
	ErrWrongNumberOfArgs = errors.New("wrong number of arguments")

	// ErrIntegerOverflow overflows int64 limitations
	ErrIntegerOverflow = errors.New("increment or decrement overflow")

	// ErrWrongValueType value is not a number
	ErrWrongValueType = errors.New("value is not an integer")

	// ErrWrongIndex index is out of range
	ErrWrongIndex = errors.New("index is out of range")

	// ErrGCRunning log file gc is running
	ErrGCRunning = errors.New("log file gc is running, retry later")
)

const (
	// logFileTypeNum is the number of distinct log file types
	// (one per data type: String, List, Hash, Set, ZSet).
	logFileTypeNum = 5
	// encodeHeaderSize is the max header size produced by encodeKey:
	// two varints (key length + sub-key length), up to 5 bytes each.
	encodeHeaderSize = 10
	// initialListSeq is the middle of the uint32 range
	// (presumably so list sequences can extend in both directions — see list implementation).
	initialListSeq = math.MaxUint32 / 2
	// discardFilePath is the sub-directory (under DBPath) holding discard files.
	discardFilePath = "DISCARD"
	// lockFileName is the name of the file lock guarding the db directory.
	lockFileName = "FLOCK"
)

type (
	// RoseDB a db instance.
	RoseDB struct {
		activeLogFiles   map[DataType]*logfile.LogFile // current writable log file per data type.
		archivedLogFiles map[DataType]archivedFiles    // rotated (read-only) log files per data type.
		fidMap           map[DataType][]uint32         // only used at startup, never update even though log files changed.
		discards         map[DataType]*discard         // discard statistics per data type, consumed by log file GC.
		opts             Options
		strIndex         *strIndex  // String indexes(adaptive-radix-tree).
		listIndex        *listIndex // List indexes.
		hashIndex        *hashIndex // Hash indexes.
		setIndex         *setIndex  // Set indexes.
		zsetIndex        *zsetIndex // Sorted set indexes.
		mu               sync.RWMutex
		fileLock         *flock.FileLockGuard // prevents other processes from opening the same directory.
		closed           uint32               // set to 1 by Close; accessed atomically.
		gcState          int32                // >0 while a log file GC is running; accessed atomically.
	}

	// archivedFiles maps a log file id to the (rotated) log file itself.
	archivedFiles map[uint32]*logfile.LogFile

	// valuePos describes where a value lives on disk.
	valuePos struct {
		fid       uint32 // log file id.
		offset    int64  // byte offset of the entry inside that file.
		entrySize int    // encoded size of the entry.
	}

	// strIndex holds the single radix tree for String keys.
	strIndex struct {
		mu      *sync.RWMutex
		idxTree *art.AdaptiveRadixTree
	}

	// indexNode is the value stored in the radix trees: the on-disk
	// position of an entry plus (optionally) the in-memory value.
	indexNode struct {
		value     []byte
		fid       uint32
		offset    int64
		entrySize int
		expiredAt int64
	}

	// listIndex holds one radix tree per list key.
	listIndex struct {
		mu    *sync.RWMutex
		trees map[string]*art.AdaptiveRadixTree
	}

	// hashIndex holds one radix tree per hash key.
	hashIndex struct {
		mu    *sync.RWMutex
		trees map[string]*art.AdaptiveRadixTree
	}

	// setIndex holds one radix tree per set key; members are keyed by murmur hash.
	setIndex struct {
		mu      *sync.RWMutex
		murhash *util.Murmur128
		trees   map[string]*art.AdaptiveRadixTree
	}

	// zsetIndex holds one radix tree per sorted-set key, plus the
	// score-ordered structure; members are keyed by murmur hash.
	zsetIndex struct {
		mu      *sync.RWMutex
		indexes *zset.SortedSet
		murhash *util.Murmur128
		trees   map[string]*art.AdaptiveRadixTree
	}
)

// newStrsIndex builds an empty String index.
func newStrsIndex() *strIndex {
	return &strIndex{idxTree: art.NewART(), mu: new(sync.RWMutex)}
}

// newListIdx builds an empty List index.
func newListIdx() *listIndex {
	return &listIndex{trees: make(map[string]*art.AdaptiveRadixTree), mu: new(sync.RWMutex)}
}

// newHashIdx builds an empty Hash index.
func newHashIdx() *hashIndex {
	return &hashIndex{trees: make(map[string]*art.AdaptiveRadixTree), mu: new(sync.RWMutex)}
}

// newSetIdx builds an empty Set index.
func newSetIdx() *setIndex {
	return &setIndex{
		murhash: util.NewMurmur128(),
		trees:   make(map[string]*art.AdaptiveRadixTree),
		mu:      new(sync.RWMutex),
	}
}

// newZSetIdx builds an empty Sorted Set index.
func newZSetIdx() *zsetIndex {
	return &zsetIndex{
		indexes: zset.New(),
		murhash: util.NewMurmur128(),
		trees:   make(map[string]*art.AdaptiveRadixTree),
		mu:      new(sync.RWMutex),
	}
}

// Open a rosedb instance. You must call Close after using it.
func Open(opts Options) (*RoseDB, error) {
	// create the dir path if not exists.
	if !util.PathExist(opts.DBPath) {
		if err := os.MkdirAll(opts.DBPath, os.ModePerm); err != nil {
			return nil, err
		}
	}

	// acquire file lock to prevent multiple processes from accessing the same directory.
	lockPath := filepath.Join(opts.DBPath, lockFileName)
	lockGuard, err := flock.AcquireFileLock(lockPath, false)
	if err != nil {
		return nil, err
	}

	db := &RoseDB{
		activeLogFiles:   make(map[DataType]*logfile.LogFile),
		archivedLogFiles: make(map[DataType]archivedFiles),
		opts:             opts,
		fileLock:         lockGuard,
		strIndex:         newStrsIndex(),
		listIndex:        newListIdx(),
		hashIndex:        newHashIdx(),
		setIndex:         newSetIdx(),
		zsetIndex:        newZSetIdx(),
	}

	// init discard file.
	if err := db.initDiscard(); err != nil {
		return nil, err
	}

	// load the log files from disk.
	if err := db.loadLogFiles(); err != nil {
		return nil, err
	}

	// load indexes from log files.
	if err := db.loadIndexFromLogFiles(); err != nil {
		return nil, err
	}

	// handle log files garbage collection.
	// NOTE(review): this goroutine stops only on process signals (see
	// handleLogFileGC); it is not stopped by Close — confirm intended.
	go db.handleLogFileGC()
	return db, nil
}

// Close db and save relative configs.
// Errors from releasing the lock and closing files are deliberately
// ignored: Close is best-effort and always reports success.
func (db *RoseDB) Close() error {
	db.mu.Lock()
	defer db.mu.Unlock()

	// NOTE(review): the directory lock is released before the log files
	// are closed, so another process may briefly open the directory while
	// files are still open — confirm acceptable.
	if db.fileLock != nil {
		_ = db.fileLock.Release()
	}
	// close and sync the active file.
	for _, activeFile := range db.activeLogFiles {
		_ = activeFile.Close()
	}
	// close the archived files.
	for _, archived := range db.archivedLogFiles {
		for _, file := range archived {
			_ = file.Sync()
			_ = file.Close()
		}
	}
	// close discard channel.
	for _, dis := range db.discards {
		dis.closeChan()
	}
	atomic.StoreUint32(&db.closed, 1)
	// drop the in-memory indexes so they can be garbage collected.
	db.strIndex = nil
	db.hashIndex = nil
	db.listIndex = nil
	db.zsetIndex = nil
	db.setIndex = nil
	return nil
}

// Sync persist the db files to stable storage.
232 func (db *RoseDB) Sync() error { 233 db.mu.Lock() 234 defer db.mu.Unlock() 235 236 // iterate and sync all the active files. 237 for _, activeFile := range db.activeLogFiles { 238 if err := activeFile.Sync(); err != nil { 239 return err 240 } 241 } 242 // sync discard file. 243 for _, dis := range db.discards { 244 if err := dis.sync(); err != nil { 245 return err 246 } 247 } 248 return nil 249 } 250 251 // Backup copies the db directory to the given path for backup. 252 // It will create the path if it does not exist. 253 func (db *RoseDB) Backup(path string) error { 254 // if log file gc is running, can not backuo the db. 255 if atomic.LoadInt32(&db.gcState) > 0 { 256 return ErrGCRunning 257 } 258 259 if err := db.Sync(); err != nil { 260 return err 261 } 262 if !util.PathExist(path) { 263 if err := os.MkdirAll(path, os.ModePerm); err != nil { 264 return err 265 } 266 } 267 db.mu.Lock() 268 defer db.mu.Unlock() 269 return util.CopyDir(db.opts.DBPath, path) 270 } 271 272 // RunLogFileGC run log file garbage collection manually. 273 func (db *RoseDB) RunLogFileGC(dataType DataType, fid int, gcRatio float64) error { 274 if atomic.LoadInt32(&db.gcState) > 0 { 275 return ErrGCRunning 276 } 277 return db.doRunGC(dataType, fid, gcRatio) 278 } 279 280 func (db *RoseDB) isClosed() bool { 281 return atomic.LoadUint32(&db.closed) == 1 282 } 283 284 func (db *RoseDB) getActiveLogFile(dataType DataType) *logfile.LogFile { 285 db.mu.RLock() 286 defer db.mu.RUnlock() 287 return db.activeLogFiles[dataType] 288 } 289 290 func (db *RoseDB) getArchivedLogFile(dataType DataType, fid uint32) *logfile.LogFile { 291 var lf *logfile.LogFile 292 db.mu.RLock() 293 defer db.mu.RUnlock() 294 if db.archivedLogFiles[dataType] != nil { 295 lf = db.archivedLogFiles[dataType][fid] 296 } 297 return lf 298 } 299 300 // write entry to log file. 
func (db *RoseDB) writeLogEntry(ent *logfile.LogEntry, dataType DataType) (*valuePos, error) {
	// make sure an active log file exists for this data type.
	if err := db.initLogFile(dataType); err != nil {
		return nil, err
	}
	activeLogFile := db.getActiveLogFile(dataType)
	if activeLogFile == nil {
		return nil, ErrLogFileNotFound
	}

	opts := db.opts
	entBuf, esize := logfile.EncodeEntry(ent)
	// rotate: if this entry would push the file past the size threshold,
	// sync and archive the current file and open a fresh one.
	if activeLogFile.WriteAt+int64(esize) > opts.LogFileSizeThreshold {
		if err := activeLogFile.Sync(); err != nil {
			return nil, err
		}

		db.mu.Lock()
		// save the old log file in archived files.
		activeFileId := activeLogFile.Fid
		if db.archivedLogFiles[dataType] == nil {
			db.archivedLogFiles[dataType] = make(archivedFiles)
		}
		db.archivedLogFiles[dataType][activeFileId] = activeLogFile

		// open a new log file.
		ftype, iotype := logfile.FileType(dataType), logfile.IOType(opts.IoType)
		lf, err := logfile.OpenLogFile(opts.DBPath, activeFileId+1, opts.LogFileSizeThreshold, ftype, iotype)
		if err != nil {
			db.mu.Unlock()
			return nil, err
		}
		db.discards[dataType].setTotal(lf.Fid, uint32(opts.LogFileSizeThreshold))
		db.activeLogFiles[dataType] = lf
		activeLogFile = lf
		db.mu.Unlock()
	}

	// the entry's offset is the write position before this write.
	writeAt := atomic.LoadInt64(&activeLogFile.WriteAt)
	// write entry and sync(if necessary)
	if err := activeLogFile.Write(entBuf); err != nil {
		return nil, err
	}
	if opts.Sync {
		if err := activeLogFile.Sync(); err != nil {
			return nil, err
		}
	}
	// NOTE(review): entrySize is left zero here even though esize is known;
	// some callers (hash/set/zset GC rewrite below) fill it in explicitly —
	// confirm updateIndexTree tolerates a zero entrySize for String/List.
	return &valuePos{fid: activeLogFile.Fid, offset: writeAt}, nil
}

// loadLogFiles scans DBPath for log files, records their ids in fidMap,
// and opens each one: the highest fid per data type becomes the active
// file, the rest are archived.
func (db *RoseDB) loadLogFiles() error {
	db.mu.Lock()
	defer db.mu.Unlock()
	fileInfos, err := ioutil.ReadDir(db.opts.DBPath)
	if err != nil {
		return err
	}

	fidMap := make(map[DataType][]uint32)
	for _, file := range fileInfos {
		if strings.HasPrefix(file.Name(), logfile.FilePrefix) {
			// file name layout is <prefix>.<type>.<fid> — assumed from this
			// split; confirm against logfile package.
			splitNames := strings.Split(file.Name(), ".")
			fid, err := strconv.Atoi(splitNames[2])
			if err != nil {
				return err
			}
			typ := DataType(logfile.FileTypesMap[splitNames[1]])
			fidMap[typ] = append(fidMap[typ], uint32(fid))
		}
	}
	db.fidMap = fidMap

	for dataType, fids := range fidMap {
		if db.archivedLogFiles[dataType] == nil {
			db.archivedLogFiles[dataType] = make(archivedFiles)
		}
		if len(fids) == 0 {
			continue
		}
		// load log file in order.
		sort.Slice(fids, func(i, j int) bool {
			return fids[i] < fids[j]
		})

		opts := db.opts
		for i, fid := range fids {
			ftype, iotype := logfile.FileType(dataType), logfile.IOType(opts.IoType)
			lf, err := logfile.OpenLogFile(opts.DBPath, fid, opts.LogFileSizeThreshold, ftype, iotype)
			if err != nil {
				return err
			}
			// latest one is active log file.
			if i == len(fids)-1 {
				db.activeLogFiles[dataType] = lf
			} else {
				db.archivedLogFiles[dataType][fid] = lf
			}
		}
	}
	return nil
}

// initLogFile lazily creates the first active log file for a data type.
// It is a no-op when one already exists.
func (db *RoseDB) initLogFile(dataType DataType) error {
	db.mu.Lock()
	defer db.mu.Unlock()

	if db.activeLogFiles[dataType] != nil {
		return nil
	}
	opts := db.opts
	ftype, iotype := logfile.FileType(dataType), logfile.IOType(opts.IoType)
	lf, err := logfile.OpenLogFile(opts.DBPath, logfile.InitialLogFileId, opts.LogFileSizeThreshold, ftype, iotype)
	if err != nil {
		return err
	}

	db.discards[dataType].setTotal(lf.Fid, uint32(opts.LogFileSizeThreshold))
	db.activeLogFiles[dataType] = lf
	return nil
}

// initDiscard creates the DISCARD directory (if missing) and opens one
// discard file per data type.
func (db *RoseDB) initDiscard() error {
	discardPath := filepath.Join(db.opts.DBPath, discardFilePath)
	if !util.PathExist(discardPath) {
		if err := os.MkdirAll(discardPath, os.ModePerm); err != nil {
			return err
		}
	}

	discards := make(map[DataType]*discard)
	for i := String; i < logFileTypeNum; i++ {
		name := logfile.FileNamesMap[logfile.FileType(i)] + discardFileName
		dis, err := newDiscard(discardPath, name, db.opts.DiscardBufferSize)
		if err != nil {
			return err
		}
		discards[i] = dis
	}
	db.discards = discards
	return nil
}

// encodeKey packs key and subKey into a single byte slice:
// varint(len(key)) | varint(len(subKey)) | key | subKey.
// When both are empty only the header is returned.
func (db *RoseDB) encodeKey(key, subKey []byte) []byte {
	header := make([]byte, encodeHeaderSize)
	var index int
	index += binary.PutVarint(header[index:], int64(len(key)))
	index += binary.PutVarint(header[index:], int64(len(subKey)))
	length := len(key) + len(subKey)
	if length > 0 {
		buf := make([]byte, length+index)
		copy(buf[:index], header[:index])
		copy(buf[index:index+len(key)], key)
		copy(buf[index+len(key):], subKey)
		return buf
	}
	return header[:index]
}

// decodeKey is the inverse of encodeKey: it returns (key, subKey).
// NOTE(review): varint decode errors (i <= 0) are not checked — a
// malformed input would slice at a wrong boundary; confirm inputs are
// always produced by encodeKey.
func (db *RoseDB) decodeKey(key []byte) ([]byte, []byte) {
	var index int
	keySize, i := binary.Varint(key[index:])
	index += i
	_, i = binary.Varint(key[index:])
	index += i
	sep := index + int(keySize)
	return key[index:sep], key[sep:]
}

// sendDiscard reports an overwritten/deleted old value to the discard
// file so GC knows how many bytes of the log file are reclaimable.
// The send is non-blocking: if the channel is full the stat is dropped
// with a warning.
func (db *RoseDB) sendDiscard(oldVal interface{}, updated bool, dataType DataType) {
	if !updated || oldVal == nil {
		return
	}
	node, _ := oldVal.(*indexNode)
	if node == nil || node.entrySize <= 0 {
		return
	}
	select {
	case db.discards[dataType].valChan <- node:
	default:
		logger.Warn("send to discard chan fail")
	}
}

// handleLogFileGC periodically runs log file GC for every data type
// until the process receives a termination signal. Disabled when
// LogFileGCInterval <= 0.
func (db *RoseDB) handleLogFileGC() {
	if db.opts.LogFileGCInterval <= 0 {
		return
	}

	quitSig := make(chan os.Signal, 1)
	signal.Notify(quitSig, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
	ticker := time.NewTicker(db.opts.LogFileGCInterval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			// NOTE(review): this load and the AddInt32 inside doRunGC are
			// separate operations, so two concurrent triggers can both pass
			// the check — a CAS would make the guard exact.
			if atomic.LoadInt32(&db.gcState) > 0 {
				logger.Warn("log file gc is running, skip it")
				break
			}
			// one goroutine per data type; errors are only logged.
			for dType := String; dType < logFileTypeNum; dType++ {
				go func(dataType DataType) {
					err := db.doRunGC(dataType, -1, db.opts.LogFileGCRatio)
					if err != nil {
						logger.Errorf("log file gc err, dataType: [%v], err: [%v]", dataType, err)
					}
				}(dType)
			}
		case <-quitSig:
			return
		}
	}
}

// doRunGC compacts archived log files for one data type: every entry
// still referenced by the in-memory index is rewritten to the active
// file, then the old file is deleted. specifiedFid < 0 means "all
// eligible files"; gcRatio is the minimum discard ratio for a file to
// qualify (see discard.getCCL).
func (db *RoseDB) doRunGC(dataType DataType, specifiedFid int, gcRatio float64) error {
	atomic.AddInt32(&db.gcState, 1)
	defer atomic.AddInt32(&db.gcState, -1)

	// an entry is live iff the index still points at exactly this
	// (fid, offset); only live entries are rewritten.
	maybeRewriteStrs := func(fid uint32, offset int64, ent *logfile.LogEntry) error {
		db.strIndex.mu.Lock()
		defer db.strIndex.mu.Unlock()
		indexVal := db.strIndex.idxTree.Get(ent.Key)
		if indexVal == nil {
			return nil
		}

		node, _ := indexVal.(*indexNode)
		if node != nil && node.fid == fid && node.offset == offset {
			// rewrite entry
			valuePos, err := db.writeLogEntry(ent, String)
			if err != nil {
				return err
			}
			// update index
			if err = db.updateIndexTree(db.strIndex.idxTree, ent, valuePos, false, String); err != nil {
				return err
			}
		}
		return nil
	}

	maybeRewriteList := func(fid uint32, offset int64, ent *logfile.LogEntry) error {
		db.listIndex.mu.Lock()
		defer db.listIndex.mu.Unlock()
		// meta entries are keyed by the raw list key; element entries
		// embed the list key and must be decoded first.
		var listKey = ent.Key
		if ent.Type != logfile.TypeListMeta {
			listKey, _ = db.decodeListKey(ent.Key)
		}
		if db.listIndex.trees[string(listKey)] == nil {
			return nil
		}
		idxTree := db.listIndex.trees[string(listKey)]
		indexVal := idxTree.Get(ent.Key)
		if indexVal == nil {
			return nil
		}

		node, _ := indexVal.(*indexNode)
		if node != nil && node.fid == fid && node.offset == offset {
			valuePos, err := db.writeLogEntry(ent, List)
			if err != nil {
				return err
			}
			if err = db.updateIndexTree(idxTree, ent, valuePos, false, List); err != nil {
				return err
			}
		}
		return nil
	}

	maybeRewriteHash := func(fid uint32, offset int64, ent *logfile.LogEntry) error {
		db.hashIndex.mu.Lock()
		defer db.hashIndex.mu.Unlock()
		// hash entries store encodeKey(key, field); the index tree is
		// per-key and keyed by field.
		key, field := db.decodeKey(ent.Key)
		if db.hashIndex.trees[string(key)] == nil {
			return nil
		}
		idxTree := db.hashIndex.trees[string(key)]
		indexVal := idxTree.Get(field)
		if indexVal == nil {
			return nil
		}

		node, _ := indexVal.(*indexNode)
		if node != nil && node.fid == fid && node.offset == offset {
			// rewrite entry
			valuePos, err := db.writeLogEntry(ent, Hash)
			if err != nil {
				return err
			}
			// update index: the tree node is keyed by field, but entrySize
			// must reflect the full on-disk entry (ent), which is what was
			// rewritten above.
			entry := &logfile.LogEntry{Key: field, Value: ent.Value}
			_, size := logfile.EncodeEntry(ent)
			valuePos.entrySize = size
			if err = db.updateIndexTree(idxTree, entry, valuePos, false, Hash); err != nil {
				return err
			}
		}
		return nil
	}

	maybeRewriteSets := func(fid uint32, offset int64, ent *logfile.LogEntry) error {
		db.setIndex.mu.Lock()
		defer db.setIndex.mu.Unlock()
		if db.setIndex.trees[string(ent.Key)] == nil {
			return nil
		}
		idxTree := db.setIndex.trees[string(ent.Key)]
		// set members are indexed by the murmur hash of the value.
		if err := db.setIndex.murhash.Write(ent.Value); err != nil {
			logger.Fatalf("fail to write murmur hash: %v", err)
		}
		sum := db.setIndex.murhash.EncodeSum128()
		db.setIndex.murhash.Reset()

		indexVal := idxTree.Get(sum)
		if indexVal == nil {
			return nil
		}
		node, _ := indexVal.(*indexNode)
		if node != nil && node.fid == fid && node.offset == offset {
			// rewrite entry
			valuePos, err := db.writeLogEntry(ent, Set)
			if err != nil {
				return err
			}
			// update index (tree node keyed by hash sum, size of full entry).
			entry := &logfile.LogEntry{Key: sum, Value: ent.Value}
			_, size := logfile.EncodeEntry(ent)
			valuePos.entrySize = size
			if err = db.updateIndexTree(idxTree, entry, valuePos, false, Set); err != nil {
				return err
			}
		}
		return nil
	}

	maybeRewriteZSet := func(fid uint32, offset int64, ent *logfile.LogEntry) error {
		db.zsetIndex.mu.Lock()
		defer db.zsetIndex.mu.Unlock()
		// zset entries store encodeKey(key, ...); only the key part selects the tree.
		key, _ := db.decodeKey(ent.Key)
		if db.zsetIndex.trees[string(key)] == nil {
			return nil
		}
		idxTree := db.zsetIndex.trees[string(key)]
		// members are indexed by the murmur hash of the value.
		if err := db.zsetIndex.murhash.Write(ent.Value); err != nil {
			logger.Fatalf("fail to write murmur hash: %v", err)
		}
		sum := db.zsetIndex.murhash.EncodeSum128()
		db.zsetIndex.murhash.Reset()

		indexVal := idxTree.Get(sum)
		if indexVal == nil {
			return nil
		}
		node, _ := indexVal.(*indexNode)
		if node != nil && node.fid == fid && node.offset == offset {
			valuePos, err := db.writeLogEntry(ent, ZSet)
			if err != nil {
				return err
			}
			entry := &logfile.LogEntry{Key: sum, Value: ent.Value}
			_, size := logfile.EncodeEntry(ent)
			valuePos.entrySize = size
			if err = db.updateIndexTree(idxTree, entry, valuePos, false, ZSet); err != nil {
				return err
			}
		}
		return nil
	}

	activeLogFile := db.getActiveLogFile(dataType)
	if activeLogFile == nil {
		return nil
	}
	// flush pending discard stats, then ask for the candidate file list
	// (files whose discard ratio reaches gcRatio, excluding the active fid).
	if err := db.discards[dataType].sync(); err != nil {
		return err
	}
	ccl, err := db.discards[dataType].getCCL(activeLogFile.Fid, gcRatio)
	if err != nil {
		return err
	}

	for _, fid := range ccl {
		if specifiedFid >= 0 && uint32(specifiedFid) != fid {
			continue
		}
		archivedFile := db.getArchivedLogFile(dataType, fid)
		if archivedFile == nil {
			continue
		}

		// sequentially scan every entry in the file.
		var offset int64
		for {
			ent, size, err := archivedFile.ReadLogEntry(offset)
			if err != nil {
				if err == io.EOF || err == logfile.ErrEndOfEntry {
					break
				}
				return err
			}
			var off = offset
			offset += size
			// tombstones and expired entries are simply skipped (dropped).
			if ent.Type == logfile.TypeDelete {
				continue
			}
			ts := time.Now().Unix()
			if ent.ExpiredAt != 0 && ent.ExpiredAt <= ts {
				continue
			}
			var rewriteErr error
			switch dataType {
			case String:
				rewriteErr = maybeRewriteStrs(archivedFile.Fid, off, ent)
			case List:
				rewriteErr = maybeRewriteList(archivedFile.Fid, off, ent)
			case Hash:
				rewriteErr = maybeRewriteHash(archivedFile.Fid, off, ent)
			case Set:
				rewriteErr = maybeRewriteSets(archivedFile.Fid, off, ent)
			case ZSet:
				rewriteErr = maybeRewriteZSet(archivedFile.Fid, off, ent)
			}
			if rewriteErr != nil {
				return rewriteErr
			}
		}

		// delete older log file.
		db.mu.Lock()
		delete(db.archivedLogFiles[dataType], fid)
		_ = archivedFile.Delete()
		db.mu.Unlock()
		// clear discard state.
		db.discards[dataType].clear(fid)
	}
	return nil
}