github.com/scottcagno/storage@v1.8.0/pkg/swal/swal.go (about) 1 package swal 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/scottcagno/storage/pkg/lsmt/binary" 7 "io" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strings" 12 "sync" 13 ) 14 15 var ErrOutOfBounds = errors.New("swal: out of bounds") 16 17 const ( 18 FilePrefix = "dat-" 19 FileSuffix = ".seg" 20 remainingTrigger = 64 21 ) 22 23 // SWAL is a write-ahead log structure 24 type SWAL struct { 25 lock sync.RWMutex // lock is a mutual exclusion lock 26 conf *SWALConfig 27 r *binary.Reader // r is a binary reader 28 w *binary.Writer // w is a binary writer 29 firstIndex int64 // firstIndex is the index of the first segEntry 30 lastIndex int64 // lastIndex is the index of the last segEntry 31 segments []*segment // segments is an index of the current file segments 32 active *segment // active is the current active segment 33 } 34 35 // OpenSWAL opens and returns a new write-ahead log structure 36 func OpenSWAL(c *SWALConfig) (*SWAL, error) { 37 // check config 38 conf := checkWALConfig(c) 39 // TODO: consider replacing `filepath.Abs()`, and `filepath.ToSlash()` 40 // TODO: with `filepath.Clean()` at some point or another. It should 41 // TODO: close enough to the same (possibly even better), so yeah. 42 // make sure we are working with absolute paths 43 base, err := filepath.Abs(conf.BasePath) 44 if err != nil { 45 return nil, err 46 } 47 // sanitize any path separators 48 base = filepath.ToSlash(base) 49 // create any directories if they are not there 50 err = os.MkdirAll(base, os.ModeDir) 51 if err != nil { 52 return nil, err 53 } 54 // create a new write-ahead log instance 55 l := &SWAL{ 56 conf: conf, 57 firstIndex: 0, 58 lastIndex: 1, 59 segments: make([]*segment, 0), 60 } 61 // attempt to load segments 62 err = l.loadIndex() 63 if err != nil { 64 return nil, err 65 } 66 // return write-ahead log 67 return l, nil 68 } 69 70 func (l *SWAL) CloseAndRemove() error { 71 // lock 72 l.lock.Lock() 73 defer l.lock.Unlock() 74 // sync and close writer 75 err := l.w.Close() 76 if err != nil { 77 return err 78 } 79 // close reader 80 err = l.r.Close() 81 if err != nil { 82 return err 83 } 84 // reset the segments 85 l.segments = make([]*segment, 0) 86 // reset first and last index 87 l.firstIndex = 0 88 l.lastIndex = 1 89 // erase all files 90 err = os.RemoveAll(l.conf.BasePath) 91 if err != nil { 92 return err 93 } 94 return nil 95 } 96 97 // loadIndex initializes the segment index. It looks for segment 98 // files in the base directory and attempts to index the segment as 99 // well as any of the entries within the segment. If this is a new 100 // instance, it will create a new segment that is ready for writing. 101 func (l *SWAL) loadIndex() error { 102 // lock 103 l.lock.Lock() 104 defer l.lock.Unlock() 105 // get the files in the base directory path 106 files, err := os.ReadDir(l.conf.BasePath) 107 if err != nil { 108 return err 109 } 110 // list the files in the base directory path and attempt to index the entries 111 for _, file := range files { 112 // skip non data files 113 if file.IsDir() || 114 !strings.HasPrefix(file.Name(), FilePrefix) || 115 !strings.HasSuffix(file.Name(), FileSuffix) { 116 continue // skip this, continue on to the next file 117 } 118 // check the size of segment file 119 fi, err := file.Info() 120 if err != nil { 121 return err 122 } 123 // if the file is empty, remove it and skip to next file 124 if fi.Size() == 0 { 125 err = os.Remove(filepath.Join(l.conf.BasePath, file.Name())) 126 if err != nil { 127 return err 128 } 129 continue // make sure we skip to next segment 130 } 131 // attempt to load segment (and index entries in segment) 132 s, err := l.loadSegmentFile(filepath.Join(l.conf.BasePath, file.Name())) 133 if err != nil { 134 return err 135 } 136 // segment has been loaded successfully, append to the segments list 137 l.segments = append(l.segments, s) 138 } 139 // check to see if any segments were found. If not, initialize a new one 140 if len(l.segments) == 0 { 141 // create a new segment file 142 s, err := l.makeSegmentFile(l.lastIndex) 143 if err != nil { 144 return err 145 } 146 // segment has been created successfully, append to the segments list 147 l.segments = append(l.segments, s) 148 } 149 // segments have either been loaded or created, so now we 150 // should go about updating the active segment pointer to 151 // point to the "tail" (the last segment in the segment list) 152 l.active = l.getLastSegment() 153 // we should be good to go, lets attempt to open a file 154 // reader to work with the active segment 155 l.r, err = binary.OpenReader(l.active.path) 156 if err != nil { 157 return err 158 } 159 // and then attempt to open a file writer to also work 160 // with the active segment, so we can begin appending data 161 l.w, err = binary.OpenWriterWithSync(l.active.path, l.conf.SyncOnWrite) 162 if err != nil { 163 return err 164 } 165 // finally, update the firstIndex and lastIndex 166 l.firstIndex = l.segments[0].index 167 // and update last index 168 l.lastIndex = l.getLastSegment().getLastIndex() 169 return nil 170 } 171 172 // loadSegment attempts to open the segment file at the path provided 173 // and index the entries within the segment. It will return an os.PathError 174 // if the file does not exist, an io.ErrUnexpectedEOF if the file exists 175 // but is empty and has no data to read, and ErrSegmentFull if the file 176 // has met the maxFileSize. It will return the segment and nil error on success. 177 func (l *SWAL) loadSegmentFile(path string) (*segment, error) { 178 // check to make sure path exists before continuing 179 _, err := os.Stat(path) 180 if err != nil { 181 return nil, err 182 } 183 // attempt to open existing segment file for reading 184 fd, err := os.OpenFile(path, os.O_RDONLY, 0666) 185 if err != nil { 186 return nil, err 187 } 188 // defer file close 189 defer func(fd *os.File) { 190 _ = fd.Close() 191 }(fd) 192 // create a new segment to append indexed entries to 193 s := &segment{ 194 path: path, 195 entries: make([]segEntry, 0), 196 } 197 // read segment file and index entries 198 index, err := GetIndexFromFileName(filepath.Base(fd.Name())) 199 if err != nil { 200 return nil, err 201 } 202 for { 203 // get the current offset of the 204 // reader for the segEntry later 205 offset, err := binary.Offset(fd) 206 if err != nil { 207 return nil, err 208 } 209 // read and decode segEntry 210 _, err = binary.DecodeEntry(fd) 211 if err != nil { 212 if err == io.EOF || err == io.ErrUnexpectedEOF { 213 break 214 } 215 return nil, err 216 } 217 // get current offset 218 // add segEntry index to segment entries list 219 s.entries = append(s.entries, segEntry{ 220 index: index, 221 offset: offset, 222 }) 223 // continue to process the next segEntry 224 index++ 225 } 226 // make sure to fill out the segment index from the first segEntry index 227 s.index = s.entries[0].index 228 // get the offset of the reader to calculate bytes remaining 229 offset, err := binary.Offset(fd) 230 if err != nil { 231 return nil, err 232 } 233 // update the segment remaining bytes 234 s.remaining = defaultMaxSegmentSize - offset 235 return s, nil 236 } 237 238 // makeSegment attempts to make a new segment automatically using the timestamp 239 // as the segment name. On success, it will simply return a new segment and a nil error 240 func (l *SWAL) makeSegmentFile(index int64) (*segment, error) { 241 // create a new file 242 path := filepath.Join(l.conf.BasePath, MakeFileNameFromIndex(index)) 243 fd, err := os.Create(path) 244 if err != nil { 245 return nil, err 246 } 247 // don't forget to close it 248 err = fd.Close() 249 if err != nil { 250 return nil, err 251 } 252 // create and return new segment 253 s := &segment{ 254 path: path, 255 index: l.lastIndex, 256 entries: make([]segEntry, 0), 257 remaining: l.conf.MaxSegmentSize, 258 } 259 return s, nil 260 } 261 262 // findSegmentIndex performs binary search to find the segment containing provided index 263 func (l *SWAL) findSegmentIndex(index int64) int { 264 // declare for later 265 i, j := 0, len(l.segments) 266 // otherwise, perform binary search 267 for i < j { 268 h := i + (j-i)/2 269 if index >= l.segments[h].index { 270 i = h + 1 271 } else { 272 j = h 273 } 274 } 275 return i - 1 276 } 277 278 // getLastSegment returns the tail segment in the segments index list 279 func (l *SWAL) getLastSegment() *segment { 280 return l.segments[len(l.segments)-1] 281 } 282 283 // cycleSegment adds a new segment to replace the current (active) segment 284 func (l *SWAL) cycleSegment() error { 285 // sync and close current file segment 286 err := l.w.Close() 287 if err != nil { 288 return err 289 } 290 // create a new segment file 291 s, err := l.makeSegmentFile(l.lastIndex) 292 if err != nil { 293 return err 294 } 295 // add segment to segment index list 296 l.segments = append(l.segments, s) 297 // update the active segment pointer 298 l.active = l.getLastSegment() 299 // open file writer associated with active segment 300 l.w, err = binary.OpenWriterWithSync(l.active.path, l.conf.SyncOnWrite) 301 if err != nil { 302 return err 303 } 304 // update file reader associated with the active segment 305 l.r, err = binary.OpenReader(l.active.path) 306 if err != nil { 307 return err 308 } 309 return nil 310 } 311 312 // Read reads an segEntry from the write-ahead log at the specified index 313 func (l *SWAL) Read(index int64) (*binary.Entry, error) { 314 // read lock 315 l.lock.RLock() 316 defer l.lock.RUnlock() 317 // error checking 318 if index < l.firstIndex || index > l.lastIndex { 319 return nil, ErrOutOfBounds 320 } 321 var err error 322 // find the segment containing the provided index 323 s := l.segments[l.findSegmentIndex(index)] 324 // make sure we are reading from the correct file 325 l.r, err = l.r.ReadFrom(s.path) 326 if err != nil { 327 return nil, err 328 } 329 // find the offset for the segEntry containing the provided index 330 offset := s.entries[s.findEntryIndex(index)].offset 331 // read segEntry at offset 332 e, err := l.r.ReadEntryAt(offset) 333 if err != nil { 334 return nil, err 335 } 336 return e, nil 337 } 338 339 // Write writes an segEntry to the write-ahead log in an append-only fashion 340 func (l *SWAL) Write(e *binary.Entry) (int64, error) { 341 // lock 342 l.lock.Lock() 343 defer l.lock.Unlock() 344 // write segEntry 345 offset, err := l.w.WriteEntry(e) 346 if err != nil { 347 return 0, err 348 } 349 // add new segEntry to the segment index 350 l.active.entries = append(l.active.entries, segEntry{ 351 index: l.lastIndex, 352 offset: offset, 353 }) 354 // update lastIndex 355 l.lastIndex++ 356 // grab the current offset written 357 offset2, err := l.w.Offset() 358 if err != nil { 359 return 0, err 360 } 361 // update segment remaining 362 l.active.remaining -= offset2 - offset 363 // check to see if the active segment needs to be cycled 364 if l.active.remaining < remainingTrigger { 365 err = l.cycleSegment() 366 if err != nil { 367 return 0, err 368 } 369 } 370 return l.lastIndex - 1, nil 371 } 372 373 // WriteBatch writes a batch of entries performing no syncing until the end of the batch 374 func (l *SWAL) WriteBatch(batch *binary.Batch) error { 375 // lock 376 l.lock.Lock() 377 defer l.lock.Unlock() 378 // check sync policy 379 changedSyncPolicy := false 380 if l.conf.SyncOnWrite == true { 381 l.conf.SyncOnWrite = false // if it's on, temporarily disable 382 l.w.SetSyncOnWrite(false) 383 changedSyncPolicy = true 384 } 385 // iterate batch 386 for i := range batch.Entries { 387 // entry 388 e := batch.Entries[i] 389 // write entry to data file 390 offset, err := l.w.WriteEntry(e) 391 if err != nil { 392 return err 393 } 394 // add new segEntry to the segment index 395 l.active.entries = append(l.active.entries, segEntry{ 396 index: l.lastIndex, 397 offset: offset, 398 }) 399 // update lastIndex 400 l.lastIndex++ 401 // grab the current offset written 402 offset2, err := l.w.Offset() 403 if err != nil { 404 return err 405 } 406 // update segment remaining 407 l.active.remaining -= offset2 - offset 408 // check to see if the active segment needs to be cycled 409 if l.active.remaining < remainingTrigger { 410 err = l.cycleSegment() 411 if err != nil { 412 return err 413 } 414 } 415 } 416 // after batch, set everything back how it was 417 if changedSyncPolicy { 418 l.conf.SyncOnWrite = true 419 l.w.SetSyncOnWrite(true) 420 } 421 // after batch has been written, do sync 422 err := l.w.Sync() 423 if err != nil { 424 return err 425 } 426 return nil 427 } 428 429 // Scan provides an iterator method for the write-ahead log 430 func (l *SWAL) Scan(iter func(e *binary.Entry) bool) error { 431 // lock 432 l.lock.Lock() 433 defer l.lock.Unlock() 434 // init for any errors 435 var err error 436 // range the segment index 437 for _, sidx := range l.segments { 438 //fmt.Printf("segment: %s\n", sidx) 439 // make sure we are reading the right data 440 l.r, err = l.r.ReadFrom(sidx.path) 441 if err != nil { 442 return err 443 } 444 // range the segment entries index 445 for _, eidx := range sidx.entries { 446 // read segEntry 447 e, err := l.r.ReadEntryAt(eidx.offset) 448 if err != nil { 449 if err == io.EOF || err == io.ErrUnexpectedEOF { 450 break 451 } 452 return err 453 } 454 // check segEntry against iterator boolean function 455 if !iter(e) { 456 // if it returns false, then process next segEntry 457 continue 458 } 459 } 460 // outside segEntry loop 461 } 462 // outside segment loop 463 return nil 464 } 465 466 // TruncateFront removes all segments and entries before specified index 467 func (l *SWAL) TruncateFront(index int64) error { 468 // lock 469 l.lock.Lock() 470 defer l.lock.Unlock() 471 // perform bounds check 472 if index == 0 || 473 l.lastIndex == 0 || 474 index < l.firstIndex || index > l.lastIndex { 475 return ErrOutOfBounds 476 } 477 if index == l.firstIndex { 478 return nil // nothing to truncate 479 } 480 // locate segment in segment index list containing specified index 481 sidx := l.findSegmentIndex(index) 482 // isolate whole segments that can be removed 483 for i := 0; i < sidx; i++ { 484 // remove segment file 485 err := os.Remove(l.segments[i].path) 486 if err != nil { 487 return err 488 } 489 } 490 // remove segments from segment index (cut, i-j) 491 i, j := 0, sidx 492 copy(l.segments[i:], l.segments[j:]) 493 for k, n := len(l.segments)-j+i, len(l.segments); k < n; k++ { 494 l.segments[k] = nil // or the zero value of T 495 } 496 l.segments = l.segments[:len(l.segments)-j+i] 497 // update firstIndex 498 l.firstIndex = l.segments[0].index 499 // prepare to re-write partial segment 500 var err error 501 var entries []segEntry 502 tmpfd, err := os.Create(filepath.Join(l.conf.BasePath, "tmp-partial.seg")) 503 if err != nil { 504 return err 505 } 506 // after the segment index cut, segment 0 will 507 // contain the partials that we must re-write 508 if l.segments[0].index < index { 509 // make sure we are reading from the correct path 510 l.r, err = l.r.ReadFrom(l.segments[0].path) 511 if err != nil { 512 return err 513 } 514 // range the entries within this segment to find 515 // the ones that are greater than the index and 516 // write those to a temporary buffer.... 517 for _, ent := range l.segments[0].entries { 518 if ent.index < index { 519 continue // skip 520 } 521 // read segEntry 522 e, err := l.r.ReadEntryAt(ent.offset) 523 if err != nil { 524 return err 525 } 526 // write segEntry to temp file 527 ent.offset, err = binary.EncodeEntry(tmpfd, e) 528 if err != nil { 529 return err 530 } 531 // sync write 532 err = tmpfd.Sync() 533 if err != nil { 534 return err 535 } 536 // append to a new entries list 537 entries = append(entries, ent) 538 } 539 // move reader back to active segment file 540 l.r, err = l.r.ReadFrom(l.active.path) 541 if err != nil { 542 return err 543 } 544 // close temp file 545 err = tmpfd.Close() 546 if err != nil { 547 return err 548 } 549 // remove partial segment file 550 err = os.Remove(l.segments[0].path) 551 if err != nil { 552 return err 553 } 554 // change temp file name 555 err = os.Rename(tmpfd.Name(), l.segments[0].path) 556 if err != nil { 557 return err 558 } 559 // update segment 560 l.segments[0].entries = entries 561 l.segments[0].index = entries[0].index 562 } 563 return nil 564 } 565 566 func (l *SWAL) GetConfig() *SWALConfig { 567 // lock 568 l.lock.Lock() 569 defer l.lock.Unlock() 570 return l.conf 571 } 572 573 func (l *SWAL) Sync() error { 574 // lock 575 l.lock.Lock() 576 defer l.lock.Unlock() 577 err := l.w.Sync() 578 if err != nil { 579 return err 580 } 581 return nil 582 } 583 584 // Count returns the number of entries currently in the write-ahead log 585 func (l *SWAL) Count() int { 586 // lock 587 l.lock.Lock() 588 defer l.lock.Unlock() 589 // get count 590 var count int 591 for _, s := range l.segments { 592 count += len(s.entries) 593 } 594 // return count 595 return count 596 } 597 598 // FirstIndex returns the write-ahead logs first index 599 func (l *SWAL) FirstIndex() int64 { 600 // lock 601 l.lock.Lock() 602 defer l.lock.Unlock() 603 return l.firstIndex 604 } 605 606 // LastIndex returns the write-ahead logs first index 607 func (l *SWAL) LastIndex() int64 { 608 // lock 609 l.lock.Lock() 610 defer l.lock.Unlock() 611 return l.lastIndex 612 } 613 614 // Close syncs and closes the write-ahead log 615 func (l *SWAL) Close() error { 616 // lock 617 l.lock.Lock() 618 defer l.lock.Unlock() 619 // sync and close writer 620 err := l.w.Close() 621 if err != nil { 622 return err 623 } 624 // close reader 625 err = l.r.Close() 626 if err != nil { 627 return err 628 } 629 // clean everything else up 630 l.r = nil 631 l.w = nil 632 l.firstIndex = 0 633 l.lastIndex = 0 634 l.segments = nil 635 l.active = nil 636 // force gc for good measure 637 runtime.GC() 638 return nil 639 } 640 641 // String is the stringer method for the write-ahead log 642 func (l *SWAL) String() string { 643 var ss string 644 ss += fmt.Sprintf("\n\n[write-ahead log]\n") 645 ss += fmt.Sprintf("base: %q\n", l.conf.BasePath) 646 ss += fmt.Sprintf("firstIndex: %d\n", l.firstIndex) 647 ss += fmt.Sprintf("lastIndex: %d\n", l.lastIndex) 648 ss += fmt.Sprintf("segments: %d\n", len(l.segments)) 649 if l.active != nil { 650 ss += fmt.Sprintf("active: %q\n", filepath.Base(l.active.path)) 651 } 652 if len(l.segments) > 0 { 653 for i, s := range l.segments { 654 ss += fmt.Sprintf("segment[%d]:\n", i) 655 ss += fmt.Sprintf("\tpath: %q\n", filepath.Base(s.path)) 656 ss += fmt.Sprintf("\tindex: %d\n", s.index) 657 ss += fmt.Sprintf("\tentries: %d\n", len(s.entries)) 658 ss += fmt.Sprintf("\tremaining: %d\n", s.remaining) 659 } 660 } 661 ss += "\n" 662 return ss 663 }