github.com/scottcagno/storage@v1.8.0/pkg/_junk/_lsmtree/wal/wal.go (about) 1 package wal 2 3 import ( 4 "errors" 5 "fmt" 6 "github.com/scottcagno/storage/pkg/_junk/_lsmtree/encoding/binary" 7 "io" 8 "os" 9 "path/filepath" 10 "runtime" 11 "strings" 12 "sync" 13 "time" 14 ) 15 16 const ( 17 LogPrefix = "wal-" 18 LogSuffix = ".seg" 19 20 defaultMaxFileSize uint64 = 16 << 10 // 16 KB 21 ) 22 23 var ( 24 maxFileSize = defaultMaxFileSize 25 26 ErrOutOfBounds = errors.New("error: out of bounds") 27 ErrSegmentFull = errors.New("error: segment is full") 28 ErrFileClosed = errors.New("error: file closed") 29 ErrBadArgument = errors.New("error: bad argument") 30 ErrNoPathProvided = errors.New("error: no path provided") 31 ErrOptionsMissing = errors.New("error: options missing") 32 ) 33 34 // segEntry contains the metadata for a single segEntry within the file segment 35 type segEntry struct { 36 index int64 // index is the "id" of this segEntry 37 offset int64 // offset is the actual offset of this segEntry in the segment file 38 } 39 40 // String is the stringer method for an segEntry 41 func (e segEntry) String() string { 42 return fmt.Sprintf("segEntry.index=%d, segEntry.offset=%d", e.index, e.offset) 43 } 44 45 // segment contains the metadata for the file segment 46 type segment struct { 47 path string // path is the full path to this segment file 48 index int64 // starting index of the segment 49 entries []segEntry // entries is an index of the entries in the segment 50 remaining uint64 // remaining is the bytes left after max file size minus segEntry data 51 } 52 53 // String is the stringer method for a segment 54 func (s *segment) String() string { 55 var ss string 56 ss += fmt.Sprintf("path: %q\n", filepath.Base(s.path)) 57 ss += fmt.Sprintf("index: %d\n", s.index) 58 ss += fmt.Sprintf("entries: %d\n", len(s.entries)) 59 ss += fmt.Sprintf("remaining: %d\n", s.remaining) 60 return ss 61 } 62 63 // makeFileName returns a file name using the provided timestamp. 64 // If t is nil, it will create a new name using time.Now() 65 func makeFileName(t time.Time) string { 66 //tf := t.Format("2006-01-03_15:04:05:000000") 67 //return fmt.Sprintf("%s%s%s", LogPrefix, time.RFC3339Nano, LogSuffix) 68 return fmt.Sprintf("%s%d%s", LogPrefix, time.Now().UnixMicro(), LogSuffix) 69 } 70 71 // getFirstIndex returns the first index in the entries list 72 func (s *segment) getFirstIndex() int64 { 73 return s.index 74 } 75 76 // getLastIndex returns the last index in the entries list 77 func (s *segment) getLastIndex() int64 { 78 if len(s.entries) > 0 { 79 return s.entries[len(s.entries)-1].index 80 } 81 return s.index 82 } 83 84 // findEntryIndex performs binary search to find the segEntry containing provided index 85 func (s *segment) findEntryIndex(index int64) int { 86 // declare for later 87 i, j := 0, len(s.entries) 88 // otherwise, perform binary search 89 for i < j { 90 h := i + (j-i)/2 91 if index >= s.entries[h].index { 92 i = h + 1 93 } else { 94 j = h 95 } 96 } 97 return i - 1 98 } 99 100 // WAL is a write-ahead log structure 101 type WAL struct { 102 lock sync.RWMutex // lock is a mutual exclusion lock 103 base string // base is the base filepath 104 r *binary.Reader // r is a binary reader 105 w *binary.Writer // w is a binary writer 106 firstIndex int64 // firstIndex is the index of the first segEntry 107 lastIndex int64 // lastIndex is the index of the last segEntry 108 segments []*segment // segments is an index of the current file segments 109 active *segment // active is the current active segment 110 } 111 112 // Open opens and returns a new write-ahead log structure 113 func Open(base string) (*WAL, error) { 114 // make sure we are working with absolute paths 115 base, err := filepath.Abs(base) 116 if err != nil { 117 return nil, err 118 } 119 // sanitize any path separators 120 base = filepath.ToSlash(base) 121 // create any directories if they are not there 122 err = os.MkdirAll(base, os.ModeDir) 123 if err != nil { 124 return nil, err 125 } 126 // create a new write-ahead log instance 127 l := &WAL{ 128 base: base, 129 firstIndex: 0, 130 lastIndex: 1, 131 segments: make([]*segment, 0), 132 } 133 // attempt to load segments 134 err = l.loadIndex() 135 if err != nil { 136 return nil, err 137 } 138 // return write-ahead log 139 return l, nil 140 } 141 142 // loadIndex initializes the segment index. It looks for segment 143 // files in the base directory and attempts to index the segment as 144 // well as any of the entries within the segment. If this is a new 145 // instance, it will create a new segment that is ready for writing. 146 func (l *WAL) loadIndex() error { 147 // lock 148 l.lock.Lock() 149 defer l.lock.Unlock() 150 // get the files in the base directory path 151 files, err := os.ReadDir(l.base) 152 if err != nil { 153 return err 154 } 155 // list the files in the base directory path and attempt to index the entries 156 for _, file := range files { 157 // skip non data files 158 if file.IsDir() || 159 !strings.HasPrefix(file.Name(), LogPrefix) || 160 !strings.HasSuffix(file.Name(), LogSuffix) { 161 continue // skip this, continue on to the next file 162 } 163 // attempt to load segment (and index entries in segment) 164 s, err := l.loadSegmentFile(filepath.Join(l.base, file.Name())) 165 if err != nil { 166 return err 167 } 168 // segment has been loaded successfully, append to the segments list 169 l.segments = append(l.segments, s) 170 } 171 // check to see if any segments were found. If not, initialize a new one 172 if len(l.segments) == 0 { 173 // create a new segment file 174 s, err := l.makeSegmentFile() 175 if err != nil { 176 return err 177 } 178 // segment has been created successfully, append to the segments list 179 l.segments = append(l.segments, s) 180 } 181 // segments have either been loaded or created, so now we 182 // should go about updating the active segment pointer to 183 // point to the "tail" (the last segment in the segment list) 184 l.active = l.getLastSegment() 185 // we should be good to go, lets attempt to open a file 186 // reader to work with the active segment 187 l.r, err = binary.OpenReader(l.active.path) 188 if err != nil { 189 return err 190 } 191 // and then attempt to open a file writer to also work 192 // with the active segment, so we can begin appending data 193 l.w, err = binary.OpenWriter(l.active.path) 194 if err != nil { 195 return err 196 } 197 // finally, update the firstIndex and lastIndex 198 l.firstIndex = l.segments[0].index 199 // and update last index 200 l.lastIndex = l.getLastSegment().getLastIndex() 201 return nil 202 } 203 204 // loadSegment attempts to open the segment file at the path provided 205 // and index the entries within the segment. It will return an os.PathError 206 // if the file does not exist, an io.ErrUnexpectedEOF if the file exists 207 // but is empty and has no data to read, and ErrSegmentFull if the file 208 // has met the maxFileSize. It will return the segment and nil error on success. 209 func (l *WAL) loadSegmentFile(path string) (*segment, error) { 210 // check to make sure path exists before continuing 211 _, err := os.Stat(path) 212 if err != nil { 213 return nil, err 214 } 215 // attempt to open existing segment file for reading 216 fd, err := os.OpenFile(path, os.O_RDONLY, 0666) 217 if err != nil { 218 return nil, err 219 } 220 // defer file close 221 defer func(fd *os.File) { 222 _ = fd.Close() 223 }(fd) 224 // create a new segment to append indexed entries to 225 s := &segment{ 226 path: path, 227 entries: make([]segEntry, 0), 228 } 229 // read segment file and index entries 230 for { 231 // get the current offset of the 232 // reader for the segEntry later 233 offset, err := binary.Offset(fd) 234 if err != nil { 235 return nil, err 236 } 237 // read and decode segEntry 238 e, err := binary.DecodeEntry(fd) 239 if err != nil { 240 if err == io.EOF || err == io.ErrUnexpectedEOF { 241 break 242 } 243 return nil, err 244 } 245 // get current offset 246 // add segEntry index to segment entries list 247 s.entries = append(s.entries, segEntry{ 248 index: e.Id, 249 offset: offset, 250 }) 251 // continue to process the next segEntry 252 } 253 // make sure to fill out the segment index from the first segEntry index 254 s.index = s.entries[0].index 255 // get the offset of the reader to calculate bytes remaining 256 offset, err := binary.Offset(fd) 257 if err != nil { 258 return nil, err 259 } 260 // update the segment remaining bytes 261 s.remaining = maxFileSize - uint64(offset) 262 return s, nil 263 } 264 265 // makeSegment attempts to make a new segment automatically using the timestamp 266 // as the segment name. On success, it will simply return a new segment and a nil error 267 func (l *WAL) makeSegmentFile() (*segment, error) { 268 // create a new file 269 path := filepath.Join(l.base, makeFileName(time.Now())) 270 fd, err := os.Create(path) 271 if err != nil { 272 return nil, err 273 } 274 // don't forget to close it 275 err = fd.Close() 276 if err != nil { 277 return nil, err 278 } 279 // create and return new segment 280 s := &segment{ 281 path: path, 282 index: l.lastIndex, 283 entries: make([]segEntry, 0), 284 remaining: maxFileSize, 285 } 286 return s, nil 287 } 288 289 // findSegmentIndex performs binary search to find the segment containing provided index 290 func (l *WAL) findSegmentIndex(index int64) int { 291 // declare for later 292 i, j := 0, len(l.segments) 293 // otherwise, perform binary search 294 for i < j { 295 h := i + (j-i)/2 296 if index >= l.segments[h].index { 297 i = h + 1 298 } else { 299 j = h 300 } 301 } 302 return i - 1 303 } 304 305 // getLastSegment returns the tail segment in the segments index list 306 func (l *WAL) getLastSegment() *segment { 307 return l.segments[len(l.segments)-1] 308 } 309 310 // cycleSegment adds a new segment to replace the current (active) segment 311 func (l *WAL) cycleSegment() error { 312 // sync and close current file segment 313 err := l.w.Close() 314 if err != nil { 315 return err 316 } 317 // create a new segment file 318 s, err := l.makeSegmentFile() 319 if err != nil { 320 return err 321 } 322 // add segment to segment index list 323 l.segments = append(l.segments, s) 324 // update the active segment pointer 325 l.active = l.getLastSegment() 326 // open file writer associated with active segment 327 l.w, err = binary.OpenWriter(l.active.path) 328 if err != nil { 329 return err 330 } 331 // update file reader associated with the active segment 332 l.r, err = binary.OpenReader(l.active.path) 333 if err != nil { 334 return err 335 } 336 return nil 337 } 338 339 // Read reads an segEntry from the write-ahead log at the specified index 340 func (l *WAL) Read(index int64) (string, []byte, error) { 341 // read lock 342 l.lock.RLock() 343 defer l.lock.RUnlock() 344 // error checking 345 if index < l.firstIndex || index > l.lastIndex { 346 return "", nil, ErrOutOfBounds 347 } 348 var err error 349 // find the segment containing the provided index 350 s := l.segments[l.findSegmentIndex(index)] 351 // make sure we are reading from the correct file 352 l.r, err = l.r.ReadFrom(s.path) 353 if err != nil { 354 return "", nil, err 355 } 356 // find the offset for the segEntry containing the provided index 357 offset := s.entries[s.findEntryIndex(index)].offset 358 // read segEntry at offset 359 e, err := l.r.ReadEntryAt(offset) 360 if err != nil { 361 return "", nil, err 362 } 363 return string(e.Key), e.Value, nil 364 } 365 366 // WriteIndexEntry writes an segEntry to the write-ahead log in an append-only fashion 367 func (l *WAL) Write(key string, value []byte) (int64, error) { 368 // lock 369 l.lock.Lock() 370 defer l.lock.Unlock() 371 // write segEntry 372 offset, err := l.w.WriteEntry(&binary.DataEntry{ 373 Id: l.lastIndex, 374 Key: []byte(key), 375 Value: value, 376 }) 377 if err != nil { 378 return 0, err 379 } 380 // add new segEntry to the segment index 381 l.active.entries = append(l.active.entries, segEntry{ 382 index: l.lastIndex, 383 offset: offset, 384 }) 385 // update lastIndex 386 l.lastIndex++ 387 // grab the current offset written 388 offset2, err := l.w.Offset() 389 if err != nil { 390 return 0, err 391 } 392 // update segment remaining 393 l.active.remaining -= uint64(offset2 - offset) 394 // check to see if the active segment needs to be cycled 395 if l.active.remaining < 64 { 396 err = l.cycleSegment() 397 if err != nil { 398 return 0, err 399 } 400 } 401 return l.lastIndex - 1, nil 402 } 403 404 // Scan provides an iterator method for the write-ahead log 405 func (l *WAL) Scan(iter func(index int64, key string, value []byte) bool) error { 406 // lock 407 l.lock.Lock() 408 defer l.lock.Unlock() 409 // init for any errors 410 var err error 411 // range the segment index 412 for _, sidx := range l.segments { 413 fmt.Printf("segment: %s\n", sidx) 414 // make sure we are reading the right data 415 l.r, err = l.r.ReadFrom(sidx.path) 416 if err != nil { 417 return err 418 } 419 // range the segment entries index 420 for _, eidx := range sidx.entries { 421 // read segEntry 422 e, err := l.r.ReadEntryAt(eidx.offset) 423 if err != nil { 424 if err == io.EOF || err == io.ErrUnexpectedEOF { 425 break 426 } 427 return err 428 } 429 // check segEntry against iterator boolean function 430 if !iter(e.Id, string(e.Key), e.Value) { 431 // if it returns false, then process next segEntry 432 continue 433 } 434 } 435 // outside segEntry loop 436 } 437 // outside segment loop 438 return nil 439 } 440 441 // TruncateFront removes all segments and entries before specified index 442 func (l *WAL) TruncateFront(index int64) error { 443 // lock 444 l.lock.Lock() 445 defer l.lock.Unlock() 446 // perform bounds check 447 if index == 0 || 448 l.lastIndex == 0 || 449 index < l.firstIndex || index > l.lastIndex { 450 return ErrOutOfBounds 451 } 452 if index == l.firstIndex { 453 return nil // nothing to truncate 454 } 455 // locate segment in segment index list containing specified index 456 sidx := l.findSegmentIndex(index) 457 // isolate whole segments that can be removed 458 for i := 0; i < sidx; i++ { 459 // remove segment file 460 err := os.Remove(l.segments[i].path) 461 if err != nil { 462 return err 463 } 464 } 465 // remove segments from segment index (cut, i-j) 466 i, j := 0, sidx 467 copy(l.segments[i:], l.segments[j:]) 468 for k, n := len(l.segments)-j+i, len(l.segments); k < n; k++ { 469 l.segments[k] = nil // or the zero value of T 470 } 471 l.segments = l.segments[:len(l.segments)-j+i] 472 // update firstIndex 473 l.firstIndex = l.segments[0].index 474 // prepare to re-write partial segment 475 var err error 476 var entries []segEntry 477 tmpfd, err := os.Create(filepath.Join(l.base, "tmp-partial.seg")) 478 if err != nil { 479 return err 480 } 481 // after the segment index cut, segment 0 will 482 // contain the partials that we must re-write 483 if l.segments[0].index < index { 484 // make sure we are reading from the correct path 485 l.r, err = l.r.ReadFrom(l.segments[0].path) 486 if err != nil { 487 return err 488 } 489 // range the entries within this segment to find 490 // the ones that are greater than the index and 491 // write those to a temporary buffer.... 492 for _, ent := range l.segments[0].entries { 493 if ent.index < index { 494 continue // skip 495 } 496 // read segEntry 497 e, err := l.r.ReadEntryAt(ent.offset) 498 if err != nil { 499 return err 500 } 501 // write segEntry to temp file 502 ent.offset, err = binary.EncodeEntry(tmpfd, e) 503 if err != nil { 504 return err 505 } 506 // sync write 507 err = tmpfd.Sync() 508 if err != nil { 509 return err 510 } 511 // append to a new entries list 512 entries = append(entries, ent) 513 } 514 // move reader back to active segment file 515 l.r, err = l.r.ReadFrom(l.active.path) 516 if err != nil { 517 return err 518 } 519 // close temp file 520 err = tmpfd.Close() 521 if err != nil { 522 return err 523 } 524 // remove partial segment file 525 err = os.Remove(l.segments[0].path) 526 if err != nil { 527 return err 528 } 529 // change temp file name 530 err = os.Rename(tmpfd.Name(), l.segments[0].path) 531 if err != nil { 532 return err 533 } 534 // update segment 535 l.segments[0].entries = entries 536 l.segments[0].index = entries[0].index 537 } 538 return nil 539 } 540 541 // Count returns the number of entries currently in the write-ahead log 542 func (l *WAL) Count() int { 543 // lock 544 l.lock.Lock() 545 defer l.lock.Unlock() 546 // get count 547 var count int 548 for _, s := range l.segments { 549 count += len(s.entries) 550 } 551 // return count 552 return count 553 } 554 555 // FirstIndex returns the write-ahead logs first index 556 func (l *WAL) FirstIndex() int64 { 557 // lock 558 l.lock.Lock() 559 defer l.lock.Unlock() 560 return l.firstIndex 561 } 562 563 // LastIndex returns the write-ahead logs first index 564 func (l *WAL) LastIndex() int64 { 565 // lock 566 l.lock.Lock() 567 defer l.lock.Unlock() 568 return l.lastIndex 569 } 570 571 // Close syncs and closes the write-ahead log 572 func (l *WAL) Close() error { 573 // lock 574 l.lock.Lock() 575 defer l.lock.Unlock() 576 // sync and close writer 577 err := l.w.Close() 578 if err != nil { 579 return err 580 } 581 // close reader 582 err = l.r.Close() 583 if err != nil { 584 return err 585 } 586 // clean everything else up 587 l.base = "" 588 l.r = nil 589 l.w = nil 590 l.firstIndex = 0 591 l.lastIndex = 0 592 l.segments = nil 593 l.active = nil 594 // force gc for good measure 595 runtime.GC() 596 return nil 597 } 598 599 func (l *WAL) Path() string { 600 return l.base 601 } 602 603 // String is the stringer method for the write-ahead log 604 func (l *WAL) String() string { 605 var ss string 606 ss += fmt.Sprintf("\n\n[write-ahead log]\n") 607 ss += fmt.Sprintf("base: %q\n", l.base) 608 ss += fmt.Sprintf("firstIndex: %d\n", l.firstIndex) 609 ss += fmt.Sprintf("lastIndex: %d\n", l.lastIndex) 610 ss += fmt.Sprintf("segments: %d\n", len(l.segments)) 611 if l.active != nil { 612 ss += fmt.Sprintf("active: %q\n", filepath.Base(l.active.path)) 613 } 614 if len(l.segments) > 0 { 615 for i, s := range l.segments { 616 ss += fmt.Sprintf("segment[%d]:\n", i) 617 ss += fmt.Sprintf("\tpath: %q\n", filepath.Base(s.path)) 618 ss += fmt.Sprintf("\tindex: %d\n", s.index) 619 ss += fmt.Sprintf("\tentries: %d\n", len(s.entries)) 620 ss += fmt.Sprintf("\tremaining: %d\n", s.remaining) 621 } 622 } 623 ss += "\n" 624 return ss 625 }