github.com/scottcagno/storage@v1.8.0/pkg/_junk/_x/file/manager.go (about) 1 package file 2 3 import ( 4 "fmt" 5 "github.com/scottcagno/storage/pkg/_junk/_lsmtree/encoding/binary" 6 "io" 7 "os" 8 "path/filepath" 9 "runtime" 10 "strings" 11 "sync" 12 ) 13 14 // SegmentManager is a segmented file structure 15 type SegmentManager struct { 16 lock sync.RWMutex // lock is a mutual exclusion lock 17 base string // base is the base filepath 18 r *binary.Reader // r is a binary reader 19 w *binary.Writer // w is a binary writer 20 firstIndex int64 // firstIndex is the index of the first entry 21 lastIndex int64 // lastIndex is the index of the last entry 22 segments []*Segment // segments is an index of the current file segments 23 active *Segment // active is the current active Segment 24 } 25 26 // Open opens and returns a new segmented file structure 27 func Open(path string) (*SegmentManager, error) { 28 // make sure we are working with absolute paths 29 base, err := filepath.Abs(path) 30 if err != nil { 31 return nil, err 32 } 33 // sanitize any path separators 34 base = filepath.ToSlash(base) 35 // create any directories if they are not there 36 err = os.MkdirAll(base, os.ModeDir) 37 if err != nil { 38 return nil, err 39 } 40 // create a new segmented file instance 41 sf := &SegmentManager{ 42 base: base, 43 firstIndex: 0, 44 lastIndex: 1, 45 segments: make([]*Segment, 0), 46 } 47 // attempt to load segments 48 err = sf.loadSegmentIndex() 49 if err != nil { 50 return nil, err 51 } 52 // return segmented file 53 return sf, nil 54 } 55 56 // loadIndex initializes the Segment index. It looks for Segment 57 // files in the base directory and attempts to index the Segment as 58 // well as any of the entries within the Segment. If this is a new 59 // instance, it will create a new Segment that is ready for writing. 60 func (sf *SegmentManager) loadSegmentIndex() error { 61 // lock 62 sf.lock.Lock() 63 defer sf.lock.Unlock() 64 // get the files in the base directory path 65 files, err := os.ReadDir(sf.base) 66 if err != nil { 67 return err 68 } 69 // list the files in the base directory path and attempt to index the entries 70 for _, file := range files { 71 // skip non data files 72 if file.IsDir() || 73 !strings.HasPrefix(file.Name(), FilePrefix) || 74 !strings.HasSuffix(file.Name(), FileSuffix) { 75 continue // skip this, continue on to the next file 76 } 77 // attempt to load Segment (and index entries in Segment) 78 s, err := OpenSegment(filepath.Join(sf.base, file.Name())) 79 if err != nil { 80 return err 81 } 82 // Segment has been loaded successfully, append to the segments list 83 sf.segments = append(sf.segments, s) 84 } 85 // check to see if any segments were found. If not, initialize a new one 86 if len(sf.segments) == 0 { 87 // create a new Segment file 88 s, err := CreateSegment(sf.base, sf.lastIndex) 89 if err != nil { 90 return err 91 } 92 // Segment has been created successfully, append to the segments list 93 sf.segments = append(sf.segments, s) 94 } 95 // segments have either been loaded or created, so now we 96 // should go about updating the active Segment pointer to 97 // point to the "tail" (the last Segment in the Segment list) 98 sf.active = sf.getLastSegment() 99 // load active Segment entry index 100 sf.active.loadEntryIndex() 101 // we should be good to go, lets attempt to open a file 102 // reader to work with the active Segment 103 sf.r, err = binary.OpenReader(sf.active.path) 104 if err != nil { 105 return err 106 } 107 // and then attempt to open a file writer to also work 108 // with the active Segment, so we can begin appending data 109 sf.w, err = binary.OpenWriter(sf.active.path) 110 if err != nil { 111 return err 112 } 113 // finally, update the firstIndex and lastIndex 114 sf.firstIndex = sf.segments[0].index 115 // and update last index 116 sf.lastIndex = sf.getLastSegment().getLastIndex() 117 return nil 118 } 119 120 func (sf *SegmentManager) LoadSegment(index int64) (*Segment, error) { 121 s := sf.active 122 if index >= s.index { 123 return s, nil 124 } 125 s = sf.segments[sf.findSegmentIndex(index)] 126 if len(s.entries) == 0 { 127 _, err := s.loadEntryIndex() 128 if err != nil { 129 return nil, err 130 } 131 } 132 sf.active = s 133 return s, nil 134 } 135 136 // findSegmentIndex performs binary search to find the Segment containing provided index 137 func (sf *SegmentManager) findSegmentIndex(index int64) int { 138 // declare for later 139 i, j := 0, len(sf.segments) 140 // otherwise, perform binary search 141 for i < j { 142 h := i + (j-i)/2 143 if index >= sf.segments[h].index { 144 i = h + 1 145 } else { 146 j = h 147 } 148 } 149 return i - 1 150 } 151 152 // getLastSegment returns the tail Segment in the segments index list 153 func (sf *SegmentManager) getLastSegment() *Segment { 154 return sf.segments[len(sf.segments)-1] 155 } 156 157 // cycleSegment adds a new Segment to replace the current (active) Segment 158 func (sf *SegmentManager) cycleSegment2(err error) error { 159 // check to see if we need to cycle 160 if err == nil && err != ErrSegmentFull { 161 return nil 162 } 163 // sync and close current file Segment 164 err = sf.w.Close() 165 if err != nil { 166 return err 167 } 168 // create a new Segment file 169 s, err := CreateSegment(sf.base, sf.lastIndex) 170 if err != nil { 171 return err 172 } 173 // add Segment to Segment index list 174 sf.segments = append(sf.segments, s) 175 // update the active Segment pointer 176 sf.active = sf.getLastSegment() 177 // open file writer associated with active Segment 178 sf.w, err = binary.OpenWriter(sf.active.path) 179 if err != nil { 180 return err 181 } 182 // update file reader associated with the active Segment 183 sf.r, err = binary.OpenReader(sf.active.path) 184 if err != nil { 185 return err 186 } 187 return nil 188 } 189 190 // cycleSegment adds a new Segment to replace the current (active) Segment 191 func (sf *SegmentManager) cycleSegment(remaining int64) error { 192 // check to see if we need to cycle 193 if remaining > 0 { 194 return nil 195 } 196 // sync and close current file Segment 197 err := sf.w.Close() 198 if err != nil { 199 return err 200 } 201 // create a new Segment file 202 s, err := CreateSegment(sf.base, sf.lastIndex) 203 if err != nil { 204 return err 205 } 206 // add Segment to Segment index list 207 sf.segments = append(sf.segments, s) 208 // update the active Segment pointer 209 sf.active = sf.getLastSegment() 210 // open file writer associated with active Segment 211 sf.w, err = binary.OpenWriter(sf.active.path) 212 if err != nil { 213 return err 214 } 215 // update file reader associated with the active Segment 216 sf.r, err = binary.OpenReader(sf.active.path) 217 if err != nil { 218 return err 219 } 220 return nil 221 } 222 223 // Read reads an entry from the segmented file at the specified index 224 func (sf *SegmentManager) Read(index int64) (string, []byte, error) { 225 // read lock 226 sf.lock.RLock() 227 defer sf.lock.RUnlock() 228 // error checking 229 if index < sf.firstIndex || index > sf.lastIndex { 230 return "", nil, ErrOutOfBounds 231 } 232 var err error 233 // find the Segment containing the provided index 234 //s := sf.segments[sf.findSegmentIndex(index)] 235 s, err := sf.LoadSegment(index) 236 if err != nil { 237 return "", nil, err 238 } 239 // make sure we are reading from the correct file 240 sf.r, err = sf.r.ReadFrom(s.path) 241 if err != nil { 242 return "", nil, err 243 } 244 // find the offset for the entry containing the provided index 245 offset := s.entries[s.findEntryIndex(index)].offset 246 // read entry at offset 247 e, err := sf.r.ReadEntryAt(offset) 248 if err != nil { 249 return "", nil, err 250 } 251 return string(e.Key), e.Value, nil 252 } 253 254 // ReadDataEntryUsingSegment reads an entry from the segmented file at the specified index 255 func (sf *SegmentManager) ReadDataEntryUsingSegment(index int64) (string, []byte, error) { 256 // read lock 257 sf.lock.RLock() 258 defer sf.lock.RUnlock() 259 // error checking 260 if index < sf.firstIndex || index > sf.lastIndex { 261 return "", nil, ErrOutOfBounds 262 } 263 var err error 264 // find the Segment containing the provided index 265 s, err := sf.LoadSegment(index) 266 if err != nil { 267 return "", nil, err 268 } 269 e, err := s.ReadDataEntry(index) 270 if err != nil { 271 return "", nil, err 272 } 273 return string(e.Key), e.Value, nil 274 } 275 276 // WriteIndexEntry writes an entry to the segmented file in an append-only fashion 277 func (sf *SegmentManager) _Write(key string, value []byte) (int64, error) { 278 // lock 279 sf.lock.Lock() 280 defer sf.lock.Unlock() 281 // write entry 282 e := &binary.DataEntry{ 283 Id: sf.lastIndex, 284 Key: []byte(key), 285 Value: value, 286 } 287 offset, err := sf.w.WriteEntry(e) 288 if err != nil { 289 return 0, err 290 } 291 // add new entry to the Segment index 292 sf.active.entries = append(sf.active.entries, entry{ 293 index: sf.lastIndex, 294 offset: offset, 295 }) 296 // update lastIndex 297 sf.lastIndex++ 298 // grab the current offset written 299 offset2, err := sf.w.Offset() 300 if err != nil { 301 return 0, err 302 } 303 // update Segment remaining 304 sf.active.remaining -= offset2 - offset 305 // check to see if the active Segment needs to be cycled 306 if sf.active.remaining < 64 { 307 err = sf.cycleSegment(int64(sf.active.remaining - 64)) 308 if err != nil { 309 return 0, err 310 } 311 } 312 return sf.lastIndex - 1, nil 313 } 314 315 // Write2 writes an entry to the segmented file in an append-only fashion 316 func (sf *SegmentManager) Write(key string, value []byte) (int64, error) { 317 // lock 318 sf.lock.Lock() 319 defer sf.lock.Unlock() 320 e := &binary.DataEntry{ 321 Id: sf.lastIndex, 322 Key: []byte(key), 323 Value: value, 324 } 325 // write entry 326 offset, err := sf.w.WriteEntry(e) 327 if err != nil { 328 return -1, err 329 } 330 // add new entry to the Segment index 331 sf.active.entries = append(sf.active.entries, entry{ 332 index: sf.lastIndex, 333 offset: offset, 334 }) 335 // update lastIndex 336 sf.lastIndex++ 337 // get updated offset to check cycle 338 offset, err = sf.w.Offset() 339 if err != nil { 340 return -1, err 341 } 342 // check to see if the active Segment needs to be cycled 343 err = sf.cycleSegment(int64(maxFileSize) - offset) 344 if err != nil { 345 return -1, err 346 } 347 return sf.lastIndex - 1, nil 348 } 349 350 func (sf *SegmentManager) WriteDataEntryUsingSegment(key string, value []byte) (int64, error) { 351 // lock 352 sf.lock.Lock() 353 defer sf.lock.Unlock() 354 e := &binary.DataEntry{ 355 Id: sf.lastIndex, 356 Key: []byte(key), 357 Value: value, 358 } 359 // write entry 360 offset, err := sf.active.WriteDataEntry(e) 361 if err != nil { 362 return -1, err 363 } 364 // check cycle segment 365 err = sf.cycleSegment(maxFileSize - offset + 64) 366 // update lastIndex 367 sf.lastIndex++ 368 // return index, and nil 369 return sf.lastIndex - 1, nil 370 } 371 372 // Scan provides an iterator method for the segmented file 373 func (sf *SegmentManager) Scan(iter func(index int64, key string, value []byte) bool) error { 374 // lock 375 sf.lock.Lock() 376 defer sf.lock.Unlock() 377 // init for any errors 378 var err error 379 // range the Segment index 380 for _, sidx := range sf.segments { 381 fmt.Printf("Segment: %s\n", sidx) 382 // make sure we are reading the right data 383 sf.r, err = sf.r.ReadFrom(sidx.path) 384 if err != nil { 385 return err 386 } 387 // range the Segment entries index 388 for _, eidx := range sidx.entries { 389 // read entry 390 e, err := sf.r.ReadEntryAt(eidx.offset) 391 if err != nil { 392 if err == io.EOF || err == io.ErrUnexpectedEOF { 393 break 394 } 395 return err 396 } 397 // check entry against iterator boolean function 398 if !iter(e.Id, string(e.Key), e.Value) { 399 // if it returns false, then process next entry 400 continue 401 } 402 } 403 // outside entry loop 404 } 405 // outside Segment loop 406 return nil 407 } 408 409 // TruncateFront removes all segments and entries before specified index 410 func (sf *SegmentManager) TruncateFront(index int64) error { 411 // lock 412 sf.lock.Lock() 413 defer sf.lock.Unlock() 414 // perform bounds check 415 if index == 0 || 416 sf.lastIndex == 0 || 417 index < sf.firstIndex || index > sf.lastIndex { 418 return ErrOutOfBounds 419 } 420 // more easy checking 421 if index == sf.firstIndex { 422 return nil // nothing to truncate 423 } 424 // locate segment in the segment index list containing specified index 425 sidx := sf.findSegmentIndex(index) 426 // remove all whole segments before index "sidx" 427 for i := 0; i < sidx; i++ { 428 // remove Segment file 429 err := os.Remove(sf.segments[i].path) 430 if err != nil { 431 return err 432 } 433 } 434 // remove segments from Segment index (cut, i-j) 435 i, j := 0, sidx 436 copy(sf.segments[i:], sf.segments[j:]) 437 for k, n := len(sf.segments)-j+i, len(sf.segments); k < n; k++ { 438 sf.segments[k] = nil // or the zero value of T 439 } 440 sf.segments = sf.segments[:len(sf.segments)-j+i] 441 // update firstIndex 442 sf.firstIndex = sf.segments[0].index 443 // prepare to re-write partial Segment 444 //var err error 445 tmpfd, err := os.Create(filepath.Join(sf.base, 446 fmt.Sprintf("%stmp-part%s", FilePrefix, FileSuffix))) 447 if err != nil { 448 return err 449 } 450 // after the Segment index cut, Segment 0 will 451 // contain the partials that we must re-write 452 if sf.segments[0].index < index { 453 // make sure we are reading from the correct path 454 sf.r, err = sf.r.ReadFrom(sf.segments[0].path) 455 if err != nil { 456 return err 457 } 458 // init temp entries list 459 var entries []entry 460 // make sure entry index is loaded 461 if !sf.segments[0].hasEntriesLoaded() { 462 _, err := sf.segments[0].loadEntryIndex() 463 if err != nil { 464 return err 465 } 466 } 467 // range the entries within this Segment to find 468 // the ones that are greater than the index and 469 // write those to a temporary buffer.... 470 for _, ent := range sf.segments[0].entries { 471 if ent.index < index { 472 continue // skip 473 } 474 // read entry 475 e, err := sf.r.ReadEntryAt(ent.offset) 476 if err != nil { 477 return err 478 } 479 // write entry to temp file 480 ent.offset, err = binary.EncodeEntry(tmpfd, e) 481 if err != nil { 482 return err 483 } 484 // sync write 485 err = tmpfd.Sync() 486 if err != nil { 487 return err 488 } 489 // append to a new entries list 490 entries = append(entries, ent) 491 } 492 // move reader back to active Segment file 493 sf.r, err = sf.r.ReadFrom(sf.active.path) 494 if err != nil { 495 return err 496 } 497 // close temp file 498 err = tmpfd.Close() 499 if err != nil { 500 return err 501 } 502 // remove partial Segment file 503 err = os.Remove(sf.segments[0].path) 504 if err != nil { 505 return err 506 } 507 // change temp file name 508 err = os.Rename(tmpfd.Name(), sf.segments[0].path) 509 if err != nil { 510 return err 511 } 512 // update Segment 513 sf.segments[0].entries = entries 514 sf.segments[0].index = entries[0].index 515 } 516 return nil 517 } 518 519 func (sf *SegmentManager) TruncateBack(index int64) error { 520 // TODO: implement 521 return nil 522 } 523 524 // Sort (stable) sorts entries (and re-writes them) in forward or reverse Lexicographic order 525 func (sf *SegmentManager) Sort() error { 526 // TODO: implement 527 return nil 528 } 529 530 // CompactAndMerge removes any blank sections or duplicate entries and then merges (re-writes) 531 // the data into a different Segment size using the maxSegSize provided 532 func (sf *SegmentManager) CompactAndMerge(maxSegSize int64) error { 533 // TODO: implement 534 return nil 535 } 536 537 // Count returns the number of entries currently in the segmented file 538 func (sf *SegmentManager) Count() int { 539 // lock 540 sf.lock.Lock() 541 defer sf.lock.Unlock() 542 // get count 543 var count int 544 for _, s := range sf.segments { 545 count += len(s.entries) 546 } 547 // return count 548 return count 549 } 550 551 func (sf *SegmentManager) Path() string { 552 return sf.base 553 } 554 555 // FirstIndex returns the segmented files first index 556 func (sf *SegmentManager) FirstIndex() int64 { 557 // lock 558 sf.lock.Lock() 559 defer sf.lock.Unlock() 560 return sf.firstIndex 561 } 562 563 // LastIndex returns the segmented files first index 564 func (sf *SegmentManager) LastIndex() int64 { 565 // lock 566 sf.lock.Lock() 567 defer sf.lock.Unlock() 568 return sf.lastIndex 569 } 570 571 // Close syncs and closes the segmented file 572 func (sf *SegmentManager) Close() error { 573 // lock 574 sf.lock.Lock() 575 defer sf.lock.Unlock() 576 // sync and close writer 577 err := sf.w.Close() 578 if err != nil { 579 return err 580 } 581 // close reader 582 err = sf.r.Close() 583 if err != nil { 584 return err 585 } 586 // clean everything else up 587 sf.base = "" 588 sf.r = nil 589 sf.w = nil 590 sf.firstIndex = 0 591 sf.lastIndex = 0 592 sf.segments = nil 593 sf.active = nil 594 // force gc for good measure 595 runtime.GC() 596 return nil 597 } 598 599 // String is the stringer method for the segmented file 600 func (sf *SegmentManager) String() string { 601 var ss string 602 ss += fmt.Sprintf("\n\n[segmented file]\n") 603 ss += fmt.Sprintf("base: %q\n", sf.base) 604 ss += fmt.Sprintf("firstIndex: %d\n", sf.firstIndex) 605 ss += fmt.Sprintf("lastIndex: %d\n", sf.lastIndex) 606 ss += fmt.Sprintf("segments: %d\n", len(sf.segments)) 607 if sf.active != nil { 608 ss += fmt.Sprintf("active: %q\n", filepath.Base(sf.active.path)) 609 } 610 if len(sf.segments) > 0 { 611 for i, s := range sf.segments { 612 ss += fmt.Sprintf("Segment[%d]:\n", i) 613 ss += fmt.Sprintf("\tpath: %q\n", filepath.Base(s.path)) 614 ss += fmt.Sprintf("\tindex: %d\n", s.index) 615 ss += fmt.Sprintf("\tentries: %d\n", len(s.entries)) 616 ss += fmt.Sprintf("\tremaining: %d\n", s.remaining) 617 } 618 } 619 ss += "\n" 620 return ss 621 }