// Source: github.com/tendermint/tmlibs@v0.9.0/autofile/group.go

package autofile

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"log"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	cmn "github.com/tendermint/tmlibs/common"
)

const (
	// groupCheckDuration is the period of the ticker that drives the head
	// size and total size checks.
	groupCheckDuration = 5000 * time.Millisecond
	// defaultHeadSizeLimit is the head size limit a freshly opened Group
	// starts with.
	defaultHeadSizeLimit = 10 * 1024 * 1024 // 10MB
	// defaultTotalSizeLimit is the total size limit a freshly opened Group
	// starts with.
	defaultTotalSizeLimit = 1 * 1024 * 1024 * 1024 // 1GB
	// maxFilesToRemove bounds how many files a single total-size check may
	// delete.
	maxFilesToRemove = 4 // needs to be greater than 1
)

/*
You can open a Group to keep restrictions on an AutoFile, like
the maximum size of each chunk, and/or the total amount of bytes
stored in the group.

The first file to be written in the Group.Dir is the head file.

	Dir/
	- <HeadPath>

Once the Head file reaches the size limit, it will be rotated.

	Dir/
	- <HeadPath>.000   // First rolled file
	- <HeadPath>       // New head path, starts empty.
	                   // The implicit index is 001.

As more files are written, the index numbers grow...

	Dir/
	- <HeadPath>.000   // First rolled file
	- <HeadPath>.001   // Second rolled file
	- ...
	- <HeadPath>       // New head path

The Group can also be used to binary-search for some line,
assuming that marker lines are written occasionally.
*/
type Group struct {
	cmn.BaseService

	ID             string
	Head           *AutoFile // The head AutoFile to write to
	headBuf        *bufio.Writer // Buffered writer wrapping Head
	Dir            string // Directory that contains .Head
	ticker         *time.Ticker // Drives the periodic size checks
	mtx            sync.Mutex
	headSizeLimit  int64 // 0 disables the head size check
	totalSizeLimit int64 // 0 disables the total size check
	minIndex       int // Includes head
	maxIndex       int // Includes head, where Head will move to

	// TODO: When we start deleting files, we need to start tracking GroupReaders
	// and their dependencies.
}

// OpenGroup creates a new Group with head at headPath. It returns an error if
// it fails to open head file.
76 func OpenGroup(headPath string) (g *Group, err error) { 77 dir := path.Dir(headPath) 78 head, err := OpenAutoFile(headPath) 79 if err != nil { 80 return nil, err 81 } 82 83 g = &Group{ 84 ID: "group:" + head.ID, 85 Head: head, 86 headBuf: bufio.NewWriterSize(head, 4096*10), 87 Dir: dir, 88 ticker: time.NewTicker(groupCheckDuration), 89 headSizeLimit: defaultHeadSizeLimit, 90 totalSizeLimit: defaultTotalSizeLimit, 91 minIndex: 0, 92 maxIndex: 0, 93 } 94 g.BaseService = *cmn.NewBaseService(nil, "Group", g) 95 96 gInfo := g.readGroupInfo() 97 g.minIndex = gInfo.MinIndex 98 g.maxIndex = gInfo.MaxIndex 99 return 100 } 101 102 // OnStart implements Service by starting the goroutine that checks file and 103 // group limits. 104 func (g *Group) OnStart() error { 105 go g.processTicks() 106 return nil 107 } 108 109 // OnStop implements Service by stopping the goroutine described above. 110 // NOTE: g.Head must be closed separately using Close. 111 func (g *Group) OnStop() { 112 g.ticker.Stop() 113 g.Flush() // flush any uncommitted data 114 } 115 116 // Close closes the head file. The group must be stopped by this moment. 117 func (g *Group) Close() { 118 g.Flush() // flush any uncommitted data 119 120 g.mtx.Lock() 121 _ = g.Head.closeFile() 122 g.mtx.Unlock() 123 } 124 125 // SetHeadSizeLimit allows you to overwrite default head size limit - 10MB. 126 func (g *Group) SetHeadSizeLimit(limit int64) { 127 g.mtx.Lock() 128 g.headSizeLimit = limit 129 g.mtx.Unlock() 130 } 131 132 // HeadSizeLimit returns the current head size limit. 133 func (g *Group) HeadSizeLimit() int64 { 134 g.mtx.Lock() 135 defer g.mtx.Unlock() 136 return g.headSizeLimit 137 } 138 139 // SetTotalSizeLimit allows you to overwrite default total size limit of the 140 // group - 1GB. 141 func (g *Group) SetTotalSizeLimit(limit int64) { 142 g.mtx.Lock() 143 g.totalSizeLimit = limit 144 g.mtx.Unlock() 145 } 146 147 // TotalSizeLimit returns total size limit of the group. 
148 func (g *Group) TotalSizeLimit() int64 { 149 g.mtx.Lock() 150 defer g.mtx.Unlock() 151 return g.totalSizeLimit 152 } 153 154 // MaxIndex returns index of the last file in the group. 155 func (g *Group) MaxIndex() int { 156 g.mtx.Lock() 157 defer g.mtx.Unlock() 158 return g.maxIndex 159 } 160 161 // MinIndex returns index of the first file in the group. 162 func (g *Group) MinIndex() int { 163 g.mtx.Lock() 164 defer g.mtx.Unlock() 165 return g.minIndex 166 } 167 168 // Write writes the contents of p into the current head of the group. It 169 // returns the number of bytes written. If nn < len(p), it also returns an 170 // error explaining why the write is short. 171 // NOTE: Writes are buffered so they don't write synchronously 172 // TODO: Make it halt if space is unavailable 173 func (g *Group) Write(p []byte) (nn int, err error) { 174 g.mtx.Lock() 175 defer g.mtx.Unlock() 176 return g.headBuf.Write(p) 177 } 178 179 // WriteLine writes line into the current head of the group. It also appends "\n". 180 // NOTE: Writes are buffered so they don't write synchronously 181 // TODO: Make it halt if space is unavailable 182 func (g *Group) WriteLine(line string) error { 183 g.mtx.Lock() 184 defer g.mtx.Unlock() 185 _, err := g.headBuf.Write([]byte(line + "\n")) 186 return err 187 } 188 189 // Flush writes any buffered data to the underlying file and commits the 190 // current content of the file to stable storage. 191 func (g *Group) Flush() error { 192 g.mtx.Lock() 193 defer g.mtx.Unlock() 194 err := g.headBuf.Flush() 195 if err == nil { 196 err = g.Head.Sync() 197 } 198 return err 199 } 200 201 func (g *Group) processTicks() { 202 for { 203 _, ok := <-g.ticker.C 204 if !ok { 205 return // Done. 206 } 207 g.checkHeadSizeLimit() 208 g.checkTotalSizeLimit() 209 } 210 } 211 212 // NOTE: for testing 213 func (g *Group) stopTicker() { 214 g.ticker.Stop() 215 } 216 217 // NOTE: this function is called manually in tests. 
218 func (g *Group) checkHeadSizeLimit() { 219 limit := g.HeadSizeLimit() 220 if limit == 0 { 221 return 222 } 223 size, err := g.Head.Size() 224 if err != nil { 225 panic(err) 226 } 227 if size >= limit { 228 g.RotateFile() 229 } 230 } 231 232 func (g *Group) checkTotalSizeLimit() { 233 limit := g.TotalSizeLimit() 234 if limit == 0 { 235 return 236 } 237 238 gInfo := g.readGroupInfo() 239 totalSize := gInfo.TotalSize 240 for i := 0; i < maxFilesToRemove; i++ { 241 index := gInfo.MinIndex + i 242 if totalSize < limit { 243 return 244 } 245 if index == gInfo.MaxIndex { 246 // Special degenerate case, just do nothing. 247 log.Println("WARNING: Group's head " + g.Head.Path + "may grow without bound") 248 return 249 } 250 pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex) 251 fileInfo, err := os.Stat(pathToRemove) 252 if err != nil { 253 log.Println("WARNING: Failed to fetch info for file @" + pathToRemove) 254 continue 255 } 256 err = os.Remove(pathToRemove) 257 if err != nil { 258 log.Println(err) 259 return 260 } 261 totalSize -= fileInfo.Size() 262 } 263 } 264 265 // RotateFile causes group to close the current head and assign it some index. 266 // Note it does not create a new head. 267 func (g *Group) RotateFile() { 268 g.mtx.Lock() 269 defer g.mtx.Unlock() 270 271 headPath := g.Head.Path 272 273 if err := g.Head.closeFile(); err != nil { 274 panic(err) 275 } 276 277 indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1) 278 if err := os.Rename(headPath, indexPath); err != nil { 279 panic(err) 280 } 281 282 g.maxIndex++ 283 } 284 285 // NewReader returns a new group reader. 286 // CONTRACT: Caller must close the returned GroupReader. 287 func (g *Group) NewReader(index int) (*GroupReader, error) { 288 r := newGroupReader(g) 289 err := r.SetIndex(index) 290 if err != nil { 291 return nil, err 292 } 293 return r, nil 294 } 295 296 // Returns -1 if line comes after, 0 if found, 1 if line comes before. 
297 type SearchFunc func(line string) (int, error) 298 299 // Searches for the right file in Group, then returns a GroupReader to start 300 // streaming lines. 301 // Returns true if an exact match was found, otherwise returns the next greater 302 // line that starts with prefix. 303 // CONTRACT: Caller must close the returned GroupReader 304 func (g *Group) Search(prefix string, cmp SearchFunc) (*GroupReader, bool, error) { 305 g.mtx.Lock() 306 minIndex, maxIndex := g.minIndex, g.maxIndex 307 g.mtx.Unlock() 308 // Now minIndex/maxIndex may change meanwhile, 309 // but it shouldn't be a big deal 310 // (maybe we'll want to limit scanUntil though) 311 312 for { 313 curIndex := (minIndex + maxIndex + 1) / 2 314 315 // Base case, when there's only 1 choice left. 316 if minIndex == maxIndex { 317 r, err := g.NewReader(maxIndex) 318 if err != nil { 319 return nil, false, err 320 } 321 match, err := scanUntil(r, prefix, cmp) 322 if err != nil { 323 r.Close() 324 return nil, false, err 325 } 326 return r, match, err 327 } 328 329 // Read starting roughly at the middle file, 330 // until we find line that has prefix. 331 r, err := g.NewReader(curIndex) 332 if err != nil { 333 return nil, false, err 334 } 335 foundIndex, line, err := scanNext(r, prefix) 336 r.Close() 337 if err != nil { 338 return nil, false, err 339 } 340 341 // Compare this line to our search query. 
342 val, err := cmp(line) 343 if err != nil { 344 return nil, false, err 345 } 346 if val < 0 { 347 // Line will come later 348 minIndex = foundIndex 349 } else if val == 0 { 350 // Stroke of luck, found the line 351 r, err := g.NewReader(foundIndex) 352 if err != nil { 353 return nil, false, err 354 } 355 match, err := scanUntil(r, prefix, cmp) 356 if !match { 357 panic("Expected match to be true") 358 } 359 if err != nil { 360 r.Close() 361 return nil, false, err 362 } 363 return r, true, err 364 } else { 365 // We passed it 366 maxIndex = curIndex - 1 367 } 368 } 369 370 } 371 372 // Scans and returns the first line that starts with 'prefix' 373 // Consumes line and returns it. 374 func scanNext(r *GroupReader, prefix string) (int, string, error) { 375 for { 376 line, err := r.ReadLine() 377 if err != nil { 378 return 0, "", err 379 } 380 if !strings.HasPrefix(line, prefix) { 381 continue 382 } 383 index := r.CurIndex() 384 return index, line, nil 385 } 386 } 387 388 // Returns true iff an exact match was found. 389 // Pushes line, does not consume it. 390 func scanUntil(r *GroupReader, prefix string, cmp SearchFunc) (bool, error) { 391 for { 392 line, err := r.ReadLine() 393 if err != nil { 394 return false, err 395 } 396 if !strings.HasPrefix(line, prefix) { 397 continue 398 } 399 val, err := cmp(line) 400 if err != nil { 401 return false, err 402 } 403 if val < 0 { 404 continue 405 } else if val == 0 { 406 r.PushLine(line) 407 return true, nil 408 } else { 409 r.PushLine(line) 410 return false, nil 411 } 412 } 413 } 414 415 // Searches backwards for the last line in Group with prefix. 416 // Scans each file forward until the end to find the last match. 
417 func (g *Group) FindLast(prefix string) (match string, found bool, err error) { 418 g.mtx.Lock() 419 minIndex, maxIndex := g.minIndex, g.maxIndex 420 g.mtx.Unlock() 421 422 r, err := g.NewReader(maxIndex) 423 if err != nil { 424 return "", false, err 425 } 426 defer r.Close() 427 428 // Open files from the back and read 429 GROUP_LOOP: 430 for i := maxIndex; i >= minIndex; i-- { 431 err := r.SetIndex(i) 432 if err != nil { 433 return "", false, err 434 } 435 // Scan each line and test whether line matches 436 for { 437 line, err := r.ReadLine() 438 if err == io.EOF { 439 if found { 440 return match, found, nil 441 } 442 continue GROUP_LOOP 443 } else if err != nil { 444 return "", false, err 445 } 446 if strings.HasPrefix(line, prefix) { 447 match = line 448 found = true 449 } 450 if r.CurIndex() > i { 451 if found { 452 return match, found, nil 453 } 454 continue GROUP_LOOP 455 } 456 } 457 } 458 459 return 460 } 461 462 // GroupInfo holds information about the group. 463 type GroupInfo struct { 464 MinIndex int // index of the first file in the group, including head 465 MaxIndex int // index of the last file in the group, including head 466 TotalSize int64 // total size of the group 467 HeadSize int64 // size of the head 468 } 469 470 // Returns info after scanning all files in g.Head's dir. 471 func (g *Group) ReadGroupInfo() GroupInfo { 472 g.mtx.Lock() 473 defer g.mtx.Unlock() 474 return g.readGroupInfo() 475 } 476 477 // Index includes the head. 
478 // CONTRACT: caller should have called g.mtx.Lock 479 func (g *Group) readGroupInfo() GroupInfo { 480 groupDir := filepath.Dir(g.Head.Path) 481 headBase := filepath.Base(g.Head.Path) 482 var minIndex, maxIndex int = -1, -1 483 var totalSize, headSize int64 = 0, 0 484 485 dir, err := os.Open(groupDir) 486 if err != nil { 487 panic(err) 488 } 489 defer dir.Close() 490 fiz, err := dir.Readdir(0) 491 if err != nil { 492 panic(err) 493 } 494 495 // For each file in the directory, filter by pattern 496 for _, fileInfo := range fiz { 497 if fileInfo.Name() == headBase { 498 fileSize := fileInfo.Size() 499 totalSize += fileSize 500 headSize = fileSize 501 continue 502 } else if strings.HasPrefix(fileInfo.Name(), headBase) { 503 fileSize := fileInfo.Size() 504 totalSize += fileSize 505 indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`) 506 submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name())) 507 if len(submatch) != 0 { 508 // Matches 509 fileIndex, err := strconv.Atoi(string(submatch[1])) 510 if err != nil { 511 panic(err) 512 } 513 if maxIndex < fileIndex { 514 maxIndex = fileIndex 515 } 516 if minIndex == -1 || fileIndex < minIndex { 517 minIndex = fileIndex 518 } 519 } 520 } 521 } 522 523 // Now account for the head. 524 if minIndex == -1 { 525 // If there were no numbered files, 526 // then the head is index 0. 527 minIndex, maxIndex = 0, 0 528 } else { 529 // Otherwise, the head file is 1 greater 530 maxIndex++ 531 } 532 return GroupInfo{minIndex, maxIndex, totalSize, headSize} 533 } 534 535 func filePathForIndex(headPath string, index int, maxIndex int) string { 536 if index == maxIndex { 537 return headPath 538 } 539 return fmt.Sprintf("%v.%03d", headPath, index) 540 } 541 542 //-------------------------------------------------------------------------------- 543 544 // GroupReader provides an interface for reading from a Group. 
545 type GroupReader struct { 546 *Group 547 mtx sync.Mutex 548 curIndex int 549 curFile *os.File 550 curReader *bufio.Reader 551 curLine []byte 552 } 553 554 func newGroupReader(g *Group) *GroupReader { 555 return &GroupReader{ 556 Group: g, 557 curIndex: 0, 558 curFile: nil, 559 curReader: nil, 560 curLine: nil, 561 } 562 } 563 564 // Close closes the GroupReader by closing the cursor file. 565 func (gr *GroupReader) Close() error { 566 gr.mtx.Lock() 567 defer gr.mtx.Unlock() 568 569 if gr.curReader != nil { 570 err := gr.curFile.Close() 571 gr.curIndex = 0 572 gr.curReader = nil 573 gr.curFile = nil 574 gr.curLine = nil 575 return err 576 } 577 return nil 578 } 579 580 // Read implements io.Reader, reading bytes from the current Reader 581 // incrementing index until enough bytes are read. 582 func (gr *GroupReader) Read(p []byte) (n int, err error) { 583 lenP := len(p) 584 if lenP == 0 { 585 return 0, errors.New("given empty slice") 586 } 587 588 gr.mtx.Lock() 589 defer gr.mtx.Unlock() 590 591 // Open file if not open yet 592 if gr.curReader == nil { 593 if err = gr.openFile(gr.curIndex); err != nil { 594 return 0, err 595 } 596 } 597 598 // Iterate over files until enough bytes are read 599 var nn int 600 for { 601 nn, err = gr.curReader.Read(p[n:]) 602 n += nn 603 if err == io.EOF { 604 if n >= lenP { 605 return n, nil 606 } 607 // Open the next file 608 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 609 return n, err1 610 } 611 } else if err != nil { 612 return n, err 613 } else if nn == 0 { // empty file 614 return n, err 615 } 616 } 617 } 618 619 // ReadLine reads a line (without delimiter). 620 // just return io.EOF if no new lines found. 
621 func (gr *GroupReader) ReadLine() (string, error) { 622 gr.mtx.Lock() 623 defer gr.mtx.Unlock() 624 625 // From PushLine 626 if gr.curLine != nil { 627 line := string(gr.curLine) 628 gr.curLine = nil 629 return line, nil 630 } 631 632 // Open file if not open yet 633 if gr.curReader == nil { 634 err := gr.openFile(gr.curIndex) 635 if err != nil { 636 return "", err 637 } 638 } 639 640 // Iterate over files until line is found 641 var linePrefix string 642 for { 643 bytesRead, err := gr.curReader.ReadBytes('\n') 644 if err == io.EOF { 645 // Open the next file 646 if err1 := gr.openFile(gr.curIndex + 1); err1 != nil { 647 return "", err1 648 } 649 if len(bytesRead) > 0 && bytesRead[len(bytesRead)-1] == byte('\n') { 650 return linePrefix + string(bytesRead[:len(bytesRead)-1]), nil 651 } 652 linePrefix += string(bytesRead) 653 continue 654 } else if err != nil { 655 return "", err 656 } 657 return linePrefix + string(bytesRead[:len(bytesRead)-1]), nil 658 } 659 } 660 661 // IF index > gr.Group.maxIndex, returns io.EOF 662 // CONTRACT: caller should hold gr.mtx 663 func (gr *GroupReader) openFile(index int) error { 664 665 // Lock on Group to ensure that head doesn't move in the meanwhile. 666 gr.Group.mtx.Lock() 667 defer gr.Group.mtx.Unlock() 668 669 if index > gr.Group.maxIndex { 670 return io.EOF 671 } 672 673 curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex) 674 curFile, err := os.Open(curFilePath) 675 if err != nil { 676 return err 677 } 678 curReader := bufio.NewReader(curFile) 679 680 // Update gr.cur* 681 if gr.curFile != nil { 682 gr.curFile.Close() // TODO return error? 683 } 684 gr.curIndex = index 685 gr.curFile = curFile 686 gr.curReader = curReader 687 gr.curLine = nil 688 return nil 689 } 690 691 // PushLine makes the given line the current one, so the next time somebody 692 // calls ReadLine, this line will be returned. 693 // panics if called twice without calling ReadLine. 
694 func (gr *GroupReader) PushLine(line string) { 695 gr.mtx.Lock() 696 defer gr.mtx.Unlock() 697 698 if gr.curLine == nil { 699 gr.curLine = []byte(line) 700 } else { 701 panic("PushLine failed, already have line") 702 } 703 } 704 705 // CurIndex returns cursor's file index. 706 func (gr *GroupReader) CurIndex() int { 707 gr.mtx.Lock() 708 defer gr.mtx.Unlock() 709 return gr.curIndex 710 } 711 712 // SetIndex sets the cursor's file index to index by opening a file at this 713 // position. 714 func (gr *GroupReader) SetIndex(index int) error { 715 gr.mtx.Lock() 716 defer gr.mtx.Unlock() 717 return gr.openFile(index) 718 } 719 720 //-------------------------------------------------------------------------------- 721 722 // A simple SearchFunc that assumes that the marker is of form 723 // <prefix><number>. 724 // For example, if prefix is '#HEIGHT:', the markers of expected to be of the form: 725 // 726 // #HEIGHT:1 727 // ... 728 // #HEIGHT:2 729 // ... 730 func MakeSimpleSearchFunc(prefix string, target int) SearchFunc { 731 return func(line string) (int, error) { 732 if !strings.HasPrefix(line, prefix) { 733 return -1, errors.New(cmn.Fmt("Marker line did not have prefix: %v", prefix)) 734 } 735 i, err := strconv.Atoi(line[len(prefix):]) 736 if err != nil { 737 return -1, errors.New(cmn.Fmt("Failed to parse marker line: %v", err.Error())) 738 } 739 if target < i { 740 return 1, nil 741 } else if target == i { 742 return 0, nil 743 } else { 744 return -1, nil 745 } 746 } 747 }