github.com/vipernet-xyz/tendermint-core@v0.32.0/libs/autofile/group.go (about)

     1  package autofile
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"regexp"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/tendermint/tendermint/libs/service"
    17  )
    18  
    19  const (
    20  	defaultGroupCheckDuration = 5000 * time.Millisecond
    21  	defaultHeadSizeLimit      = 10 * 1024 * 1024       // 10MB
    22  	defaultTotalSizeLimit     = 1 * 1024 * 1024 * 1024 // 1GB
    23  	maxFilesToRemove          = 4                      // needs to be greater than 1
    24  )
    25  
    26  /*
    27  You can open a Group to keep restrictions on an AutoFile, like
    28  the maximum size of each chunk, and/or the total amount of bytes
    29  stored in the group.
    30  
    31  The first file to be written in the Group.Dir is the head file.
    32  
    33  	Dir/
    34  	- <HeadPath>
    35  
    36  Once the Head file reaches the size limit, it will be rotated.
    37  
    38  	Dir/
    39  	- <HeadPath>.000   // First rolled file
    40  	- <HeadPath>       // New head path, starts empty.
    41  										 // The implicit index is 001.
    42  
    43  As more files are written, the index numbers grow...
    44  
    45  	Dir/
    46  	- <HeadPath>.000   // First rolled file
    47  	- <HeadPath>.001   // Second rolled file
    48  	- ...
    49  	- <HeadPath>       // New head path
    50  
    51  The Group can also be used to binary-search for some line,
    52  assuming that marker lines are written occasionally.
    53  */
    54  type Group struct {
    55  	service.BaseService
    56  
    57  	ID                 string
    58  	Head               *AutoFile // The head AutoFile to write to
    59  	headBuf            *bufio.Writer
    60  	Dir                string // Directory that contains .Head
    61  	ticker             *time.Ticker
    62  	mtx                sync.Mutex
    63  	headSizeLimit      int64
    64  	totalSizeLimit     int64
    65  	groupCheckDuration time.Duration
    66  	minIndex           int // Includes head
    67  	maxIndex           int // Includes head, where Head will move to
    68  
    69  	// close this when the processTicks routine is done.
    70  	// this ensures we can cleanup the dir after calling Stop
    71  	// and the routine won't be trying to access it anymore
    72  	doneProcessTicks chan struct{}
    73  
    74  	// TODO: When we start deleting files, we need to start tracking GroupReaders
    75  	// and their dependencies.
    76  }
    77  
    78  // OpenGroup creates a new Group with head at headPath. It returns an error if
    79  // it fails to open head file.
    80  func OpenGroup(headPath string, groupOptions ...func(*Group)) (*Group, error) {
    81  	dir, err := filepath.Abs(filepath.Dir(headPath))
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	head, err := OpenAutoFile(headPath)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	g := &Group{
    91  		ID:                 "group:" + head.ID,
    92  		Head:               head,
    93  		headBuf:            bufio.NewWriterSize(head, 4096*10),
    94  		Dir:                dir,
    95  		headSizeLimit:      defaultHeadSizeLimit,
    96  		totalSizeLimit:     defaultTotalSizeLimit,
    97  		groupCheckDuration: defaultGroupCheckDuration,
    98  		minIndex:           0,
    99  		maxIndex:           0,
   100  		doneProcessTicks:   make(chan struct{}),
   101  	}
   102  
   103  	for _, option := range groupOptions {
   104  		option(g)
   105  	}
   106  
   107  	g.BaseService = *service.NewBaseService(nil, "Group", g)
   108  
   109  	gInfo := g.readGroupInfo()
   110  	g.minIndex = gInfo.MinIndex
   111  	g.maxIndex = gInfo.MaxIndex
   112  	return g, nil
   113  }
   114  
   115  // GroupCheckDuration allows you to overwrite default groupCheckDuration.
   116  func GroupCheckDuration(duration time.Duration) func(*Group) {
   117  	return func(g *Group) {
   118  		g.groupCheckDuration = duration
   119  	}
   120  }
   121  
   122  // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB.
   123  func GroupHeadSizeLimit(limit int64) func(*Group) {
   124  	return func(g *Group) {
   125  		g.headSizeLimit = limit
   126  	}
   127  }
   128  
   129  // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB.
   130  func GroupTotalSizeLimit(limit int64) func(*Group) {
   131  	return func(g *Group) {
   132  		g.totalSizeLimit = limit
   133  	}
   134  }
   135  
   136  // OnStart implements service.Service by starting the goroutine that checks file
   137  // and group limits.
   138  func (g *Group) OnStart() error {
   139  	g.ticker = time.NewTicker(g.groupCheckDuration)
   140  	go g.processTicks()
   141  	return nil
   142  }
   143  
   144  // OnStop implements service.Service by stopping the goroutine described above.
   145  // NOTE: g.Head must be closed separately using Close.
   146  func (g *Group) OnStop() {
   147  	g.ticker.Stop()
   148  	g.FlushAndSync()
   149  }
   150  
   151  // Wait blocks until all internal goroutines are finished. Supposed to be
   152  // called after Stop.
   153  func (g *Group) Wait() {
   154  	// wait for processTicks routine to finish
   155  	<-g.doneProcessTicks
   156  }
   157  
   158  // Close closes the head file. The group must be stopped by this moment.
   159  func (g *Group) Close() {
   160  	g.FlushAndSync()
   161  
   162  	g.mtx.Lock()
   163  	_ = g.Head.closeFile()
   164  	g.mtx.Unlock()
   165  }
   166  
   167  // HeadSizeLimit returns the current head size limit.
   168  func (g *Group) HeadSizeLimit() int64 {
   169  	g.mtx.Lock()
   170  	defer g.mtx.Unlock()
   171  	return g.headSizeLimit
   172  }
   173  
   174  // TotalSizeLimit returns total size limit of the group.
   175  func (g *Group) TotalSizeLimit() int64 {
   176  	g.mtx.Lock()
   177  	defer g.mtx.Unlock()
   178  	return g.totalSizeLimit
   179  }
   180  
   181  // MaxIndex returns index of the last file in the group.
   182  func (g *Group) MaxIndex() int {
   183  	g.mtx.Lock()
   184  	defer g.mtx.Unlock()
   185  	return g.maxIndex
   186  }
   187  
   188  // MinIndex returns index of the first file in the group.
   189  func (g *Group) MinIndex() int {
   190  	g.mtx.Lock()
   191  	defer g.mtx.Unlock()
   192  	return g.minIndex
   193  }
   194  
   195  // Write writes the contents of p into the current head of the group. It
   196  // returns the number of bytes written. If nn < len(p), it also returns an
   197  // error explaining why the write is short.
   198  // NOTE: Writes are buffered so they don't write synchronously
   199  // TODO: Make it halt if space is unavailable
   200  func (g *Group) Write(p []byte) (nn int, err error) {
   201  	g.mtx.Lock()
   202  	defer g.mtx.Unlock()
   203  	return g.headBuf.Write(p)
   204  }
   205  
   206  // WriteLine writes line into the current head of the group. It also appends "\n".
   207  // NOTE: Writes are buffered so they don't write synchronously
   208  // TODO: Make it halt if space is unavailable
   209  func (g *Group) WriteLine(line string) error {
   210  	g.mtx.Lock()
   211  	defer g.mtx.Unlock()
   212  	_, err := g.headBuf.Write([]byte(line + "\n"))
   213  	return err
   214  }
   215  
   216  // Buffered returns the size of the currently buffered data.
   217  func (g *Group) Buffered() int {
   218  	g.mtx.Lock()
   219  	defer g.mtx.Unlock()
   220  	return g.headBuf.Buffered()
   221  }
   222  
   223  // FlushAndSync writes any buffered data to the underlying file and commits the
   224  // current content of the file to stable storage (fsync).
   225  func (g *Group) FlushAndSync() error {
   226  	g.mtx.Lock()
   227  	defer g.mtx.Unlock()
   228  	err := g.headBuf.Flush()
   229  	if err == nil {
   230  		err = g.Head.Sync()
   231  	}
   232  	return err
   233  }
   234  
   235  func (g *Group) processTicks() {
   236  	defer close(g.doneProcessTicks)
   237  	for {
   238  		select {
   239  		case <-g.ticker.C:
   240  			g.checkHeadSizeLimit()
   241  			g.checkTotalSizeLimit()
   242  		case <-g.Quit():
   243  			return
   244  		}
   245  	}
   246  }
   247  
   248  // NOTE: this function is called manually in tests.
   249  func (g *Group) checkHeadSizeLimit() {
   250  	limit := g.HeadSizeLimit()
   251  	if limit == 0 {
   252  		return
   253  	}
   254  	size, err := g.Head.Size()
   255  	if err != nil {
   256  		g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err)
   257  		return
   258  	}
   259  	if size >= limit {
   260  		g.RotateFile()
   261  	}
   262  }
   263  
   264  func (g *Group) checkTotalSizeLimit() {
   265  	limit := g.TotalSizeLimit()
   266  	if limit == 0 {
   267  		return
   268  	}
   269  
   270  	gInfo := g.readGroupInfo()
   271  	totalSize := gInfo.TotalSize
   272  	for i := 0; i < maxFilesToRemove; i++ {
   273  		index := gInfo.MinIndex + i
   274  		if totalSize < limit {
   275  			return
   276  		}
   277  		if index == gInfo.MaxIndex {
   278  			// Special degenerate case, just do nothing.
   279  			g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path)
   280  			return
   281  		}
   282  		pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex)
   283  		fInfo, err := os.Stat(pathToRemove)
   284  		if err != nil {
   285  			g.Logger.Error("Failed to fetch info for file", "file", pathToRemove)
   286  			continue
   287  		}
   288  		err = os.Remove(pathToRemove)
   289  		if err != nil {
   290  			g.Logger.Error("Failed to remove path", "path", pathToRemove)
   291  			return
   292  		}
   293  		totalSize -= fInfo.Size()
   294  	}
   295  }
   296  
   297  // RotateFile causes group to close the current head and assign it some index.
   298  // Note it does not create a new head.
   299  func (g *Group) RotateFile() {
   300  	g.mtx.Lock()
   301  	defer g.mtx.Unlock()
   302  
   303  	headPath := g.Head.Path
   304  
   305  	if err := g.headBuf.Flush(); err != nil {
   306  		panic(err)
   307  	}
   308  
   309  	if err := g.Head.Sync(); err != nil {
   310  		panic(err)
   311  	}
   312  
   313  	if err := g.Head.closeFile(); err != nil {
   314  		panic(err)
   315  	}
   316  
   317  	indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1)
   318  	if err := os.Rename(headPath, indexPath); err != nil {
   319  		panic(err)
   320  	}
   321  
   322  	g.maxIndex++
   323  }
   324  
   325  // NewReader returns a new group reader.
   326  // CONTRACT: Caller must close the returned GroupReader.
   327  func (g *Group) NewReader(index int) (*GroupReader, error) {
   328  	r := newGroupReader(g)
   329  	err := r.SetIndex(index)
   330  	if err != nil {
   331  		return nil, err
   332  	}
   333  	return r, nil
   334  }
   335  
   336  // GroupInfo holds information about the group.
   337  type GroupInfo struct {
   338  	MinIndex  int   // index of the first file in the group, including head
   339  	MaxIndex  int   // index of the last file in the group, including head
   340  	TotalSize int64 // total size of the group
   341  	HeadSize  int64 // size of the head
   342  }
   343  
   344  // Returns info after scanning all files in g.Head's dir.
   345  func (g *Group) ReadGroupInfo() GroupInfo {
   346  	g.mtx.Lock()
   347  	defer g.mtx.Unlock()
   348  	return g.readGroupInfo()
   349  }
   350  
   351  // Index includes the head.
   352  // CONTRACT: caller should have called g.mtx.Lock
   353  func (g *Group) readGroupInfo() GroupInfo {
   354  	groupDir := filepath.Dir(g.Head.Path)
   355  	headBase := filepath.Base(g.Head.Path)
   356  	var minIndex, maxIndex int = -1, -1
   357  	var totalSize, headSize int64 = 0, 0
   358  
   359  	dir, err := os.Open(groupDir)
   360  	if err != nil {
   361  		panic(err)
   362  	}
   363  	defer dir.Close()
   364  	fiz, err := dir.Readdir(0)
   365  	if err != nil {
   366  		panic(err)
   367  	}
   368  
   369  	// For each file in the directory, filter by pattern
   370  	for _, fileInfo := range fiz {
   371  		if fileInfo.Name() == headBase {
   372  			fileSize := fileInfo.Size()
   373  			totalSize += fileSize
   374  			headSize = fileSize
   375  			continue
   376  		} else if strings.HasPrefix(fileInfo.Name(), headBase) {
   377  			fileSize := fileInfo.Size()
   378  			totalSize += fileSize
   379  			indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`)
   380  			submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name()))
   381  			if len(submatch) != 0 {
   382  				// Matches
   383  				fileIndex, err := strconv.Atoi(string(submatch[1]))
   384  				if err != nil {
   385  					panic(err)
   386  				}
   387  				if maxIndex < fileIndex {
   388  					maxIndex = fileIndex
   389  				}
   390  				if minIndex == -1 || fileIndex < minIndex {
   391  					minIndex = fileIndex
   392  				}
   393  			}
   394  		}
   395  	}
   396  
   397  	// Now account for the head.
   398  	if minIndex == -1 {
   399  		// If there were no numbered files,
   400  		// then the head is index 0.
   401  		minIndex, maxIndex = 0, 0
   402  	} else {
   403  		// Otherwise, the head file is 1 greater
   404  		maxIndex++
   405  	}
   406  	return GroupInfo{minIndex, maxIndex, totalSize, headSize}
   407  }
   408  
   409  func filePathForIndex(headPath string, index int, maxIndex int) string {
   410  	if index == maxIndex {
   411  		return headPath
   412  	}
   413  	return fmt.Sprintf("%v.%03d", headPath, index)
   414  }
   415  
   416  //--------------------------------------------------------------------------------
   417  
   418  // GroupReader provides an interface for reading from a Group.
   419  type GroupReader struct {
   420  	*Group
   421  	mtx       sync.Mutex
   422  	curIndex  int
   423  	curFile   *os.File
   424  	curReader *bufio.Reader
   425  	curLine   []byte
   426  }
   427  
   428  func newGroupReader(g *Group) *GroupReader {
   429  	return &GroupReader{
   430  		Group:     g,
   431  		curIndex:  0,
   432  		curFile:   nil,
   433  		curReader: nil,
   434  		curLine:   nil,
   435  	}
   436  }
   437  
   438  // Close closes the GroupReader by closing the cursor file.
   439  func (gr *GroupReader) Close() error {
   440  	gr.mtx.Lock()
   441  	defer gr.mtx.Unlock()
   442  
   443  	if gr.curReader != nil {
   444  		err := gr.curFile.Close()
   445  		gr.curIndex = 0
   446  		gr.curReader = nil
   447  		gr.curFile = nil
   448  		gr.curLine = nil
   449  		return err
   450  	}
   451  	return nil
   452  }
   453  
   454  // Read implements io.Reader, reading bytes from the current Reader
   455  // incrementing index until enough bytes are read.
   456  func (gr *GroupReader) Read(p []byte) (n int, err error) {
   457  	lenP := len(p)
   458  	if lenP == 0 {
   459  		return 0, errors.New("given empty slice")
   460  	}
   461  
   462  	gr.mtx.Lock()
   463  	defer gr.mtx.Unlock()
   464  
   465  	// Open file if not open yet
   466  	if gr.curReader == nil {
   467  		if err = gr.openFile(gr.curIndex); err != nil {
   468  			return 0, err
   469  		}
   470  	}
   471  
   472  	// Iterate over files until enough bytes are read
   473  	var nn int
   474  	for {
   475  		nn, err = gr.curReader.Read(p[n:])
   476  		n += nn
   477  		switch {
   478  		case err == io.EOF:
   479  			if n >= lenP {
   480  				return n, nil
   481  			}
   482  			// Open the next file
   483  			if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
   484  				return n, err1
   485  			}
   486  		case err != nil:
   487  			return n, err
   488  		case nn == 0: // empty file
   489  			return n, err
   490  		}
   491  	}
   492  }
   493  
   494  // IF index > gr.Group.maxIndex, returns io.EOF
   495  // CONTRACT: caller should hold gr.mtx
   496  func (gr *GroupReader) openFile(index int) error {
   497  	// Lock on Group to ensure that head doesn't move in the meanwhile.
   498  	gr.Group.mtx.Lock()
   499  	defer gr.Group.mtx.Unlock()
   500  
   501  	if index > gr.Group.maxIndex {
   502  		return io.EOF
   503  	}
   504  
   505  	curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex)
   506  	curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms)
   507  	if err != nil {
   508  		return err
   509  	}
   510  	curReader := bufio.NewReader(curFile)
   511  
   512  	// Update gr.cur*
   513  	if gr.curFile != nil {
   514  		gr.curFile.Close() // TODO return error?
   515  	}
   516  	gr.curIndex = index
   517  	gr.curFile = curFile
   518  	gr.curReader = curReader
   519  	gr.curLine = nil
   520  	return nil
   521  }
   522  
   523  // CurIndex returns cursor's file index.
   524  func (gr *GroupReader) CurIndex() int {
   525  	gr.mtx.Lock()
   526  	defer gr.mtx.Unlock()
   527  	return gr.curIndex
   528  }
   529  
   530  // SetIndex sets the cursor's file index to index by opening a file at this
   531  // position.
   532  func (gr *GroupReader) SetIndex(index int) error {
   533  	gr.mtx.Lock()
   534  	defer gr.mtx.Unlock()
   535  	return gr.openFile(index)
   536  }