github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/internal/libs/autofile/group.go (about)

     1  package autofile
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"path/filepath"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/ari-anchor/sei-tendermint/libs/log"
    18  	"github.com/ari-anchor/sei-tendermint/libs/service"
    19  )
    20  
    21  const (
    22  	defaultGroupCheckDuration = 5000 * time.Millisecond
    23  	defaultHeadSizeLimit      = 10 * 1024 * 1024       // 10MB
    24  	defaultTotalSizeLimit     = 1 * 1024 * 1024 * 1024 // 1GB
    25  	maxFilesToRemove          = 4                      // needs to be greater than 1
    26  )
    27  
    28  /*
    29  You can open a Group to keep restrictions on an AutoFile, like
    30  the maximum size of each chunk, and/or the total amount of bytes
    31  stored in the group.
    32  
    33  The first file to be written in the Group.Dir is the head file.
    34  
    35  	Dir/
    36  	- <HeadPath>
    37  
    38  Once the Head file reaches the size limit, it will be rotated.
    39  
    40  	Dir/
    41  	- <HeadPath>.000   // First rolled file
    42  	- <HeadPath>       // New head path, starts empty.
    43  										 // The implicit index is 001.
    44  
    45  As more files are written, the index numbers grow...
    46  
    47  	Dir/
    48  	- <HeadPath>.000   // First rolled file
    49  	- <HeadPath>.001   // Second rolled file
    50  	- ...
    51  	- <HeadPath>       // New head path
    52  
    53  The Group can also be used to binary-search for some line,
    54  assuming that marker lines are written occasionally.
    55  */
    56  type Group struct {
    57  	service.BaseService
    58  	logger log.Logger
    59  
    60  	ID                 string
    61  	Head               *AutoFile // The head AutoFile to write to
    62  	headBuf            *bufio.Writer
    63  	Dir                string // Directory that contains .Head
    64  	ticker             *time.Ticker
    65  	mtx                sync.Mutex
    66  	headSizeLimit      int64
    67  	totalSizeLimit     int64
    68  	groupCheckDuration time.Duration
    69  	minIndex           int // Includes head
    70  	maxIndex           int // Includes head, where Head will move to
    71  
    72  	// TODO: When we start deleting files, we need to start tracking GroupReaders
    73  	// and their dependencies.
    74  }
    75  
    76  // OpenGroup creates a new Group with head at headPath. It returns an error if
    77  // it fails to open head file.
    78  func OpenGroup(ctx context.Context, logger log.Logger, headPath string, groupOptions ...func(*Group)) (*Group, error) {
    79  	dir, err := filepath.Abs(filepath.Dir(headPath))
    80  	if err != nil {
    81  		return nil, err
    82  	}
    83  	head, err := OpenAutoFile(ctx, headPath)
    84  	if err != nil {
    85  		return nil, err
    86  	}
    87  
    88  	g := &Group{
    89  		logger:             logger,
    90  		ID:                 "group:" + head.ID,
    91  		Head:               head,
    92  		headBuf:            bufio.NewWriterSize(head, 4096*10),
    93  		Dir:                dir,
    94  		headSizeLimit:      defaultHeadSizeLimit,
    95  		totalSizeLimit:     defaultTotalSizeLimit,
    96  		groupCheckDuration: defaultGroupCheckDuration,
    97  		minIndex:           0,
    98  		maxIndex:           0,
    99  	}
   100  
   101  	for _, option := range groupOptions {
   102  		option(g)
   103  	}
   104  
   105  	g.BaseService = *service.NewBaseService(logger, "Group", g)
   106  
   107  	gInfo := g.readGroupInfo()
   108  	g.minIndex = gInfo.MinIndex
   109  	g.maxIndex = gInfo.MaxIndex
   110  	return g, nil
   111  }
   112  
   113  // GroupCheckDuration allows you to overwrite default groupCheckDuration.
   114  func GroupCheckDuration(duration time.Duration) func(*Group) {
   115  	return func(g *Group) {
   116  		g.groupCheckDuration = duration
   117  	}
   118  }
   119  
   120  // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB.
   121  func GroupHeadSizeLimit(limit int64) func(*Group) {
   122  	return func(g *Group) {
   123  		g.headSizeLimit = limit
   124  	}
   125  }
   126  
   127  // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB.
   128  func GroupTotalSizeLimit(limit int64) func(*Group) {
   129  	return func(g *Group) {
   130  		g.totalSizeLimit = limit
   131  	}
   132  }
   133  
   134  // OnStart implements service.Service by starting the goroutine that checks file
   135  // and group limits.
   136  func (g *Group) OnStart(ctx context.Context) error {
   137  	g.ticker = time.NewTicker(g.groupCheckDuration)
   138  	go g.processTicks(ctx)
   139  	return nil
   140  }
   141  
   142  // OnStop implements service.Service by stopping the goroutine described above.
   143  // NOTE: g.Head must be closed separately using Close.
   144  func (g *Group) OnStop() {
   145  	g.ticker.Stop()
   146  	if err := g.FlushAndSync(); err != nil {
   147  		g.logger.Error("error flushing to disk", "err", err)
   148  	}
   149  }
   150  
   151  // Close closes the head file. The group must be stopped by this moment.
   152  func (g *Group) Close() {
   153  	if err := g.FlushAndSync(); err != nil {
   154  		g.logger.Error("error flushing to disk", "err", err)
   155  	}
   156  
   157  	g.mtx.Lock()
   158  	_ = g.Head.Close()
   159  	g.mtx.Unlock()
   160  }
   161  
   162  // HeadSizeLimit returns the current head size limit.
   163  func (g *Group) HeadSizeLimit() int64 {
   164  	g.mtx.Lock()
   165  	defer g.mtx.Unlock()
   166  	return g.headSizeLimit
   167  }
   168  
   169  // TotalSizeLimit returns total size limit of the group.
   170  func (g *Group) TotalSizeLimit() int64 {
   171  	g.mtx.Lock()
   172  	defer g.mtx.Unlock()
   173  	return g.totalSizeLimit
   174  }
   175  
   176  // MaxIndex returns index of the last file in the group.
   177  func (g *Group) MaxIndex() int {
   178  	g.mtx.Lock()
   179  	defer g.mtx.Unlock()
   180  	return g.maxIndex
   181  }
   182  
   183  // MinIndex returns index of the first file in the group.
   184  func (g *Group) MinIndex() int {
   185  	g.mtx.Lock()
   186  	defer g.mtx.Unlock()
   187  	return g.minIndex
   188  }
   189  
   190  // Write writes the contents of p into the current head of the group. It
   191  // returns the number of bytes written. If nn < len(p), it also returns an
   192  // error explaining why the write is short.
   193  // NOTE: Writes are buffered so they don't write synchronously
   194  // TODO: Make it halt if space is unavailable
   195  func (g *Group) Write(p []byte) (nn int, err error) {
   196  	g.mtx.Lock()
   197  	defer g.mtx.Unlock()
   198  	return g.headBuf.Write(p)
   199  }
   200  
   201  // WriteLine writes line into the current head of the group. It also appends "\n".
   202  // NOTE: Writes are buffered so they don't write synchronously
   203  // TODO: Make it halt if space is unavailable
   204  func (g *Group) WriteLine(line string) error {
   205  	g.mtx.Lock()
   206  	defer g.mtx.Unlock()
   207  	_, err := g.headBuf.Write([]byte(line + "\n"))
   208  	return err
   209  }
   210  
   211  // Buffered returns the size of the currently buffered data.
   212  func (g *Group) Buffered() int {
   213  	g.mtx.Lock()
   214  	defer g.mtx.Unlock()
   215  	return g.headBuf.Buffered()
   216  }
   217  
   218  // FlushAndSync writes any buffered data to the underlying file and commits the
   219  // current content of the file to stable storage (fsync).
   220  func (g *Group) FlushAndSync() error {
   221  	g.mtx.Lock()
   222  	defer g.mtx.Unlock()
   223  	err := g.headBuf.Flush()
   224  	if err == nil {
   225  		err = g.Head.Sync()
   226  	}
   227  	return err
   228  }
   229  
   230  func (g *Group) processTicks(ctx context.Context) {
   231  	for {
   232  		select {
   233  		case <-ctx.Done():
   234  			return
   235  		case <-g.ticker.C:
   236  			g.checkHeadSizeLimit(ctx)
   237  			g.checkTotalSizeLimit(ctx)
   238  		}
   239  	}
   240  }
   241  
   242  // NOTE: this function is called manually in tests.
   243  func (g *Group) checkHeadSizeLimit(ctx context.Context) {
   244  	limit := g.HeadSizeLimit()
   245  	if limit == 0 {
   246  		return
   247  	}
   248  	size, err := g.Head.Size()
   249  	if err != nil {
   250  		g.logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err)
   251  		return
   252  	}
   253  	if size >= limit {
   254  		g.rotateFile(ctx)
   255  	}
   256  }
   257  
   258  func (g *Group) checkTotalSizeLimit(ctx context.Context) {
   259  	g.mtx.Lock()
   260  	defer g.mtx.Unlock()
   261  
   262  	if err := ctx.Err(); err != nil {
   263  		return
   264  	}
   265  
   266  	if g.totalSizeLimit == 0 {
   267  		return
   268  	}
   269  
   270  	gInfo := g.readGroupInfo()
   271  	totalSize := gInfo.TotalSize
   272  	for i := 0; i < maxFilesToRemove; i++ {
   273  		index := gInfo.MinIndex + i
   274  		if totalSize < g.totalSizeLimit {
   275  			return
   276  		}
   277  		if index == gInfo.MaxIndex {
   278  			// Special degenerate case, just do nothing.
   279  			g.logger.Error("Group's head may grow without bound", "head", g.Head.Path)
   280  			return
   281  		}
   282  
   283  		if ctx.Err() != nil {
   284  			return
   285  		}
   286  
   287  		pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex)
   288  		fInfo, err := os.Stat(pathToRemove)
   289  		if err != nil {
   290  			g.logger.Error("Failed to fetch info for file", "file", pathToRemove)
   291  			continue
   292  		}
   293  
   294  		if ctx.Err() != nil {
   295  			return
   296  		}
   297  
   298  		if err = os.Remove(pathToRemove); err != nil {
   299  			g.logger.Error("Failed to remove path", "path", pathToRemove)
   300  			return
   301  		}
   302  		totalSize -= fInfo.Size()
   303  	}
   304  }
   305  
   306  // rotateFile causes group to close the current head and assign it
   307  // some index. Panics if it encounters an error.
   308  func (g *Group) rotateFile(ctx context.Context) {
   309  	g.mtx.Lock()
   310  	defer g.mtx.Unlock()
   311  
   312  	if err := ctx.Err(); err != nil {
   313  		return
   314  	}
   315  
   316  	headPath := g.Head.Path
   317  
   318  	if err := g.headBuf.Flush(); err != nil {
   319  		panic(err)
   320  	}
   321  	if err := g.Head.Sync(); err != nil {
   322  		panic(err)
   323  	}
   324  	err := g.Head.withLock(func() error {
   325  		if err := ctx.Err(); err != nil {
   326  			return err
   327  		}
   328  
   329  		if err := g.Head.unsyncCloseFile(); err != nil {
   330  			return err
   331  		}
   332  
   333  		indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1)
   334  		return os.Rename(headPath, indexPath)
   335  	})
   336  	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
   337  		return
   338  	}
   339  	if err != nil {
   340  		panic(err)
   341  	}
   342  
   343  	g.maxIndex++
   344  }
   345  
   346  // NewReader returns a new group reader.
   347  // CONTRACT: Caller must close the returned GroupReader.
   348  func (g *Group) NewReader(index int) (*GroupReader, error) {
   349  	r := newGroupReader(g)
   350  	err := r.SetIndex(index)
   351  	if err != nil {
   352  		return nil, err
   353  	}
   354  	return r, nil
   355  }
   356  
   357  // GroupInfo holds information about the group.
   358  type GroupInfo struct {
   359  	MinIndex  int   // index of the first file in the group, including head
   360  	MaxIndex  int   // index of the last file in the group, including head
   361  	TotalSize int64 // total size of the group
   362  	HeadSize  int64 // size of the head
   363  }
   364  
   365  // Returns info after scanning all files in g.Head's dir.
   366  func (g *Group) ReadGroupInfo() GroupInfo {
   367  	g.mtx.Lock()
   368  	defer g.mtx.Unlock()
   369  	return g.readGroupInfo()
   370  }
   371  
   372  // Index includes the head.
   373  // CONTRACT: caller should have called g.mtx.Lock
   374  func (g *Group) readGroupInfo() GroupInfo {
   375  	groupDir := filepath.Dir(g.Head.Path)
   376  	headBase := filepath.Base(g.Head.Path)
   377  	var minIndex, maxIndex int = -1, -1
   378  	var totalSize, headSize int64 = 0, 0
   379  
   380  	dir, err := os.Open(groupDir)
   381  	if err != nil {
   382  		panic(err)
   383  	}
   384  	defer dir.Close()
   385  	fiz, err := dir.Readdir(0)
   386  	if err != nil {
   387  		panic(err)
   388  	}
   389  
   390  	// For each file in the directory, filter by pattern
   391  	for _, fileInfo := range fiz {
   392  		if fileInfo.Name() == headBase {
   393  			fileSize := fileInfo.Size()
   394  			totalSize += fileSize
   395  			headSize = fileSize
   396  			continue
   397  		} else if strings.HasPrefix(fileInfo.Name(), headBase) {
   398  			fileSize := fileInfo.Size()
   399  			totalSize += fileSize
   400  			indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`)
   401  			submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name()))
   402  			if len(submatch) != 0 {
   403  				// Matches
   404  				fileIndex, err := strconv.Atoi(string(submatch[1]))
   405  				if err != nil {
   406  					panic(err)
   407  				}
   408  				if maxIndex < fileIndex {
   409  					maxIndex = fileIndex
   410  				}
   411  				if minIndex == -1 || fileIndex < minIndex {
   412  					minIndex = fileIndex
   413  				}
   414  			}
   415  		}
   416  	}
   417  
   418  	// Now account for the head.
   419  	if minIndex == -1 {
   420  		// If there were no numbered files,
   421  		// then the head is index 0.
   422  		minIndex, maxIndex = 0, 0
   423  	} else {
   424  		// Otherwise, the head file is 1 greater
   425  		maxIndex++
   426  	}
   427  	return GroupInfo{minIndex, maxIndex, totalSize, headSize}
   428  }
   429  
   430  func filePathForIndex(headPath string, index int, maxIndex int) string {
   431  	if index == maxIndex {
   432  		return headPath
   433  	}
   434  	return fmt.Sprintf("%v.%03d", headPath, index)
   435  }
   436  
   437  //--------------------------------------------------------------------------------
   438  
   439  // GroupReader provides an interface for reading from a Group.
   440  type GroupReader struct {
   441  	*Group
   442  	mtx       sync.Mutex
   443  	curIndex  int
   444  	curFile   *os.File
   445  	curReader *bufio.Reader
   446  	curLine   []byte
   447  }
   448  
   449  func newGroupReader(g *Group) *GroupReader {
   450  	return &GroupReader{
   451  		Group:     g,
   452  		curIndex:  0,
   453  		curFile:   nil,
   454  		curReader: nil,
   455  		curLine:   nil,
   456  	}
   457  }
   458  
   459  // Close closes the GroupReader by closing the cursor file.
   460  func (gr *GroupReader) Close() error {
   461  	gr.mtx.Lock()
   462  	defer gr.mtx.Unlock()
   463  
   464  	if gr.curReader != nil {
   465  		err := gr.curFile.Close()
   466  		gr.curIndex = 0
   467  		gr.curReader = nil
   468  		gr.curFile = nil
   469  		gr.curLine = nil
   470  		return err
   471  	}
   472  	return nil
   473  }
   474  
   475  // Read implements io.Reader, reading bytes from the current Reader
   476  // incrementing index until enough bytes are read.
   477  func (gr *GroupReader) Read(p []byte) (n int, err error) {
   478  	lenP := len(p)
   479  	if lenP == 0 {
   480  		return 0, errors.New("given empty slice")
   481  	}
   482  
   483  	gr.mtx.Lock()
   484  	defer gr.mtx.Unlock()
   485  
   486  	// Open file if not open yet
   487  	if gr.curReader == nil {
   488  		if err = gr.openFile(gr.curIndex); err != nil {
   489  			return 0, err
   490  		}
   491  	}
   492  
   493  	// Iterate over files until enough bytes are read
   494  	var nn int
   495  	for {
   496  		nn, err = gr.curReader.Read(p[n:])
   497  		n += nn
   498  		switch {
   499  		case err == io.EOF:
   500  			if n >= lenP {
   501  				return n, nil
   502  			}
   503  			// Open the next file
   504  			if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
   505  				return n, err1
   506  			}
   507  		case err != nil:
   508  			return n, err
   509  		case nn == 0: // empty file
   510  			return n, err
   511  		}
   512  	}
   513  }
   514  
   515  // IF index > gr.Group.maxIndex, returns io.EOF
   516  // CONTRACT: caller should hold gr.mtx
   517  func (gr *GroupReader) openFile(index int) error {
   518  	// Lock on Group to ensure that head doesn't move in the meanwhile.
   519  	gr.Group.mtx.Lock()
   520  	defer gr.Group.mtx.Unlock()
   521  
   522  	if index > gr.Group.maxIndex {
   523  		return io.EOF
   524  	}
   525  
   526  	curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex)
   527  	curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms)
   528  	if err != nil {
   529  		return err
   530  	}
   531  	curReader := bufio.NewReader(curFile)
   532  
   533  	// Update gr.cur*
   534  	if gr.curFile != nil {
   535  		gr.curFile.Close() // TODO return error?
   536  	}
   537  	gr.curIndex = index
   538  	gr.curFile = curFile
   539  	gr.curReader = curReader
   540  	gr.curLine = nil
   541  	return nil
   542  }
   543  
   544  // CurIndex returns cursor's file index.
   545  func (gr *GroupReader) CurIndex() int {
   546  	gr.mtx.Lock()
   547  	defer gr.mtx.Unlock()
   548  	return gr.curIndex
   549  }
   550  
   551  // SetIndex sets the cursor's file index to index by opening a file at this
   552  // position.
   553  func (gr *GroupReader) SetIndex(index int) error {
   554  	gr.mtx.Lock()
   555  	defer gr.mtx.Unlock()
   556  	return gr.openFile(index)
   557  }