github.com/evdatsion/aphelion-dpos-bft@v0.32.1/libs/autofile/group.go (about)

     1  package autofile
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common"
    18  )
    19  
// Defaults that OpenGroup assigns to a new Group (each can be overridden with
// the matching Group* option), plus the per-check deletion cap used by
// checkTotalSizeLimit.
const (
	defaultGroupCheckDuration = 5000 * time.Millisecond
	defaultHeadSizeLimit      = 10 * 1024 * 1024       // 10MB
	defaultTotalSizeLimit     = 1 * 1024 * 1024 * 1024 // 1GB
	maxFilesToRemove          = 4                      // needs to be greater than 1
)
    26  
/*
You can open a Group to keep restrictions on an AutoFile, like
the maximum size of each chunk, and/or the total amount of bytes
stored in the group.

The first file to be written in the Group.Dir is the head file.

	Dir/
	- <HeadPath>

Once the Head file reaches the size limit, it will be rotated.

	Dir/
	- <HeadPath>.000   // First rolled file
	- <HeadPath>       // New head path, starts empty.
	                   // The implicit index is 001.

As more files are written, the index numbers grow...

	Dir/
	- <HeadPath>.000   // First rolled file
	- <HeadPath>.001   // Second rolled file
	- ...
	- <HeadPath>       // New head path

The Group can also be used to binary-search for some line,
assuming that marker lines are written occasionally.
*/
type Group struct {
	cmn.BaseService

	ID                 string
	Head               *AutoFile // The head AutoFile to write to
	headBuf            *bufio.Writer // Buffers writes destined for Head
	Dir                string // Directory that contains .Head
	ticker             *time.Ticker // Created in OnStart; each tick triggers the limit checks
	mtx                sync.Mutex // Held by the methods that touch headBuf, the limits and the indices
	headSizeLimit      int64 // Head size at which checkHeadSizeLimit rotates; 0 disables the check
	totalSizeLimit     int64 // Group size at which checkTotalSizeLimit deletes old files; 0 disables the check
	groupCheckDuration time.Duration // Ticker period
	minIndex           int // Includes head
	maxIndex           int // Includes head, where Head will move to

	// close this when the processTicks routine is done.
	// this ensures we can cleanup the dir after calling Stop
	// and the routine won't be trying to access it anymore
	doneProcessTicks chan struct{}

	// TODO: When we start deleting files, we need to start tracking GroupReaders
	// and their dependencies.
}
    78  
    79  // OpenGroup creates a new Group with head at headPath. It returns an error if
    80  // it fails to open head file.
    81  func OpenGroup(headPath string, groupOptions ...func(*Group)) (g *Group, err error) {
    82  	dir := path.Dir(headPath)
    83  	head, err := OpenAutoFile(headPath)
    84  	if err != nil {
    85  		return nil, err
    86  	}
    87  
    88  	g = &Group{
    89  		ID:                 "group:" + head.ID,
    90  		Head:               head,
    91  		headBuf:            bufio.NewWriterSize(head, 4096*10),
    92  		Dir:                dir,
    93  		headSizeLimit:      defaultHeadSizeLimit,
    94  		totalSizeLimit:     defaultTotalSizeLimit,
    95  		groupCheckDuration: defaultGroupCheckDuration,
    96  		minIndex:           0,
    97  		maxIndex:           0,
    98  		doneProcessTicks:   make(chan struct{}),
    99  	}
   100  
   101  	for _, option := range groupOptions {
   102  		option(g)
   103  	}
   104  
   105  	g.BaseService = *cmn.NewBaseService(nil, "Group", g)
   106  
   107  	gInfo := g.readGroupInfo()
   108  	g.minIndex = gInfo.MinIndex
   109  	g.maxIndex = gInfo.MaxIndex
   110  	return
   111  }
   112  
   113  // GroupCheckDuration allows you to overwrite default groupCheckDuration.
   114  func GroupCheckDuration(duration time.Duration) func(*Group) {
   115  	return func(g *Group) {
   116  		g.groupCheckDuration = duration
   117  	}
   118  }
   119  
   120  // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB.
   121  func GroupHeadSizeLimit(limit int64) func(*Group) {
   122  	return func(g *Group) {
   123  		g.headSizeLimit = limit
   124  	}
   125  }
   126  
   127  // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB.
   128  func GroupTotalSizeLimit(limit int64) func(*Group) {
   129  	return func(g *Group) {
   130  		g.totalSizeLimit = limit
   131  	}
   132  }
   133  
// OnStart implements cmn.Service by starting the goroutine that checks file
// and group limits. The ticker is created first because processTicks reads
// g.ticker as soon as it runs. It always returns nil.
func (g *Group) OnStart() error {
	g.ticker = time.NewTicker(g.groupCheckDuration)
	go g.processTicks()
	return nil
}
   141  
   142  // OnStop implements cmn.Service by stopping the goroutine described above.
   143  // NOTE: g.Head must be closed separately using Close.
   144  func (g *Group) OnStop() {
   145  	g.ticker.Stop()
   146  	g.FlushAndSync()
   147  }
   148  
// Wait blocks until all internal goroutines are finished. Supposed to be
// called after Stop. It returns once processTicks has closed
// doneProcessTicks.
func (g *Group) Wait() {
	// wait for processTicks routine to finish
	<-g.doneProcessTicks
}
   155  
   156  // Close closes the head file. The group must be stopped by this moment.
   157  func (g *Group) Close() {
   158  	g.FlushAndSync()
   159  
   160  	g.mtx.Lock()
   161  	_ = g.Head.closeFile()
   162  	g.mtx.Unlock()
   163  }
   164  
   165  // HeadSizeLimit returns the current head size limit.
   166  func (g *Group) HeadSizeLimit() int64 {
   167  	g.mtx.Lock()
   168  	defer g.mtx.Unlock()
   169  	return g.headSizeLimit
   170  }
   171  
   172  // TotalSizeLimit returns total size limit of the group.
   173  func (g *Group) TotalSizeLimit() int64 {
   174  	g.mtx.Lock()
   175  	defer g.mtx.Unlock()
   176  	return g.totalSizeLimit
   177  }
   178  
   179  // MaxIndex returns index of the last file in the group.
   180  func (g *Group) MaxIndex() int {
   181  	g.mtx.Lock()
   182  	defer g.mtx.Unlock()
   183  	return g.maxIndex
   184  }
   185  
   186  // MinIndex returns index of the first file in the group.
   187  func (g *Group) MinIndex() int {
   188  	g.mtx.Lock()
   189  	defer g.mtx.Unlock()
   190  	return g.minIndex
   191  }
   192  
   193  // Write writes the contents of p into the current head of the group. It
   194  // returns the number of bytes written. If nn < len(p), it also returns an
   195  // error explaining why the write is short.
   196  // NOTE: Writes are buffered so they don't write synchronously
   197  // TODO: Make it halt if space is unavailable
   198  func (g *Group) Write(p []byte) (nn int, err error) {
   199  	g.mtx.Lock()
   200  	defer g.mtx.Unlock()
   201  	return g.headBuf.Write(p)
   202  }
   203  
   204  // WriteLine writes line into the current head of the group. It also appends "\n".
   205  // NOTE: Writes are buffered so they don't write synchronously
   206  // TODO: Make it halt if space is unavailable
   207  func (g *Group) WriteLine(line string) error {
   208  	g.mtx.Lock()
   209  	defer g.mtx.Unlock()
   210  	_, err := g.headBuf.Write([]byte(line + "\n"))
   211  	return err
   212  }
   213  
   214  // Buffered returns the size of the currently buffered data.
   215  func (g *Group) Buffered() int {
   216  	g.mtx.Lock()
   217  	defer g.mtx.Unlock()
   218  	return g.headBuf.Buffered()
   219  }
   220  
   221  // FlushAndSync writes any buffered data to the underlying file and commits the
   222  // current content of the file to stable storage (fsync).
   223  func (g *Group) FlushAndSync() error {
   224  	g.mtx.Lock()
   225  	defer g.mtx.Unlock()
   226  	err := g.headBuf.Flush()
   227  	if err == nil {
   228  		err = g.Head.Sync()
   229  	}
   230  	return err
   231  }
   232  
// processTicks is the background loop started by OnStart. On every ticker
// tick it runs the head-size check followed by the total-size check, and it
// exits when the service's Quit channel fires. doneProcessTicks is closed on
// exit so that Wait can unblock.
func (g *Group) processTicks() {
	defer close(g.doneProcessTicks)
	for {
		select {
		case <-g.ticker.C:
			g.checkHeadSizeLimit()
			g.checkTotalSizeLimit()
		case <-g.Quit():
			return
		}
	}
}
   245  
   246  // NOTE: this function is called manually in tests.
   247  func (g *Group) checkHeadSizeLimit() {
   248  	limit := g.HeadSizeLimit()
   249  	if limit == 0 {
   250  		return
   251  	}
   252  	size, err := g.Head.Size()
   253  	if err != nil {
   254  		g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err)
   255  		return
   256  	}
   257  	if size >= limit {
   258  		g.RotateFile()
   259  	}
   260  }
   261  
   262  func (g *Group) checkTotalSizeLimit() {
   263  	limit := g.TotalSizeLimit()
   264  	if limit == 0 {
   265  		return
   266  	}
   267  
   268  	gInfo := g.readGroupInfo()
   269  	totalSize := gInfo.TotalSize
   270  	for i := 0; i < maxFilesToRemove; i++ {
   271  		index := gInfo.MinIndex + i
   272  		if totalSize < limit {
   273  			return
   274  		}
   275  		if index == gInfo.MaxIndex {
   276  			// Special degenerate case, just do nothing.
   277  			g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path)
   278  			return
   279  		}
   280  		pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex)
   281  		fInfo, err := os.Stat(pathToRemove)
   282  		if err != nil {
   283  			g.Logger.Error("Failed to fetch info for file", "file", pathToRemove)
   284  			continue
   285  		}
   286  		err = os.Remove(pathToRemove)
   287  		if err != nil {
   288  			g.Logger.Error("Failed to remove path", "path", pathToRemove)
   289  			return
   290  		}
   291  		totalSize -= fInfo.Size()
   292  	}
   293  }
   294  
// RotateFile causes group to close the current head and assign it some index.
// Note it does not create a new head.
//
// Under the group mutex it flushes the write buffer, syncs and closes the
// head, renames the head file to its indexed path and then increments
// maxIndex. It panics if any of those steps fails.
func (g *Group) RotateFile() {
	g.mtx.Lock()
	defer g.mtx.Unlock()

	headPath := g.Head.Path

	if err := g.headBuf.Flush(); err != nil {
		panic(err)
	}

	if err := g.Head.Sync(); err != nil {
		panic(err)
	}

	if err := g.Head.closeFile(); err != nil {
		panic(err)
	}

	// Passing maxIndex+1 as the "head" index forces the indexed (".NNN") form
	// for the current maxIndex.
	indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1)
	if err := os.Rename(headPath, indexPath); err != nil {
		panic(err)
	}

	g.maxIndex++
}
   322  
   323  // NewReader returns a new group reader.
   324  // CONTRACT: Caller must close the returned GroupReader.
   325  func (g *Group) NewReader(index int) (*GroupReader, error) {
   326  	r := newGroupReader(g)
   327  	err := r.SetIndex(index)
   328  	if err != nil {
   329  		return nil, err
   330  	}
   331  	return r, nil
   332  }
   333  
// GroupInfo holds information about the group, as computed by a scan of the
// head file's directory (see readGroupInfo).
type GroupInfo struct {
	MinIndex  int   // index of the first file in the group, including head
	MaxIndex  int   // index of the last file in the group, including head
	TotalSize int64 // total size of the group
	HeadSize  int64 // size of the head
}
   341  
   342  // Returns info after scanning all files in g.Head's dir.
   343  func (g *Group) ReadGroupInfo() GroupInfo {
   344  	g.mtx.Lock()
   345  	defer g.mtx.Unlock()
   346  	return g.readGroupInfo()
   347  }
   348  
   349  // Index includes the head.
   350  // CONTRACT: caller should have called g.mtx.Lock
   351  func (g *Group) readGroupInfo() GroupInfo {
   352  	groupDir := filepath.Dir(g.Head.Path)
   353  	headBase := filepath.Base(g.Head.Path)
   354  	var minIndex, maxIndex int = -1, -1
   355  	var totalSize, headSize int64 = 0, 0
   356  
   357  	dir, err := os.Open(groupDir)
   358  	if err != nil {
   359  		panic(err)
   360  	}
   361  	defer dir.Close()
   362  	fiz, err := dir.Readdir(0)
   363  	if err != nil {
   364  		panic(err)
   365  	}
   366  
   367  	// For each file in the directory, filter by pattern
   368  	for _, fileInfo := range fiz {
   369  		if fileInfo.Name() == headBase {
   370  			fileSize := fileInfo.Size()
   371  			totalSize += fileSize
   372  			headSize = fileSize
   373  			continue
   374  		} else if strings.HasPrefix(fileInfo.Name(), headBase) {
   375  			fileSize := fileInfo.Size()
   376  			totalSize += fileSize
   377  			indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`)
   378  			submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name()))
   379  			if len(submatch) != 0 {
   380  				// Matches
   381  				fileIndex, err := strconv.Atoi(string(submatch[1]))
   382  				if err != nil {
   383  					panic(err)
   384  				}
   385  				if maxIndex < fileIndex {
   386  					maxIndex = fileIndex
   387  				}
   388  				if minIndex == -1 || fileIndex < minIndex {
   389  					minIndex = fileIndex
   390  				}
   391  			}
   392  		}
   393  	}
   394  
   395  	// Now account for the head.
   396  	if minIndex == -1 {
   397  		// If there were no numbered files,
   398  		// then the head is index 0.
   399  		minIndex, maxIndex = 0, 0
   400  	} else {
   401  		// Otherwise, the head file is 1 greater
   402  		maxIndex++
   403  	}
   404  	return GroupInfo{minIndex, maxIndex, totalSize, headSize}
   405  }
   406  
   407  func filePathForIndex(headPath string, index int, maxIndex int) string {
   408  	if index == maxIndex {
   409  		return headPath
   410  	}
   411  	return fmt.Sprintf("%v.%03d", headPath, index)
   412  }
   413  
   414  //--------------------------------------------------------------------------------
   415  
// GroupReader provides an interface for reading from a Group. It reads the
// group's files in index order through a cursor made of the cur* fields.
type GroupReader struct {
	*Group
	mtx       sync.Mutex    // guards the cur* fields; distinct from the embedded Group's mtx
	curIndex  int           // index of the file the cursor is on
	curFile   *os.File      // currently open file, nil when nothing is open
	curReader *bufio.Reader // buffered reader over curFile
	curLine   []byte        // only ever reset to nil in this file; NOTE(review): purpose not visible here
}
   425  
   426  func newGroupReader(g *Group) *GroupReader {
   427  	return &GroupReader{
   428  		Group:     g,
   429  		curIndex:  0,
   430  		curFile:   nil,
   431  		curReader: nil,
   432  		curLine:   nil,
   433  	}
   434  }
   435  
   436  // Close closes the GroupReader by closing the cursor file.
   437  func (gr *GroupReader) Close() error {
   438  	gr.mtx.Lock()
   439  	defer gr.mtx.Unlock()
   440  
   441  	if gr.curReader != nil {
   442  		err := gr.curFile.Close()
   443  		gr.curIndex = 0
   444  		gr.curReader = nil
   445  		gr.curFile = nil
   446  		gr.curLine = nil
   447  		return err
   448  	}
   449  	return nil
   450  }
   451  
   452  // Read implements io.Reader, reading bytes from the current Reader
   453  // incrementing index until enough bytes are read.
   454  func (gr *GroupReader) Read(p []byte) (n int, err error) {
   455  	lenP := len(p)
   456  	if lenP == 0 {
   457  		return 0, errors.New("given empty slice")
   458  	}
   459  
   460  	gr.mtx.Lock()
   461  	defer gr.mtx.Unlock()
   462  
   463  	// Open file if not open yet
   464  	if gr.curReader == nil {
   465  		if err = gr.openFile(gr.curIndex); err != nil {
   466  			return 0, err
   467  		}
   468  	}
   469  
   470  	// Iterate over files until enough bytes are read
   471  	var nn int
   472  	for {
   473  		nn, err = gr.curReader.Read(p[n:])
   474  		n += nn
   475  		if err == io.EOF {
   476  			if n >= lenP {
   477  				return n, nil
   478  			}
   479  			// Open the next file
   480  			if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
   481  				return n, err1
   482  			}
   483  		} else if err != nil {
   484  			return n, err
   485  		} else if nn == 0 { // empty file
   486  			return n, err
   487  		}
   488  	}
   489  }
   490  
   491  // IF index > gr.Group.maxIndex, returns io.EOF
   492  // CONTRACT: caller should hold gr.mtx
   493  func (gr *GroupReader) openFile(index int) error {
   494  	// Lock on Group to ensure that head doesn't move in the meanwhile.
   495  	gr.Group.mtx.Lock()
   496  	defer gr.Group.mtx.Unlock()
   497  
   498  	if index > gr.Group.maxIndex {
   499  		return io.EOF
   500  	}
   501  
   502  	curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex)
   503  	curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms)
   504  	if err != nil {
   505  		return err
   506  	}
   507  	curReader := bufio.NewReader(curFile)
   508  
   509  	// Update gr.cur*
   510  	if gr.curFile != nil {
   511  		gr.curFile.Close() // TODO return error?
   512  	}
   513  	gr.curIndex = index
   514  	gr.curFile = curFile
   515  	gr.curReader = curReader
   516  	gr.curLine = nil
   517  	return nil
   518  }
   519  
   520  // CurIndex returns cursor's file index.
   521  func (gr *GroupReader) CurIndex() int {
   522  	gr.mtx.Lock()
   523  	defer gr.mtx.Unlock()
   524  	return gr.curIndex
   525  }
   526  
   527  // SetIndex sets the cursor's file index to index by opening a file at this
   528  // position.
   529  func (gr *GroupReader) SetIndex(index int) error {
   530  	gr.mtx.Lock()
   531  	defer gr.mtx.Unlock()
   532  	return gr.openFile(index)
   533  }