github.com/DFWallet/tendermint-cosmos@v0.0.2/libs/autofile/group.go (about)

     1  package autofile
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"regexp"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	"github.com/DFWallet/tendermint-cosmos/libs/service"
    17  )
    18  
    19  const (
    20  	defaultGroupCheckDuration = 5000 * time.Millisecond
    21  	defaultHeadSizeLimit      = 10 * 1024 * 1024       // 10MB
    22  	defaultTotalSizeLimit     = 1 * 1024 * 1024 * 1024 // 1GB
    23  	maxFilesToRemove          = 4                      // needs to be greater than 1
    24  )
    25  
    26  /*
    27  You can open a Group to keep restrictions on an AutoFile, like
    28  the maximum size of each chunk, and/or the total amount of bytes
    29  stored in the group.
    30  
    31  The first file to be written in the Group.Dir is the head file.
    32  
    33  	Dir/
    34  	- <HeadPath>
    35  
    36  Once the Head file reaches the size limit, it will be rotated.
    37  
    38  	Dir/
    39  	- <HeadPath>.000   // First rolled file
    40  	- <HeadPath>       // New head path, starts empty.
    41  										 // The implicit index is 001.
    42  
    43  As more files are written, the index numbers grow...
    44  
    45  	Dir/
    46  	- <HeadPath>.000   // First rolled file
    47  	- <HeadPath>.001   // Second rolled file
    48  	- ...
    49  	- <HeadPath>       // New head path
    50  
    51  The Group can also be used to binary-search for some line,
    52  assuming that marker lines are written occasionally.
    53  */
    54  type Group struct {
    55  	service.BaseService
    56  
    57  	ID                 string
    58  	Head               *AutoFile // The head AutoFile to write to
    59  	headBuf            *bufio.Writer
    60  	Dir                string // Directory that contains .Head
    61  	ticker             *time.Ticker
    62  	mtx                sync.Mutex
    63  	headSizeLimit      int64
    64  	totalSizeLimit     int64
    65  	groupCheckDuration time.Duration
    66  	minIndex           int // Includes head
    67  	maxIndex           int // Includes head, where Head will move to
    68  
    69  	// close this when the processTicks routine is done.
    70  	// this ensures we can cleanup the dir after calling Stop
    71  	// and the routine won't be trying to access it anymore
    72  	doneProcessTicks chan struct{}
    73  
    74  	// TODO: When we start deleting files, we need to start tracking GroupReaders
    75  	// and their dependencies.
    76  }
    77  
    78  // OpenGroup creates a new Group with head at headPath. It returns an error if
    79  // it fails to open head file.
    80  func OpenGroup(headPath string, groupOptions ...func(*Group)) (*Group, error) {
    81  	dir, err := filepath.Abs(filepath.Dir(headPath))
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	head, err := OpenAutoFile(headPath)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	g := &Group{
    91  		ID:                 "group:" + head.ID,
    92  		Head:               head,
    93  		headBuf:            bufio.NewWriterSize(head, 4096*10),
    94  		Dir:                dir,
    95  		headSizeLimit:      defaultHeadSizeLimit,
    96  		totalSizeLimit:     defaultTotalSizeLimit,
    97  		groupCheckDuration: defaultGroupCheckDuration,
    98  		minIndex:           0,
    99  		maxIndex:           0,
   100  		doneProcessTicks:   make(chan struct{}),
   101  	}
   102  
   103  	for _, option := range groupOptions {
   104  		option(g)
   105  	}
   106  
   107  	g.BaseService = *service.NewBaseService(nil, "Group", g)
   108  
   109  	gInfo := g.readGroupInfo()
   110  	g.minIndex = gInfo.MinIndex
   111  	g.maxIndex = gInfo.MaxIndex
   112  	return g, nil
   113  }
   114  
   115  // GroupCheckDuration allows you to overwrite default groupCheckDuration.
   116  func GroupCheckDuration(duration time.Duration) func(*Group) {
   117  	return func(g *Group) {
   118  		g.groupCheckDuration = duration
   119  	}
   120  }
   121  
   122  // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB.
   123  func GroupHeadSizeLimit(limit int64) func(*Group) {
   124  	return func(g *Group) {
   125  		g.headSizeLimit = limit
   126  	}
   127  }
   128  
   129  // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB.
   130  func GroupTotalSizeLimit(limit int64) func(*Group) {
   131  	return func(g *Group) {
   132  		g.totalSizeLimit = limit
   133  	}
   134  }
   135  
   136  // OnStart implements service.Service by starting the goroutine that checks file
   137  // and group limits.
   138  func (g *Group) OnStart() error {
   139  	g.ticker = time.NewTicker(g.groupCheckDuration)
   140  	go g.processTicks()
   141  	return nil
   142  }
   143  
   144  // OnStop implements service.Service by stopping the goroutine described above.
   145  // NOTE: g.Head must be closed separately using Close.
   146  func (g *Group) OnStop() {
   147  	g.ticker.Stop()
   148  	if err := g.FlushAndSync(); err != nil {
   149  		g.Logger.Error("Error flushin to disk", "err", err)
   150  	}
   151  }
   152  
   153  // Wait blocks until all internal goroutines are finished. Supposed to be
   154  // called after Stop.
   155  func (g *Group) Wait() {
   156  	// wait for processTicks routine to finish
   157  	<-g.doneProcessTicks
   158  }
   159  
   160  // Close closes the head file. The group must be stopped by this moment.
   161  func (g *Group) Close() {
   162  	if err := g.FlushAndSync(); err != nil {
   163  		g.Logger.Error("Error flushin to disk", "err", err)
   164  	}
   165  
   166  	g.mtx.Lock()
   167  	_ = g.Head.closeFile()
   168  	g.mtx.Unlock()
   169  }
   170  
   171  // HeadSizeLimit returns the current head size limit.
   172  func (g *Group) HeadSizeLimit() int64 {
   173  	g.mtx.Lock()
   174  	defer g.mtx.Unlock()
   175  	return g.headSizeLimit
   176  }
   177  
   178  // TotalSizeLimit returns total size limit of the group.
   179  func (g *Group) TotalSizeLimit() int64 {
   180  	g.mtx.Lock()
   181  	defer g.mtx.Unlock()
   182  	return g.totalSizeLimit
   183  }
   184  
   185  // MaxIndex returns index of the last file in the group.
   186  func (g *Group) MaxIndex() int {
   187  	g.mtx.Lock()
   188  	defer g.mtx.Unlock()
   189  	return g.maxIndex
   190  }
   191  
   192  // MinIndex returns index of the first file in the group.
   193  func (g *Group) MinIndex() int {
   194  	g.mtx.Lock()
   195  	defer g.mtx.Unlock()
   196  	return g.minIndex
   197  }
   198  
   199  // Write writes the contents of p into the current head of the group. It
   200  // returns the number of bytes written. If nn < len(p), it also returns an
   201  // error explaining why the write is short.
   202  // NOTE: Writes are buffered so they don't write synchronously
   203  // TODO: Make it halt if space is unavailable
   204  func (g *Group) Write(p []byte) (nn int, err error) {
   205  	g.mtx.Lock()
   206  	defer g.mtx.Unlock()
   207  	return g.headBuf.Write(p)
   208  }
   209  
   210  // WriteLine writes line into the current head of the group. It also appends "\n".
   211  // NOTE: Writes are buffered so they don't write synchronously
   212  // TODO: Make it halt if space is unavailable
   213  func (g *Group) WriteLine(line string) error {
   214  	g.mtx.Lock()
   215  	defer g.mtx.Unlock()
   216  	_, err := g.headBuf.Write([]byte(line + "\n"))
   217  	return err
   218  }
   219  
   220  // Buffered returns the size of the currently buffered data.
   221  func (g *Group) Buffered() int {
   222  	g.mtx.Lock()
   223  	defer g.mtx.Unlock()
   224  	return g.headBuf.Buffered()
   225  }
   226  
   227  // FlushAndSync writes any buffered data to the underlying file and commits the
   228  // current content of the file to stable storage (fsync).
   229  func (g *Group) FlushAndSync() error {
   230  	g.mtx.Lock()
   231  	defer g.mtx.Unlock()
   232  	err := g.headBuf.Flush()
   233  	if err == nil {
   234  		err = g.Head.Sync()
   235  	}
   236  	return err
   237  }
   238  
   239  func (g *Group) processTicks() {
   240  	defer close(g.doneProcessTicks)
   241  	for {
   242  		select {
   243  		case <-g.ticker.C:
   244  			g.checkHeadSizeLimit()
   245  			g.checkTotalSizeLimit()
   246  		case <-g.Quit():
   247  			return
   248  		}
   249  	}
   250  }
   251  
   252  // NOTE: this function is called manually in tests.
   253  func (g *Group) checkHeadSizeLimit() {
   254  	limit := g.HeadSizeLimit()
   255  	if limit == 0 {
   256  		return
   257  	}
   258  	size, err := g.Head.Size()
   259  	if err != nil {
   260  		g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path, "err", err)
   261  		return
   262  	}
   263  	if size >= limit {
   264  		g.RotateFile()
   265  	}
   266  }
   267  
   268  func (g *Group) checkTotalSizeLimit() {
   269  	limit := g.TotalSizeLimit()
   270  	if limit == 0 {
   271  		return
   272  	}
   273  
   274  	gInfo := g.readGroupInfo()
   275  	totalSize := gInfo.TotalSize
   276  	for i := 0; i < maxFilesToRemove; i++ {
   277  		index := gInfo.MinIndex + i
   278  		if totalSize < limit {
   279  			return
   280  		}
   281  		if index == gInfo.MaxIndex {
   282  			// Special degenerate case, just do nothing.
   283  			g.Logger.Error("Group's head may grow without bound", "head", g.Head.Path)
   284  			return
   285  		}
   286  		pathToRemove := filePathForIndex(g.Head.Path, index, gInfo.MaxIndex)
   287  		fInfo, err := os.Stat(pathToRemove)
   288  		if err != nil {
   289  			g.Logger.Error("Failed to fetch info for file", "file", pathToRemove)
   290  			continue
   291  		}
   292  		err = os.Remove(pathToRemove)
   293  		if err != nil {
   294  			g.Logger.Error("Failed to remove path", "path", pathToRemove)
   295  			return
   296  		}
   297  		totalSize -= fInfo.Size()
   298  	}
   299  }
   300  
   301  // RotateFile causes group to close the current head and assign it some index.
   302  // Note it does not create a new head.
   303  func (g *Group) RotateFile() {
   304  	g.mtx.Lock()
   305  	defer g.mtx.Unlock()
   306  
   307  	headPath := g.Head.Path
   308  
   309  	if err := g.headBuf.Flush(); err != nil {
   310  		panic(err)
   311  	}
   312  
   313  	if err := g.Head.Sync(); err != nil {
   314  		panic(err)
   315  	}
   316  
   317  	if err := g.Head.closeFile(); err != nil {
   318  		panic(err)
   319  	}
   320  
   321  	indexPath := filePathForIndex(headPath, g.maxIndex, g.maxIndex+1)
   322  	if err := os.Rename(headPath, indexPath); err != nil {
   323  		panic(err)
   324  	}
   325  
   326  	g.maxIndex++
   327  }
   328  
   329  // NewReader returns a new group reader.
   330  // CONTRACT: Caller must close the returned GroupReader.
   331  func (g *Group) NewReader(index int) (*GroupReader, error) {
   332  	r := newGroupReader(g)
   333  	err := r.SetIndex(index)
   334  	if err != nil {
   335  		return nil, err
   336  	}
   337  	return r, nil
   338  }
   339  
   340  // GroupInfo holds information about the group.
   341  type GroupInfo struct {
   342  	MinIndex  int   // index of the first file in the group, including head
   343  	MaxIndex  int   // index of the last file in the group, including head
   344  	TotalSize int64 // total size of the group
   345  	HeadSize  int64 // size of the head
   346  }
   347  
   348  // Returns info after scanning all files in g.Head's dir.
   349  func (g *Group) ReadGroupInfo() GroupInfo {
   350  	g.mtx.Lock()
   351  	defer g.mtx.Unlock()
   352  	return g.readGroupInfo()
   353  }
   354  
   355  // Index includes the head.
   356  // CONTRACT: caller should have called g.mtx.Lock
   357  func (g *Group) readGroupInfo() GroupInfo {
   358  	groupDir := filepath.Dir(g.Head.Path)
   359  	headBase := filepath.Base(g.Head.Path)
   360  	var minIndex, maxIndex int = -1, -1
   361  	var totalSize, headSize int64 = 0, 0
   362  
   363  	dir, err := os.Open(groupDir)
   364  	if err != nil {
   365  		panic(err)
   366  	}
   367  	defer dir.Close()
   368  	fiz, err := dir.Readdir(0)
   369  	if err != nil {
   370  		panic(err)
   371  	}
   372  
   373  	// For each file in the directory, filter by pattern
   374  	for _, fileInfo := range fiz {
   375  		if fileInfo.Name() == headBase {
   376  			fileSize := fileInfo.Size()
   377  			totalSize += fileSize
   378  			headSize = fileSize
   379  			continue
   380  		} else if strings.HasPrefix(fileInfo.Name(), headBase) {
   381  			fileSize := fileInfo.Size()
   382  			totalSize += fileSize
   383  			indexedFilePattern := regexp.MustCompile(`^.+\.([0-9]{3,})$`)
   384  			submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name()))
   385  			if len(submatch) != 0 {
   386  				// Matches
   387  				fileIndex, err := strconv.Atoi(string(submatch[1]))
   388  				if err != nil {
   389  					panic(err)
   390  				}
   391  				if maxIndex < fileIndex {
   392  					maxIndex = fileIndex
   393  				}
   394  				if minIndex == -1 || fileIndex < minIndex {
   395  					minIndex = fileIndex
   396  				}
   397  			}
   398  		}
   399  	}
   400  
   401  	// Now account for the head.
   402  	if minIndex == -1 {
   403  		// If there were no numbered files,
   404  		// then the head is index 0.
   405  		minIndex, maxIndex = 0, 0
   406  	} else {
   407  		// Otherwise, the head file is 1 greater
   408  		maxIndex++
   409  	}
   410  	return GroupInfo{minIndex, maxIndex, totalSize, headSize}
   411  }
   412  
   413  func filePathForIndex(headPath string, index int, maxIndex int) string {
   414  	if index == maxIndex {
   415  		return headPath
   416  	}
   417  	return fmt.Sprintf("%v.%03d", headPath, index)
   418  }
   419  
   420  //--------------------------------------------------------------------------------
   421  
   422  // GroupReader provides an interface for reading from a Group.
   423  type GroupReader struct {
   424  	*Group
   425  	mtx       sync.Mutex
   426  	curIndex  int
   427  	curFile   *os.File
   428  	curReader *bufio.Reader
   429  	curLine   []byte
   430  }
   431  
   432  func newGroupReader(g *Group) *GroupReader {
   433  	return &GroupReader{
   434  		Group:     g,
   435  		curIndex:  0,
   436  		curFile:   nil,
   437  		curReader: nil,
   438  		curLine:   nil,
   439  	}
   440  }
   441  
   442  // Close closes the GroupReader by closing the cursor file.
   443  func (gr *GroupReader) Close() error {
   444  	gr.mtx.Lock()
   445  	defer gr.mtx.Unlock()
   446  
   447  	if gr.curReader != nil {
   448  		err := gr.curFile.Close()
   449  		gr.curIndex = 0
   450  		gr.curReader = nil
   451  		gr.curFile = nil
   452  		gr.curLine = nil
   453  		return err
   454  	}
   455  	return nil
   456  }
   457  
   458  // Read implements io.Reader, reading bytes from the current Reader
   459  // incrementing index until enough bytes are read.
   460  func (gr *GroupReader) Read(p []byte) (n int, err error) {
   461  	lenP := len(p)
   462  	if lenP == 0 {
   463  		return 0, errors.New("given empty slice")
   464  	}
   465  
   466  	gr.mtx.Lock()
   467  	defer gr.mtx.Unlock()
   468  
   469  	// Open file if not open yet
   470  	if gr.curReader == nil {
   471  		if err = gr.openFile(gr.curIndex); err != nil {
   472  			return 0, err
   473  		}
   474  	}
   475  
   476  	// Iterate over files until enough bytes are read
   477  	var nn int
   478  	for {
   479  		nn, err = gr.curReader.Read(p[n:])
   480  		n += nn
   481  		switch {
   482  		case err == io.EOF:
   483  			if n >= lenP {
   484  				return n, nil
   485  			}
   486  			// Open the next file
   487  			if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
   488  				return n, err1
   489  			}
   490  		case err != nil:
   491  			return n, err
   492  		case nn == 0: // empty file
   493  			return n, err
   494  		}
   495  	}
   496  }
   497  
   498  // IF index > gr.Group.maxIndex, returns io.EOF
   499  // CONTRACT: caller should hold gr.mtx
   500  func (gr *GroupReader) openFile(index int) error {
   501  	// Lock on Group to ensure that head doesn't move in the meanwhile.
   502  	gr.Group.mtx.Lock()
   503  	defer gr.Group.mtx.Unlock()
   504  
   505  	if index > gr.Group.maxIndex {
   506  		return io.EOF
   507  	}
   508  
   509  	curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.maxIndex)
   510  	curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms)
   511  	if err != nil {
   512  		return err
   513  	}
   514  	curReader := bufio.NewReader(curFile)
   515  
   516  	// Update gr.cur*
   517  	if gr.curFile != nil {
   518  		gr.curFile.Close() // TODO return error?
   519  	}
   520  	gr.curIndex = index
   521  	gr.curFile = curFile
   522  	gr.curReader = curReader
   523  	gr.curLine = nil
   524  	return nil
   525  }
   526  
   527  // CurIndex returns cursor's file index.
   528  func (gr *GroupReader) CurIndex() int {
   529  	gr.mtx.Lock()
   530  	defer gr.mtx.Unlock()
   531  	return gr.curIndex
   532  }
   533  
   534  // SetIndex sets the cursor's file index to index by opening a file at this
   535  // position.
   536  func (gr *GroupReader) SetIndex(index int) error {
   537  	gr.mtx.Lock()
   538  	defer gr.mtx.Unlock()
   539  	return gr.openFile(index)
   540  }