github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/tm2/pkg/autofile/group.go (about)

     1  package autofile
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"regexp"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/gnolang/gno/tm2/pkg/service"
    18  )
    19  
    20  const (
    21  	defaultGroupCheckDuration = 5000 * time.Millisecond
    22  	defaultHeadSizeLimit      = 10 * 1024 * 1024       // 10MB
    23  	defaultTotalSizeLimit     = 1 * 1024 * 1024 * 1024 // 1GB
    24  	maxFilesToRemove          = 4                      // needs to be greater than 1
    25  )
    26  
    27  /*
    28  You can open a Group to keep restrictions on an AutoFile, like
    29  the maximum size of each chunk, and/or the total amount of bytes
    30  stored in the group.
    31  
    32  The first file to be written in the Group.Dir is the head file.
    33  
    34  	Dir/
    35  	- <HeadPath>
    36  
    37  Once the Head file reaches the size limit, it will be rotated.
    38  
    39  	Dir/
    40  	- <HeadPath>.000   // First rolled file
    41  	- <HeadPath>       // New head path, starts empty.
    42  										 // The implicit index is 001.
    43  
    44  As more files are written, the index numbers grow...
    45  
    46  	Dir/
    47  	- <HeadPath>.000   // First rolled file
    48  	- <HeadPath>.001   // Second rolled file
    49  	- ...
    50  	- <HeadPath>       // New head path
    51  
    52  The Group can also be used to binary-search for some line,
    53  assuming that marker lines are written occasionally.
    54  */
    55  type Group struct {
    56  	service.BaseService
    57  
    58  	ID      string
    59  	Head    *AutoFile // The head AutoFile to write to
    60  	headBuf *bufio.Writer
    61  	Dir     string // Directory that contains .Head
    62  
    63  	mtx            sync.Mutex
    64  	headSizeLimit  int64
    65  	totalSizeLimit int64
    66  	info           GroupInfo
    67  
    68  	// TODO: When we start deleting files, we need to start tracking GroupReaders
    69  	// and their dependencies.
    70  }
    71  
    72  // OpenGroup creates a new Group with head at headPath. It returns an error if
    73  // it fails to open head file.
    74  func OpenGroup(headPath string, groupOptions ...func(*Group)) (g *Group, err error) {
    75  	dir := path.Dir(headPath)
    76  	head, err := OpenAutoFile(headPath)
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  
    81  	g = &Group{
    82  		ID:             "group:" + head.ID,
    83  		Head:           head,
    84  		headBuf:        bufio.NewWriterSize(head, 4096*10),
    85  		Dir:            dir,
    86  		headSizeLimit:  defaultHeadSizeLimit,
    87  		totalSizeLimit: defaultTotalSizeLimit,
    88  		info: GroupInfo{
    89  			MinIndex:  0,
    90  			MaxIndex:  0,
    91  			TotalSize: 0,
    92  			HeadSize:  0,
    93  		},
    94  	}
    95  
    96  	for _, option := range groupOptions {
    97  		option(g)
    98  	}
    99  
   100  	g.BaseService = *service.NewBaseService(nil, "Group", g)
   101  	g.info = g.readGroupInfo()
   102  	return
   103  }
   104  
   105  // GroupHeadSizeLimit allows you to overwrite default head size limit - 10MB.
   106  func GroupHeadSizeLimit(limit int64) func(*Group) {
   107  	return func(g *Group) {
   108  		g.headSizeLimit = limit
   109  	}
   110  }
   111  
   112  // GroupTotalSizeLimit allows you to overwrite default total size limit of the group - 1GB.
   113  func GroupTotalSizeLimit(limit int64) func(*Group) {
   114  	return func(g *Group) {
   115  		g.totalSizeLimit = limit
   116  	}
   117  }
   118  
   119  // OnStart implements service.Service by starting the goroutine that checks file
   120  // and group limits.
   121  func (g *Group) OnStart() error {
   122  	return nil
   123  }
   124  
   125  // OnStop implements service.Service by stopping the goroutine described above.
   126  // NOTE: g.Head must be closed separately using Close.
   127  func (g *Group) OnStop() {
   128  	if err := g.FlushAndSync(); err != nil {
   129  		g.Logger.Error(
   130  			fmt.Sprintf("unable to gracefully flush data, %s", err.Error()),
   131  		)
   132  	}
   133  }
   134  
   135  // Wait blocks until all internal goroutines are finished. Supposed to be
   136  // called after Stop.
   137  func (g *Group) Wait() {
   138  	// Nothing to wait for.
   139  }
   140  
   141  // Close closes the head file. The group must be stopped by this moment.
   142  func (g *Group) Close() {
   143  	if err := g.FlushAndSync(); err != nil {
   144  		g.Logger.Error(
   145  			fmt.Sprintf("unable to gracefully flush data, %s", err.Error()),
   146  		)
   147  	}
   148  
   149  	g.mtx.Lock()
   150  	defer g.mtx.Unlock()
   151  
   152  	if err := g.Head.Close(); err != nil {
   153  		g.Logger.Error(
   154  			fmt.Sprintf("unable to gracefully close group head, %s", err.Error()),
   155  		)
   156  	}
   157  }
   158  
   159  // HeadSizeLimit returns the current head size limit.
   160  func (g *Group) HeadSizeLimit() int64 {
   161  	g.mtx.Lock()
   162  	defer g.mtx.Unlock()
   163  	return g.headSizeLimit
   164  }
   165  
   166  // TotalSizeLimit returns total size limit of the group.
   167  func (g *Group) TotalSizeLimit() int64 {
   168  	g.mtx.Lock()
   169  	defer g.mtx.Unlock()
   170  	return g.totalSizeLimit
   171  }
   172  
   173  // MaxIndex returns index of the last file in the group.
   174  func (g *Group) MaxIndex() int {
   175  	g.mtx.Lock()
   176  	defer g.mtx.Unlock()
   177  	return g.info.MaxIndex
   178  }
   179  
   180  // MinIndex returns index of the first file in the group.
   181  func (g *Group) MinIndex() int {
   182  	g.mtx.Lock()
   183  	defer g.mtx.Unlock()
   184  	return g.info.MinIndex
   185  }
   186  
   187  func (g *Group) TotalSize() int64 {
   188  	g.mtx.Lock()
   189  	defer g.mtx.Unlock()
   190  	return g.info.TotalSize
   191  }
   192  
   193  func (g *Group) HeadSize() int64 {
   194  	g.mtx.Lock()
   195  	defer g.mtx.Unlock()
   196  	return g.info.HeadSize
   197  }
   198  
   199  // Write writes the contents of p into the current head of the group. It
   200  // returns the number of bytes written. If nn < len(p), it also returns an
   201  // error explaining why the write is short.
   202  // NOTE: Writes are buffered so they don't write synchronously
   203  // TODO: Make it halt if space is unavailable
   204  func (g *Group) Write(p []byte) (nn int, err error) {
   205  	g.mtx.Lock()
   206  	defer g.mtx.Unlock()
   207  	nn, err = g.headBuf.Write(p)
   208  
   209  	// Update limits
   210  	g.info.TotalSize += int64(nn)
   211  	g.info.HeadSize += int64(nn)
   212  
   213  	// Maybe rotate
   214  	if err == nil && 0 < g.headSizeLimit && g.headSizeLimit <= g.info.HeadSize {
   215  		g.rotateFile()
   216  	}
   217  	return
   218  }
   219  
   220  // WriteLine writes line into the current head of the group. It also appends "\n".
   221  // NOTE: Writes are buffered so they don't write synchronously
   222  // TODO: Make it halt if space is unavailable
   223  func (g *Group) WriteLine(line string) error {
   224  	g.mtx.Lock()
   225  	defer g.mtx.Unlock()
   226  	nn, err := g.headBuf.Write([]byte(line + "\n"))
   227  
   228  	// Update limits
   229  	g.info.TotalSize += int64(nn)
   230  	g.info.HeadSize += int64(nn)
   231  
   232  	// Maybe rotate
   233  	if err == nil && 0 < g.headSizeLimit && g.headSizeLimit <= g.info.HeadSize {
   234  		g.rotateFile()
   235  	}
   236  	return err
   237  }
   238  
   239  // Buffered returns the size of the currently buffered data.
   240  func (g *Group) Buffered() int {
   241  	g.mtx.Lock()
   242  	defer g.mtx.Unlock()
   243  	return g.headBuf.Buffered()
   244  }
   245  
   246  // FlushAndSync writes any buffered data to the underlying file and commits the
   247  // current content of the file to stable storage (fsync).
   248  func (g *Group) FlushAndSync() error {
   249  	g.mtx.Lock()
   250  	defer g.mtx.Unlock()
   251  	err := g.headBuf.Flush()
   252  	if err == nil {
   253  		err = g.Head.Sync()
   254  	}
   255  	return err
   256  }
   257  
   258  func (g *Group) ensureTotalSizeLimit() {
   259  	limit := g.totalSizeLimit
   260  	if limit == 0 {
   261  		return
   262  	}
   263  
   264  	for i := 0; i < maxFilesToRemove; i++ {
   265  		index := g.info.MinIndex + i
   266  		if g.info.TotalSize < limit {
   267  			return
   268  		}
   269  		if index == g.info.MaxIndex {
   270  			// Special degenerate case, just do nothing.
   271  			// group's head may grow without bound.
   272  			// TODO: an occasional warning?
   273  			return
   274  		}
   275  		pathToRemove := filePathForIndex(g.Head.Path, index, g.info.MaxIndex)
   276  		fInfo, err := os.Stat(pathToRemove)
   277  		if err != nil {
   278  			g.Logger.Error("Failed to fetch info for file", "file", pathToRemove)
   279  			g.info.MinIndex = index + 1 // bump MinIndex.
   280  			continue
   281  		}
   282  		err = os.Remove(pathToRemove)
   283  		if err != nil {
   284  			g.Logger.Error("Failed to remove path", "path", pathToRemove)
   285  			return
   286  		}
   287  		g.info.MinIndex = index + 1 // bump MinIndex.
   288  		g.info.TotalSize -= fInfo.Size()
   289  	}
   290  }
   291  
   292  // RotateFile causes group to close the current head and assign it some index.
   293  // After rotation, the earliest chunk may be removed if total size > totalSizeLimit.
   294  // Note it does not create a new head.
   295  func (g *Group) RotateFile() {
   296  	g.mtx.Lock()
   297  	defer g.mtx.Unlock()
   298  	g.rotateFile()
   299  }
   300  
   301  func (g *Group) rotateFile() {
   302  	headPath := g.Head.Path
   303  
   304  	if err := g.headBuf.Flush(); err != nil {
   305  		panic(err)
   306  	}
   307  
   308  	if err := g.Head.Sync(); err != nil {
   309  		panic(err)
   310  	}
   311  
   312  	if err := g.Head.closeFile(); err != nil {
   313  		panic(err)
   314  	}
   315  
   316  	indexPath := filePathForIndex(headPath, g.info.MaxIndex, g.info.MaxIndex+1)
   317  	if err := os.Rename(headPath, indexPath); err != nil {
   318  		panic(err)
   319  	}
   320  
   321  	g.info.HeadSize = 0
   322  	g.info.MaxIndex++
   323  
   324  	g.ensureTotalSizeLimit()
   325  }
   326  
   327  // NewReader returns a new group reader.
   328  // If endIndex != 0, reads until endIndex exclusive.
   329  // CONTRACT: Caller must close the returned GroupReader.
   330  func (g *Group) NewReader(startIndex int, endIndex int) (*GroupReader, error) {
   331  	r := newGroupReader(g, startIndex, endIndex)
   332  	return r, nil
   333  }
   334  
   335  // GroupInfo holds information about the group.
   336  type GroupInfo struct {
   337  	MinIndex  int   // index of the first file in the group, including head
   338  	MaxIndex  int   // index of the last file in the group, including head
   339  	TotalSize int64 // total size of the group
   340  	HeadSize  int64 // size of the head
   341  }
   342  
   343  // Returns info after scanning all files in g.Head's dir.
   344  func (g *Group) ReadGroupInfo() GroupInfo {
   345  	g.mtx.Lock()
   346  	defer g.mtx.Unlock()
   347  	return g.readGroupInfo()
   348  }
   349  
   350  var indexedFilePattern = regexp.MustCompile(`^.+\.([0-9]{3,})$`)
   351  
   352  // Index includes the head.
   353  // CONTRACT: caller should have called g.mtx.Lock
   354  func (g *Group) readGroupInfo() GroupInfo {
   355  	groupDir := filepath.Dir(g.Head.Path)
   356  	headBase := filepath.Base(g.Head.Path)
   357  	var minIndex, maxIndex int = -1, -1
   358  	var totalSize, headSize int64 = 0, 0
   359  
   360  	dir, err := os.Open(groupDir)
   361  	if err != nil {
   362  		panic(err)
   363  	}
   364  	defer dir.Close()
   365  	fiz, err := dir.Readdir(0)
   366  	if err != nil {
   367  		panic(err)
   368  	}
   369  
   370  	// For each file in the directory, filter by pattern
   371  	for _, fileInfo := range fiz {
   372  		if fileInfo.Name() == headBase {
   373  			fileSize := fileInfo.Size()
   374  			totalSize += fileSize
   375  			headSize = fileSize
   376  			continue
   377  		} else if strings.HasPrefix(fileInfo.Name(), headBase) {
   378  			fileSize := fileInfo.Size()
   379  			totalSize += fileSize
   380  			submatch := indexedFilePattern.FindSubmatch([]byte(fileInfo.Name()))
   381  			if len(submatch) != 0 {
   382  				// Matches
   383  				fileIndex, err := strconv.Atoi(string(submatch[1]))
   384  				if err != nil {
   385  					panic(err)
   386  				}
   387  				if maxIndex < fileIndex {
   388  					maxIndex = fileIndex
   389  				}
   390  				if minIndex == -1 || fileIndex < minIndex {
   391  					minIndex = fileIndex
   392  				}
   393  			}
   394  		}
   395  	}
   396  
   397  	// TODO ensure that all files are present between min and max.
   398  
   399  	// Now account for the head.
   400  	if minIndex == -1 {
   401  		// If there were no numbered files,
   402  		// then the head is index 0.
   403  		minIndex, maxIndex = 0, 0
   404  	} else {
   405  		// Otherwise, the head file is 1 greater
   406  		maxIndex++
   407  	}
   408  	return GroupInfo{minIndex, maxIndex, totalSize, headSize}
   409  }
   410  
   411  func filePathForIndex(headPath string, index int, maxIndex int) string {
   412  	if index == maxIndex {
   413  		return headPath
   414  	}
   415  	return fmt.Sprintf("%v.%03d", headPath, index)
   416  }
   417  
   418  // --------------------------------------------------------------------------------
   419  
   420  // GroupReader provides an interface for reading from a Group.
   421  type GroupReader struct {
   422  	*Group
   423  	mtx        sync.Mutex
   424  	startIndex int
   425  	endIndex   int
   426  	curIndex   int
   427  	curFile    *os.File
   428  	curReader  *bufio.Reader
   429  	curLine    []byte
   430  }
   431  
   432  func newGroupReader(g *Group, startIndex int, endIndex int) *GroupReader {
   433  	gr := &GroupReader{
   434  		Group:      g,
   435  		startIndex: startIndex,
   436  		endIndex:   endIndex,
   437  		curIndex:   0,
   438  		curFile:    nil,
   439  		curReader:  nil,
   440  		curLine:    nil,
   441  	}
   442  	gr.openFile(startIndex)
   443  	return gr
   444  }
   445  
   446  // Close closes the GroupReader by closing the cursor file.
   447  func (gr *GroupReader) Close() error {
   448  	gr.mtx.Lock()
   449  	defer gr.mtx.Unlock()
   450  
   451  	if gr.curReader != nil {
   452  		err := gr.curFile.Close()
   453  		gr.curIndex = 0
   454  		gr.curReader = nil
   455  		gr.curFile = nil
   456  		gr.curLine = nil
   457  		return err
   458  	}
   459  	return nil
   460  }
   461  
   462  // Read implements io.Reader, reading bytes from the current Reader
   463  // incrementing index until enough bytes are read.
   464  func (gr *GroupReader) Read(p []byte) (n int, err error) {
   465  	lenP := len(p)
   466  	if lenP == 0 {
   467  		return 0, errors.New("given empty slice")
   468  	}
   469  
   470  	gr.mtx.Lock()
   471  	defer gr.mtx.Unlock()
   472  
   473  	// Open file if not open yet
   474  	if gr.curReader == nil {
   475  		if err = gr.openFile(gr.curIndex); err != nil {
   476  			return 0, err
   477  		}
   478  	}
   479  
   480  	// Iterate over files until enough bytes are read
   481  	var nn int
   482  	for {
   483  		nn, err = gr.curReader.Read(p[n:])
   484  		n += nn
   485  		switch {
   486  		case errors.Is(err, io.EOF):
   487  			if n >= lenP {
   488  				return n, nil
   489  			}
   490  			// Open the next file
   491  			if err1 := gr.openFile(gr.curIndex + 1); err1 != nil {
   492  				return n, err1
   493  			}
   494  		case err != nil:
   495  			return n, err
   496  		case nn == 0: // empty file
   497  			return n, err
   498  		}
   499  	}
   500  }
   501  
   502  // IF index > gr.Group.maxIndex, returns io.EOF
   503  // CONTRACT: caller should hold gr.mtx
   504  func (gr *GroupReader) openFile(index int) error {
   505  	// Lock on Group to ensure that head doesn't move in the meanwhile.
   506  	gr.Group.mtx.Lock()
   507  	defer gr.Group.mtx.Unlock()
   508  
   509  	if gr.Group.info.MaxIndex < index {
   510  		return io.EOF
   511  	}
   512  	if gr.endIndex != 0 && gr.endIndex <= index {
   513  		return io.EOF
   514  	}
   515  
   516  	curFilePath := filePathForIndex(gr.Head.Path, index, gr.Group.info.MaxIndex)
   517  	curFile, err := os.OpenFile(curFilePath, os.O_RDONLY|os.O_CREATE, autoFilePerms)
   518  	if err != nil {
   519  		return err
   520  	}
   521  	curReader := bufio.NewReader(curFile)
   522  
   523  	// Update gr.cur*
   524  	if gr.curFile != nil {
   525  		gr.curFile.Close() // TODO return error?
   526  	}
   527  	gr.curIndex = index
   528  	gr.curFile = curFile
   529  	gr.curReader = curReader
   530  	gr.curLine = nil
   531  	return nil
   532  }
   533  
   534  // CurIndex returns cursor's file index.
   535  func (gr *GroupReader) CurIndex() int {
   536  	gr.mtx.Lock()
   537  	defer gr.mtx.Unlock()
   538  	return gr.curIndex
   539  }