github.com/scottcagno/storage@v1.8.0/pkg/_junk/_x/file/manager.go (about)

     1  package file
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/scottcagno/storage/pkg/_junk/_lsmtree/encoding/binary"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"runtime"
    10  	"strings"
    11  	"sync"
    12  )
    13  
// SegmentManager is a segmented file structure. It keeps the list of
// Segment files found under a base directory, a pointer to the Segment
// currently being appended to, and the binary reader and writer used to
// access Segment files.
type SegmentManager struct {
	lock       sync.RWMutex   // lock is taken by the methods of this type before they touch the fields below
	base       string         // base is the base directory path the Segment files live in
	r          *binary.Reader // r is the binary reader; it is re-pointed at whichever Segment file is being read
	w          *binary.Writer // w is the binary writer used to append entries
	firstIndex int64          // firstIndex is the index of the first entry
	lastIndex  int64          // lastIndex is the index of the last entry
	segments   []*Segment     // segments is the list of the current file segments
	active     *Segment       // active is the current active Segment
}
    25  
    26  // Open opens and returns a new segmented file structure
    27  func Open(path string) (*SegmentManager, error) {
    28  	// make sure we are working with absolute paths
    29  	base, err := filepath.Abs(path)
    30  	if err != nil {
    31  		return nil, err
    32  	}
    33  	// sanitize any path separators
    34  	base = filepath.ToSlash(base)
    35  	// create any directories if they are not there
    36  	err = os.MkdirAll(base, os.ModeDir)
    37  	if err != nil {
    38  		return nil, err
    39  	}
    40  	// create a new segmented file instance
    41  	sf := &SegmentManager{
    42  		base:       base,
    43  		firstIndex: 0,
    44  		lastIndex:  1,
    45  		segments:   make([]*Segment, 0),
    46  	}
    47  	// attempt to load segments
    48  	err = sf.loadSegmentIndex()
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  	// return segmented file
    53  	return sf, nil
    54  }
    55  
    56  // loadIndex initializes the Segment index. It looks for Segment
    57  // files in the base directory and attempts to index the Segment as
    58  // well as any of the entries within the Segment. If this is a new
    59  // instance, it will create a new Segment that is ready for writing.
    60  func (sf *SegmentManager) loadSegmentIndex() error {
    61  	// lock
    62  	sf.lock.Lock()
    63  	defer sf.lock.Unlock()
    64  	// get the files in the base directory path
    65  	files, err := os.ReadDir(sf.base)
    66  	if err != nil {
    67  		return err
    68  	}
    69  	// list the files in the base directory path and attempt to index the entries
    70  	for _, file := range files {
    71  		// skip non data files
    72  		if file.IsDir() ||
    73  			!strings.HasPrefix(file.Name(), FilePrefix) ||
    74  			!strings.HasSuffix(file.Name(), FileSuffix) {
    75  			continue // skip this, continue on to the next file
    76  		}
    77  		// attempt to load Segment (and index entries in Segment)
    78  		s, err := OpenSegment(filepath.Join(sf.base, file.Name()))
    79  		if err != nil {
    80  			return err
    81  		}
    82  		// Segment has been loaded successfully, append to the segments list
    83  		sf.segments = append(sf.segments, s)
    84  	}
    85  	// check to see if any segments were found. If not, initialize a new one
    86  	if len(sf.segments) == 0 {
    87  		// create a new Segment file
    88  		s, err := CreateSegment(sf.base, sf.lastIndex)
    89  		if err != nil {
    90  			return err
    91  		}
    92  		// Segment has been created successfully, append to the segments list
    93  		sf.segments = append(sf.segments, s)
    94  	}
    95  	// segments have either been loaded or created, so now we
    96  	// should go about updating the active Segment pointer to
    97  	// point to the "tail" (the last Segment in the Segment list)
    98  	sf.active = sf.getLastSegment()
    99  	// load active Segment entry index
   100  	sf.active.loadEntryIndex()
   101  	// we should be good to go, lets attempt to open a file
   102  	// reader to work with the active Segment
   103  	sf.r, err = binary.OpenReader(sf.active.path)
   104  	if err != nil {
   105  		return err
   106  	}
   107  	// and then attempt to open a file writer to also work
   108  	// with the active Segment, so we can begin appending data
   109  	sf.w, err = binary.OpenWriter(sf.active.path)
   110  	if err != nil {
   111  		return err
   112  	}
   113  	// finally, update the firstIndex and lastIndex
   114  	sf.firstIndex = sf.segments[0].index
   115  	// and update last index
   116  	sf.lastIndex = sf.getLastSegment().getLastIndex()
   117  	return nil
   118  }
   119  
   120  func (sf *SegmentManager) LoadSegment(index int64) (*Segment, error) {
   121  	s := sf.active
   122  	if index >= s.index {
   123  		return s, nil
   124  	}
   125  	s = sf.segments[sf.findSegmentIndex(index)]
   126  	if len(s.entries) == 0 {
   127  		_, err := s.loadEntryIndex()
   128  		if err != nil {
   129  			return nil, err
   130  		}
   131  	}
   132  	sf.active = s
   133  	return s, nil
   134  }
   135  
   136  // findSegmentIndex performs binary search to find the Segment containing provided index
   137  func (sf *SegmentManager) findSegmentIndex(index int64) int {
   138  	// declare for later
   139  	i, j := 0, len(sf.segments)
   140  	// otherwise, perform binary search
   141  	for i < j {
   142  		h := i + (j-i)/2
   143  		if index >= sf.segments[h].index {
   144  			i = h + 1
   145  		} else {
   146  			j = h
   147  		}
   148  	}
   149  	return i - 1
   150  }
   151  
   152  // getLastSegment returns the tail Segment in the segments index list
   153  func (sf *SegmentManager) getLastSegment() *Segment {
   154  	return sf.segments[len(sf.segments)-1]
   155  }
   156  
   157  // cycleSegment adds a new Segment to replace the current (active) Segment
   158  func (sf *SegmentManager) cycleSegment2(err error) error {
   159  	// check to see if we need to cycle
   160  	if err == nil && err != ErrSegmentFull {
   161  		return nil
   162  	}
   163  	// sync and close current file Segment
   164  	err = sf.w.Close()
   165  	if err != nil {
   166  		return err
   167  	}
   168  	// create a new Segment file
   169  	s, err := CreateSegment(sf.base, sf.lastIndex)
   170  	if err != nil {
   171  		return err
   172  	}
   173  	// add Segment to Segment index list
   174  	sf.segments = append(sf.segments, s)
   175  	// update the active Segment pointer
   176  	sf.active = sf.getLastSegment()
   177  	// open file writer associated with active Segment
   178  	sf.w, err = binary.OpenWriter(sf.active.path)
   179  	if err != nil {
   180  		return err
   181  	}
   182  	// update file reader associated with the active Segment
   183  	sf.r, err = binary.OpenReader(sf.active.path)
   184  	if err != nil {
   185  		return err
   186  	}
   187  	return nil
   188  }
   189  
   190  // cycleSegment adds a new Segment to replace the current (active) Segment
   191  func (sf *SegmentManager) cycleSegment(remaining int64) error {
   192  	// check to see if we need to cycle
   193  	if remaining > 0 {
   194  		return nil
   195  	}
   196  	// sync and close current file Segment
   197  	err := sf.w.Close()
   198  	if err != nil {
   199  		return err
   200  	}
   201  	// create a new Segment file
   202  	s, err := CreateSegment(sf.base, sf.lastIndex)
   203  	if err != nil {
   204  		return err
   205  	}
   206  	// add Segment to Segment index list
   207  	sf.segments = append(sf.segments, s)
   208  	// update the active Segment pointer
   209  	sf.active = sf.getLastSegment()
   210  	// open file writer associated with active Segment
   211  	sf.w, err = binary.OpenWriter(sf.active.path)
   212  	if err != nil {
   213  		return err
   214  	}
   215  	// update file reader associated with the active Segment
   216  	sf.r, err = binary.OpenReader(sf.active.path)
   217  	if err != nil {
   218  		return err
   219  	}
   220  	return nil
   221  }
   222  
   223  // Read reads an entry from the segmented file at the specified index
   224  func (sf *SegmentManager) Read(index int64) (string, []byte, error) {
   225  	// read lock
   226  	sf.lock.RLock()
   227  	defer sf.lock.RUnlock()
   228  	// error checking
   229  	if index < sf.firstIndex || index > sf.lastIndex {
   230  		return "", nil, ErrOutOfBounds
   231  	}
   232  	var err error
   233  	// find the Segment containing the provided index
   234  	//s := sf.segments[sf.findSegmentIndex(index)]
   235  	s, err := sf.LoadSegment(index)
   236  	if err != nil {
   237  		return "", nil, err
   238  	}
   239  	// make sure we are reading from the correct file
   240  	sf.r, err = sf.r.ReadFrom(s.path)
   241  	if err != nil {
   242  		return "", nil, err
   243  	}
   244  	// find the offset for the entry containing the provided index
   245  	offset := s.entries[s.findEntryIndex(index)].offset
   246  	// read entry at offset
   247  	e, err := sf.r.ReadEntryAt(offset)
   248  	if err != nil {
   249  		return "", nil, err
   250  	}
   251  	return string(e.Key), e.Value, nil
   252  }
   253  
   254  // ReadDataEntryUsingSegment reads an entry from the segmented file at the specified index
   255  func (sf *SegmentManager) ReadDataEntryUsingSegment(index int64) (string, []byte, error) {
   256  	// read lock
   257  	sf.lock.RLock()
   258  	defer sf.lock.RUnlock()
   259  	// error checking
   260  	if index < sf.firstIndex || index > sf.lastIndex {
   261  		return "", nil, ErrOutOfBounds
   262  	}
   263  	var err error
   264  	// find the Segment containing the provided index
   265  	s, err := sf.LoadSegment(index)
   266  	if err != nil {
   267  		return "", nil, err
   268  	}
   269  	e, err := s.ReadDataEntry(index)
   270  	if err != nil {
   271  		return "", nil, err
   272  	}
   273  	return string(e.Key), e.Value, nil
   274  }
   275  
   276  // WriteIndexEntry writes an entry to the segmented file in an append-only fashion
   277  func (sf *SegmentManager) _Write(key string, value []byte) (int64, error) {
   278  	// lock
   279  	sf.lock.Lock()
   280  	defer sf.lock.Unlock()
   281  	// write entry
   282  	e := &binary.DataEntry{
   283  		Id:    sf.lastIndex,
   284  		Key:   []byte(key),
   285  		Value: value,
   286  	}
   287  	offset, err := sf.w.WriteEntry(e)
   288  	if err != nil {
   289  		return 0, err
   290  	}
   291  	// add new entry to the Segment index
   292  	sf.active.entries = append(sf.active.entries, entry{
   293  		index:  sf.lastIndex,
   294  		offset: offset,
   295  	})
   296  	// update lastIndex
   297  	sf.lastIndex++
   298  	// grab the current offset written
   299  	offset2, err := sf.w.Offset()
   300  	if err != nil {
   301  		return 0, err
   302  	}
   303  	// update Segment remaining
   304  	sf.active.remaining -= offset2 - offset
   305  	// check to see if the active Segment needs to be cycled
   306  	if sf.active.remaining < 64 {
   307  		err = sf.cycleSegment(int64(sf.active.remaining - 64))
   308  		if err != nil {
   309  			return 0, err
   310  		}
   311  	}
   312  	return sf.lastIndex - 1, nil
   313  }
   314  
   315  // Write2 writes an entry to the segmented file in an append-only fashion
   316  func (sf *SegmentManager) Write(key string, value []byte) (int64, error) {
   317  	// lock
   318  	sf.lock.Lock()
   319  	defer sf.lock.Unlock()
   320  	e := &binary.DataEntry{
   321  		Id:    sf.lastIndex,
   322  		Key:   []byte(key),
   323  		Value: value,
   324  	}
   325  	// write entry
   326  	offset, err := sf.w.WriteEntry(e)
   327  	if err != nil {
   328  		return -1, err
   329  	}
   330  	// add new entry to the Segment index
   331  	sf.active.entries = append(sf.active.entries, entry{
   332  		index:  sf.lastIndex,
   333  		offset: offset,
   334  	})
   335  	// update lastIndex
   336  	sf.lastIndex++
   337  	// get updated offset to check cycle
   338  	offset, err = sf.w.Offset()
   339  	if err != nil {
   340  		return -1, err
   341  	}
   342  	// check to see if the active Segment needs to be cycled
   343  	err = sf.cycleSegment(int64(maxFileSize) - offset)
   344  	if err != nil {
   345  		return -1, err
   346  	}
   347  	return sf.lastIndex - 1, nil
   348  }
   349  
   350  func (sf *SegmentManager) WriteDataEntryUsingSegment(key string, value []byte) (int64, error) {
   351  	// lock
   352  	sf.lock.Lock()
   353  	defer sf.lock.Unlock()
   354  	e := &binary.DataEntry{
   355  		Id:    sf.lastIndex,
   356  		Key:   []byte(key),
   357  		Value: value,
   358  	}
   359  	// write entry
   360  	offset, err := sf.active.WriteDataEntry(e)
   361  	if err != nil {
   362  		return -1, err
   363  	}
   364  	// check cycle segment
   365  	err = sf.cycleSegment(maxFileSize - offset + 64)
   366  	// update lastIndex
   367  	sf.lastIndex++
   368  	// return index, and nil
   369  	return sf.lastIndex - 1, nil
   370  }
   371  
   372  // Scan provides an iterator method for the segmented file
   373  func (sf *SegmentManager) Scan(iter func(index int64, key string, value []byte) bool) error {
   374  	// lock
   375  	sf.lock.Lock()
   376  	defer sf.lock.Unlock()
   377  	// init for any errors
   378  	var err error
   379  	// range the Segment index
   380  	for _, sidx := range sf.segments {
   381  		fmt.Printf("Segment: %s\n", sidx)
   382  		// make sure we are reading the right data
   383  		sf.r, err = sf.r.ReadFrom(sidx.path)
   384  		if err != nil {
   385  			return err
   386  		}
   387  		// range the Segment entries index
   388  		for _, eidx := range sidx.entries {
   389  			// read entry
   390  			e, err := sf.r.ReadEntryAt(eidx.offset)
   391  			if err != nil {
   392  				if err == io.EOF || err == io.ErrUnexpectedEOF {
   393  					break
   394  				}
   395  				return err
   396  			}
   397  			// check entry against iterator boolean function
   398  			if !iter(e.Id, string(e.Key), e.Value) {
   399  				// if it returns false, then process next entry
   400  				continue
   401  			}
   402  		}
   403  		// outside entry loop
   404  	}
   405  	// outside Segment loop
   406  	return nil
   407  }
   408  
   409  // TruncateFront removes all segments and entries before specified index
   410  func (sf *SegmentManager) TruncateFront(index int64) error {
   411  	// lock
   412  	sf.lock.Lock()
   413  	defer sf.lock.Unlock()
   414  	// perform bounds check
   415  	if index == 0 ||
   416  		sf.lastIndex == 0 ||
   417  		index < sf.firstIndex || index > sf.lastIndex {
   418  		return ErrOutOfBounds
   419  	}
   420  	// more easy checking
   421  	if index == sf.firstIndex {
   422  		return nil // nothing to truncate
   423  	}
   424  	// locate segment in the segment index list containing specified index
   425  	sidx := sf.findSegmentIndex(index)
   426  	// remove all whole segments before index "sidx"
   427  	for i := 0; i < sidx; i++ {
   428  		// remove Segment file
   429  		err := os.Remove(sf.segments[i].path)
   430  		if err != nil {
   431  			return err
   432  		}
   433  	}
   434  	// remove segments from Segment index (cut, i-j)
   435  	i, j := 0, sidx
   436  	copy(sf.segments[i:], sf.segments[j:])
   437  	for k, n := len(sf.segments)-j+i, len(sf.segments); k < n; k++ {
   438  		sf.segments[k] = nil // or the zero value of T
   439  	}
   440  	sf.segments = sf.segments[:len(sf.segments)-j+i]
   441  	// update firstIndex
   442  	sf.firstIndex = sf.segments[0].index
   443  	// prepare to re-write partial Segment
   444  	//var err error
   445  	tmpfd, err := os.Create(filepath.Join(sf.base,
   446  		fmt.Sprintf("%stmp-part%s", FilePrefix, FileSuffix)))
   447  	if err != nil {
   448  		return err
   449  	}
   450  	// after the Segment index cut, Segment 0 will
   451  	// contain the partials that we must re-write
   452  	if sf.segments[0].index < index {
   453  		// make sure we are reading from the correct path
   454  		sf.r, err = sf.r.ReadFrom(sf.segments[0].path)
   455  		if err != nil {
   456  			return err
   457  		}
   458  		// init temp entries list
   459  		var entries []entry
   460  		// make sure entry index is loaded
   461  		if !sf.segments[0].hasEntriesLoaded() {
   462  			_, err := sf.segments[0].loadEntryIndex()
   463  			if err != nil {
   464  				return err
   465  			}
   466  		}
   467  		// range the entries within this Segment to find
   468  		// the ones that are greater than the index and
   469  		// write those to a temporary buffer....
   470  		for _, ent := range sf.segments[0].entries {
   471  			if ent.index < index {
   472  				continue // skip
   473  			}
   474  			// read entry
   475  			e, err := sf.r.ReadEntryAt(ent.offset)
   476  			if err != nil {
   477  				return err
   478  			}
   479  			// write entry to temp file
   480  			ent.offset, err = binary.EncodeEntry(tmpfd, e)
   481  			if err != nil {
   482  				return err
   483  			}
   484  			// sync write
   485  			err = tmpfd.Sync()
   486  			if err != nil {
   487  				return err
   488  			}
   489  			// append to a new entries list
   490  			entries = append(entries, ent)
   491  		}
   492  		// move reader back to active Segment file
   493  		sf.r, err = sf.r.ReadFrom(sf.active.path)
   494  		if err != nil {
   495  			return err
   496  		}
   497  		// close temp file
   498  		err = tmpfd.Close()
   499  		if err != nil {
   500  			return err
   501  		}
   502  		// remove partial Segment file
   503  		err = os.Remove(sf.segments[0].path)
   504  		if err != nil {
   505  			return err
   506  		}
   507  		// change temp file name
   508  		err = os.Rename(tmpfd.Name(), sf.segments[0].path)
   509  		if err != nil {
   510  			return err
   511  		}
   512  		// update Segment
   513  		sf.segments[0].entries = entries
   514  		sf.segments[0].index = entries[0].index
   515  	}
   516  	return nil
   517  }
   518  
// TruncateBack is not implemented yet. It currently ignores index and
// always returns nil.
func (sf *SegmentManager) TruncateBack(index int64) error {
	// TODO: implement
	return nil
}
   523  
// Sort is intended to (stable) sort entries, and re-write them, in forward
// or reverse lexicographic order. It is not implemented yet and currently
// always returns nil.
func (sf *SegmentManager) Sort() error {
	// TODO: implement
	return nil
}
   529  
// CompactAndMerge is intended to remove any blank sections or duplicate
// entries and then merge (re-write) the data into a different Segment size
// using the maxSegSize provided. It is not implemented yet: it currently
// ignores maxSegSize and always returns nil.
func (sf *SegmentManager) CompactAndMerge(maxSegSize int64) error {
	// TODO: implement
	return nil
}
   536  
   537  // Count returns the number of entries currently in the segmented file
   538  func (sf *SegmentManager) Count() int {
   539  	// lock
   540  	sf.lock.Lock()
   541  	defer sf.lock.Unlock()
   542  	// get count
   543  	var count int
   544  	for _, s := range sf.segments {
   545  		count += len(s.entries)
   546  	}
   547  	// return count
   548  	return count
   549  }
   550  
// Path returns the base path of the segmented file. It reads the field
// without taking the lock.
func (sf *SegmentManager) Path() string {
	return sf.base
}
   554  
   555  // FirstIndex returns the segmented files first index
   556  func (sf *SegmentManager) FirstIndex() int64 {
   557  	// lock
   558  	sf.lock.Lock()
   559  	defer sf.lock.Unlock()
   560  	return sf.firstIndex
   561  }
   562  
   563  // LastIndex returns the segmented files first index
   564  func (sf *SegmentManager) LastIndex() int64 {
   565  	// lock
   566  	sf.lock.Lock()
   567  	defer sf.lock.Unlock()
   568  	return sf.lastIndex
   569  }
   570  
   571  // Close syncs and closes the segmented file
   572  func (sf *SegmentManager) Close() error {
   573  	// lock
   574  	sf.lock.Lock()
   575  	defer sf.lock.Unlock()
   576  	// sync and close writer
   577  	err := sf.w.Close()
   578  	if err != nil {
   579  		return err
   580  	}
   581  	// close reader
   582  	err = sf.r.Close()
   583  	if err != nil {
   584  		return err
   585  	}
   586  	// clean everything else up
   587  	sf.base = ""
   588  	sf.r = nil
   589  	sf.w = nil
   590  	sf.firstIndex = 0
   591  	sf.lastIndex = 0
   592  	sf.segments = nil
   593  	sf.active = nil
   594  	// force gc for good measure
   595  	runtime.GC()
   596  	return nil
   597  }
   598  
   599  // String is the stringer method for the segmented file
   600  func (sf *SegmentManager) String() string {
   601  	var ss string
   602  	ss += fmt.Sprintf("\n\n[segmented file]\n")
   603  	ss += fmt.Sprintf("base: %q\n", sf.base)
   604  	ss += fmt.Sprintf("firstIndex: %d\n", sf.firstIndex)
   605  	ss += fmt.Sprintf("lastIndex: %d\n", sf.lastIndex)
   606  	ss += fmt.Sprintf("segments: %d\n", len(sf.segments))
   607  	if sf.active != nil {
   608  		ss += fmt.Sprintf("active: %q\n", filepath.Base(sf.active.path))
   609  	}
   610  	if len(sf.segments) > 0 {
   611  		for i, s := range sf.segments {
   612  			ss += fmt.Sprintf("Segment[%d]:\n", i)
   613  			ss += fmt.Sprintf("\tpath: %q\n", filepath.Base(s.path))
   614  			ss += fmt.Sprintf("\tindex: %d\n", s.index)
   615  			ss += fmt.Sprintf("\tentries: %d\n", len(s.entries))
   616  			ss += fmt.Sprintf("\tremaining: %d\n", s.remaining)
   617  		}
   618  	}
   619  	ss += "\n"
   620  	return ss
   621  }