github.com/scottcagno/storage@v1.8.0/pkg/_junk/_x/file/segment.go (about)

     1  package file
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/scottcagno/storage/pkg/_junk/_lsmtree/encoding/binary"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"strconv"
    11  )
    12  
// Segment file naming and sizing constants.
//
// FilePrefix and FileSuffix are the fixed leading and trailing parts of a
// Segment file name; MakeFileNameFromIndex places the hexadecimal index
// between them and GetIndexFromFileName strips them off again.
//
// defaultMaxFileSize is the initial value of the package-level maxFileSize.
const (
	FilePrefix               = "dat-"
	FileSuffix               = ".seg"
	defaultMaxFileSize int64 = 4 << 20 // 4 MiB (4,194,304 bytes)
)
    18  
// Package-level state and sentinel errors.
//
// maxFileSize is the size budget, in bytes, used to compute a Segment's
// remaining field in _OpenSegment and CreateSegment. It is a variable rather
// than a constant, so it can be reassigned from inside the package.
//
// The Err* values are sentinel errors created with errors.New; compare
// against them with errors.Is.
// NOTE(review): where each sentinel is returned is not fully visible from
// this file — confirm against the rest of the package.
var (
	maxFileSize       = defaultMaxFileSize
	ErrOutOfBounds    = errors.New("error: out of bounds")
	ErrSegmentFull    = errors.New("error: Segment is full")
	ErrFileClosed     = errors.New("error: file closed")
	ErrBadArgument    = errors.New("error: bad argument")
	ErrNoPathProvided = errors.New("error: no path provided")
	ErrOptionsMissing = errors.New("error: options missing")
)
    28  
    29  // entry contains the metadata for a single entry within the file Segment
    30  type entry struct {
    31  	index  int64 // index is the "id" of this entry
    32  	offset int64 // offset is the actual offset of this entry in the Segment file
    33  }
    34  
    35  // String is the stringer method for an entry
    36  func (e entry) String() string {
    37  	return fmt.Sprintf("entry.index=%d, entry.offset=%d", e.index, e.offset)
    38  }
    39  
// Segment contains the metadata for the file Segment. It holds no open file
// handle: the methods in this file reopen the file at path whenever they
// need to read or write.
type Segment struct {
	path        string  // path is the full path to this Segment file
	index       int64   // starting index of the Segment, parsed from the file name or passed to CreateSegment
	entries     []entry // entries is an index of the entries in the Segment; an empty slice is treated as "not loaded yet" by hasEntriesLoaded
	firstOffset int64   // NOTE(review): not read or written by any code visible in this file
	lastOffset  int64   // NOTE(review): not read or written by any code visible in this file
	remaining   int64   // remaining is the bytes left after max file size minus entry data; set when the Segment is built by _OpenSegment or CreateSegment
}
    49  
    50  // OpenSegment attempts to open the Segment file at the path provided
    51  // and index the entries within the Segment. It will return an os.PathError
    52  // if the file does not exist, an io.ErrUnexpectedEOF if the file exists
    53  // but is empty and has no data to read, and ErrSegmentFull if the file
    54  // has met the maxFileSize. It will return the Segment and nil error on success.
    55  func _OpenSegment(path string) (*Segment, error) {
    56  	// check to make sure path exists before continuing
    57  	_, err := os.Stat(path)
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  	// attempt to open existing Segment file for reading
    62  	fd, err := os.OpenFile(path, os.O_RDONLY, 0666)
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  	// defer file close
    67  	defer func(fd *os.File) {
    68  		_ = fd.Close()
    69  	}(fd)
    70  	// get Segment index
    71  	index, err := GetIndexFromFileName(filepath.Base(path))
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	// create a new Segment to append indexed entries to
    76  	s := &Segment{
    77  		path:    path,
    78  		index:   index,
    79  		entries: make([]entry, 0),
    80  	}
    81  	// read Segment file and index entries
    82  	for {
    83  		// get the current offset of the
    84  		// reader for the entry later
    85  		offset, err := binary.Offset(fd)
    86  		if err != nil {
    87  			return nil, err
    88  		}
    89  		// read and decode entry
    90  		e, err := binary.DecodeEntry(fd)
    91  		if err != nil {
    92  			if err == io.EOF || err == io.ErrUnexpectedEOF {
    93  				break
    94  			}
    95  			return nil, err
    96  		}
    97  		// get current offset
    98  		// add entry index to Segment entries list
    99  		s.entries = append(s.entries, entry{
   100  			index:  e.Id,
   101  			offset: offset,
   102  		})
   103  		// continue to process the next entry
   104  	}
   105  	// make sure to fill out the Segment index from the first entry index
   106  	//s.index = s.entries[0].index
   107  	// get the offset of the reader to calculate bytes remaining
   108  	offset, err := binary.Offset(fd)
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  	// update the Segment remaining bytes
   113  	s.remaining = maxFileSize - offset
   114  	return s, nil
   115  }
   116  
   117  // getFirstIndex returns the first index in the entries list
   118  func (s *Segment) getFirstIndex() int64 {
   119  	return s.index
   120  }
   121  
   122  // getLastIndex returns the last index in the entries list
   123  func (s *Segment) getLastIndex() int64 {
   124  	if len(s.entries) > 0 {
   125  		return s.entries[len(s.entries)-1].index
   126  	}
   127  	return s.index
   128  }
   129  
   130  // findEntryIndex performs binary search to find the entry containing provided index
   131  func (s *Segment) findEntryIndex(index int64) int {
   132  	// declare for later
   133  	i, j := 0, len(s.entries)
   134  	// otherwise, perform binary search
   135  	for i < j {
   136  		h := i + (j-i)/2
   137  		if index >= s.entries[h].index {
   138  			i = h + 1
   139  		} else {
   140  			j = h
   141  		}
   142  	}
   143  	return i - 1
   144  }
   145  
   146  func OpenSegment(path string) (*Segment, error) {
   147  	// check to make sure path exists before continuing
   148  	_, err := os.Stat(path)
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	// get Segment index
   153  	index, err := GetIndexFromFileName(filepath.Base(path))
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  	// create a new Segment to append indexed entries to
   158  	s := &Segment{
   159  		path:    path,
   160  		index:   index,
   161  		entries: make([]entry, 0),
   162  	}
   163  	return s, nil
   164  }
   165  
   166  func (s *Segment) WriteDataEntry(de *binary.DataEntry) (int64, error) {
   167  	// check to see if the entries are loaded
   168  	if !s.hasEntriesLoaded() {
   169  		// load the entry index
   170  		_, err := s.loadEntryIndex()
   171  		if err != nil {
   172  			return -1, err
   173  		}
   174  	}
   175  	// open writer
   176  	w, err := binary.OpenWriter(s.path)
   177  	if err != nil {
   178  		return -1, err
   179  	}
   180  	defer w.Close()
   181  	// write entry
   182  	offset, err := w.WriteEntry(de)
   183  	if err != nil {
   184  		return -1, err
   185  	}
   186  	// get "last index" TODO: might be a potential bug here
   187  	//lastIndex := s.entries[len(s.entries)-1].index
   188  	// add new entry to the entry index
   189  	s.entries = append(s.entries, entry{
   190  		index:  de.Id, // DataEntry.Id should == last index
   191  		offset: offset,
   192  	})
   193  	// return offset, and nil
   194  	return offset, nil
   195  }
   196  
   197  func (s *Segment) ReadDataEntry(index int64) (*binary.DataEntry, error) {
   198  	// check to see if the entries are loaded
   199  	if !s.hasEntriesLoaded() {
   200  		// load the entry index
   201  		_, err := s.loadEntryIndex()
   202  		if err != nil {
   203  			return nil, err
   204  		}
   205  	}
   206  	// open reader
   207  	r, err := binary.OpenReader(s.path)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	defer r.Close()
   212  	// find correct entry offset to read from
   213  	offset := s.entries[s.findEntryIndex(index)].offset
   214  	// attempt to read entry at offset
   215  	de, err := r.ReadEntryAt(offset)
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  	// return entry
   220  	return de, nil
   221  }
   222  
   223  func (s *Segment) hasEntriesLoaded() bool {
   224  	return len(s.entries) > 0
   225  }
   226  
   227  func (s *Segment) loadEntryIndex() (int64, error) {
   228  	// attempt to open existing Segment file for reading
   229  	fd, err := os.OpenFile(s.path, os.O_RDONLY, 0666)
   230  	if err != nil {
   231  		return -1, err
   232  	}
   233  	// defer file close
   234  	defer func(fd *os.File) {
   235  		_ = fd.Close()
   236  	}(fd)
   237  	// read Segment file and index entries
   238  	for {
   239  		// get the current offset of the
   240  		// reader for the entry later
   241  		offset, err := binary.Offset(fd)
   242  		if err != nil {
   243  			return -1, err
   244  		}
   245  		// read and decode entry
   246  		e, err := binary.DecodeEntry(fd)
   247  		if err != nil {
   248  			if err == io.EOF || err == io.ErrUnexpectedEOF {
   249  				break
   250  			}
   251  			return -1, err
   252  		}
   253  		// add entry index to Segment entries list
   254  		s.entries = append(s.entries, entry{
   255  			index:  e.Id,
   256  			offset: offset,
   257  		})
   258  		// continue to process the next entry
   259  	}
   260  	// return offset
   261  	offset, err := binary.Offset(fd)
   262  	if err != nil {
   263  		return -1, err
   264  	}
   265  	return offset, nil
   266  }
   267  
   268  func MakeFileNameFromIndex(index int64) string {
   269  	hexa := strconv.FormatInt(index, 16)
   270  	return fmt.Sprintf("%s%010s%s", FilePrefix, hexa, FileSuffix)
   271  }
   272  
   273  func GetIndexFromFileName(name string) (int64, error) {
   274  	hexa := name[len(FilePrefix) : len(name)-len(FileSuffix)]
   275  	return strconv.ParseInt(hexa, 16, 32)
   276  }
   277  
   278  // CreateSegment attempts to make a new Segment automatically using the timestamp
   279  // as the Segment name. On success, it will simply return a new Segment and a nil error
   280  func CreateSegment(base string, lastIndex int64) (*Segment, error) {
   281  	// create a new file
   282  	path := filepath.Join(base, MakeFileNameFromIndex(lastIndex))
   283  	fd, err := os.Create(path)
   284  	if err != nil {
   285  		return nil, err
   286  	}
   287  	// don't forget to close it
   288  	err = fd.Close()
   289  	if err != nil {
   290  		return nil, err
   291  	}
   292  	// create and return new Segment
   293  	s := &Segment{
   294  		path:      path,
   295  		index:     lastIndex,
   296  		entries:   make([]entry, 0),
   297  		remaining: maxFileSize,
   298  	}
   299  	return s, nil
   300  }
   301  
   302  // String is the stringer method for a Segment
   303  func (s *Segment) String() string {
   304  	var ss string
   305  	ss += fmt.Sprintf("path: %q\n", filepath.Base(s.path))
   306  	ss += fmt.Sprintf("index: %d\n", s.index)
   307  	ss += fmt.Sprintf("entries: %d\n", len(s.entries))
   308  	ss += fmt.Sprintf("remaining: %d\n", s.remaining)
   309  	return ss
   310  }