github.com/scottcagno/storage@v1.8.0/pkg/_junk/_lsmtree/wal/wal.go (about)

     1  package wal
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/scottcagno/storage/pkg/_junk/_lsmtree/encoding/binary"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"runtime"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  )
    15  
// Segment file naming and sizing constants.
const (
	LogPrefix = "wal-" // LogPrefix is the file name prefix that identifies a segment file
	LogSuffix = ".seg" // LogSuffix is the file name suffix that identifies a segment file

	defaultMaxFileSize uint64 = 16 << 10 // 16 KB
)
    22  
var (
	// maxFileSize is the byte budget from which a segment's remaining
	// space is computed; it is initialized to defaultMaxFileSize.
	maxFileSize = defaultMaxFileSize

	// Sentinel errors exposed by this package.
	ErrOutOfBounds    = errors.New("error: out of bounds")
	ErrSegmentFull    = errors.New("error: segment is full")
	ErrFileClosed     = errors.New("error: file closed")
	ErrBadArgument    = errors.New("error: bad argument")
	ErrNoPathProvided = errors.New("error: no path provided")
	ErrOptionsMissing = errors.New("error: options missing")
)
    33  
    34  // segEntry contains the metadata for a single segEntry within the file segment
    35  type segEntry struct {
    36  	index  int64 // index is the "id" of this segEntry
    37  	offset int64 // offset is the actual offset of this segEntry in the segment file
    38  }
    39  
    40  // String is the stringer method for an segEntry
    41  func (e segEntry) String() string {
    42  	return fmt.Sprintf("segEntry.index=%d, segEntry.offset=%d", e.index, e.offset)
    43  }
    44  
    45  // segment contains the metadata for the file segment
    46  type segment struct {
    47  	path      string     // path is the full path to this segment file
    48  	index     int64      // starting index of the segment
    49  	entries   []segEntry // entries is an index of the entries in the segment
    50  	remaining uint64     // remaining is the bytes left after max file size minus segEntry data
    51  }
    52  
    53  // String is the stringer method for a segment
    54  func (s *segment) String() string {
    55  	var ss string
    56  	ss += fmt.Sprintf("path: %q\n", filepath.Base(s.path))
    57  	ss += fmt.Sprintf("index: %d\n", s.index)
    58  	ss += fmt.Sprintf("entries: %d\n", len(s.entries))
    59  	ss += fmt.Sprintf("remaining: %d\n", s.remaining)
    60  	return ss
    61  }
    62  
    63  // makeFileName returns a file name using the provided timestamp.
    64  // If t is nil, it will create a new name using time.Now()
    65  func makeFileName(t time.Time) string {
    66  	//tf := t.Format("2006-01-03_15:04:05:000000")
    67  	//return fmt.Sprintf("%s%s%s", LogPrefix, time.RFC3339Nano, LogSuffix)
    68  	return fmt.Sprintf("%s%d%s", LogPrefix, time.Now().UnixMicro(), LogSuffix)
    69  }
    70  
    71  // getFirstIndex returns the first index in the entries list
    72  func (s *segment) getFirstIndex() int64 {
    73  	return s.index
    74  }
    75  
    76  // getLastIndex returns the last index in the entries list
    77  func (s *segment) getLastIndex() int64 {
    78  	if len(s.entries) > 0 {
    79  		return s.entries[len(s.entries)-1].index
    80  	}
    81  	return s.index
    82  }
    83  
    84  // findEntryIndex performs binary search to find the segEntry containing provided index
    85  func (s *segment) findEntryIndex(index int64) int {
    86  	// declare for later
    87  	i, j := 0, len(s.entries)
    88  	// otherwise, perform binary search
    89  	for i < j {
    90  		h := i + (j-i)/2
    91  		if index >= s.entries[h].index {
    92  			i = h + 1
    93  		} else {
    94  			j = h
    95  		}
    96  	}
    97  	return i - 1
    98  }
    99  
// WAL is a write-ahead log structure. Its entries are stored in one or
// more segment files located under a base directory; new entries are
// appended to the active segment.
type WAL struct {
	lock       sync.RWMutex   // lock is a mutual exclusion lock
	base       string         // base is the base filepath
	r          *binary.Reader // r is a binary reader
	w          *binary.Writer // w is a binary writer
	firstIndex int64          // firstIndex is the index of the first segment in the segments list
	lastIndex  int64          // lastIndex is the id Write assigns to the next entry it appends
	segments   []*segment     // segments is an index of the current file segments
	active     *segment       // active is the current active segment
}
   111  
   112  // Open opens and returns a new write-ahead log structure
   113  func Open(base string) (*WAL, error) {
   114  	// make sure we are working with absolute paths
   115  	base, err := filepath.Abs(base)
   116  	if err != nil {
   117  		return nil, err
   118  	}
   119  	// sanitize any path separators
   120  	base = filepath.ToSlash(base)
   121  	// create any directories if they are not there
   122  	err = os.MkdirAll(base, os.ModeDir)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  	// create a new write-ahead log instance
   127  	l := &WAL{
   128  		base:       base,
   129  		firstIndex: 0,
   130  		lastIndex:  1,
   131  		segments:   make([]*segment, 0),
   132  	}
   133  	// attempt to load segments
   134  	err = l.loadIndex()
   135  	if err != nil {
   136  		return nil, err
   137  	}
   138  	// return write-ahead log
   139  	return l, nil
   140  }
   141  
   142  // loadIndex initializes the segment index. It looks for segment
   143  // files in the base directory and attempts to index the segment as
   144  // well as any of the entries within the segment. If this is a new
   145  // instance, it will create a new segment that is ready for writing.
   146  func (l *WAL) loadIndex() error {
   147  	// lock
   148  	l.lock.Lock()
   149  	defer l.lock.Unlock()
   150  	// get the files in the base directory path
   151  	files, err := os.ReadDir(l.base)
   152  	if err != nil {
   153  		return err
   154  	}
   155  	// list the files in the base directory path and attempt to index the entries
   156  	for _, file := range files {
   157  		// skip non data files
   158  		if file.IsDir() ||
   159  			!strings.HasPrefix(file.Name(), LogPrefix) ||
   160  			!strings.HasSuffix(file.Name(), LogSuffix) {
   161  			continue // skip this, continue on to the next file
   162  		}
   163  		// attempt to load segment (and index entries in segment)
   164  		s, err := l.loadSegmentFile(filepath.Join(l.base, file.Name()))
   165  		if err != nil {
   166  			return err
   167  		}
   168  		// segment has been loaded successfully, append to the segments list
   169  		l.segments = append(l.segments, s)
   170  	}
   171  	// check to see if any segments were found. If not, initialize a new one
   172  	if len(l.segments) == 0 {
   173  		// create a new segment file
   174  		s, err := l.makeSegmentFile()
   175  		if err != nil {
   176  			return err
   177  		}
   178  		// segment has been created successfully, append to the segments list
   179  		l.segments = append(l.segments, s)
   180  	}
   181  	// segments have either been loaded or created, so now we
   182  	// should go about updating the active segment pointer to
   183  	// point to the "tail" (the last segment in the segment list)
   184  	l.active = l.getLastSegment()
   185  	// we should be good to go, lets attempt to open a file
   186  	// reader to work with the active segment
   187  	l.r, err = binary.OpenReader(l.active.path)
   188  	if err != nil {
   189  		return err
   190  	}
   191  	// and then attempt to open a file writer to also work
   192  	// with the active segment, so we can begin appending data
   193  	l.w, err = binary.OpenWriter(l.active.path)
   194  	if err != nil {
   195  		return err
   196  	}
   197  	// finally, update the firstIndex and lastIndex
   198  	l.firstIndex = l.segments[0].index
   199  	// and update last index
   200  	l.lastIndex = l.getLastSegment().getLastIndex()
   201  	return nil
   202  }
   203  
   204  // loadSegment attempts to open the segment file at the path provided
   205  // and index the entries within the segment. It will return an os.PathError
   206  // if the file does not exist, an io.ErrUnexpectedEOF if the file exists
   207  // but is empty and has no data to read, and ErrSegmentFull if the file
   208  // has met the maxFileSize. It will return the segment and nil error on success.
   209  func (l *WAL) loadSegmentFile(path string) (*segment, error) {
   210  	// check to make sure path exists before continuing
   211  	_, err := os.Stat(path)
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  	// attempt to open existing segment file for reading
   216  	fd, err := os.OpenFile(path, os.O_RDONLY, 0666)
   217  	if err != nil {
   218  		return nil, err
   219  	}
   220  	// defer file close
   221  	defer func(fd *os.File) {
   222  		_ = fd.Close()
   223  	}(fd)
   224  	// create a new segment to append indexed entries to
   225  	s := &segment{
   226  		path:    path,
   227  		entries: make([]segEntry, 0),
   228  	}
   229  	// read segment file and index entries
   230  	for {
   231  		// get the current offset of the
   232  		// reader for the segEntry later
   233  		offset, err := binary.Offset(fd)
   234  		if err != nil {
   235  			return nil, err
   236  		}
   237  		// read and decode segEntry
   238  		e, err := binary.DecodeEntry(fd)
   239  		if err != nil {
   240  			if err == io.EOF || err == io.ErrUnexpectedEOF {
   241  				break
   242  			}
   243  			return nil, err
   244  		}
   245  		// get current offset
   246  		// add segEntry index to segment entries list
   247  		s.entries = append(s.entries, segEntry{
   248  			index:  e.Id,
   249  			offset: offset,
   250  		})
   251  		// continue to process the next segEntry
   252  	}
   253  	// make sure to fill out the segment index from the first segEntry index
   254  	s.index = s.entries[0].index
   255  	// get the offset of the reader to calculate bytes remaining
   256  	offset, err := binary.Offset(fd)
   257  	if err != nil {
   258  		return nil, err
   259  	}
   260  	// update the segment remaining bytes
   261  	s.remaining = maxFileSize - uint64(offset)
   262  	return s, nil
   263  }
   264  
   265  // makeSegment attempts to make a new segment automatically using the timestamp
   266  // as the segment name. On success, it will simply return a new segment and a nil error
   267  func (l *WAL) makeSegmentFile() (*segment, error) {
   268  	// create a new file
   269  	path := filepath.Join(l.base, makeFileName(time.Now()))
   270  	fd, err := os.Create(path)
   271  	if err != nil {
   272  		return nil, err
   273  	}
   274  	// don't forget to close it
   275  	err = fd.Close()
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	// create and return new segment
   280  	s := &segment{
   281  		path:      path,
   282  		index:     l.lastIndex,
   283  		entries:   make([]segEntry, 0),
   284  		remaining: maxFileSize,
   285  	}
   286  	return s, nil
   287  }
   288  
   289  // findSegmentIndex performs binary search to find the segment containing provided index
   290  func (l *WAL) findSegmentIndex(index int64) int {
   291  	// declare for later
   292  	i, j := 0, len(l.segments)
   293  	// otherwise, perform binary search
   294  	for i < j {
   295  		h := i + (j-i)/2
   296  		if index >= l.segments[h].index {
   297  			i = h + 1
   298  		} else {
   299  			j = h
   300  		}
   301  	}
   302  	return i - 1
   303  }
   304  
   305  // getLastSegment returns the tail segment in the segments index list
   306  func (l *WAL) getLastSegment() *segment {
   307  	return l.segments[len(l.segments)-1]
   308  }
   309  
   310  // cycleSegment adds a new segment to replace the current (active) segment
   311  func (l *WAL) cycleSegment() error {
   312  	// sync and close current file segment
   313  	err := l.w.Close()
   314  	if err != nil {
   315  		return err
   316  	}
   317  	// create a new segment file
   318  	s, err := l.makeSegmentFile()
   319  	if err != nil {
   320  		return err
   321  	}
   322  	// add segment to segment index list
   323  	l.segments = append(l.segments, s)
   324  	// update the active segment pointer
   325  	l.active = l.getLastSegment()
   326  	// open file writer associated with active segment
   327  	l.w, err = binary.OpenWriter(l.active.path)
   328  	if err != nil {
   329  		return err
   330  	}
   331  	// update file reader associated with the active segment
   332  	l.r, err = binary.OpenReader(l.active.path)
   333  	if err != nil {
   334  		return err
   335  	}
   336  	return nil
   337  }
   338  
   339  // Read reads an segEntry from the write-ahead log at the specified index
   340  func (l *WAL) Read(index int64) (string, []byte, error) {
   341  	// read lock
   342  	l.lock.RLock()
   343  	defer l.lock.RUnlock()
   344  	// error checking
   345  	if index < l.firstIndex || index > l.lastIndex {
   346  		return "", nil, ErrOutOfBounds
   347  	}
   348  	var err error
   349  	// find the segment containing the provided index
   350  	s := l.segments[l.findSegmentIndex(index)]
   351  	// make sure we are reading from the correct file
   352  	l.r, err = l.r.ReadFrom(s.path)
   353  	if err != nil {
   354  		return "", nil, err
   355  	}
   356  	// find the offset for the segEntry containing the provided index
   357  	offset := s.entries[s.findEntryIndex(index)].offset
   358  	// read segEntry at offset
   359  	e, err := l.r.ReadEntryAt(offset)
   360  	if err != nil {
   361  		return "", nil, err
   362  	}
   363  	return string(e.Key), e.Value, nil
   364  }
   365  
   366  // WriteIndexEntry writes an segEntry to the write-ahead log in an append-only fashion
   367  func (l *WAL) Write(key string, value []byte) (int64, error) {
   368  	// lock
   369  	l.lock.Lock()
   370  	defer l.lock.Unlock()
   371  	// write segEntry
   372  	offset, err := l.w.WriteEntry(&binary.DataEntry{
   373  		Id:    l.lastIndex,
   374  		Key:   []byte(key),
   375  		Value: value,
   376  	})
   377  	if err != nil {
   378  		return 0, err
   379  	}
   380  	// add new segEntry to the segment index
   381  	l.active.entries = append(l.active.entries, segEntry{
   382  		index:  l.lastIndex,
   383  		offset: offset,
   384  	})
   385  	// update lastIndex
   386  	l.lastIndex++
   387  	// grab the current offset written
   388  	offset2, err := l.w.Offset()
   389  	if err != nil {
   390  		return 0, err
   391  	}
   392  	// update segment remaining
   393  	l.active.remaining -= uint64(offset2 - offset)
   394  	// check to see if the active segment needs to be cycled
   395  	if l.active.remaining < 64 {
   396  		err = l.cycleSegment()
   397  		if err != nil {
   398  			return 0, err
   399  		}
   400  	}
   401  	return l.lastIndex - 1, nil
   402  }
   403  
   404  // Scan provides an iterator method for the write-ahead log
   405  func (l *WAL) Scan(iter func(index int64, key string, value []byte) bool) error {
   406  	// lock
   407  	l.lock.Lock()
   408  	defer l.lock.Unlock()
   409  	// init for any errors
   410  	var err error
   411  	// range the segment index
   412  	for _, sidx := range l.segments {
   413  		fmt.Printf("segment: %s\n", sidx)
   414  		// make sure we are reading the right data
   415  		l.r, err = l.r.ReadFrom(sidx.path)
   416  		if err != nil {
   417  			return err
   418  		}
   419  		// range the segment entries index
   420  		for _, eidx := range sidx.entries {
   421  			// read segEntry
   422  			e, err := l.r.ReadEntryAt(eidx.offset)
   423  			if err != nil {
   424  				if err == io.EOF || err == io.ErrUnexpectedEOF {
   425  					break
   426  				}
   427  				return err
   428  			}
   429  			// check segEntry against iterator boolean function
   430  			if !iter(e.Id, string(e.Key), e.Value) {
   431  				// if it returns false, then process next segEntry
   432  				continue
   433  			}
   434  		}
   435  		// outside segEntry loop
   436  	}
   437  	// outside segment loop
   438  	return nil
   439  }
   440  
   441  // TruncateFront removes all segments and entries before specified index
   442  func (l *WAL) TruncateFront(index int64) error {
   443  	// lock
   444  	l.lock.Lock()
   445  	defer l.lock.Unlock()
   446  	// perform bounds check
   447  	if index == 0 ||
   448  		l.lastIndex == 0 ||
   449  		index < l.firstIndex || index > l.lastIndex {
   450  		return ErrOutOfBounds
   451  	}
   452  	if index == l.firstIndex {
   453  		return nil // nothing to truncate
   454  	}
   455  	// locate segment in segment index list containing specified index
   456  	sidx := l.findSegmentIndex(index)
   457  	// isolate whole segments that can be removed
   458  	for i := 0; i < sidx; i++ {
   459  		// remove segment file
   460  		err := os.Remove(l.segments[i].path)
   461  		if err != nil {
   462  			return err
   463  		}
   464  	}
   465  	// remove segments from segment index (cut, i-j)
   466  	i, j := 0, sidx
   467  	copy(l.segments[i:], l.segments[j:])
   468  	for k, n := len(l.segments)-j+i, len(l.segments); k < n; k++ {
   469  		l.segments[k] = nil // or the zero value of T
   470  	}
   471  	l.segments = l.segments[:len(l.segments)-j+i]
   472  	// update firstIndex
   473  	l.firstIndex = l.segments[0].index
   474  	// prepare to re-write partial segment
   475  	var err error
   476  	var entries []segEntry
   477  	tmpfd, err := os.Create(filepath.Join(l.base, "tmp-partial.seg"))
   478  	if err != nil {
   479  		return err
   480  	}
   481  	// after the segment index cut, segment 0 will
   482  	// contain the partials that we must re-write
   483  	if l.segments[0].index < index {
   484  		// make sure we are reading from the correct path
   485  		l.r, err = l.r.ReadFrom(l.segments[0].path)
   486  		if err != nil {
   487  			return err
   488  		}
   489  		// range the entries within this segment to find
   490  		// the ones that are greater than the index and
   491  		// write those to a temporary buffer....
   492  		for _, ent := range l.segments[0].entries {
   493  			if ent.index < index {
   494  				continue // skip
   495  			}
   496  			// read segEntry
   497  			e, err := l.r.ReadEntryAt(ent.offset)
   498  			if err != nil {
   499  				return err
   500  			}
   501  			// write segEntry to temp file
   502  			ent.offset, err = binary.EncodeEntry(tmpfd, e)
   503  			if err != nil {
   504  				return err
   505  			}
   506  			// sync write
   507  			err = tmpfd.Sync()
   508  			if err != nil {
   509  				return err
   510  			}
   511  			// append to a new entries list
   512  			entries = append(entries, ent)
   513  		}
   514  		// move reader back to active segment file
   515  		l.r, err = l.r.ReadFrom(l.active.path)
   516  		if err != nil {
   517  			return err
   518  		}
   519  		// close temp file
   520  		err = tmpfd.Close()
   521  		if err != nil {
   522  			return err
   523  		}
   524  		// remove partial segment file
   525  		err = os.Remove(l.segments[0].path)
   526  		if err != nil {
   527  			return err
   528  		}
   529  		// change temp file name
   530  		err = os.Rename(tmpfd.Name(), l.segments[0].path)
   531  		if err != nil {
   532  			return err
   533  		}
   534  		// update segment
   535  		l.segments[0].entries = entries
   536  		l.segments[0].index = entries[0].index
   537  	}
   538  	return nil
   539  }
   540  
   541  // Count returns the number of entries currently in the write-ahead log
   542  func (l *WAL) Count() int {
   543  	// lock
   544  	l.lock.Lock()
   545  	defer l.lock.Unlock()
   546  	// get count
   547  	var count int
   548  	for _, s := range l.segments {
   549  		count += len(s.entries)
   550  	}
   551  	// return count
   552  	return count
   553  }
   554  
   555  // FirstIndex returns the write-ahead logs first index
   556  func (l *WAL) FirstIndex() int64 {
   557  	// lock
   558  	l.lock.Lock()
   559  	defer l.lock.Unlock()
   560  	return l.firstIndex
   561  }
   562  
   563  // LastIndex returns the write-ahead logs first index
   564  func (l *WAL) LastIndex() int64 {
   565  	// lock
   566  	l.lock.Lock()
   567  	defer l.lock.Unlock()
   568  	return l.lastIndex
   569  }
   570  
   571  // Close syncs and closes the write-ahead log
   572  func (l *WAL) Close() error {
   573  	// lock
   574  	l.lock.Lock()
   575  	defer l.lock.Unlock()
   576  	// sync and close writer
   577  	err := l.w.Close()
   578  	if err != nil {
   579  		return err
   580  	}
   581  	// close reader
   582  	err = l.r.Close()
   583  	if err != nil {
   584  		return err
   585  	}
   586  	// clean everything else up
   587  	l.base = ""
   588  	l.r = nil
   589  	l.w = nil
   590  	l.firstIndex = 0
   591  	l.lastIndex = 0
   592  	l.segments = nil
   593  	l.active = nil
   594  	// force gc for good measure
   595  	runtime.GC()
   596  	return nil
   597  }
   598  
   599  func (l *WAL) Path() string {
   600  	return l.base
   601  }
   602  
   603  // String is the stringer method for the write-ahead log
   604  func (l *WAL) String() string {
   605  	var ss string
   606  	ss += fmt.Sprintf("\n\n[write-ahead log]\n")
   607  	ss += fmt.Sprintf("base: %q\n", l.base)
   608  	ss += fmt.Sprintf("firstIndex: %d\n", l.firstIndex)
   609  	ss += fmt.Sprintf("lastIndex: %d\n", l.lastIndex)
   610  	ss += fmt.Sprintf("segments: %d\n", len(l.segments))
   611  	if l.active != nil {
   612  		ss += fmt.Sprintf("active: %q\n", filepath.Base(l.active.path))
   613  	}
   614  	if len(l.segments) > 0 {
   615  		for i, s := range l.segments {
   616  			ss += fmt.Sprintf("segment[%d]:\n", i)
   617  			ss += fmt.Sprintf("\tpath: %q\n", filepath.Base(s.path))
   618  			ss += fmt.Sprintf("\tindex: %d\n", s.index)
   619  			ss += fmt.Sprintf("\tentries: %d\n", len(s.entries))
   620  			ss += fmt.Sprintf("\tremaining: %d\n", s.remaining)
   621  		}
   622  	}
   623  	ss += "\n"
   624  	return ss
   625  }