github.com/scottcagno/storage@v1.8.0/pkg/swal/swal.go (about)

     1  package swal
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"github.com/scottcagno/storage/pkg/lsmt/binary"
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"runtime"
    11  	"strings"
    12  	"sync"
    13  )
    14  
    15  var ErrOutOfBounds = errors.New("swal: out of bounds")
    16  
    17  const (
    18  	FilePrefix       = "dat-"
    19  	FileSuffix       = ".seg"
    20  	remainingTrigger = 64
    21  )
    22  
    23  // SWAL is a write-ahead log structure
    24  type SWAL struct {
    25  	lock       sync.RWMutex // lock is a mutual exclusion lock
    26  	conf       *SWALConfig
    27  	r          *binary.Reader // r is a binary reader
    28  	w          *binary.Writer // w is a binary writer
    29  	firstIndex int64          // firstIndex is the index of the first segEntry
    30  	lastIndex  int64          // lastIndex is the index of the last segEntry
    31  	segments   []*segment     // segments is an index of the current file segments
    32  	active     *segment       // active is the current active segment
    33  }
    34  
    35  // OpenSWAL opens and returns a new write-ahead log structure
    36  func OpenSWAL(c *SWALConfig) (*SWAL, error) {
    37  	// check config
    38  	conf := checkWALConfig(c)
    39  	// TODO: consider replacing `filepath.Abs()`, and `filepath.ToSlash()`
    40  	// TODO: with `filepath.Clean()` at some point or another. It should
    41  	// TODO: close enough to the same (possibly even better), so yeah.
    42  	// make sure we are working with absolute paths
    43  	base, err := filepath.Abs(conf.BasePath)
    44  	if err != nil {
    45  		return nil, err
    46  	}
    47  	// sanitize any path separators
    48  	base = filepath.ToSlash(base)
    49  	// create any directories if they are not there
    50  	err = os.MkdirAll(base, os.ModeDir)
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  	// create a new write-ahead log instance
    55  	l := &SWAL{
    56  		conf:       conf,
    57  		firstIndex: 0,
    58  		lastIndex:  1,
    59  		segments:   make([]*segment, 0),
    60  	}
    61  	// attempt to load segments
    62  	err = l.loadIndex()
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  	// return write-ahead log
    67  	return l, nil
    68  }
    69  
    70  func (l *SWAL) CloseAndRemove() error {
    71  	// lock
    72  	l.lock.Lock()
    73  	defer l.lock.Unlock()
    74  	// sync and close writer
    75  	err := l.w.Close()
    76  	if err != nil {
    77  		return err
    78  	}
    79  	// close reader
    80  	err = l.r.Close()
    81  	if err != nil {
    82  		return err
    83  	}
    84  	// reset the segments
    85  	l.segments = make([]*segment, 0)
    86  	// reset first and last index
    87  	l.firstIndex = 0
    88  	l.lastIndex = 1
    89  	// erase all files
    90  	err = os.RemoveAll(l.conf.BasePath)
    91  	if err != nil {
    92  		return err
    93  	}
    94  	return nil
    95  }
    96  
    97  // loadIndex initializes the segment index. It looks for segment
    98  // files in the base directory and attempts to index the segment as
    99  // well as any of the entries within the segment. If this is a new
   100  // instance, it will create a new segment that is ready for writing.
   101  func (l *SWAL) loadIndex() error {
   102  	// lock
   103  	l.lock.Lock()
   104  	defer l.lock.Unlock()
   105  	// get the files in the base directory path
   106  	files, err := os.ReadDir(l.conf.BasePath)
   107  	if err != nil {
   108  		return err
   109  	}
   110  	// list the files in the base directory path and attempt to index the entries
   111  	for _, file := range files {
   112  		// skip non data files
   113  		if file.IsDir() ||
   114  			!strings.HasPrefix(file.Name(), FilePrefix) ||
   115  			!strings.HasSuffix(file.Name(), FileSuffix) {
   116  			continue // skip this, continue on to the next file
   117  		}
   118  		// check the size of segment file
   119  		fi, err := file.Info()
   120  		if err != nil {
   121  			return err
   122  		}
   123  		// if the file is empty, remove it and skip to next file
   124  		if fi.Size() == 0 {
   125  			err = os.Remove(filepath.Join(l.conf.BasePath, file.Name()))
   126  			if err != nil {
   127  				return err
   128  			}
   129  			continue // make sure we skip to next segment
   130  		}
   131  		// attempt to load segment (and index entries in segment)
   132  		s, err := l.loadSegmentFile(filepath.Join(l.conf.BasePath, file.Name()))
   133  		if err != nil {
   134  			return err
   135  		}
   136  		// segment has been loaded successfully, append to the segments list
   137  		l.segments = append(l.segments, s)
   138  	}
   139  	// check to see if any segments were found. If not, initialize a new one
   140  	if len(l.segments) == 0 {
   141  		// create a new segment file
   142  		s, err := l.makeSegmentFile(l.lastIndex)
   143  		if err != nil {
   144  			return err
   145  		}
   146  		// segment has been created successfully, append to the segments list
   147  		l.segments = append(l.segments, s)
   148  	}
   149  	// segments have either been loaded or created, so now we
   150  	// should go about updating the active segment pointer to
   151  	// point to the "tail" (the last segment in the segment list)
   152  	l.active = l.getLastSegment()
   153  	// we should be good to go, lets attempt to open a file
   154  	// reader to work with the active segment
   155  	l.r, err = binary.OpenReader(l.active.path)
   156  	if err != nil {
   157  		return err
   158  	}
   159  	// and then attempt to open a file writer to also work
   160  	// with the active segment, so we can begin appending data
   161  	l.w, err = binary.OpenWriterWithSync(l.active.path, l.conf.SyncOnWrite)
   162  	if err != nil {
   163  		return err
   164  	}
   165  	// finally, update the firstIndex and lastIndex
   166  	l.firstIndex = l.segments[0].index
   167  	// and update last index
   168  	l.lastIndex = l.getLastSegment().getLastIndex()
   169  	return nil
   170  }
   171  
   172  // loadSegment attempts to open the segment file at the path provided
   173  // and index the entries within the segment. It will return an os.PathError
   174  // if the file does not exist, an io.ErrUnexpectedEOF if the file exists
   175  // but is empty and has no data to read, and ErrSegmentFull if the file
   176  // has met the maxFileSize. It will return the segment and nil error on success.
   177  func (l *SWAL) loadSegmentFile(path string) (*segment, error) {
   178  	// check to make sure path exists before continuing
   179  	_, err := os.Stat(path)
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  	// attempt to open existing segment file for reading
   184  	fd, err := os.OpenFile(path, os.O_RDONLY, 0666)
   185  	if err != nil {
   186  		return nil, err
   187  	}
   188  	// defer file close
   189  	defer func(fd *os.File) {
   190  		_ = fd.Close()
   191  	}(fd)
   192  	// create a new segment to append indexed entries to
   193  	s := &segment{
   194  		path:    path,
   195  		entries: make([]segEntry, 0),
   196  	}
   197  	// read segment file and index entries
   198  	index, err := GetIndexFromFileName(filepath.Base(fd.Name()))
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	for {
   203  		// get the current offset of the
   204  		// reader for the segEntry later
   205  		offset, err := binary.Offset(fd)
   206  		if err != nil {
   207  			return nil, err
   208  		}
   209  		// read and decode segEntry
   210  		_, err = binary.DecodeEntry(fd)
   211  		if err != nil {
   212  			if err == io.EOF || err == io.ErrUnexpectedEOF {
   213  				break
   214  			}
   215  			return nil, err
   216  		}
   217  		// get current offset
   218  		// add segEntry index to segment entries list
   219  		s.entries = append(s.entries, segEntry{
   220  			index:  index,
   221  			offset: offset,
   222  		})
   223  		// continue to process the next segEntry
   224  		index++
   225  	}
   226  	// make sure to fill out the segment index from the first segEntry index
   227  	s.index = s.entries[0].index
   228  	// get the offset of the reader to calculate bytes remaining
   229  	offset, err := binary.Offset(fd)
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  	// update the segment remaining bytes
   234  	s.remaining = defaultMaxSegmentSize - offset
   235  	return s, nil
   236  }
   237  
   238  // makeSegment attempts to make a new segment automatically using the timestamp
   239  // as the segment name. On success, it will simply return a new segment and a nil error
   240  func (l *SWAL) makeSegmentFile(index int64) (*segment, error) {
   241  	// create a new file
   242  	path := filepath.Join(l.conf.BasePath, MakeFileNameFromIndex(index))
   243  	fd, err := os.Create(path)
   244  	if err != nil {
   245  		return nil, err
   246  	}
   247  	// don't forget to close it
   248  	err = fd.Close()
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  	// create and return new segment
   253  	s := &segment{
   254  		path:      path,
   255  		index:     l.lastIndex,
   256  		entries:   make([]segEntry, 0),
   257  		remaining: l.conf.MaxSegmentSize,
   258  	}
   259  	return s, nil
   260  }
   261  
   262  // findSegmentIndex performs binary search to find the segment containing provided index
   263  func (l *SWAL) findSegmentIndex(index int64) int {
   264  	// declare for later
   265  	i, j := 0, len(l.segments)
   266  	// otherwise, perform binary search
   267  	for i < j {
   268  		h := i + (j-i)/2
   269  		if index >= l.segments[h].index {
   270  			i = h + 1
   271  		} else {
   272  			j = h
   273  		}
   274  	}
   275  	return i - 1
   276  }
   277  
   278  // getLastSegment returns the tail segment in the segments index list
   279  func (l *SWAL) getLastSegment() *segment {
   280  	return l.segments[len(l.segments)-1]
   281  }
   282  
   283  // cycleSegment adds a new segment to replace the current (active) segment
   284  func (l *SWAL) cycleSegment() error {
   285  	// sync and close current file segment
   286  	err := l.w.Close()
   287  	if err != nil {
   288  		return err
   289  	}
   290  	// create a new segment file
   291  	s, err := l.makeSegmentFile(l.lastIndex)
   292  	if err != nil {
   293  		return err
   294  	}
   295  	// add segment to segment index list
   296  	l.segments = append(l.segments, s)
   297  	// update the active segment pointer
   298  	l.active = l.getLastSegment()
   299  	// open file writer associated with active segment
   300  	l.w, err = binary.OpenWriterWithSync(l.active.path, l.conf.SyncOnWrite)
   301  	if err != nil {
   302  		return err
   303  	}
   304  	// update file reader associated with the active segment
   305  	l.r, err = binary.OpenReader(l.active.path)
   306  	if err != nil {
   307  		return err
   308  	}
   309  	return nil
   310  }
   311  
   312  // Read reads an segEntry from the write-ahead log at the specified index
   313  func (l *SWAL) Read(index int64) (*binary.Entry, error) {
   314  	// read lock
   315  	l.lock.RLock()
   316  	defer l.lock.RUnlock()
   317  	// error checking
   318  	if index < l.firstIndex || index > l.lastIndex {
   319  		return nil, ErrOutOfBounds
   320  	}
   321  	var err error
   322  	// find the segment containing the provided index
   323  	s := l.segments[l.findSegmentIndex(index)]
   324  	// make sure we are reading from the correct file
   325  	l.r, err = l.r.ReadFrom(s.path)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	// find the offset for the segEntry containing the provided index
   330  	offset := s.entries[s.findEntryIndex(index)].offset
   331  	// read segEntry at offset
   332  	e, err := l.r.ReadEntryAt(offset)
   333  	if err != nil {
   334  		return nil, err
   335  	}
   336  	return e, nil
   337  }
   338  
   339  // Write writes an segEntry to the write-ahead log in an append-only fashion
   340  func (l *SWAL) Write(e *binary.Entry) (int64, error) {
   341  	// lock
   342  	l.lock.Lock()
   343  	defer l.lock.Unlock()
   344  	// write segEntry
   345  	offset, err := l.w.WriteEntry(e)
   346  	if err != nil {
   347  		return 0, err
   348  	}
   349  	// add new segEntry to the segment index
   350  	l.active.entries = append(l.active.entries, segEntry{
   351  		index:  l.lastIndex,
   352  		offset: offset,
   353  	})
   354  	// update lastIndex
   355  	l.lastIndex++
   356  	// grab the current offset written
   357  	offset2, err := l.w.Offset()
   358  	if err != nil {
   359  		return 0, err
   360  	}
   361  	// update segment remaining
   362  	l.active.remaining -= offset2 - offset
   363  	// check to see if the active segment needs to be cycled
   364  	if l.active.remaining < remainingTrigger {
   365  		err = l.cycleSegment()
   366  		if err != nil {
   367  			return 0, err
   368  		}
   369  	}
   370  	return l.lastIndex - 1, nil
   371  }
   372  
   373  // WriteBatch writes a batch of entries performing no syncing until the end of the batch
   374  func (l *SWAL) WriteBatch(batch *binary.Batch) error {
   375  	// lock
   376  	l.lock.Lock()
   377  	defer l.lock.Unlock()
   378  	// check sync policy
   379  	changedSyncPolicy := false
   380  	if l.conf.SyncOnWrite == true {
   381  		l.conf.SyncOnWrite = false // if it's on, temporarily disable
   382  		l.w.SetSyncOnWrite(false)
   383  		changedSyncPolicy = true
   384  	}
   385  	// iterate batch
   386  	for i := range batch.Entries {
   387  		// entry
   388  		e := batch.Entries[i]
   389  		// write entry to data file
   390  		offset, err := l.w.WriteEntry(e)
   391  		if err != nil {
   392  			return err
   393  		}
   394  		// add new segEntry to the segment index
   395  		l.active.entries = append(l.active.entries, segEntry{
   396  			index:  l.lastIndex,
   397  			offset: offset,
   398  		})
   399  		// update lastIndex
   400  		l.lastIndex++
   401  		// grab the current offset written
   402  		offset2, err := l.w.Offset()
   403  		if err != nil {
   404  			return err
   405  		}
   406  		// update segment remaining
   407  		l.active.remaining -= offset2 - offset
   408  		// check to see if the active segment needs to be cycled
   409  		if l.active.remaining < remainingTrigger {
   410  			err = l.cycleSegment()
   411  			if err != nil {
   412  				return err
   413  			}
   414  		}
   415  	}
   416  	// after batch, set everything back how it was
   417  	if changedSyncPolicy {
   418  		l.conf.SyncOnWrite = true
   419  		l.w.SetSyncOnWrite(true)
   420  	}
   421  	// after batch has been written, do sync
   422  	err := l.w.Sync()
   423  	if err != nil {
   424  		return err
   425  	}
   426  	return nil
   427  }
   428  
   429  // Scan provides an iterator method for the write-ahead log
   430  func (l *SWAL) Scan(iter func(e *binary.Entry) bool) error {
   431  	// lock
   432  	l.lock.Lock()
   433  	defer l.lock.Unlock()
   434  	// init for any errors
   435  	var err error
   436  	// range the segment index
   437  	for _, sidx := range l.segments {
   438  		//fmt.Printf("segment: %s\n", sidx)
   439  		// make sure we are reading the right data
   440  		l.r, err = l.r.ReadFrom(sidx.path)
   441  		if err != nil {
   442  			return err
   443  		}
   444  		// range the segment entries index
   445  		for _, eidx := range sidx.entries {
   446  			// read segEntry
   447  			e, err := l.r.ReadEntryAt(eidx.offset)
   448  			if err != nil {
   449  				if err == io.EOF || err == io.ErrUnexpectedEOF {
   450  					break
   451  				}
   452  				return err
   453  			}
   454  			// check segEntry against iterator boolean function
   455  			if !iter(e) {
   456  				// if it returns false, then process next segEntry
   457  				continue
   458  			}
   459  		}
   460  		// outside segEntry loop
   461  	}
   462  	// outside segment loop
   463  	return nil
   464  }
   465  
   466  // TruncateFront removes all segments and entries before specified index
   467  func (l *SWAL) TruncateFront(index int64) error {
   468  	// lock
   469  	l.lock.Lock()
   470  	defer l.lock.Unlock()
   471  	// perform bounds check
   472  	if index == 0 ||
   473  		l.lastIndex == 0 ||
   474  		index < l.firstIndex || index > l.lastIndex {
   475  		return ErrOutOfBounds
   476  	}
   477  	if index == l.firstIndex {
   478  		return nil // nothing to truncate
   479  	}
   480  	// locate segment in segment index list containing specified index
   481  	sidx := l.findSegmentIndex(index)
   482  	// isolate whole segments that can be removed
   483  	for i := 0; i < sidx; i++ {
   484  		// remove segment file
   485  		err := os.Remove(l.segments[i].path)
   486  		if err != nil {
   487  			return err
   488  		}
   489  	}
   490  	// remove segments from segment index (cut, i-j)
   491  	i, j := 0, sidx
   492  	copy(l.segments[i:], l.segments[j:])
   493  	for k, n := len(l.segments)-j+i, len(l.segments); k < n; k++ {
   494  		l.segments[k] = nil // or the zero value of T
   495  	}
   496  	l.segments = l.segments[:len(l.segments)-j+i]
   497  	// update firstIndex
   498  	l.firstIndex = l.segments[0].index
   499  	// prepare to re-write partial segment
   500  	var err error
   501  	var entries []segEntry
   502  	tmpfd, err := os.Create(filepath.Join(l.conf.BasePath, "tmp-partial.seg"))
   503  	if err != nil {
   504  		return err
   505  	}
   506  	// after the segment index cut, segment 0 will
   507  	// contain the partials that we must re-write
   508  	if l.segments[0].index < index {
   509  		// make sure we are reading from the correct path
   510  		l.r, err = l.r.ReadFrom(l.segments[0].path)
   511  		if err != nil {
   512  			return err
   513  		}
   514  		// range the entries within this segment to find
   515  		// the ones that are greater than the index and
   516  		// write those to a temporary buffer....
   517  		for _, ent := range l.segments[0].entries {
   518  			if ent.index < index {
   519  				continue // skip
   520  			}
   521  			// read segEntry
   522  			e, err := l.r.ReadEntryAt(ent.offset)
   523  			if err != nil {
   524  				return err
   525  			}
   526  			// write segEntry to temp file
   527  			ent.offset, err = binary.EncodeEntry(tmpfd, e)
   528  			if err != nil {
   529  				return err
   530  			}
   531  			// sync write
   532  			err = tmpfd.Sync()
   533  			if err != nil {
   534  				return err
   535  			}
   536  			// append to a new entries list
   537  			entries = append(entries, ent)
   538  		}
   539  		// move reader back to active segment file
   540  		l.r, err = l.r.ReadFrom(l.active.path)
   541  		if err != nil {
   542  			return err
   543  		}
   544  		// close temp file
   545  		err = tmpfd.Close()
   546  		if err != nil {
   547  			return err
   548  		}
   549  		// remove partial segment file
   550  		err = os.Remove(l.segments[0].path)
   551  		if err != nil {
   552  			return err
   553  		}
   554  		// change temp file name
   555  		err = os.Rename(tmpfd.Name(), l.segments[0].path)
   556  		if err != nil {
   557  			return err
   558  		}
   559  		// update segment
   560  		l.segments[0].entries = entries
   561  		l.segments[0].index = entries[0].index
   562  	}
   563  	return nil
   564  }
   565  
   566  func (l *SWAL) GetConfig() *SWALConfig {
   567  	// lock
   568  	l.lock.Lock()
   569  	defer l.lock.Unlock()
   570  	return l.conf
   571  }
   572  
   573  func (l *SWAL) Sync() error {
   574  	// lock
   575  	l.lock.Lock()
   576  	defer l.lock.Unlock()
   577  	err := l.w.Sync()
   578  	if err != nil {
   579  		return err
   580  	}
   581  	return nil
   582  }
   583  
   584  // Count returns the number of entries currently in the write-ahead log
   585  func (l *SWAL) Count() int {
   586  	// lock
   587  	l.lock.Lock()
   588  	defer l.lock.Unlock()
   589  	// get count
   590  	var count int
   591  	for _, s := range l.segments {
   592  		count += len(s.entries)
   593  	}
   594  	// return count
   595  	return count
   596  }
   597  
   598  // FirstIndex returns the write-ahead logs first index
   599  func (l *SWAL) FirstIndex() int64 {
   600  	// lock
   601  	l.lock.Lock()
   602  	defer l.lock.Unlock()
   603  	return l.firstIndex
   604  }
   605  
   606  // LastIndex returns the write-ahead logs first index
   607  func (l *SWAL) LastIndex() int64 {
   608  	// lock
   609  	l.lock.Lock()
   610  	defer l.lock.Unlock()
   611  	return l.lastIndex
   612  }
   613  
   614  // Close syncs and closes the write-ahead log
   615  func (l *SWAL) Close() error {
   616  	// lock
   617  	l.lock.Lock()
   618  	defer l.lock.Unlock()
   619  	// sync and close writer
   620  	err := l.w.Close()
   621  	if err != nil {
   622  		return err
   623  	}
   624  	// close reader
   625  	err = l.r.Close()
   626  	if err != nil {
   627  		return err
   628  	}
   629  	// clean everything else up
   630  	l.r = nil
   631  	l.w = nil
   632  	l.firstIndex = 0
   633  	l.lastIndex = 0
   634  	l.segments = nil
   635  	l.active = nil
   636  	// force gc for good measure
   637  	runtime.GC()
   638  	return nil
   639  }
   640  
   641  // String is the stringer method for the write-ahead log
   642  func (l *SWAL) String() string {
   643  	var ss string
   644  	ss += fmt.Sprintf("\n\n[write-ahead log]\n")
   645  	ss += fmt.Sprintf("base: %q\n", l.conf.BasePath)
   646  	ss += fmt.Sprintf("firstIndex: %d\n", l.firstIndex)
   647  	ss += fmt.Sprintf("lastIndex: %d\n", l.lastIndex)
   648  	ss += fmt.Sprintf("segments: %d\n", len(l.segments))
   649  	if l.active != nil {
   650  		ss += fmt.Sprintf("active: %q\n", filepath.Base(l.active.path))
   651  	}
   652  	if len(l.segments) > 0 {
   653  		for i, s := range l.segments {
   654  			ss += fmt.Sprintf("segment[%d]:\n", i)
   655  			ss += fmt.Sprintf("\tpath: %q\n", filepath.Base(s.path))
   656  			ss += fmt.Sprintf("\tindex: %d\n", s.index)
   657  			ss += fmt.Sprintf("\tentries: %d\n", len(s.entries))
   658  			ss += fmt.Sprintf("\tremaining: %d\n", s.remaining)
   659  		}
   660  	}
   661  	ss += "\n"
   662  	return ss
   663  }