github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/reflog_ring_buffer.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nbs
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"sync"
    21  	"time"
    22  )
    23  
    24  // errUnsafeIteration is returned when iterating through a ring buffer too slowly and new, inserted data is detected
    25  // as wrapping around into the iteration range.
    26  var errUnsafeIteration = errors.New(
    27  	"unable to finish iteration: insertion index has wrapped around into iteration range")
    28  
    29  // reflogRootHashEntry is a data container for a root hash update that was recorded to the chunk journal. It contains
    30  // the root and the time at which it was written.
    31  type reflogRootHashEntry struct {
    32  	root      string
    33  	timestamp time.Time
    34  }
    35  
    36  // reflogRingBuffer is a fixed size circular buffer that allows the most recent N entries to be iterated over (where
    37  // N is equal to the size requested when this ring buffer is constructed. Its locking strategy assumes that
    38  // only new entries are written to the head (through Push) and that existing entries will never need to be
    39  // updated. Internally, it allocates a slice that is twice as large as the requested size, so that less locking
    40  // is needed when iterating over entries to read them.
    41  type reflogRingBuffer struct {
    42  	items         []reflogRootHashEntry
    43  	mu            *sync.Mutex
    44  	requestedSize int
    45  	totalSize     int
    46  	insertIndex   int
    47  	itemCount     int
    48  	epoch         uint
    49  }
    50  
    51  // newReflogRingBuffer creates a new reflogRingBuffer that allows the reflog to query up to |size| records.
    52  // Internally, the ring buffer allocates extra storage so that |size| records can be read while new root entries
    53  // are still being recorded.
    54  func newReflogRingBuffer(size int) *reflogRingBuffer {
    55  	if size < 0 {
    56  		panic(fmt.Sprintf("invalid size specified in newReflogRingBuffer construction: %d", size))
    57  	}
    58  
    59  	return &reflogRingBuffer{
    60  		requestedSize: size,
    61  		totalSize:     size * 2,
    62  		items:         make([]reflogRootHashEntry, size*2),
    63  		mu:            &sync.Mutex{},
    64  		insertIndex:   0,
    65  		itemCount:     0,
    66  		epoch:         1,
    67  	}
    68  }
    69  
    70  // Push pushes |newItem| onto this ring buffer, replacing the oldest entry in this ring buffer once the buffer
    71  // is fully populated.
    72  func (rb *reflogRingBuffer) Push(newItem reflogRootHashEntry) {
    73  	rb.mu.Lock()
    74  	defer rb.mu.Unlock()
    75  
    76  	rb.items[rb.insertIndex] = newItem
    77  	rb.insertIndex = (rb.insertIndex + 1) % len(rb.items)
    78  	if rb.insertIndex == 0 {
    79  		rb.epoch++
    80  	}
    81  
    82  	if rb.itemCount < rb.requestedSize {
    83  		rb.itemCount++
    84  	}
    85  }
    86  
    87  // Iterate traverses the entries in this ring buffer and invokes the specified callback function, |f|, on each
    88  // entry. Iteration starts with the oldest entries inserted into this ring buffer and ends with the most recent
    89  // entry. This function will iterate over at most N entries, where N is the requested size the caller specified
    90  // when constructing this ring buffer.
    91  func (rb *reflogRingBuffer) Iterate(f func(item reflogRootHashEntry) error) error {
    92  	startPosition, endPosition, startingEpoch := rb.getIterationIndexes()
    93  	if startPosition == endPosition {
    94  		return nil
    95  	}
    96  
    97  	for idx := startPosition; ; {
    98  		// The ring buffer holds twice as many entries as we ever expose through the Iterate function, so that
    99  		// entries can still be inserted without having to lock the whole ring buffer during iteration. However,
   100  		// as a sanity check, before we look at an index, we make sure the current insertion index hasn't
   101  		// gone into the range we're iterating.
   102  		if rb.insertionIndexIsInRange(startPosition, endPosition, startingEpoch) {
   103  			return errUnsafeIteration
   104  		}
   105  
   106  		err := f(rb.items[idx])
   107  		if err != nil {
   108  			return err
   109  		}
   110  
   111  		// Move to next spot
   112  		idx = (idx + 1) % rb.totalSize
   113  		if idx == endPosition {
   114  			break
   115  		}
   116  	}
   117  
   118  	return nil
   119  }
   120  
   121  // Truncate resets this ring buffer so that it is empty.
   122  func (rb *reflogRingBuffer) Truncate() {
   123  	rb.mu.Lock()
   124  	defer rb.mu.Unlock()
   125  	rb.itemCount = 0
   126  }
   127  
   128  // getIterationIndexes returns the start (inclusive) and end (exclusive) positions for iterating over the
   129  // entries in this ring buffer, as well as the current epoch, or generation of the ring buffer for the starting
   130  // position. Note that the end position may be less than the start position, which indicates that iteration
   131  // wraps around the ring buffer.
   132  func (rb *reflogRingBuffer) getIterationIndexes() (int, int, uint) {
   133  	rb.mu.Lock()
   134  	defer rb.mu.Unlock()
   135  
   136  	// If the buffer is empty, return the start position equal to the end position so that iteration is a no-op
   137  	if rb.itemCount == 0 || rb.totalSize == 0 {
   138  		return rb.insertIndex, rb.insertIndex, rb.epoch
   139  	}
   140  
   141  	// When the ring buffer isn't fully populated yet, we need to be careful to limit iteration to the number
   142  	// of items that have actually been inserted. Once more entries have been inserted than the requested size
   143  	// of this ring buffer, we will iterate over only the most recent entries and limit to the requested size.
   144  	itemCount := rb.itemCount
   145  	if itemCount > rb.requestedSize {
   146  		itemCount = rb.requestedSize
   147  	}
   148  
   149  	endPosition := rb.insertIndex
   150  	startPosition := (endPosition - itemCount) % rb.totalSize
   151  	epoch := rb.epoch
   152  	if startPosition < 0 {
   153  		startPosition = rb.totalSize + startPosition
   154  		epoch--
   155  	}
   156  
   157  	return startPosition, endPosition, epoch
   158  }
   159  
   160  // insertionIndexIsInRange returns true if the current insertion pointer for this ring buffer is within the
   161  // specified |rangeStart| and |rangeEnd| indexes. The |startingEpoch| parameter is used to determine if the
   162  // current insertion index has wrapped around the ring buffer, possibly multiple times.
   163  func (rb *reflogRingBuffer) insertionIndexIsInRange(rangeStart, rangeEnd int, startingEpoch uint) bool {
   164  	rb.mu.Lock()
   165  	currentInsertIndex := rb.insertIndex
   166  	currentEpoch := rb.epoch
   167  	rb.mu.Unlock()
   168  
   169  	// When the epoch value overflows and wraps around to 0 again, adjust the starting epoch accordingly
   170  	epochDelta := currentEpoch - startingEpoch
   171  	if epochDelta < 0 {
   172  		maxUint := ^uint(0)
   173  		epochDelta += maxUint
   174  	}
   175  
   176  	// If the range wraps around the ring buffer, adjust currentInsertIndex and rangeEnd
   177  	// so that we can use the same logic for an in range check.
   178  	if rangeStart > rangeEnd {
   179  		currentInsertIndex += rb.totalSize
   180  		rangeEnd += rb.totalSize
   181  		epochDelta--
   182  	}
   183  
   184  	switch epochDelta {
   185  	case 0:
   186  		// same epoch
   187  		return currentInsertIndex >= rangeStart && currentInsertIndex < rangeEnd
   188  	case 1:
   189  		return currentInsertIndex >= rangeStart
   190  	default:
   191  		return true
   192  	}
   193  }