github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/committed/diff.go (about)

     1  package committed
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  
     8  	"github.com/treeverse/lakefs/pkg/graveler"
     9  )
    10  
    11  type iteratorValue struct {
    12  	rng    *Range
    13  	record *graveler.ValueRecord
    14  	err    error
    15  }
    16  
    17  // ErrNoRange occurs when calling nextRange while not in a range, could happen when the diff is currently comparing keys in two different ranges
    18  var ErrNoRange = errors.New("diff is not currently in a range")
    19  
    20  // currentRangeData holds state of the current RangeDiff
    21  type currentRangeData struct {
    22  	iter             Iterator
    23  	value            *iteratorValue
    24  	currentRangeDiff *RangeDiff
    25  }
    26  
    27  type diffIterator struct {
    28  	ctx          context.Context
    29  	left         Iterator
    30  	right        Iterator
    31  	leftValue    iteratorValue
    32  	rightValue   iteratorValue
    33  	currentRange currentRangeData
    34  	currentDiff  *graveler.Diff
    35  	err          error
    36  	state        diffIteratorState
    37  }
    38  
    39  // currentRangeLeftIdentity returns the current range identity in case the current range is the left range, otherwise returns nil
    40  func (d diffIterator) currentRangeLeftIdentity() (res []byte) {
    41  	if d.currentRange.iter == d.left {
    42  		res = make([]byte, len(d.currentRange.value.record.Identity))
    43  		copy(res, d.currentRange.value.record.Identity)
    44  	}
    45  	return
    46  }
    47  
    48  type diffIteratorState int
    49  
    50  const (
    51  	diffIteratorStatePreInit diffIteratorState = iota
    52  	diffIteratorStateOpen
    53  	diffIteratorStateClosed
    54  )
    55  
    56  type diffIteratorCompareResult int
    57  
    58  const (
    59  	diffItCompareResultDone diffIteratorCompareResult = iota
    60  	diffItCompareResultSameRanges
    61  	diffItCompareResultSameIdentities
    62  	diffItCompareResultSameKeys
    63  	diffItCompareResultNeedStartRangeBoth
    64  	diffItCompareResultNeedStartRangeLeft
    65  	diffItCompareResultNeedStartRangeRight
    66  	diffItCompareResultLeftBeforeRight
    67  	diffItCompareResultRightBeforeLeft
    68  	diffItCompareResultRightRangeBeforeLeft
    69  	diffItCompareResultLeftRangeBeforeRight
    70  	diffItCompareResultSameBounds
    71  )
    72  
    73  func NewDiffIterator(ctx context.Context, left Iterator, right Iterator) DiffIterator {
    74  	return &diffIterator{
    75  		ctx:   ctx,
    76  		left:  left,
    77  		right: right,
    78  	}
    79  }
    80  
    81  func diffIteratorNextValue(it Iterator) (*graveler.ValueRecord, *Range, error) {
    82  	if it.Next() {
    83  		rec, rng := it.Value()
    84  		return rec, rng, nil
    85  	}
    86  	return nil, nil, it.Err()
    87  }
    88  
    89  func diffIteratorNextRange(it Iterator) (*graveler.ValueRecord, *Range, error) {
    90  	if it.NextRange() {
    91  		val, rng := it.Value()
    92  		return val, rng, nil
    93  	}
    94  	return nil, nil, it.Err()
    95  }
    96  
    97  func (d *diffIterator) setCurrentRangeRight() {
    98  	d.currentRange.iter = d.right
    99  	d.currentRange.value = &d.rightValue
   100  	d.currentRange.currentRangeDiff = &RangeDiff{
   101  		Type:  graveler.DiffTypeAdded,
   102  		Range: d.rightValue.rng.Copy(),
   103  	}
   104  	d.currentDiff = nil
   105  }
   106  
   107  func (d *diffIterator) setCurrentRangeBoth() {
   108  	d.currentRange.iter = nil
   109  	d.currentRange.value = &d.rightValue
   110  	d.currentRange.currentRangeDiff = &RangeDiff{
   111  		Type:         graveler.DiffTypeChanged,
   112  		Range:        d.rightValue.rng.Copy(),
   113  		LeftIdentity: d.leftValue.rng.ID,
   114  	}
   115  	d.currentDiff = nil
   116  }
   117  
   118  func (d *diffIterator) setCurrentRangeLeft() {
   119  	d.currentRange.iter = d.left
   120  	d.currentRange.value = &d.leftValue
   121  	d.currentRange.currentRangeDiff = &RangeDiff{
   122  		Type:  graveler.DiffTypeRemoved,
   123  		Range: d.leftValue.rng.Copy(),
   124  	}
   125  	d.leftValue.record = nil
   126  	d.currentDiff = nil
   127  }
   128  
   129  func (d *diffIterator) clearCurrentRange() {
   130  	d.currentRange.iter = nil
   131  	d.currentRange.value = nil
   132  	d.currentRange.currentRangeDiff = nil
   133  }
   134  
   135  func (d *diffIterator) compareDiffKeys() int {
   136  	if d.leftValue.rng == nil {
   137  		return 1
   138  	}
   139  	if d.rightValue.rng == nil {
   140  		return -1
   141  	}
   142  	return bytes.Compare(getCurrentKey(d.left), getCurrentKey(d.right))
   143  }
   144  
   145  func (d *diffIterator) compareDiffIterators() diffIteratorCompareResult {
   146  	leftRange := d.leftValue.rng
   147  	rightRange := d.rightValue.rng
   148  	if leftRange == nil && rightRange == nil {
   149  		return diffItCompareResultDone
   150  	}
   151  	if leftRange != nil && rightRange != nil && leftRange.ID == rightRange.ID {
   152  		return diffItCompareResultSameRanges
   153  	}
   154  	leftStartRange := leftRange != nil && d.leftValue.record == nil
   155  	rightStartRange := rightRange != nil && d.rightValue.record == nil
   156  	leftBeforeRight := leftStartRange && rightRange == nil
   157  	rightBeforeLeft := rightStartRange && leftRange == nil
   158  	sameBounds := false
   159  	if leftStartRange && rightStartRange {
   160  		leftBeforeRight = bytes.Compare(leftRange.MaxKey, rightRange.MinKey) < 0
   161  		rightBeforeLeft = bytes.Compare(rightRange.MaxKey, leftRange.MinKey) < 0
   162  		sameBounds = bytes.Equal(leftRange.MinKey, rightRange.MinKey) && bytes.Equal(leftRange.MaxKey, rightRange.MaxKey)
   163  	}
   164  	comp := d.compareDiffKeys()
   165  	switch {
   166  	case leftBeforeRight:
   167  		return diffItCompareResultLeftRangeBeforeRight
   168  	case rightBeforeLeft:
   169  		return diffItCompareResultRightRangeBeforeLeft
   170  	case leftStartRange && rightStartRange && comp == 0 && sameBounds:
   171  		return diffItCompareResultSameBounds
   172  	case leftStartRange && rightStartRange && comp == 0:
   173  		return diffItCompareResultNeedStartRangeBoth
   174  	case leftStartRange && comp <= 0:
   175  		return diffItCompareResultNeedStartRangeLeft
   176  	case rightStartRange && comp >= 0:
   177  		return diffItCompareResultNeedStartRangeRight
   178  	case comp == 0 && bytes.Equal(d.leftValue.record.Identity, d.rightValue.record.Identity):
   179  		return diffItCompareResultSameIdentities
   180  	case comp == 0:
   181  		return diffItCompareResultSameKeys
   182  	case comp < 0:
   183  		return diffItCompareResultLeftBeforeRight
   184  	default:
   185  		return diffItCompareResultRightBeforeLeft
   186  	}
   187  }
   188  
   189  func (d *diffIterator) Next() bool {
   190  	if d.state == diffIteratorStateClosed || d.err != nil {
   191  		return false
   192  	}
   193  	if d.state == diffIteratorStatePreInit {
   194  		d.state = diffIteratorStateOpen
   195  		d.clearCurrentRange()
   196  		d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left)
   197  		d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right)
   198  	}
   199  	if d.currentRange.iter != nil {
   200  		// we are currently inside a range
   201  		d.currentRange.value.record, d.currentRange.value.rng, d.currentRange.value.err = diffIteratorNextValue(d.currentRange.iter)
   202  		if d.currentRange.value.err != nil {
   203  			d.err = d.currentRange.value.err
   204  			d.currentDiff = nil
   205  			d.clearCurrentRange()
   206  			return false
   207  		}
   208  		if d.currentRange.value.record != nil {
   209  			leftIdentity := d.currentRangeLeftIdentity()
   210  			d.currentDiff = &graveler.Diff{Type: d.currentRange.currentRangeDiff.Type, Key: d.currentRange.value.record.Key.Copy(), Value: d.currentRange.value.record.Value, LeftIdentity: leftIdentity}
   211  			return true
   212  		}
   213  		// current diff range over - clear current range and continue to get next range/value
   214  		d.clearCurrentRange()
   215  	}
   216  	select {
   217  	case <-d.ctx.Done():
   218  		d.err = d.ctx.Err()
   219  		return false
   220  	default:
   221  		for {
   222  			if d.rightValue.err != nil {
   223  				d.err = d.rightValue.err
   224  			}
   225  			if d.leftValue.err != nil {
   226  				d.err = d.leftValue.err
   227  			}
   228  			if d.err != nil {
   229  				d.currentDiff = nil
   230  				d.clearCurrentRange()
   231  				return false
   232  			}
   233  			compareResult := d.compareDiffIterators()
   234  			switch compareResult {
   235  			case diffItCompareResultDone:
   236  				d.currentDiff = nil
   237  				return false
   238  			case diffItCompareResultSameRanges:
   239  				d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextRange(d.left)
   240  				d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextRange(d.right)
   241  			case diffItCompareResultLeftRangeBeforeRight:
   242  				d.setCurrentRangeLeft()
   243  				return true
   244  			case diffItCompareResultRightRangeBeforeLeft:
   245  				d.setCurrentRangeRight()
   246  				return true
   247  			case diffItCompareResultSameBounds:
   248  				d.setCurrentRangeBoth()
   249  				d.state = diffIteratorStatePreInit
   250  				return true
   251  			case diffItCompareResultSameKeys:
   252  				// same keys on different ranges
   253  				d.currentDiff = &graveler.Diff{Type: graveler.DiffTypeChanged, Key: d.rightValue.record.Key.Copy(), Value: d.rightValue.record.Value, LeftIdentity: d.leftValue.record.Identity}
   254  				d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left)
   255  				d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right)
   256  				return true
   257  			case diffItCompareResultSameIdentities, diffItCompareResultNeedStartRangeBoth:
   258  				d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left)
   259  				d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right)
   260  			case diffItCompareResultNeedStartRangeLeft:
   261  				d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left)
   262  			case diffItCompareResultNeedStartRangeRight:
   263  				d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right)
   264  			case diffItCompareResultLeftBeforeRight:
   265  				// nothing on right, or left before right
   266  				d.currentDiff = &graveler.Diff{Type: graveler.DiffTypeRemoved, Key: d.leftValue.record.Key.Copy(), Value: d.leftValue.record.Value, LeftIdentity: d.leftValue.record.Identity}
   267  				d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextValue(d.left)
   268  				return true
   269  			case diffItCompareResultRightBeforeLeft:
   270  				// nothing on left, or right before left
   271  				d.currentDiff = &graveler.Diff{Type: graveler.DiffTypeAdded, Key: d.rightValue.record.Key.Copy(), Value: d.rightValue.record.Value}
   272  				d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextValue(d.right)
   273  				return true
   274  			}
   275  		}
   276  	}
   277  }
   278  
   279  func (d *diffIterator) NextRange() bool {
   280  	if d.currentRange.currentRangeDiff != nil && d.currentRange.currentRangeDiff.Type == graveler.DiffTypeChanged {
   281  		d.leftValue.record, d.leftValue.rng, d.leftValue.err = diffIteratorNextRange(d.left)
   282  		d.rightValue.record, d.rightValue.rng, d.rightValue.err = diffIteratorNextRange(d.right)
   283  		d.clearCurrentRange()
   284  		d.state = diffIteratorStateOpen
   285  		return d.Next()
   286  	}
   287  	if d.currentRange.iter == nil {
   288  		d.err = ErrNoRange
   289  		return false
   290  	}
   291  	d.currentRange.value.record, d.currentRange.value.rng, d.currentRange.value.err = diffIteratorNextRange(d.currentRange.iter)
   292  	d.clearCurrentRange()
   293  	return d.Next()
   294  }
   295  
   296  func (d *diffIterator) SeekGE(id graveler.Key) {
   297  	d.left.SeekGE(id)
   298  	d.right.SeekGE(id)
   299  	d.currentDiff = nil
   300  	d.leftValue = iteratorValue{}
   301  	d.rightValue = iteratorValue{}
   302  	d.err = nil
   303  	d.state = diffIteratorStatePreInit
   304  	d.currentRange = currentRangeData{}
   305  }
   306  
   307  func (d *diffIterator) Value() (*graveler.Diff, *RangeDiff) {
   308  	return d.currentDiff, d.currentRange.currentRangeDiff
   309  }
   310  
   311  func (d *diffIterator) Err() error {
   312  	return d.err
   313  }
   314  
   315  func (d *diffIterator) Close() {
   316  	d.left.Close()
   317  	d.right.Close()
   318  	d.currentDiff = nil
   319  	d.err = nil
   320  	d.state = diffIteratorStateClosed
   321  }
   322  
   323  func getCurrentKey(it Iterator) []byte {
   324  	val, rng := it.Value()
   325  	if val == nil {
   326  		return rng.MinKey
   327  	}
   328  	return val.Key
   329  }